Added Blowpass scraper. Split shootId and pageId.

This commit is contained in:
ThePendulum 2019-04-06 23:24:26 +02:00
parent 069c2c1628
commit 3a90f98d41
14 changed files with 208 additions and 11 deletions

View File

@ -2,6 +2,12 @@
The latest releases from your favorite porn studios in one place.
## Supported networks & sites
* **Blowpass**
* 1000 Facials
* Immoral Live
* Mommy Blows Best
* Only Teen Blowjobs
* Throated
* **Brazzers**
* Asses In Public
* Baby Got Boobs

View File

@ -5,6 +5,8 @@ exports.up = knex => Promise.resolve()
table.increments('id', 8);
table.string('name');
table.string('gender', 18);
table.integer('alias_for', 8)
.references('id')
.inTable('actors');
@ -58,7 +60,9 @@ exports.up = knex => Promise.resolve()
.inTable('sites');
table.string('shoot_id');
table.string('entry_id');
table.unique(['site_id', 'shoot_id']);
table.unique(['site_id', 'entry_id']);
table.string('url');
table.string('title');
@ -116,4 +120,5 @@ exports.down = knex => Promise.resolve()
.then(() => knex.schema.dropTable('sites'))
.then(() => knex.schema.dropTable('networks'))
.then(() => knex.schema.dropTable('actors'))
.then(() => knex.schema.dropTable('directors'))
.then(() => knex.schema.dropTable('tags'));

View File

@ -4,6 +4,12 @@
exports.seed = knex => Promise.resolve()
.then(() => knex('networks').del())
.then(() => knex('networks').insert([
{
id: 'blowpass',
name: 'Blowpass',
url: 'https://www.blowpass.com',
description: 'Welcome to Blowpass.com, your ultimate source for deepthroat porn, MILF and teen blowjob videos, big cumshots and any and everything oral!',
},
{
id: 'brazzers',
name: 'Brazzers',

View File

@ -4,6 +4,47 @@
exports.seed = knex => Promise.resolve()
.then(() => knex('sites').del())
.then(() => knex('sites').insert([
// BLOWPASS
{
id: '1000facials',
name: '1000 Facials',
label: '1000fc',
url: 'https://www.1000facials.com',
description: 'Welcome to 1000Facials.com, your source for the best facial porn with huge cumshots on your favorite teen and MILF pornstars. Watch all the blowjob action inside!',
network_id: 'blowpass',
},
{
id: 'immorallive',
name: 'Immoral Live',
label: 'imlive',
url: 'https://www.immorallive.com',
description: 'Watch live sex shows and videos on ImmoralLive.com, featuring wild and crazy sex orgies, group sex, blowjob competitions and toy play from the famous Porno Dan. The hottest pornstars and amateur girls cum hard inside',
network_id: 'blowpass',
},
{
id: 'mommyblowsbest',
name: 'Mommy Blows Best',
label: 'momblb',
url: 'https://www.mommyblowsbest.com',
description: 'Welcome to MommyBlowsBest.com. Home to thousands of MILF blowjobs and hot mom porn! Come see why experience counts, right here at MommyBlowsBest.com!',
network_id: 'blowpass',
},
{
id: 'onlyteenblowjobs',
name: 'Only Teen Blowjobs',
label: 'teenbj',
url: 'https://www.onlyteenblowjobs.com',
description: 'OnlyTeenBlowjobs.com brings you the best teen blowjob porn featuring today\'s hottest young pornstars and amateurs. Watch as teens use their little mouths to suck and deepthroat the biggest of cocks!',
network_id: 'blowpass',
},
{
id: 'throated',
name: 'Throated',
label: 'throat',
url: 'https://www.throated.com',
description: 'Throated.com is your portal for extreme throat fuck porn, face fucking videos and deepthroat gagging pornstars. Watch teens and MILFs go balls deep, swallowing cock in HD!',
network_id: 'blowpass',
},
// BRAZZERS
{
id: 'momsincontrol',

View File

@ -272,6 +272,10 @@ exports.seed = knex => Promise.resolve()
tag: 'nipple clamps',
alias_for: null,
},
{
tag: 'oral creampie',
alias_for: null,
},
{
tag: 'pain',
alias_for: null,
@ -527,6 +531,10 @@ exports.seed = knex => Promise.resolve()
tag: 'crop', // a type of whip, not short for corporal
alias_for: 'corporal punishment',
},
{
tag: 'cum in mouth',
alias_for: 'oral creampie',
},
{
tag: 'cum swallowing',
alias_for: 'swallowing',
@ -683,6 +691,10 @@ exports.seed = knex => Promise.resolve()
tag: 'MFF',
alias_for: 'FMF',
},
{
tag: 'oral',
alias_for: 'blowjob',
},
{
tag: 'piercing',
alias_for: 'piercings',

View File

@ -56,17 +56,20 @@ async function accumulateIncludedSites() {
return curateSites(rawSites);
}
// Looks up rows in the `releases` table that match the IDs of freshly scraped releases.
// This span is a rendered diff hunk: the previous single-parameter version and its
// replacement are interleaved below without +/- markers.
// NOTE(review): `_siteId` is accepted by the new signature but is not used in the query shown here.
// NOTE(review): the new version reads `release.pageId`, while the scrapers visible in this commit
// return `entryId` — confirm which property is intended.
// NOTE(review): both lookups target `shoot_id`, although the migration in this commit adds an
// `entry_id` column — confirm whether the second lookup should use it.
async function findDuplicateReleases(latestReleases) {
const latestReleasesIds = latestReleases.map(release => release.shootId);
async function findDuplicateReleases(latestReleases, _siteId) {
// releases without the respective ID are dropped before querying
const latestReleasesShootIds = latestReleases.map(release => release.shootId).filter(release => release !== undefined);
const latestReleasesPageIds = latestReleases.map(release => release.pageId).filter(release => release !== undefined);
return knex('releases')
.whereIn('shoot_id', latestReleasesIds);
.whereIn('shoot_id', latestReleasesShootIds)
.orWhereIn('shoot_id', latestReleasesPageIds);
}
async function storeReleases(releases) {
const curatedReleases = releases.map(release => ({
site_id: release.site.id,
shoot_id: release.shootId || null,
entry_id: release.entry_id || null,
url: release.url,
title: release.title,
date: release.date,
@ -93,7 +96,7 @@ async function storeReleases(releases) {
async function fetchNewReleases(scraper, site, afterDate, accReleases = [], page = 1) {
const latestReleases = await scraper.fetchLatest(site, page);
const duplicateReleases = await findDuplicateReleases(latestReleases);
const duplicateReleases = await findDuplicateReleases(latestReleases, site.id);
const duplicateReleasesShootIds = new Set(
duplicateReleases
.map(release => release.shoot_id)

View File

@ -19,6 +19,7 @@ async function findSite(url) {
.orWhere({ url: `${protocol}//${hostname}` })
.first();
return {
id: site.id,
name: site.name,

110
src/scrapers/blowpass.js Normal file
View File

@ -0,0 +1,110 @@
'use strict';
/* eslint-disable */
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const moment = require('moment');
const { matchTags } = require('../tags');
/**
 * Parse a video listing page into release summaries.
 *
 * @param {string} html - Markup of a listing page containing `.sceneList .scene` elements.
 * @param {Object} site - Site record; `site.url` is used as the base of each scene URL and
 *   the record itself is attached to every release.
 * @returns {Object[]} One `{ url, entryId, title, actors, date, rating, site }` object per scene element.
 */
function scrape(html, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });

  return $('.sceneList .scene')
    .toArray()
    .map((sceneNode) => {
      const scene = $(sceneNode);
      const link = scene.find('.sceneTitle a');

      // only the last two segments of the listing href are kept; they are re-rooted on the site's own URL
      const scenePath = link.attr('href').split('/').slice(-2).join('/');

      const actorNames = scene
        .find('.sceneActors a')
        .map((index, actorNode) => $(actorNode).text())
        .toArray();

      return {
        url: `${site.url}/en/scene/${scenePath}`,
        entryId: scene.attr('data-itemid'),
        title: link.attr('title'),
        actors: actorNames,
        date: moment.utc(scene.find('.sceneDate').text(), 'MM-DD-YYYY').toDate(),
        rating: {
          likes: Number(scene.find('.rating .state_1 .value').text()),
        },
        site,
      };
    });
}
/**
 * Parse a scene page into a full release record.
 *
 * Most fields come from the page's JSON-LD block; the like/dislike counters are read
 * from the markup under `#wrapper`.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Scene URL; its last path segment becomes `entryId`.
 * @param {Object} site - Site record, attached to the release as-is.
 * @returns {Promise<Object>} Release with `url`, `shootId`, `entryId`, `title`, `description`,
 *   `actors`, `date`, `duration`, `tags`, `rating` and `site`.
 * @throws {SyntaxError} When the JSON-LD block is missing or is not valid JSON.
 */
async function scrapeScene(html, url, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });
  const json = $('script[type="application/ld+json"]').html();
  // only the last entry of the parsed JSON-LD payload is used
  const data = JSON.parse(json).slice(-1)[0];
  const sceneElement = $('#wrapper');

  // the parent work's name is split on ' - '; a shoot ID is only assumed when there is more than one part
  const workName = data.isPartOf.name.split(' - ');
  const shootId = workName.length > 1 ? workName[0] : null;
  const entryId = url.split('/').slice(-1)[0];

  const { name: title, description } = data;
  const date = moment.utc(data.isPartOf.datePublished, 'YYYY-MM-DD').toDate();

  // Actors are taken from JSON-LD rather than the `.sceneActors` links because it carries a gender per actor.
  // The comparator must read each actor's `gender` property; sorting a copy leaves the parsed payload untouched.
  // NOTE(review): with these return values male actors are ordered before female ones — confirm the desired order.
  const actors = data.actor
    .slice()
    .sort(({ gender: genderA }, { gender: genderB }) => {
      if (genderA === 'female' && genderB === 'male') return 1;
      if (genderA === 'male' && genderB === 'female') return -1;

      return 0;
    })
    .map(actor => actor.name);

  const likes = Number(sceneElement.find('.rating .state_1 .value').text());
  const dislikes = Number(sceneElement.find('.rating .state_2 .value').text());

  // the first two characters of the duration string are dropped before parsing
  // (presumably an ISO 8601 'PT' prefix — confirm against live data)
  const duration = moment.duration(data.duration.slice(2)).asSeconds();

  const rawTags = data.keywords.split(', ');
  const tags = await matchTags(rawTags);

  return {
    url,
    shootId,
    entryId,
    title,
    description,
    actors,
    date,
    duration,
    tags,
    rating: {
      likes,
      dislikes,
    },
    site,
  };
}
/**
 * Fetch one page of a site's latest-videos listing from blowpass.com and parse it.
 *
 * @param {Object} site - Site record; `site.id` is used as the path segment in the listing URL.
 * @param {number} [page=1] - Listing page number, appended as the last URL segment.
 * @returns {Promise<*>} Whatever `scrape` returns for the fetched markup.
 */
async function fetchLatest(site, page = 1) {
  const listingUrl = `https://www.blowpass.com/en/videos/${site.id}/latest/All-Categories/0/All-Pornstars/0/${page}`;
  const response = await bhttp.get(listingUrl);
  const html = response.body.toString();

  return scrape(html, site);
}
/**
 * Fetch a site's upcoming-videos listing from blowpass.com and parse it.
 *
 * @param {Object} site - Site record; `site.id` is used as the path segment in the listing URL.
 * @returns {Promise<*>} Whatever `scrape` returns for the fetched markup.
 */
async function fetchUpcoming(site) {
  const upcomingUrl = `https://www.blowpass.com/en/videos/${site.id}/upcoming`;
  const response = await bhttp.get(upcomingUrl);
  const html = response.body.toString();

  return scrape(html, site);
}
/**
 * Fetch a single scene page from blowpass.com and parse it.
 *
 * The last two path segments of `url` are reused under the network's own
 * `/en/video/<site.id>/` path; the original `url` is still passed on to the parser.
 *
 * @param {string} url - Absolute scene URL.
 * @param {Object} site - Site record; `site.id` is used as a path segment in the request URL.
 * @returns {Promise<*>} Whatever `scrapeScene` resolves to for the fetched markup.
 */
async function fetchScene(url, site) {
  const { pathname } = new URL(url);
  const scenePath = pathname.split('/').slice(-2).join('/');
  const response = await bhttp.get(`https://www.blowpass.com/en/video/${site.id}/${scenePath}`);

  return scrapeScene(response.body.toString(), url, site);
}
// Scraper interface exposed by this module: the two listing fetchers (latest, upcoming)
// and the single-scene fetcher. The parsing helpers stay private to this file.
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
};

View File

@ -1,5 +1,6 @@
'use strict';
const blowpass = require('./blowpass');
const brazzers = require('./brazzers');
const julesjordan = require('./julesjordan');
const kink = require('./kink');
@ -10,6 +11,7 @@ const vixen = require('./vixen');
const xempire = require('./xempire');
module.exports = {
blowpass,
brazzers,
julesjordan,
kink,

View File

@ -29,6 +29,7 @@ function scrapeLatest(html, site) {
return {
url,
shootId,
entryId: shootId,
title,
actors,
date,
@ -73,6 +74,7 @@ async function scrapeScene(html, url, shootId, ratingRes, site) {
return {
url,
shootId,
entryId: shootId,
title,
date,
actors,

View File

@ -25,13 +25,14 @@ function scrapeLatest(html, site) {
const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
const { shootId, title } = extractTitle(originalTitle);
const internalId = new URL(url).pathname.split('/')[2];
const entryId = new URL(url).pathname.split('/')[2];
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
return {
url,
shootId: shootId || internalId,
shootId,
entryId,
title,
date,
site,
@ -44,6 +45,7 @@ async function scrapeScene(html, url, site) {
const originalTitle = $('h1.watchpage-title').text().trim();
const { shootId, title } = extractTitle(originalTitle);
const entryId = new URL(url).pathname.split('/')[2];
const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
@ -60,6 +62,7 @@ async function scrapeScene(html, url, site) {
return {
url,
shootId,
entryId,
title,
date,
actors,

View File

@ -32,7 +32,6 @@ function scrapeLatest(html, site) {
date,
rating: {
likes,
dislikes: 0,
},
site,
};
@ -79,7 +78,6 @@ async function scrapeScene(html, url, site) {
tags,
rating: {
likes,
dislikes: 0,
},
site: channelSite || site,
};

View File

@ -49,16 +49,20 @@ function scrapeUpcoming(html, site) {
});
}
function scrapeScene(html, url, site) {
async function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const rawTags = [];
const tags = await matchTags(rawTags);
return {
url,
shootId,
title,
actors,
director: '',
director,
date,
tags,
rating: {
likes,
dislikes,
@ -68,7 +72,7 @@ function scrapeScene(html, url, site) {
};
}
async function fetchLatest(site) {
async function fetchLatest(site, page = 1) {
const res = await bhttp.get(`${site.url}/url`);
return scrapeLatest(res.body.toString(), site);

View File

@ -11,6 +11,10 @@ const formatters = {
return '\x1b[90mUnrated\x1b[0m';
}
if (rating.likes !== undefined && rating.dislikes === undefined) {
return `\x1b[93m★\x1b[0m N/A \x1b[92m▲\x1b[0m ${String(rating.likes).padEnd(3)}`;
}
if (rating.stars) {
return `\x1b[93m★ ${rating.stars.toFixed(2)}\x1b[0m`;
}