Added Blowpass scraper. Split shootId and pageId.

This commit is contained in:
ThePendulum 2019-04-06 23:24:26 +02:00
parent 069c2c1628
commit 3a90f98d41
14 changed files with 208 additions and 11 deletions

View File

@ -2,6 +2,12 @@
The latest releases from your favorite porn studios in one place. The latest releases from your favorite porn studios in one place.
## Supported networks & sites ## Supported networks & sites
* **Blowpass**
* 1000 Facials
* Immoral Live
* Mommy Blows Best
* Only Teen Blowjobs
* Throated
* **Brazzers** * **Brazzers**
* Asses In Public * Asses In Public
* Baby Got Boobs * Baby Got Boobs

View File

@ -5,6 +5,8 @@ exports.up = knex => Promise.resolve()
table.increments('id', 8); table.increments('id', 8);
table.string('name'); table.string('name');
table.string('gender', 18);
table.integer('alias_for', 8) table.integer('alias_for', 8)
.references('id') .references('id')
.inTable('actors'); .inTable('actors');
@ -58,7 +60,9 @@ exports.up = knex => Promise.resolve()
.inTable('sites'); .inTable('sites');
table.string('shoot_id'); table.string('shoot_id');
table.string('entry_id');
table.unique(['site_id', 'shoot_id']); table.unique(['site_id', 'shoot_id']);
table.unique(['site_id', 'entry_id']);
table.string('url'); table.string('url');
table.string('title'); table.string('title');
@ -116,4 +120,5 @@ exports.down = knex => Promise.resolve()
.then(() => knex.schema.dropTable('sites')) .then(() => knex.schema.dropTable('sites'))
.then(() => knex.schema.dropTable('networks')) .then(() => knex.schema.dropTable('networks'))
.then(() => knex.schema.dropTable('actors')) .then(() => knex.schema.dropTable('actors'))
.then(() => knex.schema.dropTable('directors'))
.then(() => knex.schema.dropTable('tags')); .then(() => knex.schema.dropTable('tags'));

View File

@ -4,6 +4,12 @@
exports.seed = knex => Promise.resolve() exports.seed = knex => Promise.resolve()
.then(() => knex('networks').del()) .then(() => knex('networks').del())
.then(() => knex('networks').insert([ .then(() => knex('networks').insert([
{
id: 'blowpass',
name: 'Blowpass',
url: 'https://www.blowpass.com',
description: 'Welcome to Blowpass.com, your ultimate source for deepthroat porn, MILF and teen blowjob videos, big cumshots and any and everything oral!',
},
{ {
id: 'brazzers', id: 'brazzers',
name: 'Brazzers', name: 'Brazzers',

View File

@ -4,6 +4,47 @@
exports.seed = knex => Promise.resolve() exports.seed = knex => Promise.resolve()
.then(() => knex('sites').del()) .then(() => knex('sites').del())
.then(() => knex('sites').insert([ .then(() => knex('sites').insert([
// BLOWPASS
{
id: '1000facials',
name: '1000 Facials',
label: '1000fc',
url: 'https://www.1000facials.com',
description: 'Welcome to 1000Facials.com, your source for the best facial porn with huge cumshots on your favorite teen and MILF pornstars. Watch all the blowjob action inside!',
network_id: 'blowpass',
},
{
id: 'immorallive',
name: 'Immoral Live',
label: 'imlive',
url: 'https://www.immorallive.com',
description: 'Watch live sex shows and videos on ImmoralLive.com, featuring wild and crazy sex orgies, group sex, blowjob competitions and toy play from the famous Porno Dan. The hottest pornstars and amateur girls cum hard inside',
network_id: 'blowpass',
},
{
id: 'mommyblowsbest',
name: 'Mommy Blows Best',
label: 'momblb',
url: 'https://www.mommyblowsbest.com',
description: 'Welcome to MommyBlowsBest.com. Home to thousands of MILF blowjobs and hot mom porn! Come see why experience counts, right here at MommyBlowsBest.com!',
network_id: 'blowpass',
},
{
id: 'onlyteenblowjobs',
name: 'Only Teen Blowjobs',
label: 'teenbj',
url: 'https://www.onlyteenblowjobs.com',
description: 'OnlyTeenBlowjobs.com brings you the best teen blowjob porn featuring today\'s hottest young pornstars and amateurs. Watch as teens use their little mouths to suck and deepthroat the biggest of cocks!',
network_id: 'blowpass',
},
{
id: 'throated',
name: 'Throated',
label: 'throat',
url: 'https://www.throated.com',
description: 'Throated.com is your portal for extreme throat fuck porn, face fucking videos and deepthroat gagging pornstars. Watch teens and MILFs go balls deep, swallowing cock in HD!',
network_id: 'blowpass',
},
// BRAZZERS // BRAZZERS
{ {
id: 'momsincontrol', id: 'momsincontrol',

View File

@ -272,6 +272,10 @@ exports.seed = knex => Promise.resolve()
tag: 'nipple clamps', tag: 'nipple clamps',
alias_for: null, alias_for: null,
}, },
{
tag: 'oral creampie',
alias_for: null,
},
{ {
tag: 'pain', tag: 'pain',
alias_for: null, alias_for: null,
@ -527,6 +531,10 @@ exports.seed = knex => Promise.resolve()
tag: 'crop', // a type of whip, not short for corporal tag: 'crop', // a type of whip, not short for corporal
alias_for: 'corporal punishment', alias_for: 'corporal punishment',
}, },
{
tag: 'cum in mouth',
alias_for: 'oral creampie',
},
{ {
tag: 'cum swallowing', tag: 'cum swallowing',
alias_for: 'swallowing', alias_for: 'swallowing',
@ -683,6 +691,10 @@ exports.seed = knex => Promise.resolve()
tag: 'MFF', tag: 'MFF',
alias_for: 'FMF', alias_for: 'FMF',
}, },
{
tag: 'oral',
alias_for: 'blowjob',
},
{ {
tag: 'piercing', tag: 'piercing',
alias_for: 'piercings', alias_for: 'piercings',

View File

@ -56,17 +56,20 @@ async function accumulateIncludedSites() {
return curateSites(rawSites); return curateSites(rawSites);
} }
/**
 * Look up stored releases that match any of the freshly scraped releases.
 *
 * A stored row matches when its `shoot_id` equals one of the scraped `shootId`
 * values, or when its `entry_id` equals one of the scraped `entryId` values.
 *
 * @param {Object[]} latestReleases - Scraped releases; each may carry `shootId` and/or `entryId`.
 * @param {string} [_siteId] - Accepted from the caller but not used in the query.
 * @returns {Promise<Object[]>} Resolves with the matching rows from the `releases` table.
 */
async function findDuplicateReleases(latestReleases, _siteId) {
    // Scrapers may leave an ID undefined (not scraped) or null (not available); neither can match a row.
    const isPresent = id => id !== undefined && id !== null;

    const latestReleasesShootIds = latestReleases.map(release => release.shootId).filter(isPresent);
    // Scrapers expose the per-site page identifier as `entryId`; it is compared against `entry_id`.
    const latestReleasesEntryIds = latestReleases.map(release => release.entryId).filter(isPresent);

    return knex('releases')
        .whereIn('shoot_id', latestReleasesShootIds)
        .orWhereIn('entry_id', latestReleasesEntryIds);
}
async function storeReleases(releases) { async function storeReleases(releases) {
const curatedReleases = releases.map(release => ({ const curatedReleases = releases.map(release => ({
site_id: release.site.id, site_id: release.site.id,
shoot_id: release.shootId || null, shoot_id: release.shootId || null,
entry_id: release.entry_id || null,
url: release.url, url: release.url,
title: release.title, title: release.title,
date: release.date, date: release.date,
@ -93,7 +96,7 @@ async function storeReleases(releases) {
async function fetchNewReleases(scraper, site, afterDate, accReleases = [], page = 1) { async function fetchNewReleases(scraper, site, afterDate, accReleases = [], page = 1) {
const latestReleases = await scraper.fetchLatest(site, page); const latestReleases = await scraper.fetchLatest(site, page);
const duplicateReleases = await findDuplicateReleases(latestReleases); const duplicateReleases = await findDuplicateReleases(latestReleases, site.id);
const duplicateReleasesShootIds = new Set( const duplicateReleasesShootIds = new Set(
duplicateReleases duplicateReleases
.map(release => release.shoot_id) .map(release => release.shoot_id)

View File

@ -19,6 +19,7 @@ async function findSite(url) {
.orWhere({ url: `${protocol}//${hostname}` }) .orWhere({ url: `${protocol}//${hostname}` })
.first(); .first();
return { return {
id: site.id, id: site.id,
name: site.name, name: site.name,

110
src/scrapers/blowpass.js Normal file
View File

@ -0,0 +1,110 @@
'use strict';
/* eslint-disable */
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const moment = require('moment');
const { matchTags } = require('../tags');
/**
 * Parse a Blowpass scene listing page into release summaries.
 *
 * @param {string} html - Markup of a listing page.
 * @param {Object} site - Site the listing belongs to; `site.url` prefixes each scene URL and
 *   the object itself is attached to every release.
 * @returns {Object[]} One release per `.sceneList .scene` element, with url, entryId, title,
 *   actors, date, rating.likes and site.
 */
function scrape(html, site) {
    const $ = cheerio.load(html, { normalizeWhitespace: true });

    return $('.sceneList .scene').toArray().map((sceneNode) => {
        const $scene = $(sceneNode);
        const $titleLink = $scene.find('.sceneTitle a');

        // Only the last two path segments of the listing href are carried over into the scene URL.
        const scenePath = $titleLink.attr('href').split('/').slice(-2).join('/');

        return {
            url: `${site.url}/en/scene/${scenePath}`,
            entryId: $scene.attr('data-itemid'),
            title: $titleLink.attr('title'),
            actors: $scene
                .find('.sceneActors a')
                .map((actorIndex, actorNode) => $(actorNode).text())
                .toArray(),
            date: moment.utc($scene.find('.sceneDate').text(), 'MM-DD-YYYY').toDate(),
            rating: {
                likes: Number($scene.find('.rating .state_1 .value').text()),
            },
            site,
        };
    });
}
/**
 * Parse a Blowpass scene page into a detailed release.
 *
 * Most fields are read from the page's JSON-LD (`application/ld+json`) script; likes and
 * dislikes are read from the rendered markup inside `#wrapper`.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Scene URL; its last path segment becomes `entryId`.
 * @param {Object} site - Site the scene was requested for; attached to the result as-is.
 * @returns {Promise<Object>} Release with url, shootId, entryId, title, description, actors,
 *   date, duration (seconds), tags, rating and site.
 */
async function scrapeScene(html, url, site) {
    const $ = cheerio.load(html, { normalizeWhitespace: true });
    const json = $('script[type="application/ld+json"]').html();
    // The parsed JSON-LD is expected to be a list; its last entry is used as the scene record.
    const data = JSON.parse(json).slice(-1)[0];
    const sceneElement = $('#wrapper');

    // When the parent work's name contains ' - ', the part before it is taken as the shoot ID.
    const workName = data.isPartOf.name.split(' - ');
    const shootId = workName.length > 1 ? workName[0] : null;
    const entryId = url.split('/').slice(-1)[0];

    const title = data.name;
    const description = data.description;
    const date = moment.utc(data.isPartOf.datePublished, 'YYYY-MM-DD').toDate();

    // const actors = sceneElement.find('.sceneActors a').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();
    const actors = data.actor
        // Rename each record's `gender` while destructuring so the comparator sees real values.
        // With the return values below, a male performer is ordered ahead of a female one and
        // every other pairing compares as equal.
        // NOTE(review): confirm this ordering direction is the intended one.
        .sort(({ gender: genderA }, { gender: genderB }) => {
            if (genderA === 'female' && genderB === 'male') return 1;
            if (genderA === 'male' && genderB === 'female') return -1;

            return 0;
        })
        .map(actor => actor.name);

    const likes = Number(sceneElement.find('.rating .state_1 .value').text());
    const dislikes = Number(sceneElement.find('.rating .state_2 .value').text());

    // The first two characters are dropped before parsing — presumably an ISO 8601 'PT' prefix; verify against live data.
    const duration = moment.duration(data.duration.slice(2)).asSeconds();

    const rawTags = data.keywords.split(', ');
    const tags = await matchTags(rawTags);

    return {
        url,
        shootId,
        entryId,
        title,
        description,
        actors,
        date,
        duration,
        tags,
        rating: {
            likes,
            dislikes,
        },
        site,
    };
}
/**
 * Download one page of a site's latest-scenes listing from blowpass.com and parse it.
 *
 * @param {Object} site - Site whose `id` selects the listing.
 * @param {number} [page=1] - Listing page number placed at the end of the URL.
 * @returns {Promise<Object[]>} Releases produced by `scrape` for that page.
 */
async function fetchLatest(site, page = 1) {
    const listingUrl = `https://www.blowpass.com/en/videos/${site.id}/latest/All-Categories/0/All-Pornstars/0/${page}`;
    const response = await bhttp.get(listingUrl);
    const html = response.body.toString();

    return scrape(html, site);
}
/**
 * Download a site's upcoming-scenes listing from blowpass.com and parse it.
 *
 * @param {Object} site - Site whose `id` selects the listing.
 * @returns {Promise<Object[]>} Releases produced by `scrape` for the upcoming listing.
 */
async function fetchUpcoming(site) {
    const upcomingUrl = `https://www.blowpass.com/en/videos/${site.id}/upcoming`;
    const response = await bhttp.get(upcomingUrl);
    const html = response.body.toString();

    return scrape(html, site);
}
/**
 * Download a single scene page from blowpass.com and parse it.
 *
 * The request URL is rebuilt from the site ID and the last two path segments of `url`;
 * the original `url` is what gets passed on to the scene parser.
 *
 * @param {string} url - Absolute scene URL.
 * @param {Object} site - Site the scene belongs to.
 * @returns {Promise<Object>} Release produced by `scrapeScene`.
 */
async function fetchScene(url, site) {
    const { pathname } = new URL(url);
    const scenePath = pathname.split('/').slice(-2).join('/');
    const response = await bhttp.get(`https://www.blowpass.com/en/video/${site.id}/${scenePath}`);

    return scrapeScene(response.body.toString(), url, site);
}
// Public scraper interface: latest listing, upcoming listing and single-scene fetchers.
module.exports = { fetchLatest, fetchUpcoming, fetchScene };

View File

@ -1,5 +1,6 @@
'use strict'; 'use strict';
const blowpass = require('./blowpass');
const brazzers = require('./brazzers'); const brazzers = require('./brazzers');
const julesjordan = require('./julesjordan'); const julesjordan = require('./julesjordan');
const kink = require('./kink'); const kink = require('./kink');
@ -10,6 +11,7 @@ const vixen = require('./vixen');
const xempire = require('./xempire'); const xempire = require('./xempire');
module.exports = { module.exports = {
blowpass,
brazzers, brazzers,
julesjordan, julesjordan,
kink, kink,

View File

@ -29,6 +29,7 @@ function scrapeLatest(html, site) {
return { return {
url, url,
shootId, shootId,
entryId: shootId,
title, title,
actors, actors,
date, date,
@ -73,6 +74,7 @@ async function scrapeScene(html, url, shootId, ratingRes, site) {
return { return {
url, url,
shootId, shootId,
entryId: shootId,
title, title,
date, date,
actors, actors,

View File

@ -25,13 +25,14 @@ function scrapeLatest(html, site) {
const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
const { shootId, title } = extractTitle(originalTitle); const { shootId, title } = extractTitle(originalTitle);
const internalId = new URL(url).pathname.split('/')[2]; const entryId = new URL(url).pathname.split('/')[2];
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate(); const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
return { return {
url, url,
shootId: shootId || internalId, shootId,
entryId,
title, title,
date, date,
site, site,
@ -44,6 +45,7 @@ async function scrapeScene(html, url, site) {
const originalTitle = $('h1.watchpage-title').text().trim(); const originalTitle = $('h1.watchpage-title').text().trim();
const { shootId, title } = extractTitle(originalTitle); const { shootId, title } = extractTitle(originalTitle);
const entryId = new URL(url).pathname.split('/')[2];
const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate(); const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
@ -60,6 +62,7 @@ async function scrapeScene(html, url, site) {
return { return {
url, url,
shootId, shootId,
entryId,
title, title,
date, date,
actors, actors,

View File

@ -32,7 +32,6 @@ function scrapeLatest(html, site) {
date, date,
rating: { rating: {
likes, likes,
dislikes: 0,
}, },
site, site,
}; };
@ -79,7 +78,6 @@ async function scrapeScene(html, url, site) {
tags, tags,
rating: { rating: {
likes, likes,
dislikes: 0,
}, },
site: channelSite || site, site: channelSite || site,
}; };

View File

@ -49,16 +49,20 @@ function scrapeUpcoming(html, site) {
}); });
} }
function scrapeScene(html, url, site) { async function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
const rawTags = [];
const tags = await matchTags(rawTags);
return { return {
url, url,
shootId, shootId,
title, title,
actors, actors,
director: '', director,
date, date,
tags,
rating: { rating: {
likes, likes,
dislikes, dislikes,
@ -68,7 +72,7 @@ function scrapeScene(html, url, site) {
}; };
} }
async function fetchLatest(site) { async function fetchLatest(site, page = 1) {
const res = await bhttp.get(`${site.url}/url`); const res = await bhttp.get(`${site.url}/url`);
return scrapeLatest(res.body.toString(), site); return scrapeLatest(res.body.toString(), site);

View File

@ -11,6 +11,10 @@ const formatters = {
return '\x1b[90mUnrated\x1b[0m'; return '\x1b[90mUnrated\x1b[0m';
} }
if (rating.likes !== undefined && rating.dislikes === undefined) {
return `\x1b[93m★\x1b[0m N/A \x1b[92m▲\x1b[0m ${String(rating.likes).padEnd(3)}`;
}
if (rating.stars) { if (rating.stars) {
return `\x1b[93m★ ${rating.stars.toFixed(2)}\x1b[0m`; return `\x1b[93m★ ${rating.stars.toFixed(2)}\x1b[0m`;
} }