Added Blowpass scraper. Split shootId and pageId.
This commit is contained in:
parent
069c2c1628
commit
3a90f98d41
|
@ -2,6 +2,12 @@
|
||||||
The latest releases from your favorite porn studios in one place.
|
The latest releases from your favorite porn studios in one place.
|
||||||
|
|
||||||
## Supported networks & sites
|
## Supported networks & sites
|
||||||
|
* **Blowpass**
|
||||||
|
* 1000 Facials
|
||||||
|
* Immoral Live
|
||||||
|
* Mommy Blows Best
|
||||||
|
* Only Teen Blowjobs
|
||||||
|
* Throated
|
||||||
* **Brazzers**
|
* **Brazzers**
|
||||||
* Asses In Public
|
* Asses In Public
|
||||||
* Baby Got Boobs
|
* Baby Got Boobs
|
||||||
|
|
|
@ -5,6 +5,8 @@ exports.up = knex => Promise.resolve()
|
||||||
table.increments('id', 8);
|
table.increments('id', 8);
|
||||||
|
|
||||||
table.string('name');
|
table.string('name');
|
||||||
|
table.string('gender', 18);
|
||||||
|
|
||||||
table.integer('alias_for', 8)
|
table.integer('alias_for', 8)
|
||||||
.references('id')
|
.references('id')
|
||||||
.inTable('actors');
|
.inTable('actors');
|
||||||
|
@ -58,7 +60,9 @@ exports.up = knex => Promise.resolve()
|
||||||
.inTable('sites');
|
.inTable('sites');
|
||||||
|
|
||||||
table.string('shoot_id');
|
table.string('shoot_id');
|
||||||
|
table.string('entry_id');
|
||||||
table.unique(['site_id', 'shoot_id']);
|
table.unique(['site_id', 'shoot_id']);
|
||||||
|
table.unique(['site_id', 'entry_id']);
|
||||||
|
|
||||||
table.string('url');
|
table.string('url');
|
||||||
table.string('title');
|
table.string('title');
|
||||||
|
@ -116,4 +120,5 @@ exports.down = knex => Promise.resolve()
|
||||||
.then(() => knex.schema.dropTable('sites'))
|
.then(() => knex.schema.dropTable('sites'))
|
||||||
.then(() => knex.schema.dropTable('networks'))
|
.then(() => knex.schema.dropTable('networks'))
|
||||||
.then(() => knex.schema.dropTable('actors'))
|
.then(() => knex.schema.dropTable('actors'))
|
||||||
|
.then(() => knex.schema.dropTable('directors'))
|
||||||
.then(() => knex.schema.dropTable('tags'));
|
.then(() => knex.schema.dropTable('tags'));
|
||||||
|
|
|
@ -4,6 +4,12 @@
|
||||||
exports.seed = knex => Promise.resolve()
|
exports.seed = knex => Promise.resolve()
|
||||||
.then(() => knex('networks').del())
|
.then(() => knex('networks').del())
|
||||||
.then(() => knex('networks').insert([
|
.then(() => knex('networks').insert([
|
||||||
|
{
|
||||||
|
id: 'blowpass',
|
||||||
|
name: 'Blowpass',
|
||||||
|
url: 'https://www.blowpass.com',
|
||||||
|
description: 'Welcome to Blowpass.com, your ultimate source for deepthroat porn, MILF and teen blowjob videos, big cumshots and any and everything oral!',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
id: 'brazzers',
|
id: 'brazzers',
|
||||||
name: 'Brazzers',
|
name: 'Brazzers',
|
||||||
|
|
|
@ -4,6 +4,47 @@
|
||||||
exports.seed = knex => Promise.resolve()
|
exports.seed = knex => Promise.resolve()
|
||||||
.then(() => knex('sites').del())
|
.then(() => knex('sites').del())
|
||||||
.then(() => knex('sites').insert([
|
.then(() => knex('sites').insert([
|
||||||
|
// BLOWPASS
|
||||||
|
{
|
||||||
|
id: '1000facials',
|
||||||
|
name: '1000 Facials',
|
||||||
|
label: '1000fc',
|
||||||
|
url: 'https://www.1000facials.com',
|
||||||
|
description: 'Welcome to 1000Facials.com, your source for the best facial porn with huge cumshots on your favorite teen and MILF pornstars. Watch all the blowjob action inside!',
|
||||||
|
network_id: 'blowpass',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'immorallive',
|
||||||
|
name: 'Immoral Live',
|
||||||
|
label: 'imlive',
|
||||||
|
url: 'https://www.immorallive.com',
|
||||||
|
description: 'Watch live sex shows and videos on ImmoralLive.com, featuring wild and crazy sex orgies, group sex, blowjob competitions and toy play from the famous Porno Dan. The hottest pornstars and amateur girls cum hard inside',
|
||||||
|
network_id: 'blowpass',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'mommyblowsbest',
|
||||||
|
name: 'Mommy Blows Best',
|
||||||
|
label: 'momblb',
|
||||||
|
url: 'https://www.mommyblowsbest.com',
|
||||||
|
description: 'Welcome to MommyBlowsBest.com. Home to thousands of MILF blowjobs and hot mom porn! Come see why experience counts, right here at MommyBlowsBest.com!',
|
||||||
|
network_id: 'blowpass',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'onlyteenblowjobs',
|
||||||
|
name: 'Only Teen Blowjobs',
|
||||||
|
label: 'teenbj',
|
||||||
|
url: 'https://www.onlyteenblowjobs.com',
|
||||||
|
description: 'OnlyTeenBlowjobs.com brings you the best teen blowjob porn featuring today\'s hottest young pornstars and amateurs. Watch as teens use their little mouths to suck and deepthroat the biggest of cocks!',
|
||||||
|
network_id: 'blowpass',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'throated',
|
||||||
|
name: 'Throated',
|
||||||
|
label: 'throat',
|
||||||
|
url: 'https://www.throated.com',
|
||||||
|
description: 'Throated.com is your portal for extreme throat fuck porn, face fucking videos and deepthroat gagging pornstars. Watch teens and MILFs go balls deep, swallowing cock in HD!',
|
||||||
|
network_id: 'blowpass',
|
||||||
|
},
|
||||||
// BRAZZERS
|
// BRAZZERS
|
||||||
{
|
{
|
||||||
id: 'momsincontrol',
|
id: 'momsincontrol',
|
||||||
|
|
|
@ -272,6 +272,10 @@ exports.seed = knex => Promise.resolve()
|
||||||
tag: 'nipple clamps',
|
tag: 'nipple clamps',
|
||||||
alias_for: null,
|
alias_for: null,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
tag: 'oral creampie',
|
||||||
|
alias_for: null,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
tag: 'pain',
|
tag: 'pain',
|
||||||
alias_for: null,
|
alias_for: null,
|
||||||
|
@ -527,6 +531,10 @@ exports.seed = knex => Promise.resolve()
|
||||||
tag: 'crop', // a type of whip, not short for corporal
|
tag: 'crop', // a type of whip, not short for corporal
|
||||||
alias_for: 'corporal punishment',
|
alias_for: 'corporal punishment',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
tag: 'cum in mouth',
|
||||||
|
alias_for: 'oral creampie',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
tag: 'cum swallowing',
|
tag: 'cum swallowing',
|
||||||
alias_for: 'swallowing',
|
alias_for: 'swallowing',
|
||||||
|
@ -683,6 +691,10 @@ exports.seed = knex => Promise.resolve()
|
||||||
tag: 'MFF',
|
tag: 'MFF',
|
||||||
alias_for: 'FMF',
|
alias_for: 'FMF',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
tag: 'oral',
|
||||||
|
alias_for: 'blowjob',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
tag: 'piercing',
|
tag: 'piercing',
|
||||||
alias_for: 'piercings',
|
alias_for: 'piercings',
|
||||||
|
|
|
@ -56,17 +56,20 @@ async function accumulateIncludedSites() {
|
||||||
return curateSites(rawSites);
|
return curateSites(rawSites);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Find stored releases that correspond to any of the freshly scraped releases.
 *
 * A stored release matches when its `shoot_id` equals a scraped `shootId`, or
 * when its `entry_id` equals a scraped `entryId`. Scrapers may leave either
 * identifier unset, so missing values are dropped before querying.
 *
 * @param {Object[]} latestReleases - Scraped releases; `shootId` and `entryId` are read from each.
 * @param {string} [_siteId] - Accepted for the caller's benefit but not used to narrow the query.
 * @returns {Promise<Object[]>} Result of the `releases` query.
 */
async function findDuplicateReleases(latestReleases, _siteId) {
    // Scrapers emit `undefined` or `null` when a site has no such identifier
    const isPresent = id => id !== undefined && id !== null;

    const latestReleasesShootIds = latestReleases.map(release => release.shootId).filter(isPresent);
    const latestReleasesEntryIds = latestReleases.map(release => release.entryId).filter(isPresent);

    return knex('releases')
        .whereIn('shoot_id', latestReleasesShootIds)
        .orWhereIn('entry_id', latestReleasesEntryIds);
}
|
||||||
|
|
||||||
async function storeReleases(releases) {
|
async function storeReleases(releases) {
|
||||||
const curatedReleases = releases.map(release => ({
|
const curatedReleases = releases.map(release => ({
|
||||||
site_id: release.site.id,
|
site_id: release.site.id,
|
||||||
shoot_id: release.shootId || null,
|
shoot_id: release.shootId || null,
|
||||||
|
entry_id: release.entry_id || null,
|
||||||
url: release.url,
|
url: release.url,
|
||||||
title: release.title,
|
title: release.title,
|
||||||
date: release.date,
|
date: release.date,
|
||||||
|
@ -93,7 +96,7 @@ async function storeReleases(releases) {
|
||||||
async function fetchNewReleases(scraper, site, afterDate, accReleases = [], page = 1) {
|
async function fetchNewReleases(scraper, site, afterDate, accReleases = [], page = 1) {
|
||||||
const latestReleases = await scraper.fetchLatest(site, page);
|
const latestReleases = await scraper.fetchLatest(site, page);
|
||||||
|
|
||||||
const duplicateReleases = await findDuplicateReleases(latestReleases);
|
const duplicateReleases = await findDuplicateReleases(latestReleases, site.id);
|
||||||
const duplicateReleasesShootIds = new Set(
|
const duplicateReleasesShootIds = new Set(
|
||||||
duplicateReleases
|
duplicateReleases
|
||||||
.map(release => release.shoot_id)
|
.map(release => release.shoot_id)
|
||||||
|
|
|
@ -19,6 +19,7 @@ async function findSite(url) {
|
||||||
.orWhere({ url: `${protocol}//${hostname}` })
|
.orWhere({ url: `${protocol}//${hostname}` })
|
||||||
.first();
|
.first();
|
||||||
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
id: site.id,
|
id: site.id,
|
||||||
name: site.name,
|
name: site.name,
|
||||||
|
|
|
@ -0,0 +1,110 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
/* eslint-disable */
|
||||||
|
const bhttp = require('bhttp');
|
||||||
|
const cheerio = require('cheerio');
|
||||||
|
const moment = require('moment');
|
||||||
|
|
||||||
|
const { matchTags } = require('../tags');
|
||||||
|
|
||||||
|
/**
 * Turn a Blowpass listing page into an array of release summaries.
 *
 * @param {string} html - Markup of the listing page.
 * @param {Object} site - Site record; `site.url` prefixes every scene URL and the record is attached to each release.
 * @returns {Object[]} One release object per `.sceneList .scene` element.
 */
function scrape(html, site) {
    const $ = cheerio.load(html, { normalizeWhitespace: true });

    // Builds the release summary for a single scene tile
    function toRelease(sceneNode) {
        const $scene = $(sceneNode);
        const $titleLink = $scene.find('.sceneTitle a');

        // Only the last two path segments of the link are carried over to the site's own scene URL
        const scenePath = $titleLink.attr('href').split('/').slice(-2).join('/');
        const releaseDate = moment.utc($scene.find('.sceneDate').text(), 'MM-DD-YYYY').toDate();
        const performerNames = $scene
            .find('.sceneActors a')
            .map((nodeIndex, actorNode) => $(actorNode).text())
            .toArray();
        const likeCount = Number($scene.find('.rating .state_1 .value').text());

        return {
            url: `${site.url}/en/scene/${scenePath}`,
            entryId: $scene.attr('data-itemid'),
            title: $titleLink.attr('title'),
            actors: performerNames,
            date: releaseDate,
            rating: {
                likes: likeCount,
            },
            site,
        };
    }

    return $('.sceneList .scene')
        .toArray()
        .map(sceneNode => toRelease(sceneNode));
}
|
||||||
|
|
||||||
|
/**
 * Extract the details of a single scene from its Blowpass page.
 *
 * Most fields come from the page's JSON-LD script; likes and dislikes are read
 * from the rating markup inside `#wrapper`.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Scene URL; its last path segment becomes `entryId`.
 * @param {Object} site - Site record, attached to the result as-is.
 * @returns {Promise<Object>} Release with url, ids, title, description, actors, date, duration, tags, rating and site.
 * @throws {SyntaxError} When the JSON-LD script is missing or is not valid JSON.
 */
async function scrapeScene(html, url, site) {
    const $ = cheerio.load(html, { normalizeWhitespace: true });
    const json = $('script[type="application/ld+json"]').html();
    // The parsed payload is treated as a list; its last entry is used as the scene data
    const data = JSON.parse(json).slice(-1)[0];
    const sceneElement = $('#wrapper');

    // A work name of the form "<id> - <rest>" carries the shoot ID as its first part
    const workName = data.isPartOf.name.split(' - ');
    const shootId = workName.length > 1 ? workName[0] : null;
    const entryId = url.split('/').slice(-1)[0];

    const title = data.name;
    const description = data.description;
    const date = moment.utc(data.isPartOf.datePublished, 'YYYY-MM-DD').toDate();

    // Sort a copy so the parsed payload is left untouched. The comparator reads each
    // actor's `gender`; with the return values as written, 'male' sorts before 'female'.
    // NOTE(review): confirm this is the intended order.
    const actors = [...(data.actor || [])]
        .sort(({ gender: genderA }, { gender: genderB }) => {
            if (genderA === 'female' && genderB === 'male') return 1;
            if (genderA === 'male' && genderB === 'female') return -1;

            return 0;
        })
        .map(actor => actor.name);

    const likes = Number(sceneElement.find('.rating .state_1 .value').text());
    const dislikes = Number(sceneElement.find('.rating .state_2 .value').text());

    // The first two characters of the duration string are dropped before moment parses it
    const duration = moment.duration(data.duration.slice(2)).asSeconds();

    const rawTags = data.keywords ? data.keywords.split(', ') : [];
    const tags = await matchTags(rawTags);

    return {
        url,
        shootId,
        entryId,
        title,
        description,
        actors,
        date,
        duration,
        tags,
        rating: {
            likes,
            dislikes,
        },
        site,
    };
}
|
||||||
|
|
||||||
|
/**
 * Download one page of a site's latest-videos listing from blowpass.com and scrape it.
 *
 * @param {Object} site - Site record; `site.id` selects the listing.
 * @param {number} [page=1] - Page of the listing to request.
 * @returns {Promise<Object[]>} Releases produced by `scrape` for that page.
 */
async function fetchLatest(site, page = 1) {
    const listingUrl = `https://www.blowpass.com/en/videos/${site.id}/latest/All-Categories/0/All-Pornstars/0/${page}`;
    const response = await bhttp.get(listingUrl);
    const markup = response.body.toString();

    return scrape(markup, site);
}
|
||||||
|
|
||||||
|
/**
 * Download a site's upcoming-videos listing from blowpass.com and scrape it.
 *
 * @param {Object} site - Site record; `site.id` selects the listing.
 * @returns {Promise<Object[]>} Releases produced by `scrape` for the upcoming page.
 */
async function fetchUpcoming(site) {
    const upcomingUrl = `https://www.blowpass.com/en/videos/${site.id}/upcoming`;
    const response = await bhttp.get(upcomingUrl);
    const markup = response.body.toString();

    return scrape(markup, site);
}
|
||||||
|
|
||||||
|
/**
 * Download a scene page from blowpass.com and scrape its details.
 *
 * The request goes to blowpass.com, reusing the last two path segments of the given URL.
 *
 * @param {string} url - Scene URL; passed through unchanged to `scrapeScene`.
 * @param {Object} site - Site record; `site.id` is part of the requested path.
 * @returns {Promise<Object>} Release produced by `scrapeScene`.
 */
async function fetchScene(url, site) {
    const { pathname } = new URL(url);
    const scenePath = pathname.split('/').slice(-2).join('/');

    const response = await bhttp.get(`https://www.blowpass.com/en/video/${site.id}/${scenePath}`);
    const markup = response.body.toString();

    return scrapeScene(markup, url, site);
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
fetchLatest,
|
||||||
|
fetchUpcoming,
|
||||||
|
fetchScene,
|
||||||
|
};
|
|
@ -1,5 +1,6 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
|
const blowpass = require('./blowpass');
|
||||||
const brazzers = require('./brazzers');
|
const brazzers = require('./brazzers');
|
||||||
const julesjordan = require('./julesjordan');
|
const julesjordan = require('./julesjordan');
|
||||||
const kink = require('./kink');
|
const kink = require('./kink');
|
||||||
|
@ -10,6 +11,7 @@ const vixen = require('./vixen');
|
||||||
const xempire = require('./xempire');
|
const xempire = require('./xempire');
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
blowpass,
|
||||||
brazzers,
|
brazzers,
|
||||||
julesjordan,
|
julesjordan,
|
||||||
kink,
|
kink,
|
||||||
|
|
|
@ -29,6 +29,7 @@ function scrapeLatest(html, site) {
|
||||||
return {
|
return {
|
||||||
url,
|
url,
|
||||||
shootId,
|
shootId,
|
||||||
|
entryId: shootId,
|
||||||
title,
|
title,
|
||||||
actors,
|
actors,
|
||||||
date,
|
date,
|
||||||
|
@ -73,6 +74,7 @@ async function scrapeScene(html, url, shootId, ratingRes, site) {
|
||||||
return {
|
return {
|
||||||
url,
|
url,
|
||||||
shootId,
|
shootId,
|
||||||
|
entryId: shootId,
|
||||||
title,
|
title,
|
||||||
date,
|
date,
|
||||||
actors,
|
actors,
|
||||||
|
|
|
@ -25,13 +25,14 @@ function scrapeLatest(html, site) {
|
||||||
|
|
||||||
const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
|
const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
|
||||||
const { shootId, title } = extractTitle(originalTitle);
|
const { shootId, title } = extractTitle(originalTitle);
|
||||||
const internalId = new URL(url).pathname.split('/')[2];
|
const entryId = new URL(url).pathname.split('/')[2];
|
||||||
|
|
||||||
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
|
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
|
||||||
|
|
||||||
return {
|
return {
|
||||||
url,
|
url,
|
||||||
shootId: shootId || internalId,
|
shootId,
|
||||||
|
entryId,
|
||||||
title,
|
title,
|
||||||
date,
|
date,
|
||||||
site,
|
site,
|
||||||
|
@ -44,6 +45,7 @@ async function scrapeScene(html, url, site) {
|
||||||
|
|
||||||
const originalTitle = $('h1.watchpage-title').text().trim();
|
const originalTitle = $('h1.watchpage-title').text().trim();
|
||||||
const { shootId, title } = extractTitle(originalTitle);
|
const { shootId, title } = extractTitle(originalTitle);
|
||||||
|
const entryId = new URL(url).pathname.split('/')[2];
|
||||||
|
|
||||||
const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
|
const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
|
||||||
|
|
||||||
|
@ -60,6 +62,7 @@ async function scrapeScene(html, url, site) {
|
||||||
return {
|
return {
|
||||||
url,
|
url,
|
||||||
shootId,
|
shootId,
|
||||||
|
entryId,
|
||||||
title,
|
title,
|
||||||
date,
|
date,
|
||||||
actors,
|
actors,
|
||||||
|
|
|
@ -32,7 +32,6 @@ function scrapeLatest(html, site) {
|
||||||
date,
|
date,
|
||||||
rating: {
|
rating: {
|
||||||
likes,
|
likes,
|
||||||
dislikes: 0,
|
|
||||||
},
|
},
|
||||||
site,
|
site,
|
||||||
};
|
};
|
||||||
|
@ -79,7 +78,6 @@ async function scrapeScene(html, url, site) {
|
||||||
tags,
|
tags,
|
||||||
rating: {
|
rating: {
|
||||||
likes,
|
likes,
|
||||||
dislikes: 0,
|
|
||||||
},
|
},
|
||||||
site: channelSite || site,
|
site: channelSite || site,
|
||||||
};
|
};
|
||||||
|
|
|
@ -49,16 +49,20 @@ function scrapeUpcoming(html, site) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene(html, url, site) {
|
async function scrapeScene(html, url, site) {
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||||
|
|
||||||
|
const rawTags = [];
|
||||||
|
const tags = await matchTags(rawTags);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
url,
|
url,
|
||||||
shootId,
|
shootId,
|
||||||
title,
|
title,
|
||||||
actors,
|
actors,
|
||||||
director: '',
|
director,
|
||||||
date,
|
date,
|
||||||
|
tags,
|
||||||
rating: {
|
rating: {
|
||||||
likes,
|
likes,
|
||||||
dislikes,
|
dislikes,
|
||||||
|
@ -68,7 +72,7 @@ function scrapeScene(html, url, site) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(site) {
|
async function fetchLatest(site, page = 1) {
|
||||||
const res = await bhttp.get(`${site.url}/url`);
|
const res = await bhttp.get(`${site.url}/url`);
|
||||||
|
|
||||||
return scrapeLatest(res.body.toString(), site);
|
return scrapeLatest(res.body.toString(), site);
|
||||||
|
|
|
@ -11,6 +11,10 @@ const formatters = {
|
||||||
return '\x1b[90mUnrated\x1b[0m';
|
return '\x1b[90mUnrated\x1b[0m';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rating.likes !== undefined && rating.dislikes === undefined) {
|
||||||
|
return `\x1b[93m★\x1b[0m N/A \x1b[92m▲\x1b[0m ${String(rating.likes).padEnd(3)}`;
|
||||||
|
}
|
||||||
|
|
||||||
if (rating.stars) {
|
if (rating.stars) {
|
||||||
return `\x1b[93m★ ${rating.stars.toFixed(2)}\x1b[0m`;
|
return `\x1b[93m★ ${rating.stars.toFixed(2)}\x1b[0m`;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue