Added a parameters column to the sites database, which fixes the Perv City scraper. All existing scrapers now extract the shoot ID.

This commit is contained in:
ThePendulum 2019-03-26 01:26:47 +01:00
parent 6a20cbc721
commit 8421cd8648
11 changed files with 45 additions and 37 deletions

View File

@ -32,15 +32,15 @@ module.exports = {
}, },
{ {
value: 'title', value: 'title',
width: 60, width: 100,
}, },
{ {
value: 'actors', value: 'actors',
width: 40, width: 60,
}, },
{ {
value: 'rating', value: 'rating',
width: 20, width: 30,
}, },
], ],
filename: { filename: {

View File

@ -39,6 +39,7 @@ exports.up = knex => Promise.resolve()
table.string('name'); table.string('name');
table.string('url'); table.string('url');
table.string('description'); table.string('description');
table.string('parameters');
})) }))
.then(() => knex.schema.createTable('releases', (table) => { .then(() => knex.schema.createTable('releases', (table) => {
table.increments('id', 12); table.increments('id', 12);

View File

@ -272,6 +272,7 @@ exports.seed = knex => Promise.resolve()
}, },
// LEGALPORNO // LEGALPORNO
{ {
id: 'legalporno',
name: 'LegalPorno', name: 'LegalPorno',
label: 'legalp', label: 'legalp',
url: 'https://www.legalporno.com', url: 'https://www.legalporno.com',
@ -283,9 +284,10 @@ exports.seed = knex => Promise.resolve()
id: 'analoverdose', id: 'analoverdose',
name: 'Anal Overdose', name: 'Anal Overdose',
label: 'AnalOD', label: 'AnalOD',
description: 'Before proceeding, use caution: the stunning pornstars of Anal Overdose are so fiery that they cause heavy breathing, throbbing cocks and volcanic loads of cum. If you think you can handle the heat of smoking tits, sweltering pussy and red hot ass.',
url: 'http://www.analoverdose.com', url: 'http://www.analoverdose.com',
description: 'Before proceeding, use caution: the stunning pornstars of Anal Overdose are so fiery that they cause heavy breathing, throbbing cocks and volcanic loads of cum. If you think you can handle the heat of smoking tits, sweltering pussy and red hot ass.',
network_id: 'pervcity', network_id: 'pervcity',
parameters: JSON.stringify({ tourId: 3 }),
}, },
{ {
id: 'bangingbeauties', id: 'bangingbeauties',
@ -294,6 +296,7 @@ exports.seed = knex => Promise.resolve()
description: "Banging Beauties isn't just a porn site; it's the gateway to all your pussy-obsessed fantasies! Our members' area is flowing with beautiful pornstars anticipating big dick throbbing in their syrupy pink slits. These experienced babes love brutal vaginal pounding! Similarly, they're eager for anal switch-hitting to shake things up. However, it's not only about gorgeous sexperts filling their hungry holes. Sometimes, it's all about innocent rookies earning their pornstar status in first time threesomes and premier interracial scenes.", description: "Banging Beauties isn't just a porn site; it's the gateway to all your pussy-obsessed fantasies! Our members' area is flowing with beautiful pornstars anticipating big dick throbbing in their syrupy pink slits. These experienced babes love brutal vaginal pounding! Similarly, they're eager for anal switch-hitting to shake things up. However, it's not only about gorgeous sexperts filling their hungry holes. Sometimes, it's all about innocent rookies earning their pornstar status in first time threesomes and premier interracial scenes.",
url: 'http://www.bangingbeauties.com', url: 'http://www.bangingbeauties.com',
network_id: 'pervcity', network_id: 'pervcity',
parameters: JSON.stringify({ tourId: 7 }),
}, },
{ {
id: 'oraloverdose', id: 'oraloverdose',
@ -302,6 +305,7 @@ exports.seed = knex => Promise.resolve()
description: "Oral Overdose is the only site you need to live out every saliva soaked blowjob of your dreams in HD POV! We've got the most stunning cocksuckers in the world going to town on big dick. These babes not only love cock, they can't get enough of it! In fact, there is no prick too huge for our hungry girls' throats. You'll find gorgeous, big tits pornstars exercising their gag reflex in intense balls deep facefuck scenes. We also feature fresh, young newbies taking on the gagging deepthroat challenge.", description: "Oral Overdose is the only site you need to live out every saliva soaked blowjob of your dreams in HD POV! We've got the most stunning cocksuckers in the world going to town on big dick. These babes not only love cock, they can't get enough of it! In fact, there is no prick too huge for our hungry girls' throats. You'll find gorgeous, big tits pornstars exercising their gag reflex in intense balls deep facefuck scenes. We also feature fresh, young newbies taking on the gagging deepthroat challenge.",
url: 'http://www.oraloverdose.com', url: 'http://www.oraloverdose.com',
network_id: 'pervcity', network_id: 'pervcity',
parameters: JSON.stringify({ tourId: 4 }),
}, },
{ {
id: 'chocolatebjs', id: 'chocolatebjs',
@ -310,6 +314,7 @@ exports.seed = knex => Promise.resolve()
description: "You've just won the golden ticket to the best Chocolate BJs on the planet! We've sought far and wide to bring you the most beautiful black and ethnic pornstars. And they're in our members' area now! They can't wait to suck your white lollipop and lick the thick cream shooting from your big dick. Of course, no matter how sweet the booty or juicy the big tits, these brown foxes aren't all sugar and spice. In fact, when it comes to giving head, these big ass ebony babes know what they want: huge white cocks filling their throats!", description: "You've just won the golden ticket to the best Chocolate BJs on the planet! We've sought far and wide to bring you the most beautiful black and ethnic pornstars. And they're in our members' area now! They can't wait to suck your white lollipop and lick the thick cream shooting from your big dick. Of course, no matter how sweet the booty or juicy the big tits, these brown foxes aren't all sugar and spice. In fact, when it comes to giving head, these big ass ebony babes know what they want: huge white cocks filling their throats!",
url: 'http://www.chocolatebjs.com', url: 'http://www.chocolatebjs.com',
network_id: 'pervcity', network_id: 'pervcity',
parameters: JSON.stringify({ tourId: 6 }),
}, },
{ {
id: 'upherasshole', id: 'upherasshole',
@ -318,6 +323,7 @@ exports.seed = knex => Promise.resolve()
description: "You don't need to travel the globe in search of the anal wonders of the world, because you get your own private tour right here on Up Her Asshole! Our stunning pornstars and rookie starlets welcome all ass fetish and anal sex fans, with their twerking bubble butts and winking assholes. However, big booty worship is just a slice of the fun. Combined with juicy tits (big and small), wet pussy (hairy and bald), these girls deliver a spectacular sensory experience in HD POV. Not only are you in danger of busting a nut before the going gets good, but also when the good turns remarkable with rimming, fingering and butt toys!", description: "You don't need to travel the globe in search of the anal wonders of the world, because you get your own private tour right here on Up Her Asshole! Our stunning pornstars and rookie starlets welcome all ass fetish and anal sex fans, with their twerking bubble butts and winking assholes. However, big booty worship is just a slice of the fun. Combined with juicy tits (big and small), wet pussy (hairy and bald), these girls deliver a spectacular sensory experience in HD POV. Not only are you in danger of busting a nut before the going gets good, but also when the good turns remarkable with rimming, fingering and butt toys!",
url: 'http://www.upherasshole.com', url: 'http://www.upherasshole.com',
network_id: 'pervcity', network_id: 'pervcity',
parameters: JSON.stringify({ tourId: 9 }),
}, },
// XEMPIRE // XEMPIRE
{ {

View File

@ -33,6 +33,7 @@ function curateSites(sites) {
description: site.description, description: site.description,
url: site.url, url: site.url,
networkId: site.network_id, networkId: site.network_id,
parameters: JSON.parse(site.parameters),
})); }));
} }
@ -50,7 +51,7 @@ async function fetchReleases() {
const sites = await accumulateIncludedSites(); const sites = await accumulateIncludedSites();
const scenesPerSite = await Promise.all(sites.map(async (site) => { const scenesPerSite = await Promise.all(sites.map(async (site) => {
const scraper = scrapers[site.id] || scrapers[site.network]; const scraper = scrapers[site.id] || scrapers[site.networkId];
if (scraper) { if (scraper) {
const [latest, upcoming] = await Promise.all([ const [latest, upcoming] = await Promise.all([

View File

@ -23,7 +23,7 @@ async function findSite(url) {
function deriveFilename(scene) { function deriveFilename(scene) {
const props = { const props = {
siteName: scene.site.name, siteName: scene.site.name,
sceneId: scene.id, sceneId: scene.shootId,
sceneTitle: scene.title, sceneTitle: scene.title,
sceneActors: scene.actors.join(config.filename.actorsJoin), sceneActors: scene.actors.join(config.filename.actorsJoin),
sceneDate: moment.utc(scene.date).format(config.filename.dateFormat), sceneDate: moment.utc(scene.date).format(config.filename.dateFormat),

View File

@ -15,6 +15,8 @@ function scrapeLatest(html, site) {
const url = sceneLinkElement.attr('href'); const url = sceneLinkElement.attr('href');
const title = sceneLinkElement.text(); const title = sceneLinkElement.text();
const shootId = $(element).attr('data-setid');
const date = moment const date = moment
.utc($(element).find('.update_date').text(), 'MM/DD/YYYY') .utc($(element).find('.update_date').text(), 'MM/DD/YYYY')
.toDate(); .toDate();
@ -25,6 +27,7 @@ function scrapeLatest(html, site) {
return { return {
url, url,
shootId,
title, title,
actors, actors,
date, date,
@ -38,6 +41,8 @@ function scrapeUpcoming(html, site) {
const scenesElements = $('#coming_soon_carousel').find('.table').toArray(); const scenesElements = $('#coming_soon_carousel').find('.table').toArray();
return scenesElements.map((element) => { return scenesElements.map((element) => {
const shootId = $(element).find('.upcoming_updates_thumb').attr('id').match(/\d+/)[0];
const details = $(element).find('.update_details_comingsoon') const details = $(element).find('.update_details_comingsoon')
.eq(1) .eq(1)
.children() .children()
@ -59,6 +64,7 @@ function scrapeUpcoming(html, site) {
return { return {
url: null, url: null,
shootId,
title, title,
actors, actors,
date, date,

View File

@ -15,7 +15,7 @@ function scrapeLatest(html, site) {
const sceneLinkElement = $(element).find('.shoot-thumb-title a'); const sceneLinkElement = $(element).find('.shoot-thumb-title a');
const href = sceneLinkElement.attr('href'); const href = sceneLinkElement.attr('href');
const url = `https://kink.com${href}`; const url = `https://kink.com${href}`;
const id = href.split('/')[2]; const shootId = href.split('/')[2];
const title = sceneLinkElement.text(); const title = sceneLinkElement.text();
const date = moment.utc($(element).find('.date').text(), 'MMM DD, YYYY').toDate(); const date = moment.utc($(element).find('.date').text(), 'MMM DD, YYYY').toDate();
@ -28,7 +28,7 @@ function scrapeLatest(html, site) {
return { return {
url, url,
id, shootId,
title, title,
actors, actors,
date, date,
@ -41,7 +41,7 @@ function scrapeLatest(html, site) {
}); });
} }
async function scrapeScene(html, url, id, ratingRes, site) { async function scrapeScene(html, url, shootId, ratingRes, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
// const title = $('h1.shoot-title').text().replace(/\ue800/, ''); // fallback, special character is 'like'-heart // const title = $('h1.shoot-title').text().replace(/\ue800/, ''); // fallback, special character is 'like'-heart
@ -70,7 +70,7 @@ async function scrapeScene(html, url, id, ratingRes, site) {
return { return {
url, url,
id, shootId,
title, title,
date, date,
actors, actors,
@ -90,14 +90,14 @@ async function fetchLatest(site) {
} }
async function fetchScene(url, site) { async function fetchScene(url, site) {
const id = new URL(url).pathname.split('/')[2]; const shootId = new URL(url).pathname.split('/')[2];
const [res, ratingRes] = await Promise.all([ const [res, ratingRes] = await Promise.all([
bhttp.get(url), bhttp.get(url),
bhttp.get(`https://kink.com/api/ratings/${id}`), bhttp.get(`https://kink.com/api/ratings/${shootId}`),
]); ]);
return scrapeScene(res.body.toString(), url, id, ratingRes, site); return scrapeScene(res.body.toString(), url, shootId, ratingRes, site);
} }
module.exports = { module.exports = {

View File

@ -6,29 +6,13 @@ const moment = require('moment');
const { matchTags } = require('../tags'); const { matchTags } = require('../tags');
const tagMap = {
'3+ on 1': 'gangbang',
anal: 'anal',
bbc: 'big black cock',
'cum swallowing': 'swallowing',
rough: 'rough',
'deep throat': 'deepthroat',
'double penetration (DP)': 'DP',
'double anal (DAP)': 'DAP',
'double vaginal (DPP)': 'DVP',
'gapes (gaping asshole)': 'gaping',
'huge toys': 'toys',
interracial: 'interracial',
'triple penetration': 'TP',
};
function extractTitle(originalTitle) { function extractTitle(originalTitle) {
const titleComponents = originalTitle.split(' '); const titleComponents = originalTitle.split(' ');
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(GP|SZ|IV|GIO|AA|GL|BZ|FS)\d+/); // detect studio prefixes const sceneIdMatch = titleComponents.slice(-1)[0].match(/(GP|SZ|IV|GIO|AA|GL|BZ|FS)\d+/); // detect studio prefixes
const id = sceneIdMatch ? sceneIdMatch[0] : null; const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle; const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;
return { id, title }; return { shootId, title };
} }
function scrapeLatest(html, site) { function scrapeLatest(html, site) {
@ -40,13 +24,13 @@ function scrapeLatest(html, site) {
const url = sceneLinkElement.attr('href'); const url = sceneLinkElement.attr('href');
const originalTitle = sceneLinkElement.attr('title'); const originalTitle = sceneLinkElement.attr('title');
const { id, title } = extractTitle(originalTitle); const { shootId, title } = extractTitle(originalTitle);
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate(); const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
return { return {
url, url,
id, shootId,
title, title,
date, date,
site, site,
@ -58,7 +42,7 @@ async function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
const originalTitle = $('h1.watchpage-title').text().trim(); const originalTitle = $('h1.watchpage-title').text().trim();
const { id, title } = extractTitle(originalTitle); const { shootId, title } = extractTitle(originalTitle);
const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate(); const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
@ -74,7 +58,7 @@ async function scrapeScene(html, url, site) {
return { return {
url, url,
id, shootId,
title, title,
date, date,
actors, actors,

View File

@ -7,6 +7,7 @@ const moment = require('moment');
function scrape(html, site) { function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
const shootId = $('li').attr('id');
const sceneLinkElement = $('#scene_title_border a'); const sceneLinkElement = $('#scene_title_border a');
const url = `${site.url}/${sceneLinkElement.attr('href')}`; const url = `${site.url}/${sceneLinkElement.attr('href')}`;
const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes
@ -21,6 +22,7 @@ function scrape(html, site) {
return { return {
url, url,
shootId,
title, title,
actors, actors,
date, date,

View File

@ -8,13 +8,15 @@ const { matchTags } = require('../tags');
function scrape(html, site) { function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
const scenesElements = $('.sceneInfo').toArray(); const scenesElements = $('li[data-itemtype=scene]').toArray();
return scenesElements.map((element) => { return scenesElements.map((element) => {
const sceneLinkElement = $(element).find('.sceneTitle a'); const sceneLinkElement = $(element).find('.sceneTitle a');
const url = `${site.url}${sceneLinkElement.attr('href')}`; const url = `${site.url}${sceneLinkElement.attr('href')}`;
const title = sceneLinkElement.attr('title'); const title = sceneLinkElement.attr('title');
const shootId = $(element).attr('data-itemid');
const date = moment const date = moment
.utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY') .utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY')
.toDate(); .toDate();
@ -29,6 +31,7 @@ function scrape(html, site) {
return { return {
url, url,
shootId,
title, title,
actors, actors,
date, date,
@ -42,6 +45,7 @@ function scrape(html, site) {
} }
async function scrapeSceneFallback($, url, site) { async function scrapeSceneFallback($, url, site) {
const shootId = new URL(url).pathname.split('/').slice(-1)[0];
const title = $('h1.title').text(); const title = $('h1.title').text();
const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate(); const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate();
const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray(); const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
@ -54,6 +58,7 @@ async function scrapeSceneFallback($, url, site) {
return { return {
url, url,
shootId,
title, title,
date, date,
actors, actors,
@ -75,6 +80,7 @@ async function scrapeScene(html, url, site) {
} }
const data = JSON.parse(json)[0]; const data = JSON.parse(json)[0];
const shootId = new URL(url).pathname.split('/').slice(-1)[0];
const title = data.isPartOf.name; const title = data.isPartOf.name;
const date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate(); const date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();
@ -98,6 +104,7 @@ async function scrapeScene(html, url, site) {
return { return {
url, url,
shootId,
title, title,
date, date,
actors, actors,

View File

@ -47,7 +47,8 @@ function renderReleases(scenes, screen) {
}, },
top: 1, top: 1,
height: screen.rows - 3, height: screen.rows - 3,
width: 161, // width: 161,
width: config.columns.reduce((acc, column) => acc + column.width, 0),
keys: true, keys: true,
vi: true, vi: true,
mouse: true, mouse: true,