Added a parameters column to the sites database, which fixes the Perv City scraper. All existing scrapers now return the shoot ID.

This commit is contained in:
ThePendulum 2019-03-26 01:26:47 +01:00
parent 6a20cbc721
commit 8421cd8648
11 changed files with 45 additions and 37 deletions

View File

@ -32,15 +32,15 @@ module.exports = {
},
{
value: 'title',
width: 60,
width: 100,
},
{
value: 'actors',
width: 40,
width: 60,
},
{
value: 'rating',
width: 20,
width: 30,
},
],
filename: {

View File

@ -39,6 +39,7 @@ exports.up = knex => Promise.resolve()
table.string('name');
table.string('url');
table.string('description');
table.string('parameters');
}))
.then(() => knex.schema.createTable('releases', (table) => {
table.increments('id', 12);

View File

@ -272,6 +272,7 @@ exports.seed = knex => Promise.resolve()
},
// LEGALPORNO
{
id: 'legalporno',
name: 'LegalPorno',
label: 'legalp',
url: 'https://www.legalporno.com',
@ -283,9 +284,10 @@ exports.seed = knex => Promise.resolve()
id: 'analoverdose',
name: 'Anal Overdose',
label: 'AnalOD',
description: 'Before proceeding, use caution: the stunning pornstars of Anal Overdose are so fiery that they cause heavy breathing, throbbing cocks and volcanic loads of cum. If you think you can handle the heat of smoking tits, sweltering pussy and red hot ass.',
url: 'http://www.analoverdose.com',
description: 'Before proceeding, use caution: the stunning pornstars of Anal Overdose are so fiery that they cause heavy breathing, throbbing cocks and volcanic loads of cum. If you think you can handle the heat of smoking tits, sweltering pussy and red hot ass.',
network_id: 'pervcity',
parameters: JSON.stringify({ tourId: 3 }),
},
{
id: 'bangingbeauties',
@ -294,6 +296,7 @@ exports.seed = knex => Promise.resolve()
description: "Banging Beauties isn't just a porn site; it's the gateway to all your pussy-obsessed fantasies! Our members' area is flowing with beautiful pornstars anticipating big dick throbbing in their syrupy pink slits. These experienced babes love brutal vaginal pounding! Similarly, they're eager for anal switch-hitting to shake things up. However, it's not only about gorgeous sexperts filling their hungry holes. Sometimes, it's all about innocent rookies earning their pornstar status in first time threesomes and premier interracial scenes.",
url: 'http://www.bangingbeauties.com',
network_id: 'pervcity',
parameters: JSON.stringify({ tourId: 7 }),
},
{
id: 'oraloverdose',
@ -302,6 +305,7 @@ exports.seed = knex => Promise.resolve()
description: "Oral Overdose is the only site you need to live out every saliva soaked blowjob of your dreams in HD POV! We've got the most stunning cocksuckers in the world going to town on big dick. These babes not only love cock, they can't get enough of it! In fact, there is no prick too huge for our hungry girls' throats. You'll find gorgeous, big tits pornstars exercising their gag reflex in intense balls deep facefuck scenes. We also feature fresh, young newbies taking on the gagging deepthroat challenge.",
url: 'http://www.oraloverdose.com',
network_id: 'pervcity',
parameters: JSON.stringify({ tourId: 4 }),
},
{
id: 'chocolatebjs',
@ -310,6 +314,7 @@ exports.seed = knex => Promise.resolve()
description: "You've just won the golden ticket to the best Chocolate BJs on the planet! We've sought far and wide to bring you the most beautiful black and ethnic pornstars. And they're in our members' area now! They can't wait to suck your white lollipop and lick the thick cream shooting from your big dick. Of course, no matter how sweet the booty or juicy the big tits, these brown foxes aren't all sugar and spice. In fact, when it comes to giving head, these big ass ebony babes know what they want: huge white cocks filling their throats!",
url: 'http://www.chocolatebjs.com',
network_id: 'pervcity',
parameters: JSON.stringify({ tourId: 6 }),
},
{
id: 'upherasshole',
@ -318,6 +323,7 @@ exports.seed = knex => Promise.resolve()
description: "You don't need to travel the globe in search of the anal wonders of the world, because you get your own private tour right here on Up Her Asshole! Our stunning pornstars and rookie starlets welcome all ass fetish and anal sex fans, with their twerking bubble butts and winking assholes. However, big booty worship is just a slice of the fun. Combined with juicy tits (big and small), wet pussy (hairy and bald), these girls deliver a spectacular sensory experience in HD POV. Not only are you in danger of busting a nut before the going gets good, but also when the good turns remarkable with rimming, fingering and butt toys!",
url: 'http://www.upherasshole.com',
network_id: 'pervcity',
parameters: JSON.stringify({ tourId: 9 }),
},
// XEMPIRE
{

View File

@ -33,6 +33,7 @@ function curateSites(sites) {
description: site.description,
url: site.url,
networkId: site.network_id,
parameters: JSON.parse(site.parameters),
}));
}
@ -50,7 +51,7 @@ async function fetchReleases() {
const sites = await accumulateIncludedSites();
const scenesPerSite = await Promise.all(sites.map(async (site) => {
const scraper = scrapers[site.id] || scrapers[site.network];
const scraper = scrapers[site.id] || scrapers[site.networkId];
if (scraper) {
const [latest, upcoming] = await Promise.all([

View File

@ -23,7 +23,7 @@ async function findSite(url) {
function deriveFilename(scene) {
const props = {
siteName: scene.site.name,
sceneId: scene.id,
sceneId: scene.shootId,
sceneTitle: scene.title,
sceneActors: scene.actors.join(config.filename.actorsJoin),
sceneDate: moment.utc(scene.date).format(config.filename.dateFormat),

View File

@ -15,6 +15,8 @@ function scrapeLatest(html, site) {
const url = sceneLinkElement.attr('href');
const title = sceneLinkElement.text();
const shootId = $(element).attr('data-setid');
const date = moment
.utc($(element).find('.update_date').text(), 'MM/DD/YYYY')
.toDate();
@ -25,6 +27,7 @@ function scrapeLatest(html, site) {
return {
url,
shootId,
title,
actors,
date,
@ -38,6 +41,8 @@ function scrapeUpcoming(html, site) {
const scenesElements = $('#coming_soon_carousel').find('.table').toArray();
return scenesElements.map((element) => {
const shootId = $(element).find('.upcoming_updates_thumb').attr('id').match(/\d+/)[0];
const details = $(element).find('.update_details_comingsoon')
.eq(1)
.children()
@ -59,6 +64,7 @@ function scrapeUpcoming(html, site) {
return {
url: null,
shootId,
title,
actors,
date,

View File

@ -15,7 +15,7 @@ function scrapeLatest(html, site) {
const sceneLinkElement = $(element).find('.shoot-thumb-title a');
const href = sceneLinkElement.attr('href');
const url = `https://kink.com${href}`;
const id = href.split('/')[2];
const shootId = href.split('/')[2];
const title = sceneLinkElement.text();
const date = moment.utc($(element).find('.date').text(), 'MMM DD, YYYY').toDate();
@ -28,7 +28,7 @@ function scrapeLatest(html, site) {
return {
url,
id,
shootId,
title,
actors,
date,
@ -41,7 +41,7 @@ function scrapeLatest(html, site) {
});
}
async function scrapeScene(html, url, id, ratingRes, site) {
async function scrapeScene(html, url, shootId, ratingRes, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
// const title = $('h1.shoot-title').text().replace(/\ue800/, ''); // fallback, special character is 'like'-heart
@ -70,7 +70,7 @@ async function scrapeScene(html, url, id, ratingRes, site) {
return {
url,
id,
shootId,
title,
date,
actors,
@ -90,14 +90,14 @@ async function fetchLatest(site) {
}
/**
 * Fetches a scene page and its ratings in parallel, then hands both to scrapeScene.
 *
 * @param {string} url - Absolute URL of the scene page; the shoot ID is read from its path.
 * @param {object} site - Site record, passed through to scrapeScene untouched.
 * @returns {Promise<*>} Whatever scrapeScene resolves to for this scene.
 */
async function fetchScene(url, site) {
  // pathname.split('/') yields ['', <first segment>, <second segment>, ...],
  // so index 2 is the second path segment, which carries the shoot ID.
  const shootId = new URL(url).pathname.split('/')[2];

  // The page and the ratings endpoint are independent, so request them together.
  const [res, ratingRes] = await Promise.all([
    bhttp.get(url),
    bhttp.get(`https://kink.com/api/ratings/${shootId}`),
  ]);

  return scrapeScene(res.body.toString(), url, shootId, ratingRes, site);
}
module.exports = {

View File

@ -6,29 +6,13 @@ const moment = require('moment');
const { matchTags } = require('../tags');
// Lookup table from a tag label (key) to a shorter tag name (value).
// NOTE(review): presumably the keys are tag spellings as they appear on the scraped
// site and the values are this project's own tag names; no use of tagMap is visible
// in this excerpt, so confirm against the rest of the file before relying on it.
const tagMap = {
'3+ on 1': 'gangbang',
anal: 'anal',
bbc: 'big black cock',
'cum swallowing': 'swallowing',
rough: 'rough',
'deep throat': 'deepthroat',
'double penetration (DP)': 'DP',
'double anal (DAP)': 'DAP',
'double vaginal (DPP)': 'DVP',
'gapes (gaping asshole)': 'gaping',
'huge toys': 'toys',
interracial: 'interracial',
'triple penetration': 'TP',
};
/**
 * Splits a scraped title into its trailing shoot ID and the remaining title text.
 *
 * The last space-separated word is treated as the shoot ID when it contains one of
 * the known studio prefixes (GP, SZ, IV, GIO, AA, GL, BZ, FS) followed by digits.
 * In that case the word is dropped from the title; otherwise the title is returned
 * unchanged and the shoot ID is null.
 *
 * @param {string} originalTitle - Title as scraped, optionally ending in a shoot ID.
 * @returns {{ shootId: (string|null), title: string }} The matched ID (or null) and the cleaned title.
 */
function extractTitle(originalTitle) {
  const titleComponents = originalTitle.split(' ');
  const sceneIdMatch = titleComponents.slice(-1)[0].match(/(GP|SZ|IV|GIO|AA|GL|BZ|FS)\d+/); // detect studio prefixes

  const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
  const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;

  return { shootId, title };
}
function scrapeLatest(html, site) {
@ -40,13 +24,13 @@ function scrapeLatest(html, site) {
const url = sceneLinkElement.attr('href');
const originalTitle = sceneLinkElement.attr('title');
const { id, title } = extractTitle(originalTitle);
const { shootId, title } = extractTitle(originalTitle);
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
return {
url,
id,
shootId,
title,
date,
site,
@ -58,7 +42,7 @@ async function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const originalTitle = $('h1.watchpage-title').text().trim();
const { id, title } = extractTitle(originalTitle);
const { shootId, title } = extractTitle(originalTitle);
const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
@ -74,7 +58,7 @@ async function scrapeScene(html, url, site) {
return {
url,
id,
shootId,
title,
date,
actors,

View File

@ -7,6 +7,7 @@ const moment = require('moment');
function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const shootId = $('li').attr('id');
const sceneLinkElement = $('#scene_title_border a');
const url = `${site.url}/${sceneLinkElement.attr('href')}`;
const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes
@ -21,6 +22,7 @@ function scrape(html, site) {
return {
url,
shootId,
title,
actors,
date,

View File

@ -8,13 +8,15 @@ const { matchTags } = require('../tags');
function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const scenesElements = $('.sceneInfo').toArray();
const scenesElements = $('li[data-itemtype=scene]').toArray();
return scenesElements.map((element) => {
const sceneLinkElement = $(element).find('.sceneTitle a');
const url = `${site.url}${sceneLinkElement.attr('href')}`;
const title = sceneLinkElement.attr('title');
const shootId = $(element).attr('data-itemid');
const date = moment
.utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY')
.toDate();
@ -29,6 +31,7 @@ function scrape(html, site) {
return {
url,
shootId,
title,
actors,
date,
@ -42,6 +45,7 @@ function scrape(html, site) {
}
async function scrapeSceneFallback($, url, site) {
const shootId = new URL(url).pathname.split('/').slice(-1)[0];
const title = $('h1.title').text();
const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate();
const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
@ -54,6 +58,7 @@ async function scrapeSceneFallback($, url, site) {
return {
url,
shootId,
title,
date,
actors,
@ -75,6 +80,7 @@ async function scrapeScene(html, url, site) {
}
const data = JSON.parse(json)[0];
const shootId = new URL(url).pathname.split('/').slice(-1)[0];
const title = data.isPartOf.name;
const date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();
@ -98,6 +104,7 @@ async function scrapeScene(html, url, site) {
return {
url,
shootId,
title,
date,
actors,

View File

@ -47,7 +47,8 @@ function renderReleases(scenes, screen) {
},
top: 1,
height: screen.rows - 3,
width: 161,
// width: 161,
width: config.columns.reduce((acc, column) => acc + column.width, 0),
keys: true,
vi: true,
mouse: true,