From 8421cd8648d0ce087f8851ff25dc885e0c03e892 Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Tue, 26 Mar 2019 01:26:47 +0100 Subject: [PATCH] Added parameters column to sites database, fixes Perv City scraper. Getting shoot ID from all existing scrapers. --- config/default.js | 6 +++--- migrations/20190325001339_releases.js | 1 + seeds/sites.js | 8 +++++++- src/fetch-releases.js | 3 ++- src/fetch-scene.js | 2 +- src/scrapers/julesjordan.js | 6 ++++++ src/scrapers/kink.js | 14 +++++++------- src/scrapers/legalporno.js | 28 ++++++--------------------- src/scrapers/pervcity.js | 2 ++ src/scrapers/xempire.js | 9 ++++++++- src/tui/render.js | 3 ++- 11 files changed, 45 insertions(+), 37 deletions(-) diff --git a/config/default.js b/config/default.js index 5e29e9df..7c2e3b50 100644 --- a/config/default.js +++ b/config/default.js @@ -32,15 +32,15 @@ module.exports = { }, { value: 'title', - width: 60, + width: 100, }, { value: 'actors', - width: 40, + width: 60, }, { value: 'rating', - width: 20, + width: 30, }, ], filename: { diff --git a/migrations/20190325001339_releases.js b/migrations/20190325001339_releases.js index a2be4fb7..0a7b4b8a 100644 --- a/migrations/20190325001339_releases.js +++ b/migrations/20190325001339_releases.js @@ -39,6 +39,7 @@ exports.up = knex => Promise.resolve() table.string('name'); table.string('url'); table.string('description'); + table.string('parameters'); })) .then(() => knex.schema.createTable('releases', (table) => { table.increments('id', 12); diff --git a/seeds/sites.js b/seeds/sites.js index 7fb0bec3..1163e3c2 100644 --- a/seeds/sites.js +++ b/seeds/sites.js @@ -272,6 +272,7 @@ exports.seed = knex => Promise.resolve() }, // LEGALPORNO { + id: 'legalporno', name: 'LegalPorno', label: 'legalp', url: 'https://www.legalporno.com', @@ -283,9 +284,10 @@ exports.seed = knex => Promise.resolve() id: 'analoverdose', name: 'Anal Overdose', label: 'AnalOD', - description: 'Before proceeding, use caution: the stunning pornstars of Anal Overdose are so fiery that they cause heavy breathing, throbbing cocks and volcanic loads of cum. If you think you can handle the heat of smoking tits, sweltering pussy and red hot ass.', url: 'http://www.analoverdose.com', + description: 'Before proceeding, use caution: the stunning pornstars of Anal Overdose are so fiery that they cause heavy breathing, throbbing cocks and volcanic loads of cum. If you think you can handle the heat of smoking tits, sweltering pussy and red hot ass.', network_id: 'pervcity', + parameters: JSON.stringify({ tourId: 3 }), }, { id: 'bangingbeauties', @@ -294,6 +296,7 @@ exports.seed = knex => Promise.resolve() description: "Banging Beauties isn't just a porn site; it's the gateway to all your pussy-obsessed fantasies! Our members' area is flowing with beautiful pornstars anticipating big dick throbbing in their syrupy pink slits. These experienced babes love brutal vaginal pounding! Similarly, they're eager for anal switch-hitting to shake things up. However, it's not only about gorgeous sexperts filling their hungry holes. Sometimes, it's all about innocent rookies earning their pornstar status in first time threesomes and premier interracial scenes.", url: 'http://www.bangingbeauties.com', network_id: 'pervcity', + parameters: JSON.stringify({ tourId: 7 }), }, { id: 'oraloverdose', @@ -302,6 +305,7 @@ exports.seed = knex => Promise.resolve() description: "Oral Overdose is the only site you need to live out every saliva soaked blowjob of your dreams in HD POV! We've got the most stunning cocksuckers in the world going to town on big dick. These babes not only love cock, they can't get enough of it! In fact, there is no prick too huge for our hungry girls' throats. You'll find gorgeous, big tits pornstars exercising their gag reflex in intense balls deep facefuck scenes. We also feature fresh, young newbies taking on the gagging deepthroat challenge.", url: 'http://www.oraloverdose.com', network_id: 'pervcity', + parameters: JSON.stringify({ tourId: 4 }), }, { id: 'chocolatebjs', @@ -310,6 +314,7 @@ exports.seed = knex => Promise.resolve() description: "You've just won the golden ticket to the best Chocolate BJs on the planet! We've sought far and wide to bring you the most beautiful black and ethnic pornstars. And they're in our members' area now! They can't wait to suck your white lollipop and lick the thick cream shooting from your big dick. Of course, no matter how sweet the booty or juicy the big tits, these brown foxes aren't all sugar and spice. In fact, when it comes to giving head, these big ass ebony babes know what they want: huge white cocks filling their throats!", url: 'http://www.chocolatebjs.com', network_id: 'pervcity', + parameters: JSON.stringify({ tourId: 6 }), }, { id: 'upherasshole', @@ -318,6 +323,7 @@ exports.seed = knex => Promise.resolve() description: "You don't need to travel the globe in search of the anal wonders of the world, because you get your own private tour right here on Up Her Asshole! Our stunning pornstars and rookie starlets welcome all ass fetish and anal sex fans, with their twerking bubble butts and winking assholes. However, big booty worship is just a slice of the fun. Combined with juicy tits (big and small), wet pussy (hairy and bald), these girls deliver a spectacular sensory experience in HD POV. Not only are you in danger of busting a nut before the going gets good, but also when the good turns remarkable with rimming, fingering and butt toys!", url: 'http://www.upherasshole.com', network_id: 'pervcity', + parameters: JSON.stringify({ tourId: 9 }), }, // XEMPIRE { diff --git a/src/fetch-releases.js b/src/fetch-releases.js index 8c2c7d61..f9a120c8 100644 --- a/src/fetch-releases.js +++ b/src/fetch-releases.js @@ -33,6 +33,7 @@ function curateSites(sites) { description: site.description, url: site.url, networkId: site.network_id, + parameters: JSON.parse(site.parameters), })); } @@ -50,7 +51,7 @@ async function fetchReleases() { const sites = await accumulateIncludedSites(); const scenesPerSite = await Promise.all(sites.map(async (site) => { - const scraper = scrapers[site.id] || scrapers[site.network]; + const scraper = scrapers[site.id] || scrapers[site.networkId]; if (scraper) { const [latest, upcoming] = await Promise.all([ diff --git a/src/fetch-scene.js b/src/fetch-scene.js index 24517d17..b5889032 100644 --- a/src/fetch-scene.js +++ b/src/fetch-scene.js @@ -23,7 +23,7 @@ async function findSite(url) { function deriveFilename(scene) { const props = { siteName: scene.site.name, - sceneId: scene.id, + sceneId: scene.shootId, sceneTitle: scene.title, sceneActors: scene.actors.join(config.filename.actorsJoin), sceneDate: moment.utc(scene.date).format(config.filename.dateFormat), diff --git a/src/scrapers/julesjordan.js b/src/scrapers/julesjordan.js index 70b00c7c..c8ed186a 100644 --- a/src/scrapers/julesjordan.js +++ b/src/scrapers/julesjordan.js @@ -15,6 +15,8 @@ function scrapeLatest(html, site) { const url = sceneLinkElement.attr('href'); const title = sceneLinkElement.text(); + const shootId = $(element).attr('data-setid'); + const date = moment .utc($(element).find('.update_date').text(), 'MM/DD/YYYY') .toDate(); @@ -25,6 +27,7 @@ function scrapeLatest(html, site) { return { url, + shootId, title, actors, date, @@ -38,6 +41,8 @@ function scrapeUpcoming(html, site) { const scenesElements = $('#coming_soon_carousel').find('.table').toArray(); return scenesElements.map((element) => { + const shootId = $(element).find('.upcoming_updates_thumb').attr('id').match(/\d+/)[0]; + const details = $(element).find('.update_details_comingsoon') .eq(1) .children() @@ -59,6 +64,7 @@ function scrapeUpcoming(html, site) { return { url: null, + shootId, title, actors, date, diff --git a/src/scrapers/kink.js b/src/scrapers/kink.js index 7f2276f7..51df74a7 100644 --- a/src/scrapers/kink.js +++ b/src/scrapers/kink.js @@ -15,7 +15,7 @@ function scrapeLatest(html, site) { const sceneLinkElement = $(element).find('.shoot-thumb-title a'); const href = sceneLinkElement.attr('href'); const url = `https://kink.com${href}`; - const id = href.split('/')[2]; + const shootId = href.split('/')[2]; const title = sceneLinkElement.text(); const date = moment.utc($(element).find('.date').text(), 'MMM DD, YYYY').toDate(); @@ -28,7 +28,7 @@ function scrapeLatest(html, site) { return { url, - id, + shootId, title, actors, date, @@ -41,7 +41,7 @@ function scrapeLatest(html, site) { }); } -async function scrapeScene(html, url, id, ratingRes, site) { +async function scrapeScene(html, url, shootId, ratingRes, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); // const title = $('h1.shoot-title').text().replace(/\ue800/, ''); // fallback, special character is 'like'-heart @@ -70,7 +70,7 @@ async function scrapeScene(html, url, id, ratingRes, site) { return { url, - id, + shootId, title, date, actors, @@ -90,14 +90,14 @@ async function fetchLatest(site) { } async function fetchScene(url, site) { - const id = new URL(url).pathname.split('/')[2]; + const shootId = new URL(url).pathname.split('/')[2]; const [res, ratingRes] = await Promise.all([ bhttp.get(url), - bhttp.get(`https://kink.com/api/ratings/${id}`), + bhttp.get(`https://kink.com/api/ratings/${shootId}`), ]); - return scrapeScene(res.body.toString(), url, id, ratingRes, site); + return scrapeScene(res.body.toString(), url, shootId, ratingRes, site); } module.exports = { diff --git a/src/scrapers/legalporno.js b/src/scrapers/legalporno.js index d192424d..7930d8ea 100644 --- a/src/scrapers/legalporno.js +++ b/src/scrapers/legalporno.js @@ -6,29 +6,13 @@ const moment = require('moment'); const { matchTags } = require('../tags'); -const tagMap = { - '3+ on 1': 'gangbang', - anal: 'anal', - bbc: 'big black cock', - 'cum swallowing': 'swallowing', - rough: 'rough', - 'deep throat': 'deepthroat', - 'double penetration (DP)': 'DP', - 'double anal (DAP)': 'DAP', - 'double vaginal (DPP)': 'DVP', - 'gapes (gaping asshole)': 'gaping', - 'huge toys': 'toys', - interracial: 'interracial', - 'triple penetration': 'TP', -}; - function extractTitle(originalTitle) { const titleComponents = originalTitle.split(' '); const sceneIdMatch = titleComponents.slice(-1)[0].match(/(GP|SZ|IV|GIO|AA|GL|BZ|FS)\d+/); // detect studio prefixes - const id = sceneIdMatch ? sceneIdMatch[0] : null; + const shootId = sceneIdMatch ? sceneIdMatch[0] : null; const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle; - return { id, title }; + return { shootId, title }; } function scrapeLatest(html, site) { @@ -40,13 +24,13 @@ function scrapeLatest(html, site) { const url = sceneLinkElement.attr('href'); const originalTitle = sceneLinkElement.attr('title'); - const { id, title } = extractTitle(originalTitle); + const { shootId, title } = extractTitle(originalTitle); const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate(); return { url, - id, + shootId, title, date, site, @@ -58,7 +42,7 @@ async function scrapeScene(html, url, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const originalTitle = $('h1.watchpage-title').text().trim(); - const { id, title } = extractTitle(originalTitle); + const { shootId, title } = extractTitle(originalTitle); const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate(); @@ -74,7 +58,7 @@ async function scrapeScene(html, url, site) { return { url, - id, + shootId, title, date, actors, diff --git a/src/scrapers/pervcity.js b/src/scrapers/pervcity.js index 05420b12..f5300e6b 100644 --- a/src/scrapers/pervcity.js +++ b/src/scrapers/pervcity.js @@ -7,6 +7,7 @@ const moment = require('moment'); function scrape(html, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); + const shootId = $('li').attr('id'); const sceneLinkElement = $('#scene_title_border a'); const url = `${site.url}/${sceneLinkElement.attr('href')}`; const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes @@ -21,6 +22,7 @@ function scrape(html, site) { return { url, + shootId, title, actors, date, diff --git a/src/scrapers/xempire.js b/src/scrapers/xempire.js index 742223b6..40a8ae28 100644 --- a/src/scrapers/xempire.js +++ b/src/scrapers/xempire.js @@ -8,13 +8,15 @@ const { matchTags } = require('../tags'); function scrape(html, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); - const scenesElements = $('.sceneInfo').toArray(); + const scenesElements = $('li[data-itemtype=scene]').toArray(); return scenesElements.map((element) => { const sceneLinkElement = $(element).find('.sceneTitle a'); const url = `${site.url}${sceneLinkElement.attr('href')}`; const title = sceneLinkElement.attr('title'); + const shootId = $(element).attr('data-itemid'); + const date = moment .utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY') .toDate(); @@ -29,6 +31,7 @@ function scrape(html, site) { return { url, + shootId, title, actors, date, @@ -42,6 +45,7 @@ function scrape(html, site) { } async function scrapeSceneFallback($, url, site) { + const shootId = new URL(url).pathname.split('/').slice(-1)[0]; const title = $('h1.title').text(); const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate(); const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray(); @@ -54,6 +58,7 @@ async function scrapeSceneFallback($, url, site) { return { url, + shootId, title, date, actors, @@ -75,6 +80,7 @@ async function scrapeScene(html, url, site) { } const data = JSON.parse(json)[0]; + const shootId = new URL(url).pathname.split('/').slice(-1)[0]; const title = data.isPartOf.name; const date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate(); @@ -98,6 +104,7 @@ async function scrapeScene(html, url, site) { return { url, + shootId, title, date, actors, diff --git a/src/tui/render.js b/src/tui/render.js index a1684882..8e175a16 100644 --- a/src/tui/render.js +++ b/src/tui/render.js @@ -47,7 +47,8 @@ function renderReleases(scenes, screen) { }, top: 1, height: screen.rows - 3, - width: 161, + // width: 161, + width: config.columns.reduce((acc, column) => acc + column.width, 0), keys: true, vi: true, mouse: true,