From af9565b296900e1b963730f45b0a6a773b0521cd Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Mon, 4 Mar 2019 04:19:03 +0100 Subject: [PATCH] Completed Perv City scraper. Outputting results as list. --- docs/stack.md | 2 ++ networks.js | 9 +++++++++ package-lock.json | 10 +++++----- package.json | 2 +- src/app.js | 29 +++++++++++++++++++++++------ src/scrapers/pervcity.js | 38 ++++++++++++++++++++++++++++++++++---- src/scrapers/xempire.js | 5 +++-- 7 files changed, 77 insertions(+), 18 deletions(-) create mode 100644 docs/stack.md diff --git a/docs/stack.md b/docs/stack.md new file mode 100644 index 00000000..96f2a516 --- /dev/null +++ b/docs/stack.md @@ -0,0 +1,2 @@ +# Stack +Moment.js is used over date-fns because the parse function of date-fns only accepts ISO 8601, which many scraped dates aren't. diff --git a/networks.js b/networks.js index eea1d2a9..566d4810 100644 --- a/networks.js +++ b/networks.js @@ -38,16 +38,25 @@ module.exports = { name: 'Anal Overdose', url: 'http://www.analoverdose.com', description: 'Before proceeding, use caution: the stunning pornstars of Anal Overdose are so fiery that they cause heavy breathing, throbbing cocks and volcanic loads of cum. If you think you can handle the heat of smoking tits, sweltering pussy and red hot ass.', + parameters: { + tourId: 3, + }, }, bangingbeauties: { name: 'Banging Beauties', url: 'http://www.bangingbeauties.com', description: 'Banging Beauties isn\'t just a porn site; it\'s the gateway to all your pussy-obsessed fantasies! Our members\' area is flowing with beautiful pornstars anticipating big dick throbbing in their syrupy pink slits. These experienced babes love brutal vaginal pounding! Similarly, they\'re eager for anal switch-hitting to shake things up. However, it\'s not only about gorgeous sexperts filling their hungry holes. Sometimes, it\'s all about innocent rookies earning their pornstar status in first time threesomes and premier interracial scenes.', + parameters: { + tourId: 7, + }, }, oraloverdose: { name: 'Oral Overdose', url: 'http://www.oraloverdose.com', description: 'Oral Overdose is the only site you need to live out every saliva soaked blowjob of your dreams in HD POV! We\'ve got the most stunning cocksuckers in the world going to town on big dick. These babes not only love cock, they can\'t get enough of it! In fact, there is no prick too huge for our hungry girls\' throats. You\'ll find gorgeous, big tits pornstars exercising their gag reflex in intense balls deep facefuck scenes. We also feature fresh, young newbies taking on the gagging deepthroat challenge.', + parameters: { + tourId: 4, + }, }, chocolatebjs: { name: 'Chocolate BJs', diff --git a/package-lock.json b/package-lock.json index 8dfb0405..9f24cfa2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1509,11 +1509,6 @@ "uniq": "^1.0.0" } }, - "date-fns": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-1.30.1.tgz", - "integrity": "sha512-hBSVCvSmWC+QypYObzwGOd9wqdDpOt+0wl0KbU+R+uuZBS1jN8VsD1ss3irQDknRj5NvxiTF6oj/nDRnN/UQNw==" - }, "debug": { "version": "2.6.9", "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", @@ -3408,6 +3403,11 @@ "minimist": "0.0.8" } }, + "moment": { + "version": "2.24.0", + "resolved": "https://registry.npmjs.org/moment/-/moment-2.24.0.tgz", + "integrity": "sha512-bV7f+6l2QigeBBZSM/6yTNq4P2fNpSWj/0e7jQcy87A8e7o2nAfP/34/2ky5Vw4B9S446EtIhodAzkFCcR4dQg==" + }, "ms": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", diff --git a/package.json b/package.json index 42e1f9dc..05cbc10e 100644 --- a/package.json +++ b/package.json @@ -35,7 +35,7 @@ "bhttp": "^1.2.4", "cheerio": "^1.0.0-rc.2", "config": "^3.0.1", - "date-fns": "^1.30.1", + "moment": "^2.24.0", "terminal-kit": "^1.27.0" } } diff --git a/src/app.js b/src/app.js index 712d5b8d..34947a0c 100644 --- a/src/app.js +++ b/src/app.js @@ -1,7 +1,9 @@ 'use strict'; -const util = require('util'); const config = require('config'); +const { terminal } = require('terminal-kit'); +const moment = require('moment'); + const networks = require('../networks.js'); const scrapers = require('./scrapers'); @@ -80,10 +82,8 @@ function accumulateSites() { return config.include ? accumulateIncludedSites() : accumulateExcludedSites(); } -async function init() { - const sites = accumulateSites(); - - const scenes = await Promise.all(sites.map(async (site) => { +async function fetchScenes(sites) { + return Promise.all(sites.map(async (site) => { const scraper = scrapers[site.id] || scrapers[site.network]; if (scraper) { @@ -92,8 +92,25 @@ async function init() { return []; })); +} - console.log(util.inspect(scenes, { depth: 5 })); +function exit() { + terminal.grabInput(false); + terminal.clear(); + terminal.processExit(); +} + +async function init() { + const sites = accumulateSites(); + const scenes = await fetchScenes(sites); + + terminal.singleColumnMenu(scenes[0].latest.map(scene => `[${scene.siteId} ${moment(scene.date).format('YYYY-MM-DD')}] ${scene.title} (${scene.actors.join(', ')}) ★ ${scene.rating.stars.toFixed(2)}`)); + + terminal.on('key', (name) => { + if (name === 'CTRL_C') { + exit(); + } + }); } init(); diff --git a/src/scrapers/pervcity.js b/src/scrapers/pervcity.js index 8089030a..d14ee1f9 100644 --- a/src/scrapers/pervcity.js +++ b/src/scrapers/pervcity.js @@ -2,16 +2,46 @@ const bhttp = require('bhttp'); const cheerio = require('cheerio'); +const moment = require('moment'); + +function scrape(html, site) { + const $ = cheerio.load(html, { normalizeWhitespace: true }); + + const sceneLinkElement = $('#scene_title_border a'); + const url = `${site.url}/${sceneLinkElement.attr('href')}`; + const title = sceneLinkElement.attr('title'); + + const actors = $('.home_model_name a').toArray().map(element => $(element).text().replace(/,[\u0020\u00A0\u202F]/, '')); + const date = moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate(); + + const stars = $('img[src*="/star.png"]').toArray().map(element => $(element).attr('src')).length || null; + + return { + url, + title, + actors, + date, + rating: { + likes: null, + dislikes: null, + stars, + }, + siteId: site.id, + }; +} + async function fetchReleases(site) { // const session = bhttp.session(); - return {}; - console.log(site.url, site.parameters.tourId) + const res = await bhttp.get(`${site.url}/final_latestupdateview.php?limitstart=0&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`); + const elements = JSON.parse(res.body.toString()); - const res = await bhttp.get(`${site.url}/final_latestupdateview.php&limitstart=0&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`); + const latest = elements.total_arr.map(html => scrape(html, site)); - console.log(res.body.toString()); + return { + latest, + }; } module.exports = fetchReleases; diff --git a/src/scrapers/xempire.js b/src/scrapers/xempire.js index 242d4a86..1978b41a 100644 --- a/src/scrapers/xempire.js +++ b/src/scrapers/xempire.js @@ -2,7 +2,7 @@ const bhttp = require('bhttp'); const cheerio = require('cheerio'); -const { parse, startOfDay } = require('date-fns'); +const moment = require('moment'); function scrape(html, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); @@ -22,7 +22,7 @@ function scrape(html, site) { .map((actorIndex, actorElement) => $(actorElement).attr('title')) .toArray(); - const date = startOfDay(parse($(element).find('.sceneDate').text(), 'MM-DD-YYYY')); + const date = moment.utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY').toDate(); return { url, @@ -34,6 +34,7 @@ function scrape(html, site) { dislikes, stars, }, + siteId: site.id, }; }); }