Completed Perv City scraper. Outputting results as a list.

ThePendulum 2019-03-04 04:19:03 +01:00
parent 5e4e12fbf8
commit af9565b296
7 changed files with 77 additions and 18 deletions

docs/stack.md Normal file

@@ -0,0 +1,2 @@
# Stack
Moment.js is used over date-fns because date-fns' parse function only accepts ISO 8601 strings, and many scraped dates are not in that format.
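A minimal sketch of the parsing this refers to, assuming a scraped date such as '04-03-2019' in DD-MM-YYYY order (illustrative values, not taken from a specific site):

const moment = require('moment');

// Moment accepts an explicit format string, so non-ISO scraper input parses cleanly.
const date = moment.utc('04-03-2019', 'DD-MM-YYYY').toDate();
console.log(date.toISOString()); // 2019-03-04T00:00:00.000Z

// date-fns v1's parse() takes no format argument and, as noted above, only
// accepts ISO 8601 strings, which is why it was swapped out for Moment here.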


@@ -38,16 +38,25 @@ module.exports = {
name: 'Anal Overdose',
url: 'http://www.analoverdose.com',
description: 'Before proceeding, use caution: the stunning pornstars of Anal Overdose are so fiery that they cause heavy breathing, throbbing cocks and volcanic loads of cum. If you think you can handle the heat of smoking tits, sweltering pussy and red hot ass.',
parameters: {
tourId: 3,
},
},
bangingbeauties: {
name: 'Banging Beauties',
url: 'http://www.bangingbeauties.com',
description: 'Banging Beauties isn\'t just a porn site; it\'s the gateway to all your pussy-obsessed fantasies! Our members\' area is flowing with beautiful pornstars anticipating big dick throbbing in their syrupy pink slits. These experienced babes love brutal vaginal pounding! Similarly, they\'re eager for anal switch-hitting to shake things up. However, it\'s not only about gorgeous sexperts filling their hungry holes. Sometimes, it\'s all about innocent rookies earning their pornstar status in first time threesomes and premier interracial scenes.',
parameters: {
tourId: 7,
},
},
oraloverdose: {
name: 'Oral Overdose',
url: 'http://www.oraloverdose.com',
description: 'Oral Overdose is the only site you need to live out every saliva soaked blowjob of your dreams in HD POV! We\'ve got the most stunning cocksuckers in the world going to town on big dick. These babes not only love cock, they can\'t get enough of it! In fact, there is no prick too huge for our hungry girls\' throats. You\'ll find gorgeous, big tits pornstars exercising their gag reflex in intense balls deep facefuck scenes. We also feature fresh, young newbies taking on the gagging deepthroat challenge.',
parameters: {
tourId: 4,
},
},
chocolatebjs: {
name: 'Chocolate BJs',

package-lock.json generated

@@ -1509,11 +1509,6 @@
"uniq": "^1.0.0"
}
},
"date-fns": {
"version": "1.30.1",
"resolved": "https://registry.npmjs.org/date-fns/-/date-fns-1.30.1.tgz",
"integrity": "sha512-hBSVCvSmWC+QypYObzwGOd9wqdDpOt+0wl0KbU+R+uuZBS1jN8VsD1ss3irQDknRj5NvxiTF6oj/nDRnN/UQNw=="
},
"debug": {
"version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
@@ -3408,6 +3403,11 @@
"minimist": "0.0.8"
}
},
"moment": {
"version": "2.24.0",
"resolved": "https://registry.npmjs.org/moment/-/moment-2.24.0.tgz",
"integrity": "sha512-bV7f+6l2QigeBBZSM/6yTNq4P2fNpSWj/0e7jQcy87A8e7o2nAfP/34/2ky5Vw4B9S446EtIhodAzkFCcR4dQg=="
},
"ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",


@@ -35,7 +35,7 @@
"bhttp": "^1.2.4",
"cheerio": "^1.0.0-rc.2",
"config": "^3.0.1",
"date-fns": "^1.30.1",
"moment": "^2.24.0",
"terminal-kit": "^1.27.0"
}
}


@@ -1,7 +1,9 @@
'use strict';
const util = require('util');
const config = require('config');
const { terminal } = require('terminal-kit');
const moment = require('moment');
const networks = require('../networks.js');
const scrapers = require('./scrapers');
@@ -80,10 +82,8 @@ function accumulateSites() {
return config.include ? accumulateIncludedSites() : accumulateExcludedSites();
}
async function init() {
const sites = accumulateSites();
const scenes = await Promise.all(sites.map(async (site) => {
async function fetchScenes(sites) {
return Promise.all(sites.map(async (site) => {
const scraper = scrapers[site.id] || scrapers[site.network];
if (scraper) {
@@ -92,8 +92,25 @@ async function init() {
return [];
}));
}
console.log(util.inspect(scenes, { depth: 5 }));
function exit() {
terminal.grabInput(false);
terminal.clear();
terminal.processExit();
}
async function init() {
const sites = accumulateSites();
const scenes = await fetchScenes(sites);
terminal.singleColumnMenu(scenes[0].latest.map(scene => `[${scene.siteId} ${moment(scene.date).format('YYYY-MM-DD')}] ${scene.title} (${scene.actors.join(', ')}) ★ ${scene.rating.stars.toFixed(2)}`));
terminal.on('key', (name) => {
if (name === 'CTRL_C') {
exit();
}
});
}
init();


@@ -2,16 +2,46 @@
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const moment = require('moment');
function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const sceneLinkElement = $('#scene_title_border a');
const url = `${site.url}/${sceneLinkElement.attr('href')}`;
const title = sceneLinkElement.attr('title');
const actors = $('.home_model_name a').toArray().map(element => $(element).text().replace(/,[\u0020\u00A0\u202F]/, ''));
const date = moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate();
const stars = $('img[src*="/star.png"]').toArray().map(element => $(element).attr('src')).length || null;
return {
url,
title,
actors,
date,
rating: {
likes: null,
dislikes: null,
stars,
},
siteId: site.id,
};
}
async function fetchReleases(site) {
// const session = bhttp.session();
return {};
console.log(site.url, site.parameters.tourId)
const res = await bhttp.get(`${site.url}/final_latestupdateview.php?limitstart=0&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`);
const elements = JSON.parse(res.body.toString());
const res = await bhttp.get(`${site.url}/final_latestupdateview.php&limitstart=0&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`);
const latest = elements.total_arr.map(html => scrape(html, site));
console.log(res.body.toString());
return {
latest,
};
}
module.exports = fetchReleases;


@@ -2,7 +2,7 @@
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const { parse, startOfDay } = require('date-fns');
const moment = require('moment');
function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
@@ -22,7 +22,7 @@ function scrape(html, site) {
.map((actorIndex, actorElement) => $(actorElement).attr('title'))
.toArray();
const date = startOfDay(parse($(element).find('.sceneDate').text(), 'MM-DD-YYYY'));
const date = moment.utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY').toDate();
return {
url,
@@ -34,6 +34,7 @@ function scrape(html, site) {
dislikes,
stars,
},
siteId: site.id,
};
});
}