From 53ea694894164c873cf0e3f3f656f570146560b0 Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Mon, 4 Mar 2019 02:46:33 +0100 Subject: [PATCH] Added include and exclude config options. Moved network definitions to dedicated file. Scraping all XEmpire sites. Added definitions for Perv City. --- config/default.js | 8 +++ networks.js | 70 ++++++++++++++++++++++ src/app.js | 122 ++++++++++++++++++++++++++++----------- src/scrapers/index.js | 9 +++ src/scrapers/pervcity.js | 17 ++++++ src/scrapers/xempire.js | 53 +++++++++++++++++ 6 files changed, 245 insertions(+), 34 deletions(-) create mode 100644 config/default.js create mode 100644 networks.js create mode 100644 src/scrapers/index.js create mode 100644 src/scrapers/pervcity.js create mode 100644 src/scrapers/xempire.js diff --git a/config/default.js b/config/default.js new file mode 100644 index 00000000..53ef5fbb --- /dev/null +++ b/config/default.js @@ -0,0 +1,8 @@ +'use strict'; + +module.exports = { + include: [ + 'xempire', + 'pervcity', + ], +}; diff --git a/networks.js b/networks.js new file mode 100644 index 00000000..eea1d2a9 --- /dev/null +++ b/networks.js @@ -0,0 +1,70 @@ +'use strict'; + +/* eslint-disable max-len */ +module.exports = { + xempire: { + name: 'XEmpire', + url: 'https://www.xempire.com/en', + description: 'XEmpire.com brings you today\'s top pornstars in beautifully shot, HD sex scenes across 4 unique porn sites of gonzo porn, interracial, lesbian & erotica!', + sites: { + hardx: { + name: 'HardX', + url: 'https://www.hardx.com', + description: 'Welcome to HardX.com, home of exclusive hardcore gonzo porn and first time anal scenes, DP, blowbangs and gangbangs from today\'s hottest porn stars!', + }, + eroticax: { + name: 'EroticaX', + url: 'https://www.eroticax.com', + description: 'EroticaX.com features intimate scenes of passionate, erotic sex. Watch the sensual side of hardcore porn as your favorite pornstars have real, intense orgasms.', + }, + darkx: { + name: 'DarkX', + url: 'https://www.darkx.com', + description: 'Watch interracial BBC porn videos on DarkX.com, featuring the best pornstars taking big black cock in exclusive scenes. The best black on white porn inside!', + }, + lesbianx: { + name: 'LesbianX', + url: 'https://www.lesbianx.com', + description: 'LesbianX.com features today\'s top pornstars in hardcore lesbian porn. Watch passionate & intense girl on girl sex videos, from erotic kissing to pussy licking.', + }, + }, + }, + pervcity: { + name: 'Perv City', + url: 'https://www.pervcity.com', + description: '', + sites: { + analoverdose: { + name: 'Anal Overdose', + url: 'http://www.analoverdose.com', + description: 'Before proceeding, use caution: the stunning pornstars of Anal Overdose are so fiery that they cause heavy breathing, throbbing cocks and volcanic loads of cum. If you think you can handle the heat of smoking tits, sweltering pussy and red hot ass.', + }, + bangingbeauties: { + name: 'Banging Beauties', + url: 'http://www.bangingbeauties.com', + description: 'Banging Beauties isn\'t just a porn site; it\'s the gateway to all your pussy-obsessed fantasies! Our members\' area is flowing with beautiful pornstars anticipating big dick throbbing in their syrupy pink slits. These experienced babes love brutal vaginal pounding! Similarly, they\'re eager for anal switch-hitting to shake things up. However, it\'s not only about gorgeous sexperts filling their hungry holes. 
Sometimes, it\'s all about innocent rookies earning their pornstar status in first time threesomes and premier interracial scenes.', + }, + oraloverdose: { + name: 'Oral Overdose', + url: 'http://www.oraloverdose.com', + description: 'Oral Overdose is the only site you need to live out every saliva soaked blowjob of your dreams in HD POV! We\'ve got the most stunning cocksuckers in the world going to town on big dick. These babes not only love cock, they can\'t get enough of it! In fact, there is no prick too huge for our hungry girls\' throats. You\'ll find gorgeous, big tits pornstars exercising their gag reflex in intense balls deep facefuck scenes. We also feature fresh, young newbies taking on the gagging deepthroat challenge.', + }, + chocolatebjs: { + name: 'Chocolate BJs', + url: 'http://www.chocolatebjs.com', + description: 'You\'ve just won the golden ticket to the best Chocolate BJs on the planet! We\'ve sought far and wide to bring you the most beautiful black and ethnic pornstars. And they\'re in our members\' area now! They can\'t wait to suck your white lollipop and lick the thick cream shooting from your big dick. Of course, no matter how sweet the booty or juicy the big tits, these brown foxes aren\'t all sugar and spice. In fact, when it comes to giving head, these big ass ebony babes know what they want: huge white cocks filling their throats!', + parameters: { + tourId: 6, + }, + }, + upherasshole: { + name: 'Up Her Asshole', + url: 'http://www.upherasshole.com', + description: 'You don\'t need to travel the globe in search of the anal wonders of the world, because you get your own private tour right here on Up Her Asshole! Our stunning pornstars and rookie starlets welcome all ass fetish and anal sex fans, with their twerking bubble butts and winking assholes. However, big booty worship is just a slice of the fun. Combined with juicy tits (big and small), wet pussy (hairy and bald), these girls deliver a spectacular sensory experience in HD POV. 
Not only are you in danger of busting a nut before the going gets good, but also when the good turns remarkable with rimming, fingering and butt toys!', + parameters: { + tourId: 9, + }, + }, + }, + }, +}; diff --git a/src/app.js b/src/app.js index 1f03f69c..712d5b8d 100644 --- a/src/app.js +++ b/src/app.js @@ -1,45 +1,99 @@ 'use strict'; -const bhttp = require('bhttp'); -const cheerio = require('cheerio'); -const { parse, format } = require('date-fns'); +const util = require('util'); +const config = require('config'); +const networks = require('../networks.js'); +const scrapers = require('./scrapers'); + +function accumulateIncludedSites() { + return config.include.reduce((acc, network) => { + // network included with specific sites, only include specified sites + if (Array.isArray(network)) { + const [networkId, siteIds] = network; + + return [ + ...acc, + ...siteIds.map(siteId => ({ + id: siteId, + network: networkId, + ...networks[networkId].sites[siteId], + })), + ]; + } + + // network included without further specification, include all sites + return [ + ...acc, + ...Object.entries(networks[network].sites).map(([siteId, site]) => ({ + id: siteId, + network, + ...site, + })), + ]; + }, []); +} + +function accumulateExcludedSites() { + return Object.entries(networks).reduce((acc, [networkId, network]) => { + const excludedNetwork = config.exclude.find((excludedNetworkX) => { + if (Array.isArray(excludedNetworkX)) { + return excludedNetworkX[0] === networkId; + } + + return excludedNetworkX === networkId; + }); + + // network excluded with specific sites, only exclude specified sites + if (excludedNetwork && Array.isArray(excludedNetwork)) { + const [, excludedSiteIds] = excludedNetwork; + + return [ + ...acc, + ...Object.entries(network.sites) + .filter(([siteId]) => !excludedSiteIds.includes(siteId)) + .map(([siteId, site]) => ({ + id: siteId, + network: networkId, + ...site, + })), + ]; + } + + // network excluded without further specification, exclude all its sites + if (excludedNetwork) { + return acc; + } + + // network not excluded, include all its sites + return [ + ...acc, + ...Object.entries(network.sites).map(([siteId, site]) => ({ + id: siteId, + network: networkId, + ...site, + })), + ]; + }, []); +} + +function accumulateSites() { + return config.include ? 
accumulateIncludedSites() : accumulateExcludedSites();
+}
 
 async function init() {
-  const baseUrl = 'https://www.hardx.com';
-  const res = await bhttp.get(`${baseUrl}/en/videos`, {});
-  const $ = cheerio.load(res.body.toString(), { normalizeWhitespace: true });
-  const scenesElements = $('.sceneInfo').toArray();
+  const sites = accumulateSites();
 
-  const scenes = scenesElements.map((element) => {
-    const sceneLinkElement = $(element).find('.sceneTitle a');
-    const url = `${baseUrl}${sceneLinkElement.attr('href')}`;
-    const title = sceneLinkElement.attr('title');
+  const scenes = await Promise.all(sites.map(async (site) => {
+    const scraper = scrapers[site.id] || scrapers[site.network];
 
-    const [likes, dislikes] = $(element).find('.value')
-      .toArray()
-      .map(value => Number($(value).text()));
-    const stars = Math.floor(((likes * 5 + dislikes) / (likes + dislikes)) * 100) / 100;
+    if (scraper) {
+      return scraper(site);
+    }
 
-    const actors = $(element).find('.sceneActors a')
-      .map((actorIndex, actorElement) => $(actorElement).attr('title'))
-      .toArray();
+    return [];
+  }));
 
-    const date = parse($(element).find('.sceneDate').text(), 'MM-DD-YYYY');
-
-    return {
-      url,
-      title,
-      actors,
-      date,
-      rating: {
-        likes,
-        dislikes,
-        stars,
-      },
-    };
-  });
-
-  console.log(scenes);
+  console.log(util.inspect(scenes, { depth: 5 }));
 }
 
 init();
diff --git a/src/scrapers/index.js b/src/scrapers/index.js
new file mode 100644
index 00000000..7a05d10b
--- /dev/null
+++ b/src/scrapers/index.js
@@ -0,0 +1,9 @@
+'use strict';
+
+const pervcity = require('./pervcity');
+const xempire = require('./xempire');
+
+module.exports = {
+  pervcity,
+  xempire,
+};
diff --git a/src/scrapers/pervcity.js b/src/scrapers/pervcity.js
new file mode 100644
index 00000000..8089030a
--- /dev/null
+++ b/src/scrapers/pervcity.js
@@ -0,0 +1,17 @@
+'use strict';
+
+const bhttp = require('bhttp');
+const cheerio = require('cheerio');
+
+// Perv City scraping is not implemented yet; the commented-out request is kept for reference.
+async function fetchReleases(site) {
+  // const session = bhttp.session();
+  // console.log(site.url, site.parameters.tourId);
+
+  // const res = await bhttp.get(`${site.url}/final_latestupdateview.php&limitstart=0&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`);
+  // console.log(res.body.toString());
+
+  return {};
+}
+
+module.exports = fetchReleases;
diff --git a/src/scrapers/xempire.js b/src/scrapers/xempire.js
new file mode 100644
index 00000000..242d4a86
--- /dev/null
+++ b/src/scrapers/xempire.js
@@ -0,0 +1,53 @@
+'use strict';
+
+const bhttp = require('bhttp');
+const cheerio = require('cheerio');
+const { parse, startOfDay } = require('date-fns');
+
+function scrape(html, site) {
+  const $ = cheerio.load(html, { normalizeWhitespace: true });
+  const scenesElements = $('.sceneInfo').toArray();
+
+  return scenesElements.map((element) => {
+    const sceneLinkElement = $(element).find('.sceneTitle a');
+    const url = `${site.url}${sceneLinkElement.attr('href')}`;
+    const title = sceneLinkElement.attr('title');
+
+    const [likes, dislikes] = $(element).find('.value')
+      .toArray()
+      .map(value => Number($(value).text()));
+    const stars = Math.floor(((likes * 5 + dislikes) / (likes + dislikes)) * 100) / 100;
+
+    const actors = $(element).find('.sceneActors a')
+      .map((actorIndex, actorElement) => $(actorElement).attr('title'))
+      .toArray();
+
+    const date = startOfDay(parse($(element).find('.sceneDate').text(), 'MM-DD-YYYY'));
+
+    return {
+      url,
+      title,
+      actors,
+      date,
+      rating: {
+        likes,
+        dislikes,
+        stars,
+      },
+    };
+  });
+}
+
+async function fetchReleases(site) {
+  const [latestRes, upcomingRes] = await Promise.all([
+    bhttp.get(`${site.url}/en/videos`),
+    bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`),
+  ]);
+
+  return {
+    latest: scrape(latestRes.body.toString(), site),
+    upcoming: scrape(upcomingRes.body.toString(), site),
+  };
+}
+
+module.exports = fetchReleases;
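
Note on the new config options: accumulateIncludedSites() and accumulateExcludedSites() in src/app.js accept either a bare network ID or a [networkId, [siteIds]] pair, and accumulateSites() only falls back to the exclude list when config.include is not set. Below is a minimal sketch of an override using the pair form; config/local.js is simply one of the override files the config package merges on top of config/default.js, and the particular site selection is an example rather than a suggested default:

    'use strict';

    // config/local.js (hypothetical override, merged over config/default.js by the config package)
    module.exports = {
      include: [
        // every site of the XEmpire network
        'xempire',
        // only two specific Perv City sites
        ['pervcity', ['analoverdose', 'upherasshole']],
      ],
    };

An exclude list takes the same shapes, but it only applies when include is absent, because accumulateSites() checks config.include first.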
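
For reference, a rough skeleton of what an additional scraper module could look like under this layout. Everything specific in it (the examplenetwork name, the /videos path and the CSS selector) is a placeholder; only the fetchReleases(site) calling convention and the site object shape come from src/app.js, which resolves scrapers[site.id] || scrapers[site.network] via src/scrapers/index.js:

    'use strict';

    // src/scrapers/examplenetwork.js (hypothetical) -- register it in src/scrapers/index.js
    // under a key matching either a network ID or a site ID from networks.js.
    const bhttp = require('bhttp');
    const cheerio = require('cheerio');

    async function fetchReleases(site) {
      // `site` is assembled in app.js from networks.js: { id, network, name, url, description, parameters? }
      const res = await bhttp.get(`${site.url}/videos`);
      const $ = cheerio.load(res.body.toString(), { normalizeWhitespace: true });

      // The return value is only logged by app.js for now; xempire.js returns { latest, upcoming } arrays.
      return {
        latest: $('.scene a').toArray().map(element => ({
          url: `${site.url}${$(element).attr('href')}`,
          title: $(element).attr('title'),
        })),
      };
    }

    module.exports = fetchReleases;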