Added include and exclude config options. Moved network definitions to a dedicated file. Now scraping all XEmpire sites. Added definitions for Perv City.
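The include and exclude options and the dedicated network definitions file live in changed files that are not reproduced below. As a rough sketch of what such options could look like (the file name, keys, and values are assumptions for illustration, not taken from this commit):

// config/default.js (hypothetical sketch) — the shape of the new include/exclude
// options is assumed; the actual config file is not part of the diff shown here.
module.exports = {
  include: ['xempire', 'pervcity'], // networks or sites to scrape; empty could mean "all"
  exclude: [],                      // networks or sites to skip
};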

commit 53ea694894
parent 71aa31dda6
2019-03-04 02:46:33 +01:00

6 changed files with 245 additions and 34 deletions

src/scrapers/index.js (new file, +9)

@@ -0,0 +1,9 @@
'use strict';

const pervcity = require('./pervcity');
const xempire = require('./xempire');

module.exports = {
  pervcity,
  xempire,
};

src/scrapers/pervcity.js (new file, +17)

@@ -0,0 +1,17 @@
'use strict';

const bhttp = require('bhttp');

// Work-in-progress scraper for Perv City. The fetch below is not wired up yet,
// so this currently returns an empty result.
async function fetchReleases(site) {
  // TODO: fetch and parse the latest-updates endpoint for this tour ID.
  // const res = await bhttp.get(`${site.url}/final_latestupdateview.php&limitstart=0&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`);
  // console.log(res.body.toString());

  return {};
}

module.exports = fetchReleases;

src/scrapers/xempire.js (new file, +53)

@@ -0,0 +1,53 @@
'use strict';

const bhttp = require('bhttp');
const cheerio = require('cheerio');
const { parse, startOfDay } = require('date-fns');

function scrape(html, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });
  const scenesElements = $('.sceneInfo').toArray();

  return scenesElements.map((element) => {
    const sceneLinkElement = $(element).find('.sceneTitle a');
    const url = `${site.url}${sceneLinkElement.attr('href')}`;
    const title = sceneLinkElement.attr('title');

    const [likes, dislikes] = $(element).find('.value')
      .toArray()
      .map(value => Number($(value).text()));

    // Weighted average out of 5, counting a like as 5 stars and a dislike as 1 star,
    // truncated to two decimals.
    const stars = Math.floor(((likes * 5 + dislikes) / (likes + dislikes)) * 100) / 100;

    const actors = $(element).find('.sceneActors a')
      .map((actorIndex, actorElement) => $(actorElement).attr('title'))
      .toArray();

    const date = startOfDay(parse($(element).find('.sceneDate').text(), 'MM-DD-YYYY'));

    return {
      url,
      title,
      actors,
      date,
      rating: {
        likes,
        dislikes,
        stars,
      },
    };
  });
}

async function fetchReleases(site) {
  // Fetch the latest and upcoming release listings in parallel.
  const [latestRes, upcomingRes] = await Promise.all([
    bhttp.get(`${site.url}/en/videos`),
    bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`),
  ]);

  return {
    latest: scrape(latestRes.body.toString(), site),
    upcoming: scrape(upcomingRes.body.toString(), site),
  };
}

module.exports = fetchReleases;
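
For context, a minimal sketch of how these scrapers might be driven through the index added above. The site object and the surrounding wrapper are assumptions for illustration (the real site shape comes from the network definitions file, which is not shown in this diff):

'use strict';

const scrapers = require('./scrapers');

// Hypothetical site entry; `network` is assumed to match a key in the scrapers index.
const site = {
  name: 'HardX',
  network: 'xempire',
  url: 'https://www.hardx.com',
};

async function scrapeSite(site) {
  const fetchReleases = scrapers[site.network];

  if (!fetchReleases) {
    console.warn(`No scraper available for network '${site.network}'`);
    return null;
  }

  // The XEmpire scraper resolves to { latest, upcoming } arrays of release objects;
  // the Perv City stub currently resolves to an empty object.
  return fetchReleases(site);
}

scrapeSite(site).then(releases => console.log(releases));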