Added include and exclude config options. Moved network definitions to a dedicated file. Scraping all XEmpire sites. Added definitions for Perv City.
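The new include and exclude options accept either a bare network ID or a [networkId, [siteIds]] tuple. A minimal sketch of a config file, assuming the config package's conventional config/default.js location; the network and site IDs below are illustrative, only the shapes are implied by the accumulator code in this commit:

// config/default.js (hypothetical path used by the config package)
module.exports = {
  // include takes precedence over exclude (see accumulateSites in src/app.js)
  include: [
    'pervcity',                      // a bare network ID includes all of its sites
    ['xempire', ['hardx', 'darkx']], // a tuple includes only the listed sites
  ],
  // exclude: ['pervcity', ['xempire', ['hardx']]],
};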
122 src/app.js
@@ -1,45 +1,99 @@
 'use strict';

 const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 const { parse, format } = require('date-fns');
+const util = require('util');
+const config = require('config');
+const networks = require('../networks.js');
+const scrapers = require('./scrapers');
+
+function accumulateIncludedSites() {
+  return config.include.reduce((acc, network) => {
+    // network included with specific sites, only include specified sites
+    if (Array.isArray(network)) {
+      const [networkId, siteIds] = network;
+
+      return [
+        ...acc,
+        ...siteIds.map(siteId => ({
+          id: siteId,
+          network: networkId,
+          ...networks[networkId].sites[siteId],
+        })),
+      ];
+    }
+
+    // network included without further specification, include all sites
+    return [
+      ...acc,
+      ...Object.entries(networks[network].sites).map(([siteId, site]) => ({
+        id: siteId,
+        network,
+        ...site,
+      })),
+    ];
+  }, []);
+}
+
+function accumulateExcludedSites() {
+  return Object.entries(networks).reduce((acc, [networkId, network]) => {
+    const excludedNetwork = config.exclude.find((excludedNetworkX) => {
+      if (Array.isArray(excludedNetworkX)) {
+        return excludedNetworkX[0] === networkId;
+      }
+
+      return excludedNetworkX === networkId;
+    });
+
+    // network excluded with specific sites, only exclude specified sites
+    if (excludedNetwork && Array.isArray(excludedNetwork)) {
+      const [, excludedSiteIds] = excludedNetwork;
+
+      return [
+        ...acc,
+        ...Object.entries(network.sites)
+          .filter(([siteId]) => !excludedSiteIds.includes(siteId))
+          .map(([siteId, site]) => ({
+            id: siteId,
+            network: networkId,
+            ...site,
+          })),
+      ];
+    }
+
+    // network excluded without further specification, exclude all its sites
+    if (excludedNetwork) {
+      return acc;
+    }
+
+    // network not excluded, include all its sites
+    return [
+      ...acc,
+      ...Object.entries(network.sites).map(([siteId, site]) => ({
+        id: siteId,
+        network: networkId,
+        ...site,
+      })),
+    ];
+  }, []);
+}
+
+function accumulateSites() {
+  return config.include ? accumulateIncludedSites() : accumulateExcludedSites();
+}

 async function init() {
-  const baseUrl = 'https://www.hardx.com';
-  const res = await bhttp.get(`${baseUrl}/en/videos`, {});
-  const $ = cheerio.load(res.body.toString(), { normalizeWhitespace: true });
-  const scenesElements = $('.sceneInfo').toArray();
+  const sites = accumulateSites();

-  const scenes = scenesElements.map((element) => {
-    const sceneLinkElement = $(element).find('.sceneTitle a');
-    const url = `${baseUrl}${sceneLinkElement.attr('href')}`;
-    const title = sceneLinkElement.attr('title');
+  const scenes = await Promise.all(sites.map(async (site) => {
+    const scraper = scrapers[site.id] || scrapers[site.network];

-    const [likes, dislikes] = $(element).find('.value')
-      .toArray()
-      .map(value => Number($(value).text()));
-    const stars = Math.floor(((likes * 5 + dislikes) / (likes + dislikes)) * 100) / 100;
+    if (scraper) {
+      return scraper(site);
+    }

-    const actors = $(element).find('.sceneActors a')
-      .map((actorIndex, actorElement) => $(actorElement).attr('title'))
-      .toArray();
+    return [];
+  }));

-    const date = parse($(element).find('.sceneDate').text(), 'MM-DD-YYYY');
-
-    return {
-      url,
-      title,
-      actors,
-      date,
-      rating: {
-        likes,
-        dislikes,
-        stars,
-      },
-    };
-  });
-
-  console.log(scenes);
+  console.log(util.inspect(scenes, { depth: 5 }));
 }

 init();
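app.js now pulls its site definitions from ../networks.js. A minimal sketch of the shape the accumulators and scrapers appear to expect — networks keyed by ID, each with a sites map, and per-site parameters such as the tourId read by the Perv City scraper. The concrete names, URLs and values below are placeholders, only the keys are implied by the code:

// networks.js — shape implied by accumulate*Sites() and the scrapers
module.exports = {
  xempire: {
    sites: {
      hardx: {
        name: 'HardX',
        url: 'https://www.hardx.com',
      },
    },
  },
  pervcity: {
    sites: {
      analoverdose: {
        name: 'Anal Overdose',            // placeholder
        url: 'https://analoverdose.com',  // placeholder
        parameters: {
          tourId: 1, // placeholder; read as site.parameters.tourId in pervcity.js
        },
      },
    },
  },
};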
9 src/scrapers/index.js Normal file
@@ -0,0 +1,9 @@
'use strict';

const pervcity = require('./pervcity');
const xempire = require('./xempire');

module.exports = {
  pervcity,
  xempire,
};
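Each scraper module exports an async fetchReleases(site) function, registered here under its network ID. Because app.js resolves scrapers[site.id] || scrapers[site.network], a module registered under a site ID would take precedence over the network-wide scraper for that site.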
17 src/scrapers/pervcity.js Normal file
@@ -0,0 +1,17 @@
'use strict';

const bhttp = require('bhttp');
const cheerio = require('cheerio'); // unused so far; response parsing is still to be implemented

// WIP stub: fetches the tour's latest-updates endpoint and dumps the raw
// response, but does not parse or return any scenes yet
async function fetchReleases(site) {
  // const session = bhttp.session();
  console.log(site.url, site.parameters.tourId);

  const res = await bhttp.get(`${site.url}/final_latestupdateview.php&limitstart=0&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`);

  console.log(res.body.toString());

  return {};
}

module.exports = fetchReleases;
53 src/scrapers/xempire.js Normal file
@@ -0,0 +1,53 @@
'use strict';

const bhttp = require('bhttp');
const cheerio = require('cheerio');
const { parse, startOfDay } = require('date-fns');

function scrape(html, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });
  const scenesElements = $('.sceneInfo').toArray();

  return scenesElements.map((element) => {
    const sceneLinkElement = $(element).find('.sceneTitle a');
    const url = `${site.url}${sceneLinkElement.attr('href')}`;
    const title = sceneLinkElement.attr('title');

    const [likes, dislikes] = $(element).find('.value')
      .toArray()
      .map(value => Number($(value).text()));
    // weigh likes as 5-star and dislikes as 1-star votes, floored to two decimals
    const stars = Math.floor(((likes * 5 + dislikes) / (likes + dislikes)) * 100) / 100;

    const actors = $(element).find('.sceneActors a')
      .map((actorIndex, actorElement) => $(actorElement).attr('title'))
      .toArray();

    const date = startOfDay(parse($(element).find('.sceneDate').text(), 'MM-DD-YYYY'));

    return {
      url,
      title,
      actors,
      date,
      rating: {
        likes,
        dislikes,
        stars,
      },
    };
  });
}

async function fetchReleases(site) {
  // fetch the latest and upcoming release listings in parallel
  const [latestRes, upcomingRes] = await Promise.all([
    bhttp.get(`${site.url}/en/videos`),
    bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`),
  ]);

  return {
    latest: scrape(latestRes.body.toString(), site),
    upcoming: scrape(upcomingRes.body.toString(), site),
  };
}

module.exports = fetchReleases;