Improved module structure. Added individual scene scrapers for Jules Jordan and XEmpire.

This commit is contained in:
2019-03-23 22:48:39 +01:00
parent d70d5f85aa
commit e8d4b76403
14 changed files with 434 additions and 132 deletions

View File

@@ -1,13 +1,12 @@
'use strict';
const config = require('config');
const moment = require('moment');
const blessed = require('neo-blessed');
const argv = require('./argv');
const networks = require('../networks.js');
const scrapers = require('./scrapers');
const render = require('./tui/render');
const { renderReleases, renderScene } = require('./tui/render');
const fetchReleases = require('./fetch-releases');
const fetchScene = require('./fetch-scene');
function initScreen() {
const screen = blessed.screen({
@@ -26,104 +25,32 @@ function initScreen() {
return screen;
}
function accumulateIncludedSites() {
return config.include.reduce((acc, network) => {
// network included with specific sites, only include specified sites
if (Array.isArray(network)) {
const [networkId, siteIds] = network;
function getMethod() {
if (argv.scene) {
return {
fetch: () => fetchScene(argv.scene),
render: renderScene,
};
}
return [
...acc,
...siteIds.map(siteId => ({
id: siteId,
network: networkId,
...networks[networkId].sites[siteId],
})),
];
}
// network included without further specification, include all sites
return [
...acc,
...Object.entries(networks[network].sites).map(([siteId, site]) => ({
id: siteId,
network,
...site,
})),
];
}, []);
}
// Build the site list when config uses an `exclude` list: start from every
// site of every network and drop whatever the exclude list names.
// Returns a flat array of site objects, each tagged with its own `id` and
// parent `network` id.
function accumulateExcludedSites() {
  return Object.entries(networks).reduce((acc, [networkId, network]) => {
    // Locate this network in the exclude list; entries are either a bare
    // network ID string or a [networkId, [siteIds]] tuple.
    const excludedNetwork = config.exclude.find((excludedNetworkX) => {
      if (Array.isArray(excludedNetworkX)) {
        return excludedNetworkX[0] === networkId;
      }
      return excludedNetworkX === networkId;
    });
    // network excluded with specific sites, only exclude specified sites
    if (excludedNetwork && Array.isArray(excludedNetwork)) {
      const [, excludedSiteIds] = excludedNetwork;
      return [
        ...acc,
        ...Object.entries(network.sites)
          .filter(([siteId]) => !excludedSiteIds.includes(siteId))
          .map(([siteId, site]) => ({
            id: siteId,
            network: networkId,
            ...site,
          })),
      ];
    }
    // network excluded without further specification, exclude all its sites
    if (excludedNetwork) {
      return acc;
    }
    // network not excluded, include all its sites
    return [
      ...acc,
      ...Object.entries(network.sites).map(([siteId, site]) => ({
        id: siteId,
        network: networkId,
        ...site,
      })),
    ];
  }, []);
}
// Pick the site-selection strategy: an explicit include list takes
// precedence; otherwise start from all networks and honour the exclude list.
function accumulateSites() {
  if (config.include) {
    return accumulateIncludedSites();
  }

  return accumulateExcludedSites();
}
async function fetchScenes(sites) {
const scenesPerSite = await Promise.all(sites.map(async (site) => {
const scraper = scrapers[site.id] || scrapers[site.network];
if (scraper) {
return scraper(site);
}
return [];
}));
return scenesPerSite.reduce((acc, siteScenes) => ([...acc, ...siteScenes]), []);
return {
fetch: () => fetchReleases(),
render: renderReleases,
};
}
async function init() {
const sites = accumulateSites();
const scenes = await fetchScenes(sites);
const screen = argv.render && initScreen();
if (argv.render) {
const screen = initScreen();
const sortedScenes = scenes.sort(({ date: dateA }, { date: dateB }) => moment(dateB).diff(dateA));
try {
const method = getMethod();
const result = await method.fetch();
render(sortedScenes, screen);
if (argv.render) {
method.render(result, screen);
}
} catch (error) {
console.error(error.message);
}
}

View File

@@ -8,6 +8,15 @@ const { argv } = yargs
describe: 'Fetch data without rendering interface',
type: 'boolean',
default: true,
})
.option('scene', {
describe: 'Fetch scene info from URL',
type: 'string',
})
.option('copy', {
describe: 'Copy relevant result to clipboard',
type: 'boolean',
alias: 'c',
});
module.exports = argv;

108
src/fetch-releases.js Normal file
View File

@@ -0,0 +1,108 @@
'use strict';
const config = require('config');
const moment = require('moment');
const networks = require('../networks.js');
const scrapers = require('./scrapers');
// Build the site list from config.include. Entries are either a network ID
// string (include every site of that network) or a [networkId, [siteIds]]
// tuple (include only the listed sites of that network).
// Returns a flat array of site objects, each tagged with its own `id` and
// parent `network` id.
function accumulateIncludedSites() {
  // flatMap avoids the O(n²) spread-accumulator-in-reduce anti-pattern of
  // the previous implementation while producing the identical array.
  return config.include.flatMap((network) => {
    // network included with specific sites, only include specified sites
    if (Array.isArray(network)) {
      const [networkId, siteIds] = network;

      return siteIds.map(siteId => ({
        id: siteId,
        network: networkId,
        ...networks[networkId].sites[siteId],
      }));
    }

    // network included without further specification, include all sites
    return Object.entries(networks[network].sites).map(([siteId, site]) => ({
      id: siteId,
      network,
      ...site,
    }));
  });
}
// Build the site list when config uses an `exclude` list: every site of
// every network, minus whatever the exclude list names. Exclude entries are
// either a network ID string (exclude the whole network) or a
// [networkId, [siteIds]] tuple (exclude only the listed sites).
// Returns a flat array of site objects, each tagged with its own `id` and
// parent `network` id.
function accumulateExcludedSites() {
  // flatMap avoids the O(n²) spread-accumulator-in-reduce anti-pattern of
  // the previous implementation while producing the identical array.
  return Object.entries(networks).flatMap(([networkId, network]) => {
    const excludedNetwork = config.exclude.find(entry => (Array.isArray(entry)
      ? entry[0] === networkId
      : entry === networkId));

    // network excluded without further specification, exclude all its sites
    if (excludedNetwork && !Array.isArray(excludedNetwork)) {
      return [];
    }

    // Either the network is partially excluded (tuple) or not excluded at
    // all; filter out any listed site IDs and keep the rest.
    const excludedSiteIds = excludedNetwork ? excludedNetwork[1] : [];

    return Object.entries(network.sites)
      .filter(([siteId]) => !excludedSiteIds.includes(siteId))
      .map(([siteId, site]) => ({
        id: siteId,
        network: networkId,
        ...site,
      }));
  });
}
// Pick the site-selection strategy: an explicit include list takes
// precedence; otherwise start from all networks and honour the exclude list.
function accumulateSites() {
  if (config.include) {
    return accumulateIncludedSites();
  }

  return accumulateExcludedSites();
}
// Fetch latest (and, where supported, upcoming) releases from every
// configured site, concurrently, and return them as one array sorted
// newest-first by release date.
async function fetchReleases() {
  // accumulateSites() is synchronous; the previous `await` was spurious.
  const sites = accumulateSites();

  const scenesPerSite = await Promise.all(sites.map(async (site) => {
    // A site-specific scraper takes precedence over its network's scraper;
    // a site with neither contributes nothing.
    const scraper = scrapers[site.id] || scrapers[site.network];

    if (!scraper) {
      return [];
    }

    const [latest, upcoming] = await Promise.all([
      scraper.fetchLatest(site),
      // Not every scraper implements fetchUpcoming.
      scraper.fetchUpcoming ? scraper.fetchUpcoming(site) : [],
    ]);

    return [...latest, ...upcoming];
  }));

  // .flat() replaces the O(n²) spread-reduce flatten; sort newest-first.
  return scenesPerSite
    .flat()
    .sort(({ date: dateA }, { date: dateB }) => moment(dateB).diff(dateA));
}
module.exports = fetchReleases;

79
src/fetch-scene.js Normal file
View File

@@ -0,0 +1,79 @@
'use strict';
const config = require('config');
const moment = require('moment');
const networks = require('../networks.js');
const scrapers = require('./scrapers');
// Match a scene URL to a configured site by comparing the URL's origin
// (protocol + host) against each site's base `url`.
// Returns { site: {...site, id}, network: {...network, id} } for the first
// match, or null when no configured site matches.
function findSite(url) {
  const { origin } = new URL(url);

  // Plain loops replace the hard-to-read nested reduce; the leftover debug
  // console.log(origin) has been removed.
  for (const [networkId, network] of Object.entries(networks)) {
    for (const [siteId, site] of Object.entries(network.sites)) {
      if (site.url === origin) {
        return {
          site: {
            ...site,
            id: siteId,
          },
          network: {
            ...network,
            id: networkId,
          },
        };
      }
    }
  }

  return null;
}
// Render config.filename.pattern into a concrete filename for a scene.
// Each `{prop}` placeholder is replaced by the matching value below; slashes
// in values are substituted with config.filename.slash so the result is a
// valid filename. Unknown or missing placeholders render as '' instead of
// crashing (the old code called .replace on undefined).
function deriveFilename(scene) {
  const props = {
    siteName: scene.site.name,
    sceneId: scene.id,
    sceneTitle: scene.title,
    sceneActors: scene.actors.join(config.filename.actorsJoin),
    sceneDate: moment.utc(scene.date).format(config.filename.dateFormat),
  };

  return config.filename.pattern.replace(/\{\w+\}/g, (match) => {
    const prop = match.slice(1, -1);
    const value = props[prop];

    // Unknown placeholder or missing value: render nothing rather than throw.
    if (value === undefined || value === null) {
      return '';
    }

    // String() guards against non-string props such as a numeric sceneId.
    const safeValue = String(value).replace(/\//g, config.filename.slash);

    if (value && config.filename.subpatterns[prop]) {
      // Wrap the value in its configured subpattern, e.g. "({sceneId})".
      return config.filename.subpatterns[prop]
        .replace(/\{\w+\}/, safeValue)
        .replace(/\//g, config.filename.slash);
    }

    return safeValue;
  });
}
// Fetch a single scene's details from its URL.
// Resolves the URL to a configured site, dispatches to that site's (or its
// network's) scraper, and returns the scraped scene augmented with a
// `filename` derived from the configured pattern.
// Throws when the URL matches no configured site, when no scraper exists,
// or when the scraper cannot fetch individual scenes.
async function fetchScene(url) {
  const match = findSite(url);

  // findSite returns null for unknown origins; the old destructuring of
  // null crashed with an opaque TypeError instead of a useful message.
  if (!match) {
    throw new Error(`Could not match '${url}' to any configured site`);
  }

  const { site, network } = match;
  // Site-specific scrapers take precedence over network-wide ones.
  const scraper = scrapers[site.id] || scrapers[network.id];

  if (!scraper) {
    throw new Error('Could not find scraper for URL');
  }

  if (!scraper.fetchScene) {
    throw new Error(`The '${site.name}'-scraper cannot fetch individual scenes`);
  }

  const scene = await scraper.fetchScene(url, site);

  return {
    ...scene,
    filename: deriveFilename(scene),
  };
}
module.exports = fetchScene;

View File

@@ -67,17 +67,55 @@ function scrapeUpcoming(html, site) {
});
}
function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
async function fetchReleases(site) {
const [latestRes, upcomingRes] = await Promise.all([
bhttp.get(`${site.url}/categories/movies_1_d.html`),
bhttp.get(`${site.url}/index.php`),
]);
const title = $('.title_bar_hilite').text();
const date = moment
.utc($('.update_date').text(), 'MM/DD/YYYY')
.toDate();
return [
...scrapeUpcoming(upcomingRes.body.toString(), site, true),
...scrapeLatest(latestRes.body.toString(), site),
];
const actors = $('.update_description + .update_models a')
.map((_actorIndex, actorElement) => $(actorElement).text())
.toArray();
const description = $('.update_description').text().trim();
const stars = Number($('.avg_rating').text().trim().replace(/[\s|Avg Rating:]/g, ''));
return {
url,
title,
date,
actors,
description,
rating: {
stars,
},
site,
};
}
module.exports = fetchReleases;
async function fetchLatest(site) {
const res = await bhttp.get(`${site.url}/trial/categories/movies_1_d.html`);
return scrapeLatest(res.body.toString(), site);
}
async function fetchUpcoming(site) {
const res = await bhttp.get(`${site.url}/trial/index.php`);
return scrapeUpcoming(res.body.toString(), site);
}
async function fetchScene(url, site) {
const res = await bhttp.get(url);
return scrapeScene(res.body.toString(), url, site);
}
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
};

View File

@@ -0,0 +1 @@
'use strict';

View File

@@ -17,7 +17,7 @@ function scrape(html, site) {
const stars = $('img[src*="/star.png"]')
.toArray()
.map(element => $(element).attr('src'))
.length || null;
.length || 0;
return {
url,
@@ -25,15 +25,13 @@ function scrape(html, site) {
actors,
date,
rating: {
likes: null,
dislikes: null,
stars,
},
site,
};
}
async function fetchReleases(site) {
async function fetchLatest(site) {
const res = await bhttp.get(`${site.url}/final_latestupdateview.php?limitstart=0&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`);
const elements = JSON.parse(res.body.toString());
@@ -42,4 +40,6 @@ async function fetchReleases(site) {
return latest;
}
module.exports = fetchReleases;
module.exports = {
fetchLatest,
};

View File

@@ -24,7 +24,6 @@ function scrape(html, site) {
const [likes, dislikes] = $(element).find('.value')
.toArray()
.map(value => Number($(value).text()));
const stars = likes || dislikes ? Math.floor(((likes * 5 + dislikes) / (likes + dislikes)) * 100) / 100 : null;
return {
url,
@@ -34,23 +33,54 @@ function scrape(html, site) {
rating: {
likes,
dislikes,
stars,
},
site,
};
});
}
async function fetchReleases(site) {
const [latestRes, upcomingRes] = await Promise.all([
bhttp.get(`${site.url}/en/videos`),
bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`),
]);
function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
return [
...scrape(upcomingRes.body.toString(), site, true),
...scrape(latestRes.body.toString(), site),
];
const title = $('h1.title').text();
const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate();
const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
const description = $('.sceneDesc').text().replace(/Video Description:/g, ' ').trim();
const stars = $('.currentRating').text().split('/')[0] / 2;
return {
url,
title,
date,
actors,
description,
rating: {
stars,
},
site,
};
}
module.exports = fetchReleases;
async function fetchLatest(site) {
const res = await bhttp.get(`${site.url}/en/videos`);
return scrape(res.body.toString(), site);
}
async function fetchUpcoming(site) {
const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`);
return scrape(res.body.toString(), site);
}
async function fetchScene(url, site) {
const res = await bhttp.get(url);
return scrapeScene(res.body.toString(), url, site);
}
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
};

View File

@@ -11,15 +11,17 @@ const formatters = {
return '\x1b[90mNot available\x1b[0m';
}
if (rating.stars === null) {
if ((rating.likes === 0 && rating.dislikes === 0) || rating.stars === 0) {
return '\x1b[90mUnrated\x1b[0m';
}
if (rating.likes === null || rating.dislikes === null) {
if (rating.stars) {
return `\x1b[93m★ ${rating.stars.toFixed(2)}\x1b[0m`;
}
return `\x1b[93m★\x1b[0m ${rating.stars.toFixed(2)} \x1b[92m▲\x1b[0m ${String(rating.likes).padEnd(3)} \x1b[31m▼\x1b[0m ${String(rating.dislikes).padEnd(3)}`;
const stars = rating.likes || rating.dislikes ? Math.floor(((rating.likes * 5 + rating.dislikes) / (rating.likes + rating.dislikes)) * 100) / 100 : null;
return `\x1b[93m★\x1b[0m ${stars.toFixed(2)} \x1b[92m▲\x1b[0m ${String(rating.likes).padEnd(3)} \x1b[31m▼\x1b[0m ${String(rating.dislikes).padEnd(3)}`;
},
};

View File

@@ -4,10 +4,12 @@ const config = require('config');
const blessed = require('neo-blessed');
const moment = require('moment');
const opn = require('opn');
const clipboard = require('clipboardy');
const argv = require('../argv');
const formatters = require('./formatters');
function render(scenes, screen) {
function renderReleases(scenes, screen) {
const tableTop = blessed.Text({
content: config.columns.reduce((acc, column, index) => `${acc}${'─'.repeat(column.width)}${index < config.columns.length - 1 ? '┬' : '┐\x1b[0m'}`, '\x1b[30m┌'),
});
@@ -102,4 +104,15 @@ function render(scenes, screen) {
screen.render();
}
module.exports = render;
// Print a single scene's details to the console (no blessed UI for scene
// mode) and, when --copy/-c is set, place its derived filename on the
// system clipboard.
function renderScene(scene, _screen) {
  console.log(scene);

  if (!argv.copy) {
    return;
  }

  clipboard.writeSync(scene.filename);
}
module.exports = {
renderReleases,
renderScene,
};