Improved module structure. Added individual scene scrapers for Jules Jordan and XEmpire.

This commit is contained in:
ThePendulum 2019-03-23 22:48:39 +01:00
parent d70d5f85aa
commit e8d4b76403
14 changed files with 434 additions and 132 deletions

View File

@ -29,4 +29,15 @@ module.exports = {
width: 20,
},
],
filename: {
dateFormat: 'DD-MM-YYYY',
actorsJoin: ', ',
slash: '_',
subpatterns: {
siteName: '{siteName} - ',
sceneDate: ', {sceneDate}',
sceneId: ' {sceneId}',
},
pattern: '{siteName}{sceneTitle} ({sceneActors}{sceneDate}{sceneId})',
},
};

View File

@ -4,12 +4,12 @@
module.exports = {
julesjordan: {
name: 'Jules Jordan',
url: 'https://www.julesjordan.com/trial',
url: 'https://www.julesjordan.com',
sites: {
julesjordan: {
name: 'Jules Jordan',
label: 'julesj',
url: 'https://www.julesjordan.com/trial',
url: 'https://www.julesjordan.com',
description: 'Jules Jordan\'s Official Membership Site',
},
/* also listed on main site
@ -38,7 +38,7 @@ module.exports = {
},
xempire: {
name: 'XEmpire',
url: 'https://www.xempire.com/en',
url: 'https://www.xempire.com',
description: 'XEmpire.com brings you today\'s top pornstars in beautifully shot, HD sex scenes across 4 unique porn sites of gonzo porn, interracial, lesbian & erotica!',
sites: {
hardx: {
@ -67,6 +67,19 @@ module.exports = {
},
},
},
legalporno: {
name: 'LegalPorno',
url: 'https://www.legalporno.com/',
description: 'The Best HD Porn For You!',
sites: {
legalporno: {
name: 'LegalPorno',
label: 'legalp',
url: 'https://www.legalporno.com/',
description: 'The Best HD Porn For You!',
},
},
},
pervcity: {
name: 'Perv City',
url: 'https://www.pervcity.com',

69
package-lock.json generated
View File

@ -964,6 +964,11 @@
}
}
},
"arch": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/arch/-/arch-2.1.1.tgz",
"integrity": "sha512-BLM56aPo9vLLFVa8+/+pJLnrZ7QGGTVHWsCwieAWT9o9K8UeGaQbzZbGoabWLOo2ksBCztoXdqBZBplqLDDCSg=="
},
"argparse": {
"version": "1.0.10",
"resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz",
@ -1350,6 +1355,46 @@
"integrity": "sha1-/xnt6Kml5XkyQUewwR8PvLq+1jk=",
"dev": true
},
"clipboardy": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/clipboardy/-/clipboardy-1.2.3.tgz",
"integrity": "sha512-2WNImOvCRe6r63Gk9pShfkwXsVtKCroMAevIbiae021mS850UkWPbevxsBz3tnvjZIEGvlwaqCPsw+4ulzNgJA==",
"requires": {
"arch": "^2.1.0",
"execa": "^0.8.0"
},
"dependencies": {
"cross-spawn": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-5.1.0.tgz",
"integrity": "sha1-6L0O/uWPz/b4+UUQoKVUu/ojVEk=",
"requires": {
"lru-cache": "^4.0.1",
"shebang-command": "^1.2.0",
"which": "^1.2.9"
}
},
"execa": {
"version": "0.8.0",
"resolved": "https://registry.npmjs.org/execa/-/execa-0.8.0.tgz",
"integrity": "sha1-2NdrvBtVIX7RkP1t1J08d07PyNo=",
"requires": {
"cross-spawn": "^5.0.1",
"get-stream": "^3.0.0",
"is-stream": "^1.1.0",
"npm-run-path": "^2.0.0",
"p-finally": "^1.0.0",
"signal-exit": "^3.0.0",
"strip-eof": "^1.0.0"
}
},
"get-stream": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-3.0.0.tgz",
"integrity": "sha1-jpQ9E1jcN1VQVOy+LtsFqhdO3hQ="
}
}
},
"cliui": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/cliui/-/cliui-4.1.0.tgz",
@ -3415,6 +3460,15 @@
"js-tokens": "^3.0.0 || ^4.0.0"
}
},
"lru-cache": {
"version": "4.1.5",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.5.tgz",
"integrity": "sha512-sWZlbEP2OsHNkXrMl5GYk/jKk70MBng6UU4YI/qGDYbgf6YbP4EvmqISbXCoJiRKs+1bSpFHVgQxvJ17F2li5g==",
"requires": {
"pseudomap": "^1.0.2",
"yallist": "^2.1.2"
}
},
"map-age-cleaner": {
"version": "0.1.3",
"resolved": "https://registry.npmjs.org/map-age-cleaner/-/map-age-cleaner-0.1.3.tgz",
@ -3590,6 +3644,11 @@
"resolved": "https://registry.npmjs.org/nice-try/-/nice-try-1.0.5.tgz",
"integrity": "sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ=="
},
"node-fetch": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.3.0.tgz",
"integrity": "sha512-MOd8pV3fxENbryESLgVIeaGKrdl+uaYhCSSVkjeOb/31/njTpcis5aWfdqgNlHIrKOLRbMnfPINPOML2CIFeXA=="
},
"node-releases": {
"version": "1.1.9",
"resolved": "https://registry.npmjs.org/node-releases/-/node-releases-1.1.9.tgz",
@ -3941,6 +4000,11 @@
"integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==",
"dev": true
},
"pseudomap": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/pseudomap/-/pseudomap-1.0.2.tgz",
"integrity": "sha1-8FKijacOYYkX7wqKw0wa5aaChrM="
},
"psl": {
"version": "1.1.31",
"resolved": "https://registry.npmjs.org/psl/-/psl-1.1.31.tgz",
@ -5203,6 +5267,11 @@
"resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.0.tgz",
"integrity": "sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w=="
},
"yallist": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/yallist/-/yallist-2.1.2.tgz",
"integrity": "sha1-HBH5IY8HYImkfdUS+TxmmaaoHVI="
},
"yargs": {
"version": "13.2.2",
"resolved": "https://registry.npmjs.org/yargs/-/yargs-13.2.2.tgz",

View File

@ -34,9 +34,11 @@
"dependencies": {
"bhttp": "^1.2.4",
"cheerio": "^1.0.0-rc.2",
"clipboardy": "^1.2.3",
"config": "^3.0.1",
"moment": "^2.24.0",
"neo-blessed": "^0.2.0",
"node-fetch": "^2.3.0",
"opn": "^5.4.0",
"tty-table": "^2.7.0",
"yargs": "^13.2.2"

View File

@ -1,13 +1,12 @@
'use strict';
const config = require('config');
const moment = require('moment');
const blessed = require('neo-blessed');
const argv = require('./argv');
const networks = require('../networks.js');
const scrapers = require('./scrapers');
const render = require('./tui/render');
const { renderReleases, renderScene } = require('./tui/render');
const fetchReleases = require('./fetch-releases');
const fetchScene = require('./fetch-scene');
function initScreen() {
const screen = blessed.screen({
@ -26,104 +25,32 @@ function initScreen() {
return screen;
}
function accumulateIncludedSites() {
return config.include.reduce((acc, network) => {
// network included with specific sites, only include specified sites
if (Array.isArray(network)) {
const [networkId, siteIds] = network;
return [
...acc,
...siteIds.map(siteId => ({
id: siteId,
network: networkId,
...networks[networkId].sites[siteId],
})),
];
function getMethod() {
if (argv.scene) {
return {
fetch: () => fetchScene(argv.scene),
render: renderScene,
};
}
// network included without further specification, include all sites
return [
...acc,
...Object.entries(networks[network].sites).map(([siteId, site]) => ({
id: siteId,
network,
...site,
})),
];
}, []);
}
function accumulateExcludedSites() {
return Object.entries(networks).reduce((acc, [networkId, network]) => {
const excludedNetwork = config.exclude.find((excludedNetworkX) => {
if (Array.isArray(excludedNetworkX)) {
return excludedNetworkX[0] === networkId;
}
return excludedNetworkX === networkId;
});
// network excluded with specific sites, only exclude specified sites
if (excludedNetwork && Array.isArray(excludedNetwork)) {
const [, excludedSiteIds] = excludedNetwork;
return [
...acc,
...Object.entries(network.sites)
.filter(([siteId]) => !excludedSiteIds.includes(siteId))
.map(([siteId, site]) => ({
id: siteId,
network: networkId,
...site,
})),
];
}
// network excluded without further specification, exclude all its sites
if (excludedNetwork) {
return acc;
}
// network not excluded, include all its sites
return [
...acc,
...Object.entries(network.sites).map(([siteId, site]) => ({
id: siteId,
network: networkId,
...site,
})),
];
}, []);
}
function accumulateSites() {
return config.include ? accumulateIncludedSites() : accumulateExcludedSites();
}
async function fetchScenes(sites) {
const scenesPerSite = await Promise.all(sites.map(async (site) => {
const scraper = scrapers[site.id] || scrapers[site.network];
if (scraper) {
return scraper(site);
}
return [];
}));
return scenesPerSite.reduce((acc, siteScenes) => ([...acc, ...siteScenes]), []);
return {
fetch: () => fetchReleases(),
render: renderReleases,
};
}
async function init() {
const sites = accumulateSites();
const scenes = await fetchScenes(sites);
const screen = argv.render && initScreen();
try {
const method = getMethod();
const result = await method.fetch();
if (argv.render) {
const screen = initScreen();
const sortedScenes = scenes.sort(({ date: dateA }, { date: dateB }) => moment(dateB).diff(dateA));
render(sortedScenes, screen);
method.render(result, screen);
}
} catch (error) {
console.error(error.message);
}
}

View File

@ -8,6 +8,15 @@ const { argv } = yargs
describe: 'Fetch data without rendering interface',
type: 'boolean',
default: true,
})
.option('scene', {
describe: 'Fetch scene info from URL',
type: 'string',
})
.option('copy', {
describe: 'Copy relevant result to clipboard',
type: 'boolean',
alias: 'c',
});
module.exports = argv;

108
src/fetch-releases.js Normal file
View File

@ -0,0 +1,108 @@
'use strict';
const config = require('config');
const moment = require('moment');
const networks = require('../networks.js');
const scrapers = require('./scrapers');
/**
 * Build the list of sites to scrape from the `include` config array.
 * Each entry is either a network ID string (include every site of that
 * network) or a `[networkId, [siteId, ...]]` tuple (include only the
 * listed sites of that network). Returns a flat array of site objects,
 * each tagged with its own `id` and its parent `network` ID.
 */
function accumulateIncludedSites() {
  // flatMap avoids re-spreading the accumulator on every iteration
  return config.include.flatMap((network) => {
    // network included with specific sites, only include specified sites
    if (Array.isArray(network)) {
      const [networkId, siteIds] = network;

      return siteIds.map(siteId => ({
        id: siteId,
        network: networkId,
        ...networks[networkId].sites[siteId],
      }));
    }

    // network included without further specification, include all sites
    return Object.entries(networks[network].sites).map(([siteId, site]) => ({
      id: siteId,
      network,
      ...site,
    }));
  });
}
/**
 * Build the list of sites to scrape when no `include` list is configured,
 * starting from every known network and removing exclusions. An exclusion
 * is either a network ID string (drop the whole network) or a
 * `[networkId, [siteId, ...]]` tuple (drop only the listed sites).
 * Returns a flat array of site objects tagged with `id` and `network`.
 */
function accumulateExcludedSites() {
  return Object.entries(networks).flatMap(([networkId, network]) => {
    const excludedNetwork = config.exclude.find((excluded) => {
      if (Array.isArray(excluded)) {
        return excluded[0] === networkId;
      }

      return excluded === networkId;
    });

    // network excluded without further specification, exclude all its sites
    if (excludedNetwork && !Array.isArray(excludedNetwork)) {
      return [];
    }

    // partially excluded networks contribute only their non-excluded sites;
    // non-excluded networks contribute everything (empty exclusion list)
    const excludedSiteIds = Array.isArray(excludedNetwork) ? excludedNetwork[1] : [];

    return Object.entries(network.sites)
      .filter(([siteId]) => !excludedSiteIds.includes(siteId))
      .map(([siteId, site]) => ({
        id: siteId,
        network: networkId,
        ...site,
      }));
  });
}
// An explicit include list takes precedence; otherwise fall back to the
// full network catalogue minus any configured exclusions.
function accumulateSites() {
  if (config.include) {
    return accumulateIncludedSites();
  }

  return accumulateExcludedSites();
}
/**
 * Fetch the latest (and, where the scraper supports it, upcoming)
 * releases for every configured site, merged into a single array and
 * sorted newest-first. Sites without a registered scraper contribute
 * no scenes.
 */
async function fetchReleases() {
  // accumulateSites is synchronous; no await needed
  const sites = accumulateSites();

  const scenesPerSite = await Promise.all(sites.map(async (site) => {
    // a scraper may be registered per individual site or per network
    const scraper = scrapers[site.id] || scrapers[site.network];

    if (!scraper) {
      return [];
    }

    const [latest, upcoming] = await Promise.all([
      scraper.fetchLatest(site),
      // not every scraper implements upcoming scenes
      scraper.fetchUpcoming ? scraper.fetchUpcoming(site) : [],
    ]);

    return [...latest, ...upcoming];
  }));

  // merge per-site results (concat avoids quadratic re-spreading) and
  // sort newest first; sort mutates, but the array is created locally
  return scenesPerSite
    .reduce((acc, siteScenes) => acc.concat(siteScenes), [])
    .sort(({ date: dateA }, { date: dateB }) => moment(dateB).diff(dateA));
}
module.exports = fetchReleases;

79
src/fetch-scene.js Normal file
View File

@ -0,0 +1,79 @@
'use strict';
const config = require('config');
const moment = require('moment');
const networks = require('../networks.js');
const scrapers = require('./scrapers');
/**
 * Match a scene URL to a configured site by comparing the URL's origin
 * against each site's configured URL.
 * Returns `{ site, network }` (each tagged with its ID) or null when no
 * site matches.
 */
function findSite(url) {
  const { origin } = new URL(url);

  // site URLs may be configured with a trailing slash (e.g. the
  // LegalPorno entries), but a URL origin never carries one — normalize
  // before comparing, or those sites can never match
  const normalize = siteUrl => siteUrl.replace(/\/$/, '');

  for (const [networkId, network] of Object.entries(networks)) {
    for (const [siteId, site] of Object.entries(network.sites)) {
      if (normalize(site.url) === origin) {
        return {
          site: {
            ...site,
            id: siteId,
          },
          network: {
            ...network,
            id: networkId,
          },
        };
      }
    }
  }

  return null;
}
/**
 * Render a scene's download filename from `config.filename.pattern`.
 * Placeholders like `{sceneTitle}` are replaced with scene properties;
 * placeholders listed in `config.filename.subpatterns` are wrapped in
 * their subpattern (e.g. ', {sceneDate}') so that a missing value drops
 * its surrounding punctuation too. Slashes in the rendered value are
 * replaced with `config.filename.slash` to keep the name filesystem-safe.
 */
function deriveFilename(scene) {
  const props = {
    siteName: scene.site.name,
    sceneId: scene.id,
    sceneTitle: scene.title,
    sceneActors: scene.actors.join(config.filename.actorsJoin),
    sceneDate: moment.utc(scene.date).format(config.filename.dateFormat),
  };

  return config.filename.pattern.replace(/\{\w+\}/g, (match) => {
    const prop = match.slice(1, -1);
    const value = props[prop];

    // guard before calling .replace — an undefined or missing prop would
    // otherwise throw a TypeError instead of rendering as nothing
    if (!value) {
      return '';
    }

    const subpattern = config.filename.subpatterns[prop];
    // String() also covers non-string props such as a numeric scene ID
    const rendered = subpattern ? subpattern.replace(/\{\w+\}/, value) : String(value);

    return rendered.replace(/\//g, config.filename.slash);
  });
}
/**
 * Fetch and scrape a single scene from its URL, attaching the derived
 * download filename.
 * @throws when the URL matches no known site, no scraper is registered
 * for the matched site/network, or the scraper cannot fetch individual
 * scenes.
 */
async function fetchScene(url) {
  const match = findSite(url);

  // findSite returns null on no match; destructuring it directly would
  // throw an opaque TypeError
  if (!match) {
    throw new Error(`Could not match URL to any known site: ${url}`);
  }

  const { site, network } = match;
  const scraper = scrapers[site.id] || scrapers[network.id];

  if (!scraper) {
    throw new Error('Could not find scraper for URL');
  }

  if (!scraper.fetchScene) {
    throw new Error(`The '${site.name}'-scraper cannot fetch individual scenes`);
  }

  const scene = await scraper.fetchScene(url, site);

  return {
    ...scene,
    filename: deriveFilename(scene),
  };
}
module.exports = fetchScene;

View File

@ -67,17 +67,55 @@ function scrapeUpcoming(html, site) {
});
}
function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
async function fetchReleases(site) {
const [latestRes, upcomingRes] = await Promise.all([
bhttp.get(`${site.url}/categories/movies_1_d.html`),
bhttp.get(`${site.url}/index.php`),
]);
const title = $('.title_bar_hilite').text();
const date = moment
.utc($('.update_date').text(), 'MM/DD/YYYY')
.toDate();
return [
...scrapeUpcoming(upcomingRes.body.toString(), site, true),
...scrapeLatest(latestRes.body.toString(), site),
];
const actors = $('.update_description + .update_models a')
.map((_actorIndex, actorElement) => $(actorElement).text())
.toArray();
const description = $('.update_description').text().trim();
const stars = Number($('.avg_rating').text().trim().replace(/[\s|Avg Rating:]/g, ''));
return {
url,
title,
date,
actors,
description,
rating: {
stars,
},
site,
};
}
module.exports = fetchReleases;
async function fetchLatest(site) {
const res = await bhttp.get(`${site.url}/trial/categories/movies_1_d.html`);
return scrapeLatest(res.body.toString(), site);
}
async function fetchUpcoming(site) {
const res = await bhttp.get(`${site.url}/trial/index.php`);
return scrapeUpcoming(res.body.toString(), site);
}
async function fetchScene(url, site) {
const res = await bhttp.get(url);
return scrapeScene(res.body.toString(), url, site);
}
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
};

View File

@ -0,0 +1 @@
'use strict';

View File

@ -17,7 +17,7 @@ function scrape(html, site) {
const stars = $('img[src*="/star.png"]')
.toArray()
.map(element => $(element).attr('src'))
.length || null;
.length || 0;
return {
url,
@ -25,15 +25,13 @@ function scrape(html, site) {
actors,
date,
rating: {
likes: null,
dislikes: null,
stars,
},
site,
};
}
async function fetchReleases(site) {
async function fetchLatest(site) {
const res = await bhttp.get(`${site.url}/final_latestupdateview.php?limitstart=0&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`);
const elements = JSON.parse(res.body.toString());
@ -42,4 +40,6 @@ async function fetchReleases(site) {
return latest;
}
module.exports = fetchReleases;
module.exports = {
fetchLatest,
};

View File

@ -24,7 +24,6 @@ function scrape(html, site) {
const [likes, dislikes] = $(element).find('.value')
.toArray()
.map(value => Number($(value).text()));
const stars = likes || dislikes ? Math.floor(((likes * 5 + dislikes) / (likes + dislikes)) * 100) / 100 : null;
return {
url,
@ -34,23 +33,54 @@ function scrape(html, site) {
rating: {
likes,
dislikes,
stars,
},
site,
};
});
}
async function fetchReleases(site) {
const [latestRes, upcomingRes] = await Promise.all([
bhttp.get(`${site.url}/en/videos`),
bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`),
]);
function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
return [
...scrape(upcomingRes.body.toString(), site, true),
...scrape(latestRes.body.toString(), site),
];
const title = $('h1.title').text();
const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate();
const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
const description = $('.sceneDesc').text().replace(/Video Description:/g, ' ').trim();
const stars = $('.currentRating').text().split('/')[0] / 2;
return {
url,
title,
date,
actors,
description,
rating: {
stars,
},
site,
};
}
module.exports = fetchReleases;
async function fetchLatest(site) {
const res = await bhttp.get(`${site.url}/en/videos`);
return scrape(res.body.toString(), site);
}
async function fetchUpcoming(site) {
const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`);
return scrape(res.body.toString(), site);
}
async function fetchScene(url, site) {
const res = await bhttp.get(url);
return scrapeScene(res.body.toString(), url, site);
}
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
};

View File

@ -11,15 +11,17 @@ const formatters = {
return '\x1b[90mNot available\x1b[0m';
}
if (rating.stars === null) {
if ((rating.likes === 0 && rating.dislikes === 0) || rating.stars === 0) {
return '\x1b[90mUnrated\x1b[0m';
}
if (rating.likes === null || rating.dislikes === null) {
if (rating.stars) {
return `\x1b[93m★ ${rating.stars.toFixed(2)}\x1b[0m`;
}
return `\x1b[93m★\x1b[0m ${rating.stars.toFixed(2)} \x1b[92m▲\x1b[0m ${String(rating.likes).padEnd(3)} \x1b[31m▼\x1b[0m ${String(rating.dislikes).padEnd(3)}`;
const stars = rating.likes || rating.dislikes ? Math.floor(((rating.likes * 5 + rating.dislikes) / (rating.likes + rating.dislikes)) * 100) / 100 : null;
return `\x1b[93m★\x1b[0m ${stars.toFixed(2)} \x1b[92m▲\x1b[0m ${String(rating.likes).padEnd(3)} \x1b[31m▼\x1b[0m ${String(rating.dislikes).padEnd(3)}`;
},
};

View File

@ -4,10 +4,12 @@ const config = require('config');
const blessed = require('neo-blessed');
const moment = require('moment');
const opn = require('opn');
const clipboard = require('clipboardy');
const argv = require('../argv');
const formatters = require('./formatters');
function render(scenes, screen) {
function renderReleases(scenes, screen) {
const tableTop = blessed.Text({
content: config.columns.reduce((acc, column, index) => `${acc}${'─'.repeat(column.width)}${index < config.columns.length - 1 ? '┬' : '┐\x1b[0m'}`, '\x1b[30m┌'),
});
@ -102,4 +104,15 @@ function render(scenes, screen) {
screen.render();
}
module.exports = render;
function renderScene(scene, _screen) {
console.log(scene);
if (argv.copy) {
clipboard.writeSync(scene.filename);
}
}
module.exports = {
renderReleases,
renderScene,
};