Added tags and duration to scraping. Added LegalPorno scraper.

This commit is contained in:
ThePendulum 2019-03-24 01:29:22 +01:00
parent e8d4b76403
commit 4fcabb4aae
11 changed files with 273 additions and 15 deletions

View File

@ -69,13 +69,13 @@ module.exports = {
}, },
legalporno: { legalporno: {
name: 'LegalPorno', name: 'LegalPorno',
url: 'https://www.legalporno.com/', url: 'https://www.legalporno.com',
description: 'The Best HD Porn For You!', description: 'The Best HD Porn For You!',
sites: { sites: {
legalporno: { legalporno: {
name: 'LegalPorno', name: 'LegalPorno',
label: 'legalp', label: 'legalp',
url: 'https://www.legalporno.com/', url: 'https://www.legalporno.com',
description: 'The Best HD Porn For You!', description: 'The Best HD Porn For You!',
}, },
}, },

View File

@ -50,7 +50,7 @@ async function init() {
method.render(result, screen); method.render(result, screen);
} }
} catch (error) { } catch (error) {
console.error(error.message); console.error(argv.debug ? error : error.message);
} }
} }

View File

@ -17,6 +17,10 @@ const { argv } = yargs
describe: 'Copy relevant result to clipboard', describe: 'Copy relevant result to clipboard',
type: 'boolean', type: 'boolean',
alias: 'c', alias: 'c',
})
.option('debug', {
describe: 'Show error stack traces',
type: 'boolean',
}); });
module.exports = argv; module.exports = argv;

View File

@ -50,7 +50,11 @@ function deriveFilename(scene) {
.replace(/\//g, config.filename.slash); .replace(/\//g, config.filename.slash);
} }
if (value) {
return value.replace(/\//g, config.filename.slash) || ''; return value.replace(/\//g, config.filename.slash) || '';
}
return '';
}); });
return filename; return filename;

View File

@ -2,10 +2,12 @@
const xempire = require('./xempire'); const xempire = require('./xempire');
const julesjordan = require('./julesjordan'); const julesjordan = require('./julesjordan');
const legalporno = require('./legalporno');
const pervcity = require('./pervcity'); const pervcity = require('./pervcity');
module.exports = { module.exports = {
xempire, xempire,
julesjordan, julesjordan,
legalporno,
pervcity, pervcity,
}; };

View File

@ -4,6 +4,27 @@ const bhttp = require('bhttp');
const cheerio = require('cheerio'); const cheerio = require('cheerio');
const moment = require('moment'); const moment = require('moment');
// Maps tag labels as they appear in the scraped page's tag links to this
// project's tag names. The scene scraper looks every scraped label up in this
// table and drops labels that have no entry.
const tagMap = {
  Anal: 'anal',
  Asian: 'asian',
  'Ass To Mouth': 'ATM',
  'Big Cocks': 'big cock',
  // Uses the full tag name, matching the other scrapers' maps, rather than the 'BBC' alias.
  Black: 'big black cock',
  Blondes: 'blonde',
  Brunettes: 'brunette',
  Blowjobs: 'blowjob',
  Creampie: 'creampie',
  'Deep Throat': 'deepthroat',
  Facial: 'facial',
  Interracial: 'interracial',
  Lingerie: 'lingerie',
  Natural: 'natural',
  // Was misspelled 'readhead', which matches no tag name.
  'Red Head': 'redhead',
  'School Girl': 'schoolgirl',
  Tattoo: 'tattoo',
  Teen: 'teen',
};
function scrapeLatest(html, site) { function scrapeLatest(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
const scenesElements = $('.update_details').toArray(); const scenesElements = $('.update_details').toArray();
@ -26,7 +47,6 @@ function scrapeLatest(html, site) {
title, title,
actors, actors,
date, date,
rating: null,
site, site,
}; };
}); });
@ -83,6 +103,9 @@ function scrapeScene(html, url, site) {
const stars = Number($('.avg_rating').text().trim().replace(/[\s|Avg Rating:]/g, '')); const stars = Number($('.avg_rating').text().trim().replace(/[\s|Avg Rating:]/g, ''));
const rawTags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
const tags = rawTags.reduce((accTags, tag) => (tagMap[tag] ? [...accTags, tagMap[tag]] : accTags), []);
return { return {
url, url,
title, title,
@ -92,6 +115,7 @@ function scrapeScene(html, url, site) {
rating: { rating: {
stars, stars,
}, },
tags,
site, site,
}; };
} }

View File

@ -1 +1,101 @@
'use strict'; 'use strict';
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const moment = require('moment');
// Maps tag labels found in the scene page's tag links to this project's tag
// names. scrapeScene looks every scraped label up in this table and drops
// labels that have no entry. Lookups are exact, so keys must match the
// scraped link text character for character (including case).
const tagMap = {
'3+ on 1': 'gangbang',
anal: 'anal',
bbc: 'big black cock',
'cum swallowing': 'swallowing',
rough: 'rough',
'deep throat': 'deepthroat',
'double penetration (DP)': 'DP',
'double anal (DAP)': 'DAP',
'double vaginal (DPP)': 'DVP',
'gapes (gaping asshole)': 'gaping',
// NOTE(review): tags.js lists 'toy' as the key and 'toys' as its alias;
// confirm which form consumers of the scraped tags expect.
'huge toys': 'toys',
interracial: 'interracial',
'triple penetration': 'TP',
};
/**
 * Separates a trailing scene identifier from a scene title.
 *
 * Only the last space-separated word is inspected. When it contains a known
 * studio prefix followed by digits, that match becomes the ID and the last
 * word is removed from the title; otherwise the title is returned untouched.
 *
 * @param {string} originalTitle - Title as scraped from the page.
 * @returns {{ id: (string|null), title: string }} Extracted ID (or null) and the remaining title.
 */
function extractTitle(originalTitle) {
  const words = originalTitle.split(' ');
  const lastWord = words[words.length - 1];
  const idMatch = lastWord.match(/(GP|SZ|IV|GIO|AA|GL|BZ|FS)\d+/); // detect studio prefixes

  if (!idMatch) {
    return { id: null, title: originalTitle };
  }

  return {
    id: idMatch[0],
    title: words.slice(0, -1).join(' '),
  };
}
/**
 * Scrapes the scene thumbnails from a listing page.
 *
 * @param {string} html - Markup of the listing page.
 * @param {Object} site - Site entry; attached unchanged to every scene.
 * @returns {Object[]} One object per thumbnail with url, id, title, date and site.
 */
function scrapeLatest(html, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });

  return $('.thumbnails > div')
    .toArray()
    .map((sceneElement) => {
      const $scene = $(sceneElement);
      const $link = $scene.find('.thumbnail-title a');

      // The link's title attribute carries the scene title, possibly with a trailing scene ID.
      const { id, title } = extractTitle($link.attr('title'));

      return {
        url: $link.attr('href'),
        id,
        title,
        // The release date is read from the thumbnail container's `release` attribute.
        date: moment.utc($scene.attr('release'), 'YYYY/MM/DD').toDate(),
        site,
      };
    });
}
/**
 * Scrapes the details of a single scene page.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - URL of the scene; returned unchanged.
 * @param {Object} site - Site entry; returned unchanged.
 * @returns {Object} Scene with url, id, title, date, actors, duration, tags and site.
 *   `duration` is in seconds, or null when the page shows no runtime.
 */
function scrapeScene(html, url, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });

  const originalTitle = $('h1.watchpage-title').text().trim();
  const { id, title } = extractTitle(originalTitle);

  const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();

  // The first description row is searched for performers, the second for tags.
  const [actorsElement, tagsElement] = $('.scene-description__row').toArray();

  const actors = $(actorsElement)
    .find('a[href*="com/model"]')
    .map((actorIndex, actorElement) => $(actorElement).text())
    .toArray();

  // Fold the colon-separated runtime as base-60 digits so that "SS", "MM:SS"
  // and "HH:MM:SS" all yield seconds. Indexing three fixed components produced
  // NaN whenever the runtime had fewer than three parts or was absent.
  const runtime = $('span[title="Runtime"]').text().trim();
  const duration = runtime
    ? runtime.split(':').reduce((seconds, part) => (seconds * 60) + Number(part), 0)
    : null;

  // Keep only the tags that have an entry in tagMap, translated to their mapped name.
  const rawTags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
  const tags = rawTags.reduce((accTags, tag) => (tagMap[tag] ? [...accTags, tagMap[tag]] : accTags), []);

  return {
    url,
    id,
    title,
    date,
    actors,
    duration,
    tags,
    site,
  };
}
/**
 * Downloads the site's new-videos listing and scrapes its scenes.
 *
 * @param {Object} site - Site entry; its `url` is the base for the request.
 * @returns {Promise<Object[]>} Scenes as returned by scrapeLatest.
 */
async function fetchLatest(site) {
  const response = await bhttp.get(`${site.url}/new-videos`);
  const html = response.body.toString();

  return scrapeLatest(html, site);
}
/**
 * Downloads a scene page and scrapes its details.
 *
 * @param {string} url - URL of the scene page.
 * @param {Object} site - Site entry; passed through to scrapeScene.
 * @returns {Promise<Object>} Scene as returned by scrapeScene.
 */
async function fetchScene(url, site) {
  const response = await bhttp.get(url);
  const html = response.body.toString();

  return scrapeScene(html, url, site);
}
// Public interface of this scraper: only the two network-facing fetchers are
// exported; the HTML scraping helpers stay private to the module.
module.exports = {
fetchLatest,
fetchScene,
};

View File

@ -4,6 +4,36 @@ const bhttp = require('bhttp');
const cheerio = require('cheerio'); const cheerio = require('cheerio');
const moment = require('moment'); const moment = require('moment');
// Maps tag labels scraped from the scene page (category links or the
// comma-separated keywords of the page's JSON-LD data) to this project's tag
// names. Labels without an entry are dropped by the scene scrapers. Lookups
// are exact, so keys must match the scraped text character for character.
const tagMap = {
Anal: 'anal',
'Ass Licking': 'ass licking',
'Ass To Mouth': 'ATM',
'Big Ass': 'big butt',
'Big Tits': 'big boobs',
Black: 'big black cock',
Blonde: 'blonde',
Blowjob: 'blowjob',
'Blowjob (double)': 'double blowjob',
Brunette: 'brunette',
'Cum Swallowing': 'swallowing',
Cumshot: 'cumshot',
Deepthroat: 'deepthroat',
'Double Penetration (DP)': 'DP',
Ebony: 'ebony',
Facial: 'facial',
Gangbang: 'gangbang',
Gonzo: 'gonzo',
Hardcore: 'hardcore',
Interracial: 'interracial',
Latina: 'latina',
Petite: 'petite',
'Pussy Licking': 'pussy licking',
// Deliberately shares its target with 'Ass Licking' above.
Rimjob: 'ass licking',
'Rough Sex': 'rough',
'Small Tits': 'small boobs',
Threesome: 'threesome',
};
function scrape(html, site) { function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
const scenesElements = $('.sceneInfo').toArray(); const scenesElements = $('.sceneInfo').toArray();
@ -39,21 +69,70 @@ function scrape(html, site) {
}); });
} }
function scrapeScene(html, url, site) { function scrapeSceneFallback($, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const title = $('h1.title').text(); const title = $('h1.title').text();
const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate(); const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate();
const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray(); const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
const description = $('.sceneDesc').text().replace(/Video Description:/g, ' ').trim();
const description = ($('.sceneDesc').text() || '').replace(/Video Description:/g, ' ').trim();
const stars = $('.currentRating').text().split('/')[0] / 2; const stars = $('.currentRating').text().split('/')[0] / 2;
const rawTags = $('.sceneColCategories > a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
const tags = rawTags.reduce((accTags, tag) => (tagMap[tag] ? [...accTags, tagMap[tag]] : accTags), []);
return { return {
url, url,
title, title,
date, date,
actors, actors,
description, description,
tags,
rating: {
stars,
},
site,
};
}
function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const json = $('script[type="application/ld+json"]').html();
if (!json) {
return scrapeSceneFallback($, url, site);
}
const data = JSON.parse(json)[0];
const title = data.isPartOf.name;
const date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();
const actors = data.actor
.sort(({ genderA }, { genderB }) => {
if (genderA === 'female' && genderB === 'male') return 1;
if (genderA === 'male' && genderB === 'female') return -1;
return 0;
})
.map(actor => actor.name);
const description = data.description || undefined;
const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5;
const runtime = data.duration.slice(2).split(':');
const duration = Number(runtime[0]) * 3600 + Number(runtime[1]) * 60 + Number(runtime[2]);
const rawTags = data.keywords.split(', ');
const tags = rawTags.reduce((accTags, tag) => (tagMap[tag] ? [...accTags, tagMap[tag]] : accTags), []);
return {
url,
title,
date,
actors,
description,
duration,
tags,
rating: { rating: {
stars, stars,
}, },

View File

@ -7,10 +7,6 @@ const formatters = {
date: (date, column) => moment(date).format(column.format || 'MMM DD, YYYY'), date: (date, column) => moment(date).format(column.format || 'MMM DD, YYYY'),
actors: actors => actors.join(', '), actors: actors => actors.join(', '),
rating: (rating) => { rating: (rating) => {
if (rating === null) {
return '\x1b[90mNot available\x1b[0m';
}
if ((rating.likes === 0 && rating.dislikes === 0) || rating.stars === 0) { if ((rating.likes === 0 && rating.dislikes === 0) || rating.stars === 0) {
return '\x1b[90mUnrated\x1b[0m'; return '\x1b[90mUnrated\x1b[0m';
} }

View File

@ -18,10 +18,10 @@ function renderReleases(scenes, screen) {
const isFuture = moment(scene.date).isAfter(); const isFuture = moment(scene.date).isAfter();
const row = config.columns.reduce((acc, column) => { const row = config.columns.reduce((acc, column) => {
const value = (formatters[column.value] const value = (scene[column.value] && (formatters[column.value]
? formatters[column.value](scene[column.value], column) ? formatters[column.value](scene[column.value], column)
: scene[column.value]) : scene[column.value])
.toString(); .toString()) || '\x1b[90mNot available\x1b[0m';
const realLength = value.replace(/\x1b\[\d+m/g, '').length; // eslint-disable-line no-control-regex const realLength = value.replace(/\x1b\[\d+m/g, '').length; // eslint-disable-line no-control-regex
const entityLength = value.length - realLength; const entityLength = value.length - realLength;

49
tags.js Normal file
View File

@ -0,0 +1,49 @@
'use strict';
// Tag table: each key is a tag name and each value is an array of alternative
// spellings for it (an empty array means none are listed).
// NOTE(review): the code consuming this table is not visible here — confirm
// whether lookups are case-sensitive and whether keys themselves are matched,
// since only two entries ('big black cock', 'threesome') repeat their own key
// in the alias array.
// NOTE(review): the scraper tag maps added in this commit also emit
// 'interracial' and 'natural', which have no key here — confirm whether they
// should be added.
module.exports = {
anal: [],
asian: ['asians'],
'ass licking': ['rimming', 'rimjob', 'anilingus'],
ATM: ['ass to mouth'],
'big black cock': ['BBC', 'bbc', 'big black cock', 'big black cocks'],
'big cock': ['big cocks'],
'big butt': ['big butts'],
'big boobs': ['big tits'],
blonde: ['blondes'],
blowjob: [],
blowbang: [],
brunette: ['brunettes'],
bukkake: [],
cheerleader: ['cheer leader'],
creampie: ['creampies'],
cumshot: [],
deepthroat: ['deep throat'],
DAP: ['dap', 'double anal penetration'],
DP: ['dp', 'double penetration'],
DVP: ['DPP', 'dpp', 'dvp', 'double vaginal penetration', 'double pussy penetration'],
'double blowjob': [],
ebony: [],
facefucking: [],
facial: ['facials'],
gangbang: [],
gaping: ['gape', 'gapes'],
gonzo: [],
hardcore: [],
latina: [],
lingerie: [],
maid: [],
MILF: ['milf'],
petite: [],
'pussy licking': ['cunnilingus'],
redhead: ['red head'],
rough: [],
schoolgirl: ['school girl'],
'small boobs': ['small tits'],
swallowing: ['swallow'],
stockings: [],
tattoo: ['tattoos'],
threesome: ['threesome'],
teen: ['teens'],
toy: ['toys'],
TP: ['tp', 'triple penetration'],
};