Added tags and duration to scraping. Added LegalPorno scraper.
This commit is contained in:
parent
e8d4b76403
commit
4fcabb4aae
|
@ -69,13 +69,13 @@ module.exports = {
|
|||
},
|
||||
legalporno: {
|
||||
name: 'LegalPorno',
|
||||
url: 'https://www.legalporno.com/',
|
||||
url: 'https://www.legalporno.com',
|
||||
description: 'The Best HD Porn For You!',
|
||||
sites: {
|
||||
legalporno: {
|
||||
name: 'LegalPorno',
|
||||
label: 'legalp',
|
||||
url: 'https://www.legalporno.com/',
|
||||
url: 'https://www.legalporno.com',
|
||||
description: 'The Best HD Porn For You!',
|
||||
},
|
||||
},
|
||||
|
|
|
@ -50,7 +50,7 @@ async function init() {
|
|||
method.render(result, screen);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(error.message);
|
||||
console.error(argv.debug ? error : error.message);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,10 @@ const { argv } = yargs
|
|||
describe: 'Copy relevant result to clipboard',
|
||||
type: 'boolean',
|
||||
alias: 'c',
|
||||
})
|
||||
.option('debug', {
|
||||
describe: 'Show error stack traces',
|
||||
type: 'boolean',
|
||||
});
|
||||
|
||||
module.exports = argv;
|
||||
|
|
|
@ -50,7 +50,11 @@ function deriveFilename(scene) {
|
|||
.replace(/\//g, config.filename.slash);
|
||||
}
|
||||
|
||||
return value.replace(/\//g, config.filename.slash) || '';
|
||||
if (value) {
|
||||
return value.replace(/\//g, config.filename.slash) || '';
|
||||
}
|
||||
|
||||
return '';
|
||||
});
|
||||
|
||||
return filename;
|
||||
|
|
|
@ -2,10 +2,12 @@
|
|||
|
||||
const xempire = require('./xempire');
|
||||
const julesjordan = require('./julesjordan');
|
||||
const legalporno = require('./legalporno');
|
||||
const pervcity = require('./pervcity');
|
||||
|
||||
// Expose every scraper module required above under its own name.
module.exports = {
|
||||
xempire,
|
||||
julesjordan,
|
||||
legalporno,
|
||||
pervcity,
|
||||
};
|
||||
|
|
|
@ -4,6 +4,27 @@ const bhttp = require('bhttp');
|
|||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Maps tag labels as scraped from the site to the tag names this scraper reports.
 * Labels without an entry are dropped by the tag reducer in scrapeScene.
 */
const tagMap = {
    Anal: 'anal',
    Asian: 'asian',
    'Ass To Mouth': 'ATM',
    'Big Cocks': 'big cock',
    Black: 'BBC',
    Blondes: 'blonde',
    Brunettes: 'brunette',
    Blowjobs: 'blowjob',
    Creampie: 'creampie',
    'Deep Throat': 'deepthroat',
    Facial: 'facial',
    Interracial: 'interracial',
    Lingerie: 'lingerie',
    Natural: 'natural',
    'Red Head': 'redhead', // was misspelled 'readhead', which matches no known tag or alias
    'School Girl': 'schoolgirl',
    Tattoo: 'tattoo',
    Teen: 'teen',
};
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const scenesElements = $('.update_details').toArray();
|
||||
|
@ -26,7 +47,6 @@ function scrapeLatest(html, site) {
|
|||
title,
|
||||
actors,
|
||||
date,
|
||||
rating: null,
|
||||
site,
|
||||
};
|
||||
});
|
||||
|
@ -83,6 +103,9 @@ function scrapeScene(html, url, site) {
|
|||
|
||||
const stars = Number($('.avg_rating').text().trim().replace(/[\s|Avg Rating:]/g, ''));
|
||||
|
||||
const rawTags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
const tags = rawTags.reduce((accTags, tag) => (tagMap[tag] ? [...accTags, tagMap[tag]] : accTags), []);
|
||||
|
||||
return {
|
||||
url,
|
||||
title,
|
||||
|
@ -92,6 +115,7 @@ function scrapeScene(html, url, site) {
|
|||
rating: {
|
||||
stars,
|
||||
},
|
||||
tags,
|
||||
site,
|
||||
};
|
||||
}
|
||||
|
|
|
@ -1 +1,101 @@
|
|||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Lookup table from the tag labels found on scene pages to the tag names
 * this scraper reports. Labels that are not listed here are discarded by
 * the tag reducer in scrapeScene. Entries are kept in alphabetical order.
 */
const tagMap = {
    '3+ on 1': 'gangbang',
    anal: 'anal',
    bbc: 'big black cock',
    'cum swallowing': 'swallowing',
    'deep throat': 'deepthroat',
    'double anal (DAP)': 'DAP',
    'double penetration (DP)': 'DP',
    'double vaginal (DPP)': 'DVP',
    'gapes (gaping asshole)': 'gaping',
    'huge toys': 'toys',
    interracial: 'interracial',
    rough: 'rough',
    'triple penetration': 'TP',
};
|
||||
|
||||
/**
 * Splits a scraped scene title into the studio scene ID and the remaining title.
 *
 * The ID is only looked for in the last space-separated word of the title and
 * must consist of one of the known studio prefixes followed by digits.
 *
 * @param {string} originalTitle - Title text as scraped; surrounding whitespace is ignored.
 * @returns {{ id: (string|null), title: (string|null) }} `id` is null when the
 *   title carries no scene ID; both fields are null when no title text was scraped.
 */
function extractTitle(originalTitle) {
    if (typeof originalTitle !== 'string') {
        // A missing element or attribute yields undefined, which would throw on .split().
        return { id: null, title: null };
    }

    // Trailing whitespace would otherwise turn the last word into '' and hide the ID.
    const trimmedTitle = originalTitle.trim();
    const titleComponents = trimmedTitle.split(' ');
    const lastComponent = titleComponents[titleComponents.length - 1];
    const sceneIdMatch = lastComponent.match(/(GP|SZ|IV|GIO|AA|GL|BZ|FS)\d+/); // detect studio prefixes

    if (!sceneIdMatch) {
        return { id: null, title: trimmedTitle };
    }

    return {
        id: sceneIdMatch[0],
        title: titleComponents.slice(0, -1).join(' ').trim(),
    };
}
|
||||
|
||||
/**
 * Builds one scene summary per thumbnail found on an overview page.
 *
 * @param {string} html - Markup of the overview page.
 * @param {Object} site - Site descriptor, copied onto every result unchanged.
 * @returns {Object[]} Objects with `url`, `id`, `title`, `date` and `site`.
 */
function scrapeLatest(html, site) {
    const $ = cheerio.load(html, { normalizeWhitespace: true });

    return $('.thumbnails > div')
        .toArray()
        .map((sceneElement) => {
            const $scene = $(sceneElement);
            const $titleLink = $scene.find('.thumbnail-title a');

            // The link's title attribute holds the full title including the scene ID.
            const { id, title } = extractTitle($titleLink.attr('title'));

            return {
                url: $titleLink.attr('href'),
                id,
                title,
                date: moment.utc($scene.attr('release'), 'YYYY/MM/DD').toDate(),
                site,
            };
        });
}
|
||||
|
||||
/**
 * Converts a colon-separated runtime ('HH:MM:SS' or 'MM:SS') into seconds.
 *
 * @param {string} runtimeText - Runtime text as scraped from the page.
 * @returns {number|null} Total seconds, or null when the text is empty,
 *   has more than three components or contains a non-numeric component.
 */
function parseRuntime(runtimeText) {
    const trimmedRuntime = runtimeText.trim();

    if (trimmedRuntime === '') {
        return null;
    }

    const parts = trimmedRuntime.split(':').map(Number);

    if (parts.length > 3 || parts.some(part => Number.isNaN(part))) {
        return null;
    }

    // Fold left to right in base 60 so two- and three-part runtimes both work.
    return parts.reduce((seconds, part) => (seconds * 60) + part, 0);
}

/**
 * Extracts the details of a single scene from its page.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - URL the page was fetched from, returned unchanged.
 * @param {Object} site - Site descriptor, returned unchanged.
 * @returns {Object} Scene with `url`, `id`, `title`, `date`, `actors`,
 *   `duration` (seconds, or null when no runtime could be read), `tags` and `site`.
 */
function scrapeScene(html, url, site) {
    const $ = cheerio.load(html, { normalizeWhitespace: true });

    const originalTitle = $('h1.watchpage-title').text().trim();
    const { id, title } = extractTitle(originalTitle);

    const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();

    // The first description row is read for performers, the second for tags.
    const [actorsElement, tagsElement] = $('.scene-description__row').toArray();
    const actors = $(actorsElement)
        .find('a[href*="com/model"]')
        .map((actorIndex, actorElement) => $(actorElement).text()).toArray();

    // Previously a missing or two-part runtime produced NaN.
    const duration = parseRuntime($('span[title="Runtime"]').text());

    const rawTags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
    // Only labels known to tagMap are kept.
    const tags = rawTags.reduce((accTags, tag) => (tagMap[tag] ? [...accTags, tagMap[tag]] : accTags), []);

    return {
        url,
        id,
        title,
        date,
        actors,
        duration,
        tags,
        site,
    };
}
|
||||
|
||||
/**
 * Downloads the site's new-videos page and scrapes the scenes listed on it.
 *
 * @param {Object} site - Site descriptor; `site.url` is used as the base URL.
 * @returns {Promise<Object[]>} Result of scrapeLatest for the downloaded page.
 */
async function fetchLatest(site) {
    const latestUrl = `${site.url}/new-videos`;
    const { body } = await bhttp.get(latestUrl);

    return scrapeLatest(body.toString(), site);
}
|
||||
|
||||
/**
 * Downloads a scene page and scrapes its details.
 *
 * @param {string} url - Address of the scene page.
 * @param {Object} site - Site descriptor, handed on to scrapeScene.
 * @returns {Promise<Object>} Result of scrapeScene for the downloaded page.
 */
async function fetchScene(url, site) {
    const { body } = await bhttp.get(url);
    const html = body.toString();

    return scrapeScene(html, url, site);
}
|
||||
|
||||
// Public interface of this scraper: only the two fetch functions are exported.
module.exports = {
|
||||
fetchLatest,
|
||||
||||
fetchScene,
|
||||
};
|
||||
|
|
|
@ -4,6 +4,36 @@ const bhttp = require('bhttp');
|
|||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
|
||||
// Maps category labels as scraped from the site to the tag names this scraper reports.
// Labels without an entry are dropped by the tag reducers in this file.
const tagMap = {
|
||||
Anal: 'anal',
|
||||
'Ass Licking': 'ass licking',
|
||||
'Ass To Mouth': 'ATM',
|
||||
'Big Ass': 'big butt',
|
||||
'Big Tits': 'big boobs',
|
||||
Black: 'big black cock',
|
||||
Blonde: 'blonde',
|
||||
Blowjob: 'blowjob',
|
||||
'Blowjob (double)': 'double blowjob',
|
||||
Brunette: 'brunette',
|
||||
'Cum Swallowing': 'swallowing',
|
||||
Cumshot: 'cumshot',
|
||||
Deepthroat: 'deepthroat',
|
||||
'Double Penetration (DP)': 'DP',
|
||||
Ebony: 'ebony',
|
||||
Facial: 'facial',
|
||||
Gangbang: 'gangbang',
|
||||
Gonzo: 'gonzo',
|
||||
Hardcore: 'hardcore',
|
||||
Interracial: 'interracial',
|
||||
Latina: 'latina',
|
||||
Petite: 'petite',
|
||||
'Pussy Licking': 'pussy licking',
|
||||
Rimjob: 'ass licking',
|
||||
'Rough Sex': 'rough',
|
||||
'Small Tits': 'small boobs',
|
||||
Threesome: 'threesome',
|
||||
};
|
||||
|
||||
function scrape(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const scenesElements = $('.sceneInfo').toArray();
|
||||
|
@ -39,21 +69,70 @@ function scrape(html, site) {
|
|||
});
|
||||
}
|
||||
|
||||
function scrapeScene(html, url, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
/**
 * Scrapes a scene from the page markup itself. scrapeScene uses this when
 * the page carries no JSON-LD script to read the details from.
 *
 * @param {Function} $ - Cheerio instance already loaded with the scene page.
 * @param {string} url - URL of the scene page, returned unchanged.
 * @param {Object} site - Site descriptor, returned unchanged.
 * @returns {Object} Scene with `url`, `title`, `date`, `actors`,
 *   `description`, `tags`, `rating.stars` and `site`.
 */
function scrapeSceneFallback($, url, site) {
    const title = $('h1.title').text();
    const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate();
    const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

    // Single declaration: the superseded variant without the `|| ''` guard is dropped.
    const description = ($('.sceneDesc').text() || '').replace(/Video Description:/g, ' ').trim();

    // The rating text is read as '<value>/<max>'; the value is halved to give stars.
    const stars = $('.currentRating').text().split('/')[0] / 2;

    const rawTags = $('.sceneColCategories > a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
    // Several labels share one tag (e.g. 'Ass Licking' and 'Rimjob'), so de-duplicate
    // after mapping; unmapped labels are dropped as before.
    const tags = [...new Set(rawTags.map(tag => tagMap[tag]).filter(Boolean))];

    return {
        url,
        title,
        date,
        actors,
        description,
        tags,
        rating: {
            stars,
        },
        site,
    };
}
|
||||
|
||||
function scrapeScene(html, url, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const json = $('script[type="application/ld+json"]').html();
|
||||
|
||||
if (!json) {
|
||||
return scrapeSceneFallback($, url, site);
|
||||
}
|
||||
|
||||
const data = JSON.parse(json)[0];
|
||||
|
||||
const title = data.isPartOf.name;
|
||||
const date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();
|
||||
|
||||
// Order performers by gender before taking their names. The comparator used to
// destructure `genderA`/`genderB`, properties the actor objects do not have, so
// both sides were always undefined and the sort never reordered anything.
// The list is copied first so the parsed JSON-LD data is not reordered in place.
// NOTE(review): as written, a female entry sorts after a male one; confirm this
// is the intended order now that the comparison takes effect.
const actors = [...data.actor]
    .sort(({ gender: genderA }, { gender: genderB }) => {
        if (genderA === 'female' && genderB === 'male') return 1;
        if (genderA === 'male' && genderB === 'female') return -1;

        return 0;
    })
    .map(actor => actor.name);
|
||||
|
||||
const description = data.description || undefined;
|
||||
const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5;
|
||||
|
||||
const runtime = data.duration.slice(2).split(':');
|
||||
const duration = Number(runtime[0]) * 3600 + Number(runtime[1]) * 60 + Number(runtime[2]);
|
||||
|
||||
const rawTags = data.keywords.split(', ');
|
||||
const tags = rawTags.reduce((accTags, tag) => (tagMap[tag] ? [...accTags, tagMap[tag]] : accTags), []);
|
||||
|
||||
return {
|
||||
url,
|
||||
title,
|
||||
date,
|
||||
actors,
|
||||
description,
|
||||
duration,
|
||||
tags,
|
||||
rating: {
|
||||
stars,
|
||||
},
|
||||
|
|
|
@ -7,10 +7,6 @@ const formatters = {
|
|||
date: (date, column) => moment(date).format(column.format || 'MMM DD, YYYY'),
|
||||
actors: actors => actors.join(', '),
|
||||
rating: (rating) => {
|
||||
if (rating === null) {
|
||||
return '\x1b[90mNot available\x1b[0m';
|
||||
}
|
||||
|
||||
if ((rating.likes === 0 && rating.dislikes === 0) || rating.stars === 0) {
|
||||
return '\x1b[90mUnrated\x1b[0m';
|
||||
}
|
||||
|
|
|
@ -18,10 +18,10 @@ function renderReleases(scenes, screen) {
|
|||
const isFuture = moment(scene.date).isAfter();
|
||||
|
||||
const row = config.columns.reduce((acc, column) => {
|
||||
const value = (formatters[column.value]
|
||||
const value = (scene[column.value] && (formatters[column.value]
|
||||
? formatters[column.value](scene[column.value], column)
|
||||
: scene[column.value])
|
||||
.toString();
|
||||
.toString()) || '\x1b[90mNot available\x1b[0m';
|
||||
|
||||
const realLength = value.replace(/\x1b\[\d+m/g, '').length; // eslint-disable-line no-control-regex
|
||||
const entityLength = value.length - realLength;
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
'use strict';
|
||||
|
||||
// Tag names mapped to alternative spellings of the same tag; an empty list
// means no alternatives are recorded for that tag.
// NOTE(review): presumably used to normalise scraped tags to one name; the
// consumer is not visible here, confirm before reordering or pruning entries.
module.exports = {
|
||||
anal: [],
|
||||
asian: ['asians'],
|
||||
'ass licking': ['rimming', 'rimjob', 'anilingus'],
|
||||
ATM: ['ass to mouth'],
|
||||
'big black cock': ['BBC', 'bbc', 'big black cock', 'big black cocks'],
|
||||
'big cock': ['big cocks'],
|
||||
'big butt': ['big butts'],
|
||||
'big boobs': ['big tits'],
|
||||
blonde: ['blondes'],
|
||||
blowjob: [],
|
||||
blowbang: [],
|
||||
brunette: ['brunettes'],
|
||||
bukkake: [],
|
||||
cheerleader: ['cheer leader'],
|
||||
creampie: ['creampies'],
|
||||
cumshot: [],
|
||||
deepthroat: ['deep throat'],
|
||||
DAP: ['dap', 'double anal penetration'],
|
||||
DP: ['dp', 'double penetration'],
|
||||
DVP: ['DPP', 'dpp', 'dvp', 'double vaginal penetration', 'double pussy penetration'],
|
||||
'double blowjob': [],
|
||||
ebony: [],
|
||||
facefucking: [],
|
||||
facial: ['facials'],
|
||||
gangbang: [],
|
||||
gaping: ['gape', 'gapes'],
|
||||
gonzo: [],
|
||||
hardcore: [],
|
||||
latina: [],
|
||||
lingerie: [],
|
||||
maid: [],
|
||||
MILF: ['milf'],
|
||||
petite: [],
|
||||
'pussy licking': ['cunnilingus'],
|
||||
redhead: ['red head'],
|
||||
rough: [],
|
||||
schoolgirl: ['school girl'],
|
||||
'small boobs': ['small tits'],
|
||||
swallowing: ['swallow'],
|
||||
stockings: [],
|
||||
tattoo: ['tattoos'],
|
||||
threesome: ['threesome'],
|
||||
teen: ['teens'],
|
||||
toy: ['toys'],
|
||||
TP: ['tp', 'triple penetration'],
|
||||
};
|
Loading…
Reference in New Issue