Added tags and duration to scraping. Added LegalPorno scraper.
This commit is contained in:
parent
e8d4b76403
commit
4fcabb4aae
|
@ -69,13 +69,13 @@ module.exports = {
|
||||||
},
|
},
|
||||||
legalporno: {
|
legalporno: {
|
||||||
name: 'LegalPorno',
|
name: 'LegalPorno',
|
||||||
url: 'https://www.legalporno.com/',
|
url: 'https://www.legalporno.com',
|
||||||
description: 'The Best HD Porn For You!',
|
description: 'The Best HD Porn For You!',
|
||||||
sites: {
|
sites: {
|
||||||
legalporno: {
|
legalporno: {
|
||||||
name: 'LegalPorno',
|
name: 'LegalPorno',
|
||||||
label: 'legalp',
|
label: 'legalp',
|
||||||
url: 'https://www.legalporno.com/',
|
url: 'https://www.legalporno.com',
|
||||||
description: 'The Best HD Porn For You!',
|
description: 'The Best HD Porn For You!',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
|
@ -50,7 +50,7 @@ async function init() {
|
||||||
method.render(result, screen);
|
method.render(result, screen);
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(error.message);
|
console.error(argv.debug ? error : error.message);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,10 @@ const { argv } = yargs
|
||||||
describe: 'Copy relevant result to clipboard',
|
describe: 'Copy relevant result to clipboard',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
alias: 'c',
|
alias: 'c',
|
||||||
|
})
|
||||||
|
.option('debug', {
|
||||||
|
describe: 'Show error stack traces',
|
||||||
|
type: 'boolean',
|
||||||
});
|
});
|
||||||
|
|
||||||
module.exports = argv;
|
module.exports = argv;
|
||||||
|
|
|
@ -50,7 +50,11 @@ function deriveFilename(scene) {
|
||||||
.replace(/\//g, config.filename.slash);
|
.replace(/\//g, config.filename.slash);
|
||||||
}
|
}
|
||||||
|
|
||||||
return value.replace(/\//g, config.filename.slash) || '';
|
if (value) {
|
||||||
|
return value.replace(/\//g, config.filename.slash) || '';
|
||||||
|
}
|
||||||
|
|
||||||
|
return '';
|
||||||
});
|
});
|
||||||
|
|
||||||
return filename;
|
return filename;
|
||||||
|
|
|
@ -2,10 +2,12 @@
|
||||||
|
|
||||||
const xempire = require('./xempire');
|
const xempire = require('./xempire');
|
||||||
const julesjordan = require('./julesjordan');
|
const julesjordan = require('./julesjordan');
|
||||||
|
const legalporno = require('./legalporno');
|
||||||
const pervcity = require('./pervcity');
|
const pervcity = require('./pervcity');
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
xempire,
|
xempire,
|
||||||
julesjordan,
|
julesjordan,
|
||||||
|
legalporno,
|
||||||
pervcity,
|
pervcity,
|
||||||
};
|
};
|
||||||
|
|
|
@ -4,6 +4,27 @@ const bhttp = require('bhttp');
|
||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
|
/**
 * Maps the tag labels found on scene pages to the tag names used by this
 * project. Labels without an entry here are dropped when a scene is scraped.
 */
const tagMap = {
    Anal: 'anal',
    Asian: 'asian',
    'Ass To Mouth': 'ATM',
    'Big Cocks': 'big cock',
    Black: 'BBC',
    Blondes: 'blonde',
    Brunettes: 'brunette',
    Blowjobs: 'blowjob',
    Creampie: 'creampie',
    'Deep Throat': 'deepthroat',
    Facial: 'facial',
    Interracial: 'interracial',
    Lingerie: 'lingerie',
    Natural: 'natural',
    // Fixed typo: this used to map to 'readhead', which matches neither the
    // 'redhead' tag nor its 'red head' alias in the project's tag list.
    'Red Head': 'redhead',
    'School Girl': 'schoolgirl',
    Tattoo: 'tattoo',
    Teen: 'teen',
};
|
||||||
|
|
||||||
function scrapeLatest(html, site) {
|
function scrapeLatest(html, site) {
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||||
const scenesElements = $('.update_details').toArray();
|
const scenesElements = $('.update_details').toArray();
|
||||||
|
@ -26,7 +47,6 @@ function scrapeLatest(html, site) {
|
||||||
title,
|
title,
|
||||||
actors,
|
actors,
|
||||||
date,
|
date,
|
||||||
rating: null,
|
|
||||||
site,
|
site,
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
@ -83,6 +103,9 @@ function scrapeScene(html, url, site) {
|
||||||
|
|
||||||
const stars = Number($('.avg_rating').text().trim().replace(/[\s|Avg Rating:]/g, ''));
|
const stars = Number($('.avg_rating').text().trim().replace(/[\s|Avg Rating:]/g, ''));
|
||||||
|
|
||||||
|
const rawTags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||||
|
const tags = rawTags.reduce((accTags, tag) => (tagMap[tag] ? [...accTags, tagMap[tag]] : accTags), []);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
url,
|
url,
|
||||||
title,
|
title,
|
||||||
|
@ -92,6 +115,7 @@ function scrapeScene(html, url, site) {
|
||||||
rating: {
|
rating: {
|
||||||
stars,
|
stars,
|
||||||
},
|
},
|
||||||
|
tags,
|
||||||
site,
|
site,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -1 +1,101 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
|
const bhttp = require('bhttp');
|
||||||
|
const cheerio = require('cheerio');
|
||||||
|
const moment = require('moment');
|
||||||
|
|
||||||
|
/**
 * Lookup table from the tag text found on a scene page to the tag name
 * reported in the scrape result. Tag text without an entry is ignored.
 * Keys are listed alphabetically.
 */
const tagMap = {
    '3+ on 1': 'gangbang',
    'anal': 'anal',
    'bbc': 'big black cock',
    'cum swallowing': 'swallowing',
    'deep throat': 'deepthroat',
    'double anal (DAP)': 'DAP',
    'double penetration (DP)': 'DP',
    'double vaginal (DPP)': 'DVP',
    'gapes (gaping asshole)': 'gaping',
    'huge toys': 'toys',
    'interracial': 'interracial',
    'rough': 'rough',
    'triple penetration': 'TP',
};
|
||||||
|
|
||||||
|
/**
 * Split a raw scene title into its studio scene ID and the remaining title.
 *
 * The ID is looked for in the last space-separated word of the title: one of
 * the known studio prefixes followed by digits. When it is found, that last
 * word is removed from the returned title; otherwise the (trimmed) title is
 * returned as is and the ID is null.
 *
 * @param {string} originalTitle - Title text as found on the page.
 * @returns {{ id: (string|null), title: string }} The detected scene ID (or
 *   null) and the title without the ID word.
 */
function extractTitle(originalTitle) {
    // A missing title attribute arrives here as undefined; report "nothing
    // found" instead of throwing on .split().
    if (typeof originalTitle !== 'string') {
        return { id: null, title: '' };
    }

    // Trim first so trailing whitespace does not leave an empty last word
    // that would hide the scene ID.
    const trimmedTitle = originalTitle.trim();
    const titleComponents = trimmedTitle.split(' ');
    const lastComponent = titleComponents[titleComponents.length - 1];
    const sceneIdMatch = lastComponent.match(/(GP|SZ|IV|GIO|AA|GL|BZ|FS)\d+/); // detect studio prefixes

    if (!sceneIdMatch) {
        return { id: null, title: trimmedTitle };
    }

    return {
        id: sceneIdMatch[0],
        title: titleComponents.slice(0, -1).join(' '),
    };
}
|
||||||
|
|
||||||
|
/**
 * Build one release entry per thumbnail found on a listing page.
 *
 * @param {string} html - Markup of the listing page.
 * @param {Object} site - Site the releases belong to; passed through untouched.
 * @returns {Object[]} Entries of the form { url, id, title, date, site }.
 */
function scrapeLatest(html, site) {
    const $ = cheerio.load(html, { normalizeWhitespace: true });

    return $('.thumbnails > div')
        .toArray()
        .map((sceneElement) => {
            const $scene = $(sceneElement);
            const $link = $scene.find('.thumbnail-title a');

            // The link's title attribute carries the title including the scene ID.
            const { id, title } = extractTitle($link.attr('title'));

            return {
                url: $link.attr('href'),
                id,
                title,
                // The release date is read from the thumbnail's "release" attribute.
                date: moment.utc($scene.attr('release'), 'YYYY/MM/DD').toDate(),
                site,
            };
        });
}
|
||||||
|
|
||||||
|
/**
 * Extract the details of a single scene from its page markup.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - URL the page was fetched from; returned unchanged.
 * @param {Object} site - Site the scene belongs to; returned unchanged.
 * @returns {Object} { url, id, title, date, actors, duration, tags, site },
 *   where duration is in seconds and tags holds only mapped tag names.
 */
function scrapeScene(html, url, site) {
    const $ = cheerio.load(html, { normalizeWhitespace: true });

    const originalTitle = $('h1.watchpage-title').text().trim();
    const { id, title } = extractTitle(originalTitle);

    const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();

    // The first description row is read for actors, the second for tags.
    const [actorsElement, tagsElement] = $('.scene-description__row').toArray();

    const actors = $(actorsElement)
        .find('a[href*="com/model"]')
        .map((actorIndex, actorElement) => $(actorElement).text())
        .toArray();

    // Fold the colon-separated runtime from left to right so that HH:MM:SS,
    // MM:SS and a bare SS all yield seconds. The previous fixed three-part
    // formula produced NaN whenever fewer than three parts were present.
    const duration = $('span[title="Runtime"]')
        .text()
        .trim()
        .split(':')
        .reduce((seconds, component) => (seconds * 60) + Number(component), 0);

    // Keep only tags that have an own entry in tagMap; the own-property check
    // prevents inherited keys such as "constructor" from slipping through.
    const tags = $(tagsElement)
        .find('a')
        .map((tagIndex, tagElement) => $(tagElement).text().trim())
        .toArray()
        .filter(tag => Object.prototype.hasOwnProperty.call(tagMap, tag))
        .map(tag => tagMap[tag]);

    return {
        url,
        id,
        title,
        date,
        actors,
        duration,
        tags,
        site,
    };
}
|
||||||
|
|
||||||
|
/**
 * Download the site's "new videos" listing and scrape the releases from it.
 *
 * @param {Object} site - Site to fetch; its url is used as the base address.
 * @returns {Promise<Object[]>} Whatever scrapeLatest returns for the page.
 */
async function fetchLatest(site) {
    const latestUrl = `${site.url}/new-videos`;
    const { body } = await bhttp.get(latestUrl);
    const html = body.toString();

    return scrapeLatest(html, site);
}
|
||||||
|
|
||||||
|
/**
 * Download a scene page and scrape the scene details from it.
 *
 * @param {string} url - Address of the scene page.
 * @param {Object} site - Site the scene belongs to; passed on to the scraper.
 * @returns {Promise<Object>} Whatever scrapeScene returns for the page.
 */
async function fetchScene(url, site) {
    const { body } = await bhttp.get(url);
    const html = body.toString();

    return scrapeScene(html, url, site);
}
|
||||||
|
|
||||||
|
// Public interface of this scraper: the latest-releases fetcher and the
// single-scene fetcher.
module.exports = { fetchLatest, fetchScene };
|
||||||
|
|
|
@ -4,6 +4,36 @@ const bhttp = require('bhttp');
|
||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
|
/**
 * Lookup table from the tag labels used by the site to the tag names reported
 * in the scrape result. Labels without an entry are ignored by the scraper.
 * All keys are quoted for consistency, since several of them contain spaces.
 */
const tagMap = {
    'Anal': 'anal',
    'Ass Licking': 'ass licking',
    'Ass To Mouth': 'ATM',
    'Big Ass': 'big butt',
    'Big Tits': 'big boobs',
    'Black': 'big black cock',
    'Blonde': 'blonde',
    'Blowjob': 'blowjob',
    'Blowjob (double)': 'double blowjob',
    'Brunette': 'brunette',
    'Cum Swallowing': 'swallowing',
    'Cumshot': 'cumshot',
    'Deepthroat': 'deepthroat',
    'Double Penetration (DP)': 'DP',
    'Ebony': 'ebony',
    'Facial': 'facial',
    'Gangbang': 'gangbang',
    'Gonzo': 'gonzo',
    'Hardcore': 'hardcore',
    'Interracial': 'interracial',
    'Latina': 'latina',
    'Petite': 'petite',
    'Pussy Licking': 'pussy licking',
    'Rimjob': 'ass licking',
    'Rough Sex': 'rough',
    'Small Tits': 'small boobs',
    'Threesome': 'threesome',
};
|
||||||
|
|
||||||
function scrape(html, site) {
|
function scrape(html, site) {
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||||
const scenesElements = $('.sceneInfo').toArray();
|
const scenesElements = $('.sceneInfo').toArray();
|
||||||
|
@ -39,21 +69,70 @@ function scrape(html, site) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene(html, url, site) {
|
function scrapeSceneFallback($, url, site) {
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
|
||||||
|
|
||||||
const title = $('h1.title').text();
|
const title = $('h1.title').text();
|
||||||
const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate();
|
const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate();
|
||||||
const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||||
const description = $('.sceneDesc').text().replace(/Video Description:/g, ' ').trim();
|
|
||||||
|
const description = ($('.sceneDesc').text() || '').replace(/Video Description:/g, ' ').trim();
|
||||||
const stars = $('.currentRating').text().split('/')[0] / 2;
|
const stars = $('.currentRating').text().split('/')[0] / 2;
|
||||||
|
|
||||||
|
const rawTags = $('.sceneColCategories > a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||||
|
const tags = rawTags.reduce((accTags, tag) => (tagMap[tag] ? [...accTags, tagMap[tag]] : accTags), []);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
url,
|
url,
|
||||||
title,
|
title,
|
||||||
date,
|
date,
|
||||||
actors,
|
actors,
|
||||||
description,
|
description,
|
||||||
|
tags,
|
||||||
|
rating: {
|
||||||
|
stars,
|
||||||
|
},
|
||||||
|
site,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function scrapeScene(html, url, site) {
|
||||||
|
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||||
|
const json = $('script[type="application/ld+json"]').html();
|
||||||
|
|
||||||
|
if (!json) {
|
||||||
|
return scrapeSceneFallback($, url, site);
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = JSON.parse(json)[0];
|
||||||
|
|
||||||
|
const title = data.isPartOf.name;
|
||||||
|
const date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();
|
||||||
|
|
||||||
|
const actors = data.actor
|
||||||
|
.sort(({ genderA }, { genderB }) => {
|
||||||
|
if (genderA === 'female' && genderB === 'male') return 1;
|
||||||
|
if (genderA === 'male' && genderB === 'female') return -1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
})
|
||||||
|
.map(actor => actor.name);
|
||||||
|
|
||||||
|
const description = data.description || undefined;
|
||||||
|
const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5;
|
||||||
|
|
||||||
|
const runtime = data.duration.slice(2).split(':');
|
||||||
|
const duration = Number(runtime[0]) * 3600 + Number(runtime[1]) * 60 + Number(runtime[2]);
|
||||||
|
|
||||||
|
const rawTags = data.keywords.split(', ');
|
||||||
|
const tags = rawTags.reduce((accTags, tag) => (tagMap[tag] ? [...accTags, tagMap[tag]] : accTags), []);
|
||||||
|
|
||||||
|
return {
|
||||||
|
url,
|
||||||
|
title,
|
||||||
|
date,
|
||||||
|
actors,
|
||||||
|
description,
|
||||||
|
duration,
|
||||||
|
tags,
|
||||||
rating: {
|
rating: {
|
||||||
stars,
|
stars,
|
||||||
},
|
},
|
||||||
|
|
|
@ -7,10 +7,6 @@ const formatters = {
|
||||||
date: (date, column) => moment(date).format(column.format || 'MMM DD, YYYY'),
|
date: (date, column) => moment(date).format(column.format || 'MMM DD, YYYY'),
|
||||||
actors: actors => actors.join(', '),
|
actors: actors => actors.join(', '),
|
||||||
rating: (rating) => {
|
rating: (rating) => {
|
||||||
if (rating === null) {
|
|
||||||
return '\x1b[90mNot available\x1b[0m';
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((rating.likes === 0 && rating.dislikes === 0) || rating.stars === 0) {
|
if ((rating.likes === 0 && rating.dislikes === 0) || rating.stars === 0) {
|
||||||
return '\x1b[90mUnrated\x1b[0m';
|
return '\x1b[90mUnrated\x1b[0m';
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,10 +18,10 @@ function renderReleases(scenes, screen) {
|
||||||
const isFuture = moment(scene.date).isAfter();
|
const isFuture = moment(scene.date).isAfter();
|
||||||
|
|
||||||
const row = config.columns.reduce((acc, column) => {
|
const row = config.columns.reduce((acc, column) => {
|
||||||
const value = (formatters[column.value]
|
const value = (scene[column.value] && (formatters[column.value]
|
||||||
? formatters[column.value](scene[column.value], column)
|
? formatters[column.value](scene[column.value], column)
|
||||||
: scene[column.value])
|
: scene[column.value])
|
||||||
.toString();
|
.toString()) || '\x1b[90mNot available\x1b[0m';
|
||||||
|
|
||||||
const realLength = value.replace(/\x1b\[\d+m/g, '').length; // eslint-disable-line no-control-regex
|
const realLength = value.replace(/\x1b\[\d+m/g, '').length; // eslint-disable-line no-control-regex
|
||||||
const entityLength = value.length - realLength;
|
const entityLength = value.length - realLength;
|
||||||
|
|
|
@ -0,0 +1,49 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
/**
 * Tag list: each key is a tag name, each value a list of alternative
 * spellings for it (an empty list when there are none).
 * NOTE(review): presumably the alternatives are resolved to their key by a
 * consumer outside this file; confirm against the caller.
 */
const tagAliases = {
    anal: [],
    asian: ['asians'],
    'ass licking': ['rimming', 'rimjob', 'anilingus'],
    ATM: ['ass to mouth'],
    'big black cock': ['BBC', 'bbc', 'big black cock', 'big black cocks'],
    'big cock': ['big cocks'],
    'big butt': ['big butts'],
    'big boobs': ['big tits'],
    blonde: ['blondes'],
    blowjob: [],
    blowbang: [],
    brunette: ['brunettes'],
    bukkake: [],
    cheerleader: ['cheer leader'],
    creampie: ['creampies'],
    cumshot: [],
    deepthroat: ['deep throat'],
    DAP: ['dap', 'double anal penetration'],
    DP: ['dp', 'double penetration'],
    DVP: ['DPP', 'dpp', 'dvp', 'double vaginal penetration', 'double pussy penetration'],
    'double blowjob': [],
    ebony: [],
    facefucking: [],
    facial: ['facials'],
    gangbang: [],
    gaping: ['gape', 'gapes'],
    gonzo: [],
    hardcore: [],
    latina: [],
    lingerie: [],
    maid: [],
    MILF: ['milf'],
    petite: [],
    'pussy licking': ['cunnilingus'],
    redhead: ['red head'],
    rough: [],
    schoolgirl: ['school girl'],
    'small boobs': ['small tits'],
    swallowing: ['swallow'],
    stockings: [],
    tattoo: ['tattoos'],
    threesome: ['threesome'],
    teen: ['teens'],
    toy: ['toys'],
    TP: ['tp', 'triple penetration'],
};

module.exports = tagAliases;
|
Loading…
Reference in New Issue