forked from DebaucheryLibrarian/traxxx
Added tags and duration to scraping. Added LegalPorno scraper.
This commit is contained in:
@@ -4,6 +4,27 @@ const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Maps the tag labels as they appear on the scraped site to the tag names
 * used by this project.
 *
 * scrapeScene looks every scraped label up in this table and drops any label
 * that has no entry here, so only the tags listed below reach the result.
 */
const tagMap = {
  Anal: 'anal',
  Asian: 'asian',
  'Ass To Mouth': 'ATM',
  'Big Cocks': 'big cock',
  Black: 'BBC',
  Blondes: 'blonde',
  Brunettes: 'brunette',
  Blowjobs: 'blowjob',
  Creampie: 'creampie',
  'Deep Throat': 'deepthroat',
  Facial: 'facial',
  Interracial: 'interracial',
  Lingerie: 'lingerie',
  Natural: 'natural',
  // Was misspelled 'readhead', which would not match the intended tag.
  'Red Head': 'redhead',
  'School Girl': 'schoolgirl',
  Tattoo: 'tattoo',
  Teen: 'teen',
};
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const scenesElements = $('.update_details').toArray();
|
||||
@@ -26,7 +47,6 @@ function scrapeLatest(html, site) {
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
rating: null,
|
||||
site,
|
||||
};
|
||||
});
|
||||
@@ -83,6 +103,9 @@ function scrapeScene(html, url, site) {
|
||||
|
||||
const stars = Number($('.avg_rating').text().trim().replace(/[\s|Avg Rating:]/g, ''));
|
||||
|
||||
const rawTags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
const tags = rawTags.reduce((accTags, tag) => (tagMap[tag] ? [...accTags, tagMap[tag]] : accTags), []);
|
||||
|
||||
return {
|
||||
url,
|
||||
title,
|
||||
@@ -92,6 +115,7 @@ function scrapeScene(html, url, site) {
|
||||
rating: {
|
||||
stars,
|
||||
},
|
||||
tags,
|
||||
site,
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user