Added parameters column to sites database, fixes Perv City scraper. Getting shoot ID from all existing scrapers.

This commit is contained in:
2019-03-26 01:26:47 +01:00
parent 6a20cbc721
commit 8421cd8648
11 changed files with 45 additions and 37 deletions

View File

@@ -6,29 +6,13 @@ const moment = require('moment');
const { matchTags } = require('../tags');
// Lookup table from tag label (key) to the tag name it is mapped to (value).
// NOTE(review): presumably keys are labels as shown on the scraped site and
// values are the names used internally — usage is not visible here, confirm.
const tagMap = {
  '3+ on 1': 'gangbang',
  'anal': 'anal',
  'bbc': 'big black cock',
  'cum swallowing': 'swallowing',
  'rough': 'rough',
  'deep throat': 'deepthroat',
  'double penetration (DP)': 'DP',
  'double anal (DAP)': 'DAP',
  'double vaginal (DPP)': 'DVP',
  'gapes (gaping asshole)': 'gaping',
  'huge toys': 'toys',
  'interracial': 'interracial',
  'triple penetration': 'TP',
};
/**
 * Split a scene title into its display title and a trailing studio shoot ID.
 *
 * Only the last space-separated word of the title is inspected. If it contains
 * one of the known studio prefixes (GP, SZ, IV, GIO, AA, GL, BZ, FS) followed
 * by digits, that match becomes the shoot ID and the whole last word is dropped
 * from the title. Otherwise the title is returned unchanged and the ID is null.
 *
 * @param {string} originalTitle - Full title as scraped, possibly ending in a shoot ID.
 * @returns {{ id: (string|null), shootId: (string|null), title: string }}
 *   `shootId` is the detected ID (or null); `id` is the same value under the
 *   older property name; `title` is the title without the ID word.
 */
function extractTitle(originalTitle) {
  const titleComponents = originalTitle.split(' ');
  const sceneIdMatch = titleComponents.slice(-1)[0].match(/(GP|SZ|IV|GIO|AA|GL|BZ|FS)\d+/); // detect studio prefixes

  const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
  const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;

  // A single return carrying both names: previously a second `return` with
  // `shootId` was unreachable, so callers destructuring `shootId` got undefined.
  // `id` is kept as an alias so callers still reading `id` continue to work.
  return { id: shootId, shootId, title };
}
function scrapeLatest(html, site) {
@@ -40,13 +24,13 @@ function scrapeLatest(html, site) {
const url = sceneLinkElement.attr('href');
const originalTitle = sceneLinkElement.attr('title');
const { id, title } = extractTitle(originalTitle);
const { shootId, title } = extractTitle(originalTitle);
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
return {
url,
id,
shootId,
title,
date,
site,
@@ -58,7 +42,7 @@ async function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const originalTitle = $('h1.watchpage-title').text().trim();
const { id, title } = extractTitle(originalTitle);
const { shootId, title } = extractTitle(originalTitle);
const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
@@ -74,7 +58,7 @@ async function scrapeScene(html, url, site) {
return {
url,
id,
shootId,
title,
date,
actors,