Added a parameters column to the sites database, which fixes the Perv City scraper. All existing scrapers now extract the shoot ID.

This commit is contained in:
2019-03-26 01:26:47 +01:00
parent 6a20cbc721
commit 8421cd8648
11 changed files with 45 additions and 37 deletions

View File

@@ -8,13 +8,15 @@ const { matchTags } = require('../tags');
function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const scenesElements = $('.sceneInfo').toArray();
const scenesElements = $('li[data-itemtype=scene]').toArray();
return scenesElements.map((element) => {
const sceneLinkElement = $(element).find('.sceneTitle a');
const url = `${site.url}${sceneLinkElement.attr('href')}`;
const title = sceneLinkElement.attr('title');
const shootId = $(element).attr('data-itemid');
const date = moment
.utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY')
.toDate();
@@ -29,6 +31,7 @@ function scrape(html, site) {
return {
url,
shootId,
title,
actors,
date,
@@ -42,6 +45,7 @@ function scrape(html, site) {
}
async function scrapeSceneFallback($, url, site) {
const shootId = new URL(url).pathname.split('/').slice(-1)[0];
const title = $('h1.title').text();
const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate();
const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
@@ -54,6 +58,7 @@ async function scrapeSceneFallback($, url, site) {
return {
url,
shootId,
title,
date,
actors,
@@ -75,6 +80,7 @@ async function scrapeScene(html, url, site) {
}
const data = JSON.parse(json)[0];
const shootId = new URL(url).pathname.split('/').slice(-1)[0];
const title = data.isPartOf.name;
const date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();
@@ -98,6 +104,7 @@ async function scrapeScene(html, url, site) {
return {
url,
shootId,
title,
date,
actors,