Fixed ID number/string incompatibility in duplicate detection. Expanded Reality Kings scraper to handle older scenes and fix URLs.

This commit is contained in:
2019-11-17 00:45:31 +01:00
parent 88c8bb1ced
commit bd041c528d
4 changed files with 32 additions and 21 deletions

View File

@@ -8,6 +8,20 @@ const { JSDOM } = require('jsdom');
const { matchTags } = require('../tags');
/**
 * Build the list of thumbnail URLs for a scene from its `images` object.
 *
 * Selection order:
 *  1. `images.poster` — the `xl.url` of every entry, returned unchanged.
 *  2. `images.card_main_rect` followed by `images.card_secondary_rect`
 *     (when present) — the `xl.url` of every entry with the first
 *     occurrence of `.thumb` removed.
 *  3. Neither present — an empty array.
 *
 * @param {Object} scene - Scene record; must carry an `images` object.
 * @returns {string[]} Thumbnail URLs, possibly empty.
 */
function getThumbs(scene) {
    const {
        poster,
        card_main_rect: mainCards,
        card_secondary_rect: secondaryCards,
    } = scene.images;

    if (poster) {
        return poster.map(({ xl }) => xl.url);
    }

    if (!mainCards) {
        return [];
    }

    // Secondary cards are optional; main cards always come first in the result.
    const cards = mainCards.concat(secondaryCards || []);

    // A string pattern only strips the first '.thumb' in each URL.
    return cards.map(({ xl }) => xl.url.replace('.thumb', ''));
}
async function scrapeLatest(html, site) {
const { document } = new JSDOM(html).window;
@@ -29,24 +43,17 @@ async function scrapeLatest(html, site) {
description,
} = scene;
const url = `https://www.realitykings.com/scene/${entryId}`;
const url = `https://www.realitykings.com/scene/${entryId}/`;
const date = new Date(scene.dateReleased);
const actors = scene.actors.map(actorId => actorsMap[actorId].name);
const duration = scene.videos.mediabook && scene.videos.mediabook.length;
const rawTags = scene.tags.map(tagId => tagsMap[tagId].name);
const tags = await matchTags(rawTags);
if (!scene.images.poster) {
console.log(site.name, site.id);
console.log(scene);
console.log(title, url, scene.images);
}
const [poster, ...photos] = scene.images.poster.map(image => image.xl.url);
const duration = scene.videos.mediabook.length;
const trailer720p = scene.videos.mediabook.files['720p'] && scene.videos.mediabook.files['720p'].urls.view;
const trailer360p = scene.videos.mediabook.files['360p'] && scene.videos.mediabook.files['360p'].urls.view;
const [poster, ...photos] = getThumbs(scene);
const trailer720p = scene.videos.mediabook && scene.videos.mediabook.files['720p'] && scene.videos.mediabook.files['720p'].urls.view;
const trailer360p = scene.videos.mediabook && scene.videos.mediabook.files['360p'] && scene.videos.mediabook.files['360p'].urls.view;
const { likes, dislikes } = scene.stats;
@@ -86,11 +93,11 @@ async function scrapeScene(data, url, site) {
const rawTags = data.tags.map(tag => tag.name);
const tags = await matchTags(rawTags);
const [poster, ...photos] = data.images.poster.map(image => image.xl.url);
const [poster, ...photos] = getThumbs(data);
const duration = data.videos.mediabook.length;
const trailer720p = data.videos.mediabook.files['720p'] && data.videos.mediabook.files['720p'].urls.view;
const trailer360p = data.videos.mediabook.files['360p'] && data.videos.mediabook.files['360p'].urls.view;
const duration = data.videos.mediabook && data.videos.mediabook.length;
const trailer720p = data.videos.mediabook && data.videos.mediabook.files['720p'] && data.videos.mediabook.files['720p'].urls.view;
const trailer360p = data.videos.mediabook && data.videos.mediabook.files['360p'] && data.videos.mediabook.files['360p'].urls.view;
return {
url,