Fixed ID number/string incompatibility in duplicate detection. Expanded Reality Kings scraper to handle older scenes and fix URLs.

This commit is contained in:
ThePendulum 2019-11-17 00:45:31 +01:00
parent 88c8bb1ced
commit bd041c528d
4 changed files with 32 additions and 21 deletions

View File

@ -133,7 +133,9 @@ async function storeRelease(release) {
async function storeReleases(releases) {
return Promise.map(releases, async (release) => {
try {
return storeRelease(release);
const releaseId = await storeRelease(release);
return releaseId;
} catch (error) {
console.error(error);

View File

@ -24,8 +24,8 @@ async function findDuplicateReleaseIds(latestReleases, accReleases) {
// include accumulated releases as duplicates to prevent an infinite
// loop when the next page contains the same releases as the previous
return new Set(duplicateReleases
.map(release => release.entry_id)
.concat(accReleases.map(release => release.entryId)));
.map(release => String(release.entry_id))
.concat(accReleases.map(release => String(release.entryId))));
}
async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = 1) {
@ -122,11 +122,11 @@ async function scrapeReleases() {
}
} catch (error) {
if (argv.debug) {
console.error(`${site.id}: Failed to fetch releases`, error);
console.error(`${site.id}: Failed to scrape releases`, error);
return;
}
console.warn(`${site.id}: Failed to fetch releases`);
console.warn(`${site.id}: Failed to scrape releases`);
}
}, {
concurrency: 2,

View File

@ -8,6 +8,20 @@ const { JSDOM } = require('jsdom');
const { matchTags } = require('../tags');
/**
 * Collect the thumbnail URLs available for a scene.
 *
 * Poster images take precedence. When the scene has no poster set, the
 * main card images (followed by the secondary card images, if any) are
 * used instead, with the first '.thumb' occurrence stripped from each URL.
 *
 * @param {object} scene - Scene data exposing an `images` object.
 * @returns {string[]} The `xl` URLs of the selected images, or an empty
 *   array when neither a poster nor main card images are present.
 */
function getThumbs(scene) {
  const { images } = scene;

  if (images.poster) {
    return images.poster.map(({ xl }) => xl.url);
  }

  // Nothing to fall back on without the main card images.
  if (!images.card_main_rect) {
    return [];
  }

  const secondaryCards = images.card_secondary_rect || [];
  const cards = images.card_main_rect.concat(secondaryCards);

  return cards.map(({ xl }) => xl.url.replace('.thumb', ''));
}
async function scrapeLatest(html, site) {
const { document } = new JSDOM(html).window;
@ -29,24 +43,17 @@ async function scrapeLatest(html, site) {
description,
} = scene;
const url = `https://www.realitykings.com/scene/${entryId}`;
const url = `https://www.realitykings.com/scene/${entryId}/`;
const date = new Date(scene.dateReleased);
const actors = scene.actors.map(actorId => actorsMap[actorId].name);
const duration = scene.videos.mediabook && scene.videos.mediabook.length;
const rawTags = scene.tags.map(tagId => tagsMap[tagId].name);
const tags = await matchTags(rawTags);
if (!scene.images.poster) {
console.log(site.name, site.id);
console.log(scene);
console.log(title, url, scene.images);
}
const [poster, ...photos] = scene.images.poster.map(image => image.xl.url);
const duration = scene.videos.mediabook.length;
const trailer720p = scene.videos.mediabook.files['720p'] && scene.videos.mediabook.files['720p'].urls.view;
const trailer360p = scene.videos.mediabook.files['360p'] && scene.videos.mediabook.files['360p'].urls.view;
const [poster, ...photos] = getThumbs(scene);
const trailer720p = scene.videos.mediabook && scene.videos.mediabook.files['720p'] && scene.videos.mediabook.files['720p'].urls.view;
const trailer360p = scene.videos.mediabook && scene.videos.mediabook.files['360p'] && scene.videos.mediabook.files['360p'].urls.view;
const { likes, dislikes } = scene.stats;
@ -86,11 +93,11 @@ async function scrapeScene(data, url, site) {
const rawTags = data.tags.map(tag => tag.name);
const tags = await matchTags(rawTags);
const [poster, ...photos] = data.images.poster.map(image => image.xl.url);
const [poster, ...photos] = getThumbs(data);
const duration = data.videos.mediabook.length;
const trailer720p = data.videos.mediabook.files['720p'] && data.videos.mediabook.files['720p'].urls.view;
const trailer360p = data.videos.mediabook.files['360p'] && data.videos.mediabook.files['360p'].urls.view;
const duration = data.videos.mediabook && data.videos.mediabook.length;
const trailer720p = data.videos.mediabook && data.videos.mediabook.files['720p'] && data.videos.mediabook.files['720p'].urls.view;
const trailer360p = data.videos.mediabook && data.videos.mediabook.files['360p'] && data.videos.mediabook.files['360p'].urls.view;
return {
url,

2
traxxx Executable file
View File

@ -0,0 +1,2 @@
#!/usr/bin/bash
# Launcher script: runs the Node entry point ./src/app.js and forwards every
# command-line argument to it unchanged ("$@" keeps each argument as its own word).
# The ./src/app.js path is relative, so it is resolved against the current
# working directory of the caller.
# NOTE(review): the shebang assumes bash is installed at /usr/bin/bash — confirm
# this holds on all target systems.
node ./src/app.js "$@";