Fixed ID number/string incompatibility in duplicate detection. Expanded Reality Kings scraper to handle older scenes and fix URLs.
This commit is contained in:
parent
88c8bb1ced
commit
bd041c528d
|
@ -133,7 +133,9 @@ async function storeRelease(release) {
|
|||
async function storeReleases(releases) {
|
||||
return Promise.map(releases, async (release) => {
|
||||
try {
|
||||
return storeRelease(release);
|
||||
const releaseId = await storeRelease(release);
|
||||
|
||||
return releaseId;
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
|
||||
|
|
|
@ -24,8 +24,8 @@ async function findDuplicateReleaseIds(latestReleases, accReleases) {
|
|||
// include accumulated releases as duplicates to prevent an infinite
|
||||
// loop when the next page contains the same releases as the previous
|
||||
return new Set(duplicateReleases
|
||||
.map(release => release.entry_id)
|
||||
.concat(accReleases.map(release => release.entryId)));
|
||||
.map(release => String(release.entry_id))
|
||||
.concat(accReleases.map(release => String(release.entryId))));
|
||||
}
|
||||
|
||||
async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = 1) {
|
||||
|
@ -122,11 +122,11 @@ async function scrapeReleases() {
|
|||
}
|
||||
} catch (error) {
|
||||
if (argv.debug) {
|
||||
console.error(`${site.id}: Failed to fetch releases`, error);
|
||||
console.error(`${site.id}: Failed to scrape releases`, error);
|
||||
return;
|
||||
}
|
||||
|
||||
console.warn(`${site.id}: Failed to fetch releases`);
|
||||
console.warn(`${site.id}: Failed to scrape releases`);
|
||||
}
|
||||
}, {
|
||||
concurrency: 2,
|
||||
|
|
|
@ -8,6 +8,20 @@ const { JSDOM } = require('jsdom');
|
|||
|
||||
const { matchTags } = require('../tags');
|
||||
|
||||
/**
 * Collect the image URLs for a scene, poster first.
 *
 * Uses the `xl` rendition of `scene.images.poster` when that list is present.
 * Otherwise falls back to `card_main_rect` followed by the optional
 * `card_secondary_rect`, removing the first '.thumb' occurrence from each URL.
 *
 * @param {object} scene - Scene data; `scene.images` may be absent.
 * @returns {string[]} Image URLs in source order, or an empty array when the
 *   scene has no usable images.
 */
function getThumbs(scene) {
    // Scenes may come without an `images` object; treat that as "no images"
    // instead of throwing on the property access.
    const images = (scene && scene.images) || {};

    // Entries without an `xl` rendition yield undefined and are filtered out below.
    const toXlUrl = image => image && image.xl && image.xl.url;

    if (images.poster) {
        return images.poster
            .map(toXlUrl)
            .filter(Boolean);
    }

    if (images.card_main_rect) {
        return images.card_main_rect
            .concat(images.card_secondary_rect || [])
            .map(toXlUrl)
            .filter(Boolean)
            .map(url => url.replace('.thumb', ''));
    }

    return [];
}
|
||||
|
||||
async function scrapeLatest(html, site) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
|
@ -29,24 +43,17 @@ async function scrapeLatest(html, site) {
|
|||
description,
|
||||
} = scene;
|
||||
|
||||
const url = `https://www.realitykings.com/scene/${entryId}`;
|
||||
const url = `https://www.realitykings.com/scene/${entryId}/`;
|
||||
const date = new Date(scene.dateReleased);
|
||||
const actors = scene.actors.map(actorId => actorsMap[actorId].name);
|
||||
const duration = scene.videos.mediabook && scene.videos.mediabook.length;
|
||||
|
||||
const rawTags = scene.tags.map(tagId => tagsMap[tagId].name);
|
||||
const tags = await matchTags(rawTags);
|
||||
|
||||
if (!scene.images.poster) {
|
||||
console.log(site.name, site.id);
|
||||
console.log(scene);
|
||||
console.log(title, url, scene.images);
|
||||
}
|
||||
|
||||
const [poster, ...photos] = scene.images.poster.map(image => image.xl.url);
|
||||
|
||||
const duration = scene.videos.mediabook.length;
|
||||
const trailer720p = scene.videos.mediabook.files['720p'] && scene.videos.mediabook.files['720p'].urls.view;
|
||||
const trailer360p = scene.videos.mediabook.files['360p'] && scene.videos.mediabook.files['360p'].urls.view;
|
||||
const [poster, ...photos] = getThumbs(scene);
|
||||
const trailer720p = scene.videos.mediabook && scene.videos.mediabook.files['720p'] && scene.videos.mediabook.files['720p'].urls.view;
|
||||
const trailer360p = scene.videos.mediabook && scene.videos.mediabook.files['360p'] && scene.videos.mediabook.files['360p'].urls.view;
|
||||
|
||||
const { likes, dislikes } = scene.stats;
|
||||
|
||||
|
@ -86,11 +93,11 @@ async function scrapeScene(data, url, site) {
|
|||
const rawTags = data.tags.map(tag => tag.name);
|
||||
const tags = await matchTags(rawTags);
|
||||
|
||||
const [poster, ...photos] = data.images.poster.map(image => image.xl.url);
|
||||
const [poster, ...photos] = getThumbs(data);
|
||||
|
||||
const duration = data.videos.mediabook.length;
|
||||
const trailer720p = data.videos.mediabook.files['720p'] && data.videos.mediabook.files['720p'].urls.view;
|
||||
const trailer360p = data.videos.mediabook.files['360p'] && data.videos.mediabook.files['360p'].urls.view;
|
||||
const duration = data.videos.mediabook && data.videos.mediabook.length;
|
||||
const trailer720p = data.videos.mediabook && data.videos.mediabook.files['720p'] && data.videos.mediabook.files['720p'].urls.view;
|
||||
const trailer360p = data.videos.mediabook && data.videos.mediabook.files['360p'] && data.videos.mediabook.files['360p'].urls.view;
|
||||
|
||||
return {
|
||||
url,
|
||||
|
|
Loading…
Reference in New Issue