Fixed ID number/string incompatibility in duplicate detection. Expanded Reality Kings scraper to handle older scenes and fix URLs.
This commit is contained in:
parent
88c8bb1ced
commit
bd041c528d
|
@ -133,7 +133,9 @@ async function storeRelease(release) {
|
||||||
async function storeReleases(releases) {
|
async function storeReleases(releases) {
|
||||||
return Promise.map(releases, async (release) => {
|
return Promise.map(releases, async (release) => {
|
||||||
try {
|
try {
|
||||||
return storeRelease(release);
|
const releaseId = await storeRelease(release);
|
||||||
|
|
||||||
|
return releaseId;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(error);
|
console.error(error);
|
||||||
|
|
||||||
|
|
|
@ -24,8 +24,8 @@ async function findDuplicateReleaseIds(latestReleases, accReleases) {
|
||||||
// include accumulated releases as duplicates to prevent an infinite
|
// include accumulated releases as duplicates to prevent an infinite
|
||||||
// loop when the next page contains the same releases as the previous
|
// loop when the next page contains the same releases as the previous
|
||||||
return new Set(duplicateReleases
|
return new Set(duplicateReleases
|
||||||
.map(release => release.entry_id)
|
.map(release => String(release.entry_id))
|
||||||
.concat(accReleases.map(release => release.entryId)));
|
.concat(accReleases.map(release => String(release.entryId))));
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = 1) {
|
async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = 1) {
|
||||||
|
@ -122,11 +122,11 @@ async function scrapeReleases() {
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
if (argv.debug) {
|
if (argv.debug) {
|
||||||
console.error(`${site.id}: Failed to fetch releases`, error);
|
console.error(`${site.id}: Failed to scrape releases`, error);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
console.warn(`${site.id}: Failed to fetch releases`);
|
console.warn(`${site.id}: Failed to scrape releases`);
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
concurrency: 2,
|
concurrency: 2,
|
||||||
|
|
|
@ -8,6 +8,20 @@ const { JSDOM } = require('jsdom');
|
||||||
|
|
||||||
const { matchTags } = require('../tags');
|
const { matchTags } = require('../tags');
|
||||||
|
|
||||||
|
function getThumbs(scene) {
|
||||||
|
if (scene.images.poster) {
|
||||||
|
return scene.images.poster.map(image => image.xl.url);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (scene.images.card_main_rect) {
|
||||||
|
return scene.images.card_main_rect
|
||||||
|
.concat(scene.images.card_secondary_rect || [])
|
||||||
|
.map(image => image.xl.url.replace('.thumb', ''));
|
||||||
|
}
|
||||||
|
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
async function scrapeLatest(html, site) {
|
async function scrapeLatest(html, site) {
|
||||||
const { document } = new JSDOM(html).window;
|
const { document } = new JSDOM(html).window;
|
||||||
|
|
||||||
|
@ -29,24 +43,17 @@ async function scrapeLatest(html, site) {
|
||||||
description,
|
description,
|
||||||
} = scene;
|
} = scene;
|
||||||
|
|
||||||
const url = `https://www.realitykings.com/scene/${entryId}`;
|
const url = `https://www.realitykings.com/scene/${entryId}/`;
|
||||||
const date = new Date(scene.dateReleased);
|
const date = new Date(scene.dateReleased);
|
||||||
const actors = scene.actors.map(actorId => actorsMap[actorId].name);
|
const actors = scene.actors.map(actorId => actorsMap[actorId].name);
|
||||||
|
const duration = scene.videos.mediabook && scene.videos.mediabook.length;
|
||||||
|
|
||||||
const rawTags = scene.tags.map(tagId => tagsMap[tagId].name);
|
const rawTags = scene.tags.map(tagId => tagsMap[tagId].name);
|
||||||
const tags = await matchTags(rawTags);
|
const tags = await matchTags(rawTags);
|
||||||
|
|
||||||
if (!scene.images.poster) {
|
const [poster, ...photos] = getThumbs(scene);
|
||||||
console.log(site.name, site.id);
|
const trailer720p = scene.videos.mediabook && scene.videos.mediabook.files['720p'] && scene.videos.mediabook.files['720p'].urls.view;
|
||||||
console.log(scene);
|
const trailer360p = scene.videos.mediabook && scene.videos.mediabook.files['360p'] && scene.videos.mediabook.files['360p'].urls.view;
|
||||||
console.log(title, url, scene.images);
|
|
||||||
}
|
|
||||||
|
|
||||||
const [poster, ...photos] = scene.images.poster.map(image => image.xl.url);
|
|
||||||
|
|
||||||
const duration = scene.videos.mediabook.length;
|
|
||||||
const trailer720p = scene.videos.mediabook.files['720p'] && scene.videos.mediabook.files['720p'].urls.view;
|
|
||||||
const trailer360p = scene.videos.mediabook.files['360p'] && scene.videos.mediabook.files['360p'].urls.view;
|
|
||||||
|
|
||||||
const { likes, dislikes } = scene.stats;
|
const { likes, dislikes } = scene.stats;
|
||||||
|
|
||||||
|
@ -86,11 +93,11 @@ async function scrapeScene(data, url, site) {
|
||||||
const rawTags = data.tags.map(tag => tag.name);
|
const rawTags = data.tags.map(tag => tag.name);
|
||||||
const tags = await matchTags(rawTags);
|
const tags = await matchTags(rawTags);
|
||||||
|
|
||||||
const [poster, ...photos] = data.images.poster.map(image => image.xl.url);
|
const [poster, ...photos] = getThumbs(data);
|
||||||
|
|
||||||
const duration = data.videos.mediabook.length;
|
const duration = data.videos.mediabook && data.videos.mediabook.length;
|
||||||
const trailer720p = data.videos.mediabook.files['720p'] && data.videos.mediabook.files['720p'].urls.view;
|
const trailer720p = data.videos.mediabook && data.videos.mediabook.files['720p'] && data.videos.mediabook.files['720p'].urls.view;
|
||||||
const trailer360p = data.videos.mediabook.files['360p'] && data.videos.mediabook.files['360p'].urls.view;
|
const trailer360p = data.videos.mediabook && data.videos.mediabook.files['360p'] && data.videos.mediabook.files['360p'].urls.view;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
url,
|
url,
|
||||||
|
|
Loading…
Reference in New Issue