Added a before-scene-fetch method to prevent e.g. unnecessary session requests, and moved scraper assignment to the entity lookup. Removed channel URL hostname matching.
This commit is contained in:
25
src/deep.js
25
src/deep.js
@@ -5,7 +5,6 @@ const { mergeAdvanced: merge } = require('object-merge-advanced');
|
||||
|
||||
const argv = require('./argv');
|
||||
const include = require('./utils/argv-include')(argv);
|
||||
const { resolveScraper, resolveLayoutScraper } = require('./scrapers/resolve');
|
||||
const { fetchReleaseEntities, urlToSiteSlug } = require('./entities');
|
||||
const logger = require('./logger')(__filename);
|
||||
const qu = require('./utils/qu');
|
||||
@@ -96,10 +95,9 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
||||
};
|
||||
}
|
||||
|
||||
const scraper = resolveScraper(entity);
|
||||
const layoutScraper = resolveLayoutScraper(entity, scraper);
|
||||
const layoutScraper = entity.scraper;
|
||||
|
||||
if (!layoutScraper) {
|
||||
if (!entity.scraper) {
|
||||
logger.warn(`Could not find scraper for ${baseRelease.url}`);
|
||||
return baseRelease;
|
||||
}
|
||||
@@ -114,13 +112,16 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
||||
|
||||
const options = {
|
||||
...include,
|
||||
beforeFetchScene: entity.preData,
|
||||
parameters: getRecursiveParameters(entity),
|
||||
};
|
||||
|
||||
const scrapedRelease = type === 'scene'
|
||||
const rawScrapedRelease = type === 'scene'
|
||||
? await fetchScene(layoutScraper, baseRelease.url, entity, baseRelease, options, null)
|
||||
: await layoutScraper.fetchMovie(baseRelease.url, entity, baseRelease, options, null);
|
||||
|
||||
const scrapedRelease = rawScrapedRelease?.scene || rawScrapedRelease;
|
||||
|
||||
if (!scrapedRelease || typeof scrapedRelease !== 'object' || Array.isArray(scrapedRelease)) {
|
||||
// scraper is unable to fetch the releases and returned a HTTP code or null
|
||||
throw new Error(`Scraper returned '${scrapedRelease}' when fetching latest from '${entity.name}' (${entity.parent?.name})`);
|
||||
@@ -170,9 +171,21 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
||||
}
|
||||
|
||||
/**
 * Scrape a batch of base releases, running each scraper's optional
 * `beforeFetchScene` hook once per entity before any release is fetched.
 *
 * The hook result is attached to a shallow copy of the entity as `preData`;
 * the entity objects in `entitiesBySlug` are not mutated. Entities whose
 * scraper has no `beforeFetchScene` hook are passed through unchanged, so
 * every slug in `entitiesBySlug` stays resolvable in `scrapeRelease`.
 *
 * @param {Array<Object>} baseReleases - Releases to scrape in depth.
 * @param {Object<string, Object>} entitiesBySlug - Entities keyed by slug.
 * @param {string} type - Release type forwarded to `scrapeRelease`.
 * @returns {Promise<Array>} Results of `scrapeRelease`, one per base release.
 */
async function scrapeReleases(baseReleases, entitiesBySlug, type) {
	const entitiesWithBeforeDataEntries = await Promise.all(Object.entries(entitiesBySlug).map(async ([slug, entity]) => {
		if (entity.scraper?.beforeFetchScene) {
			// run the hook once per entity instead of once per release
			const preData = await entity.scraper.beforeFetchScene(entity);

			return [slug, { ...entity, preData }];
		}

		// keep entities without a hook; dropping them would leave their releases without an entity
		return [slug, entity];
	}));

	const entitiesWithBeforeDataBySlug = Object.fromEntries(entitiesWithBeforeDataEntries);

	// NOTE(review): Promise.map with a concurrency option is not native; presumably Promise is Bluebird in this file — confirm
	return Promise.map(
		baseReleases,
		async (baseRelease) => scrapeRelease(baseRelease, entitiesWithBeforeDataBySlug, type),
		{ concurrency: 10 },
	);
}
|
||||
|
||||
Reference in New Issue
Block a user