forked from DebaucheryLibrarian/traxxx
Added a before-scene-fetch method (beforeFetchScene) to prevent e.g. unnecessary session requests, and moved scraper assignment to entity lookup. Removed channel URL hostname matching.
This commit is contained in:
parent
6c5d4389fe
commit
100a35b4e8
25
src/deep.js
25
src/deep.js
|
@ -5,7 +5,6 @@ const { mergeAdvanced: merge } = require('object-merge-advanced');
|
||||||
|
|
||||||
const argv = require('./argv');
|
const argv = require('./argv');
|
||||||
const include = require('./utils/argv-include')(argv);
|
const include = require('./utils/argv-include')(argv);
|
||||||
const { resolveScraper, resolveLayoutScraper } = require('./scrapers/resolve');
|
|
||||||
const { fetchReleaseEntities, urlToSiteSlug } = require('./entities');
|
const { fetchReleaseEntities, urlToSiteSlug } = require('./entities');
|
||||||
const logger = require('./logger')(__filename);
|
const logger = require('./logger')(__filename);
|
||||||
const qu = require('./utils/qu');
|
const qu = require('./utils/qu');
|
||||||
|
@ -96,10 +95,9 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const scraper = resolveScraper(entity);
|
const layoutScraper = entity.scraper;
|
||||||
const layoutScraper = resolveLayoutScraper(entity, scraper);
|
|
||||||
|
|
||||||
if (!layoutScraper) {
|
if (!entity.scraper) {
|
||||||
logger.warn(`Could not find scraper for ${baseRelease.url}`);
|
logger.warn(`Could not find scraper for ${baseRelease.url}`);
|
||||||
return baseRelease;
|
return baseRelease;
|
||||||
}
|
}
|
||||||
|
@ -114,13 +112,16 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
||||||
|
|
||||||
const options = {
|
const options = {
|
||||||
...include,
|
...include,
|
||||||
|
beforeFetchScene: entity.preData,
|
||||||
parameters: getRecursiveParameters(entity),
|
parameters: getRecursiveParameters(entity),
|
||||||
};
|
};
|
||||||
|
|
||||||
const scrapedRelease = type === 'scene'
|
const rawScrapedRelease = type === 'scene'
|
||||||
? await fetchScene(layoutScraper, baseRelease.url, entity, baseRelease, options, null)
|
? await fetchScene(layoutScraper, baseRelease.url, entity, baseRelease, options, null)
|
||||||
: await layoutScraper.fetchMovie(baseRelease.url, entity, baseRelease, options, null);
|
: await layoutScraper.fetchMovie(baseRelease.url, entity, baseRelease, options, null);
|
||||||
|
|
||||||
|
const scrapedRelease = rawScrapedRelease?.scene || rawScrapedRelease;
|
||||||
|
|
||||||
if (!scrapedRelease || typeof scrapedRelease !== 'object' || Array.isArray(scrapedRelease)) {
|
if (!scrapedRelease || typeof scrapedRelease !== 'object' || Array.isArray(scrapedRelease)) {
|
||||||
// scraper is unable to fetch the releases and returned a HTTP code or null
|
// scraper is unable to fetch the releases and returned a HTTP code or null
|
||||||
throw new Error(`Scraper returned '${scrapedRelease}' when fetching latest from '${entity.name}' (${entity.parent?.name})`);
|
throw new Error(`Scraper returned '${scrapedRelease}' when fetching latest from '${entity.name}' (${entity.parent?.name})`);
|
||||||
|
@ -170,9 +171,21 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeReleases(baseReleases, entitiesBySlug, type) {
|
async function scrapeReleases(baseReleases, entitiesBySlug, type) {
|
||||||
|
const entitiesWithBeforeDataEntries = await Promise.all(Object.entries(entitiesBySlug).map(async ([slug, entity]) => {
|
||||||
|
if (entity.scraper?.beforeFetchScene) {
|
||||||
|
const preData = await entity.scraper.beforeFetchScene(entity);
|
||||||
|
|
||||||
|
return [slug, { ...entity, preData }];
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}));
|
||||||
|
|
||||||
|
const entitiesWithBeforeDataBySlug = Object.fromEntries(entitiesWithBeforeDataEntries.filter(Boolean));
|
||||||
|
|
||||||
return Promise.map(
|
return Promise.map(
|
||||||
baseReleases,
|
baseReleases,
|
||||||
async baseRelease => scrapeRelease(baseRelease, entitiesBySlug, type),
|
async baseRelease => scrapeRelease(baseRelease, entitiesWithBeforeDataBySlug, type),
|
||||||
{ concurrency: 10 },
|
{ concurrency: 10 },
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@ const argv = require('./argv');
|
||||||
const knex = require('./knex');
|
const knex = require('./knex');
|
||||||
const { deleteScenes, deleteMovies } = require('./releases');
|
const { deleteScenes, deleteMovies } = require('./releases');
|
||||||
const { flushOrphanedMedia } = require('./media');
|
const { flushOrphanedMedia } = require('./media');
|
||||||
|
const { resolveScraper, resolveLayoutScraper } = require('./scrapers/resolve');
|
||||||
|
|
||||||
function getRecursiveParent(entity) {
|
function getRecursiveParent(entity) {
|
||||||
if (!entity) {
|
if (!entity) {
|
||||||
|
@ -71,6 +72,9 @@ function curateEntity(entity, includeParameters = false) {
|
||||||
}, includeParameters));
|
}, includeParameters));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const scraper = resolveScraper(curatedEntity);
|
||||||
|
curatedEntity.scraper = resolveLayoutScraper(entity, scraper);
|
||||||
|
|
||||||
return curatedEntity;
|
return curatedEntity;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -187,7 +191,6 @@ async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') {
|
||||||
array['parent'] as parent_path
|
array['parent'] as parent_path
|
||||||
FROM entities
|
FROM entities
|
||||||
WHERE slug = ANY(:entitySlugs)
|
WHERE slug = ANY(:entitySlugs)
|
||||||
OR substring(url from 'https%://%#"[a-z0-9-]+#".(com|net)%' for '#') = ANY(:entitySlugs)
|
|
||||||
|
|
||||||
UNION ALL
|
UNION ALL
|
||||||
|
|
||||||
|
|
|
@ -294,7 +294,7 @@ async function fetchScene(url, site, baseScene, options) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const entryId = new URL(url).pathname.match(/\/(\d+)/)?.[1];
|
const entryId = new URL(url).pathname.match(/\/(\d+)/)?.[1];
|
||||||
const { session, instanceToken } = await getSession(site, options.parameters);
|
const { session, instanceToken } = options.beforeFetchScene || await getSession(site, options.parameters);
|
||||||
|
|
||||||
const res = await http.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
|
const res = await http.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
|
||||||
session,
|
session,
|
||||||
|
@ -306,7 +306,9 @@ async function fetchScene(url, site, baseScene, options) {
|
||||||
});
|
});
|
||||||
|
|
||||||
if (res.status === 200 && res.body.result) {
|
if (res.status === 200 && res.body.result) {
|
||||||
return scrapeScene(res.body.result, url, site);
|
return {
|
||||||
|
scene: scrapeScene(res.body.result, url, site),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
@ -361,6 +363,7 @@ async function fetchProfile({ name: actorName, slug: actorSlug }, { entity, para
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
beforeFetchScene: getSession,
|
||||||
scrapeLatestX,
|
scrapeLatestX,
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
fetchUpcoming,
|
fetchUpcoming,
|
||||||
|
|
Loading…
Reference in New Issue