Matching URLs to entity using hostname rather than slug to minimize collisions. Fixed missing Cum Louder POV logo.

This commit is contained in:
DebaucheryLibrarian
2023-06-04 21:50:59 +02:00
parent 7e2840a00d
commit 164757ee26
55 changed files with 67 additions and 24 deletions

View File

@@ -7,7 +7,7 @@ const { mergeAdvanced: merge } = require('object-merge-advanced');
const argv = require('./argv');
const include = require('./utils/argv-include')(argv);
const { fetchReleaseEntities, urlToSiteSlug } = require('./entities');
const { fetchReleaseEntities, urlToHostname } = require('./entities');
const logger = require('./logger')(__filename);
const qu = require('./utils/qu');
const getRecursiveParameters = require('./utils/get-recursive-parameters');
@@ -110,8 +110,11 @@ function fetchMovie(scraper, url, entity, baseRelease, options) {
return fetchScene(scraper, url, entity, baseRelease, options, 'movie');
}
async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
const entity = baseRelease.entity || entitiesBySlug[urlToSiteSlug(baseRelease.url)];
async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') {
const entity = baseRelease.entity || entitiesByHostname[urlToHostname(baseRelease.url)];
console.log(entitiesByHostname);
console.log(entity);
if (!entity) {
logger.warn(`No entity available for ${baseRelease.url}`);
@@ -222,10 +225,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
}
}
async function scrapeReleases(baseReleases, entitiesBySlug, type) {
const entitiesWithBeforeDataEntries = await Promise.all(Object.entries(entitiesBySlug).map(async ([slug, entity]) => {
console.log('scraper', entity.scraper?.beforeFetchScenes);
async function scrapeReleases(baseReleases, entitiesByHostname, type) {
const entitiesWithBeforeDataEntries = await Promise.all(Object.entries(entitiesByHostname).map(async ([slug, entity]) => {
if (entity.scraper?.beforeFetchScenes) {
const parameters = getRecursiveParameters(entity);
const preData = await entity.scraper.beforeFetchScenes(entity, parameters);
@@ -249,9 +250,9 @@ async function scrapeReleases(baseReleases, entitiesBySlug, type) {
/**
 * Deep-scrapes the given releases and returns the results with falsy entries removed.
 *
 * Steps visible here: normalize the input via toBaseReleases(), resolve the entities
 * for those releases via fetchReleaseEntities(), pass both to scrapeReleases(), then
 * filter the outcome with Boolean.
 *
 * NOTE(review): this text looks like a diff view whose +/- markers were lost. The
 * `entitiesBySlug` statements below appear to be the removed side and the
 * `entitiesByHostname` statements the added side of the change described in the
 * commit message — confirm against the repository before treating both as live code.
 *
 * @param {*} baseReleasesOrUrls - Input handed to toBaseReleases(); accepted shapes are not visible here.
 * @param {string} [type='scene'] - Release type forwarded unchanged to scrapeReleases().
 * @returns {Promise<Array>} The truthy entries of what scrapeReleases() resolved to.
 */
async function fetchReleases(baseReleasesOrUrls, type = 'scene') {
const baseReleases = toBaseReleases(baseReleasesOrUrls);
const entitiesBySlug = await fetchReleaseEntities(baseReleases);
// Entity lookup for these releases; presumably keyed by URL hostname, per the variable name — verify in ./entities.
const entitiesByHostname = await fetchReleaseEntities(baseReleases);
const deepReleases = await scrapeReleases(baseReleases, entitiesBySlug, type);
const deepReleases = await scrapeReleases(baseReleases, entitiesByHostname, type);
// Drop falsy results before handing the list back to the caller.
return deepReleases.filter(Boolean);
}