From 6b768b6828929e524c94831c0c93dd005c50827a Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sat, 21 Feb 2026 00:56:15 +0100 Subject: [PATCH] Added entity option to bypass deep scraped check on release day update. --- seeds/01_networks.js | 5 +++++ src/updates.js | 29 ++++++++++++++++++--------- src/utils/get-recursive-parameters.js | 6 +++--- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/seeds/01_networks.js b/seeds/01_networks.js index f2fa3287..9d7a6b75 100755 --- a/seeds/01_networks.js +++ b/seeds/01_networks.js @@ -703,6 +703,9 @@ const networks = [ name: 'Nubiles', url: 'https://www.nubiles.com', description: 'Welcome to the teen megasite that started it all! Browse our massive HD collection of fresh legal hotties at Nubiles.net.', + options: { + forceDeepUpdate: true, + }, }, { slug: 'perfectgonzo', @@ -951,6 +954,7 @@ exports.seed = (knex) => Promise.resolve() has_logo: network.hasLogo ?? true, showcased: typeof network.showcased === 'boolean' ? network.showcased : true, parameters: network.parameters, + options: network.options, parent_id: grandParentNetworksBySlug[network.parent] || null, })); @@ -967,6 +971,7 @@ exports.seed = (knex) => Promise.resolve() has_logo: network.hasLogo ?? true, showcased: typeof network.showcased === 'boolean' ? 
network.showcased : true, parameters: network.parameters, + options: network.options, parent_id: parentNetworksBySlug[network.parent] || grandParentNetworksBySlug[network.parent] || null, })); diff --git a/src/updates.js b/src/updates.js index 13fe8a71..0e2db229 100755 --- a/src/updates.js +++ b/src/updates.js @@ -38,7 +38,7 @@ function filterLocalUniqueReleases(releases, accReleases) { }; } -async function filterUniqueReleases(releases) { +async function filterUniqueReleases(releases, options) { const releaseIdentifierChunks = chunk(releases.map((release) => [release.entity.id, release.entryId.toString()])); const duplicateReleaseEntryChunks = await Promise.map(releaseIdentifierChunks, async (releaseIdentifiers) => { @@ -47,9 +47,13 @@ async function filterUniqueReleases(releases) { .leftJoin('entities', 'entities.id', 'releases.entity_id') .whereIn(['entity_id', 'entry_id'], releaseIdentifiers) .where((builder) => { + if (!options?.forceDeepUpdate && !options?.options?.forceDeepUpdate) { // entity-level option lives under options.options (see scrapeReleases) + // we do not want to force deep rescrapes on release day, so if the scene is already deep-scraped, we add it to the duplicate list + builder.where('deep', true); // scene is already deep scraped + } + // check if previously upcoming scenes can be excluded from duplicates to be rescraped for release day updates builder - .where('deep', true) // scene is already deep scraped .orWhereNull('date') .orWhereNotIn('date_precision', ['day', 'minute']) // don't worry about scenes without (accurate) dates for now .orWhere(knex.raw('date > NOW() - INTERVAL \'12 hours\'')) // scene is still upcoming, with a rough offset to wait for the end of the day west of UTC @@ -110,18 +114,23 @@ function needNextPage(pageReleases, accReleases, isUpcoming, unextracted = []) { } async function scrapeReleases(scraper, entity, preData, isUpcoming) { + const options = { + ...config.options[scraper.slug], + ...include, + ...preData, + parameters: getRecursiveParameters(entity), + options: getRecursiveParameters(entity, 'options'), + }; + async
function scrapeReleasesPage(page, accReleases, pageContext) { - const options = { - ...config.options[scraper.slug], - ...include, - ...preData, + const pageOptions = { + ...options, ...pageContext, - parameters: getRecursiveParameters(entity), }; const rawPageReleases = isUpcoming - ? await scraper.fetchUpcoming(entity, page, options, preData) - : await scraper.fetchLatest(entity, page, options, preData); + ? await scraper.fetchUpcoming(entity, page, pageOptions, preData) + : await scraper.fetchLatest(entity, page, pageOptions, preData); const pageReleases = rawPageReleases?.scenes || rawPageReleases; @@ -157,7 +166,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) { const { uniqueReleases, duplicateReleases } = argv.force ? { uniqueReleases: limitedReleases, duplicateReleases: [] } - : await filterUniqueReleases(limitedReleases); + : await filterUniqueReleases(limitedReleases, options); return { uniqueReleases, duplicateReleases }; } diff --git a/src/utils/get-recursive-parameters.js b/src/utils/get-recursive-parameters.js index 96aecd26..438c979b 100755 --- a/src/utils/get-recursive-parameters.js +++ b/src/utils/get-recursive-parameters.js @@ -1,11 +1,11 @@ 'use strict'; -function getRecursiveParameters(entity, parameters) { +function getRecursiveParameters(entity, property = 'parameters', parameters) { if (entity.parent) { - return getRecursiveParameters(entity.parent, { ...entity.parameters, ...parameters }); + return getRecursiveParameters(entity.parent, property, { ...entity[property], ...parameters }); } - return { ...entity.parameters, ...parameters }; + return { ...entity[property], ...parameters }; } module.exports = getRecursiveParameters;