Added entity option to bypass the deep-scrape check during release-day updates.

This commit is contained in:
DebaucheryLibrarian
2026-02-21 00:56:15 +01:00
parent 0f44b4d410
commit 6b768b6828
3 changed files with 27 additions and 13 deletions

View File

@@ -703,6 +703,9 @@ const networks = [
name: 'Nubiles', name: 'Nubiles',
url: 'https://www.nubiles.com', url: 'https://www.nubiles.com',
description: 'Welcome to the teen megasite that started it all! Browse our massive HD collection of fresh legal hotties at Nubiles.net.', description: 'Welcome to the teen megasite that started it all! Browse our massive HD collection of fresh legal hotties at Nubiles.net.',
options: {
forceDeepUpdate: true,
},
}, },
{ {
slug: 'perfectgonzo', slug: 'perfectgonzo',
@@ -951,6 +954,7 @@ exports.seed = (knex) => Promise.resolve()
has_logo: network.hasLogo ?? true, has_logo: network.hasLogo ?? true,
showcased: typeof network.showcased === 'boolean' ? network.showcased : true, showcased: typeof network.showcased === 'boolean' ? network.showcased : true,
parameters: network.parameters, parameters: network.parameters,
options: network.options,
parent_id: grandParentNetworksBySlug[network.parent] || null, parent_id: grandParentNetworksBySlug[network.parent] || null,
})); }));
@@ -967,6 +971,7 @@ exports.seed = (knex) => Promise.resolve()
has_logo: network.hasLogo ?? true, has_logo: network.hasLogo ?? true,
showcased: typeof network.showcased === 'boolean' ? network.showcased : true, showcased: typeof network.showcased === 'boolean' ? network.showcased : true,
parameters: network.parameters, parameters: network.parameters,
options: network.options,
parent_id: parentNetworksBySlug[network.parent] || grandParentNetworksBySlug[network.parent] || null, parent_id: parentNetworksBySlug[network.parent] || grandParentNetworksBySlug[network.parent] || null,
})); }));

View File

@@ -38,7 +38,7 @@ function filterLocalUniqueReleases(releases, accReleases) {
}; };
} }
async function filterUniqueReleases(releases) { async function filterUniqueReleases(releases, options) {
const releaseIdentifierChunks = chunk(releases.map((release) => [release.entity.id, release.entryId.toString()])); const releaseIdentifierChunks = chunk(releases.map((release) => [release.entity.id, release.entryId.toString()]));
const duplicateReleaseEntryChunks = await Promise.map(releaseIdentifierChunks, async (releaseIdentifiers) => { const duplicateReleaseEntryChunks = await Promise.map(releaseIdentifierChunks, async (releaseIdentifiers) => {
@@ -47,9 +47,13 @@ async function filterUniqueReleases(releases) {
.leftJoin('entities', 'entities.id', 'releases.entity_id') .leftJoin('entities', 'entities.id', 'releases.entity_id')
.whereIn(['entity_id', 'entry_id'], releaseIdentifiers) .whereIn(['entity_id', 'entry_id'], releaseIdentifiers)
.where((builder) => { .where((builder) => {
if (!options?.forceDeepUpdate) {
// we do not want to force deep rescrapes on release day, so if the scene is already deep-scraped, we add it to the duplicate list
builder.where('deep', true); // scene is already deep scraped
}
// check if previously upcoming scenes can be excluded from duplicates to be rescraped for release day updates // check if previously upcoming scenes can be excluded from duplicates to be rescraped for release day updates
builder builder
.where('deep', true) // scene is already deep scraped
.orWhereNull('date') .orWhereNull('date')
.orWhereNotIn('date_precision', ['day', 'minute']) // don't worry about scenes without (accurate) dates for now .orWhereNotIn('date_precision', ['day', 'minute']) // don't worry about scenes without (accurate) dates for now
.orWhere(knex.raw('date > NOW() - INTERVAL \'12 hours\'')) // scene is still upcoming, with a rough offset to wait for the end of the day west of UTC .orWhere(knex.raw('date > NOW() - INTERVAL \'12 hours\'')) // scene is still upcoming, with a rough offset to wait for the end of the day west of UTC
@@ -110,18 +114,23 @@ function needNextPage(pageReleases, accReleases, isUpcoming, unextracted = []) {
} }
async function scrapeReleases(scraper, entity, preData, isUpcoming) { async function scrapeReleases(scraper, entity, preData, isUpcoming) {
const options = {
...config.options[scraper.slug],
...include,
...preData,
parameters: getRecursiveParameters(entity),
options: getRecursiveParameters(entity, 'options'),
};
async function scrapeReleasesPage(page, accReleases, pageContext) { async function scrapeReleasesPage(page, accReleases, pageContext) {
const options = { const pageOptions = {
...config.options[scraper.slug], ...options,
...include,
...preData,
...pageContext, ...pageContext,
parameters: getRecursiveParameters(entity),
}; };
const rawPageReleases = isUpcoming const rawPageReleases = isUpcoming
? await scraper.fetchUpcoming(entity, page, options, preData) ? await scraper.fetchUpcoming(entity, page, pageOptions, preData)
: await scraper.fetchLatest(entity, page, options, preData); : await scraper.fetchLatest(entity, page, pageOptions, preData);
const pageReleases = rawPageReleases?.scenes || rawPageReleases; const pageReleases = rawPageReleases?.scenes || rawPageReleases;
@@ -157,7 +166,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
const { uniqueReleases, duplicateReleases } = argv.force const { uniqueReleases, duplicateReleases } = argv.force
? { uniqueReleases: limitedReleases, duplicateReleases: [] } ? { uniqueReleases: limitedReleases, duplicateReleases: [] }
: await filterUniqueReleases(limitedReleases); : await filterUniqueReleases(limitedReleases, options);
return { uniqueReleases, duplicateReleases }; return { uniqueReleases, duplicateReleases };
} }

View File

@@ -1,11 +1,11 @@
'use strict'; 'use strict';
/**
 * Recursively merges a property object ('parameters' by default) from an
 * entity and all of its ancestors via the `parent` chain.
 *
 * Values from deeper (child) entities override those of their ancestors,
 * because the accumulated child values are spread last on each step.
 *
 * @param {Object} entity - Entity with an optional `parent` reference and the property object to merge.
 * @param {string} [property='parameters'] - Name of the property to collect (e.g. 'parameters' or 'options').
 * @param {Object} [parameters] - Accumulator used internally during recursion; callers normally omit it.
 * @returns {Object} Merged property object, child-most values winning.
 */
function getRecursiveParameters(entity, property = 'parameters', parameters) {
	if (entity.parent) {
		// Walk up the chain; accumulated child values take precedence over the parent's.
		return getRecursiveParameters(entity.parent, property, { ...entity[property], ...parameters });
	}

	// Root of the chain: merge the root's values under the accumulated child values.
	return { ...entity[property], ...parameters };
}
module.exports = getRecursiveParameters; module.exports = getRecursiveParameters;