Added an entity option to bypass the deep-scraped check on release-day updates.

This commit is contained in:
DebaucheryLibrarian
2026-02-21 00:56:15 +01:00
parent 0f44b4d410
commit 6b768b6828
3 changed files with 27 additions and 13 deletions

View File

@@ -703,6 +703,9 @@ const networks = [
name: 'Nubiles',
url: 'https://www.nubiles.com',
description: 'Welcome to the teen megasite that started it all! Browse our massive HD collection of fresh legal hotties at Nubiles.net.',
options: {
forceDeepUpdate: true,
},
},
{
slug: 'perfectgonzo',
@@ -951,6 +954,7 @@ exports.seed = (knex) => Promise.resolve()
has_logo: network.hasLogo ?? true,
showcased: typeof network.showcased === 'boolean' ? network.showcased : true,
parameters: network.parameters,
options: network.options,
parent_id: grandParentNetworksBySlug[network.parent] || null,
}));
@@ -967,6 +971,7 @@ exports.seed = (knex) => Promise.resolve()
has_logo: network.hasLogo ?? true,
showcased: typeof network.showcased === 'boolean' ? network.showcased : true,
parameters: network.parameters,
options: network.options,
parent_id: parentNetworksBySlug[network.parent] || grandParentNetworksBySlug[network.parent] || null,
}));

View File

@@ -38,7 +38,7 @@ function filterLocalUniqueReleases(releases, accReleases) {
};
}
async function filterUniqueReleases(releases) {
async function filterUniqueReleases(releases, options) {
const releaseIdentifierChunks = chunk(releases.map((release) => [release.entity.id, release.entryId.toString()]));
const duplicateReleaseEntryChunks = await Promise.map(releaseIdentifierChunks, async (releaseIdentifiers) => {
@@ -47,9 +47,13 @@ async function filterUniqueReleases(releases) {
.leftJoin('entities', 'entities.id', 'releases.entity_id')
.whereIn(['entity_id', 'entry_id'], releaseIdentifiers)
.where((builder) => {
if (!options?.forceDeepUpdate) {
// we do not want to force deep rescrapes on release day, so if the scene is already deep-scraped, we add it to the duplicate list
builder.where('deep', true); // scene is already deep scraped
}
// check if previously upcoming scenes can be excluded from duplicates to be rescraped for release day updates
builder
.where('deep', true) // scene is already deep scraped
.orWhereNull('date')
.orWhereNotIn('date_precision', ['day', 'minute']) // don't worry about scenes without (accurate) dates for now
.orWhere(knex.raw('date > NOW() - INTERVAL \'12 hours\'')) // scene is still upcoming, with a rough offset to wait for the end of the day west of UTC
@@ -110,18 +114,23 @@ function needNextPage(pageReleases, accReleases, isUpcoming, unextracted = []) {
}
async function scrapeReleases(scraper, entity, preData, isUpcoming) {
const options = {
...config.options[scraper.slug],
...include,
...preData,
parameters: getRecursiveParameters(entity),
options: getRecursiveParameters(entity, 'options'),
};
async function scrapeReleasesPage(page, accReleases, pageContext) {
const options = {
...config.options[scraper.slug],
...include,
...preData,
const pageOptions = {
...options,
...pageContext,
parameters: getRecursiveParameters(entity),
};
const rawPageReleases = isUpcoming
? await scraper.fetchUpcoming(entity, page, options, preData)
: await scraper.fetchLatest(entity, page, options, preData);
? await scraper.fetchUpcoming(entity, page, pageOptions, preData)
: await scraper.fetchLatest(entity, page, pageOptions, preData);
const pageReleases = rawPageReleases?.scenes || rawPageReleases;
@@ -157,7 +166,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
const { uniqueReleases, duplicateReleases } = argv.force
? { uniqueReleases: limitedReleases, duplicateReleases: [] }
: await filterUniqueReleases(limitedReleases);
: await filterUniqueReleases(limitedReleases, options);
return { uniqueReleases, duplicateReleases };
}

View File

@@ -1,11 +1,11 @@
'use strict';
/**
 * Merge one object-valued property of an entity with the same property of
 * every ancestor reachable through `entity.parent`.
 *
 * Values closer to the starting entity take precedence: a key set on the
 * entity overrides the same key on its parent, which overrides the
 * grandparent, and so on. Keys in `parameters` override everything.
 *
 * @param {object} entity - Entity to start from; may carry a `parent` entity.
 * @param {string} [property='parameters'] - Name of the property to collect
 *   from each entity in the chain (e.g. 'parameters' or 'options').
 * @param {object} [parameters] - Values that take precedence over anything
 *   found on the entity chain.
 * @returns {object} A new, shallowly merged object; inputs are not mutated.
 */
function getRecursiveParameters(entity, property = 'parameters', parameters) {
	// Spreading a missing (undefined/null) property is a no-op, so entities
	// without the property simply contribute nothing.
	let merged = { ...entity[property], ...parameters };

	// Walk up the ancestry; already-collected (more specific) values are
	// spread last so they win over the ancestor's values.
	for (let ancestor = entity.parent; ancestor; ancestor = ancestor.parent) {
		merged = { ...ancestor[property], ...merged };
	}

	return merged;
}
module.exports = getRecursiveParameters;