Added an entity option to bypass the deep-scrape check on release-day updates.
This commit is contained in:
@@ -703,6 +703,9 @@ const networks = [
|
||||
name: 'Nubiles',
|
||||
url: 'https://www.nubiles.com',
|
||||
description: 'Welcome to the teen megasite that started it all! Browse our massive HD collection of fresh legal hotties at Nubiles.net.',
|
||||
options: {
|
||||
forceDeepUpdate: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'perfectgonzo',
|
||||
@@ -951,6 +954,7 @@ exports.seed = (knex) => Promise.resolve()
|
||||
has_logo: network.hasLogo ?? true,
|
||||
showcased: typeof network.showcased === 'boolean' ? network.showcased : true,
|
||||
parameters: network.parameters,
|
||||
options: network.options,
|
||||
parent_id: grandParentNetworksBySlug[network.parent] || null,
|
||||
}));
|
||||
|
||||
@@ -967,6 +971,7 @@ exports.seed = (knex) => Promise.resolve()
|
||||
has_logo: network.hasLogo ?? true,
|
||||
showcased: typeof network.showcased === 'boolean' ? network.showcased : true,
|
||||
parameters: network.parameters,
|
||||
options: network.options,
|
||||
parent_id: parentNetworksBySlug[network.parent] || grandParentNetworksBySlug[network.parent] || null,
|
||||
}));
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ function filterLocalUniqueReleases(releases, accReleases) {
|
||||
};
|
||||
}
|
||||
|
||||
async function filterUniqueReleases(releases) {
|
||||
async function filterUniqueReleases(releases, options) {
|
||||
const releaseIdentifierChunks = chunk(releases.map((release) => [release.entity.id, release.entryId.toString()]));
|
||||
|
||||
const duplicateReleaseEntryChunks = await Promise.map(releaseIdentifierChunks, async (releaseIdentifiers) => {
|
||||
@@ -47,9 +47,13 @@ async function filterUniqueReleases(releases) {
|
||||
.leftJoin('entities', 'entities.id', 'releases.entity_id')
|
||||
.whereIn(['entity_id', 'entry_id'], releaseIdentifiers)
|
||||
.where((builder) => {
|
||||
if (!options?.forceDeepUpdate) {
|
||||
// we do not want to force deep rescrapes on release day, so if the scene is already deep-scraped, we add it to the duplicate list
|
||||
builder.where('deep', true); // scene is already deep scraped
|
||||
}
|
||||
|
||||
// check if previously upcoming scenes can be excluded from duplicates to be rescraped for release day updates
|
||||
builder
|
||||
.where('deep', true) // scene is already deep scraped
|
||||
.orWhereNull('date')
|
||||
.orWhereNotIn('date_precision', ['day', 'minute']) // don't worry about scenes without (accurate) dates for now
|
||||
.orWhere(knex.raw('date > NOW() - INTERVAL \'12 hours\'')) // scene is still upcoming, with a rough offset to wait for the end of the day west of UTC
|
||||
@@ -110,18 +114,23 @@ function needNextPage(pageReleases, accReleases, isUpcoming, unextracted = []) {
|
||||
}
|
||||
|
||||
async function scrapeReleases(scraper, entity, preData, isUpcoming) {
|
||||
const options = {
|
||||
...config.options[scraper.slug],
|
||||
...include,
|
||||
...preData,
|
||||
parameters: getRecursiveParameters(entity),
|
||||
options: getRecursiveParameters(entity, 'options'),
|
||||
};
|
||||
|
||||
async function scrapeReleasesPage(page, accReleases, pageContext) {
|
||||
const options = {
|
||||
...config.options[scraper.slug],
|
||||
...include,
|
||||
...preData,
|
||||
const pageOptions = {
|
||||
...options,
|
||||
...pageContext,
|
||||
parameters: getRecursiveParameters(entity),
|
||||
};
|
||||
|
||||
const rawPageReleases = isUpcoming
|
||||
? await scraper.fetchUpcoming(entity, page, options, preData)
|
||||
: await scraper.fetchLatest(entity, page, options, preData);
|
||||
? await scraper.fetchUpcoming(entity, page, pageOptions, preData)
|
||||
: await scraper.fetchLatest(entity, page, pageOptions, preData);
|
||||
|
||||
const pageReleases = rawPageReleases?.scenes || rawPageReleases;
|
||||
|
||||
@@ -157,7 +166,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
|
||||
|
||||
const { uniqueReleases, duplicateReleases } = argv.force
|
||||
? { uniqueReleases: limitedReleases, duplicateReleases: [] }
|
||||
: await filterUniqueReleases(limitedReleases);
|
||||
: await filterUniqueReleases(limitedReleases, options);
|
||||
|
||||
return { uniqueReleases, duplicateReleases };
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
'use strict';

/**
 * Merges an entity property (default: 'parameters') up the entity's ancestor
 * chain, so that values defined closer to the given entity override values
 * defined on its parents.
 *
 * @param {Object} entity - Entity to start from; may reference a `parent` entity.
 * @param {string} [property='parameters'] - Name of the property to merge (e.g. 'options').
 * @param {Object} [parameters] - Accumulated descendant values; these take
 *   precedence over `entity[property]`. Callers normally omit this.
 * @returns {Object} Merged values: root ancestor's first, each descendant overriding.
 */
function getRecursiveParameters(entity, property = 'parameters', parameters) {
	if (entity.parent) {
		// Recurse upward: the current entity's values override its ancestors',
		// and the values accumulated from descendants override the current entity's.
		return getRecursiveParameters(entity.parent, property, { ...entity[property], ...parameters });
	}

	return { ...entity[property], ...parameters };
}
|
||||
|
||||
module.exports = getRecursiveParameters;
|
||||
|
||||
Reference in New Issue
Block a user