Added entity option to bypass the deep-scrape check on release-day updates.
This commit is contained in:
@@ -703,6 +703,9 @@ const networks = [
|
|||||||
name: 'Nubiles',
|
name: 'Nubiles',
|
||||||
url: 'https://www.nubiles.com',
|
url: 'https://www.nubiles.com',
|
||||||
description: 'Welcome to the teen megasite that started it all! Browse our massive HD collection of fresh legal hotties at Nubiles.net.',
|
description: 'Welcome to the teen megasite that started it all! Browse our massive HD collection of fresh legal hotties at Nubiles.net.',
|
||||||
|
options: {
|
||||||
|
forceDeepUpdate: true,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'perfectgonzo',
|
slug: 'perfectgonzo',
|
||||||
@@ -951,6 +954,7 @@ exports.seed = (knex) => Promise.resolve()
|
|||||||
has_logo: network.hasLogo ?? true,
|
has_logo: network.hasLogo ?? true,
|
||||||
showcased: typeof network.showcased === 'boolean' ? network.showcased : true,
|
showcased: typeof network.showcased === 'boolean' ? network.showcased : true,
|
||||||
parameters: network.parameters,
|
parameters: network.parameters,
|
||||||
|
options: network.options,
|
||||||
parent_id: grandParentNetworksBySlug[network.parent] || null,
|
parent_id: grandParentNetworksBySlug[network.parent] || null,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
@@ -967,6 +971,7 @@ exports.seed = (knex) => Promise.resolve()
|
|||||||
has_logo: network.hasLogo ?? true,
|
has_logo: network.hasLogo ?? true,
|
||||||
showcased: typeof network.showcased === 'boolean' ? network.showcased : true,
|
showcased: typeof network.showcased === 'boolean' ? network.showcased : true,
|
||||||
parameters: network.parameters,
|
parameters: network.parameters,
|
||||||
|
options: network.options,
|
||||||
parent_id: parentNetworksBySlug[network.parent] || grandParentNetworksBySlug[network.parent] || null,
|
parent_id: parentNetworksBySlug[network.parent] || grandParentNetworksBySlug[network.parent] || null,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ function filterLocalUniqueReleases(releases, accReleases) {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function filterUniqueReleases(releases) {
|
async function filterUniqueReleases(releases, options) {
|
||||||
const releaseIdentifierChunks = chunk(releases.map((release) => [release.entity.id, release.entryId.toString()]));
|
const releaseIdentifierChunks = chunk(releases.map((release) => [release.entity.id, release.entryId.toString()]));
|
||||||
|
|
||||||
const duplicateReleaseEntryChunks = await Promise.map(releaseIdentifierChunks, async (releaseIdentifiers) => {
|
const duplicateReleaseEntryChunks = await Promise.map(releaseIdentifierChunks, async (releaseIdentifiers) => {
|
||||||
@@ -47,9 +47,13 @@ async function filterUniqueReleases(releases) {
|
|||||||
.leftJoin('entities', 'entities.id', 'releases.entity_id')
|
.leftJoin('entities', 'entities.id', 'releases.entity_id')
|
||||||
.whereIn(['entity_id', 'entry_id'], releaseIdentifiers)
|
.whereIn(['entity_id', 'entry_id'], releaseIdentifiers)
|
||||||
.where((builder) => {
|
.where((builder) => {
|
||||||
|
if (!options?.forceDeepUpdate) {
|
||||||
|
// we do not want to force deep rescrapes on release day, so if the scene is already deep-scraped, we add it to the duplicate list
|
||||||
|
builder.where('deep', true); // scene is already deep scraped
|
||||||
|
}
|
||||||
|
|
||||||
// check if previously upcoming scenes can be excluded from duplicates to be rescraped for release day updates
|
// check if previously upcoming scenes can be excluded from duplicates to be rescraped for release day updates
|
||||||
builder
|
builder
|
||||||
.where('deep', true) // scene is already deep scraped
|
|
||||||
.orWhereNull('date')
|
.orWhereNull('date')
|
||||||
.orWhereNotIn('date_precision', ['day', 'minute']) // don't worry about scenes without (accurate) dates for now
|
.orWhereNotIn('date_precision', ['day', 'minute']) // don't worry about scenes without (accurate) dates for now
|
||||||
.orWhere(knex.raw('date > NOW() - INTERVAL \'12 hours\'')) // scene is still upcoming, with a rough offset to wait for the end of the day west of UTC
|
.orWhere(knex.raw('date > NOW() - INTERVAL \'12 hours\'')) // scene is still upcoming, with a rough offset to wait for the end of the day west of UTC
|
||||||
@@ -110,18 +114,23 @@ function needNextPage(pageReleases, accReleases, isUpcoming, unextracted = []) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeReleases(scraper, entity, preData, isUpcoming) {
|
async function scrapeReleases(scraper, entity, preData, isUpcoming) {
|
||||||
|
const options = {
|
||||||
|
...config.options[scraper.slug],
|
||||||
|
...include,
|
||||||
|
...preData,
|
||||||
|
parameters: getRecursiveParameters(entity),
|
||||||
|
options: getRecursiveParameters(entity, 'options'),
|
||||||
|
};
|
||||||
|
|
||||||
async function scrapeReleasesPage(page, accReleases, pageContext) {
|
async function scrapeReleasesPage(page, accReleases, pageContext) {
|
||||||
const options = {
|
const pageOptions = {
|
||||||
...config.options[scraper.slug],
|
...options,
|
||||||
...include,
|
|
||||||
...preData,
|
|
||||||
...pageContext,
|
...pageContext,
|
||||||
parameters: getRecursiveParameters(entity),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const rawPageReleases = isUpcoming
|
const rawPageReleases = isUpcoming
|
||||||
? await scraper.fetchUpcoming(entity, page, options, preData)
|
? await scraper.fetchUpcoming(entity, page, pageOptions, preData)
|
||||||
: await scraper.fetchLatest(entity, page, options, preData);
|
: await scraper.fetchLatest(entity, page, pageOptions, preData);
|
||||||
|
|
||||||
const pageReleases = rawPageReleases?.scenes || rawPageReleases;
|
const pageReleases = rawPageReleases?.scenes || rawPageReleases;
|
||||||
|
|
||||||
@@ -157,7 +166,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
|
|||||||
|
|
||||||
const { uniqueReleases, duplicateReleases } = argv.force
|
const { uniqueReleases, duplicateReleases } = argv.force
|
||||||
? { uniqueReleases: limitedReleases, duplicateReleases: [] }
|
? { uniqueReleases: limitedReleases, duplicateReleases: [] }
|
||||||
: await filterUniqueReleases(limitedReleases);
|
: await filterUniqueReleases(limitedReleases, options);
|
||||||
|
|
||||||
return { uniqueReleases, duplicateReleases };
|
return { uniqueReleases, duplicateReleases };
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
/**
 * Merges a property object up the entity ancestry chain.
 *
 * Values defined closer to the given entity override values inherited from
 * its ancestors, and any explicitly supplied `parameters` override everything.
 *
 * @param {Object} entity - Entity whose ancestry (via `entity.parent`) is walked.
 * @param {string} [property='parameters'] - Name of the object property to merge
 *   at each level (e.g. 'parameters' or 'options').
 * @param {Object} [parameters] - Extra overrides applied on top of the merged result.
 * @returns {Object} A new object; inputs are not mutated.
 */
function getRecursiveParameters(entity, property = 'parameters', parameters) {
	// Collect the chain from the entity up to its root ancestor.
	const chain = [];

	for (let node = entity; node; node = node.parent) {
		chain.push(node);
	}

	// Merge root-first so descendants win; spreading undefined is a safe no-op
	// for entities that lack the requested property.
	const merged = chain.reduceRight((acc, node) => ({ ...acc, ...node[property] }), {});

	return { ...merged, ...parameters };
}
||||||
|
|
||||||
module.exports = getRecursiveParameters;
|
module.exports = getRecursiveParameters;
|
||||||
|
|||||||
Reference in New Issue
Block a user