Returning results from new pagination.
This commit is contained in:
parent
013e85cf2a
commit
99a4751c20
Binary file not shown.
Before Width: | Height: | Size: 189 KiB After Width: | Height: | Size: 938 KiB |
Binary file not shown.
Before Width: | Height: | Size: 8.1 KiB After Width: | Height: | Size: 8.0 KiB |
Binary file not shown.
Before Width: | Height: | Size: 35 KiB After Width: | Height: | Size: 34 KiB |
|
@ -13,18 +13,18 @@ const { fetchIncludedEntities } = require('./entities');
|
||||||
|
|
||||||
const emptyReleases = { uniqueReleases: [], duplicateReleases: [] };
|
const emptyReleases = { uniqueReleases: [], duplicateReleases: [] };
|
||||||
|
|
||||||
function mapReleasesToSiteIdAndEntryId(acc, release) {
|
function mapReleasesToEntityIdAndEntryId(acc, release) {
|
||||||
const entityId = release.entityId || release.entity.id;
|
const entityId = release.entityId || release.entity.id;
|
||||||
const entryId = release.entryId || release.entryId;
|
const entryId = release.entryId || release.entryId;
|
||||||
|
|
||||||
if (!acc[entityId]) acc[entityId] = {};
|
if (!acc[entityId]) acc[entityId] = {};
|
||||||
acc[entityId][entryId] = true;
|
acc[entityId][entryId] = release;
|
||||||
|
|
||||||
return acc;
|
return acc;
|
||||||
}
|
}
|
||||||
|
|
||||||
function filterLocalUniqueReleases(releases, accReleases) {
|
function filterLocalUniqueReleases(releases, accReleases) {
|
||||||
const localDuplicateReleasesBySiteIdAndEntryId = accReleases.reduce(mapReleasesToSiteIdAndEntryId, {});
|
const localDuplicateReleasesBySiteIdAndEntryId = accReleases.reduce(mapReleasesToEntityIdAndEntryId, {});
|
||||||
|
|
||||||
const localUniqueReleases = releases.filter(release => !localDuplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]);
|
const localUniqueReleases = releases.filter(release => !localDuplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]);
|
||||||
const localDuplicateReleases = releases.filter(release => localDuplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]);
|
const localDuplicateReleases = releases.filter(release => localDuplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]);
|
||||||
|
@ -35,7 +35,6 @@ function filterLocalUniqueReleases(releases, accReleases) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
async function filterUniqueReleases(releases) {
|
async function filterUniqueReleases(releases) {
|
||||||
const releaseIdentifiers = releases
|
const releaseIdentifiers = releases
|
||||||
.map(release => [release.entity.id, release.entryId]);
|
.map(release => [release.entity.id, release.entryId]);
|
||||||
|
@ -46,43 +45,15 @@ async function filterUniqueReleases(releases) {
|
||||||
.whereIn(['entity_id', 'entry_id'], releaseIdentifiers);
|
.whereIn(['entity_id', 'entry_id'], releaseIdentifiers);
|
||||||
|
|
||||||
const duplicateReleases = duplicateReleaseEntries.map(release => curateRelease(release));
|
const duplicateReleases = duplicateReleaseEntries.map(release => curateRelease(release));
|
||||||
|
const duplicateReleasesByEntityIdAndEntryId = duplicateReleases.reduce(mapReleasesToEntityIdAndEntryId, {});
|
||||||
|
|
||||||
|
const internalUniqueReleasesByEntityIdAndEntryId = releases.reduce((acc, release) => mapReleasesToEntityIdAndEntryId(acc, release), {});
|
||||||
|
const internalUniqueReleases = Object.values(internalUniqueReleasesByEntityIdAndEntryId).map(releasesByEntryId => Object.values(releasesByEntryId)).flat();
|
||||||
|
|
||||||
|
const uniqueReleases = internalUniqueReleases.filter(release => !duplicateReleasesByEntityIdAndEntryId[release.entity.id]?.[release.entryId]);
|
||||||
|
|
||||||
|
return { uniqueReleases, duplicateReleases };
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
async function filterUniqueReleasesLegacy(latestReleases, accReleases) {
|
|
||||||
const latestReleaseIdentifiers = latestReleases
|
|
||||||
.map(release => [release.entity.id, release.entryId]);
|
|
||||||
|
|
||||||
const duplicateReleaseEntries = await knex('releases')
|
|
||||||
.select(knex.raw('releases.*, row_to_json(entities) as entity'))
|
|
||||||
.leftJoin('entities', 'entities.id', 'releases.entity_id')
|
|
||||||
.whereIn(['entity_id', 'entry_id'], latestReleaseIdentifiers);
|
|
||||||
|
|
||||||
const duplicateReleases = duplicateReleaseEntries.map(release => curateRelease(release));
|
|
||||||
|
|
||||||
// add entry IDs of accumulated releases to prevent an infinite scrape loop
|
|
||||||
// when one page contains the same release as the previous
|
|
||||||
const duplicateReleasesBySiteIdAndEntryId = duplicateReleases
|
|
||||||
.concat(accReleases.uniqueReleases)
|
|
||||||
.reduce(mapReleasesToSiteIdAndEntryId, {});
|
|
||||||
|
|
||||||
const localDuplicateReleasesBySiteIdAndEntryId = accReleases.uniqueReleases
|
|
||||||
.concat(accReleases.duplicateReleases)
|
|
||||||
.reduce(mapReleasesToSiteIdAndEntryId, {});
|
|
||||||
|
|
||||||
console.log(localDuplicateReleasesBySiteIdAndEntryId);
|
|
||||||
|
|
||||||
const uniqueReleases = latestReleases.filter(release => !duplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]);
|
|
||||||
const localUniqueReleases = latestReleases.filter(release => !localDuplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]);
|
|
||||||
|
|
||||||
return {
|
|
||||||
uniqueReleases,
|
|
||||||
localUniqueReleases,
|
|
||||||
duplicateReleases,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
function needNextPage(pageReleases, accReleases, isUpcoming) {
|
function needNextPage(pageReleases, accReleases, isUpcoming) {
|
||||||
const { localUniqueReleases: uniquePageReleases } = filterLocalUniqueReleases(pageReleases, accReleases);
|
const { localUniqueReleases: uniquePageReleases } = filterLocalUniqueReleases(pageReleases, accReleases);
|
||||||
|
@ -132,7 +103,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
|
||||||
return accReleases;
|
return accReleases;
|
||||||
}
|
}
|
||||||
|
|
||||||
const pageReleasesWithEntity = pageReleases.map(release => ({ ...release, entity }));
|
const pageReleasesWithEntity = pageReleases.map(release => ({ ...release, entity: release.entity || entity }));
|
||||||
|
|
||||||
if (needNextPage(pageReleasesWithEntity, accReleases, isUpcoming)) {
|
if (needNextPage(pageReleasesWithEntity, accReleases, isUpcoming)) {
|
||||||
return scrapeReleasesPage(page + 1, accReleases.concat(pageReleasesWithEntity), isUpcoming);
|
return scrapeReleasesPage(page + 1, accReleases.concat(pageReleasesWithEntity), isUpcoming);
|
||||||
|
@ -148,19 +119,12 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
|
||||||
|| (hasDates && releases.filter(release => moment(release.date).isAfter(argv.after)))
|
|| (hasDates && releases.filter(release => moment(release.date).isAfter(argv.after)))
|
||||||
|| releases.slice(0, Math.max(argv.nullDateLimit, 0));
|
|| releases.slice(0, Math.max(argv.nullDateLimit, 0));
|
||||||
|
|
||||||
console.log(releases.length, limitedReleases.length);
|
|
||||||
|
|
||||||
/*
|
|
||||||
// attach entity the release is assigned to when stored
|
|
||||||
const releasesWithEntity = limitedReleases.map(release => ({
|
|
||||||
...release,
|
|
||||||
entity: release.entity || entity, // allow override
|
|
||||||
}));
|
|
||||||
|
|
||||||
const { uniqueReleases, duplicateReleases } = argv.force
|
const { uniqueReleases, duplicateReleases } = argv.force
|
||||||
? { uniqueReleases: limitedReleases, localUniqueReleases: releases, duplicateReleases: [] }
|
? { uniqueReleases: limitedReleases, duplicateReleases: [] }
|
||||||
: await filterUniqueReleases(releasesWithEntity);
|
: await filterUniqueReleases(limitedReleases);
|
||||||
*/
|
|
||||||
|
console.log(releases.length, uniqueReleases.length, duplicateReleases.length);
|
||||||
|
return { uniqueReleases, duplicateReleases };
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeLatestReleases(scraper, entity, preData) {
|
async function scrapeLatestReleases(scraper, entity, preData) {
|
||||||
|
|
Loading…
Reference in New Issue