Returning results from new pagination.
parent 013e85cf2a
commit 99a4751c20
Binary file not shown. Before: 189 KiB, after: 938 KiB.
Binary file not shown. Before: 8.1 KiB, after: 8.0 KiB.
Binary file not shown. Before: 35 KiB, after: 34 KiB.
@@ -13,18 +13,18 @@ const { fetchIncludedEntities } = require('./entities');

const emptyReleases = { uniqueReleases: [], duplicateReleases: [] };

-function mapReleasesToSiteIdAndEntryId(acc, release) {
+function mapReleasesToEntityIdAndEntryId(acc, release) {
	const entityId = release.entityId || release.entity.id;
	const entryId = release.entryId || release.entryId;

	if (!acc[entityId]) acc[entityId] = {};
-	acc[entityId][entryId] = true;
+	acc[entityId][entryId] = release;

	return acc;
}

function filterLocalUniqueReleases(releases, accReleases) {
-	const localDuplicateReleasesBySiteIdAndEntryId = accReleases.reduce(mapReleasesToSiteIdAndEntryId, {});
+	const localDuplicateReleasesBySiteIdAndEntryId = accReleases.reduce(mapReleasesToEntityIdAndEntryId, {});

	const localUniqueReleases = releases.filter(release => !localDuplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]);
	const localDuplicateReleases = releases.filter(release => localDuplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]);
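Note: a minimal sketch of how the renamed reducer and filterLocalUniqueReleases fit together, using made-up release objects that only assume the { entity: { id }, entryId } shape visible above. Now that the reducer stores the release itself instead of true, the same index can later be flattened back into a list of releases.

// Hypothetical usage, not part of the commit.
const accReleases = [
	{ entity: { id: 1 }, entryId: 'a', title: 'already accumulated' },
];

const pageReleases = [
	{ entity: { id: 1 }, entryId: 'a', title: 'already accumulated' }, // duplicate of an accumulated release
	{ entity: { id: 1 }, entryId: 'b', title: 'new on this page' },
];

// The reducer builds a two-level index: entityId -> entryId -> release.
const seen = accReleases.reduce(mapReleasesToEntityIdAndEntryId, {});
// seen -> { 1: { a: { entity: { id: 1 }, entryId: 'a', ... } } }

const { localUniqueReleases } = filterLocalUniqueReleases(pageReleases, accReleases);
// localUniqueReleases -> only the entryId 'b' release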
@@ -35,7 +35,6 @@ function filterLocalUniqueReleases(releases, accReleases) {
	};
}

-/*
async function filterUniqueReleases(releases) {
	const releaseIdentifiers = releases
		.map(release => [release.entity.id, release.entryId]);
@@ -46,43 +45,15 @@ async function filterUniqueReleases(releases) {
		.whereIn(['entity_id', 'entry_id'], releaseIdentifiers);

	const duplicateReleases = duplicateReleaseEntries.map(release => curateRelease(release));
	const duplicateReleasesByEntityIdAndEntryId = duplicateReleases.reduce(mapReleasesToEntityIdAndEntryId, {});

	const internalUniqueReleasesByEntityIdAndEntryId = releases.reduce((acc, release) => mapReleasesToEntityIdAndEntryId(acc, release), {});
	const internalUniqueReleases = Object.values(internalUniqueReleasesByEntityIdAndEntryId).map(releasesByEntryId => Object.values(releasesByEntryId)).flat();

	const uniqueReleases = internalUniqueReleases.filter(release => !duplicateReleasesByEntityIdAndEntryId[release.entity.id]?.[release.entryId]);

	return { uniqueReleases, duplicateReleases };
}
-*/
-
-/*
-async function filterUniqueReleasesLegacy(latestReleases, accReleases) {
-	const latestReleaseIdentifiers = latestReleases
-		.map(release => [release.entity.id, release.entryId]);
-
-	const duplicateReleaseEntries = await knex('releases')
-		.select(knex.raw('releases.*, row_to_json(entities) as entity'))
-		.leftJoin('entities', 'entities.id', 'releases.entity_id')
-		.whereIn(['entity_id', 'entry_id'], latestReleaseIdentifiers);
-
-	const duplicateReleases = duplicateReleaseEntries.map(release => curateRelease(release));
-
-	// add entry IDs of accumulated releases to prevent an infinite scrape loop
-	// when one page contains the same release as the previous
-	const duplicateReleasesBySiteIdAndEntryId = duplicateReleases
-		.concat(accReleases.uniqueReleases)
-		.reduce(mapReleasesToSiteIdAndEntryId, {});
-
-	const localDuplicateReleasesBySiteIdAndEntryId = accReleases.uniqueReleases
-		.concat(accReleases.duplicateReleases)
-		.reduce(mapReleasesToSiteIdAndEntryId, {});
-
-	console.log(localDuplicateReleasesBySiteIdAndEntryId);
-
-	const uniqueReleases = latestReleases.filter(release => !duplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]);
-	const localUniqueReleases = latestReleases.filter(release => !localDuplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]);
-
-	return {
-		uniqueReleases,
-		localUniqueReleases,
-		duplicateReleases,
-	};
-}
-*/

function needNextPage(pageReleases, accReleases, isUpcoming) {
	const { localUniqueReleases: uniquePageReleases } = filterLocalUniqueReleases(pageReleases, accReleases);
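The now-active filterUniqueReleases first deduplicates a batch against itself before checking the database. Because the reducer stores the release rather than true, flattening the two-level index yields exactly one release per (entity, entry) pair. A self-contained sketch of that internal step, with made-up data and the entry ID lookup slightly simplified:

// Sketch only; a simplified copy of the reducer shown in this diff.
function mapReleasesToEntityIdAndEntryId(acc, release) {
	const entityId = release.entityId || release.entity.id;
	const entryId = release.entryId;

	if (!acc[entityId]) acc[entityId] = {};
	acc[entityId][entryId] = release;

	return acc;
}

const batch = [
	{ entity: { id: 1 }, entryId: 'a', title: 'first occurrence' },
	{ entity: { id: 1 }, entryId: 'a', title: 'second occurrence, overwrites the first' },
	{ entity: { id: 2 }, entryId: 'a', title: 'same entry ID, different entity' },
];

const byEntityIdAndEntryId = batch.reduce(mapReleasesToEntityIdAndEntryId, {});
const internalUniqueReleases = Object.values(byEntityIdAndEntryId)
	.map(releasesByEntryId => Object.values(releasesByEntryId))
	.flat();
// internalUniqueReleases -> two releases: the (1, 'a') second occurrence and the (2, 'a') release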
@@ -132,7 +103,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
		return accReleases;
	}

-	const pageReleasesWithEntity = pageReleases.map(release => ({ ...release, entity }));
+	const pageReleasesWithEntity = pageReleases.map(release => ({ ...release, entity: release.entity || entity }));

	if (needNextPage(pageReleasesWithEntity, accReleases, isUpcoming)) {
		return scrapeReleasesPage(page + 1, accReleases.concat(pageReleasesWithEntity), isUpcoming);
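This hunk only shows part of the page loop; below is a hypothetical, simplified outline of the accumulate-and-recurse pattern it appears to implement. Only needNextPage, accReleases and pageReleasesWithEntity come from the diff; the scraper call, the closure over scraper and entity, and the final return are assumptions.

// Hypothetical outline, assuming scraper and entity are in closure scope (e.g. defined inside scrapeReleases).
async function scrapeReleasesPage(page, accReleases, isUpcoming) {
	const pageReleases = await scraper.fetchLatest(entity, page); // assumed scraper API

	if (!pageReleases || pageReleases.length === 0) {
		return accReleases;
	}

	// prefer an entity set by the scraper itself, fall back to the entity being scraped
	const pageReleasesWithEntity = pageReleases.map(release => ({ ...release, entity: release.entity || entity }));

	if (needNextPage(pageReleasesWithEntity, accReleases, isUpcoming)) {
		return scrapeReleasesPage(page + 1, accReleases.concat(pageReleasesWithEntity), isUpcoming);
	}

	return accReleases.concat(pageReleasesWithEntity);
}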
@@ -148,19 +119,12 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
		|| (hasDates && releases.filter(release => moment(release.date).isAfter(argv.after)))
		|| releases.slice(0, Math.max(argv.nullDateLimit, 0));

-	console.log(releases.length, limitedReleases.length);
-
-	/*
-	// attach entity the release is assigned to when stored
-	const releasesWithEntity = limitedReleases.map(release => ({
-		...release,
-		entity: release.entity || entity, // allow override
-	}));
-
	const { uniqueReleases, duplicateReleases } = argv.force
-		? { uniqueReleases: limitedReleases, localUniqueReleases: releases, duplicateReleases: [] }
-		: await filterUniqueReleases(releasesWithEntity);
-	*/
+		? { uniqueReleases: limitedReleases, duplicateReleases: [] }
+		: await filterUniqueReleases(limitedReleases);

+	console.log(releases.length, uniqueReleases.length, duplicateReleases.length);
	return { uniqueReleases, duplicateReleases };
}

async function scrapeLatestReleases(scraper, entity, preData) {
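As the commit message says, the scraper now returns its results: with the filtering uncommented, scrapeReleases resolves to { uniqueReleases, duplicateReleases }, and the argv.force branch skips the database check. A hypothetical caller follows; storeReleases is an assumed name, only the scrapeReleases signature comes from this diff.

// Hypothetical consumer of the new return value.
const { uniqueReleases, duplicateReleases } = await scrapeReleases(scraper, entity, preData, false);

await storeReleases(uniqueReleases); // assumed storage helper
console.log(`stored ${uniqueReleases.length} new releases, skipped ${duplicateReleases.length} duplicates`);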