Returning results from new pagination.

This commit is contained in:
DebaucheryLibrarian 2020-10-14 03:17:03 +02:00
parent 013e85cf2a
commit 99a4751c20
4 changed files with 17 additions and 53 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 189 KiB

After

Width:  |  Height:  |  Size: 938 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.1 KiB

After

Width:  |  Height:  |  Size: 8.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 35 KiB

After

Width:  |  Height:  |  Size: 34 KiB

View File

@ -13,18 +13,18 @@ const { fetchIncludedEntities } = require('./entities');
const emptyReleases = { uniqueReleases: [], duplicateReleases: [] };
/**
 * Reducer for Array#reduce (seed `{}`): indexes releases as
 * acc[entityId][entryId] = release, so duplicate releases can be
 * looked up in O(1) by entity and entry identifier.
 *
 * @param {Object} acc - accumulator mapping entityId -> entryId -> release
 * @param {Object} release - release with either a flat `entityId` or a nested `entity.id`
 * @returns {Object} the same accumulator, for chaining in reduce
 */
function mapReleasesToEntityIdAndEntryId(acc, release) {
	// allow either the flat FK or the joined entity row as the ID source
	const entityId = release.entityId || release.entity.id;
	// was `release.entryId || release.entryId` — a tautology; the right-hand
	// operand was dead, so the single read is equivalent
	const entryId = release.entryId;

	if (!acc[entityId]) acc[entityId] = {};
	// store the release itself (not a boolean) so the duplicate can be retrieved later
	acc[entityId][entryId] = release;

	return acc;
}
function filterLocalUniqueReleases(releases, accReleases) {
const localDuplicateReleasesBySiteIdAndEntryId = accReleases.reduce(mapReleasesToSiteIdAndEntryId, {});
const localDuplicateReleasesBySiteIdAndEntryId = accReleases.reduce(mapReleasesToEntityIdAndEntryId, {});
const localUniqueReleases = releases.filter(release => !localDuplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]);
const localDuplicateReleases = releases.filter(release => localDuplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]);
@ -35,7 +35,6 @@ function filterLocalUniqueReleases(releases, accReleases) {
};
}
/*
async function filterUniqueReleases(releases) {
const releaseIdentifiers = releases
.map(release => [release.entity.id, release.entryId]);
@ -46,43 +45,15 @@ async function filterUniqueReleases(releases) {
.whereIn(['entity_id', 'entry_id'], releaseIdentifiers);
const duplicateReleases = duplicateReleaseEntries.map(release => curateRelease(release));
const duplicateReleasesByEntityIdAndEntryId = duplicateReleases.reduce(mapReleasesToEntityIdAndEntryId, {});
const internalUniqueReleasesByEntityIdAndEntryId = releases.reduce((acc, release) => mapReleasesToEntityIdAndEntryId(acc, release), {});
const internalUniqueReleases = Object.values(internalUniqueReleasesByEntityIdAndEntryId).map(releasesByEntryId => Object.values(releasesByEntryId)).flat();
const uniqueReleases = internalUniqueReleases.filter(release => !duplicateReleasesByEntityIdAndEntryId[release.entity.id]?.[release.entryId]);
return { uniqueReleases, duplicateReleases };
}
*/
/*
async function filterUniqueReleasesLegacy(latestReleases, accReleases) {
const latestReleaseIdentifiers = latestReleases
.map(release => [release.entity.id, release.entryId]);
const duplicateReleaseEntries = await knex('releases')
.select(knex.raw('releases.*, row_to_json(entities) as entity'))
.leftJoin('entities', 'entities.id', 'releases.entity_id')
.whereIn(['entity_id', 'entry_id'], latestReleaseIdentifiers);
const duplicateReleases = duplicateReleaseEntries.map(release => curateRelease(release));
// add entry IDs of accumulated releases to prevent an infinite scrape loop
// when one page contains the same release as the previous
const duplicateReleasesBySiteIdAndEntryId = duplicateReleases
.concat(accReleases.uniqueReleases)
.reduce(mapReleasesToSiteIdAndEntryId, {});
const localDuplicateReleasesBySiteIdAndEntryId = accReleases.uniqueReleases
.concat(accReleases.duplicateReleases)
.reduce(mapReleasesToSiteIdAndEntryId, {});
console.log(localDuplicateReleasesBySiteIdAndEntryId);
const uniqueReleases = latestReleases.filter(release => !duplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]);
const localUniqueReleases = latestReleases.filter(release => !localDuplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]);
return {
uniqueReleases,
localUniqueReleases,
duplicateReleases,
};
}
*/
function needNextPage(pageReleases, accReleases, isUpcoming) {
const { localUniqueReleases: uniquePageReleases } = filterLocalUniqueReleases(pageReleases, accReleases);
@ -132,7 +103,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
return accReleases;
}
const pageReleasesWithEntity = pageReleases.map(release => ({ ...release, entity }));
const pageReleasesWithEntity = pageReleases.map(release => ({ ...release, entity: release.entity || entity }));
if (needNextPage(pageReleasesWithEntity, accReleases, isUpcoming)) {
return scrapeReleasesPage(page + 1, accReleases.concat(pageReleasesWithEntity), isUpcoming);
@ -148,19 +119,12 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
|| (hasDates && releases.filter(release => moment(release.date).isAfter(argv.after)))
|| releases.slice(0, Math.max(argv.nullDateLimit, 0));
console.log(releases.length, limitedReleases.length);
/*
// attach entity the release is assigned to when stored
const releasesWithEntity = limitedReleases.map(release => ({
...release,
entity: release.entity || entity, // allow override
}));
const { uniqueReleases, duplicateReleases } = argv.force
? { uniqueReleases: limitedReleases, localUniqueReleases: releases, duplicateReleases: [] }
: await filterUniqueReleases(releasesWithEntity);
*/
? { uniqueReleases: limitedReleases, duplicateReleases: [] }
: await filterUniqueReleases(limitedReleases);
console.log(releases.length, uniqueReleases.length, duplicateReleases.length);
return { uniqueReleases, duplicateReleases };
}
async function scrapeLatestReleases(scraper, entity, preData) {