Added PurgatoryX scraper.

DebaucheryLibrarian
2021-11-27 23:55:16 +01:00
parent 2539e88f47
commit 9d7183ac69
32 changed files with 196 additions and 16 deletions

@@ -39,9 +39,11 @@ function filterLocalUniqueReleases(releases, accReleases) {
 
 async function filterUniqueReleases(releases) {
   const releaseIdentifiers = releases
-    .map((release) => [release.entity.id, release.entryId]);
+    .map((release) => [release.entity.id, release.entryId.toString()]);
 
-  const duplicateReleaseEntries = await knex('releases')
+  console.log(releaseIdentifiers.length);
+
+  const duplicateReleaseEntriesQuery = knex('releases')
     .select(knex.raw('releases.*, row_to_json(entities) as entity'))
     .leftJoin('entities', 'entities.id', 'releases.entity_id')
     .whereIn(['entity_id', 'entry_id'], releaseIdentifiers)
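
Note on the first hunk: entryId is now cast with .toString() before being paired with entity.id, presumably because entry_id is a text column, so the composite whereIn tuples must bind strings to match. A minimal sketch of that composite-key lookup, assuming a Postgres connection and hypothetical sample releases:

const knex = require('knex')({ client: 'pg', connection: process.env.DATABASE_URL });

// Hypothetical scraped releases; a scraper may return entryId as a number.
const releases = [
  { entity: { id: 1 }, entryId: 12345 },
  { entity: { id: 2 }, entryId: 'abc-678' },
];

// Composite [entity_id, entry_id] pairs; entry_id is assumed to be text,
// so every entryId is cast to a string before binding.
const releaseIdentifiers = releases
  .map((release) => [release.entity.id, release.entryId.toString()]);

// knex expands a column array into a tuple IN clause:
//   where ("entity_id", "entry_id") in ((?, ?), (?, ?))
const query = knex('releases')
  .select('*')
  .whereIn(['entity_id', 'entry_id'], releaseIdentifiers);
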
@@ -55,6 +57,10 @@ async function filterUniqueReleases(releases) {
       .orWhere(knex.raw('updated_at - date > INTERVAL \'1 day\'')); // scene was updated after the release date, no updates expected
     });
 
+  console.log(duplicateReleaseEntriesQuery.toString());
+
+  const duplicateReleaseEntries = await duplicateReleaseEntriesQuery;
+
   const duplicateReleases = duplicateReleaseEntries.map((release) => curateRelease(release));
   const duplicateReleasesByEntityIdAndEntryId = duplicateReleases.reduce(mapReleasesToEntityIdAndEntryId, {});
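
Note on the second hunk: the query is now assigned before it is awaited so the generated SQL can be inspected. knex builders are thenables: .toString() compiles the pending query to SQL without executing it, and a later await runs it. A minimal sketch of the pattern, reusing the names from the diff:

// Assigning the builder instead of awaiting it immediately keeps it inert;
// knex builders only hit the database once awaited.
const duplicateReleaseEntriesQuery = knex('releases')
  .whereIn(['entity_id', 'entry_id'], releaseIdentifiers);

// toString() compiles the builder to a SQL string, handy for debugging.
console.log(duplicateReleaseEntriesQuery.toString());

// Awaiting executes the query and returns the matching rows.
const duplicateReleaseEntries = await duplicateReleaseEntriesQuery;
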
@@ -262,7 +268,7 @@ async function scrapeNetworkSequential(networkEntity) {
 }
 
 async function scrapeNetworkParallel(networkEntity) {
-  const beforeNetwork = await networkEntity.scraper.beforeNetwork?.(networkEntity);
+  const beforeNetwork = await networkEntity.scraper?.beforeNetwork?.(networkEntity);
 
   return Promise.map(
     networkEntity.includedChildren,
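
Note on the last hunk: the added ?. guards entities that have no scraper at all. With scraper.beforeNetwork?.() a missing scraper still throws a TypeError before the optional call is reached; scraper?.beforeNetwork?.() short-circuits to undefined instead. A minimal sketch with hypothetical entities:

const entityWithHook = {
  scraper: { beforeNetwork: async (entity) => ({ session: 'hypothetical' }) },
};
const entityWithoutScraper = {}; // no scraper registered for this entity

// Old form: a missing scraper throws before ?. can help.
// await entityWithoutScraper.scraper.beforeNetwork?.(entityWithoutScraper); // TypeError

// New form: both links are optional, so the call short-circuits to undefined.
const beforeNetwork = await entityWithoutScraper.scraper?.beforeNetwork?.(entityWithoutScraper);
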