Added PurgatoryX scraper.
This commit is contained in:
@@ -39,9 +39,11 @@ function filterLocalUniqueReleases(releases, accReleases) {
 
 async function filterUniqueReleases(releases) {
 const releaseIdentifiers = releases
-.map((release) => [release.entity.id, release.entryId]);
+.map((release) => [release.entity.id, release.entryId.toString()]);
 
-const duplicateReleaseEntries = await knex('releases')
+console.log(releaseIdentifiers.length);
+
+const duplicateReleaseEntriesQuery = knex('releases')
 .select(knex.raw('releases.*, row_to_json(entities) as entity'))
 .leftJoin('entities', 'entities.id', 'releases.entity_id')
 .whereIn(['entity_id', 'entry_id'], releaseIdentifiers)
@@ -55,6 +57,10 @@ async function filterUniqueReleases(releases) {
 .orWhere(knex.raw('updated_at - date > INTERVAL \'1 day\'')); // scene was updated after the release date, no updates expected
 });
 
+console.log(duplicateReleaseEntriesQuery.toString());
+
+const duplicateReleaseEntries = await duplicateReleaseEntriesQuery;
+
 const duplicateReleases = duplicateReleaseEntries.map((release) => curateRelease(release));
 const duplicateReleasesByEntityIdAndEntryId = duplicateReleases.reduce(mapReleasesToEntityIdAndEntryId, {});
 
@@ -262,7 +268,7 @@ async function scrapeNetworkSequential(networkEntity) {
 }
 
 async function scrapeNetworkParallel(networkEntity) {
-const beforeNetwork = await networkEntity.scraper.beforeNetwork?.(networkEntity);
+const beforeNetwork = await networkEntity.scraper?.beforeNetwork?.(networkEntity);
 
 return Promise.map(
 networkEntity.includedChildren,
||||
Reference in New Issue
Block a user