forked from DebaucheryLibrarian/traxxx
				
			Returning results from new pagination.
This commit is contained in:
		
							parent
							
								
									013e85cf2a
								
							
						
					
					
						commit
						99a4751c20
					
				
										
											Binary file not shown.
										
									
								
							| Before Width: | Height: | Size: 189 KiB After Width: | Height: | Size: 938 KiB | 
										
											Binary file not shown.
										
									
								
							| Before Width: | Height: | Size: 8.1 KiB After Width: | Height: | Size: 8.0 KiB | 
										
											Binary file not shown.
										
									
								
							| Before Width: | Height: | Size: 35 KiB After Width: | Height: | Size: 34 KiB | 
|  | @ -13,18 +13,18 @@ const { fetchIncludedEntities } = require('./entities'); | |||
| 
 | ||||
| const emptyReleases = { uniqueReleases: [], duplicateReleases: [] }; | ||||
| 
 | ||||
| function mapReleasesToSiteIdAndEntryId(acc, release) { | ||||
| function mapReleasesToEntityIdAndEntryId(acc, release) { | ||||
| 	const entityId = release.entityId || release.entity.id; | ||||
| 	const entryId = release.entryId || release.entryId; | ||||
| 
 | ||||
| 	if (!acc[entityId]) acc[entityId] = {}; | ||||
| 	acc[entityId][entryId] = true; | ||||
| 	acc[entityId][entryId] = release; | ||||
| 
 | ||||
| 	return acc; | ||||
| } | ||||
| 
 | ||||
| function filterLocalUniqueReleases(releases, accReleases) { | ||||
| 	const localDuplicateReleasesBySiteIdAndEntryId = accReleases.reduce(mapReleasesToSiteIdAndEntryId, {}); | ||||
| 	const localDuplicateReleasesBySiteIdAndEntryId = accReleases.reduce(mapReleasesToEntityIdAndEntryId, {}); | ||||
| 
 | ||||
| 	const localUniqueReleases = releases.filter(release => !localDuplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]); | ||||
| 	const localDuplicateReleases = releases.filter(release => localDuplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]); | ||||
|  | @ -35,7 +35,6 @@ function filterLocalUniqueReleases(releases, accReleases) { | |||
| 	}; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
| async function filterUniqueReleases(releases) { | ||||
| 	const releaseIdentifiers = releases | ||||
| 		.map(release => [release.entity.id, release.entryId]); | ||||
|  | @ -46,43 +45,15 @@ async function filterUniqueReleases(releases) { | |||
| 		.whereIn(['entity_id', 'entry_id'], releaseIdentifiers); | ||||
| 
 | ||||
| 	const duplicateReleases = duplicateReleaseEntries.map(release => curateRelease(release)); | ||||
| 	const duplicateReleasesByEntityIdAndEntryId = duplicateReleases.reduce(mapReleasesToEntityIdAndEntryId, {}); | ||||
| 
 | ||||
| 	const internalUniqueReleasesByEntityIdAndEntryId = releases.reduce((acc, release) => mapReleasesToEntityIdAndEntryId(acc, release), {}); | ||||
| 	const internalUniqueReleases = Object.values(internalUniqueReleasesByEntityIdAndEntryId).map(releasesByEntryId => Object.values(releasesByEntryId)).flat(); | ||||
| 
 | ||||
| 	const uniqueReleases = internalUniqueReleases.filter(release => !duplicateReleasesByEntityIdAndEntryId[release.entity.id]?.[release.entryId]); | ||||
| 
 | ||||
| 	return { uniqueReleases, duplicateReleases }; | ||||
| } | ||||
| */ | ||||
| 
 | ||||
| /* | ||||
| async function filterUniqueReleasesLegacy(latestReleases, accReleases) { | ||||
| 	const latestReleaseIdentifiers = latestReleases | ||||
| 		.map(release => [release.entity.id, release.entryId]); | ||||
| 
 | ||||
| 	const duplicateReleaseEntries = await knex('releases') | ||||
| 		.select(knex.raw('releases.*, row_to_json(entities) as entity')) | ||||
| 		.leftJoin('entities', 'entities.id', 'releases.entity_id') | ||||
| 		.whereIn(['entity_id', 'entry_id'], latestReleaseIdentifiers); | ||||
| 
 | ||||
| 	const duplicateReleases = duplicateReleaseEntries.map(release => curateRelease(release)); | ||||
| 
 | ||||
| 	// add entry IDs of accumulated releases to prevent an infinite scrape loop
 | ||||
| 	// when one page contains the same release as the previous
 | ||||
| 	const duplicateReleasesBySiteIdAndEntryId = duplicateReleases | ||||
| 		.concat(accReleases.uniqueReleases) | ||||
| 		.reduce(mapReleasesToSiteIdAndEntryId, {}); | ||||
| 
 | ||||
| 	const localDuplicateReleasesBySiteIdAndEntryId = accReleases.uniqueReleases | ||||
| 		.concat(accReleases.duplicateReleases) | ||||
| 		.reduce(mapReleasesToSiteIdAndEntryId, {}); | ||||
| 
 | ||||
| 	console.log(localDuplicateReleasesBySiteIdAndEntryId); | ||||
| 
 | ||||
| 	const uniqueReleases = latestReleases.filter(release => !duplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]); | ||||
| 	const localUniqueReleases = latestReleases.filter(release => !localDuplicateReleasesBySiteIdAndEntryId[release.entity.id]?.[release.entryId]); | ||||
| 
 | ||||
| 	return { | ||||
| 		uniqueReleases, | ||||
| 		localUniqueReleases, | ||||
| 		duplicateReleases, | ||||
| 	}; | ||||
| } | ||||
| */ | ||||
| 
 | ||||
| function needNextPage(pageReleases, accReleases, isUpcoming) { | ||||
| 	const { localUniqueReleases: uniquePageReleases } = filterLocalUniqueReleases(pageReleases, accReleases); | ||||
|  | @ -132,7 +103,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) { | |||
| 			return accReleases; | ||||
| 		} | ||||
| 
 | ||||
| 		const pageReleasesWithEntity = pageReleases.map(release => ({ ...release, entity })); | ||||
| 		const pageReleasesWithEntity = pageReleases.map(release => ({ ...release, entity: release.entity || entity })); | ||||
| 
 | ||||
| 		if (needNextPage(pageReleasesWithEntity, accReleases, isUpcoming)) { | ||||
| 			return scrapeReleasesPage(page + 1, accReleases.concat(pageReleasesWithEntity), isUpcoming); | ||||
|  | @ -148,19 +119,12 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) { | |||
| 		|| (hasDates && releases.filter(release => moment(release.date).isAfter(argv.after))) | ||||
| 		|| releases.slice(0, Math.max(argv.nullDateLimit, 0)); | ||||
| 
 | ||||
| 	console.log(releases.length, limitedReleases.length); | ||||
| 
 | ||||
| 	/* | ||||
| 	// attach entity the release is assigned to when stored
 | ||||
| 	const releasesWithEntity = limitedReleases.map(release => ({ | ||||
| 		...release, | ||||
| 		entity: release.entity || entity, // allow override
 | ||||
| 	})); | ||||
| 
 | ||||
| 	const { uniqueReleases, duplicateReleases } = argv.force | ||||
| 		? { uniqueReleases: limitedReleases, localUniqueReleases: releases, duplicateReleases: [] } | ||||
| 		: await filterUniqueReleases(releasesWithEntity); | ||||
| 	*/ | ||||
| 		? { uniqueReleases: limitedReleases, duplicateReleases: [] } | ||||
| 		: await filterUniqueReleases(limitedReleases); | ||||
| 
 | ||||
| 	console.log(releases.length, uniqueReleases.length, duplicateReleases.length); | ||||
| 	return { uniqueReleases, duplicateReleases }; | ||||
| } | ||||
| 
 | ||||
| async function scrapeLatestReleases(scraper, entity, preData) { | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue