Filtering invalid actors from releases before storing.
This commit is contained in:
		
							parent
							
								
									2801732f57
								
							
						
					
					
						commit
						c37d4ad01f
					
				|  | @ -114,29 +114,37 @@ function getAverage(items) { | |||
| } | ||||
| 
 | ||||
| function toBaseActors(actorsOrNames, release) { | ||||
| 	return actorsOrNames.map((actorOrName) => { | ||||
| 		const [baseName, entryId] = (actorOrName.name || actorOrName).split(':'); | ||||
| 	if (!actorsOrNames) { | ||||
| 		return []; | ||||
| 	} | ||||
| 
 | ||||
| 		const name = capitalize(baseName); | ||||
| 		const slug = slugify(name); | ||||
| 	const baseActors = actorsOrNames | ||||
| 		.filter(actorOrName => actorOrName && (typeof actorOrName === 'string' || actorOrName.name)) | ||||
| 		.map((actorOrName) => { | ||||
| 			const [baseName, entryId] = (actorOrName.name || actorOrName).split(':'); | ||||
| 
 | ||||
| 		const baseActor = { | ||||
| 			name, | ||||
| 			slug, | ||||
| 			entryId: entryId || actorOrName.entryId || null, | ||||
| 			entity: release?.entity?.parent || release?.entity || null, | ||||
| 			hasProfile: !!actorOrName.name, // actor contains profile information
 | ||||
| 		}; | ||||
| 			const name = capitalize(baseName); | ||||
| 			const slug = slugify(name); | ||||
| 
 | ||||
| 		if (actorOrName.name) { | ||||
| 			return { | ||||
| 				...actorOrName, | ||||
| 				...baseActor, | ||||
| 			const baseActor = { | ||||
| 				name, | ||||
| 				slug, | ||||
| 				entryId: entryId || actorOrName.entryId || null, | ||||
| 				entity: release?.entity?.parent || release?.entity || null, | ||||
| 				hasProfile: !!actorOrName.name, // actor contains profile information
 | ||||
| 			}; | ||||
| 		} | ||||
| 
 | ||||
| 		return baseActor; | ||||
| 	}); | ||||
| 			if (actorOrName.name) { | ||||
| 				return { | ||||
| 					...actorOrName, | ||||
| 					...baseActor, | ||||
| 				}; | ||||
| 			} | ||||
| 
 | ||||
| 			return baseActor; | ||||
| 		}); | ||||
| 
 | ||||
| 	return baseActors; | ||||
| } | ||||
| 
 | ||||
| function curateActor(actor, withDetails = false, isProfile = false) { | ||||
|  | @ -832,4 +840,5 @@ module.exports = { | |||
| 	fetchActor, | ||||
| 	scrapeActors, | ||||
| 	searchActors, | ||||
| 	toBaseActors, | ||||
| }; | ||||
|  |  | |||
|  | @ -30,20 +30,20 @@ const { argv } = yargs | |||
| 		type: 'boolean', | ||||
| 		alias: 'web', | ||||
| 	}) | ||||
| 	.option('networks', { | ||||
| 	.option('include-networks', { | ||||
| 		describe: 'Network to scrape all channels from (overrides configuration)', | ||||
| 		type: 'array', | ||||
| 		alias: 'network', | ||||
| 		alias: ['include-network', 'networks', 'network'], | ||||
| 	}) | ||||
| 	.option('exclude-networks', { | ||||
| 		describe: 'Network not to scrape any channels from (overrides configuration)', | ||||
| 		type: 'array', | ||||
| 		alias: 'exclude-network', | ||||
| 	}) | ||||
| 	.option('channels', { | ||||
| 	.option('include-channels', { | ||||
| 		describe: 'Channel to scrape (overrides configuration)', | ||||
| 		type: 'array', | ||||
| 		alias: 'channel', | ||||
| 		alias: ['include-channel', 'channels', 'channel'], | ||||
| 	}) | ||||
| 	.option('exclude-channels', { | ||||
| 		describe: 'Channel not to scrape (overrides configuration)', | ||||
|  |  | |||
|  | @ -64,8 +64,8 @@ async function fetchIncludedEntities() { | |||
| 		includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels, | ||||
| 		includedNetworks: argv.networks || (!argv.channels && config.include?.networks) || [], | ||||
| 		includedChannels: argv.channels || (!argv.networks && config.include?.channels) || [], | ||||
| 		excludedNetworks: argv.excludeNetworks || config.exclude?.networks || [], | ||||
| 		excludedChannels: argv.excludeChannels || config.exclude?.channels || [], | ||||
| 		excludedNetworks: argv.excludeNetworks || config.exclude?.networks.filter(network => !argv.networks?.includes(network)) || [], // ignore explicitly included networks
 | ||||
| 		excludedChannels: argv.excludeChannels || config.exclude?.channels.filter(channel => !argv.channels?.includes(channel)) || [], // ignore explicitly included channels
 | ||||
| 	}; | ||||
| 
 | ||||
| 	const rawNetworks = await knex.raw(` | ||||
|  |  | |||
|  | @ -258,7 +258,8 @@ async function fetchLatest(entity, page, options) { | |||
| 			.limit(faker.random.number({ min: 2, max: 15 })) | ||||
| 			.pluck('name'); | ||||
| 
 | ||||
| 		release.actors = actors(release); | ||||
| 		// release.actors = actors(release);
 | ||||
| 		release.actors = [null, 'Charles Darwin']; | ||||
| 		release.title = title(release); | ||||
| 
 | ||||
| 		return release; | ||||
|  |  | |||
|  | @ -9,7 +9,7 @@ const slugify = require('./utils/slugify'); | |||
| const bulkInsert = require('./utils/bulk-insert'); | ||||
| const resolvePlace = require('./utils/resolve-place'); | ||||
| const { formatDate } = require('./utils/qu'); | ||||
| const { associateActors, scrapeActors } = require('./actors'); | ||||
| const { associateActors, scrapeActors, toBaseActors } = require('./actors'); | ||||
| const { associateReleaseTags } = require('./tags'); | ||||
| const { curateEntity } = require('./entities'); | ||||
| const { associateReleaseMedia } = require('./media'); | ||||
|  | @ -291,7 +291,8 @@ async function storeScenes(releases) { | |||
| 	const [batchId] = await knex('batches').insert({ comment: null }).returning('id'); | ||||
| 
 | ||||
| 	const releasesWithChannels = await attachChannelEntities(releases); | ||||
| 	const releasesWithStudios = await attachStudios(releasesWithChannels); | ||||
| 	const releasesWithBaseActors = releasesWithChannels.map(release => ({ ...release, actors: toBaseActors(release.actors) })); | ||||
| 	const releasesWithStudios = await attachStudios(releasesWithBaseActors); | ||||
| 
 | ||||
| 	// uniqueness is entity ID + entry ID, filter uniques after adding entities
 | ||||
| 	const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios); | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue