forked from DebaucheryLibrarian/traxxx

Refactored deep and store modules to use entities.

parent f0a89df6ab
commit 4959dfd14f
@@ -45,6 +45,7 @@ async function mounted() {
 			'double-penetration',
 			'facial',
 			'creampie',
+			'squirting',
 		],
 		appearance: [
 			'asian',
@@ -100,6 +101,7 @@ async function mounted() {
 		],
 		misc: [
 			'gaping',
+			'squirting',
 			'oil',
 		],
 	};

@@ -57,7 +57,7 @@ function initActorActions(store, _router) {
                     description
 					createdAt
 					updatedAt
-                    network {
+                    network: entity {
                         id
                         name
                         slug
@@ -80,12 +80,7 @@ function initActorActions(store, _router) {
 					profiles: actorsProfiles {
 						description
 						descriptionHash
-						network {
-							id
-							slug
-							name
-						}
-						site {
+						network: entity {
 							id
 							slug
 							name
@@ -162,12 +157,12 @@ function initActorActions(store, _router) {
 								${releaseActorsFragment}
 								${releaseTagsFragment}
 								${releasePosterFragment}
-								site {
+								site: entity {
 									id
 									name
 									slug
 									url
-									network {
+									network: parent {
 										id
 										name
 										slug
@@ -265,7 +260,7 @@ function initActorActions(store, _router) {
 						dateOfBirth
 						dateOfDeath
 						gender
-						network {
+						network: entity {
 							id
 							name
 							slug

										
Binary file not shown | After | Size: 1.8 KiB
Binary file not shown | After | Size: 492 KiB
Binary file not shown | After | Size: 6.8 KiB
Binary file not shown | After | Size: 27 KiB

@@ -635,6 +635,7 @@ const tagPosters = [
 	['piercings', 0, 'Kaegune in "When The Sun Goes Down" for Suicide Girls'],
 	['pussy-eating', 0, 'Kali Roses licking Emily Willis\' pussy in "Peeping On My Neighbor" for Girl Girl'],
 	['redhead', 1, 'Lacy Lennon in "Girl Crush" for When Girls Play'],
+	['squirting', 0, 'Veronica Rodriguez in "Hot Latina Squirting" for Jules Jordan'],
 	['schoolgirl', 1, 'Eliza Ibarra for Brazzers'],
 	['swallowing', 'poster'],
 	['teen', 0, 'Eva Elfie in "Fresh New Talent" for Club Seventeen'],

@@ -20,7 +20,6 @@ const logger = require('./logger')(__filename);
 
 const { toBaseReleases } = require('./deep');
 const { associateAvatars } = require('./media');
-const { curateSite } = require('./sites');
 
 const slugify = require('./utils/slugify');
 const capitalize = require('./utils/capitalize');
@@ -120,7 +119,7 @@ function toBaseActors(actorsOrNames, release) {
 		const baseActor = {
 			name,
 			slug,
-			network: release?.site.network,
+			entity: release?.site?.network || release?.entity?.parent || null,
 		};
 
 		if (actorOrName.name) {
@@ -144,7 +143,7 @@ function curateActor(actor, withDetails = false) {
 		name: actor.name,
 		slug: actor.slug,
 		gender: actor.gender,
-		networkId: actor.entity_id,
+		entityId: actor.entity_id,
 		aliasFor: actor.alias_for,
 		dateOfBirth: actor.date_of_birth,
 		birthCountry: actor.birth_country_alpha2,
@@ -155,10 +154,10 @@ function curateActor(actor, withDetails = false) {
 				slug: actor.slug,
 				gender: actor.alias.gender,
 			},
-			network: actor.network && {
-				id: actor.network.id,
-				name: actor.network.name,
-				slug: actor.network.slug,
+			entity: actor.entity && {
+				id: actor.entity.id,
+				name: actor.entity.name,
+				slug: actor.entity.slug,
 			},
 			dateOfDeath: actor.date_of_death,
 			cup: actor.cup,
@@ -224,8 +223,7 @@ function curateProfileEntry(profile) {
 	const curatedProfileEntry = {
 		...(profile.update !== false && { id: profile.update }),
 		actor_id: profile.id,
-		site_id: profile.site?.id || null,
-		entity_id: profile.network?.id || null,
+		entity_id: profile.entity?.id || null,
 		date_of_birth: profile.dateOfBirth,
 		date_of_death: profile.dateOfDeath,
 		gender: profile.gender,
@@ -268,8 +266,7 @@ async function curateProfile(profile) {
 			name: profile.name,
 			avatar: profile.avatar,
 			scraper: profile.scraper,
-			site: profile.site,
-			network: profile.network,
+			entity: profile.entity,
 			update: profile.update,
 		};
 
@@ -343,7 +340,7 @@ async function curateProfile(profile) {
 					const { href } = new URL(social);
 					return href;
 				} catch (error) {
-					logger.warn(`Profile scraper for '${profile.site.name}' returned invalid social link: ${social}`);
+					logger.warn(`Profile scraper for '${profile.entity.name}' returned invalid social link: ${social}`);
 					return null;
 				}
 			}).filter(Boolean)
@@ -351,9 +348,9 @@ async function curateProfile(profile) {
 
 		curatedProfile.releases = toBaseReleases(profile.releases);
 
-		if (profile.ethnicity && !curatedProfile.ethnicity) logger.warn(`Unrecognized ethnicity returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.ethnicity}`);
-		if ((profile.hairColor || profile.hair) && !curatedProfile.hairColor) logger.warn(`Unrecognized hair color returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.hairColor || profile.hair}`);
-		if (profile.eyes && !curatedProfile.eyes) logger.warn(`Unrecognized eye color returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.eyes}`);
+		if (profile.ethnicity && !curatedProfile.ethnicity) logger.warn(`Unrecognized ethnicity returned by '${profile.entity.name}' scraper: ${profile.ethnicity}`);
+		if ((profile.hairColor || profile.hair) && !curatedProfile.hairColor) logger.warn(`Unrecognized hair color returned by '${profile.entity.name}' scraper: ${profile.hairColor || profile.hair}`);
+		if (profile.eyes && !curatedProfile.eyes) logger.warn(`Unrecognized eye color returned by '${profile.entity.name}' scraper: ${profile.eyes}`);
 
 		return curatedProfile;
 	} catch (error) {
@@ -499,7 +496,7 @@ async function upsertProfiles(profiles) {
 	}
 }
 
-async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug, existingProfilesByActorNetworkSiteId) {
+async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId) {
 	const profiles = Promise.map(sources, async (source) => {
 		try {
 			// config may group sources to try until success
@@ -507,24 +504,25 @@ async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug, exist
 				try {
 					const scraper = scrapers[scraperSlug];
 					const context = {
-						site: sitesBySlug[scraperSlug] || null,
-						network: networksBySlug[scraperSlug] || sitesBySlug[scraperSlug]?.network || null,
+						site: entitiesBySlug[scraperSlug] || null,
+						network: entitiesBySlug[scraperSlug] || null,
+						entity: entitiesBySlug[scraperSlug] || null,
 						scraper: scraperSlug,
 					};
 
-					const label = context.site?.name || context.network?.name;
+					const label = context.entity?.name;
 
 					if (!scraper?.fetchProfile) {
 						logger.warn(`No profile profile scraper available for ${scraperSlug}`);
 						throw new Error(`No profile profile scraper available for ${scraperSlug}`);
 					}
 
-					if (!context.site && !context.network) {
-						logger.warn(`No site or network found for ${scraperSlug}`);
-						throw new Error(`No site or network found for ${scraperSlug}`);
+					if (!context.entity) {
+						logger.warn(`No entity found for ${scraperSlug}`);
+						throw new Error(`No entity found for ${scraperSlug}`);
 					}
 
-					const existingProfile = existingProfilesByActorNetworkSiteId[actor.id]?.[context.network?.id || null]?.[context.site?.id || null];
+					const existingProfile = existingProfilesByActorEntityId[actor.id]?.[context.entity?.id || null];
 
 					if (existingProfile && !argv.force) {
 						logger.verbose(`Found existing profile for '${actor.name}' on '${label}', use --force to scrape again`);
@@ -574,20 +572,14 @@ async function scrapeActors(actorNames) {
 	const baseActors = toBaseActors(actorNames);
 
 	const sources = argv.sources || config.profiles || Object.keys(scrapers.actors);
-	const siteSlugs = sources.flat();
+	const entitySlugs = sources.flat();
 
-	const [networks, sites, existingActorEntries] = await Promise.all([
+	const [entities, existingActorEntries] = await Promise.all([
 		knex('entities')
-			.where('type', 2)
-			.whereIn('slug', siteSlugs),
-		knex('entities')
-			.select(
-				'entities.*',
-				'parents.name as network_name', 'parents.slug as network_slug', 'parents.url as network_url', 'parents.description as network_description', 'parents.parameters as network_parameters',
-			)
-			.where('type', 2)
-			.whereIn('entities.slug', siteSlugs)
-			.leftJoin('entities as parents', 'parents.id', 'entities.parent_id'),
+			.select(knex.raw('entities.*, row_to_json(parents) as parent'))
+			.whereIn('entities.slug', entitySlugs)
+			.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
+			.orderBy('entities.type'),
 		knex('actors')
 			.select(['id', 'name', 'slug'])
 			.modify((queryBuilder) => {
@@ -598,8 +590,7 @@ async function scrapeActors(actorNames) {
 			.whereNull('alias_for'),
 	]);
 
-	const networksBySlug = networks.reduce((acc, network) => ({ ...acc, [network.slug]: network }), {});
-	const sitesBySlug = sites.reduce((acc, site) => ({ ...acc, [site.slug]: curateSite(site) }), {});
+	const entitiesBySlug = entities.reduce((acc, entity) => ({ ...acc, [entity.slug]: entity }), {});
 
 	const existingActorEntriesBySlug = existingActorEntries.reduce((acc, actorEntry) => ({ ...acc, [actorEntry.slug]: actorEntry }), {});
 	const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlug[baseActor.slug]);
@@ -611,20 +602,17 @@ async function scrapeActors(actorNames) {
 	const actors = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []);
 
 	const existingProfiles = await knex('actors_profiles').whereIn('actor_id', actors.map(actor => actor.id));
-	const existingProfilesByActorNetworkSiteId = existingProfiles.reduce((acc, profile) => ({
+	const existingProfilesByActorEntityId = existingProfiles.reduce((acc, profile) => ({
 		...acc,
 		[profile.actor_id]: {
 			...acc[profile.actor_id],
-			[profile.entity_id]: {
-				...acc[profile.entity_id],
-				[profile.site_id]: profile,
-			},
+			[profile.entity_id]: profile,
 		},
 	}), {});
 
 	const profilesPerActor = await Promise.map(
 		actors,
-		async actor => scrapeProfiles(actor, sources, networksBySlug, sitesBySlug, existingProfilesByActorNetworkSiteId),
+		async actor => scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId),
 		{ concurrency: 10 },
 	);
 
@@ -647,13 +635,11 @@ async function scrapeActors(actorNames) {
 }
 
 async function getOrCreateActors(baseActors, batchId) {
-	console.log(baseActors);
-
 	const existingActors = await knex('actors')
 		.select('id', 'alias_for', 'name', 'slug', 'entity_id')
 		.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
 		.whereNull('entity_id')
-		.orWhereIn(['slug', 'entity_id'], baseActors.map(baseActor => [baseActor.slug, baseActor.network.id]));
+		.orWhereIn(['slug', 'entity_id'], baseActors.map(baseActor => [baseActor.slug, baseActor.entity.id]));
 
 	// const existingActorSlugs = new Set(existingActors.map(actor => actor.slug));
 	const existingActorSlugs = existingActors.reduce((acc, actor) => ({
@@ -664,7 +650,7 @@ async function getOrCreateActors(baseActors, batchId) {
 		},
 	}), {});
 
-	const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.network.id]?.[baseActor.slug] && !existingActorSlugs.null?.[baseActor.slug]);
+	const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.entity.id]?.[baseActor.slug] && !existingActorSlugs.null?.[baseActor.slug]);
 
 	const curatedActorEntries = curateActorEntries(uniqueBaseActors, batchId);
 	const newActors = await knex('actors').insert(curatedActorEntries, ['id', 'alias_for', 'name', 'slug', 'entity_id']);
@@ -722,7 +708,7 @@ async function fetchActor(actorId) {
 	const actor = await knex('actors')
 		.select(knex.raw(`
 			actors.*,
-			row_to_json(networks) as network,
+			row_to_json(entities) as entity,
 			row_to_json(actor_alias) as alias,
 			row_to_json(birth_country) as birth_country,
 			row_to_json(residence_country) as residence_country,
@@ -737,7 +723,7 @@ async function fetchActor(actorId) {
 			queryBuilder.where('actors.id', actorId);
 		})
 		.leftJoin('actors as actor_alias', 'actor_alias.id', 'actors.alias_for')
-		.leftJoin('networks', 'networks.id', 'actors.entity_id')
+		.leftJoin('entities', 'entities.id', 'actors.entity_id')
 		.leftJoin('countries as birth_country', 'birth_country.alpha2', 'actors.birth_country_alpha2')
 		.leftJoin('countries as residence_country', 'residence_country.alpha2', 'actors.residence_country_alpha2')
		.leftJoin('media', 'media.id', 'actors.avatar_media_id')
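Note: the queries above collapse the separate networks/sites lookups into a single entities lookup keyed by slug, with the parent row attached as JSON. A minimal sketch of that pattern, assuming a configured knex/PostgreSQL connection and the self-referencing entities.parent_id column used in this diff (the connection config and the sample call are illustrative, not part of the commit):

const knex = require('knex')({ client: 'pg', connection: process.env.DATABASE_URL });

// Fetch channels and networks in one query and attach the parent row as JSON,
// so callers can read entity.parent.slug without a second lookup.
async function fetchEntitiesBySlug(entitySlugs) {
	const entities = await knex('entities')
		.select(knex.raw('entities.*, row_to_json(parents) as parent'))
		.whereIn('entities.slug', entitySlugs)
		.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
		.orderBy('entities.type'); // networks (type 1) sort before channels (type 2)

	// Key by slug; a channel that shares a slug with a network overwrites it.
	return entities.reduce((acc, entity) => ({ ...acc, [entity.slug]: entity }), {});
}

// Example: fetchEntitiesBySlug(['hushpass', 'interracialpass']).then(console.log);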
							
								
								
									
src/deep.js (64 lines changed)
@@ -7,8 +7,6 @@ const include = require('./utils/argv-include')(argv);
 const logger = require('./logger')(__filename);
 const knex = require('./knex');
 const scrapers = require('./scrapers/scrapers');
-const { curateSites } = require('./sites');
-const { curateNetworks } = require('./networks');
 
 function urlToSiteSlug(url) {
 	try {
@@ -19,40 +17,31 @@ function urlToSiteSlug(url) {
 
 		return slug;
 	} catch (error) {
-		logger.warn(`Failed to derive site slug from '${url}': ${error.message}`);
+		logger.warn(`Failed to derive entity slug from '${url}': ${error.message}`);
 
 		return null;
 	}
 }
 
-async function findSites(baseReleases) {
-	const baseReleasesWithoutSite = baseReleases.filter(release => release.url && !release.site);
+async function findEntities(baseReleases) {
+	const baseReleasesWithoutEntity = baseReleases.filter(release => release.url && !release.site && !release.entity);
 
-	const siteSlugs = Array.from(new Set(
-		baseReleasesWithoutSite
+	const entitySlugs = Array.from(new Set(
+		baseReleasesWithoutEntity
 			.map(baseRelease => urlToSiteSlug(baseRelease.url))
 			.filter(Boolean),
 	));
 
-	const siteEntries = await knex('entities')
+	const entities = await knex('entities')
+		.select(knex.raw('entities.*, row_to_json(parents) as parent'))
 		.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
-		.select('entities.*', 'parents.id as network_id', 'parents.name as network_name', 'parents.slug as network_slug', 'parents.url as network_url', 'parents.parameters as network_parameters', 'parents.description as network_description')
-		.where('entities.type', 2)
-		.whereIn('entities.slug', siteSlugs);
-
-	const networkEntries = await knex('entities')
-		.where('type', 1)
-		.whereIn('slug', siteSlugs);
-
-	const sites = await curateSites(siteEntries, true, false);
-	const networks = await curateNetworks(networkEntries, true, false, false);
-	const markedNetworks = networks.map(network => ({ ...network, isNetwork: true }));
-
-	const sitesBySlug = []
-		.concat(markedNetworks, sites)
-		.reduce((accSites, site) => ({ ...accSites, [site.slug]: site }), {});
-
-	return sitesBySlug;
+		.whereIn('entities.slug', entitySlugs)
+		.orderBy('entities.type', 'asc');
+
+	// channel entity will overwrite network entity
+	const entitiesBySlug = entities.reduce((accEntities, entity) => ({ ...accEntities, [entity.slug]: entity }), {});
+
+	return entitiesBySlug;
 }
 
 function toBaseReleases(baseReleasesOrUrls) {
@@ -92,23 +81,22 @@ function toBaseReleases(baseReleasesOrUrls) {
 		.filter(Boolean);
 }
 
-async function scrapeRelease(baseRelease, sites, type = 'scene') {
-	const site = baseRelease.site || sites[urlToSiteSlug(baseRelease.url)];
+async function scrapeRelease(baseRelease, entities, type = 'scene') {
+	const entity = baseRelease.entity || baseRelease.site || entities[urlToSiteSlug(baseRelease.url)];
 
-	if (!site) {
-		logger.warn(`No site available for ${baseRelease.url}`);
+	if (!entity) {
+		logger.warn(`No entity available for ${baseRelease.url}`);
 		return baseRelease;
 	}
 
 	if ((!baseRelease.url && !baseRelease.path) || !argv.deep) {
 		return {
 			...baseRelease,
-			site,
+			entity,
 		};
 	}
 
-	const siteWithFallbackNetwork = site.isNetwork ? { ...site, network: site } : site; // make site.network available, even when site is network fallback
-	const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
+	const scraper = scrapers.releases[entity.slug] || scrapers.releases[entity.parent?.slug];
 
 	if (!scraper) {
 		logger.warn(`Could not find scraper for ${baseRelease.url}`);
@@ -116,7 +104,7 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
 	}
 
 	if ((type === 'scene' && !scraper.fetchScene) || (type === 'movie' && !scraper.fetchMovie)) {
-		logger.warn(`The '${site.name}'-scraper cannot fetch individual ${type}s`);
+		logger.warn(`The '${entity.name}'-scraper cannot fetch individual ${type}s`);
 		return baseRelease;
 	}
 
@@ -124,14 +112,14 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
 		logger.verbose(`Fetching ${type} ${baseRelease.url}`);
 
 		const scrapedRelease = type === 'scene'
-			? await scraper.fetchScene(baseRelease.url, siteWithFallbackNetwork, baseRelease, null, include)
-			: await scraper.fetchMovie(baseRelease.url, siteWithFallbackNetwork, baseRelease, null, include);
+			? await scraper.fetchScene(baseRelease.url, entity, baseRelease, null, include)
+			: await scraper.fetchMovie(baseRelease.url, entity, baseRelease, null, include);
 
 		const mergedRelease = {
 			...baseRelease,
 			...scrapedRelease,
 			deep: !!scrapedRelease,
-			site,
+			entity,
 		};
 
 		if (!mergedRelease.entryId) {
@@ -155,19 +143,19 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
 	}
 }
 
-async function scrapeReleases(baseReleases, sites, type) {
+async function scrapeReleases(baseReleases, entities, type) {
 	return Promise.map(
 		baseReleases,
-		async baseRelease => scrapeRelease(baseRelease, sites, type),
+		async baseRelease => scrapeRelease(baseRelease, entities, type),
 		{ concurrency: 10 },
 	);
 }
 
 async function fetchReleases(baseReleasesOrUrls, type = 'scene') {
 	const baseReleases = toBaseReleases(baseReleasesOrUrls);
-	const sites = await findSites(baseReleases);
+	const entities = await findEntities(baseReleases);
 
-	const deepReleases = await scrapeReleases(baseReleases, sites, type);
+	const deepReleases = await scrapeReleases(baseReleases, entities, type);
 
 	return deepReleases.filter(Boolean);
 }
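Note: with a single entity per release, scraper lookup in deep.js falls back from the channel's own slug to its parent network's slug instead of going through site.network. A small sketch of that resolution, assuming the scrapers.releases map from src/scrapers/scrapers.js and entities shaped like the query above (the inline example data is hypothetical):

// Prefer a scraper registered under the channel's slug, fall back to the
// parent network's scraper, and return null when neither exists.
function resolveReleaseScraper(scrapers, entity) {
	return scrapers.releases[entity.slug] || scrapers.releases[entity.parent?.slug] || null;
}

// resolveReleaseScraper({ releases: { hushpass: {} } }, { slug: 'some-channel', parent: { slug: 'hushpass' } });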
@@ -3,21 +3,21 @@
 const util = require('util');
 
 const knex = require('../knex');
-const { get, geta, ed, fd, ctxa } = require('../utils/q');
+const { get, geta, ed, formatDate, ctxa } = require('../utils/q');
 const slugify = require('../utils/slugify');
 const { feetInchesToCm } = require('../utils/convert');
 
 async function getChannelRegExp(site) {
-	if (!['hushpass', 'interracialpass'].includes(site.network.slug)) return null;
+	if (!['hushpass', 'interracialpass'].includes(site.parent.slug)) return null;
 
-	const sites = await knex('sites').where('network_id', site.network.id);
+	const sites = await knex('sites').where('network_id', site.parent.id);
 
 	return new RegExp(sites.map(channel => channel.parameters?.match || channel.name).join('|'), 'i');
 }
 
 function deriveEntryId(release) {
 	if (release.date && release.title) {
-		return `${slugify(fd(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
+		return `${slugify(formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
 	}
 
 	return null;
@@ -140,7 +140,7 @@ function scrapeScene({ html, qu }, site, url, baseRelease) {
 	release.title = qu.q('.centerwrap h2', true);
 	release.description = qu.q('.videocontent p', true);
 
-	release.date = qu.date('.videodetails .date', 'MM/DD/YYYY');
+	release.date = qu.date('.videodetails .date', ['MM/DD/YYYY', 'YYYY-MM-DD']);
 	release.duration = qu.dur('.videodetails .date');
 
 	release.actors = qu.all('.modelname a', true);

@@ -8,7 +8,7 @@ const knex = require('./knex');
 const slugify = require('./utils/slugify');
 const { associateActors, scrapeActors } = require('./actors');
 const { associateReleaseTags } = require('./tags');
-const { curateSite } = require('./sites');
+const { curateEntity } = require('./entities');
 const { associateReleaseMedia } = require('./media');
 
 function curateReleaseEntry(release, batchId, existingRelease) {
@@ -20,9 +20,9 @@ function curateReleaseEntry(release, batchId, existingRelease) {
 	const curatedRelease = {
 		title: release.title,
 		entry_id: release.entryId || null,
-		entity_id: release.site?.id,
-		shoot_id: release.shootId || null,
+		entity_id: release.entity.id,
 		studio_id: release.studio?.id || null,
+		shoot_id: release.shootId || null,
 		url: release.url,
 		date: Number(release.date) ? release.date : null,
 		slug,
@@ -45,51 +45,47 @@ function curateReleaseEntry(release, batchId, existingRelease) {
 	return curatedRelease;
 }
 
-async function attachChannelSites(releases) {
-	const releasesWithoutSite = releases.filter(release => release.channel && (!release.site || release.site.isNetwork || release.site.slug !== release.channel));
+async function attachChannelEntities(releases) {
+	const releasesWithoutEntity = releases.filter(release => release.channel && !release.entity && release.entity.type !== 1);
 
-	const channelSites = await knex('entities')
-		.leftJoin('entities AS parents', 'parents.id', 'entities.parent_id')
-		.select('entities.*', 'parents.name as network_name', 'parents.slug as network_slug', 'parents.url as network_url', 'parents.parameters as network_parameters', 'parents.description as network_description')
-		.whereIn('entities.slug', releasesWithoutSite.map(release => release.channel));
+	const channelEntities = await knex('entities')
+		.select(knex.raw('entities.*, row_to_json(parents) as parent'))
+		.whereIn('entities.slug', releasesWithoutEntity.map(release => release.channel))
+		.where('entities.type', 2)
+		.leftJoin('entities AS parents', 'parents.id', 'entities.parent_id');
 
-	const channelSitesBySlug = channelSites.reduce((acc, site) => ({ ...acc, [site.slug]: site }), {});
+	const channelEntitiesBySlug = channelEntities.reduce((acc, entity) => ({ ...acc, [entity.slug]: entity }), {});
 
-	const releasesWithChannelSite = await Promise.all(releases
+	const releasesWithChannelEntity = await Promise.all(releases
 		.map(async (release) => {
-			if (release.channel && channelSitesBySlug[release.channel]) {
-				const curatedSite = await curateSite(channelSitesBySlug[release.channel]);
+			if (release.channel && channelEntitiesBySlug[release.channel]) {
+				const curatedEntity = await curateEntity(channelEntitiesBySlug[release.channel]);
 
 				return {
 					...release,
-					site: curatedSite,
+					entity: curatedEntity,
 				};
 			}
 
-			if (release.site && !release.site.isNetwork) {
+			if (release.entity) {
 				return release;
 			}
 
-			if (release.site && release.site.isNetwork) {
-				return {
-					...release,
-					site: null,
-					network: release.site,
-				};
-			}
-
 			logger.error(`Unable to match channel '${release.channel?.slug || release.channel}' from generic URL ${release.url}`);
 
 			return null;
 		}));
 
-	return releasesWithChannelSite.filter(Boolean);
+	return releasesWithChannelEntity.filter(Boolean);
 }
 
 async function attachStudios(releases) {
 	const studioSlugs = releases.map(release => release.studio).filter(Boolean);
 
-	const studios = await knex('studios').whereIn('slug', studioSlugs);
+	const studios = await knex('entities')
+		.whereIn('slug', studioSlugs)
+		.where('type', 3);
+
 	const studioBySlug = studios.reduce((acc, studio) => ({ ...acc, [studio.slug]: studio }), {});
 
 	const releasesWithStudio = releases.map((release) => {
@@ -111,7 +107,7 @@ async function attachStudios(releases) {
 }
 
 function attachReleaseIds(releases, storedReleases) {
-	const storedReleaseIdsBySiteIdAndEntryId = storedReleases.reduce((acc, release) => {
+	const storedReleaseIdsByEntityIdAndEntryId = storedReleases.reduce((acc, release) => {
 		if (!acc[release.entity_id]) acc[release.entity_id] = {};
 		acc[release.entity_id][release.entry_id] = release.id;
 
@@ -120,29 +116,29 @@ function attachReleaseIds(releases, storedReleases) {
 
 	const releasesWithId = releases.map(release => ({
 		...release,
-		id: storedReleaseIdsBySiteIdAndEntryId[release.site.id][release.entryId],
+		id: storedReleaseIdsByEntityIdAndEntryId[release.entity.id][release.entryId],
 	}));
 
 	return releasesWithId;
 }
 
 function filterInternalDuplicateReleases(releases) {
-	const releasesBySiteIdAndEntryId = releases.reduce((acc, release) => {
-		if (!release.site) {
+	const releasesByEntityIdAndEntryId = releases.reduce((acc, release) => {
+		if (!release.entity) {
 			return acc;
 		}
 
-		if (!acc[release.site.id]) {
-			acc[release.site.id] = {};
+		if (!acc[release.entity.id]) {
+			acc[release.entity.id] = {};
 		}
 
-		acc[release.site.id][release.entryId] = release;
+		acc[release.entity.id][release.entryId] = release;
 
 		return acc;
 	}, {});
 
-	return Object.values(releasesBySiteIdAndEntryId)
-		.map(siteReleases => Object.values(siteReleases))
+	return Object.values(releasesByEntityIdAndEntryId)
+		.map(entityReleases => Object.values(entityReleases))
 		.flat();
 }
 
@@ -150,17 +146,17 @@ async function filterDuplicateReleases(releases) {
 	const internalUniqueReleases = filterInternalDuplicateReleases(releases);
 
 	const duplicateReleaseEntries = await knex('releases')
-		.whereIn(['entry_id', 'entity_id'], internalUniqueReleases.map(release => [release.entryId, release.site.id]));
+		.whereIn(['entry_id', 'entity_id'], internalUniqueReleases.map(release => [release.entryId, release.entity.id]));
 
-	const duplicateReleasesBySiteIdAndEntryId = duplicateReleaseEntries.reduce((acc, release) => {
+	const duplicateReleasesByEntityIdAndEntryId = duplicateReleaseEntries.reduce((acc, release) => {
 		if (!acc[release.entity_id]) acc[release.entity_id] = {};
 		acc[release.entity_id][release.entry_id] = true;
 
 		return acc;
 	}, {});
 
-	const duplicateReleases = internalUniqueReleases.filter(release => duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]);
-	const uniqueReleases = internalUniqueReleases.filter(release => !duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]);
+	const duplicateReleases = internalUniqueReleases.filter(release => duplicateReleasesByEntityIdAndEntryId[release.entity.id]?.[release.entryId]);
+	const uniqueReleases = internalUniqueReleases.filter(release => !duplicateReleasesByEntityIdAndEntryId[release.entity.id]?.[release.entryId]);
 
 	return {
 		uniqueReleases,
@@ -216,10 +212,10 @@ async function storeReleases(releases) {
 
 	const [batchId] = await knex('batches').insert({ comment: null }).returning('id');
 
-	const releasesWithSites = await attachChannelSites(releases);
-	const releasesWithStudios = await attachStudios(releasesWithSites);
+	const releasesWithChannels = await attachChannelEntities(releases);
+	const releasesWithStudios = await attachStudios(releasesWithChannels);
 
-	// uniqueness is site ID + entry ID, filter uniques after adding sites
+	// uniqueness is entity ID + entry ID, filter uniques after adding entities
 	const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios);
 
 	const curatedNewReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));
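Note: release uniqueness in the store module is now keyed on (entity_id, entry_id) rather than (site_id, entry_id). A sketch of the two-level index used for duplicate detection, mirroring the reduce above (the stored rows are assumed to carry entity_id, entry_id and id columns, as in the releases table):

// Index stored release rows by entity_id, then entry_id, so a scraped release
// can be matched with index[release.entity.id]?.[release.entryId].
function indexReleasesByEntityAndEntry(storedReleases) {
	return storedReleases.reduce((acc, release) => {
		if (!acc[release.entity_id]) acc[release.entity_id] = {};
		acc[release.entity_id][release.entry_id] = release.id;
		return acc;
	}, {});
}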
							
								
								
									
src/tags.js (26 lines changed)
@@ -27,27 +27,27 @@ async function matchReleaseTags(releases) {
 	return tagIdsBySlug;
 }
 
-async function getSiteTags(releases) {
-	const siteIds = releases.map(release => release.site.id);
-	const siteTags = await knex('sites_tags').whereIn('site_id', siteIds);
+async function getEntityTags(releases) {
+	const entityIds = releases.map(release => release.entity.id);
+	const entityTags = await knex('entities_tags').whereIn('entity_id', entityIds);
 
-	const siteTagIdsBySiteId = siteTags.reduce((acc, siteTag) => {
-		if (!acc[siteTag.site_id]) {
-			acc[siteTag.site_id] = [];
+	const entityTagIdsByEntityId = entityTags.reduce((acc, entityTag) => {
+		if (!acc[entityTag.entity_id]) {
+			acc[entityTag.entity_id] = [];
 		}
 
-		acc[siteTag.site_id].push(siteTag.tag_id);
+		acc[entityTag.entity_id].push(entityTag.tag_id);
 
 		return acc;
 	}, {});
 
-	return siteTagIdsBySiteId;
+	return entityTagIdsByEntityId;
 }
 
-function buildReleaseTagAssociations(releases, tagIdsBySlug, siteTagIdsBySiteId) {
+function buildReleaseTagAssociations(releases, tagIdsBySlug, entityTagIdsByEntityId) {
 	const tagAssociations = releases
 		.map((release) => {
-			const siteTagIds = siteTagIdsBySiteId[release.site.id];
+			const entityTagIds = entityTagIdsByEntityId[release.entity.id];
 			const releaseTags = release.tags || [];
 
 			const releaseTagIds = releaseTags.every(tag => typeof tag === 'number')
@@ -57,7 +57,7 @@ function buildReleaseTagAssociations(releases, tagIdsBySlug, siteTagIdsBySiteId)
 			const tags = [...new Set(
 				// filter duplicates and empties
 				releaseTagIds
-					.concat(siteTagIds)
+					.concat(entityTagIds)
 					.filter(Boolean),
 			)]
 				.map(tagId => ({
@@ -94,9 +94,9 @@ async function filterUniqueAssociations(tagAssociations) {
 
 async function associateReleaseTags(releases) {
 	const tagIdsBySlug = await matchReleaseTags(releases);
-	const siteTagIdsBySiteId = await getSiteTags(releases);
+	const EntityTagIdsByEntityId = await getEntityTags(releases);
 
-	const tagAssociations = buildReleaseTagAssociations(releases, tagIdsBySlug, siteTagIdsBySiteId);
+	const tagAssociations = buildReleaseTagAssociations(releases, tagIdsBySlug, EntityTagIdsByEntityId);
 	const uniqueAssociations = await filterUniqueAssociations(tagAssociations);
 
 	await knex('releases_tags').insert(uniqueAssociations);
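Note: tag inheritance now reads from entities_tags keyed by entity_id, and a release's final tag set is the deduplicated union of its own tags and its entity's tags. A minimal sketch of that merge, following buildReleaseTagAssociations above (the standalone helper name is illustrative):

// Merge a release's own tag IDs with the tag IDs inherited from its entity,
// dropping duplicates and empty values.
function mergeTagIds(releaseTagIds, entityTagIds = []) {
	return [...new Set(releaseTagIds.concat(entityTagIds).filter(Boolean))];
}

// mergeTagIds([1, 2, 3], [3, 4]); // => [1, 2, 3, 4]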
@@ -196,12 +196,12 @@ async function scrapeSite(site, accSiteReleases) {
 	}
 }
 
-async function scrapeNetworkSequential(network) {
+async function scrapeNetworkSequential(networkEntity) {
 	return Promise.reduce(
-		network.sites,
-		async (chain, site) => {
+		networkEntity.children,
+		async (chain, siteEntity) => {
 			const accSiteReleases = await chain;
-			const siteReleases = await scrapeSite(site, network, accSiteReleases);
+			const siteReleases = await scrapeSite(siteEntity, networkEntity, accSiteReleases);
 
 			return accSiteReleases.concat(siteReleases);
 		},
@@ -209,10 +209,10 @@ async function scrapeNetworkSequential(network) {
 	);
 }
 
-async function scrapeNetworkParallel(network) {
+async function scrapeNetworkParallel(networkEntity) {
 	return Promise.map(
-		network.children,
-		async site => scrapeSite(site, network),
+		networkEntity.children,
+		async siteEntity => scrapeSite(siteEntity, networkEntity),
 		{ concurrency: 3 },
 	);
 }

@@ -45,7 +45,7 @@ function slugify(string, delimiter = '-', {
 		return string;
 	}
 
-	const slugComponents = string.trim().toLowerCase().match(/[A-Za-zÀ-ÖØ-öø-ÿ]+/g);
+	const slugComponents = string.trim().toLowerCase().match(/[A-Za-zÀ-ÖØ-öø-ÿ0-9]+/g);
 
 	if (!slugComponents) {
 		return '';