Matching URLs to entity using hostname rather than slug to minimize collisions. Fixed missing Cum Louder POV logo.
| Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB | 
| Before Width: | Height: | Size: 3.9 KiB After Width: | Height: | Size: 3.9 KiB | 
| Before Width: | Height: | Size: 2.7 KiB After Width: | Height: | Size: 2.8 KiB | 
| Before Width: | Height: | Size: 2.4 KiB After Width: | Height: | Size: 2.5 KiB | 
| Before Width: | Height: | Size: 7.2 KiB After Width: | Height: | Size: 7.2 KiB | 
| Before Width: | Height: | Size: 6.8 KiB After Width: | Height: | Size: 6.8 KiB | 
| Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB | 
| Before Width: | Height: | Size: 5.2 KiB After Width: | Height: | Size: 5.3 KiB | 
| After Width: | Height: | Size: 3.2 KiB | 
| Before Width: | Height: | Size: 3.0 KiB After Width: | Height: | Size: 3.1 KiB | 
| Before Width: | Height: | Size: 2.9 KiB After Width: | Height: | Size: 2.9 KiB | 
| Before Width: | Height: | Size: 1.8 KiB After Width: | Height: | Size: 1.9 KiB | 
| Before Width: | Height: | Size: 1.8 KiB After Width: | Height: | Size: 1.9 KiB | 
| Before Width: | Height: | Size: 1.8 KiB After Width: | Height: | Size: 1.9 KiB | 
| Before Width: | Height: | Size: 7.2 KiB After Width: | Height: | Size: 7.2 KiB | 
| Before Width: | Height: | Size: 3.9 KiB After Width: | Height: | Size: 3.9 KiB | 
| Before Width: | Height: | Size: 4.8 KiB After Width: | Height: | Size: 4.8 KiB | 
| Before Width: | Height: | Size: 4.0 KiB After Width: | Height: | Size: 4.1 KiB | 
| Before Width: | Height: | Size: 5.6 KiB After Width: | Height: | Size: 5.7 KiB | 
| Before Width: | Height: | Size: 6.9 KiB After Width: | Height: | Size: 6.9 KiB | 
| Before Width: | Height: | Size: 5.2 KiB After Width: | Height: | Size: 5.3 KiB | 
| Before Width: | Height: | Size: 5.5 KiB After Width: | Height: | Size: 5.5 KiB | 
| Before Width: | Height: | Size: 4.4 KiB After Width: | Height: | Size: 4.5 KiB | 
| Before Width: | Height: | Size: 5.3 KiB After Width: | Height: | Size: 5.4 KiB | 
| Before Width: | Height: | Size: 5.2 KiB After Width: | Height: | Size: 5.2 KiB | 
| Before Width: | Height: | Size: 3.4 KiB After Width: | Height: | Size: 3.4 KiB | 
| Before Width: | Height: | Size: 19 KiB After Width: | Height: | Size: 19 KiB | 
| Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 15 KiB | 
| Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 16 KiB | 
| Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB | 
| Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 12 KiB | 
| Before Width: | Height: | Size: 27 KiB After Width: | Height: | Size: 27 KiB | 
| Before Width: | Height: | Size: 8.9 KiB After Width: | Height: | Size: 9.0 KiB | 
| After Width: | Height: | Size: 12 KiB | 
| Before Width: | Height: | Size: 19 KiB After Width: | Height: | Size: 19 KiB | 
| Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB | 
| Before Width: | Height: | Size: 1.8 KiB After Width: | Height: | Size: 1.9 KiB | 
| Before Width: | Height: | Size: 1.8 KiB After Width: | Height: | Size: 1.9 KiB | 
| Before Width: | Height: | Size: 1.8 KiB After Width: | Height: | Size: 1.9 KiB | 
| Before Width: | Height: | Size: 9.0 KiB After Width: | Height: | Size: 9.0 KiB | 
| Before Width: | Height: | Size: 19 KiB After Width: | Height: | Size: 19 KiB | 
| Before Width: | Height: | Size: 13 KiB After Width: | Height: | Size: 13 KiB | 
| Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 18 KiB | 
| Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB | 
| Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 12 KiB | 
| Before Width: | Height: | Size: 8.9 KiB After Width: | Height: | Size: 9.0 KiB | 
| Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 18 KiB | 
| Before Width: | Height: | Size: 9.9 KiB After Width: | Height: | Size: 10 KiB | 
| Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 16 KiB | 
| Before Width: | Height: | Size: 10 KiB After Width: | Height: | Size: 10 KiB | 
| Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 17 KiB | 
							
								
								
									
										19
									
								
								src/deep.js
								
								
								
								
							
							
						
						|  | @ -7,7 +7,7 @@ const { mergeAdvanced: merge } = require('object-merge-advanced'); | |||
| 
 | ||||
| const argv = require('./argv'); | ||||
| const include = require('./utils/argv-include')(argv); | ||||
| const { fetchReleaseEntities, urlToSiteSlug } = require('./entities'); | ||||
| const { fetchReleaseEntities, urlToHostname } = require('./entities'); | ||||
| const logger = require('./logger')(__filename); | ||||
| const qu = require('./utils/qu'); | ||||
| const getRecursiveParameters = require('./utils/get-recursive-parameters'); | ||||
|  | @ -110,8 +110,11 @@ function fetchMovie(scraper, url, entity, baseRelease, options) { | |||
| 	return fetchScene(scraper, url, entity, baseRelease, options, 'movie'); | ||||
| } | ||||
| 
 | ||||
| async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') { | ||||
| 	const entity = baseRelease.entity || entitiesBySlug[urlToSiteSlug(baseRelease.url)]; | ||||
| async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') { | ||||
| 	const entity = baseRelease.entity || entitiesByHostname[urlToHostname(baseRelease.url)]; | ||||
| 
 | ||||
| 	console.log(entitiesByHostname); | ||||
| 	console.log(entity); | ||||
| 
 | ||||
| 	if (!entity) { | ||||
| 		logger.warn(`No entity available for ${baseRelease.url}`); | ||||
|  | @ -222,10 +225,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') { | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| async function scrapeReleases(baseReleases, entitiesBySlug, type) { | ||||
| 	const entitiesWithBeforeDataEntries = await Promise.all(Object.entries(entitiesBySlug).map(async ([slug, entity]) => { | ||||
| 		console.log('scraper', entity.scraper?.beforeFetchScenes); | ||||
| 
 | ||||
| async function scrapeReleases(baseReleases, entitiesByHostname, type) { | ||||
| 	const entitiesWithBeforeDataEntries = await Promise.all(Object.entries(entitiesByHostname).map(async ([slug, entity]) => { | ||||
| 		if (entity.scraper?.beforeFetchScenes) { | ||||
| 			const parameters = getRecursiveParameters(entity); | ||||
| 			const preData = await entity.scraper.beforeFetchScenes(entity, parameters); | ||||
|  | @ -249,9 +250,9 @@ async function scrapeReleases(baseReleases, entitiesBySlug, type) { | |||
| 
 | ||||
| async function fetchReleases(baseReleasesOrUrls, type = 'scene') { | ||||
| 	const baseReleases = toBaseReleases(baseReleasesOrUrls); | ||||
| 	const entitiesBySlug = await fetchReleaseEntities(baseReleases); | ||||
| 	const entitiesByHostname = await fetchReleaseEntities(baseReleases); | ||||
| 
 | ||||
| 	const deepReleases = await scrapeReleases(baseReleases, entitiesBySlug, type); | ||||
| 	const deepReleases = await scrapeReleases(baseReleases, entitiesByHostname, type); | ||||
| 
 | ||||
| 	return deepReleases.filter(Boolean); | ||||
| } | ||||
|  |  | |||
|  | @ -82,6 +82,7 @@ async function curateEntities(entities, includeParameters) { | |||
| 	return Promise.all(entities.map(async (entity) => curateEntity(entity, includeParameters))); | ||||
| } | ||||
| 
 | ||||
| /* obsolete in favor of urlToHostname | ||||
| function urlToSiteSlug(url) { | ||||
| 	try { | ||||
| 		const slug = new URL(url) | ||||
|  | @ -96,6 +97,21 @@ function urlToSiteSlug(url) { | |||
| 		return null; | ||||
| 	} | ||||
| } | ||||
| */ | ||||
| 
 | ||||
/**
 * Derive the hostname used to match a URL to an entity.
 *
 * A leading `www.` is stripped when present, so `https://www.example.com/a`
 * and `https://example.com/b` both yield `example.com`. Other subdomains are
 * kept as-is (`tour.example.com` stays `tour.example.com`).
 *
 * @param {string} url - Absolute URL to extract the hostname from.
 * @returns {string|null} Hostname without a leading `www.`, or null when the
 *   URL cannot be parsed or has no hostname.
 */
function urlToHostname(url) {
	try {
		// The www. prefix is optional: requiring it made every host without
		// www. resolve to undefined. It is anchored so only a leading prefix
		// is removed, never a `www.` that appears in the middle of the host.
		const hostname = new URL(url).hostname.replace(/^www\./, '');

		return hostname || null;
	} catch (error) {
		logger.warn(`Failed to derive entity hostname from '${url}': ${error.message}`);

		return null;
	}
}
| 
 | ||||
| async function fetchIncludedEntities() { | ||||
| 	const include = { | ||||
|  | @ -191,6 +207,7 @@ async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') { | |||
| 				array['parent'] as parent_path | ||||
| 			FROM entities | ||||
| 			WHERE slug = ANY(:entitySlugs) | ||||
| 			OR url ILIKE ANY(:entityHosts) | ||||
| 
 | ||||
| 			UNION ALL | ||||
| 
 | ||||
|  | @ -215,14 +232,23 @@ async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') { | |||
| 		WHERE entity_tree.parent_id IS NULL | ||||
| 		GROUP BY entity_tree.entity | ||||
| 		ORDER BY entity->'type' :sort; | ||||
| 	`, { entitySlugs, sort: knex.raw(sort) });
 | ||||
| 	`, {
 | ||||
| 		entitySlugs: entitySlugs.filter((slug) => !slug.includes('.')), | ||||
| 		entityHosts: entitySlugs.filter((slug) => slug.includes('.')).map((hostname) => `%${hostname}%`), | ||||
| 		sort: knex.raw(sort), | ||||
| 	}); | ||||
| 
 | ||||
| 	// channel entity will overwrite network entity
 | ||||
| 	const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => ({ | ||||
| 		...accEntities, | ||||
| 		[entity.slug]: accEntities[entity.slug] || curateEntity(entity, true), | ||||
| 		[urlToSiteSlug(entity.url)]: accEntities[urlToSiteSlug(entity.url)] || curateEntity(entity, true), | ||||
| 	}), {}); | ||||
| 	const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => { | ||||
| 		const host = urlToHostname(entity.url); | ||||
| 		const curatedEntity = accEntities[entity.slug] || accEntities[host] || curateEntity(entity, true); | ||||
| 
 | ||||
| 		return { | ||||
| 			...accEntities, | ||||
| 			[entity.slug]: curatedEntity, | ||||
| 			[host]: curatedEntity, | ||||
| 		}; | ||||
| 	}, {}); | ||||
| 
 | ||||
| 	return entitiesBySlug; | ||||
| } | ||||
|  | @ -232,7 +258,7 @@ async function fetchReleaseEntities(baseReleases) { | |||
| 
 | ||||
| 	const entitySlugs = Array.from(new Set( | ||||
| 		baseReleasesWithoutEntity | ||||
| 			.map((baseRelease) => urlToSiteSlug(baseRelease.url)) | ||||
| 			.map((baseRelease) => urlToHostname(baseRelease.url)) | ||||
| 			.filter(Boolean), | ||||
| 	)); | ||||
| 
 | ||||
|  | @ -409,5 +435,6 @@ module.exports = { | |||
| 	getRecursiveParent, | ||||
| 	searchEntities, | ||||
| 	flushEntities, | ||||
| 	urlToSiteSlug, | ||||
| 	urlToHostname, | ||||
| 	// urlToSiteSlug,
 | ||||
| }; | ||||
|  |  | |||
|  | @ -22,6 +22,8 @@ function scrapeBlockLatest(scenes) { | |||
| 
 | ||||
| 		release.teaser = qu.video(); | ||||
| 
 | ||||
| 		console.log(release); | ||||
| 
 | ||||
| 		return release; | ||||
| 	}); | ||||
| } | ||||
|  | @ -57,6 +59,15 @@ function scrapeClassicLatest(scenes) { | |||
| 	}); | ||||
| } | ||||
| 
 | ||||
/**
 * Scrape a single scene page into a release object.
 *
 * Only the title is extracted, read from the `.indScene h2` element.
 *
 * @param {object} context - Scrape context passed in by the caller.
 * @param {object} context.query - Query helper exposing `content(selector)`;
 *   presumably the unprint query object, since this module exports
 *   `useUnprint: true` — confirm against the caller.
 * @returns {object} Release with a `title` property.
 */
function scrapeScene({ query }) {
	const release = {};

	release.title = query.content('.indScene h2');

	return release;
}
| 
 | ||||
| async function fetchLatest(site, page = 1) { | ||||
| 	if (!site.parameters) { | ||||
| 		return null; | ||||
|  | @ -74,4 +85,6 @@ async function fetchLatest(site, page = 1) { | |||
| 
 | ||||
| module.exports = { | ||||
| 	fetchLatest, | ||||
| 	scrapeScene, | ||||
| 	useUnprint: true, | ||||
| }; | ||||
|  |  | |||
|  | @ -481,10 +481,14 @@ async function addReleaseMedia(medias, release, target) { | |||
| 			await transferMedia(media, target); | ||||
| 		} | ||||
| 
 | ||||
| 		await knex(`${release.type}s_${target}`).insert({ | ||||
| 			[`${release.type}_id`]: release.id, | ||||
| 			media_id: id, | ||||
| 		}); | ||||
| 		try { | ||||
| 			await knex(`${release.type}s_${target}`).insert({ | ||||
| 				[`${release.type}_id`]: release.id, | ||||
| 				media_id: id, | ||||
| 			}); | ||||
| 		} catch (error) { | ||||
| 			console.warn(`Ignored duplicate ${release.type} ${target} association ${media.hash} with ${release.id} "${release.title}"`); | ||||
| 		} | ||||
| 	}, Promise.resolve()); | ||||
| } | ||||
| 
 | ||||
|  | @ -496,8 +500,6 @@ async function linkMovieScenes(release, context) { | |||
| 			&& storedMovie.entity.slug === linkedMovie.entity.slug | ||||
| 			&& storedMovie.entity.type === linkedMovie.entity.type); | ||||
| 
 | ||||
| 		console.log('movie', linkedMovie, movie); | ||||
| 
 | ||||
| 		if (!movie) { | ||||
| 			throw new Error(`Missing ${linkedMovie.entity.slug} movie '${linkedMovie.title}' in '${release.title}'`); | ||||
| 		} | ||||
|  | @ -534,7 +536,7 @@ async function addRelease(release, context) { | |||
| 	]); | ||||
| 
 | ||||
| 	if (!entity) { | ||||
| 		throw new Error(`Release contains non-existent ${release.entity.type} '${release.entity.slug}'`); | ||||
| 		throw new Error(`Release "${release.title}" contains non-existent ${release.entity.type} '${release.entity.slug}'`); | ||||
| 	} | ||||
| 
 | ||||
| 	const [releaseEntry] = await knex(`${release.type}s`) | ||||
|  |  | |||