forked from DebaucheryLibrarian/traxxx
				
			Improved Little Caprice Dreams scraper.
This commit is contained in:
		
							parent
							
								
									9a183c7ffb
								
							
						
					
					
						commit
						71c884fe48
					
				|  | @ -166,6 +166,7 @@ module.exports = { | |||
| 		'hitzefrei', | ||||
| 		'porncz', | ||||
| 		'czechav', | ||||
| 		'littlecapricedreams', | ||||
| 		'gangbangcreampie', | ||||
| 		'gloryholesecrets', | ||||
| 		'aziani', | ||||
|  |  | |||
|  | @ -661,7 +661,7 @@ async function scrapeActors(argNames) { | |||
| 
 | ||||
| 	logger.info(`Scraping profiles for ${actorNames.length} actors`); | ||||
| 
 | ||||
| 	const sources = argv.actorsSources || config.profiles || Object.keys(scrapers.actors); | ||||
| 	const sources = argv.profileSources || config.profiles || Object.keys(scrapers.actors); | ||||
| 	const entitySlugs = sources.flat(); | ||||
| 
 | ||||
| 	const [entities, existingActorEntries] = await Promise.all([ | ||||
|  |  | |||
|  | @ -72,7 +72,7 @@ const { argv } = yargs | |||
| 	.option('actor-sources', { | ||||
| 		describe: 'Use these scrapers for actor data', | ||||
| 		type: 'array', | ||||
| 		alias: ['actor-source', 'profile-sources', 'profile-source', 'source'], | ||||
| 		alias: ['actor-source', 'profile-sources', 'profile-source', 'source', 'sources'], | ||||
| 	}) | ||||
| 	.option('movie-scenes', { | ||||
| 		describe: 'Fetch all scenes for a movie', | ||||
|  |  | |||
|  | @ -17,13 +17,21 @@ function matchChannel(release, channel) { | |||
| 	serieNames.superprivate = serieNames.superprivatex; | ||||
| 	serieNames.nasst = serieNames.nassty; | ||||
| 	serieNames.sexlesson = serieNames.sexlessons; | ||||
| 	serieNames['sex lesson'] = serieNames.sexlessons; | ||||
| 
 | ||||
| 	const serieName = release.title.match(new RegExp(Object.keys(serieNames).join('|'), 'i'))?.[0]; | ||||
| 	// ensure longest key matches first
 | ||||
| 	const serieKeys = Object.keys(serieNames).sort((nameA, nameB) => nameB.length - nameA.length); | ||||
| 
 | ||||
| 	const serieName = release.title.match(new RegExp(serieKeys.join('|'), 'i'))?.[0]; | ||||
| 	const serie = serieName && serieNames[slugify(serieName, '')]; | ||||
| 
 | ||||
| 	return serie?.slug || null; | ||||
| 	// title: release.title.replace(new RegExp(`(${serieName}|${serie.name}|${serie.slug})[\\s:–-]*`, 'i'), ''),
 | ||||
| 	if (serie) { | ||||
| 		return { | ||||
| 			slug: serie.slug, | ||||
| 			title: release.title.replace(new RegExp(`(${serieName}|${serie.name}|${serie.slug})\\s*[-–:/]+\\s*`, 'ig'), ''), | ||||
| 		}; | ||||
| 	} | ||||
| 
 | ||||
| 	return null; | ||||
| } | ||||
| 
 | ||||
| function scrapeAll(scenes, channel) { | ||||
|  | @ -41,9 +49,10 @@ function scrapeAll(scenes, channel) { | |||
| 			referer: channel.url, | ||||
| 		}; | ||||
| 
 | ||||
| 		release.channel = matchChannel(release, channel); | ||||
| 
 | ||||
| 		return release; | ||||
| 		return { | ||||
| 			...release, | ||||
| 			...matchChannel(release, channel), | ||||
| 		}; | ||||
| 	}); | ||||
| } | ||||
| 
 | ||||
|  | @ -100,9 +109,32 @@ async function scrapeScene({ query }, url, channel, include) { | |||
| 		release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]')); | ||||
| 	} | ||||
| 
 | ||||
| 	release.channel = matchChannel(release, channel); | ||||
| 	return { | ||||
| 		...release, | ||||
| 		...matchChannel(release, channel), | ||||
| 	}; | ||||
| } | ||||
| 
 | ||||
| 	return release; | ||||
/**
 * Build an actor profile from a model page.
 *
 * Only the avatar is extracted. The "key: value" bio paragraphs were previously
 * parsed and printed with console.log without ever being added to the profile;
 * that parsing threw a TypeError on any paragraph lacking a colon, so a single
 * free-text paragraph made the whole profile scrape fail.
 *
 * @param {object} context - scraped page context
 * @param {object} context.query - query helper scoped to the model page; must provide `img(selector)`
 * @param {string} url - URL of the model page, stored as the avatar's referer
 * @returns {{ avatar: { src: *, referer: string } }} profile whose avatar `src` is whatever `query.img('.model-page')` returns
 */
function scrapeProfile({ query }, url) {
	// TODO: map the bio paragraphs ('div p', "key: value" text) onto profile fields once
	// the available keys are known; skip paragraphs that do not contain a colon.
	return {
		avatar: {
			src: query.img('.model-page'),
			referer: url,
		},
	};
}
| 
 | ||||
| async function fetchLatest(channel) { | ||||
|  | @ -128,7 +160,44 @@ async function fetchScene(url, channel, baseRelease, include) { | |||
| 	return res.status; | ||||
| } | ||||
| 
 | ||||
/**
 * Resolve the profile page URL for an actor.
 *
 * Resolution order:
 *  1. `baseActor.url` when it is set;
 *  2. otherwise the link of the first '.models' item on the pornstars overview
 *     whose slugified image title equals `baseActor.slug`.
 *
 * @param {object} baseActor - actor with an optional `url` and a `slug`
 * @returns {Promise<string|number|null>} the profile URL, `null` when no overview
 *   item matches, or the overview response's `status` when that request is not ok.
 *   Callers must tell the status apart from a URL.
 */
async function getActorUrl(baseActor) {
	const { url: knownUrl } = baseActor;

	if (knownUrl) {
		return knownUrl;
	}

	const res = await qu.getAll('https://www.littlecaprice-dreams.com/pornstars', '.models');

	if (!res.ok) {
		return res.status;
	}

	// compare on slugs so the match does not depend on how the title is written
	const match = res.items.find((item) => slugify(item.query.q('img', 'title')) === baseActor.slug);

	return match ? match.query.url('a') : null;
}
| 
 | ||||
/**
 * Fetch and scrape the profile page of an actor.
 *
 * @param {object} baseActor - actor to look up; passed to getActorUrl
 * @param {object} entity - forwarded to scrapeProfile as its third argument
 * @returns {Promise<object|number|null>} the scraped profile, `null` when no
 *   profile URL could be resolved, or the status of whichever request failed
 */
async function fetchProfile(baseActor, entity) {
	const actorUrl = await getActorUrl(baseActor);

	if (!actorUrl) {
		return null;
	}

	// getActorUrl returns the numeric status when the overview request fails; that
	// value is truthy, so without this guard it would be requested as if it were a URL
	if (typeof actorUrl === 'number') {
		return actorUrl;
	}

	const actorRes = await qu.get(actorUrl, '#main-content');

	if (actorRes.ok) {
		return scrapeProfile(actorRes.item, actorUrl, entity);
	}

	return actorRes.status;
}
| 
 | ||||
// Scraper entry points exposed by this module.
module.exports = { fetchLatest, fetchScene, fetchProfile };
|  |  | |||
|  | @ -223,6 +223,7 @@ const scrapers = { | |||
| 		killergram, | ||||
| 		kink, | ||||
| 		legalporno, | ||||
| 		littlecapricedreams, | ||||
| 		men, | ||||
| 		metrohd, | ||||
| 		milehighmedia, | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue