Added actor page deep scrape for the few Sperm Mania scenes not on the homepage.
This commit is contained in:
		
							parent
							
								
									0ac8ae4d5a
								
							
						
					
					
						commit
						1163b010fb
					
				src
|  | @ -185,7 +185,7 @@ async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') { | |||
| 
 | ||||
| 		if (!scrapedRelease || typeof scrapedRelease !== 'object' || Array.isArray(scrapedRelease)) { | ||||
| 			// scraper is unable to fetch the releases and returned an HTTP code or null
 | ||||
| 			throw new Error(`Scraper returned '${scrapedRelease}' when fetching latest from '${entity.name}' (${entity.parent?.name})`); | ||||
| 			throw new Error(`Scraper returned '${scrapedRelease}' when deep fetching (${entity.name}, ${entity.parent?.name}) ${baseRelease.url || baseRelease.path}`); | ||||
| 		} | ||||
| 
 | ||||
| 		// object-merge-advanced will use null as explicit false on hard merged keys, even when null as explicit false is disabled
 | ||||
|  |  | |||
|  | @ -15,8 +15,18 @@ const tagsMap = { | |||
| 	'pussy bukkake': ['cum-on-pussy'], | ||||
| }; | ||||
| 
 | ||||
| function entryIdFromMedia(release) { | ||||
| 	return [release.poster, release.trailer, ...(release.photos || [])].flat().filter(Boolean)[0]?.match(/(?:(?:preview)|(?:samples)|(?:tour))\/(.*)\//)?.[1].toLowerCase(); | ||||
| function entryIdFromMedia(release, toLowercase = true) { | ||||
| 	const originalEntryId = [release.poster, release.trailer, ...(release.photos || [])].flat().filter(Boolean)[0]?.match(/(?:(?:preview)|(?:samples)|(?:tour))\/(.*)\//)?.[1]; | ||||
| 
 | ||||
| 	if (!originalEntryId) { | ||||
| 		return null; | ||||
| 	} | ||||
| 
 | ||||
| 	if (toLowercase) { | ||||
| 		return originalEntryId.toLowerCase(); | ||||
| 	} | ||||
| 
 | ||||
| 	return originalEntryId; | ||||
| } | ||||
| 
 | ||||
| function scrapeAll(scenes, tilesByEntryId, channel) { | ||||
|  | @ -63,6 +73,9 @@ function scrapeAll(scenes, tilesByEntryId, channel) { | |||
| 					release[key] = value; | ||||
| 				} | ||||
| 			}); | ||||
| 		} else { | ||||
| 			// most tiles are on the front page, but not all; fall back to deep scraping the actor's page
 | ||||
| 			release.path = release.actors[0]?.url; | ||||
| 		} | ||||
| 
 | ||||
| 		return release; | ||||
|  | @ -94,9 +107,6 @@ function scrapeAllTiles(tiles, channel) { | |||
| 		const release = {}; | ||||
| 		const sceneString = query.content(); | ||||
| 
 | ||||
| 		const originalEntryId = query.attribute('.scene-hover', 'data-path'); | ||||
| 		release.entryId = originalEntryId?.toLowerCase(); | ||||
| 
 | ||||
| 		release.title = query.content('.scene-title'); | ||||
| 
 | ||||
| 		release.date = query.date('.scene-date, .sDate', 'YYYY-MM-DD'); | ||||
|  | @ -106,7 +116,7 @@ function scrapeAllTiles(tiles, channel) { | |||
| 			name: unprint.query.content(actorEl), | ||||
| 			url: channel.slug === 'fellatiojapan' | ||||
| 				? `${channel.url}/en/girl/${unprint.query.url(actorEl, null)}` | ||||
| 				: unprint.query.element(actorEl, null, { origin: channel.url }), | ||||
| 				: unprint.query.url(actorEl, null, { origin: channel.url }), | ||||
| 		})); | ||||
| 
 | ||||
| 		release.tags = [...query.contents('.data a[href*="/tag"]'), ...(tagsMap[query.content('.scene-type')?.toLowerCase()] || [])].filter(Boolean); | ||||
|  | @ -124,6 +134,10 @@ function scrapeAllTiles(tiles, channel) { | |||
| 			]; | ||||
| 		} | ||||
| 
 | ||||
| 		const originalEntryId = query.attribute('.scene-hover', 'data-path') || entryIdFromMedia(release, false); | ||||
| 
 | ||||
| 		release.entryId = originalEntryId?.toLowerCase(); | ||||
| 
 | ||||
| 		release.teaser = originalEntryId && `https://img.${channel.slug}.com/preview/${originalEntryId}/hover.mp4`; | ||||
| 
 | ||||
| 		release.photoCount = Number(sceneString.match(/(\d+) photos/)?.[1]) || null; | ||||
|  | @ -699,20 +713,21 @@ function scrapeSceneCospuri({ query }, { url, entity }) { | |||
| 	return release; | ||||
| } | ||||
| 
 | ||||
| // Fellatio Japan
 | ||||
| async function fetchSceneFellatio(url, channel, baseRelease) { | ||||
| // Sperm Mania, Fellatio Japan
 | ||||
| async function fetchScene(url, channel, baseRelease) { | ||||
| 	if (!baseRelease.entryId || !baseRelease.path) { | ||||
| 		return null; | ||||
| 	} | ||||
| 
 | ||||
| 	// no dedicated scene page, but there are dates on actor page; use that as 'deep' scrape
 | ||||
| 	// can't use front page like on Sperm Mania because dates are missing
 | ||||
| 	const res = await unprint.get(baseRelease.path, { selectAll: '.scene-obj' }); | ||||
| 	const res = await unprint.get(baseRelease.path, { selectAll: '.scene, .scene-obj' }); | ||||
| 
 | ||||
| 	if (res.ok) { | ||||
| 		const tiles = scrapeAllTiles(res.context, channel); | ||||
| 		const sceneTile = tiles.find((tile) => tile.entryId === baseRelease.entryId) || null; | ||||
| 
 | ||||
| 		return tiles.find((tile) => tile.entryId === baseRelease.entryId) || null; | ||||
| 		return sceneTile; | ||||
| 	} | ||||
| 
 | ||||
| 	return res.status; | ||||
|  | @ -830,6 +845,7 @@ async function fetchProfile({ slug, url: actorUrl }, { entity, parameters }) { | |||
| module.exports = { | ||||
| 	fetchLatest, | ||||
| 	fetchProfile, | ||||
| 	fetchScene, | ||||
| 	cospuri: { | ||||
| 		fetchLatest: fetchLatestCospuri, | ||||
| 		scrapeScene: scrapeSceneCospuri, | ||||
|  | @ -837,7 +853,7 @@ module.exports = { | |||
| 	}, | ||||
| 	fellatio: { | ||||
| 		fetchLatest: fetchLatestFellatio, | ||||
| 		fetchScene: fetchSceneFellatio, | ||||
| 		fetchScene, | ||||
| 		fetchProfile, | ||||
| 	}, | ||||
| 	handjob: { | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue