Added actor page deep scrape for the few Sperm Mania scenes not on the homepage.

2024-10-17 23:50:55 +02:00 · 2024-10-17 23:50:55 +02:00 · 1163b010fb
parent 0ac8ae4d5a
commit 1163b010fb
2 changed files with 28 additions and 12 deletions
--- a/src/deep.js
+++ b/src/deep.js
@ -185,7 +185,7 @@ async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') {
 		if (!scrapedRelease || typeof scrapedRelease !== 'object' || Array.isArray(scrapedRelease)) {
 			// scraper is unable to fetch the releases and returned an HTTP code or null
-			throw new Error(`Scraper returned '${scrapedRelease}' when fetching latest from '${entity.name}' (${entity.parent?.name})`);
+			throw new Error(`Scraper returned '${scrapedRelease}' when deep fetching (${entity.name}, ${entity.parent?.name}) ${baseRelease.url || baseRelease.path}`);
 		}
 		// object-merge-advance will use null as explicit false on hard merged keys, even when null as explicit false is disabled
--- a/src/scrapers/snowvalley.js
+++ b/src/scrapers/snowvalley.js
@ -15,8 +15,18 @@ const tagsMap = {
 	'pussy bukkake': ['cum-on-pussy'],
 };
-function entryIdFromMedia(release) {
+function entryIdFromMedia(release, toLowercase = true) {
-	return [release.poster, release.trailer, ...(release.photos || [])].flat().filter(Boolean)[0]?.match(/(?:(?:preview)|(?:samples)|(?:tour))\/(.*)\//)?.[1].toLowerCase();
+	const originalEntryId = [release.poster, release.trailer, ...(release.photos || [])].flat().filter(Boolean)[0]?.match(/(?:(?:preview)|(?:samples)|(?:tour))\/(.*)\//)?.[1];
 	if (!originalEntryId) {
 		return null;
 	}
 	if (toLowercase) {
 		return originalEntryId.toLowerCase();
 	}
 	return originalEntryId;
 }
 function scrapeAll(scenes, tilesByEntryId, channel) {
@ -63,6 +73,9 @@ function scrapeAll(scenes, tilesByEntryId, channel) {
 					release[key] = value;
 				}
 			});
 		} else {
 			// most tiles are on the front page, but not all, deep scrape actor's page
 			release.path = release.actors[0]?.url;
 		}
 		return release;
@ -94,9 +107,6 @@ function scrapeAllTiles(tiles, channel) {
 		const release = {};
 		const sceneString = query.content();
 		const originalEntryId = query.attribute('.scene-hover', 'data-path');
 		release.entryId = originalEntryId?.toLowerCase();
 		release.title = query.content('.scene-title');
 		release.date = query.date('.scene-date, .sDate', 'YYYY-MM-DD');
@ -106,7 +116,7 @@ function scrapeAllTiles(tiles, channel) {
 			name: unprint.query.content(actorEl),
 			url: channel.slug === 'fellatiojapan'
 				? `${channel.url}/en/girl/${unprint.query.url(actorEl, null)}`
-				: unprint.query.element(actorEl, null, { origin: channel.url }),
+				: unprint.query.url(actorEl, null, { origin: channel.url }),
 		}));
 		release.tags = [...query.contents('.data a[href*="/tag"]'), ...(tagsMap[query.content('.scene-type')?.toLowerCase()] || [])].filter(Boolean);
@ -124,6 +134,10 @@ function scrapeAllTiles(tiles, channel) {
 			];
 		}
 		const originalEntryId = query.attribute('.scene-hover', 'data-path') || entryIdFromMedia(release, false);
 		release.entryId = originalEntryId?.toLowerCase();
 		release.teaser = originalEntryId && `https://img.${channel.slug}.com/preview/${originalEntryId}/hover.mp4`;
 		release.photoCount = Number(sceneString.match(/(\d+) photos/)?.[1]) || null;
@ -699,20 +713,21 @@ function scrapeSceneCospuri({ query }, { url, entity }) {
 	return release;
 }
-// Fellatio Japan
+// Sperm Mania, Fellatio Japan
-async function fetchSceneFellatio(url, channel, baseRelease) {
+async function fetchScene(url, channel, baseRelease) {
 	if (!baseRelease.entryId || !baseRelease.path) {
 		return null;
 	}
 	// no dedicated scene page, but there are dates on actor page; use that as 'deep' scrape
 	// can't use front page like on Sperm Mania because dates are missing
-	const res = await unprint.get(baseRelease.path, { selectAll: '.scene-obj' });
+	const res = await unprint.get(baseRelease.path, { selectAll: '.scene, .scene-obj' });
 	if (res.ok) {
 		const tiles = scrapeAllTiles(res.context, channel);
 		const sceneTile = tiles.find((tile) => tile.entryId === baseRelease.entryId) || null;
-		return tiles.find((tile) => tile.entryId === baseRelease.entryId) || null;
+		return sceneTile;
 	}
 	return res.status;
@ -830,6 +845,7 @@ async function fetchProfile({ slug, url: actorUrl }, { entity, parameters }) {
 module.exports = {
 	fetchLatest,
 	fetchProfile,
 	fetchScene,
 	cospuri: {
 		fetchLatest: fetchLatestCospuri,
 		scrapeScene: scrapeSceneCospuri,
@ -837,7 +853,7 @@ module.exports = {
 	},
 	fellatio: {
 		fetchLatest: fetchLatestFellatio,
-		fetchScene: fetchSceneFellatio,
+		fetchScene,
 		fetchProfile,
 	},
 	handjob: {