Updated Team Skeet endpoints, replaced qu with unprint.
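
Note: the qu → unprint migration changes two call patterns, both visible in the diff below: extractDate now takes an explicit format string, and HTTP responses expose their parsed payload on res.data instead of qu's res.body. A minimal sketch of the new calls as this commit uses them (the URL is hypothetical):

    const unprint = require('unprint');

    async function example() {
      // extractDate now takes an explicit format string
      const date = unprint.extractDate('2024-01-15', 'YYYY-MM-DD');

      // unprint responses carry the payload on res.data, not res.body
      const res = await unprint.get('https://example.com/doc.json');

      return res.ok ? { date, data: res.data } : res.status;
    }
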
@@ -208,7 +208,7 @@ async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') {
 		datePrecision: curatedScrapedRelease.date // don't inherit date precision from base release
 			? curatedScrapedRelease.datePrecision
 			: baseRelease.datePrecision,
-		poster: Array.from(new Set([
+		poster: Array.from(new Set([ // use base poster as fallback for deep poster
 			...[].concat(curatedScrapedRelease.poster),
 			...[].concat(baseRelease.poster),
 		])).filter(Boolean),
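
Note: the poster change above merges the deep-scraped poster with the base release's poster, dedupes the candidates, and drops empty slots. A standalone sketch of that merge (function name hypothetical):

    // [].concat() normalizes a single URL or an array of URLs;
    // the Set dedupes, filter(Boolean) drops null/undefined.
    function mergePosters(deepPoster, basePoster) {
      return Array.from(new Set([
        ...[].concat(deepPoster),
        ...[].concat(basePoster),
      ])).filter(Boolean);
    }

    mergePosters('a.jpg', ['a.jpg', 'b.jpg']); // => ['a.jpg', 'b.jpg']
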
@@ -1,9 +1,10 @@
 'use strict';
 
-const qu = require('../utils/qu');
+const unprint = require('unprint');
 
 const http = require('../utils/http');
 const slugify = require('../utils/slugify');
-const { lbsToKg, feetInchesToCm } = require('../utils/convert');
+const { convert } = require('../utils/convert');
 
 function getChannelSlug(channelName, entity) {
 	if (!channelName) {

@@ -29,7 +30,7 @@ function scrapeScene(scene, channel, parameters) {
 
 	release.title = scene.title;
 	release.description = scene.description;
-	release.date = qu.extractDate(scene.publishedDate);
+	release.date = unprint.extractDate(scene.publishedDate, 'YYYY-MM-DD');
 
 	// release.actors = scene.models?.map((model) => model.modelName) || [];
 	release.actors = scene.models?.map((model) => ({

@@ -38,10 +39,15 @@ function scrapeScene(scene, channel, parameters) {
 		url: `${channel.url}/models/${model.modelId || model.id}`,
 	}));
 
-	release.poster = [
-		// scene.img.replace('med.jpg', 'hi.jpg'), // this image is not always from the same scene! for example on Petite Teens 18
-		scene.img,
-	];
+	if (scene.img) {
+		const poster = new URL(scene.img);
+
+		release.poster = [
+			// scene.img.replace('med.jpg', 'hi.jpg'), // this image is not always from the same scene! for example on Petite Teens 18
+			scene.img,
+			`${poster.origin}/cdn-cgi/image/width=640,quality=89${poster.pathname}`, // sometimes works when main poster is broken, observed on GotMYLF
+		];
+	}
 
 	release.teaser = scene.videoTrailer;
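
Note: the new poster block adds a second candidate via Cloudflare's /cdn-cgi/image/ resizing path, which per the inline comment sometimes still resolves when the primary poster is broken. A sketch of the URL rewrite, assuming scene.img is an absolute URL (function name and example URL hypothetical):

    function resizedPosterUrl(img) {
      const url = new URL(img);

      // same origin and path, routed through the CDN's resizing endpoint
      return `${url.origin}/cdn-cgi/image/width=640,quality=89${url.pathname}`;
    }

    resizedPosterUrl('https://cdn.example.com/scenes/poster.jpg');
    // => 'https://cdn.example.com/cdn-cgi/image/width=640,quality=89/scenes/poster.jpg'
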
@@ -63,6 +69,80 @@ function scrapeAll(scenes, channel, parameters) {
 	return scenes.map((scene) => scrapeScene(scene, channel, parameters));
 }
 
+async function fetchLatest(channel, page = 1, { parameters }) {
+	const res = await http.get(`https://tours-store.psmcdn.net/${parameters.fullEndpoint || `${parameters.endpoint}-videoscontent`}/_search?q=site.seo.seoSlug:"${parameters.id}"&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`);
+
+	if (res.ok) {
+		return scrapeAll(res.body.hits.hits.map(({ _source: scene }) => scene), channel, parameters);
+	}
+
+	return res.status;
+}
+
+async function fetchLatestOrganic(channel, page, context) {
+	const res = await http.get(`https://store.psmcdn.net/${context.parameters.endpoint}/newestMovies/items.json?orderBy="$key"&startAt="${context.cursor || 'aaaaaaaa'}"&limitToFirst=100`);
+
+	if (res.ok) {
+		const scenes = scrapeAll(Object.values(res.body), channel, context.parameters);
+
+		return {
+			// cursor implies page > 1 and first scene is last scene on previous page,
+			// it probably won't trip up the pagination logic, but avoid the duplicate anyway
+			scenes: context.cursor ? scenes.slice(1) : scenes,
+			context: {
+				cursor: Object.keys(res.body).at(-1), // official page seems to derive cursor from last scene, too
+			},
+		};
+	}
+
+	return res.status;
+}
+
+async function fetchLatestSearch(channel, page = 1, { parameters }) {
+	const res = await http.get(`https://tours-store.psmcdn.net/${parameters.fullEndpoint || parameters.endpoint}/_search?q=(site.seo.seoSlug:%22${parameters.id}%22%20AND%20type:video)&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`);
+
+	if (res.ok) {
+		return scrapeAll(res.body.hits.hits.map(({ _source: scene }) => scene), channel, parameters);
+	}
+
+	return res.status;
+}
+
+async function fetchScene(url, channel, baseScene, { parameters }) {
+	if (parameters.layout !== 'organic' && baseScene?.entryId) {
+		// overview and deep data is the same in elastic API, don't hit server unnecessarily
+		return baseScene;
+	}
+
+	const sceneSlug = new URL(url).pathname.match(/\/([\w-]+$)/)[1];
+
+	const res = await unprint.get(url, {
+		parser: {
+			runScripts: 'dangerously',
+		},
+	});
+
+	if (res.ok) {
+		const videos = res.context.window.__INITIAL_STATE__?.content?.videosContent;
+
+		res.context.window.fetch = () => {}; // suppress fetch missing error
+
+		if (!videos) {
+			return null;
+		}
+
+		const video = videos?.[sceneSlug] || Object.values(videos)[0];
+
+		if (video) {
+			return scrapeScene(video, channel, parameters);
+		}
+
+		return null;
+	}
+
+	return res.status;
+}
+
 function scrapeProfile(actor, entity, parameters) {
 	const profile = {};
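
Note: fetchLatestOrganic pages by key rather than by offset: the last key of each response becomes the next request's startAt cursor, and since startAt is inclusive, every cursor-bearing page starts with a duplicate of the previous page's last scene, which scenes.slice(1) discards. A rough sketch of a caller driving that cursor, assuming fetchLatestOrganic as defined above (the page cap is hypothetical):

    async function fetchAllOrganic(channel, parameters) {
      const all = [];
      let context = { parameters, cursor: null };

      for (let page = 1; page <= 20; page += 1) { // safety cap
        const result = await fetchLatestOrganic(channel, page, context);

        if (typeof result === 'number') break; // an HTTP error status was returned
        if (result.scenes.length === 0) break; // cursor reached the end

        all.push(...result.scenes);
        context = { parameters, cursor: result.context.cursor };
      }

      return all;
    }
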
@@ -113,11 +193,12 @@ function scrapeProfile(actor, entity, parameters) {
 	}
 
 	if (actor.bio.heightFeet && actor.bio.heightInches) {
-		profile.height = feetInchesToCm(actor.bio.heightFeet, actor.bio.heightInches);
+		// reports 5 foot as 1 foot for some reason, but inches seem correct
+		profile.height = convert(`${actor.bio.heightFeet >= 4 ? actor.bio.heightFeet : 5}' ${actor.bio.heightInches}"`, 'cm');
 	}
 
 	if (actor.bio.weight) {
-		profile.weight = lbsToKg(actor.bio.weight);
+		profile.weight = convert(actor.bio.weight, 'lb', 'kg');
 	}
 
 	profile.avatar = actor.img;
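
Note: the height change works around feed data that reports five feet as one foot while the inches stay correct; any feet value under four is clamped to five before conversion. With convert() parsing a feet-and-inches string as the diff shows, the guard behaves like this sketch (function name hypothetical):

    // 1' 7" is implausible for an adult; assume the feed meant 5' 7".
    function heightToCm(feet, inches) {
      const plausibleFeet = feet >= 4 ? feet : 5;

      return convert(`${plausibleFeet}' ${inches}"`, 'cm');
    }

    heightToCm(1, 7); // treated as 5' 7" ≈ 170 cm
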
@@ -127,80 +208,16 @@ function scrapeProfile(actor, entity, parameters) {
 	return profile;
 }
 
-async function fetchLatest(channel, page = 1, { parameters }) {
-	const res = await http.get(`https://tours-store.psmcdn.net/${parameters.fullEndpoint || `${parameters.endpoint}-videoscontent`}/_search?q=site.seo.seoSlug:"${parameters.id}"&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`);
-
-	if (res.ok) {
-		return scrapeAll(res.body.hits.hits.map(({ _source: scene }) => scene), channel, parameters);
-	}
-
-	return res.status;
-}
-
-async function fetchLatestOrganic(channel, page, context) {
-	const res = await http.get(`https://store.psmcdn.net/${context.parameters.endpoint}/newestMovies/items.json?orderBy="$key"&startAt="${context.cursor || 'aaaaaaaa'}"&limitToFirst=100`);
-
-	if (res.ok) {
-		const scenes = scrapeAll(Object.values(res.body), channel, context.parameters);
-
-		return {
-			// cursor implies page > 1 and first scene is last scene on previous page,
-			// it probably won't trip up the pagination logic, but avoid the duplicate anyway
-			scenes: context.cursor ? scenes.slice(1) : scenes,
-			context: {
-				cursor: Object.keys(res.body).at(-1), // official page seems to derive cursor from last scene, too
-			},
-		};
-	}
-
-	return res.status;
-}
-
-async function fetchLatestSearch(channel, page = 1, { parameters }) {
-	const res = await http.get(`https://tours-store.psmcdn.net/${parameters.fullEndpoint || parameters.endpoint}/_search?q=(site.seo.seoSlug:%22${parameters.id}%22%20AND%20type:video)&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`);
-
-	if (res.ok) {
-		return scrapeAll(res.body.hits.hits.map(({ _source: scene }) => scene), channel, parameters);
-	}
-
-	return res.status;
-}
-
-async function fetchScene(url, channel, baseScene, { parameters }) {
-	if (parameters.layout !== 'organic' && baseScene?.entryId) {
-		// overview and deep data is the same in elastic API, don't hit server unnecessarily
-		return baseScene;
-	}
-
-	const sceneSlug = new URL(url).pathname.match(/\/([\w-]+$)/)[1];
-
-	const res = await http.get({
-		organic: `https://store.psmcdn.net/${parameters.endpoint}/moviesContent/${sceneSlug}.json`,
-		search: `https://tours-store.psmcdn.net/ts_network/_search/?q=(id:${sceneSlug})&size=1`,
-		undefined: `https://tours-store.psmcdn.net/${parameters.fullEndpoint || `${parameters.endpoint}-videoscontent`}/_doc/${sceneSlug}`,
-	}[parameters.layout]);
-
-	if (res.ok && res.body.found) {
-		return scrapeScene(res.body._source, channel, parameters);
-	}
-
-	if (res.ok && parameters.layout === 'organic' && res.body.id) {
-		return scrapeScene(res.body, channel, parameters);
-	}
-
-	return res.status;
-}
-
 async function fetchProfile(baseActor, { entity, parameters }) {
 	// const url = format(parameters.profiles, { slug: baseActor.slug });
 	const url = parameters.layout === 'organic'
 		? `https://store.psmcdn.net/${parameters.endpoint}/modelsContent/${baseActor.slug}.json`
 		: `https://tours-store.psmcdn.net/${parameters.fullEndpoint || `${parameters.endpoint}-modelscontent`}/_doc/${parameters.modelPrefix || ''}${baseActor.slug}`;
 
-	const res = await qu.get(url);
+	const res = await unprint.get(url);
 
-	if (res.ok && res.body) {
-		return scrapeProfile(parameters.layout === 'organic' ? res.body : res.body._source || res.body, entity, parameters);
+	if (res.ok && res.data) {
+		return scrapeProfile(parameters.layout === 'organic' ? res.data : res.data._source || res.data, entity, parameters);
 	}
 
 	return res.status;
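
Note: the fetchProfile change completes the qu → unprint migration for profiles: unprint.get resolves with the parsed payload on res.data, while res.ok and res.status behave as before. A sketch of the call pattern as the diff uses it (the endpoint and slug are hypothetical):

    async function fetchExampleProfile() {
      const res = await unprint.get('https://store.psmcdn.net/example-endpoint/modelsContent/jane-doe.json');

      if (res.ok && res.data) {
        // elastic _doc responses nest the document under _source;
        // the organic JSON endpoints return it directly
        return res.data._source || res.data;
      }

      return null;
    }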
|
||||
|
||||
Reference in New Issue
Block a user