traxxx/src/scrapers/radical.js

'use strict';

const unprint = require('unprint');
const mime = require('mime');

const http = require('../utils/http');
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
const { lbsToKg, feetInchesToCm } = require('../utils/convert');

// Labels looked up as substrings of teaser URLs when sorting teasers;
// a URL matching an earlier label is sorted ahead of one matching a later label.
const teaserOrder = ['large', 'small', 'mobile', 'mp4', 'jpg'];

/**
 * Build a release from a scene object of the tour pages' `__DATA__` payload.
 *
 * @param {object} data - Scene object; `models`, `tags`, `extra_thumbs`, `trailer` and `downloads` may be absent.
 * @param {object} channel - Channel entity; only `channel.url` is read.
 * @returns {object} The scraped release.
 */
function scrapeSceneMetadata(data, channel) {
	const release = {};

	release.entryId = data.id;
	release.url = `${channel.url}/tour/videos/${data.id}/${slugify(data.title, '-', { removePunctuation: true })}`;

	release.title = data.title;
	release.description = data.description;

	release.date = new Date(data.release_date);
	release.duration = data.seconds_duration || qu.durationToSeconds(data.videos_duration);

	// a scene without a model list yields no actors rather than aborting the scrape
	release.actors = data.models?.map((model) => ({
		entryId: model.id,
		name: model.name,
		gender: model.gender,
		avatar: model.thumb,
		url: `${channel.url}/tour/models/${model.id}/${slugify(model.name, '-', { removePunctuation: true })}`,
	})) || [];

	// without a trailer poster, try the thumb with the 'mobile' suffix stripped first, then the thumb itself
	release.poster = data.trailer?.poster || [data.thumb?.replace('mobile.jpg', '.jpg'), data.thumb];

	// ordered by chronology: portrait1.jpg and scene.jpg are usually pre-shoot poses, portrait2.jpg is the cumshot aftermath
	// entries are undefined when the corresponding thumb is not listed
	release.photos = [
		data.extra_thumbs?.find((url) => /portrait1\.jpg/.test(url)),
		data.extra_thumbs?.find((url) => /scene\.jpg/.test(url)),
		data.extra_thumbs?.find((url) => /portrait2\.jpg/.test(url)),
	];

	release.trailer = data.trailer && {
		src: data.trailer.src,
		type: data.trailer.type,
	};

	release.teaser = data.special_thumbs;

	// [].concat(undefined) would produce [undefined], so fall back to an empty list explicitly
	release.tags = data.tags?.map((tag) => tag.name) || [];
	release.qualities = data.downloads && Object.values(data.downloads).map((download) => download.meta_data.height);
	release.stars = data.rating;

	return release;
}

/**
 * Scrape every scene object of a `__DATA__` listing.
 *
 * @param {object[]} scenes - Scene objects, each handed to scrapeSceneMetadata.
 * @param {object} channel - Channel entity passed along to the scene scraper.
 * @returns {object[]} One release per scene, in the same order.
 */
function scrapeAllMetadata(scenes, channel) {
	const toRelease = (scene) => scrapeSceneMetadata(scene, channel);

	return scenes.map(toRelease);
}

/**
 * Build a release from a scene object of the `/_next/data/` JSON endpoints.
 *
 * @param {object} data - Scene object from the API response.
 * @param {object} channel - Channel entity; only `channel.url` is read.
 * @param {object} [parameters] - Scraper parameters; `parameters.videos` overrides the 'videos' path segment.
 * @returns {object} The scraped release.
 */
function scrapeSceneApi(data, channel, parameters) {
	const release = {};

	release.entryId = data.id;
	release.url = `${channel.url}/${parameters?.videos || 'videos'}/${data.slug}`;

	release.title = data.title;
	release.description = data.description;

	release.date = unprint.extractDate(data.publish_date, 'YYYY/MM/DD HH:mm:ss') || unprint.extractDate(data.formatted_date, 'Do MMM YYYY');
	release.duration = data.seconds_duration || unprint.extractDuration(data.videos_duration);

	release.actors = data.models_thumbs?.map((actor) => ({
		name: actor.name,
		avatar: actor.thumb,
	})) || data.models;

	release.poster = data.trailer_screencap;

	// an animated GIF thumb serves as a teaser, any other thumb as a fallback poster
	if (mime.getType(data.thumb) === 'image/gif') {
		release.teaser = data.thumb;
	} else if (data.thumb) {
		release.poster = [
			release.poster,
			data.thumb,
		];
	}

	// the poster is either a single URL or a list of fallbacks; calling .includes() directly
	// on the list would be an exact element match instead of a substring match
	const posterIncludes = (label) => [].concat(release.poster).some((posterUrl) => posterUrl?.includes(label));

	release.photos = [
		...data.previews?.full || [],
		...data.extra_thumbnails?.filter((thumbnail) => !thumbnail.includes('mobile') // mobile is the cropped photo of a photo already in the set
			&& !(thumbnail.includes('_scene') && posterIncludes('_scene')) // likely the same photo, filename may differ so cannot compare full path
			&& !(thumbnail.includes('_player') && posterIncludes('_player'))
			&& !(thumbnail.includes('1920') && posterIncludes('1920'))) || [],
	];

	release.caps = data.thumbs;

	release.trailer = data.trailer_url;

	// URLs matching none of the known labels are ranked last instead of first (findIndex returns -1 for them)
	const teaserRank = (teaserUrl) => {
		const index = teaserOrder.findIndex((label) => teaserUrl.includes(label));

		return index === -1 ? teaserOrder.length : index;
	};

	const teasers = data.special_thumbnails
		?.filter((teaserUrl) => new URL(teaserUrl).pathname !== '/') // on Top Web Models, https://z7n5n3m8.ssl.hwcdn.net/ is listed as a teaser
		.sort((teaserA, teaserB) => teaserRank(teaserA) - teaserRank(teaserB));

	// keep the GIF thumb as teaser when no dedicated teasers are listed
	if (teasers?.length > 0 || !release.teaser) {
		release.teaser = teasers;
	}

	release.tags = data.tags;

	release.channel = slugify(data.site, '');
	release.qualities = Object.values(data.videos || []).map((video) => video.height);

	release.photoCount = Number(data.photos_duration) || null;

	return release;
}

/**
 * Scrape every scene object of an API listing.
 *
 * @param {object[]} scenes - Scene objects, each handed to scrapeSceneApi.
 * @param {object} channel - Channel entity passed along to the scene scraper.
 * @param {object} parameters - Scraper parameters passed along to the scene scraper.
 * @returns {object[]} One release per scene, in the same order.
 */
function scrapeAllApi(scenes, channel, parameters) {
	const toRelease = (scene) => scrapeSceneApi(scene, channel, parameters);

	return scenes.map(toRelease);
}

/**
 * Build an actor profile from a model object of the tour search preview.
 *
 * @param {object} data - Model object; its `attributes` map holds `{ value }` entries per trait.
 * @param {object} channel - Channel entity; only `channel.url` is read.
 * @returns {object} The scraped profile.
 */
function scrapeProfileMetadata(data, channel) {
	const { attributes } = data;
	const nameSlug = slugify(data.name, '-', { removePunctuation: true });

	return {
		entryId: data.id,
		url: `${channel.url}/tour/models/${data.id}/${nameSlug}`,
		description: attributes.bio?.value,
		dateOfBirth: qu.parseDate(attributes.birthdate?.value, 'YYYY-MM-DD'),
		gender: data.gender,
		age: attributes.age?.value,
		birthPlace: attributes.born?.value,
		measurements: attributes.measurements?.value,
		// height and weight are handed to the feet/inches and lbs converters
		height: feetInchesToCm(attributes.height?.value),
		weight: lbsToKg(attributes.weight?.value),
		eyes: attributes.eyes?.value,
		hairColor: attributes.hair?.value,
		avatar: data.thumb,
		date: new Date(data.publish_date),
	};
}

/**
 * Build an actor profile from a model object of the `/_next/data/` JSON endpoints.
 *
 * @param {object} data - Model object from the API response.
 * @param {object} channel - Channel entity, passed along when scraping the model's scenes.
 * @param {object[]} [scenes] - Scene objects of the model; scraped into `profile.scenes` when given.
 * @param {object} [parameters] - Scraper parameters, passed along when scraping the model's scenes.
 * @returns {object} The scraped profile.
 */
function scrapeProfileApi(data, channel, scenes, parameters) {
	// keys are mixed upper and lowercase, so look them up through a lowercased copy
	const lowercasedEntries = Object.entries(data).map(([key, value]) => [key.toLowerCase(), value]);
	const bio = Object.fromEntries(lowercasedEntries);

	const profile = {
		entryId: bio.id,
		description: bio.bio,
		gender: bio.gender,
		dateOfBirth: unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'),
		birthPlace: bio.born,
		age: bio.age,
		measurements: bio.measurements,
		height: feetInchesToCm(bio.height),
		weight: lbsToKg(bio.weight),
		eyes: bio.eyes,
		hairColor: bio.hair,
		// the avatar is read from the original object, not from the lowercased copy
		avatar: data.thumb,
	};

	if (scenes) {
		profile.scenes = scrapeAllApi(scenes, channel, parameters);
	}

	return profile;
}

/**
 * Fetch one page of the latest scenes from the `/_next/data/` JSON endpoints.
 *
 * With `parameters.site` set, the listing is requested from the parent's URL under `sites/<site>.json`;
 * otherwise from the channel's own URL under `<parameters.videos || 'videos'>.json`.
 *
 * @param {object} channel - Channel entity; `channel.url` or `channel.parent.url` is read.
 * @param {number} page - Page number put in the query string.
 * @param {object} context - Only `context.parameters` (`endpoint`, `site`, `videos`) is read.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request failed or carried no scene data.
 */
async function fetchLatestApi(channel, page, { parameters }) {
	const query = `order_by=publish_date&sort_by=desc&per_page=30&page=${page}`;

	let url;

	if (parameters.site) {
		url = `${channel.parent.url}/_next/data/${parameters.endpoint}/sites/${parameters.site}.json?sitename=${parameters.site}&${query}`;
	} else {
		url = `${channel.url}/_next/data/${parameters.endpoint}/${parameters.videos || 'videos'}.json?${query}`;
	}

	const res = await http.get(url);
	const scenes = res.ok && res.body.pageProps?.contents?.data;

	if (!scenes) {
		return res.status;
	}

	return scrapeAllApi(scenes, channel, parameters);
}

/**
 * Fetch a single scene from the `/_next/data/` JSON endpoints.
 *
 * @param {string} url - Scene page URL; its last non-empty path segment is used as the slug.
 * @param {object} channel - Channel entity; only `channel.url` is read.
 * @param {object} baseScene - Unused here.
 * @param {object} context - Only `context.parameters` (`endpoint`, `videos`) is read.
 * @returns {Promise<object|number>} Scraped release, or the response status when the request failed or carried no scene.
 */
async function fetchSceneApi(url, channel, baseScene, { parameters }) {
	// drop empty segments so a trailing slash does not result in an empty slug
	const slug = new URL(url).pathname.split('/').filter(Boolean).at(-1) ?? '';
	const res = await http.get(`${channel.url}/_next/data/${parameters.endpoint}/${parameters.videos || 'videos'}/${slug}.json?slug=${slug}`);

	if (res.ok && res.body.pageProps?.content) {
		return scrapeSceneApi(res.body.pageProps.content, channel, parameters);
	}

	return res.status;
}

/**
 * Fetch an actor profile from the `/_next/data/` JSON endpoints.
 *
 * @param {object} actor - Actor; only `actor.slug` is read.
 * @param {object} context - `context.channel` (for its URL) and `context.parameters` (for `endpoint`) are read.
 * @returns {Promise<object|number>} Scraped profile, or the response status when the request failed or carried no model.
 */
async function fetchProfileApi(actor, { channel, parameters }) {
	const profileUrl = `${channel.url}/_next/data/${parameters.endpoint}/models/${actor.slug}.json?slug=${actor.slug}`;
	const res = await http.get(profileUrl);

	if (!res.ok) {
		return res.status;
	}

	const { pageProps } = res.body;

	if (!pageProps?.model) {
		return res.status;
	}

	// model_contents holds the model's scenes, scraped along with the profile
	return scrapeProfileApi(pageProps.model, channel, pageProps.model_contents, parameters);
}

/**
 * Fetch one page of the latest scenes from the tour video listing, reading the
 * `__DATA__` object the page's scripts put on the window.
 *
 * @param {object} channel - Channel entity; only `channel.url` is read.
 * @param {number} [page=1] - Page number put in the query string.
 * @returns {Promise<object[]|*|null|number>} Scraped releases; the payload's `error` (or null) when the page
 *   loaded without a video listing; the response status when the request failed.
 */
async function fetchLatestMetadata(channel, page = 1) {
	const url = `${channel.url}/tour/videos?page=${page}`;
	const res = await http.get(url, {
		parse: true,
		extract: {
			runScripts: 'dangerously',
		},
	});

	if (!res.ok) {
		return res.status;
	}

	const pageData = res.window.__DATA__;

	// check for the listing itself rather than just the payload, so a payload that only
	// carries an error is reported below instead of throwing on the missing `videos` key
	if (pageData?.videos?.items) {
		return scrapeAllMetadata(pageData.videos.items, channel);
	}

	return pageData?.error || null;
}

/**
 * Fetch a single scene from its tour page, reading the `__DATA__` object the
 * page's scripts put on the window.
 *
 * @param {string} url - Scene page URL, requested as-is.
 * @param {object} channel - Channel entity passed along to the scene scraper.
 * @returns {Promise<object|*|null|number>} Scraped release; the payload's `error` (or null) when the page
 *   loaded without a video; the response status when the request failed.
 */
async function fetchSceneMetadata(url, channel) {
	const requestOptions = {
		parse: true,
		extract: {
			runScripts: 'dangerously',
		},
	};

	const res = await http.get(url, requestOptions);

	if (!res.ok) {
		return res.status;
	}

	const pageData = res.window.__DATA__;

	if (pageData?.video) {
		return scrapeSceneMetadata(pageData.video, channel);
	}

	return pageData?.error || null;
}

/**
 * Look an actor up through the tour's search preview and scrape the matching model.
 *
 * @param {object} actor - Actor; `actor.name` is searched for, `actor.slug` is matched against the slugified result names.
 * @param {object} channel - Channel entity; only `channel.url` is read here.
 * @returns {Promise<object|null|number>} Scraped profile; null when no result matches; the response status when the request failed.
 */
async function fetchProfileMetadata(actor, channel) {
	// the name ends up in the URL path, so spaces and reserved characters (/, ?, #) must be percent-encoded
	const res = await http.get(`${channel.url}/tour/search-preview/${encodeURIComponent(actor.name)}`, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	});

	if (!res.ok) {
		return res.status;
	}

	// a response without a model list counts as no match
	const model = res.body?.models?.items?.find((modelX) => slugify(modelX.name) === actor.slug);

	if (model) {
		return scrapeProfileMetadata(model, channel);
	}

	return null;
}

// Two scraper variants, each exposing the same fetchLatest / fetchScene / fetchProfile trio.
module.exports = {
	// built on the /tour/ pages
	metadata: {
		// probably deprecated
		fetchLatest: fetchLatestMetadata,
		fetchScene: fetchSceneMetadata,
		fetchProfile: fetchProfileMetadata,
	},
	// built on the /_next/data/ JSON endpoints
	api: {
		fetchLatest: fetchLatestApi,
		fetchScene: fetchSceneApi,
		fetchProfile: fetchProfileApi,
	},
};