// traxxx/src/scrapers/radical.js

'use strict';

const unprint = require('unprint');
const mime = require('mime');

const http = require('../utils/http');
const slugify = require('../utils/slugify');
const { convert } = require('../utils/convert');

// labels looked for in teaser URLs; scrapeScene sorts teasers by the index of the first label found in each URL
const teaserOrder = ['large', 'small', 'mobile', 'mp4', 'jpg'];

/**
 * Pick the URL path segment for a scene.
 *
 * @param {object} data - Raw scene; `is_published === 0` marks it as not yet published.
 * @param {object} parameters - Channel parameters; may define `upcoming` and `videos` path segments.
 * @returns {string} `parameters.upcoming` for unpublished scenes when configured,
 *   otherwise `parameters.videos`, otherwise the default `'videos'`.
 */
function getVideoPath(data, parameters) {
	const isUnpublished = data.is_published === 0;

	// only unpublished scenes are eligible for the dedicated upcoming path
	const upcomingPath = isUnpublished && parameters.upcoming;

	return upcomingPath || parameters.videos || 'videos';
}

/**
 * Map a raw scene object from the site's JSON payload onto a release.
 *
 * @param {object} data - Raw scene data.
 * @param {object} channel - Channel entity; `channel.url` is used to build the scene and actor URLs.
 * @param {object} parameters - Channel parameters, forwarded to getVideoPath.
 * @returns {object} The release.
 */
function scrapeScene(data, channel, parameters) {
	const release = {};

	release.entryId = data.id;
	release.url = `${new URL(channel.url).origin}/${getVideoPath(data, parameters)}/${data.slug}`;

	release.title = data.title;
	release.description = data.description;

	release.date = unprint.extractDate(data.publish_date, 'YYYY/MM/DD HH:mm:ss') || unprint.extractDate(data.formatted_date, 'Do MMM YYYY');
	release.duration = data.seconds_duration || unprint.extractDuration(data.videos_duration);

	// TWM in particular has a habit of putting two names in a single link https://tour.2girls1camera.com/scenes/richelle-ryan-ariella-ferrara
	release.actors = (data.models_thumbs || data.models)?.flatMap((actor) => {
		const actorNames = actor.name.split('&').map((actorName) => actorName.trim());

		if (actorNames.length === 1) {
			return {
				name: actor.name,
				avatar: actor.thumb,
				url: actor.slug && `${channel.url}/models/${actor.slug}`,
			};
		}

		// avatar and slug belong to the combined entry, so split names are returned as plain strings
		return actorNames;
	});

	if (mime.getType(data.thumb) === 'image/gif') {
		// the thumb is animated, offer the trailer screencap ahead of it
		// NOTE(review): presumably the list is treated as ordered fallbacks downstream — confirm
		release.poster = [
			data.trailer_screencap,
			data.thumb,
		];
	} else {
		release.poster = data.thumb;
	}

	// the poster is either a single URL or a list of URLs; the label must be looked for inside every URL,
	// calling includes() on the list itself would compare whole entries and never match a label
	const posterIncludes = (label) => [release.poster].flat().some((posterUrl) => posterUrl?.includes(label));

	// likely the same photo as the poster, filename may differ so cannot compare full path
	const isLikelyPoster = (thumbnail) => ['_scene', '_player', '1920'].some((label) => thumbnail.includes(label) && posterIncludes(label));

	release.photos = [
		...(data.previews?.full ? Object.values(data.previews.full) : []), // sometimes it's an array, sometimes an object { '1': 'url' }
		...(data.extra_thumbnails?.filter((thumbnail) => !thumbnail.includes('mobile') // mobile is the cropped photo of a photo already in the set
			&& !isLikelyPoster(thumbnail)) || []),
	];

	release.caps = data.thumbs;

	release.trailer = data.trailer_url;

	// URLs matching none of the teaserOrder labels get index -1 and therefore sort to the front
	release.teaser = data.special_thumbnails
		?.filter((teaserUrl) => new URL(teaserUrl).pathname !== '/') // on Top Web Models, https://z7n5n3m8.ssl.hwcdn.net/ is listed as a teaser
		.sort((teaserA, teaserB) => teaserOrder.findIndex((label) => teaserA.includes(label)) - teaserOrder.findIndex((label) => teaserB.includes(label)));

	release.tags = data.tags;

	release.channel = slugify(data.site, '');
	release.qualities = Object.values(data.videos || []).map((video) => video.height);

	release.photoCount = Number(data.photos_duration) || null;

	return release;
}

/**
 * Scrape a list of raw scenes.
 *
 * @param {Array<object>} scenes - Raw scenes.
 * @param {object} channel - Channel entity, forwarded to scrapeScene.
 * @param {object} parameters - Channel parameters, forwarded to scrapeScene.
 * @returns {Array<object>} One release per scene, in the same order.
 */
function scrapeAll(scenes, channel, parameters) {
	const toRelease = (scene) => scrapeScene(scene, channel, parameters);

	return scenes.map(toRelease);
}

/**
 * Determine the identifier used in the site's `/_next/data/<id>/` URLs.
 *
 * Reads `buildId` from the `#__NEXT_DATA__` element on the channel's front page,
 * and falls back to a manually configured `parameters.endpoint`.
 *
 * @param {object} channel - Channel entity; `channel.url` is fetched.
 * @param {object} [parameters] - Channel parameters; optional, not every caller in this file passes them.
 * @returns {Promise<string|undefined>} The build ID, the configured endpoint, or undefined when neither is available.
 */
async function fetchEndpoint(channel, parameters) {
	const res = await unprint.get(channel.url);

	if (res.ok) {
		const data = res.context.query.json('#__NEXT_DATA__');

		if (data?.buildId) {
			return data.buildId;
		}
	}

	// still allow manual configuration as a back-up; guard against callers that omit parameters
	return parameters?.endpoint;
}

/**
 * Fetch one page of the most recently published scenes.
 *
 * @param {object} channel - Channel entity; `channel.parent.url` is used instead of `channel.url` when `parameters.site` is set.
 * @param {number} page - Page number, passed on in the query string.
 * @param {object} context - Scrape context; only `parameters` is read.
 * @returns {Promise<Array<object>|number|null>} Releases on success, null when no endpoint could be
 *   determined, otherwise the HTTP status of the data request.
 */
async function fetchLatest(channel, page, { parameters }) {
	const buildId = await fetchEndpoint(channel, parameters);

	if (!buildId) {
		return null;
	}

	let url;

	if (parameters.site) {
		// the site is listed on its parent's domain
		url = `${channel.parent.url}/_next/data/${buildId}/sites/${parameters.site}.json?sitename=${parameters.site}&order_by=publish_date&sort_by=desc&per_page=30&page=${page}`;
	} else {
		url = `${channel.url}/_next/data/${buildId}/${parameters.videos || 'videos'}.json?order_by=publish_date&sort_by=desc&per_page=30&page=${page}`;
	}

	const res = await http.get(url);
	const scenes = res.ok ? res.body.pageProps?.contents?.data : null;

	if (!scenes) {
		return res.status;
	}

	return scrapeAll(scenes, channel, parameters);
}

/**
 * Fetch the upcoming scene advertised on the channel's front page.
 *
 * @param {object} channel - Channel entity; `channel.url` is fetched.
 * @param {number} _page - Unused.
 * @param {object} context - Scrape context; only `parameters` is read.
 * @returns {Promise<object|number|null>} The release scraped from `upcoming_scene`, null when the
 *   page data holds no upcoming scene, otherwise the HTTP status of the failed request.
 */
async function fetchUpcoming(channel, _page, { parameters }) {
	const res = await unprint.get(channel.url, {
		parser: {
			// NOTE(review): presumably makes the parser execute the page's scripts; fetchEndpoint reads
			// the same #__NEXT_DATA__ element without this option — confirm it is required here
			runScripts: 'dangerously',
		},
	});

	if (!res.ok) {
		return res.status;
	}

	const data = res.context.query.json('#__NEXT_DATA__');

	// any level of the payload may be absent, treat that the same as no upcoming scene
	const scene = data?.props?.pageProps?.upcoming_scene;

	return scene
		? scrapeScene(scene, channel, parameters)
		: null;
}

/**
 * Fetch and scrape a single scene by its page URL.
 *
 * @param {string} url - Scene page URL; its last non-empty path segment is used as the slug.
 * @param {object} channel - Channel entity; `channel.url` is the base of the data request.
 * @param {object} _baseScene - Unused.
 * @param {object} context - Scrape context; only `parameters` is read.
 * @returns {Promise<object|number|null>} The release on success, null when the URL has no slug or no
 *   endpoint could be determined, otherwise the HTTP status of the data request.
 */
async function fetchScene(url, channel, _baseScene, { parameters }) {
	// ignore empty segments so a trailing slash does not produce an empty slug
	const slug = new URL(url).pathname.split('/').filter(Boolean).at(-1);

	if (!slug) {
		return null;
	}

	// pass the parameters along so the manually configured endpoint remains available as a back-up
	const endpoint = await fetchEndpoint(channel, parameters);

	if (!endpoint) {
		return null;
	}

	const res = await http.get(`${channel.url}/_next/data/${endpoint}/${parameters.videos || 'videos'}/${slug}.json?slug=${slug}`);

	if (res.ok && res.body.pageProps?.content) {
		return scrapeScene(res.body.pageProps.content, channel, parameters);
	}

	return res.status;
}

/**
 * Map a raw model object from the site's JSON payload onto an actor profile.
 *
 * @param {object} data - Raw model data.
 * @param {object} channel - Channel entity, forwarded to scrapeAll.
 * @param {Array<object>} [scenes] - Raw scenes of the model; scraped into `profile.scenes` when given.
 * @param {object} parameters - Channel parameters, forwarded to scrapeAll.
 * @returns {object} The profile.
 */
function scrapeProfile(data, channel, scenes, parameters) {
	// keys are mixed upper and lowercase, so look everything up by its slugified key
	const normalizedEntries = Object.entries(data).map(([key, value]) => [slugify(key, '_'), value]);
	const bio = Object.fromEntries(normalizedEntries);

	const profile = {
		entryId: bio.id,
		description: bio.bio,
		gender: bio.gender,
		dateOfBirth: unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'),
		birthPlace: bio.born || bio.birthplace,
		age: bio.age,
		measurements: bio.measurements,
		height: convert(bio.height, 'cm'),
		weight: convert(bio.weight, 'lb', 'kg'),
		eyes: bio.eyes || bio.eye_color,
		hairColor: bio.hair || bio.hair_color,
		avatar: data.thumb, // read from the raw data under its exact key
	};

	if (scenes) {
		profile.scenes = scrapeAll(scenes, channel, parameters);
	}

	return profile;
}

/**
 * Fetch and scrape an actor's profile.
 *
 * @param {object} actor - Actor; `actor.slug` identifies the model page.
 * @param {object} context - Scrape context; `channel` and `parameters` are read.
 * @returns {Promise<object|number|null>} The profile on success, null when no endpoint could be
 *   determined, otherwise the HTTP status of the data request.
 */
async function fetchProfile(actor, { channel, parameters }) {
	// pass the parameters along so the manually configured endpoint remains available as a back-up
	const endpoint = await fetchEndpoint(channel, parameters);

	if (!endpoint) {
		return null;
	}

	const res = await http.get(`${channel.url}/_next/data/${endpoint}/models/${actor.slug}.json?slug=${actor.slug}`);

	if (res.ok && res.body.pageProps?.model) {
		return scrapeProfile(res.body.pageProps.model, channel, res.body.pageProps.model_contents, parameters);
	}

	return res.status;
}

// functions exposed by this scraper module; the scrape* helpers and fetchEndpoint stay internal
module.exports = {
	fetchLatest,
	fetchUpcoming,
	fetchScene,
	fetchProfile,
};