traxxx/src/scrapers/inthecrack.js

'use strict';

const unprint = require('unprint');

const slugify = require('../utils/slugify');

/**
 * Build an actor profile from a model record returned by the API.
 *
 * @param {object} model - model record (either from the model index or a single-model response)
 * @param {object} channel - channel whose `origin` is used to build the profile URL
 * @returns {object} profile with name, entryId, dateOfBirth, birthPlace, height and weight,
 *   plus `ethnicity` (unless it reads as "none") and `url` (only when the model has an ID)
 */
function scrapeProfile(model, channel) {
	// a country entry may be an object with a name, an object with only an ISO code, or a bare string
	const resolveCountry = (country) => {
		if (country.name) {
			return country.name;
		}

		const isoCode = country.isO2 || country.iso2; // sic, both spellings are checked

		if (isoCode) {
			return isoCode;
		}

		return typeof country === 'string' ? country : null;
	};

	const profile = {
		name: model.name, // used by shallow scrape
		entryId: model.id,
		dateOfBirth: unprint.extractDate(model.birthdate, 'YYYY-MM-DD'),
		// first country that resolves to something usable
		birthPlace: model.countries?.map(resolveCountry).filter(Boolean)[0],
		height: model.height,
		weight: model.weight,
	};

	// ethnicity is either an object carrying a title, or the value itself
	const ethnicity = model.ethnicity?.title || model.ethnicity;
	const isPlaceholder = /none/i.test(ethnicity);

	if (!isPlaceholder) {
		profile.ethnicity = ethnicity;
	}

	if (model.id) {
		profile.url = `${channel.origin}/modelcollections/${model.id}`;
	}

	return profile;
}

/**
 * Resolve a scene's model references against the model index and turn each hit into a profile.
 *
 * @param {Array} sceneModels - model references from a scene; each is an ID or an object with an `id`
 * @param {object} models - model records keyed by ID
 * @param {object} channel - channel passed through to scrapeProfile
 * @returns {Array<object>} profiles for every reference found in the index; empty when
 *   there is no reference list or no index
 */
function mergeModels(sceneModels, models, channel) {
	const canMerge = Array.isArray(sceneModels) && Boolean(models);

	if (!canMerge) {
		return [];
	}

	return sceneModels.flatMap((reference) => {
		const knownModel = models[reference?.id || reference];

		// references that are not in the index are silently dropped
		return knownModel ? [scrapeProfile(knownModel, channel)] : [];
	});
}

/**
 * Convert a list of collection records into releases.
 *
 * @param {Array<object>} scenes - collection records from the API
 * @param {object} channel - channel whose `origin` is used to build release URLs
 * @param {object} [models={}] - model records keyed by ID, used to resolve actors
 * @param {boolean} [isUpcoming=false] - when true, only the basic fields are returned
 * @returns {Array<object>} one release per scene
 */
function scrapeAll(scenes, channel, models = {}, isUpcoming = false) {
	const toRelease = (scene) => {
		const sceneId = scene.id;

		const basics = {
			entryId: sceneId,
			shootId: sceneId,
			title: scene.title,
			date: unprint.extractDate(scene.releaseDate, 'YYYY-MM-DD'),
			poster: `https://api.inthecrack.com/image/resize/images/posters/collections/${sceneId}.jpg?w=1400`,
			// the coming soon photo remains available after the release date
			photos: [`https://api.inthecrack.com/FileStore/images/coming_soon/${sceneId}.jpg`],
		};

		// upcoming scenes stop here: no URL, duration, actors or production details are set
		if (isUpcoming) {
			return basics;
		}

		return {
			...basics,
			url: `${channel.origin}/collection/${sceneId}`,
			// minutes to seconds; a missing or zero total becomes null
			duration: scene.clipMinutesTotal * 60 || null,
			actors: mergeModels(scene.models, models, channel),
			productionDate: unprint.extractDate(scene.shootDate, 'YYYY-MM-DD'),
			photoCount: scene.picTotal,
			productionLocation: scene.shootLocation,
		};
	};

	return scenes.map(toRelease);
}

/**
 * Fetch the collection list and return one page of releases from it.
 *
 * @param {object} channel - channel passed through to scrapeAll
 * @param {number} page - 1-based page number
 * @param {object} context - its `beforeFetchLatest` value is handed to scrapeAll as the model index
 * @returns {Promise<Array<object>|number>} releases for the page, or the response status on failure
 */
async function fetchLatest(channel, page, context) {
	const res = await unprint.get('https://api.inthecrack.com/Collection/');

	if (!res.ok) {
		return res.status;
	}

	// API has no pagination, simulate it by slicing so the rest of the pipeline keeps working
	const pageSize = 100;
	const pageScenes = res.data.slice((page - 1) * pageSize, page * pageSize);

	return scrapeAll(pageScenes, channel, context.beforeFetchLatest);
}

/**
 * Fetch the coming soon list and return it as upcoming releases.
 *
 * @param {object} channel - channel passed through to scrapeAll
 * @returns {Promise<Array<object>|number>} upcoming releases, or the response status on failure
 */
async function fetchUpcoming(channel) {
	const res = await unprint.get('https://api.inthecrack.com/Home/coming_soon');

	if (!res.ok) {
		return res.status;
	}

	// the whole list is scraped in one go; no model index is passed for upcoming scenes
	return scrapeAll(res.data, channel, null, true);
}

// Lookup from a video's videoResolutionId to the quality value reported for a release
// (presumably the vertical resolution in pixels).
const qualityMap = {
	// unsure about IDs 2 and 5, so they are left unmapped
	1: 360,
	3: 720,
	4: 1080,
	6: 2160,
};

/**
 * Build a full release from a single collection record.
 *
 * @param {object} scene - collection record from the API
 * @param {object} channel - channel whose `origin` is used to build the release URL
 * @param {object} [baseRelease] - release from the shallow scrape, if any; when it already has
 *   a date, no date is derived here
 * @param {object} [models={}] - model records keyed by ID, used to resolve actors
 * @returns {object} release with chapters, photos and qualities where the record provides them
 */
function scrapeScene(scene, channel, baseRelease, models = {}) {
	const release = {};

	release.entryId = scene.id;
	release.shootId = scene.id;

	release.url = `${channel.origin}/collection/${scene.id}`;

	release.title = scene.title;
	release.description = scene.description;

	release.actors = mergeModels(scene.models, models, channel);

	release.productionDate = unprint.extractDate(scene.shootDate, 'YYYY-MM-DD');
	release.productionLocation = scene.shootLocation;

	release.poster = `https://api.inthecrack.com/image/resize/images/posters/collections/${scene.id}.jpg?w=1400`;

	release.photos = scene.galleryImages
		?.filter((image) => image.imageType === 1) // type 1 and 2 are dupes as far as thumbs are concerned
		.slice(0, 15) // only first 15 photos have a free thumb
		.map((image) => image.filename && `https://api.inthecrack.com/FileStore/images/gallerysamples/${scene.id}/${image.filename}`).filter(Boolean);

	release.chapters = scene.clips?.map((clip) => {
		// the thumbnail name is the first video's filename up to its <width>x<height>.mp4 suffix;
		// a clip without videos, or with a filename that does not follow that pattern, simply has no poster
		const posterName = clip.videos?.[0]?.filename?.match(/^(.*?)(?=\d+x\d+\.mp4)/)?.[0];

		return {
			entryId: clip.id,
			title: clip.title,
			description: clip.description,
			date: unprint.extractDate(clip.releaseDate, 'YYYY-MM-DD'),
			duration: clip.length,
			// this is how the site itself renders the thumbnails. does not return valid image without ?w parameter
			poster: posterName
				? `https://api.inthecrack.com/image/resize/images/posters/clips/${posterName}.jpg?w=1400`
				: null,
		};
	});

	release.qualities = scene.clips?.[0]?.videos?.map((video) => qualityMap[video.videoResolutionId]).filter(Boolean);

	if (!baseRelease?.date) {
		// base release has 'official' release date, deep data only has chapter dates
		// though, this is probably how they calculate the collection date, too
		release.date = release.chapters
			?.map((chapter) => chapter.date)
			.filter(Boolean) // yields a fresh array, so sorting it in place does not touch the chapters
			.sort((dateA, dateB) => dateA - dateB)[0];
	}

	return release;
}

/**
 * Fetch and scrape a single collection by its site URL.
 *
 * @param {string} url - scene URL containing /collection/<numeric id>
 * @param {object} channel - channel passed through to scrapeScene
 * @param {object} baseRelease - release from the shallow scrape, passed through to scrapeScene
 * @param {object} context - its `beforeFetchScenes` value is handed to scrapeScene as the model index
 * @returns {Promise<object|number|null>} the release, the response status on failure, or null
 *   when the URL carries no collection ID
 */
async function fetchScene(url, channel, baseRelease, context) {
	const { pathname } = new URL(url);
	const [, entryId] = pathname.match(/\/collection\/(\d+)/) || [];

	if (!entryId) {
		return null;
	}

	const res = await unprint.get(`https://api.inthecrack.com/Collection/${entryId}`);

	return res.ok
		? scrapeScene(res.data, channel, baseRelease, context.beforeFetchScenes)
		: res.status;
}

/**
 * Fetch the model list and index it by model ID.
 *
 * @returns {Promise<object>} model records keyed by ID; an empty object when the request
 *   fails or the response cannot be indexed
 */
async function fetchModels() {
	const res = await unprint.get('https://api.inthecrack.com/Model/');

	if (!res.ok) {
		return {};
	}

	try {
		const entries = res.data.map((model) => [model.id, model]);

		return Object.fromEntries(entries);
	} catch {
		// we can continue, we just won't have model names
		return {};
	}
}

/**
 * Work out the API model ID for an actor, using the cheapest source available.
 *
 * Order of preference: the actor's own entryId, the ID embedded in the actor's profile URL,
 * and finally a name match against the full model list (which costs a request).
 *
 * @param {object} actor - actor with any of `entryId`, `url` and `name`
 * @returns {Promise<number|string|null>} the model ID, or null when none could be determined
 */
async function getModelId(actor) {
	if (actor.entryId) {
		return actor.entryId;
	}

	if (actor.url) {
		// scrapeProfile builds profile URLs as /modelcollections/<id>; accept the singular form as well
		const modelId = new URL(actor.url).pathname.match(/\/modelcollections?\/(\d+)/)?.[1];

		if (modelId) {
			return modelId;
		}
	}

	const modelsById = await fetchModels();
	const actorSlug = slugify(actor.name);
	const model = Object.values(modelsById).find((searchModel) => slugify(searchModel.name) === actorSlug);

	if (model) {
		return model.id;
	}

	return null;
}

/**
 * Fetch and scrape the profile of a single actor.
 *
 * @param {object} actor - actor passed to getModelId to determine the model ID
 * @param {object} channel - channel passed through to scrapeProfile
 * @returns {Promise<object|null>} the profile, or null when the model ID is unknown or the
 *   request fails
 */
async function fetchProfile(actor, channel) {
	const modelId = await getModelId(actor);

	if (modelId) {
		const res = await unprint.get(`https://api.inthecrack.com/Model/${modelId}`);

		if (res.ok) {
			return scrapeProfile(res.data, channel);
		}
	}

	return null;
}

// Public scraper interface.
module.exports = {
	fetchLatest,
	fetchUpcoming,
	fetchScene,
	fetchProfile,
	// both hooks build the model index; fetchLatest reads it from context.beforeFetchLatest
	// and fetchScene from context.beforeFetchScenes
	// NOTE(review): presumably the caller runs each hook before the matching fetch and stores
	// the result on the context under the hook's name — confirm against the scraper runner
	beforeFetchLatest: fetchModels,
	beforeFetchScenes: fetchModels,
};