// File: traxxx/src/scrapers/inthecrack.js (JavaScript, executable file; 252 lines, 6.3 KiB)

'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
/**
 * Maps a model record from the API onto a profile object.
 *
 * @param {object} model - model record; the fields read are name, id, birthdate,
 *   countries, height, weight and ethnicity
 * @param {object} channel - only `origin` is read, to build the profile URL
 * @returns {object} profile with name, entryId, dateOfBirth, birthPlace, height and
 *   weight, plus ethnicity and url when those can be determined
 */
function scrapeProfile(model, channel) {
  const profile = {};

  profile.name = model.name; // used by shallow scrape
  profile.entryId = model.id;
  profile.dateOfBirth = unprint.extractDate(model.birthdate, 'YYYY-MM-DD');

  // a country entry is handled as a plain string or as an object carrying a name or
  // ISO code; empty (null/undefined) entries are skipped and the first usable value wins
  profile.birthPlace = model.countries
    ?.map((country) => {
      if (typeof country === 'string') {
        return country;
      }

      return country?.name || country?.isO2 || country?.iso2 || null; // isO2 sic
    })
    .filter(Boolean)[0];

  profile.height = model.height;
  profile.weight = model.weight;

  const ethnicity = model.ethnicity?.title || model.ethnicity;

  // leave the property off when there is no value or the value matches /none/i
  if (ethnicity && !/none/i.test(ethnicity)) {
    profile.ethnicity = ethnicity;
  }

  if (model.id) {
    profile.url = `${channel.origin}/modelcollections/${model.id}`;
  }

  return profile;
}
/**
 * Resolves the model references of a scene against a models lookup and turns
 * every match into a profile via scrapeProfile.
 *
 * @param {Array} sceneModels - model references; each is used as a key directly,
 *   or through its `id` property when it has one
 * @param {object} models - lookup of model records keyed by ID
 * @param {object} channel - forwarded to scrapeProfile
 * @returns {Array} profiles for the references that could be resolved; empty when
 *   sceneModels is not an array or models is falsy
 */
function mergeModels(sceneModels, models, channel) {
  const isUsable = Boolean(models) && Array.isArray(sceneModels);

  if (!isUsable) {
    return [];
  }

  return sceneModels.flatMap((reference) => {
    const record = models[reference?.id || reference];
    const profile = record ? scrapeProfile(record, channel) : null;

    // unresolved references and empty profiles are dropped
    return profile ? [profile] : [];
  });
}
/**
 * Converts scene records from a listing response into release objects.
 *
 * @param {Array} scenes - scene records
 * @param {object} channel - `origin` is read to build scene URLs
 * @param {object} [models={}] - models lookup forwarded to mergeModels
 * @param {boolean} [isUpcoming=false] - when true, only the teaser fields
 *   (entryId, shootId, title, date, poster, photos) are returned
 * @returns {Array} one release per scene
 */
function scrapeAll(scenes, channel, models = {}, isUpcoming = false) {
  const toTeaser = (scene) => ({
    entryId: scene.id,
    shootId: scene.id,
    title: scene.title,
    date: unprint.extractDate(scene.releaseDate, 'YYYY-MM-DD'),
    poster: `https://api.inthecrack.com/image/resize/images/posters/collections/${scene.id}.jpg?w=1400`,
    // coming soon photo remains available after release date
    photos: [`https://api.inthecrack.com/FileStore/images/coming_soon/${scene.id}.jpg`],
  });

  const toDetails = (scene) => ({
    url: `${channel.origin}/collection/${scene.id}`,
    duration: scene.clipMinutesTotal * 60 || null,
    actors: mergeModels(scene.models, models, channel),
    productionDate: unprint.extractDate(scene.shootDate, 'YYYY-MM-DD'),
    photoCount: scene.picTotal,
    productionLocation: scene.shootLocation,
  });

  return scenes.map((scene) => {
    const teaser = toTeaser(scene);

    return isUpcoming
      ? teaser
      : { ...teaser, ...toDetails(scene) };
  });
}
/**
 * Requests the collection list and returns one page of it as releases.
 *
 * @param {object} channel - forwarded to scrapeAll
 * @param {number} page - 1-based page number
 * @param {object} context - `beforeFetchLatest` is forwarded to scrapeAll as the models lookup
 * @returns {Promise<Array|number>} releases, or the response status when the request is not ok
 */
async function fetchLatest(channel, page, context) {
  const res = await unprint.get('https://api.inthecrack.com/Collection/');

  if (!res.ok) {
    return res.status;
  }

  // the API has no pagination, so slice the full list into pages of 100 for the caller
  const pageSize = 100;
  const pageScenes = res.data.slice((page - 1) * pageSize, page * pageSize);

  return scrapeAll(pageScenes, channel, context.beforeFetchLatest);
}
/**
 * Requests the coming-soon list and returns it as teaser releases.
 *
 * @param {object} channel - forwarded to scrapeAll
 * @returns {Promise<Array|number>} releases, or the response status when the request is not ok
 */
async function fetchUpcoming(channel) {
  const res = await unprint.get('https://api.inthecrack.com/Home/coming_soon');

  if (!res.ok) {
    return res.status;
  }

  // no models lookup is passed; scrapeAll is called with its upcoming flag set
  return scrapeAll(res.data, channel, null, true);
}
/**
 * Video quality by the `videoResolutionId` found on clip videos.
 * IDs 2 and 5 are deliberately absent: it is unsure what they stand for.
 */
const qualityMap = {
  1: 360,
  3: 720,
  4: 1080,
  6: 2160,
};
/**
 * Builds the poster URL of a chapter from the filename of the clip's first video.
 *
 * @param {object} clip - clip record; only `videos[0].filename` is read
 * @returns {string|null} poster URL, or null when the clip has no video filename
 *   of the form `<base><width>x<height>.mp4` to derive it from
 */
function getChapterPoster(clip) {
  const filename = clip.videos?.[0]?.filename;

  // the poster shares the video's base name, i.e. everything before the resolution suffix
  const baseName = typeof filename === 'string'
    ? filename.match(/^(.*?)(?=\d+x\d+\.mp4)/)?.[0]
    : null;

  if (!baseName) {
    return null;
  }

  // this is how the site itself renders the thumbnails; does not return valid image without ?w parameter
  return `https://api.inthecrack.com/image/resize/images/posters/clips/${baseName}.jpg?w=1400`;
}

/**
 * Converts a detailed scene record into a release object.
 *
 * @param {object} scene - scene record; reads id, title, description, models, shootDate,
 *   shootLocation, galleryImages and clips
 * @param {object} channel - `origin` is read to build the scene URL
 * @param {object|null} baseRelease - release from the shallow scrape, if any; when it
 *   has no date, the date is derived from the chapters instead
 * @param {object} [models={}] - models lookup forwarded to mergeModels
 * @returns {object} release
 */
function scrapeScene(scene, channel, baseRelease, models = {}) {
  const release = {};

  release.entryId = scene.id;
  release.shootId = scene.id;
  release.url = `${channel.origin}/collection/${scene.id}`;

  release.title = scene.title;
  release.description = scene.description;

  release.actors = mergeModels(scene.models, models, channel);

  release.productionDate = unprint.extractDate(scene.shootDate, 'YYYY-MM-DD');
  release.productionLocation = scene.shootLocation;

  release.poster = `https://api.inthecrack.com/image/resize/images/posters/collections/${scene.id}.jpg?w=1400`;

  release.photos = scene.galleryImages
    ?.filter((image) => image.imageType === 1) // type 1 and 2 are dupes as far as thumbs are concerned
    .slice(0, 15) // only first 15 photos have a free thumb
    .map((image) => image.filename && `https://api.inthecrack.com/FileStore/images/gallerysamples/${scene.id}/${image.filename}`)
    .filter(Boolean);

  release.chapters = scene.clips?.map((clip) => ({
    entryId: clip.id,
    title: clip.title,
    description: clip.description,
    date: unprint.extractDate(clip.releaseDate, 'YYYY-MM-DD'),
    duration: clip.length,
    poster: getChapterPoster(clip),
  }));

  // resolutions offered for the first clip; unknown resolution IDs are dropped
  release.qualities = scene.clips?.[0]?.videos
    ?.map((video) => qualityMap[video.videoResolutionId])
    .filter(Boolean);

  if (!baseRelease?.date) {
    // base release has 'official' release date, deep data only has chapter dates
    // though, this is probably how they calculate the collection date, too
    release.date = release.chapters
      ?.map((chapter) => chapter.date)
      .filter(Boolean)
      .toSorted((dateA, dateB) => dateA - dateB)[0];
  }

  return release;
}
/**
 * Fetches the detailed record of the collection a scene URL points at and scrapes it.
 *
 * @param {string} url - scene URL whose path contains `/collection/<digits>`
 * @param {object} channel - forwarded to scrapeScene
 * @param {object} baseRelease - forwarded to scrapeScene
 * @param {object} context - `beforeFetchScenes` is forwarded to scrapeScene as the models lookup
 * @returns {Promise<object|number|null>} release, the response status when the request
 *   is not ok, or null when the URL holds no collection ID
 */
async function fetchScene(url, channel, baseRelease, context) {
  const { pathname } = new URL(url);
  const [, entryId] = pathname.match(/\/collection\/(\d+)/) || [];

  if (!entryId) {
    return null;
  }

  const res = await unprint.get(`https://api.inthecrack.com/Collection/${entryId}`);

  return res.ok
    ? scrapeScene(res.data, channel, baseRelease, context.beforeFetchScenes)
    : res.status;
}
/**
 * Requests the model list and indexes it by model ID.
 *
 * @returns {Promise<object>} model records keyed by `id`; an empty object when the
 *   request is not ok or the response data cannot be indexed
 */
async function fetchModels() {
  const res = await unprint.get('https://api.inthecrack.com/Model/');

  if (!res.ok) {
    return {};
  }

  try {
    const entries = res.data.map((model) => [model.id, model]);

    return Object.fromEntries(entries);
  } catch (error) {
    // we can continue, we just won't have model names
    return {};
  }
}
/**
 * Determines the model ID for an actor, trying the cheapest source first:
 * the stored entry ID, then the profile URL, then a lookup by name in the model list.
 *
 * @param {object} actor - reads `entryId`, `url` and `name`
 * @returns {Promise<string|number|null>} model ID, or null when none could be found
 */
async function getModelId(actor) {
  if (actor.entryId) {
    return actor.entryId;
  }

  if (actor.url) {
    try {
      // scrapeProfile builds /modelcollections/<id> URLs; the singular form is accepted as well
      const modelId = new URL(actor.url).pathname.match(/\/modelcollections?\/(\d+)/)?.[1];

      if (modelId) {
        return modelId;
      }
    } catch (error) {
      // the URL could not be parsed; fall through to the lookup by name
    }
  }

  const modelsById = await fetchModels();
  const actorSlug = slugify(actor.name);
  const model = Object.values(modelsById).find((searchModel) => slugify(searchModel.name) === actorSlug);

  if (model) {
    return model.id;
  }

  return null;
}
/**
 * Fetches and scrapes the profile of an actor.
 *
 * @param {object} actor - forwarded to getModelId to determine the model ID
 * @param {object} channel - forwarded to scrapeProfile
 * @returns {Promise<object|null>} profile, or null when no model ID was found or
 *   the request is not ok
 */
async function fetchProfile(actor, channel) {
  const modelId = await getModelId(actor);

  if (modelId) {
    const res = await unprint.get(`https://api.inthecrack.com/Model/${modelId}`);

    if (res.ok) {
      return scrapeProfile(res.data, channel);
    }
  }

  return null;
}
// Public interface of the scraper.
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
fetchProfile,
// fetchLatest reads context.beforeFetchLatest and fetchScene reads context.beforeFetchScenes
// as their models lookup; presumably the caller runs these hooks first and stores the
// result on the context under the same name (confirm against the scraper runner)
beforeFetchLatest: fetchModels,
beforeFetchScenes: fetchModels,
};