'use strict'; const unprint = require('unprint'); const mime = require('mime'); const http = require('../utils/http'); const slugify = require('../utils/slugify'); const { convert } = require('../utils/convert'); const teaserOrder = ['large', 'small', 'mobile', 'mp4', 'jpg']; function getVideoPath(data, parameters) { if (data.is_published === 0 && parameters.upcoming) { return parameters.upcoming; } if (parameters.videos) { return parameters.videos; } return 'videos'; } function scrapeScene(data, channel, parameters) { const release = {}; release.entryId = data.id; release.url = `${new URL(channel.url).origin}/${getVideoPath(data, parameters)}/${data.slug}`; release.title = data.title; release.description = data.description; release.date = unprint.extractDate(data.publish_date, 'YYYY/MM/DD HH:mm:ss') || unprint.extractDate(data.formatted_date, 'Do MMM YYYY'); release.duration = data.seconds_duration || unprint.extractDuration(data.videos_duration); // TWM in particular has a habit of putting two names in a single link https://tour.2girls1camera.com/scenes/richelle-ryan-ariella-ferrara release.actors = (data.models_thumbs || data.models)?.flatMap((actor) => { const actorNames = actor.name.split('&').map((actorName) => actorName.trim()); if (actorNames.length === 1) { return { name: actor.name, avatar: actor.thumb, url: actor.slug && `${channel.url}/models/${actor.slug}`, }; } return actorNames; }); release.poster = data.trailer_screencap || data.thumb; if (mime.getType(data.thumb) === 'image/gif') { release.poster = [ data.trailer_screencap, data.thumb, ]; } else { release.poster = data.thumb; // release.teaser = data.thumb; } release.photos = [ ...data.previews?.full ? Object.values(data.previews?.full) : [], // sometimes it's an array, sometimes an object { '1': 'url' } ...data.extra_thumbnails?.filter((thumbnail) => !thumbnail.includes('mobile') // mobile is the cropped photo of a photo already in the set && !(thumbnail.includes('_scene') && release.poster?.includes('_scene')) // likely the same photo, filename may differ so cannot compare full path && !(thumbnail.includes('_player') && release.poster?.includes('_player')) && !(thumbnail.includes('1920') && release.poster?.includes('1920'))) || [], ]; release.caps = data.thumbs; release.trailer = data.trailer_url; release.teaser = data.special_thumbnails ?.filter((teaserUrl) => new URL(teaserUrl).pathname !== '/') // on Top Web Models, https://z7n5n3m8.ssl.hwcdn.net/ is listed as a teaser .sort((teaserA, teaserB) => teaserOrder.findIndex((label) => teaserA.includes(label)) - teaserOrder.findIndex((label) => teaserB.includes(label))); release.tags = data.tags; release.channel = slugify(data.site, ''); release.qualities = Object.values(data.videos || []).map((video) => video.height); release.photoCount = Number(data.photos_duration) || null; return release; } function scrapeAll(scenes, channel, parameters) { return scenes.map((data) => scrapeScene(data, channel, parameters)); } async function fetchEndpoint(channel, parameters) { const res = await unprint.get(channel.url); if (res.ok) { const data = res.context.query.json('#__NEXT_DATA__'); if (data?.buildId) { return data.buildId; } } // still allow manual configuration as a back-up return parameters.endpoint; } async function fetchLatest(channel, page, { parameters }) { const endpoint = await fetchEndpoint(channel, parameters); if (!endpoint) { return null; } const url = parameters.site ? `${channel.parent.url}/_next/data/${endpoint}/sites/${parameters.site}.json?sitename=${parameters.site}&order_by=publish_date&sort_by=desc&per_page=30&page=${page}` : `${channel.url}/_next/data/${endpoint}/${parameters.videos || 'videos'}.json?order_by=publish_date&sort_by=desc&per_page=30&page=${page}`; const res = await http.get(url); if (res.ok && res.body.pageProps?.contents?.data) { return scrapeAll(res.body.pageProps.contents.data, channel, parameters); } return res.status; } async function fetchUpcoming(channel, _page, { parameters }) { const res = await unprint.get(channel.url, { parser: { runScripts: 'dangerously', }, }); if (res.ok) { const data = res.context.query.json('#__NEXT_DATA__'); const scene = data?.props.pageProps.upcoming_scene; if (scene) { return scrapeScene(scene, channel, parameters); } return null; } return res.status; } async function fetchScene(url, channel, _baseScene, { parameters }) { const slug = new URL(url).pathname.split('/').at(-1); const endpoint = await fetchEndpoint(channel); const res = await http.get(`${channel.url}/_next/data/${endpoint}/${parameters.videos || 'videos'}/${slug}.json?slug=${slug}`); if (res.ok && res.body.pageProps?.content) { return scrapeScene(res.body.pageProps.content, channel, parameters); } return res.status; } function scrapeProfile(data, channel, scenes, parameters) { const profile = {}; const bio = Object.fromEntries(Object.entries(data).map(([key, value]) => [slugify(key, '_'), value])); // keys are mixed upper and lowercase profile.entryId = bio.id; profile.description = bio.bio; profile.gender = bio.gender; profile.dateOfBirth = unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'); profile.birthPlace = bio.born || bio.birthplace; profile.age = bio.age; profile.measurements = bio.measurements; profile.height = convert(bio.height, 'cm'); profile.weight = convert(bio.weight, 'lb', 'kg'); profile.eyes = bio.eyes || bio.eye_color; profile.hairColor = bio.hair || bio.hair_color; profile.avatar = data.thumb; if (scenes) { profile.scenes = scrapeAll(scenes, channel, parameters); } return profile; } async function fetchProfile(actor, { channel, parameters }) { const endpoint = await fetchEndpoint(channel); const res = await http.get(`${channel.url}/_next/data/${endpoint}/models/${actor.slug}.json?slug=${actor.slug}`); if (res.ok && res.body.pageProps?.model) { return scrapeProfile(res.body.pageProps.model, channel, res.body.pageProps.model_contents, parameters); } return res.status; } module.exports = { fetchLatest, fetchUpcoming, fetchScene, fetchProfile, };