'use strict'; const unprint = require('unprint'); const mime = require('mime'); const http = require('../utils/http'); const qu = require('../utils/qu'); const slugify = require('../utils/slugify'); const { lbsToKg, feetInchesToCm } = require('../utils/convert'); const teaserOrder = ['large', 'small', 'mobile', 'mp4', 'jpg']; function scrapeSceneMetadata(data, channel) { const release = {}; release.entryId = data.id; release.url = `${channel.url}/tour/videos/${data.id}/${slugify(data.title, '-', { removePunctuation: true })}`; release.title = data.title; release.description = data.description; release.date = new Date(data.release_date); release.duration = data.seconds_duration || qu.durationToSeconds(data.videos_duration); release.actors = data.models.map((model) => ({ entryId: model.id, name: model.name, gender: model.gender, avatar: model.thumb, url: `${channel.url}/tour/models/${model.id}/${slugify(model.name, '-', { removePunctuation: true })}`, })); release.poster = data.trailer?.poster || [data.thumb?.replace('mobile.jpg', '.jpg'), data.thumb]; release.photos = [ data.extra_thumbs?.find((url) => /portrait1.jpg/.test(url)), data.extra_thumbs?.find((url) => /scene.jpg/.test(url)), data.extra_thumbs?.find((url) => /portrait2.jpg/.test(url)), ]; // ordered by chronology: portrait1.jpg and scene.jpg are usually pre-shoot poses, portrait2.jpg is the cumshot aftermath release.trailer = data.trailer && { src: data.trailer.src, type: data.trailer.type, }; release.teaser = data.special_thumbs; release.tags = [].concat(data.tags?.map((tag) => tag.name)); release.qualities = data.downloads && Object.values(data.downloads)?.map((download) => download.meta_data.height); release.stars = data.rating; return release; } function scrapeAllMetadata(scenes, channel) { return scenes.map((data) => scrapeSceneMetadata(data, channel)); } function scrapeSceneApi(data, channel, parameters) { const release = {}; release.entryId = data.id; release.url = `${channel.url}/${parameters.videos || 'videos'}/${data.slug}`; release.title = data.title; release.description = data.description; release.date = unprint.extractDate(data.publish_date, 'YYYY/MM/DD HH:mm:ss') || unprint.extractDate(data.formatted_date, 'Do MMM YYYY'); release.duration = data.seconds_duration || unprint.extractDuration(data.videos_duration); release.actors = data.models_thumbs?.map((actor) => ({ name: actor.name, avatar: actor.thumb, })) || data.models; release.poster = data.trailer_screencap; if (mime.getType(data.thumb) !== 'image/gif') { release.teaser = data.thumb; } else { release.poster = [ release.poster, data.thumb, ]; } release.photos = [ ...data.previews?.full || [], ...data.extra_thumbnails?.filter((thumbnail) => !thumbnail.includes('mobile') // mobile is the cropped photo of a photo already in the set && !(thumbnail.includes('_scene') && release.poster?.includes('_scene')) // likely the same photo, filename may differ so cannot compare full path && !(thumbnail.includes('_player') && release.poster?.includes('_player')) && !(thumbnail.includes('1920') && release.poster?.includes('1920'))) || [], ]; release.caps = data.thumbs; release.trailer = data.trailer_url; release.teaser = data.special_thumbnails ?.filter((teaserUrl) => new URL(teaserUrl).pathname !== '/') // on Top Web Models, https://z7n5n3m8.ssl.hwcdn.net/ is listed as a teaser .sort((teaserA, teaserB) => teaserOrder.findIndex((label) => teaserA.includes(label)) - teaserOrder.findIndex((label) => teaserB.includes(label))); release.tags = data.tags; release.channel = slugify(data.site, ''); release.qualities = Object.values(data.videos || []).map((video) => video.height); release.photoCount = Number(data.photos_duration) || null; return release; } function scrapeAllApi(scenes, channel, parameters) { return scenes.map((data) => scrapeSceneApi(data, channel, parameters)); } function scrapeProfileMetadata(data, channel) { const profile = {}; profile.entryId = data.id; profile.url = `${channel.url}/tour/models/${data.id}/${slugify(data.name, '-', { removePunctuation: true })}`; profile.description = data.attributes.bio?.value; profile.dateOfBirth = qu.parseDate(data.attributes.birthdate?.value, 'YYYY-MM-DD'); profile.gender = data.gender; profile.age = data.attributes.age?.value; profile.birthPlace = data.attributes.born?.value; profile.measurements = data.attributes.measurements?.value; profile.height = feetInchesToCm(data.attributes.height?.value); profile.weight = lbsToKg(data.attributes.weight?.value); profile.eyes = data.attributes.eyes?.value; profile.hairColor = data.attributes.hair?.value; profile.avatar = data.thumb; profile.date = new Date(data.publish_date); return profile; } function scrapeProfileApi(data, channel, scenes, parameters) { const profile = {}; const bio = Object.fromEntries(Object.entries(data).map(([key, value]) => [key.toLowerCase(), value])); // keys are mixed upper and lowercase profile.entryId = bio.id; profile.description = bio.bio; profile.gender = bio.gender; profile.dateOfBirth = unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'); profile.birthPlace = bio.born; profile.age = bio.age; profile.measurements = bio.measurements; profile.height = feetInchesToCm(bio.height); profile.weight = lbsToKg(bio.weight); profile.eyes = bio.eyes; profile.hairColor = bio.hair; profile.avatar = data.thumb; if (scenes) { profile.scenes = scrapeAllApi(scenes, channel, parameters); } return profile; } async function fetchLatestApi(channel, page, { parameters }) { const url = parameters.site ? `${channel.parent.url}/_next/data/${parameters.endpoint}/sites/${parameters.site}.json?sitename=${parameters.site}&order_by=publish_date&sort_by=desc&per_page=30&page=${page}` : `${channel.url}/_next/data/${parameters.endpoint}/${parameters.videos || 'videos'}.json?order_by=publish_date&sort_by=desc&per_page=30&page=${page}`; const res = await http.get(url); if (res.ok && res.body.pageProps?.contents?.data) { return scrapeAllApi(res.body.pageProps.contents.data, channel, parameters); } return res.status; } async function fetchSceneApi(url, channel, baseScene, { parameters }) { const slug = new URL(url).pathname.split('/').at(-1); const res = await http.get(`${channel.url}/_next/data/${parameters.endpoint}/${parameters.videos || 'videos'}/${slug}.json?slug=${slug}`); if (res.ok && res.body.pageProps?.content) { return scrapeSceneApi(res.body.pageProps.content, channel, parameters); } return res.status; } async function fetchProfileApi(actor, { channel, parameters }) { const res = await http.get(`${channel.url}/_next/data/${parameters.endpoint}/models/${actor.slug}.json?slug=${actor.slug}`); if (res.ok && res.body.pageProps?.model) { return scrapeProfileApi(res.body.pageProps.model, channel, res.body.pageProps.model_contents, parameters); } return res.status; } async function fetchLatestMetadata(channel, page = 1) { const url = `${channel.url}/tour/videos?page=${page}`; const res = await http.get(url, { parse: true, extract: { runScripts: 'dangerously', }, }); if (res.ok && res.window.__DATA__) { return scrapeAllMetadata(res.window.__DATA__.videos.items, channel); } if (res.ok) { return res.window.__DATA__?.error || null; } return res.status; } async function fetchSceneMetadata(url, channel) { const res = await http.get(url, { parse: true, extract: { runScripts: 'dangerously', }, }); if (res.ok && res.window.__DATA__?.video) { return scrapeSceneMetadata(res.window.__DATA__.video, channel); } if (res.ok) { return res.window.__DATA__?.error || null; } return res.status; } async function fetchProfileMetadata(actor, channel) { const res = await http.get(`${channel.url}/tour/search-preview/${actor.name}`, { headers: { 'X-Requested-With': 'XMLHttpRequest', }, }); if (res.ok) { const model = res.body.models?.items.find((modelX) => slugify(modelX.name) === actor.slug); if (model) { return scrapeProfileMetadata(model, channel); } return null; } return res.status; } module.exports = { metadata: { // probably deprecated fetchLatest: fetchLatestMetadata, fetchScene: fetchSceneMetadata, fetchProfile: fetchProfileMetadata, }, api: { fetchLatest: fetchLatestApi, fetchScene: fetchSceneApi, fetchProfile: fetchProfileApi, }, };