'use strict'; const unprint = require('unprint'); const http = require('../utils/http'); const { convert } = require('../utils/convert'); function scrapeAll(scenes, channel) { return scenes.map(({ query }) => { const release = {}; release.title = query.content('h3.title a, .content-title-wrap a'); release.url = query.url('h3.title a, h1.title a, .content-title-wrap a', { origin: channel.url }); const pathname = new URL(release.url).pathname; release.entryId = pathname.match(/\/scenes\/([\w-]+)/)?.[1]; release.description = query.content('.desc, .content-description'); release.date = query.date('.date, time, .hide', 'Do MMM YYYY', { match: null }); release.actors = query.contents('h4.models a, .content-models a'); release.duration = query.duration('//span[contains(@class, "total-time") and text()[contains(., ":")]]'); // total-time is also used for photo counts on True Anal const [poster, ...primaryPhotos] = query.imgs('a img'); const secondaryPhotos = query.styles('.thumb-top, .thumb-bottom, .thumb-mouseover', { styleAttribute: 'background-image' }).map((style) => style.match(/url\((.*)\)/)?.[1]); release.poster = [ poster.replace(/-c\d+x\d+/, ''), poster, ]; release.photos = primaryPhotos.concat(secondaryPhotos); return release; }); } async function scrapeScene({ query }, url, channel) { const release = {}; const pathname = new URL(url).pathname; const data = query.json('#__NEXT_DATA__')?.props?.pageProps?.content; release.entryId = data?.slug || pathname.match(/\/scenes\/([\w-]+)/)?.[1]; release.title = data?.title || query.content('.content-page-info .title'); release.description = data?.description || query.content('.content-page-info .desc'); release.date = data?.formatted_date ? unprint.extractDate(data.formatted_date, 'Do MMM YYYY', { match: null }) : query.date('.content-page-info .date, .content-page-info .hide, .post-date', 'Do MMM YYYY', { match: null }); release.actors = data?.models_thumbs?.map((actor) => ({ name: actor.name, url: actor.slug && `${channel.url}/models/${actor.slug}`, avatar: actor.thumb, })) || query.elements('.content-page-info .models a').map((actorEl) => ({ name: unprint.query(actorEl), url: unprint.url(actorEl, null), })); release.duration = data?.seconds_duration || query.duration('.content-page-info .total-time:last-child'); release.poster = [data?.trailer_screencap, data?.thumb, data?.extra_thumbails?.[0]].filter(Boolean); release.photos = data?.extra_thumbnails?.slice(1); // first photo is poster release.trailer = data?.trailer_url || null; release.caps = data?.thumbs; release.tags = data?.tags; release.qualities = data?.videos && Object.values(data.videos).map((video) => video.height); return release; } async function fetchLatestContent(url, parameters) { if (parameters.useBrowser) { const res = await http.get(url, { bypassBrowser: 'shared', bypass: { evaluate: async () => { // images lazy loaded by JS, gradually scroll through page return Array.from(document.querySelectorAll('.content-item ')).reduce(async (chain, el) => { await chain; return new Promise((resolve) => { el.scrollIntoView(); setTimeout(resolve, 20); }); }, Promise.resolve()); }, }, }); if (res.statusCode !== 200) { return { ok: false, status: res.statusCode, }; } const context = unprint.init(res.body); return { ok: true, status: res.statusCode, context, }; } const res = await unprint.get(url); return res; } async function fetchLatest(channel, page = 1, { parameters }) { const url = `${channel.url}/scenes?page=${page}`; const res = await fetchLatestContent(url, parameters); if (res.ok) { if (res.context.query.exists('a[href*="stackpath.com"]')) { throw new Error('URL blocked by StackPath'); } return scrapeAll(unprint.initAll(res.context.query.all('.content-item-large, .content-item, .content-border')), channel); } return res.status; } async function fetchScene(url, channel) { const cookieJar = http.cookieJar(); const session = http.session({ cookieJar }); const res = await http.get(url, { session, }); if (res.ok) { const context = unprint.init(res.body); if (context.query.exists('a[href*="stackpath.com"]')) { throw new Error('URL blocked by StackPath'); } return scrapeScene(context, url, channel); } return res.status; } async function scrapeProfile(data) { const profile = {}; // unreliable key case, lowercase all const bio = Object.fromEntries(Object.entries(data).map(([key, value]) => [key.toLowerCase(), value])); profile.entryId = bio.id; profile.gender = bio.gender; profile.description = bio.bio; profile.birthPlace = bio.born; profile.dateOfBirth = unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'); profile.age = bio.age; profile.measurements = bio.measurements; profile.height = convert(bio.height, 'cm'); profile.weight = convert(bio.weight, 'lb', 'kg'); profile.eyes = bio.eyes; profile.hairColor = bio.hair; profile.avatar = bio.thumb; const tags = bio.tags?.split(',') || []; if (tags.includes('tattoos')) profile.hasTattoos = true; if (tags.includes('piercing')) profile.hasPiercings = true; return profile; } async function fetchProfile(actor, context) { const url = `${context.channel.url}/models/${actor.slug}`; const res = await unprint.get(url, { parser: { runScripts: 'dangerously', }, }); if (res.ok) { const data = res.context.query.json('#__NEXT_DATA__'); if (data.props.pageProps.model) { return scrapeProfile(data.props.pageProps.model, context.channel); } return null; } return res.status; } module.exports = { fetchLatest, fetchProfile, fetchScene, };