'use strict'; const unprint = require('unprint'); const slugify = require('../utils/slugify'); const qu = require('../utils/qu'); const http = require('../utils/http'); const { feetInchesToCm, femaleFeetUsToEu } = require('../utils/convert'); const siteMapByKey = { PF: 'pornfidelity', TF: 'teenfidelity', KM: 'kellymadison', '5KP': '5kporn', '5KT': '5kteens', }; const siteMapBySlug = Object.entries(siteMapByKey).reduce((acc, [key, value]) => ({ ...acc, [value]: key }), {}); function scrapeLatest(scenes, site) { return scenes.map(({ query }) => { const release = {}; release.shootId = query.q('.card-meta .text-right, .row .text-right, .card-footer-item:last-child', true); const { pathname } = new URL(query.url('h5 a, .ep-title a, .title a')); [release.entryId] = pathname.match(/\d+$/); release.title = query.cnt('h5 a, .ep-title a, .title a'); release.date = query.date('.card-meta .text-left, .row .col-4:first-child, .card-footer-item:first-child', ['MMM D', 'MMM D, YYYY'], /\w+ \d+(, \w+)?/); release.actors = query.cnts('.models a, .ep-models a, a[href*="models/"]'); // older scenes do not have a working scene page on their native site, but they (often, not always) do on Porn Fidelity // scenes older than year do not show a date; this is not when the URLs stop working, but it's a rough guideline release.url = site.parameters.archive && !release.date ? `${site.parameters.archive}${pathname}` : `${site.url}${pathname}`; release.duration = query.dur('.content a'); const duration = query.cnt('.content a, .ep-runtime strong, .subtitle:last-child a')?.match(/(\d+) min/)?.[1]; if (duration) release.duration = Number(duration) * 60; if (query.exists('.episodes-preview')) { [release.poster, ...release.photos] = query.imgs('.episodes-preview img'); } else { release.poster = query.img('.card-img-top, .image img'); release.teaser = { src: query.video('video'), }; } /* using site ID, filter no longer needed const siteId = release.shootId.match(/\d?\w{2}/)[0]; const siteSlug = siteMapByKey[siteId]; if (site.slug !== siteSlug) { // using generic network overview, scene is not from the site we want return { ...acc, unextracted: [...acc.unextracted, release] }; } return { ...acc, scenes: [...acc.scenes, release] }; */ return release; }); } async function scrapeScene({ query, html }, url, baseRelease, channel, session) { const { pathname } = new URL(url); const release = {}; [release.entryId] = pathname.match(/\d+$/); const titleString = query.cnt('.card-header.row h4, .trailer-starring span, .level-left .level-item'); const episode = titleString?.match(/#\d+$/)?.[0]; release.title = query.cnt('.trailer-title') || titleString?.match(/(?:Trailer: )?(.+) -/)?.[1]; release.channel = slugify(titleString?.match(/([\w\s]+) #\d+$/)?.[1], ''); const siteKey = siteMapBySlug[release.channel]; release.shootId = `${siteKey} ${episode}`; release.description = query.text('p.card-text, h5.heavy + p, .card-content .is-three-fifths'); // order not reliable, get keys const detailElsByKey = query.all('.card-body h4.card-title, .video-summary h5, .columns li').reduce((acc, rowEl) => ({ ...acc, [slugify(rowEl?.textContent.match(/(\w+):/)?.[1])]: rowEl, }), {}); release.date = query.date(detailElsByKey.published, null, 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/); release.duration = query.dur(detailElsByKey.episode); release.actors = query.cnts(detailElsByKey.starring, 'a'); const posterPrefix = html.indexOf('poster:'); const poster = query.img('.trailer-poster') || html.slice(html.indexOf('http', posterPrefix), html.indexOf('.jpg', posterPrefix) + 4); if (poster) { if (baseRelease?.poster) { release.photos = [poster, ...(baseRelease.photos || [])]; } else { release.poster = poster; } } // const token = query.meta('name=_token'); // const trailerInfoUrl = `${channel.url}/episodes/trailer/sources/${release.entryId}?type=trailer`; const trailerInfoUrl = html.match(/'(http.*\/trailer\/sources.*)'/)?.[1]; if (trailerInfoUrl) { const trailerInfoRes = await http.post(trailerInfoUrl, null, { session }); if (trailerInfoRes.ok && trailerInfoRes.body.sources?.length > 0) { release.trailer = trailerInfoRes.body.sources.map((trailer) => ({ src: trailer.src, type: trailer.type, /* unreliable, sometimes actual video is 720p quality: trailer.res .replace(4000, 2160) .replace(5000, 2880), */ })); } } return release; } function scrapeProfile({ query }) { const profile = {}; const bioKeys = query.contents('table.table td:nth-child(1), table.table th'); const bioValues = query.contents('table.table td:nth-child(2)'); const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: bioValues[index], }), {}); if (bio.ethnicity) profile.ethnicity = bio.ethnicity; if (bio.measurements) profile.measurements = bio.measurements; if (bio.birthplace) profile.birthPlace = bio.birthplace; if (bio.shoe_size) profile.foot = femaleFeetUsToEu(bio.shoe_size); if (bio.height) { const [feet, inches] = bio.height.match(/\d+/g); profile.height = feetInchesToCm(feet, inches); } if (bio.birthday) { const [month, day] = bio.birthday.split('/'); const birthday = new Date(Date.UTC(0, Number(month) - 1, Number(day))); birthday.setUTCFullYear(0); // indicate birth year is unknown profile.dateOfBirth = new Date(birthday); } profile.avatar = query.img('img[src*="model"][src*="headshot"]'); profile.photos = query.imgs('img[src*="model"][src*="thumb_image"], img[src*="model"][src*="bg_image"]'); return profile; } async function fetchLatest(channel, page = 1) { const url = `${channel.url}/episodes/search?page=${page}&site=${channel.parameters.siteId || ''}`; // TLS issues with teenfidelity.com, same overview on all sites const res = await http.get(url, { headers: { 'X-Requested-With': 'XMLHttpRequest', }, }); if (res.ok && res.body.status === 'success') { return scrapeLatest(qu.extractAll(res.body.html, '.episode, .ep'), channel); } return res.status; } async function fetchScene(url, channel, baseRelease) { const session = http.session(); const res = await qu.get(url, null, { 'X-Requested-With': 'XMLHttpRequest', }, { session, followRedirects: false, // redirects to sign-up page if scene not found }); return res.ok ? scrapeScene(res.item, url, baseRelease, channel, session) : res.status; } async function fetchProfile({ name: actorName }, { entity }) { const actorSlug = slugify(actorName); const res = await unprint.get(`${entity.url}/models/${actorSlug}`, { headers: { 'X-Requested-With': 'XMLHttpRequest', }, }); if (res.ok) { return scrapeProfile(res.context); } return res.status; } module.exports = { fetchLatest, fetchProfile, fetchScene, };