'use strict'; const config = require('config'); const unprint = require('unprint'); // const { parse } = require('csv-parse/sync'); const slugify = require('../utils/slugify'); const http = require('../utils/http'); const { feetInchesToCm, femaleFeetUsToEu } = require('../utils/convert'); const thumbKeyRegex = /(thumb\d+_url)|(episode_thumb_image_\d+_url)/; const qualityMap = { '480p': 480, mobile: 720, // as of recent, might've been lower in the past '720p': 720, '1080p': 1080, '2k': 1440, '4k': 2160, '5k': 2280, '8k': 4320, }; function scrapeSceneApi(data, channel) { const release = {}; release.entryId = data.id; if (data.url) { // provided URL works but always points to 8KMilfs instead of dedicated site const { pathname } = new URL(data.url); release.url = unprint.prefixUrl(pathname, channel.url); } if (channel.parameters.short && data.sequence_number) { release.shootId = `${channel.parameters.short} #${data.sequence_number}`; } release.title = data.title; release.description = data.short_description; release.date = new Date(data.publish_on); if (data.fullEpisodeLength) { release.duration = data.fullEpisodeLength; } else if (data.full_episode_minutes) { // full_episode_seconds is always available so far, but no need to count on it release.duration = (data.full_episode_minutes + (data.full_episode_seconds || 0)) * 60; } release.actors = data.models.map((model) => ({ name: model.name, gender: model.sex?.toLowerCase(), url: unprint.prefixUrl(`/models/${model.slug}`, channel.url), })); release.poster = data.thumb_url || data.thumb_image_url; release.photos = [ data.poster_image_url, ...Object.entries(data).filter(([key]) => thumbKeyRegex.test(key)).map(([_key, url]) => url), ].filter(Boolean); // photo thumbs include poster, don't filter here but in client const trailers = data.trailerVideos || data.trailer; if (trailers) { release.trailer = Object.entries(trailers) .filter(([key, trailer]) => !key.toLowerCase().includes('_sfw') && !trailer.url?.toLowerCase().includes('_sfw')) .map(([_key, trailer]) => ({ src: trailer.url, quality: qualityMap[trailer.resolution?.toLowerCase()] || null, })); } release.tags = data.categories.map((category) => category.name); release.photoCount = data.photosetPhotoCount || data.episode_photoset_photo_count; return release; } async function fetchLatestApi(channel, page = 1, { parameters }) { // JSON API doesn't return poster images, CSV API doesn't have pagination. UPDATE: requested and received both, yet to test const res = await http.get(`${parameters.apiAddress}/affiliates?site_id=${parameters.siteId}&page=${page}`, { headers: { Authorization: `Bearer ${config.apiKeys[parameters.apiKey]}`, }, }); if (res.ok) { return res.body.data.map((data) => scrapeSceneApi(data, channel)); } return res.status; } /* not practical via API, updates endpoint contains all necessary data async function fetchSceneApi(url, entity, baseRelease, { parameters }) { // const episodeId = new URL(url).pathname.match(/\/episodes\/\w+\/(\d+)/)?.[1]; const episodeId = new URL(url).pathname.match(/\/episodes\/(\d+)/)?.[1]; if (!episodeId) { return null; } // JSON API doesn't return poster images, CSV API doesn't have pagination. UPDATE: requested and received both, yet to test const res = await http.get(`${parameters.apiAddress}/affiliates/episodes/${episodeId}`, { headers: { Authorization: `Bearer ${config.apiKeys[parameters.apiKey]}`, }, }); console.log(res.body); return; if (res.ok) { return scrapeSceneApi(res.body.data, entity); } return res.status; } */ function composeBio(bioKeys, bioValues) { return bioKeys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: bioValues[index], }), {}); } function getBio(query) { // Kelly Madison, Fidelity if (query.exists('.profile-stats')) { const bioKeys = query.contents('.profile-stats li strong'); const bioValues = query.texts('.profile-stats li'); return composeBio(bioKeys, bioValues); } // 8K if (query.exists('//h4[contains(text(), "Stats")]')) { const bioKeys = query.contents('(//h4[contains(text(), "Stats")])[1]//following-sibling::div//strong'); const bioValues = query.contents('(//h4[contains(text(), "Stats")])[1]//following-sibling::div//p/text()'); return composeBio(bioKeys, bioValues); } // 5K if (query.exists('.bio-overlay-1')) { const bioKeys = query.contents('.bio-overlay-1 td:first-child'); const bioValues = query.contents('.bio-overlay-1 td:last-child'); return composeBio(bioKeys, bioValues); } return null; } function scrapeProfile({ query }) { const profile = {}; const bio = getBio(query); const questions = query.contents('.model-faq .content-body .accordion-header, .card .card-header button'); const answers = query.contents('.model-faq .content-body .accordion-body, .card .collapse .card-body'); if (questions.length > 0 && questions.length === answers.length) { profile.description = questions.map((question, index) => `**${question}**\n${answers[index]}`).join('\n'); } if (bio) { if (bio.ethnicity) profile.ethnicity = bio.ethnicity; if (bio.measurements) profile.measurements = bio.measurements; if (bio.birthplace) profile.birthPlace = bio.birthplace; if (bio.shoe_size) profile.foot = femaleFeetUsToEu(bio.shoe_size); if (bio.height) { const [feet, inches] = bio.height.match(/\d+/g); profile.height = feetInchesToCm(feet, inches); } if (bio.age) profile.age = Number(bio.age); if (bio.birthday) { const [month, day] = bio.birthday.split('/'); const birthday = new Date(Date.UTC(0, Number(month) - 1, Number(day))); if (profile.age) { birthday.setUTCFullYear(new Date().getFullYear() - profile.age); // indicate birth year is unknown } else { birthday.setUTCFullYear(0); // indicate birth year is unknown } profile.dateOfBirth = new Date(birthday); } } profile.avatar = query.img('img[src*="model"][src*="headshot"]'); profile.photos = query.imgs('img[src*="model"][src*="thumb_image"], img[src*="model"][src*="bg_image"]'); return profile; } async function fetchProfile({ name: actorName }, { entity }) { const actorSlug = slugify(actorName); // 8K sites don't have avatar or interview on model page, always use 5K site const res = await unprint.get(`${entity.slug === '5kvids' ? 'https://www.5kporn.com' : entity.url}/models/${actorSlug}`, { headers: { 'X-Requested-With': 'XMLHttpRequest', }, }); if (res.ok) { return scrapeProfile(res.context); } return res.status; } module.exports = { fetchLatest: fetchLatestApi, fetchProfile, };