'use strict'; const unprint = require('unprint'); const slugify = require('../utils/slugify'); function genderFromUrl(url) { const { pathname } = new URL(url); if (/atores/.test(pathname)) { return 'male'; } if (/atrizes/.test(pathname)) { return 'female'; } return null; } function scrapeAll(scenes) { return scenes.map(({ query }) => { const release = {}; release.url = query.url(null); release.entryId = new URL(release.url).pathname.match(/\/videos\/([\w-]+)/)[1]; release.title = query.attribute('img', 'title') || query.content('.font-semibold'); const poster = query.img('img[src*="/uploads"]'); if (poster) { release.poster = [ poster.replace(/-\d+x\d+/, ''), poster, ]; const match = poster.match(/\/uploads\/(\d{4})\/(\d{2})/); if (match) { release.date = new Date(match[1], match[2] - 1, 1); release.datePrecision = 'month'; } } return release; }); } function scrapeScene({ query, html }, { url, entity }) { const release = {}; const data = query.json('.yoast-schema-graph')?.['@graph']; const pageData = data?.find((item) => item['@type'] === 'WebPage'); const imageData = data?.find((item) => item['@type'] === 'ImageObject'); release.entryId = new URL(url).pathname.match(/\/videos\/([\w-]+)/)[1]; release.title = query.content('.w-screen + div .font-semibold') || data?.find((item) => item['@type'] === 'BreadcrumbList')?.itemListElement.slice(-1)[0].item?.name || pageData?.name.slice(0, pageData?.name.lastIndexOf('-')).trim(); release.description = query.content('.leading-relaxed'); release.date = pageData?.datePublished && new Date(pageData.datePublished); release.actors = query.elements('.models-slider-single a').map((el) => { const actorUrl = unprint.query.url(el, null); const avatarUrl = unprint.query.img(el); return { name: unprint.query.content(el), url: actorUrl, avatar: [ avatarUrl?.replace(/-\d+x\d+/, ''), avatarUrl, ], gender: genderFromUrl(actorUrl), }; }); release.poster = imageData?.url || query.meta('property="og:image"') || html.match(/poster: '(http.*\.jpg)'/)?.[1]; release.photos = query.imgs('.gallery img'); release.trailer = query.video('source', 'src', { origin: entity.url }); if (!release.date && release.poster) { const match = release.poster.match(/\/uploads\/(\d{4})\/(\d{2})/); if (match) { release.date = new Date(match[1], match[2] - 1, 1); release.datePrecision = 'month'; } } return release; } function scrapeProfile({ query }, entity, url) { const profile = { url }; const data = query.json('.yoast-schema-graph'); profile.gender = genderFromUrl(url); if (data) { profile.avatar = data['@graph']?.find((item) => item['@type'] === 'ImageObject')?.url; } return profile; } async function fetchLatest(channel, page = 1) { const url = `${channel.url}/videos/page/${page}`; const res = await unprint.get(url, { selectAll: '.grid > a[href*="/videos"]' }); if (res.ok) { return scrapeAll(res.context, channel); } return res.status; } async function fetchProfilePage({ name, gender, url: actorUrl }, entity, secondAttempt) { const url = actorUrl || `${entity.url}/${gender === 'male' || secondAttempt ? 'atores' : 'atrizes'}/${slugify(name, '-')}`; const res = await unprint.get(url); if (res.ok) { return { res, url }; } if (actorUrl) { return fetchProfilePage({ name, gender }, entity, false); // don't count as second attempt, retry without actor URL } if (secondAttempt) { return res.status; } return fetchProfilePage({ name, gender }, entity, true); } async function fetchProfile(baseActor, entity, options) { const { res, url } = await fetchProfilePage(baseActor, entity, false); if (res.ok) { return scrapeProfile(res.context, entity, url, options); } return res.status; } module.exports = { fetchLatest, fetchProfile, scrapeScene, };