'use strict'; const unprint = require('unprint'); const slugify = require('../utils/slugify'); const { stripQuery } = require('../utils/url'); const channelMap = { spa: 'thespa', gym: 'thegym', dormroom: 'thedormroom', dressingroom: 'thedressingroom', psepornstarexperience: 'pornstarexperience', office: 'theoffice', ta: 'tanda', }; function scrapeLatest(scenes, channel) { return scenes.map(({ query }) => { const release = {}; const url = query.url('a'); release.entryId = query.attribute('a', 'data-scene-id') || (url && new URL(url).pathname.match(/-(\d+)$/)?.[1]) || null; release.date = query.date('.entry-date, .scene-date', 'MMM D, YYYY'); release.duration = query.duration('.scene-runtime'); release.actors = query.all('.contain-actors a, .scene-actors a').map((actorEl) => ({ name: unprint.query.content(actorEl), url: channel.parameters?.useActorUrl === false ? null // actor URL is scene link in native layout : unprint.query.url(actorEl, null), })); release.poster = [ ...(query .sourceSet('source[data-srcset*="scenes/"][type="image/jpeg"]', 'data-srcset', { includeDescriptor: true }) ?.toSorted((sourceA, sourceB) => sourceB.density - sourceA.density) .map((source) => source.url) || []), query.img('.main-scene-img', { attribute: 'srcset' }), query.img('.scene-thumb'), ].filter(Boolean); release.teaser = query.video('a[data-desktop-video]', { attribute: 'data-desktop-video' }); release.tags = query.contents('.flag-bg'); release.qualities = [ query.exists('//a[contains(@class, "label-four-k") and contains(text(), "4K")] | //span[contains(@class, "bug-4k")]') && 2160, // label-four-k is also used for non-4K tags query.exists('//a[contains(@class, "label-hd") and contains(text(), "HD")]') && 720, ].filter(Boolean); const channelSlug = slugify(query.content('.site-title'), ''); release.channel = channelMap[channelSlug] || channelSlug; // NA affiliate prefers to push more traffic to Naughty America VR, all scenes labeled VR seem to be available on NAVR release.url = release.tags?.some((tag) => tag.toLowerCase() === 'vr') ? url.replace('naughtyamerica.com', 'naughtyamericavr.com') : url; return release; }); } async function fetchLatest(channel, page = 1, { parameters }) { const url = parameters.latest || `${channel.url}${parameters?.scenes || ''}`; const res = await unprint.browserRequest(`${url}?page=${page}`, { selectAll: '.site-list .scene-item, .panel-body', async control(ctx) { await ctx.locator('.site-list, .grid-three').hover({ trial: true, timeout: 10000 }); // wait for overview to initialize }, }); if (res.ok) { const scenes = scrapeLatest(res.context, channel, parameters); return scenes; } return res.status; } function scrapeScene({ query }, { url }) { const release = {}; release.entryId = new URL(url).pathname.match(/-(\d+)$/)?.[1]; // release.title = query.content('.breadcrumb-item.active') || query.content('.scene-title, .grey-title'); // main title has performer name instead of scene title in live scenes release.title = query.content('.scene-title, .grey-title'); // breadcrumb as used before often doesn't have title release.description = query.text('.synopsis, .scene-description'); release.date = query.date('.entry-date, .released-date', ['MMM D, YYYY', 'MM/DD/YY']); release.duration = query.duration('.duration'); release.actors = query.exists('.performer-list') || query.exists('.scene-info a[href*="/pornstar"].scene-title') // title links to performer in live scenes ? query.all('.performer-list a, .grey-performers a, .scene-info a[href*="/pornstar"].scene-title').map((actorEl) => ({ name: unprint.query.content(actorEl), url: stripQuery(unprint.query.url(actorEl, null)), })) : query.content('.grey-performers')?.split(',').map((actorName) => actorName.trim()); // not all performers are linked release.poster = [ ...(query.sourceSet('.play-trailer source[data-srcset*="scenes/"][type="image/jpeg"]', 'data-srcset') || []), ...(query.sourceSet('.scenepage-video source[srcset*="scenes/"][type="image/jpeg"]', 'srcset') || []), query.img('.play-trailer img[data-srcset*="scenes/"]', { attribute: 'data-srcset' }), query.img('.scenepage-video .playcard'), query.img('.scene-page .start-card'), query.poster('dl8-video[poster]'), ].filter(Boolean); release.photos = query.els('.contain-scene-images.desktop-only .scene-image').map((imgEl) => [ unprint.query.url(imgEl, null), unprint.query.img(imgEl, 'img', { attribute: 'srcset' }), ]); const trailer = query.video('#triggerPlay video source'); if (trailer) { release.trailer = [ { source: trailer.replace(/_\d+\.mp4/, '_1080.mp4'), quality: 1080, }, trailer, ]; } const channelSlug = slugify(query.content('.site-title'), ''); release.channel = channelMap[channelSlug] || channelSlug; release.tags = query.contents('.categories a, .category a'); release.qualities = [ query.exists('//a[contains(@class, "label-four-k") and contains(text(), "4K")]') && 2160, // label-four-k is also used for non-4K tags query.exists('img.icon-1080') && 1080, query.exists('//a[contains(@class, "label-hd") and contains(text(), "HD")]') && 720, ].filter(Boolean); return release; } async function fetchScene(url, _channel) { // latest set NaughtyAmericaVR URL, but try deep scrape from regular NA website const res = await unprint.browserRequest(url.replace('naughtyamericavr.com', 'naughtyamerica.com'), { async control(ctx) { await ctx.locator('.scene-info, .scene').first().hover({ trial: true, timeout: 30000 }); // wait for trailer to initialize }, }); if (res.ok) { const scene = scrapeScene(res.context, { url }); return scene; } return res.controlError || res.status; } async function scrapeProfile({ query }) { const profile = {}; profile.description = query.content('.bio_about_text, .performer-description'); profile.avatar = query.img('img.performer-pic, img.performer-img, img.peformer-img'); // sic peformer return profile; } async function fetchProfile({ slug }, { channel }) { const url = unprint.prefixUrl(`/pornstar/${slug}`, channel.url); const res = await unprint.browserRequest(url, { select: '.bio-info, .performer-details', async control(ctx) { await ctx.locator('.bio-info, .performer-details').hover({ trial: true, timeout: 30000 }); // wait for bio to initialize }, }); if (res.ok) { const profile = scrapeProfile(res.context, { url }); return profile; } return res.status; } module.exports = { fetchLatest, fetchScene, fetchProfile, };