'use strict'; const unprint = require('unprint'); function scrapeAll(scenes, channel) { return scenes.map(({ query }) => { const release = {}; const data = query.json('script[type="application/ld+json"]'); release.url = unprint.prefixUrl(data?.url, channel.url) || query.url('article a[href*="/scenes"]'); release.entryId = query.attribute(null, 'data-scene-id'); release.title = data?.name || query.content('.c-grid-item-footer-title'); release.description = data?.description; release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD'); release.duration = query.duration('.c-grid-ratio-bottom'); release.tags = [ query.exists('.c-grid-badge--fisheye-bg') && 'fisheye', query.exists('.c-grid-badge--fleshlight-badge-multi') && 'scripts', query.exists('.c-grid-badge--passthrough') && 'passthrough', query.exists('.c-grid-badge--passthrough-ai') && 'ai-passthrough', ].filter(Boolean); const poster = query.img('img[data-qa="grid-item-photo-img"]', { attribute: 'data-srcset' }); if (poster) { release.poster = [ poster.replace('-app.', '-desktop.'), poster, ]; } release.teaser = query.video('img[data-qa="grid-item-photo-img"]', { attribute: 'data-videosrc' }); return release; }); } function scrapeScene({ query, window }, { url, entity }) { const release = {}; const data = query.json('//script[contains(@type, "application/ld+json") and contains(text(), "VideoObject")]'); const videoData = window.vrPlayerSettings?.videoData; release.entryId = videoData?.id || (data?.url || new URL(url).pathname).split('-').at(-1); release.title = videoData?.title || data?.name || query.content('h1[data-qa="scene-title"]'); release.description = query.content('div[data-qa="scene-about-tab-text"]'); // data text is cut off release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD') || query.date('time[data-qa="page-scene-studio-date"]', 'YYYY-MM-DD', { attribute: 'datetime' }); // release.duration = unprint.extractTimestamp(data?.duration); // video duration data seems to be missing hours, regularly leading to wrong numbers; rely on front page duration release.actors = query.all('div[data-qa="scene-model-list-item"]').map((actorEl) => { const avatar = unprint.query.content(actorEl, 'img[data-qa="scene-model-list-item-photo-img"]', { attribute: 'data-src' }); return { name: unprint.query.content(actorEl, 'a[data-qa="scene-model-list-item-name"]'), url: unprint.query.url(actorEl, 'a[data-qa="scene-model-list-item-photo-link-to-profile"], a[data-qa="scene-model-list-item-name"]', { origin: entity.url }), avatar: [ avatar?.replace('-small.', '.'), avatar, ], }; }) || data?.actor.map((actor) => actor.name); // prefer html actors for url and avatar release.tags = query.contents('a[data-qa="scene-tags-list-item-link"]'); const fallbackPoster = data?.thumbnail || query.img(`link[rel="preload"][as="image"][href*="images/${release.entryId}"]`); release.poster = [ videoData?.posterURL, fallbackPoster?.replace('-app.', '-desktop.'), fallbackPoster, ]; release.photos = query.imgs('.mediabox-img', { attribute: 'data-srcset' }); release.trailer = videoData?.src .filter((src) => src.encoding === 'h264') ?.map((src) => ({ src: src.url, type: src.mimeType, quality: parseInt(src.quality, 10), expectType: { 'binary/octet-stream': 'video/mp4', }, })); release.chapters = videoData?.timeStamps?.map((chapter) => ({ time: chapter.ts, tags: [chapter.name], })); release.qualities = release.trailer?.map((trailer) => trailer.quality); return release; } function scrapeProfile({ query }, entity) { const profile = {}; const data = query.json('//script[contains(@type, "application/ld+json") and contains(text(), "Person")]'); if (!data) { return null; } profile.url = unprint.prefixUrl(data.url, entity.url); profile.dateOfBirth = unprint.extractDate(data.birthDate, 'MMMM DD, YYYY'); profile.birthPlace = data.nationality?.name || data.nationality; // origin country rather than nationality // height and weight are provided in both cm and lbs, but this seems to be a manual conversion; the format isn't always the same profile.height = unprint.extractNumber(data.height, { match: /(\d+)\s*cm/, matchIndex: 1 }); profile.weight = unprint.extractNumber(data.weight, { match: /(\d+)\s*kg/, matchIndex: 1 }); profile.description = data.description; profile.avatar = [ data.image?.replace('-small.', '.'), data.image, ]; return profile; } async function fetchLatest(channel, page = 1) { const url = `https://www.sexlikereal.com/studios/slr-originals?sort=most_recent&page=${page}`; const res = await unprint.get(url, { selectAll: '.c-grid-item--scene' }); if (res.ok) { return scrapeAll(res.context, channel); } return res.status; } async function fetchUpcoming(channel) { const url = 'https://www.sexlikereal.com/studios/slr-originals?type=upcoming'; const res = await unprint.get(url, { selectAll: '.c-grid-item--scene' }); if (res.ok) { return scrapeAll(res.context, channel); } return res.status; } async function fetchProfile({ slug }, entity) { const url = `${entity.url}/pornstars/${slug}`; const res = await unprint.get(url); if (res.ok) { return scrapeProfile(res.context, entity); } return res.status; } module.exports = { fetchLatest, fetchUpcoming, fetchProfile, scrapeScene: { scraper: scrapeScene, parser: { runScripts: 'dangerously', }, }, };