'use strict'; const unprint = require('unprint'); const cookie = require('cookie'); const slugify = require('../utils/slugify'); function extractSources(sources) { if (sources?.length > 0) { return sources .flat() .map((src) => { const [width, height] = src.match(/(\d{3,4})?_(\d{3,4})/)?.slice(1) || []; return { src, width, height, }; }) .toSorted((posterA, posterB) => { return posterB.height - posterA.height; }) .map(({ src }) => src); } return null; } function scrapeAll(scenes, channel) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('.title', { origin: channel.url }); release.entryId = new URL(release.url).pathname.match(/\/scene\/(\d+)/)?.[1]; release.title = query.content('.title'); release.actors = query.all('.actors a').map((actorEl) => ({ name: unprint.query.content(actorEl), url: unprint.query.url(actorEl, null, { origin: channel.url }), })); release.poster = extractSources(query.sourceSets('.thumb source', 'data-srcset')) || query.img('.thumb img'); release.teaser = [ query.video('.thumb-ratio', { attribute: 'data-hq-preview' }), query.video('.thumb-ratio', { attribute: 'data-preview' }), ]; return release; }); } async function beforeFetchLatest(channel) { // scene page only seems to accept language preferences from session const { res } = await unprint.get(`${channel.url}/en/news-videos-x-marc-dorcel`, { headers: { 'X-Requested-With': 'XMLHttpRequest', 'Accept-Language': 'en-US,en', // fetch English rather than French titles }, }); const sessionCookie = cookie.parse(res.headers['set-cookie'][0])?.dorcelclub; return `dorcelclub=${sessionCookie}`; } async function fetchLatest(channel, page = 1, _options, { beforeFetchLatest: sessionCookie }) { const url = `${channel.url}/scene/list/more/?lang=en&page=${page}&sorting=new`; const res = await unprint.post(url, null, { selectAll: '.scene', headers: { 'X-Requested-With': 'XMLHttpRequest', 'Accept-Language': 'en-US,en', // fetch English rather than French titles Cookie: sessionCookie, }, }); if (res.ok) { return scrapeAll(res.context, channel); } return res.status; } function scrapeScene({ query }, url, channel) { const release = {}; release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)/)?.[1]; release.title = query.content('h1.title'); release.description = query.content('.content-description .full p'); release.date = query.date('.publish_date', 'MMM DD, YYYY') || query.date('.out_date', 'YYYY', { match: /\d{4}/ }); if (!query.exists('.publish_date')) { release.datePrecision = 'year'; } release.duration = query.duration('.duration'); release.actors = query.all('.actress a').map((actorEl) => ({ name: unprint.query.content(actorEl), url: unprint.query.url(actorEl, null, { origin: channel.url }), })); release.director = query.content('.director')?.split(/\s*:\s*/)[1]; release.poster = extractSources(query.sourceSets('.player source', 'data-srcset')) || query.img('.player img'); const movieUrl = query.url('.movie a', { origin: channel.url }); if (movieUrl) { release.movie = { entryId: new URL(movieUrl).pathname.match(/\/porn-movie\/([\w-]+)/)?.[1], title: query.content('.movie a'), url: query.url('.movie a', { origin: channel.url }), }; } return release; } async function fetchScene(url, channel) { const res = await unprint.get(url, { headers: { 'Accept-Language': 'en-US,en', // fetch English rather than French titles Referer: `${channel.url}/en/news-videos-x-marc-dorcel`, }, }); if (res.ok) { return scrapeScene(res.context, url, channel); } return res.status; } function scrapeMovies(movies, channel) { return movies.map(({ query }) => { const release = {}; release.url = query.url(null, { origin: channel.url })?.replace('/film-x', '/en/porn-movie'); // French -> English fallback in case language headers didn't work release.entryId = new URL(release.url).pathname.match(/\/porn-movie\/([\w-]+)/)?.[1]; release.title = query.content('h2'); release.covers = [extractSources(query.sourceSets('.thumb-ratio source', 'data-srcset')) || query.img('.thumb-ratio img')]; return release; }); } async function fetchMovies(channel, page = 1, { beforeFetchLatest: sessionCookie }) { const url = `${channel.url}/movies/more?lang=en&page=${page}&sorting=new`; const res = await unprint.post(url, null, { selectAll: '.items .movie', headers: { 'X-Requested-With': 'XMLHttpRequest', 'Accept-Language': 'en-US,en', // fetch English rather than French titles Referer: 'https://www.dorcelclub.com/en/porn-movie?sorting=new', // might be used to derive sorting Cookie: sessionCookie, // seems necessary for English results }, }); if (res.ok && res.context) { return scrapeMovies(res.context, channel); } return res.status; } function scrapeMovie({ query }, url, channel) { const release = {}; release.title = query.content('.header h1'); release.description = query.content('.content-text p'); release.entryId = new URL(url).pathname.match(/\/porn-movie\/([\w-]+)/)?.[1]; release.date = query.date('.out_date', 'YYYY', { match: /\d{4}/ }); release.datePrecision = 'year'; release.duration = query.duration('.duration'); release.actors = query.all('.actors .actor').map((actorEl) => ({ name: unprint.query.content(actorEl, '.name'), url: unprint.query.url(actorEl, 'a', { origin: channel.url }), avatar: extractSources(unprint.query.sourceSets(actorEl, '.thumbnail source', 'data-srcset')) || unprint.query.img(actorEl, '.thumbnail img'), })); release.poster = extractSources(query.sourceSets('//picture[img[contains(@class, \'banner\')]]//source', 'data-srcset')) || query.img('img.banner'); release.covers = [extractSources(query.sourceSets('//picture[img[contains(@class, \'cover\')]]//source', 'data-srcset')) || query.img('img.cover')]; release.scenes = scrapeAll(unprint.initAll(query.all('.scene')), channel); return release; } async function fetchMovie(url, channel) { const res = await unprint.get(url, { select: '.content', headers: { 'Accept-Language': 'en-US,en', // fetch English rather than French titles Referer: `${channel.url}/en/porn-movie`, }, }); if (res.ok && res.context) { return scrapeMovie(res.context, url, channel); } return res.status; } async function scrapeProfile({ query }, entity) { const profile = {}; profile.description = query.content('.content-description .content-text > p, .content-description .full p'); // different structure for overflowing vs short text profile.nationality = query.content('.nationality'); profile.banner = query.img('.header img:not([src*="actor/banner"])'); // ignore stock banner profile.avatar = extractSources(query.sourceSets('.banner source[data-srcset*="actorsquare"]', 'data-srcset')) || query.img('.banner img[src*="actorsqure"]'); // usually banner, but worth trying profile.releases = scrapeAll(unprint.initAll(query.all('.scene')), entity); return profile; } async function getActorUrl(baseActor, entity) { if (baseActor.url) { return baseActor.url; } // URL slugs are unpredictable: /jessie-volt, /aleska_diamond, /liza-del_sierra // AJAX API at /search/ajax/display doesn't actually return results unless an actor ID is passed const searchRes = await unprint.post(`${entity.url}/en/search`, { s: baseActor.name }, { selectAll: '#search .actor', form: true, headers: { // 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8', 'Accept-Language': 'en-US,en', }, }); if (!searchRes.ok) { return searchRes.status; } const actorItem = searchRes.context.find(({ query }) => slugify(query.content('.name')) === baseActor.slug); if (!actorItem) { return null; } return actorItem.query.url('a', { origin: entity.url }); } async function fetchProfile(baseActor, { entity }) { const actorUrl = await getActorUrl(baseActor, entity); if (!actorUrl) { return null; } const actorRes = await unprint.get(actorUrl, { headers: { 'Accept-Language': 'en-US,en', }, }); if (actorRes.ok) { return scrapeProfile(actorRes.context, entity); } return null; } module.exports = { beforeFetchLatest, fetchLatest, fetchScene, fetchMovie, fetchMovies, fetchProfile, };