292 lines
8.2 KiB
JavaScript
Executable File
292 lines
8.2 KiB
JavaScript
Executable File
'use strict';
|
|
|
|
const unprint = require('unprint');
|
|
const cookie = require('cookie');
|
|
|
|
const slugify = require('../utils/slugify');
|
|
|
|
/**
 * Flatten a (possibly nested) list of image URLs and order it from the tallest to the shortest variant.
 *
 * The height is taken from the first `<width>_<height>` pattern found in each URL
 * (3-4 digits each, the width part is optional). URLs without such a pattern are kept,
 * but are treated as height 0 so they end up after every URL with a known height.
 *
 * @param {Array<string|string[]>|null|undefined} sources - source URLs; callers in this file pass the `data-srcset` source sets
 * @returns {string[]|null} URLs sorted by descending height, or null when there is no usable URL
 */
function extractSources(sources) {
	if (!(sources?.length > 0)) {
		return null;
	}

	const candidates = sources
		.flat()
		.filter((src) => typeof src === 'string' && src.length > 0) // skip null/undefined entries instead of crashing on .match()
		.map((src) => ({
			src,
			// coerce to a number and default to 0, so the comparator below never yields NaN
			height: Number(src.match(/(\d{3,4})?_(\d{3,4})/)?.[2]) || 0,
		}));

	if (candidates.length === 0) {
		return null;
	}

	// `candidates` is a fresh array, so sorting it in place does not touch the caller's input
	return candidates
		.sort((posterA, posterB) => posterB.height - posterA.height)
		.map(({ src }) => src);
}
|
|
|
|
/**
 * Turn the scene tiles of a listing into release objects.
 *
 * @param {Array<{ query: object }>} scenes - one context per scene tile; only its `query` helper is used
 * @param {{ url: string }} channel - entity whose `url` is passed as origin when resolving links
 * @returns {object[]} one release per tile with `url`, `entryId`, `title`, `actors`, `poster` and `teaser`
 */
function scrapeAll(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('.title', { origin: channel.url });

		// a tile without a title link has no URL; leave the ID unset rather than
		// letting `new URL()` throw and take the rest of the listing down with it
		release.entryId = release.url
			? new URL(release.url).pathname.match(/\/scene\/(\d+)/)?.[1]
			: undefined;

		release.title = query.content('.title');

		release.actors = query.all('.actors a').map((actorEl) => ({
			name: unprint.query.content(actorEl),
			url: unprint.query.url(actorEl, null, { origin: channel.url }),
		}));

		// prefer the sorted <source> set, fall back to the plain <img>
		release.poster = extractSources(query.sourceSets('.thumb source', 'data-srcset')) || query.img('.thumb img');

		// high-quality preview first, regular preview second
		release.teaser = [
			query.video('.thumb-ratio', { attribute: 'data-hq-preview' }),
			query.video('.thumb-ratio', { attribute: 'data-preview' }),
		];

		return release;
	});
}
|
|
|
|
/**
 * Request the English news page to obtain a session cookie.
 *
 * @param {{ url: string }} channel - entity whose `url` is the site base URL
 * @returns {Promise<string|null>} `dorcelclub=<value>`, ready to be used as a Cookie header value,
 *   or null when the response did not set a `dorcelclub` cookie
 */
async function beforeFetchLatest(channel) {
	// scene page only seems to accept language preferences from session
	const { res } = await unprint.get(`${channel.url}/en/news-videos-x-marc-dorcel`, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
			'Accept-Language': 'en-US,en', // fetch English rather than French titles
		},
	});

	// the header may be absent, a single string or a list; the session cookie is not necessarily the first entry
	const setCookieHeaders = [res?.headers?.['set-cookie'] ?? []].flat();

	for (const header of setCookieHeaders) {
		const sessionCookie = cookie.parse(header)?.dorcelclub;

		if (sessionCookie) {
			return `dorcelclub=${sessionCookie}`;
		}
	}

	// no session cookie: report that explicitly instead of returning the literal 'dorcelclub=undefined'
	return null;
}
|
|
|
|
/**
 * Fetch one page of the newest scenes.
 *
 * @param {{ url: string }} channel - entity whose `url` is the site base URL
 * @param {number} [page=1] - page number put in the query string
 * @param {object} [_options] - unused
 * @param {{ beforeFetchLatest?: string|null }} [context] - `beforeFetchLatest` is sent as the Cookie header when set
 * @returns {Promise<object[]|number>} scraped releases, or the HTTP status when the request yielded nothing to scrape
 */
async function fetchLatest(channel, page = 1, _options, { beforeFetchLatest: sessionCookie } = {}) {
	const url = `${channel.url}/scene/list/more/?lang=en&page=${page}&sorting=new`;

	const res = await unprint.post(url, null, {
		selectAll: '.scene',
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
			'Accept-Language': 'en-US,en', // fetch English rather than French titles
			...(sessionCookie && { Cookie: sessionCookie }), // only send the header when there is a session
		},
	});

	// same guard as fetchMovies: without a context there is nothing to scrape
	if (res.ok && res.context) {
		return scrapeAll(res.context, channel);
	}

	return res.status;
}
|
|
|
|
/**
 * Scrape a scene page into a release object.
 *
 * @param {{ query: object }} context - page context; only its `query` helper is used
 * @param {string} url - absolute URL of the scene page, the entry ID is read from its path
 * @param {{ url: string }} channel - entity whose `url` is passed as origin when resolving links
 * @returns {object} release with entryId, title, description, date (and datePrecision), duration,
 *   actors, director, poster and, when the page links to one, the parent movie
 */
function scrapeScene({ query }, url, channel) {
	const release = {};

	release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)/)?.[1];

	release.title = query.content('h1.title');
	release.description = query.content('.content-description .full p');

	const publishDate = query.date('.publish_date', 'MMM DD, YYYY');

	release.date = publishDate || query.date('.out_date', 'YYYY', { match: /\d{4}/ });

	// the precision follows the date that was actually used: whenever the full publish date
	// is unavailable (element missing or not parseable), only the year fallback can apply
	if (!publishDate) {
		release.datePrecision = 'year';
	}

	release.duration = query.duration('.duration');

	release.actors = query.all('.actress a').map((actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null, { origin: channel.url }),
	}));

	// the text is split on the first colon and the part after it is kept
	release.director = query.content('.director')?.split(/\s*:\s*/)[1];

	// prefer the sorted <source> set, fall back to the plain <img>
	release.poster = extractSources(query.sourceSets('.player source', 'data-srcset')) || query.img('.player img');

	const movieUrl = query.url('.movie a', { origin: channel.url });

	if (movieUrl) {
		release.movie = {
			entryId: new URL(movieUrl).pathname.match(/\/porn-movie\/([\w-]+)/)?.[1],
			title: query.content('.movie a'),
			url: movieUrl,
		};
	}

	return release;
}
|
|
|
|
/**
 * Request a scene page and scrape it.
 *
 * @param {string} url - URL of the scene page
 * @param {{ url: string }} channel - entity whose `url` is used to build the Referer
 * @returns {Promise<object|number>} the scraped release, or the HTTP status when the response is not OK
 */
async function fetchScene(url, channel) {
	const headers = {
		'Accept-Language': 'en-US,en', // fetch English rather than French titles
		Referer: `${channel.url}/en/news-videos-x-marc-dorcel`,
	};

	const res = await unprint.get(url, { headers });

	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.context, url, channel);
}
|
|
|
|
/**
 * Turn the movie tiles of a listing into release objects.
 *
 * @param {Array<{ query: object }>} movies - one context per movie tile; only its `query` helper is used
 * @param {{ url: string }} channel - entity whose `url` is passed as origin when resolving links
 * @returns {object[]} one release per tile with `url`, `entryId`, `title` and `covers`
 */
function scrapeMovies(movies, channel) {
	return movies.map(({ query }) => {
		const release = {};

		release.url = query.url(null, { origin: channel.url })?.replace('/film-x', '/en/porn-movie'); // French -> English fallback in case language headers didn't work

		// the optional chaining above allows for a tile without URL; don't let `new URL()` throw on it afterwards
		release.entryId = release.url
			? new URL(release.url).pathname.match(/\/porn-movie\/([\w-]+)/)?.[1]
			: undefined;

		release.title = query.content('h2');

		// prefer the sorted <source> set, fall back to the plain <img>
		release.covers = [extractSources(query.sourceSets('.thumb-ratio source', 'data-srcset')) || query.img('.thumb-ratio img')];

		return release;
	});
}
|
|
|
|
/**
 * Fetch one page of the newest movies.
 *
 * @param {{ url: string }} channel - entity whose `url` is the site base URL
 * @param {number} [page=1] - page number put in the query string
 * @param {{ beforeFetchLatest?: string|null }} [context] - `beforeFetchLatest` is sent as the Cookie header when set
 * @returns {Promise<object[]|number>} scraped movies, or the HTTP status when the request yielded nothing to scrape
 */
async function fetchMovies(channel, page = 1, { beforeFetchLatest: sessionCookie } = {}) {
	const url = `${channel.url}/movies/more?lang=en&page=${page}&sorting=new`;

	const res = await unprint.post(url, null, {
		selectAll: '.items .movie',
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
			'Accept-Language': 'en-US,en', // fetch English rather than French titles
			Referer: `${channel.url}/en/porn-movie?sorting=new`, // might be used to derive sorting; built from the channel like every other request here
			...(sessionCookie && { Cookie: sessionCookie }), // seems necessary for English results; only sent when there is a session
		},
	});

	if (res.ok && res.context) {
		return scrapeMovies(res.context, channel);
	}

	return res.status;
}
|
|
|
|
/**
 * Scrape a movie page into a release object, including the scenes listed on it.
 *
 * @param {{ query: object }} context - page context; only its `query` helper is used
 * @param {string} url - absolute URL of the movie page, the entry ID is read from its path
 * @param {{ url: string }} channel - entity whose `url` is passed as origin when resolving links
 * @returns {object} release with title, description, entryId, date (year precision), duration,
 *   actors, poster, covers and scenes
 */
function scrapeMovie({ query }, url, channel) {
	const linkOptions = { origin: channel.url };

	// each actor card carries a name, a profile link and a thumbnail
	const toActor = (actorEl) => ({
		name: unprint.query.content(actorEl, '.name'),
		url: unprint.query.url(actorEl, 'a', linkOptions),
		avatar: extractSources(unprint.query.sourceSets(actorEl, '.thumbnail source', 'data-srcset')) || unprint.query.img(actorEl, '.thumbnail img'),
	});

	return {
		title: query.content('.header h1'),
		description: query.content('.content-text p'),
		entryId: new URL(url).pathname.match(/\/porn-movie\/([\w-]+)/)?.[1],
		// only the year is read from the page
		date: query.date('.out_date', 'YYYY', { match: /\d{4}/ }),
		datePrecision: 'year',
		duration: query.duration('.duration'),
		actors: query.all('.actors .actor').map(toActor),
		// for both images: sorted <source> set of the matching <picture> first, plain <img> as fallback
		poster: extractSources(query.sourceSets('//picture[img[contains(@class, \'banner\')]]//source', 'data-srcset')) || query.img('img.banner'),
		covers: [extractSources(query.sourceSets('//picture[img[contains(@class, \'cover\')]]//source', 'data-srcset')) || query.img('img.cover')],
		scenes: scrapeAll(unprint.initAll(query.all('.scene')), channel),
	};
}
|
|
|
|
/**
 * Request a movie page and scrape its `.content` section.
 *
 * @param {string} url - URL of the movie page
 * @param {{ url: string }} channel - entity whose `url` is used to build the Referer
 * @returns {Promise<object|number>} the scraped movie, or the HTTP status when the response
 *   is not OK or has no context
 */
async function fetchMovie(url, channel) {
	const headers = {
		'Accept-Language': 'en-US,en', // fetch English rather than French titles
		Referer: `${channel.url}/en/porn-movie`,
	};

	const res = await unprint.get(url, { select: '.content', headers });

	if (!res.ok || !res.context) {
		return res.status;
	}

	return scrapeMovie(res.context, url, channel);
}
|
|
|
|
/**
 * Scrape an actor page into a profile object.
 *
 * @param {{ query: object }} context - page context; only its `query` helper is used
 * @param {{ url: string }} entity - entity passed on to scrapeAll for resolving scene links
 * @returns {Promise<object>} profile with description, nationality, banner, avatar and releases
 */
async function scrapeProfile({ query }, entity) {
	const profile = {};

	profile.description = query.content('.content-description .content-text > p, .content-description .full p'); // different structure for overflowing vs short text
	profile.nationality = query.content('.nationality');

	profile.banner = query.img('.header img:not([src*="actor/banner"])'); // ignore stock banner

	// the fallback selector must use the same 'actorsquare' marker as the <source> selector to ever match
	profile.avatar = extractSources(query.sourceSets('.banner source[data-srcset*="actorsquare"]', 'data-srcset'))
		|| query.img('.banner img[src*="actorsquare"]'); // usually banner, but worth trying

	profile.releases = scrapeAll(unprint.initAll(query.all('.scene')), entity);

	return profile;
}
|
|
|
|
/**
 * Resolve the profile URL of an actor, searching the site when the actor has no known URL.
 *
 * @param {{ url?: string, name: string, slug: string }} baseActor - `url` is returned as-is when set;
 *   otherwise `name` is searched for and `slug` is compared against the slugified result names
 * @param {{ url: string }} entity - entity whose `url` is the site base URL
 * @returns {Promise<string|null>} the profile URL, or null when the search failed or had no matching actor
 */
async function getActorUrl(baseActor, entity) {
	if (baseActor.url) {
		return baseActor.url;
	}

	// URL slugs are unpredictable: /jessie-volt, /aleska_diamond, /liza-del_sierra
	// AJAX API at /search/ajax/display doesn't actually return results unless an actor ID is passed
	const searchRes = await unprint.post(`${entity.url}/en/search`, { s: baseActor.name }, {
		selectAll: '#search .actor',
		form: true, // presumably makes unprint send the body URL-encoded, so no explicit Content-Type is set - TODO confirm
		headers: {
			'Accept-Language': 'en-US,en',
		},
	});

	// a failed search means "no URL"; an HTTP status returned from here is truthy and would be requested as if it were a URL
	if (!searchRes.ok || !searchRes.context) {
		return null;
	}

	const actorItem = searchRes.context.find(({ query }) => slugify(query.content('.name')) === baseActor.slug);

	if (!actorItem) {
		return null;
	}

	return actorItem.query.url('a', { origin: entity.url });
}
|
|
|
|
/**
 * Look up an actor's profile page and scrape it.
 *
 * @param {{ url?: string, name: string, slug: string }} baseActor - actor to look up
 * @param {{ entity: { url: string } }} context - `entity` is the site the actor is looked up on
 * @returns {Promise<object|null>} the scraped profile, or null when the actor page
 *   could not be found or fetched
 */
async function fetchProfile(baseActor, { entity }) {
	const actorUrl = await getActorUrl(baseActor, entity);

	// anything but a non-empty string (null, or an HTTP status from a failed lookup) is not a page to request
	if (typeof actorUrl !== 'string' || actorUrl.length === 0) {
		return null;
	}

	const actorRes = await unprint.get(actorUrl, {
		headers: {
			'Accept-Language': 'en-US,en',
		},
	});

	if (actorRes.ok) {
		return scrapeProfile(actorRes.context, entity);
	}

	return null;
}
|
|
|
|
module.exports = {
|
|
beforeFetchLatest,
|
|
fetchLatest,
|
|
fetchScene,
|
|
fetchMovie,
|
|
fetchMovies,
|
|
fetchProfile,
|
|
};
|