// traxxx/src/scrapers/dorcel.js

'use strict';
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
/**
 * Scrape a list of scene tiles into release objects.
 *
 * @param {Array<{ query: object }>} scenes - Query contexts, one per scene tile.
 * @param {{ url: string }} channel - Channel whose `url` is passed as origin when resolving links.
 * @returns {object[]} One release per tile with url, entryId, title, actors, poster and teaser.
 */
function scrapeAll(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('.title', 'href', { origin: channel.url });
    // Numeric ID taken from the /scene/<id> path segment; undefined when the path has another shape.
    release.entryId = new URL(release.url).pathname.match(/\/scene\/(\d+)/)?.[1];
    release.title = query.cnt('.title');

    release.actors = query.all('.actors a').map((actorEl) => ({
      name: query.cnt(actorEl),
      url: query.url(actorEl, null, 'href', { origin: channel.url }),
    }));

    const fallbackPoster = query.img('.thumb img');

    // Prefer the srcset; otherwise offer the image URL with '_crop' stripped, then the URL as-is.
    // Optional chaining keeps a tile without any thumbnail from throwing (same guard as scrapeScene).
    release.poster = query.sourceSet('.thumb img', 'data-srcset')
      || [fallbackPoster?.replace('_crop', ''), fallbackPoster];

    // Two preview candidates, the 'data-hq-preview' one listed first.
    release.teaser = [
      query.video('.thumb-ratio', 'data-hq-preview'),
      query.video('.thumb-ratio', 'data-preview'),
    ];

    return release;
  });
}
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ query: object }} context - Query context for the scene page.
 * @param {string} url - Absolute URL of the scene page; the entry ID is read from its path.
 * @param {{ url: string }} channel - Channel whose `url` is passed as origin when resolving links.
 * @returns {object} Release with entryId, title, description, date, duration, actors, director,
 *   poster and, when the page links to one, the parent movie.
 */
function scrapeScene({ query }, url, channel) {
  const release = {};

  // Numeric ID taken from the /scene/<id> path segment; undefined when the path has another shape.
  release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)/)?.[1];
  release.title = query.cnt('h1.title');
  release.description = query.cnt('.content-description .full p');
  release.date = query.date('.publish_date', 'MMMM DD, YYYY');
  release.duration = query.dur('.duration');

  release.actors = query.all('.actress a').map((actorEl) => ({
    name: query.cnt(actorEl),
    url: query.url(actorEl, null, 'href', { origin: channel.url }),
  }));

  // Keep only the part after the first colon of the '.director' text.
  release.director = query.cnt('.director')?.split(/\s*:\s*/)[1];

  const fallbackPoster = query.img('.player img');

  // Prefer the srcset; otherwise offer the image URL with '_crop' stripped, then the URL as-is.
  release.poster = query.sourceSet('.player img', 'data-srcset')
    || [fallbackPoster?.replace('_crop', ''), fallbackPoster];

  const movieUrl = query.url('.movie a', 'href', { origin: channel.url });

  if (movieUrl) {
    release.movie = {
      entryId: new URL(movieUrl).pathname.match(/\/porn-movie\/([\w-]+)/)?.[1],
      title: query.cnt('.movie a'),
      url: movieUrl, // already resolved above, no need to query the link a second time
    };
  }

  return release;
}
/**
 * Scrape a list of movie tiles into release objects.
 *
 * @param {Array<{ query: object }>} movies - Query contexts, one per movie tile.
 * @param {{ url: string }} channel - Channel whose `url` is passed as origin when resolving links.
 * @returns {object[]} One release per tile with url, entryId, title and covers.
 */
function scrapeMovies(movies, channel) {
  return movies.map(({ query }) => {
    // The tile's own href is the movie link.
    const movieUrl = query.url(null, 'href', { origin: channel.url });
    const { pathname } = new URL(movieUrl);

    return {
      url: movieUrl,
      // Slug taken from the /porn-movie/<slug> path segment.
      entryId: pathname.match(/\/porn-movie\/([\w-]+)/)?.[1],
      title: query.cnt('h2'),
      // A single cover entry holding the whole source set.
      covers: [query.sourceSet('img', 'data-srcset')],
    };
  });
}
/**
 * Scrape a single movie page into a release object, including the scenes listed on it.
 *
 * @param {{ query: object, el: object }} context - Query context and root element of the movie page.
 * @param {string} url - Absolute URL of the movie page; the entry ID is read from its path.
 * @param {{ url: string }} channel - Channel whose `url` is passed as origin when resolving links.
 * @returns {object} Release with title, description, entryId, date, datePrecision, duration, actors,
 *   poster, covers and scenes.
 */
function scrapeMovie({ query, el }, url, channel) {
  const release = {};

  release.title = query.cnt('.header h1');
  release.description = query.cnt('.content-text p');
  // Slug taken from the /porn-movie/<slug> path segment.
  release.entryId = new URL(url).pathname.match(/\/porn-movie\/([\w-]+)/)?.[1];

  // Only a year is parsed from the page, so the precision is flagged accordingly.
  release.date = query.date('.out_date', 'YYYY');
  release.datePrecision = 'year';
  release.duration = query.dur('.duration');

  release.actors = query.all('.actors .actor').map((actorEl) => ({
    name: query.cnt(actorEl, '.name'),
    url: query.url(actorEl, 'a', 'href', { origin: channel.url }),
    avatar: query.sourceSet(actorEl, '.thumbnail img', 'data-srcset'),
  }));

  release.poster = query.sourceSet('.banner', 'data-src')?.[0];

  // A page without a '.cover' element used to throw on `.parentElement`; treat it as "no sources".
  const coverEl = query.el('.cover');
  const coverSources = coverEl ? query.all(coverEl.parentElement, 'source') : null;

  // One cover entry: every <source> candidate, highest resolution first (resolution is read from
  // a `_<digits>_` marker in the URL), followed by the '.cover' data-src as the last candidate.
  // When there are no sources the optional chain short-circuits and the entry is undefined.
  release.covers = [coverSources
    ?.map((sourceEl) => query.sourceSet(sourceEl, null, 'data-srcset'))
    .flat()
    .sort((coverA, coverB) => {
      const resA = Number(coverA.match(/_(\d{3,})_/)?.[1]);
      const resB = Number(coverB.match(/_(\d{3,})_/)?.[1]);

      if (resA < resB) return 1;
      if (resA > resB) return -1;

      return 0;
    })
    .concat(query.sourceSet('.cover', 'data-src')?.[0])];

  release.scenes = scrapeAll(qu.initAll(el, '.scene'), channel);

  return release;
}
/**
 * Scrape an actor page into a profile object.
 *
 * @param {{ query: object, el: object }} context - Query context and root element of the actor page.
 * @param {{ url: string }} entity - Entity forwarded to scrapeAll for the scenes listed on the page.
 * @param {string} [avatar] - Avatar URL found in the search results, if any.
 * @returns {Promise<object>} Profile with description, nationality, banner, optional avatar and releases.
 */
async function scrapeProfile({ query, el }, entity, avatar) {
  const profile = {
    // the markup differs between overflowing and short texts, hence the two selectors
    description: query.cnt('.content-description .content-text > p, .content-description .full p'),
    nationality: query.cnt('.nationality'),
    // skip the stock banner
    banner: query.img('.header img:not([src*="actor/banner"])'),
  };

  if (avatar) {
    // first candidate has the 'crop_' marker stripped, second is the URL as given
    const uncropped = avatar.replace('crop_', '');

    profile.avatar = [uncropped, avatar];
  }

  const sceneItems = qu.initAll(el, '.scene');

  profile.releases = scrapeAll(sceneItems, entity);

  return profile;
}
/**
 * Open a session and request the English news page with it before the latest scenes are fetched.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the base of the news page URL.
 * @returns {Promise<object>} The session created by `qu.session()`.
 */
async function beforeFetchLatest(channel) {
  // the scene page only seems to take its language preference from the session
  const session = qu.session();
  const newsUrl = `${channel.url}/en/news-videos-x-marc-dorcel`;

  const headers = {
    'X-Requested-With': 'XMLHttpRequest',
    'Accept-Language': 'en-US,en', // fetch English rather than French titles
  };

  // the response itself is not used, only the session is handed back
  await qu.getAll(newsUrl, '.scene', headers, { session });

  return session;
}
/**
 * Fetch one page of the newest scenes.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the base of the listing URL.
 * @param {number} [page=1] - Page number put into the listing URL.
 * @param {object} [options] - Unused by this scraper.
 * @param {{ beforeFetchLatest?: object }} [preData={}] - Carries the session returned by
 *   beforeFetchLatest; defaults to an empty object so the call does not throw when it is omitted.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request failed.
 */
async function fetchLatest(channel, page = 1, options, { beforeFetchLatest: session } = {}) {
  const url = `${channel.url}/scene/list/more/?lang=en&page=${page}&sorting=new`;

  const res = await qu.getAll(url, '.scene', {
    'X-Requested-With': 'XMLHttpRequest',
    'Accept-Language': 'en-US,en', // fetch English rather than French titles
  }, { session });

  if (res.ok) {
    return scrapeAll(res.items, channel);
  }

  return res.status;
}
/**
 * Fetch one page of the newest movies.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the base of the listing URL.
 * @param {number} [page=1] - Page number put into the listing URL.
 * @returns {Promise<object[]|number>} Scraped movies, or the response status when the request
 *   failed or came back without items.
 */
async function fetchMovies(channel, page = 1) {
  const url = `${channel.url}/movies/more?lang=en&page=${page}&sorting=new`;

  const res = await qu.getAll(url, '.movie', {
    'X-Requested-With': 'XMLHttpRequest',
    'Accept-Language': 'en-US,en', // fetch English rather than French titles
    Referer: 'https://www.dorcelclub.com/en/porn-movie?sorting=new', // might be used to derive sorting
  });

  if (res.ok && res.items) {
    return scrapeMovies(res.items, channel);
  }

  return res.status;
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Absolute URL of the scene page.
 * @param {{ url: string }} channel - Channel whose `url` is the base of the Referer header.
 * @returns {Promise<object|number>} The scraped release, or the response status when the request failed.
 */
async function fetchScene(url, channel) {
  const headers = {
    'Accept-Language': 'en-US,en', // fetch English rather than French titles
    Referer: `${channel.url}/en/news-videos-x-marc-dorcel`,
  };

  const res = await qu.get(url, null, headers);

  return res.ok
    ? scrapeScene(res.item, url, channel)
    : res.status;
}
/**
 * Fetch a movie page and scrape its '.content' element.
 *
 * @param {string} url - Absolute URL of the movie page.
 * @param {{ url: string }} channel - Channel whose `url` is the base of the Referer header.
 * @returns {Promise<object|number>} The scraped movie, or the response status when the request
 *   failed or came back without an item.
 */
async function fetchMovie(url, channel) {
  const res = await qu.get(url, '.content', {
    'Accept-Language': 'en-US,en', // fetch English rather than French titles
    Referer: `${channel.url}/en/porn-movie`,
  });

  if (res.ok && res.item) {
    return scrapeMovie(res.item, url, channel);
  }

  return res.status;
}
/**
 * Look an actor up through the site search and scrape the matching profile page.
 *
 * @param {{ name: string, slug: string }} baseActor - Actor to find; `name` is the search term,
 *   `slug` is compared against the slugified names in the results.
 * @param {{ entity: { url: string } }} context - Entity whose `url` is the base for the search and links.
 * @returns {Promise<object|number|null>} The scraped profile; the search status when the search
 *   request failed; null when no result matches or the profile page could not be fetched.
 */
async function fetchProfile(baseActor, { entity }) {
  // profile URLs cannot be derived from the name: /jessie-volt, /aleska_diamond, /liza-del_sierra
  const searchUrl = `${entity.url}/en/search`;
  const searchRes = await qu.postAll(searchUrl, { s: baseActor.name }, '.actors .actor', { 'Accept-Language': 'en-US,en' });

  if (!searchRes.ok) {
    return searchRes.status;
  }

  const isRequestedActor = (item) => slugify(item.query.cnt('.name')) === baseActor.slug;
  const match = searchRes.items.find(isRequestedActor);

  if (!match) {
    return null;
  }

  const profileUrl = match.query.url('a', 'href', { origin: entity.url });
  const avatarUrl = match.query.img();

  const profileRes = await qu.get(profileUrl, null, { 'Accept-Language': 'en-US,en' });

  if (!profileRes.ok) {
    return null;
  }

  return scrapeProfile(profileRes.item, entity, avatarUrl);
}
// Exported entry points of this scraper: the session preparation hook and the fetch functions.
// The scrape* helpers defined in this file are not exported.
module.exports = {
beforeFetchLatest,
fetchLatest,
fetchScene,
fetchMovie,
fetchMovies,
fetchProfile,
};