Rescraping upcoming scenes. Fixed language and scene deep scraping for Dorcel scraper.

This commit is contained in:
DebaucheryLibrarian
2021-06-02 03:27:32 +02:00
parent 42791c528e
commit c979173422
15 changed files with 105 additions and 15 deletions

View File

@@ -50,10 +50,15 @@ function scrapeScene({ query }, url, channel) {
const fallbackPoster = query.img('.player img');
release.poster = query.sourceSet('.player img', 'data-srcset') || [fallbackPoster.replace('_crop', ''), fallbackPoster];
release.movie = {
title: query.cnt('.movie a'),
url: query.url('.movie a', 'href', { origin: channel.url }),
};
const movieUrl = query.url('.movie a', 'href', { origin: channel.url });
if (movieUrl) {
release.movie = {
entryId: new URL(movieUrl).pathname.match(/\/porn-movie\/([\w-]+)/)?.[1],
title: query.cnt('.movie a'),
url: query.url('.movie a', 'href', { origin: channel.url }),
};
}
return release;
}
@@ -92,8 +97,20 @@ function scrapeMovie({ query, el }, url, channel) {
avatar: query.sourceSet(actorEl, '.thumbnail img', 'data-srcset'),
}));
release.poster = query.sourceSet('.banner', 'data-srcset');
release.covers = [query.sourceSet('.cover', 'data-srcset')];
release.poster = query.sourceSet('.banner', 'data-src')?.[0];
release.covers = [query.all(query.el('.cover').parentElement, 'source')
?.map(coverEl => query.sourceSet(coverEl, null, 'data-srcset'))
.flat()
.sort((coverA, coverB) => {
const resA = Number(coverA.match(/_(\d{3,})_/)?.[1]);
const resB = Number(coverB.match(/_(\d{3,})_/)?.[1]);
if (resA < resB) return 1;
if (resA > resB) return -1;
return 0;
})
.concat(query.sourceSet('.cover', 'data-src')?.[0])];
release.scenes = scrapeAll(qu.initAll(el, '.scene'), channel);
@@ -120,13 +137,25 @@ async function scrapeProfile({ query, el }, entity, avatar) {
return profile;
}
async function fetchLatest(channel, page = 1) {
/**
 * Creates a session and issues one request to the English news listing with
 * it, then hands the session back so later requests can reuse it.
 *
 * The response of the request is intentionally discarded; only the session
 * matters. The scene page only seems to accept language preferences from the
 * session, so the session is primed here before any scenes are fetched.
 *
 * @param {Object} channel - entity whose `url` is the base of the requested page
 * @returns {Promise<Object>} the session object obtained from `qu.session()`
 */
async function beforeFetchLatest(channel) {
	const session = qu.session();

	const newsUrl = `${channel.url}/en/news-videos-x-marc-dorcel`;
	const headers = {
		'X-Requested-With': 'XMLHttpRequest',
		'Accept-Language': 'en-US,en', // fetch English rather than French titles
	};

	// request is made purely for its effect on the session; the result is not used
	await qu.getAll(newsUrl, '.scene', headers, { session });

	return session;
}
async function fetchLatest(channel, page = 1, options, { beforeFetchLatest: session }) {
const url = `${channel.url}/scene/list/more/?lang=en&page=${page}&sorting=new`;
const res = await qu.getAll(url, '.scene', {
'X-Requested-With': 'XMLHttpRequest',
'Accept-Language': 'en-US,en', // fetch English rather than French titles
});
}, { session });
if (res.ok) {
return scrapeAll(res.items, channel);
@@ -152,8 +181,9 @@ async function fetchMovies(channel, page = 1) {
}
async function fetchScene(url, channel) {
const res = await qu.get(url, '.content', {
const res = await qu.get(url, null, {
'Accept-Language': 'en-US,en', // fetch English rather than French titles
Referer: `${channel.url}/en/news-videos-x-marc-dorcel`,
});
if (res.ok) {
@@ -166,6 +196,7 @@ async function fetchScene(url, channel) {
async function fetchMovie(url, channel) {
const res = await qu.get(url, '.content', {
'Accept-Language': 'en-US,en', // fetch English rather than French titles
Referer: `${channel.url}/en/porn-movie`,
});
if (res.ok) {
@@ -202,6 +233,7 @@ async function fetchProfile(baseActor, { entity }) {
}
module.exports = {
beforeFetchLatest,
fetchLatest,
fetchScene,
fetchMovie,