forked from DebaucheryLibrarian/traxxx
Updated Dorcel scraper, added movie support.
This commit is contained in:
@@ -1,93 +1,124 @@
|
||||
'use strict';
|
||||
|
||||
const qu = require('../utils/q');
|
||||
const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Scrape the scene tiles of a listing into release objects.
 *
 * @param {Array<{ query: Object }>} scenes - One query context per scene tile.
 * @param {{ url: string }} channel - Channel or entity whose `url` is handed to the
 *   query helpers as `origin` when reading links.
 * @returns {Array<Object>} One release per tile with `url`, `entryId`, `title`,
 *   `actors` and `poster`.
 */
function scrapeAll(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('.title', 'href', { origin: channel.url });

		// new URL(null) throws; a single tile without a link must not abort the whole listing
		release.entryId = release.url
			? new URL(release.url).pathname.match(/\/scene\/(\d+)/)?.[1]
			: null;

		release.title = query.cnt('.title');

		release.actors = query.all('.actors a').map((actorEl) => ({
			name: query.cnt(actorEl),
			url: query.url(actorEl, null, 'href', { origin: channel.url }),
		}));

		const posterSet = query.sourceSet('.thumb img', 'data-srcset');
		const fallbackPoster = query.img('.thumb img');

		if (posterSet) {
			release.poster = posterSet;
		} else if (fallbackPoster) {
			// try the thumbnail URL with '_crop' stripped first, then the thumbnail as-is
			release.poster = [fallbackPoster.replace('_crop', ''), fallbackPoster];
		} else {
			release.poster = null;
		}

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ query: Object }} context - Query context for the scene page.
 * @param {string} url - URL of the scene page; the numeric entry ID is read from its path.
 * @param {{ url: string }} channel - Channel whose `url` is handed to the query helpers
 *   as `origin` when reading links.
 * @returns {Object} Release with `entryId`, `title`, `description`, `date`, `duration`,
 *   `actors`, `director`, `poster` and, when the page links to one, `movie`.
 */
function scrapeScene({ query }, url, channel) {
	const release = {};

	release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)/)?.[1];

	release.title = query.cnt('h1.title');
	release.description = query.cnt('.content-description .full p');

	release.date = query.date('.publish_date', 'MMMM DD, YYYY');
	release.duration = query.dur('.duration');

	release.actors = query.all('.actress a').map((actorEl) => ({
		name: query.cnt(actorEl),
		url: query.url(actorEl, null, 'href', { origin: channel.url }),
	}));

	// the director text is split on ':' and the part after it is kept
	release.director = query.cnt('.director')?.split(/\s*:\s*/)[1];
	release.poster = query.sourceSet('.player img', 'data-srcset');

	const movieTitle = query.cnt('.movie a');
	const movieUrl = query.url('.movie a', 'href', { origin: channel.url });

	// only attach a movie when the page actually links one, instead of an all-null stub
	if (movieTitle || movieUrl) {
		release.movie = {
			title: movieTitle,
			url: movieUrl,
		};
	}

	return release;
}
|
||||
|
||||
function scrapeProfile({ query, el }, avatar) {
|
||||
/**
 * Scrape the movie tiles of a listing into release objects.
 *
 * @param {Array<{ query: Object }>} movies - One query context per movie tile.
 * @param {{ url: string }} channel - Channel whose `url` is handed to the query helpers
 *   as `origin` when reading links.
 * @returns {Array<Object>} One release per tile with `url`, `entryId`, `title` and `covers`.
 */
function scrapeMovies(movies, channel) {
	return movies.map(({ query }) => {
		const release = {};

		release.url = query.url(null, 'href', { origin: channel.url });

		// new URL(null) throws; a single tile without a link must not abort the whole listing
		release.entryId = release.url
			? new URL(release.url).pathname.match(/\/porn-movie\/([\w-]+)/)?.[1]
			: null;

		release.title = query.cnt('h2');

		// avoid a [null] entry when the tile has no srcset
		const cover = query.sourceSet('img', 'data-srcset');
		release.covers = cover ? [cover] : [];

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a single movie page into a release object, including its scenes.
 *
 * @param {{ query: Object, el: Object }} context - Query context and root element of the movie page.
 * @param {string} url - URL of the movie page; the entry ID is read from its path.
 * @param {{ url: string }} channel - Channel whose `url` is handed to the query helpers
 *   as `origin` when reading links.
 * @returns {Object} Release with `title`, `description`, `entryId`, `date`, `datePrecision`,
 *   `duration`, `actors`, `poster`, `covers` and `scenes`.
 */
function scrapeMovie({ query, el }, url, channel) {
	const release = {};

	release.title = query.cnt('.header h1');
	release.description = query.cnt('.content-text p');

	release.entryId = new URL(url).pathname.match(/\/porn-movie\/([\w-]+)/)?.[1];

	// the date is parsed with a year-only format, hence the explicit precision
	release.date = query.date('.out_date', 'YYYY');
	release.datePrecision = 'year';

	release.duration = query.dur('.duration');

	release.actors = query.all('.actors .actor').map((actorEl) => ({
		name: query.cnt(actorEl, '.name'),
		url: query.url(actorEl, 'a', 'href', { origin: channel.url }),
		avatar: query.sourceSet(actorEl, '.thumbnail img', 'data-srcset'),
	}));

	release.poster = query.sourceSet('.banner', 'data-srcset');

	// avoid a [null] entry when the page has no cover srcset
	const cover = query.sourceSet('.cover', 'data-srcset');
	release.covers = cover ? [cover] : [];

	// scene tiles on the movie page go through the regular listing scraper
	release.scenes = scrapeAll(qu.initAll(el, '.scene'), channel);

	return release;
}
|
||||
|
||||
/**
 * Scrape an actor page into a profile object.
 *
 * @param {{ query: Object, el: Object }} context - Query context and root element of the actor page.
 * @param {{ url: string }} entity - Entity passed on to `scrapeAll` for the actor's scene tiles.
 * @param {?string} avatar - Avatar URL taken from the search result, if any.
 * @returns {Promise<Object>} Profile with `description`, `nationality`, `banner`, `releases`
 *   and, when an avatar URL was supplied, `avatar`.
 */
async function scrapeProfile({ query, el }, entity, avatar) {
	const profile = {};

	profile.description = query.cnt('.content-description .content-text > p, .content-description .full p'); // different structure for overflowing vs short text
	profile.nationality = query.cnt('.nationality');

	profile.banner = query.img('.header img:not([src*="actor/banner"])'); // ignore stock banner

	if (avatar) {
		// candidates in order of preference; de-duplicated because a URL without
		// the '_crop' / 'crop_' markers would otherwise be listed three times
		profile.avatar = [...new Set([
			avatar.replace('_crop', ''),
			avatar.replace('crop_', ''),
			avatar,
		])];
	}

	// TODO: add pagination
	profile.releases = scrapeAll(qu.initAll(el, '.scene'), entity);

	return profile;
}
|
||||
|
||||
// TODO: add movies
|
||||
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
const url = `${channel.url}/en/news-videos-x-marc-dorcel-ajax?page=${page}&sorting=publish_date`;
|
||||
const url = `${channel.url}/scene/list/more/?lang=en&page=${page}&sorting=new`;
|
||||
|
||||
const res = await qu.getAll(url, '.scene', {
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Accept-Language': 'en-US,en', // fetch English rather than French titles
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
@@ -97,8 +128,26 @@ async function fetchLatest(channel, page = 1) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Fetch one page of the channel's movie listing and scrape its tiles.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the base of the listing endpoint.
 * @param {number} [page=1] - Listing page to request.
 * @returns {Promise<Array<Object>|number>} Scraped movies when the request succeeded,
 *   otherwise `res.status` of the failed request.
 */
async function fetchMovies(channel, page = 1) {
	const url = `${channel.url}/movies/more?lang=en&page=${page}&sorting=new`;

	const res = await qu.getAll(url, '.movie', {
		'X-Requested-With': 'XMLHttpRequest',
		'Accept-Language': 'en-US,en', // fetch English rather than French titles
		Referer: 'https://www.dorcelclub.com/en/porn-movie?sorting=new', // might be used to derive sorting
	});

	if (res.ok) {
		return scrapeMovies(res.items, channel);
	}

	return res.status;
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it into a release.
 *
 * @param {string} url - URL of the scene page.
 * @param {{ url: string }} channel - Channel passed on to `scrapeScene`.
 * @returns {Promise<Object|number>} Scraped release when the request succeeded,
 *   otherwise `res.status` of the failed request.
 */
async function fetchScene(url, channel) {
	const res = await qu.get(url, '.content', {
		'Accept-Language': 'en-US,en', // fetch English rather than French titles
	});

	if (res.ok) {
		return scrapeScene(res.item, url, channel);
	}

	return res.status;
}
|
||||
|
||||
async function fetchProfile({ name: actorName, url: actorUrl }, entity, include) {
|
||||
const searchRes = await qu.getAll(`${entity.url}/en/pornstars?search=${slugify(actorName, '+')}`, '.actor');
|
||||
|
||||
const actorItem = searchRes.ok && searchRes.items.find(actor => slugify(actor.query.cnt('h2')) === slugify(actorName));
|
||||
const actorItemUrl = actorItem?.query.url();
|
||||
const actorItemAvatar = actorItem?.query.img();
|
||||
|
||||
const url = actorUrl || actorItemUrl || `${entity.url}/en/pornstar/${slugify(actorName, '-')}`;
|
||||
const res = await qu.get(url);
|
||||
/**
 * Fetch a movie page and scrape it into a release.
 *
 * @param {string} url - URL of the movie page.
 * @param {{ url: string }} channel - Channel passed on to `scrapeMovie`.
 * @returns {Promise<Object|number>} Scraped movie when the request succeeded,
 *   otherwise `res.status` of the failed request.
 */
async function fetchMovie(url, channel) {
	const res = await qu.get(url, '.content', {
		'Accept-Language': 'en-US,en', // fetch English rather than French titles
	});

	if (res.ok) {
		return scrapeMovie(res.item, url, channel);
	}

	return res.status;
}
|
||||
|
||||
/**
 * Look an actor up through the site search, then fetch and scrape their profile page.
 *
 * @param {{ name: string, slug: string }} baseActor - Actor to look up; `name` is sent as the
 *   search term and `slug` is compared against the slugified result names.
 * @param {{ entity: { url: string } }} context - Carries the entity whose `url` is the site base.
 * @returns {Promise<Object|number|null>} Scraped profile; `null` when no search result matches
 *   or the match has no profile link; the status of the failed request otherwise.
 */
async function fetchProfile(baseActor, { entity }) {
	// URL slugs are unpredictable: /jessie-volt, /aleska_diamond, /liza-del_sierra
	const searchRes = await qu.postAll(`${entity.url}/en/search`, { s: baseActor.name }, '.actors .actor', { 'Accept-Language': 'en-US,en' });

	if (!searchRes.ok) {
		return searchRes.status;
	}

	const actorItem = searchRes.items.find(({ query }) => slugify(query.cnt('.name')) === baseActor.slug);

	if (!actorItem) {
		return null;
	}

	const actorUrl = actorItem.query.url('a', 'href', { origin: entity.url });
	const actorAvatar = actorItem.query.img();

	if (!actorUrl) {
		// matched a search result, but it carries no profile link to follow
		return null;
	}

	const actorRes = await qu.get(actorUrl, null, { 'Accept-Language': 'en-US,en' });

	if (actorRes.ok) {
		return scrapeProfile(actorRes.item, entity, actorAvatar);
	}

	// report the failed request's status, like the search step and the other fetchers do
	return actorRes.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchMovie,
|
||||
fetchMovies,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user