traxxx/src/scrapers/dorcel.js

132 lines
3.3 KiB
JavaScript
Raw Normal View History

'use strict';
const qu = require('../utils/q');
const slugify = require('../utils/slugify');
/**
 * Scrape a list of scene tiles into release objects.
 *
 * Tiles whose title link is missing, or does not contain a `/scene/<digits>`
 * path segment, are skipped so that one malformed tile no longer aborts the
 * whole listing with a TypeError.
 *
 * @param {Array<{ query: object }>} scenes - one query context per scene tile
 * @returns {object[]} one release per tile that exposes a scene ID
 */
function scrapeAll(scenes) {
	const releases = [];

	for (const { query } of scenes) {
		const url = query.url('.title a');

		// new URL() throws on a missing URL and match() returns null on a
		// foreign path; treat both as "no scene ID" instead of crashing.
		const entryId = url
			? new URL(url).pathname.match(/\/scene\/(\d+)/)?.[1]
			: null;

		if (entryId) {
			const release = {};

			release.url = url;
			release.entryId = entryId;
			release.title = query.cnt('.title a');

			release.date = query.date('.date', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

			// Presumably the site lists the length in minutes (hence * 60).
			// A missing value stays null rather than being coerced to 0 or NaN.
			const length = query.number('.length');
			release.duration = Number.isFinite(length) ? length * 60 : null;

			release.actors = query.all('.actors a').map((actorEl) => ({
				name: query.cnt(actorEl),
				url: query.url(actorEl, null),
			}));

			release.poster = query.img('.poster noscript img');
			release.stars = query.count('.rating .star1');

			// Drop the entry when the tile has no collection link, so the
			// list never contains a null tag.
			release.tags = [query.cnt('.collection a')].filter(Boolean);

			releases.push(release);
		}
	}

	return releases;
}
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ query: object }} context - query context for the scene page
 * @param {string} url - URL of the scene page; its path must contain `/scene/<digits>`
 * @returns {object} the scraped release; `movie` is only set when the page links to one
 * @throws {TypeError} when `url` is not a valid URL or carries no scene ID
 */
function scrapeScene({ query }, url) {
	const release = {};

	release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)/)[1];

	release.title = query.cnt('.infos .title h1');
	release.description = query.cnt('#description p:nth-child(2)');

	release.date = query.date('.infos .date', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

	// Presumably the site lists the length in minutes (hence * 60).
	// A missing value stays null rather than being coerced to 0 or NaN.
	const length = query.number('.infos .length');
	release.duration = Number.isFinite(length) ? length * 60 : null;

	release.actors = query.all('.infos .actors a').map((actorEl) => ({
		name: query.cnt(actorEl),
		url: query.url(actorEl, null),
	}));

	release.poster = query.img('.poster noscript img');
	release.stars = query.count('.infos .rating .star1');

	// A .movie element without a resolvable link would make new URL() throw
	// and lose the whole scene; only attach the movie when its URL is known.
	const movieUrl = query.exists('.movie') ? query.url('.movie a') : null;

	if (movieUrl) {
		release.movie = {
			name: query.cnt('.movie a'),
			url: movieUrl,
		};

		// The last path segment of the movie URL serves as its entry ID.
		release.movie.entryId = new URL(movieUrl).pathname.split('/').slice(-1)[0];
	}

	return release;
}
/**
 * Build an actor profile from a profile page.
 *
 * @param {{ query: object, el: object }} context - query helpers and root element of the page
 * @param {string} [avatar] - avatar URL taken from the search results, if any
 * @returns {object} profile fields plus the releases listed on the page
 */
function scrapeProfile({ query, el }, avatar) {
	return {
		birthdate: qu.parseDate(query.text('.birthdate'), 'MMMM DD, YYYY'),
		nationality: query.text('.nationality'),
		hairColor: query.text('.hair'),
		description: query.cnt('.bio_results p'),
		// Offer the variant without the '_crop' marker first, then the avatar as given.
		...(avatar ? { avatar: [avatar.replace('_crop', ''), avatar] } : {}),
		// TODO: add pagination
		releases: scrapeAll(qu.initAll(el, '.scene')),
	};
}
// TODO: add movies
/**
 * Fetch one page of the channel's latest scenes.
 *
 * @param {{ url: string }} channel - channel whose base URL is queried
 * @param {number} [page=1] - page number passed to the listing endpoint
 * @returns {Promise<object[]|number>} scraped releases, or the response status when the request failed
 */
async function fetchLatest(channel, page = 1) {
	const endpoint = `${channel.url}/en/news-videos-x-marc-dorcel-ajax?page=${page}&sorting=publish_date`;
	const headers = { 'X-Requested-With': 'XMLHttpRequest' };

	const response = await qu.getAll(endpoint, '.scene', headers);

	if (!response.ok) {
		return response.status;
	}

	return scrapeAll(response.items, channel);
}
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - URL of the scene page
 * @param {object} channel - channel the scene belongs to, forwarded to the scraper
 * @returns {Promise<object|number>} the scraped release, or the response status when the request failed
 */
async function fetchScene(url, channel) {
	const response = await qu.get(url);

	if (!response.ok) {
		return response.status;
	}

	return scrapeScene(response.item, url, channel);
}
/**
 * Look up an actor through the site search and scrape their profile page.
 *
 * The profile URL is taken, in order of preference, from the actor record,
 * from the matching search result, or from a slug derived from the name.
 *
 * @param {{ name: string, url?: string }} actor - actor to look up
 * @param {{ url: string }} entity - entity whose base URL is queried
 * @param {*} include - forwarded unchanged to scrapeProfile
 * @returns {Promise<object|number>} the scraped profile, or the response status when the profile request failed
 */
async function fetchProfile({ name: actorName, url: actorUrl }, entity, include) {
	const searchRes = await qu.getAll(`${entity.url}/en/pornstars?search=${slugify(actorName, '+')}`, '.actor');

	// Slugify the wanted name once instead of once per search result.
	const actorSlug = slugify(actorName);

	// A failed search must yield null rather than `false`: optional chaining
	// only short-circuits on null/undefined, so `false?.query.url()` would
	// throw instead of falling through to the other URL candidates.
	const actorItem = searchRes.ok
		? searchRes.items.find((actor) => slugify(actor.query.cnt('h2')) === actorSlug)
		: null;

	const actorItemUrl = actorItem?.query.url();
	const actorItemAvatar = actorItem?.query.img();

	const url = actorUrl || actorItemUrl || `${entity.url}/en/pornstar/${slugify(actorName, '-')}`;
	const res = await qu.get(url);

	if (res.ok) {
		return scrapeProfile(res.item, actorItemAvatar, entity, include);
	}

	return res.status;
}
// Functions this scraper module exposes: latest scenes listing, single scene, actor profile.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
};