traxxx/src/scrapers/pornworld.js

126 lines
3.3 KiB
JavaScript
Executable File

'use strict';
const unprint = require('unprint');
/**
 * Extract the scene ID from the `/watch/<id>` segment of a scene URL.
 *
 * @param {?string} sceneUrl - Absolute scene URL, possibly missing.
 * @returns {?string} The digits-and-dashes ID, or null when the URL is
 *   missing, cannot be parsed, or has no `/watch/<id>` path segment.
 */
function parseWatchId(sceneUrl) {
  if (!sceneUrl) {
    return null;
  }

  let pathname;

  try {
    ({ pathname } = new URL(sceneUrl));
  } catch (error) {
    // Relative or malformed link: an expected absence, not a scraper failure.
    return null;
  }

  const watchMatch = pathname.match(/\/watch\/([\d-]+)/);

  return watchMatch ? watchMatch[1] : null;
}

/**
 * Scrape every scene card of a listing page (used for both the latest and
 * the upcoming listings).
 *
 * @param {Array<{ query: Object }>} scenes - One query context per scene card.
 * @returns {Object[]} One release object per card, in input order. `entryId`
 *   is null for a card whose link has no parsable `/watch/<id>` path, so a
 *   single odd card no longer aborts the whole page.
 */
function scrapeAll(scenes) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('.card-title a');
    release.entryId = parseWatchId(release.url);

    release.title = query.content('.card-title a');

    release.date = query.date('.release-date, .coming-soon-date', 'YYYY MMMM, DD', { match: /\d{4} \w+, \d{1,2}/i });
    release.duration = query.duration('.video-duration');

    release.actors = query.all('.starring a').map((actorEl) => ({
      name: unprint.query.content(actorEl),
      url: unprint.query.url(actorEl, null),
    }));

    const poster = query.img('.thumbnail-pic .card-img');

    if (poster) {
      const { origin, pathname } = new URL(poster);

      // Candidate list: the URL with its query string stripped first, then the
      // URL exactly as found on the page (presumably a fallback; confirm how
      // the consumer treats poster arrays).
      release.poster = [
        `${origin}${pathname}`,
        poster,
      ];
    }

    release.trailer = query.video('.thumbnail-pic', { attribute: 'data-video-src' }); // actually the full trailer with audio, not a teaser

    return release;
  });
}
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ query: Object }} context - Query context for the scene page.
 * @param {{ url: string }} options - `url` is the scene URL; its path must
 *   contain `/watch/<id>`.
 * @returns {Object} Release with entryId, title, description, date, duration,
 *   actors, tags, poster and trailer.
 * @throws {TypeError} When `url` cannot be parsed or has no `/watch/<id>` path.
 */
function scrapeScene({ query }, { url }) {
  const { pathname } = new URL(url);

  // The anchor element itself is the target, hence the null selector.
  const toActor = (actorEl) => ({
    name: unprint.query.content(actorEl),
    url: unprint.query.url(actorEl, null),
  });

  return {
    entryId: pathname.match(/\/watch\/([\d-]+)/)[1],
    title: query.content('.scene__title'),
    description: query.text('//p[span[contains(text(), "Description:")]]'),
    date: query.date(
      '//p[strong[contains(text(), "Publication date:")]]/span',
      'YYYY, MMMM D',
      { match: /\d{4}, \w+ \d{1,2}/i },
    ),
    duration: query.duration('//p[i[contains(@class, "bi-clock-fill")]]'),
    actors: query.all('p a[href*="model/"]').map(toActor),
    tags: query.contents('p a[href*="?tags"]'),
    poster: query.poster('.video-player'),
    trailer: query.video('.video-player source'),
  };
}
/**
 * Scrape a model detail card into a profile object.
 *
 * @param {{ query: Object }} context - Query context for the model card.
 * @returns {{ nationality: ?string, age: *, avatar: * }} `nationality` is
 *   null when the page yields nothing (or an empty string) for it; `age` and
 *   `avatar` are passed through exactly as the query helpers return them.
 */
function scrapeProfile({ query }) {
  const nationality = query.content('//h3[contains(text(), "Nationality:")]/span');
  const age = query.number('//h3[contains(text(), "Age:")]/span');
  const avatar = query.img();

  return {
    nationality: nationality || null,
    age,
    avatar,
  };
}
/**
 * Fetch one page of the channel's `/videos` listing and scrape its scene cards.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the site base URL.
 * @param {number} [page=1] - Listing page, sent as the `page` query parameter.
 * @returns {Promise<*>} The scraped releases, or the response status when the
 *   request was not OK.
 */
async function fetchLatest(channel, page = 1) {
  const listing = await unprint.get(`${channel.url}/videos?page=${page}`, { selectAll: '.card.scene' });

  if (!listing.ok) {
    return listing.status;
  }

  return scrapeAll(listing.context, channel);
}
/**
 * Fetch the channel's `/coming-soon` listing and scrape its scene cards.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the site base URL.
 * @returns {Promise<*>} The scraped releases, or the response status when the
 *   request was not OK.
 */
async function fetchUpcoming(channel) {
  const listing = await unprint.get(`${channel.url}/coming-soon`, { selectAll: '.card.scene' });

  if (!listing.ok) {
    return listing.status;
  }

  return scrapeAll(listing.context, channel);
}
/**
 * Look an actor up through the site's model search and scrape their profile.
 *
 * The search result is accepted only when an anchor's `title` attribute is
 * exactly equal to the actor name.
 *
 * @param {{ name: string }} actor - Actor to look up.
 * @param {{ url: string }} entity - Entity whose `url` is the site base URL.
 * @returns {Promise<*>} The scraped profile; null when the search has no exact
 *   match or the match has no URL; or the response status of whichever request
 *   was not OK.
 */
async function fetchProfile({ name: actorName }, entity) {
  // Encode the name: a raw '&', '#' or space would corrupt the query string.
  const searchUrl = `${entity.url}/models?name=${encodeURIComponent(actorName)}&sort=popularity`;
  const searchRes = await unprint.get(searchUrl);

  if (!searchRes.ok) {
    return searchRes.status;
  }

  const actorEl = searchRes.context.query
    .all('.pagination-items .model a')
    .find((resultEl) => unprint.query.attribute(resultEl, null, 'title') === actorName);

  // No exact match: stop here rather than handing `undefined` to the URL query.
  if (!actorEl) {
    return null;
  }

  const actorUrl = unprint.query.url(actorEl, null);

  if (!actorUrl) {
    return null;
  }

  const res = await unprint.get(actorUrl, { select: '.model-detail-card' });

  if (!res.ok) {
    return res.status;
  }

  return scrapeProfile(res.context, actorName, entity);
}
// Public scraper interface. scrapeAll and scrapeProfile are not exported;
// within this file they are only called by the fetch* functions.
module.exports = {
// Scrapes one page of the channel's /videos listing.
fetchLatest,
// Scrapes the channel's /coming-soon listing.
fetchUpcoming,
// Searches /models by actor name and scrapes the exactly-matching profile.
fetchProfile,
// Scrapes a single scene page from an already-loaded query context.
scrapeScene,
};