// traxxx/src/scrapers/porncz.js
'use strict';
const http = require('../utils/http');
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
const capitalize = require('../utils/capitalize');
/**
 * Convert the tiles of a video listing into release stubs.
 *
 * @param {Array<{ query: object }>} scenes - one query wrapper per listing tile
 * @param {{ url: string }} channel - entity whose `url` is passed as origin for links and images
 * @returns {object[]} one release per tile with `url`, `entryId`, `title`, `duration` and `poster`
 */
function scrapeAll(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('h4 a', 'href', { origin: channel.url });

    // A tile without a link, or whose path does not end in digits, gets a null
    // entryId instead of throwing and taking the whole listing down with it.
    const pathname = release.url ? new URL(release.url).pathname : null;
    release.entryId = pathname?.match(/\d+$/)?.[0] ?? null;

    release.title = query.cnt('h4 a');
    release.duration = query.duration('.product-item-time');
    release.poster = query.img('.product-item-image img', 'src', { origin: channel.url });

    return release;
  });
}
/**
 * Build a release from a scene detail page.
 *
 * @param {{ query: object }} item - query wrapper around the scene page
 * @param {string} url - absolute URL of the scene; its trailing digits become the entry ID
 * @param {{ url: string }} channel - entity whose `url` is passed as origin for images
 * @returns {object} release with entryId, title, description, date, datePrecision, actors,
 *   duration, tags, poster, photos, trailer and channel
 */
function scrapeScene({ query }, url, channel) {
  const release = {};

  // null rather than a TypeError when the path does not end in digits
  release.entryId = new URL(url).pathname.match(/\d+$/)?.[0] ?? null;

  release.title = query.cnt('.heading-detail h1');
  release.description = query.cnt('.heading-detail p:nth-child(3)');

  // Index the info rows by their slugified "Label:" prefix; rows without a label are skipped.
  const details = Object.fromEntries(query.all('.video-info-item').flatMap((detailEl) => {
    const key = detailEl.textContent.match(/(\w+):/)?.[1];

    return key ? [[slugify(key, '_'), detailEl]] : [];
  }));

  // Fall back to an empty object in case the helper returns nothing for a missing date row.
  const { date, precision } = query.dateAgo(details.date) || {};

  release.date = date;
  release.datePrecision = precision;

  release.actors = query.cnts(details.actors, 'a').map((actor) => capitalize(actor, { uncapitalize: true }));
  release.duration = query.duration(details.duration);
  release.tags = query.cnts(details.genres, 'a');

  release.poster = query.img('#video-poster', 'data-poster', { origin: channel.url });
  release.photos = query.imgs('#gallery .photo-item img', 'data-src', { origin: channel.url });
  release.trailer = query.video();

  // the site logo's alt text, slugified without a delimiter, is used as the channel slug
  release.channel = slugify(query.q('.video-detail-logo img', 'alt'), '');

  return release;
}
/**
 * Build an actor profile from a model page.
 *
 * @param {{ query: object }} item - query wrapper around the model page
 * @param {{ url: string }} entity - entity whose `url` is passed as origin for images and scene links
 * @returns {{ avatar: *, releases: object[] }} the avatar image plus the scenes listed on the page
 */
function scrapeProfile({ query }, entity) {
  const avatar = query.img('.model-heading-photo img', 'src', { origin: entity.url });

  // the model page lists scenes with the same tile markup that scrapeAll consumes
  const sceneTiles = query.all('.product-item');
  const releases = scrapeAll(qu.initAll(sceneTiles), entity);

  return { avatar, releases };
}
/**
 * Fetch one page of the channel's latest videos.
 *
 * @param {{ url: string }} channel - entity whose `url` is the site origin
 * @param {number} [page=1] - 1-based page number
 * @returns {Promise<object[]|number>} scraped releases, or the HTTP status when the final request fails
 */
async function fetchLatest(channel, page = 1) {
  // number of leading tiles dropped per preceding page
  const itemsPerPage = 16;

  const url = page === 1 ? `${channel.url}/en/new-videos` : `${channel.url}/en/new-videos?do=next`;

  // pagination state is kept in the session, and each new 'page' includes all previous pages
  const session = http.session();
  const headers = { 'X-Requested-With': 'XMLHttpRequest' };

  // advance the session's pagination state; these intermediate responses are discarded
  for (let i = 0; i < page - 1; i += 1) {
    await http.get(url, { headers, session }); // eslint-disable-line no-await-in-loop
  }

  const res = await http.get(url, { headers, session });

  if (res.ok) {
    // use the 'snippet--videoItems' entry of the response body when present, otherwise the body itself
    const items = qu.extractAll(res.body.snippets?.['snippet--videoItems'] || res.body, '.product-item');

    // drop the tiles belonging to the previous pages
    return scrapeAll(items.slice((page - 1) * itemsPerPage), channel);
  }

  return res.status;
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - scene URL
 * @param {{ url: string }} channel - entity handed through to scrapeScene
 * @returns {Promise<object|number>} the scraped release, or the HTTP status when the request fails
 */
async function fetchScene(url, channel) {
  const response = await qu.get(url, 'body > .container');

  if (!response.ok) {
    return response.status;
  }

  return scrapeScene(response.item, url, channel);
}
/**
 * Look an actor up through the site search and scrape their model page.
 *
 * @param {{ name: string }} baseActor - actor to find; the name must match a search result exactly
 * @param {{ url: string }} entity - entity handed through to scrapeProfile
 * @returns {Promise<object|number|null>} the scraped profile, the HTTP status of a failed request,
 *   or null when no search result matches or the match has no profile link
 */
async function fetchProfile(baseActor, entity) {
  // encode the name so spaces, '&', '#' and similar characters cannot corrupt the query string
  const searchUrl = `https://www.porncz.com/en/search-results?showModels=1&value=${encodeURIComponent(baseActor.name)}`;
  const searchRes = await qu.getAll(searchUrl, '.project-item');

  if (!searchRes.ok) {
    return searchRes.status;
  }

  const model = searchRes.items.find(({ query }) => query.cnt('h3 a') === baseActor.name);

  if (!model) {
    return null;
  }

  const modelUrl = model.query.url('h3 a', 'href', { origin: 'https://www.porncz.com' });

  if (!modelUrl) {
    // a result without a link would otherwise produce a request to "null?do=nextDetail"
    return null;
  }

  const modelRes = await qu.get(`${modelUrl}?do=nextDetail`); // get more videos

  if (modelRes.ok) {
    return scrapeProfile(modelRes.item, entity);
  }

  return modelRes.status;
}
// Public interface of this scraper module: the three fetch functions defined above.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
};