// traxxx/src/scrapers/porncz.js

'use strict';

const http = require('../utils/http');
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
const capitalize = require('../utils/capitalize');

function scrapeAll(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('h4 a', 'href', { origin: channel.url });
		release.entryId = new URL(release.url).pathname.match(/\d+$/)[0];

		release.title = query.cnt('h4 a');
		release.duration = query.duration('.product-item-time');

		release.poster = query.img('.product-item-image img', 'src', { origin: channel.url });

		return release;
	});
}

function scrapeScene({ query }, url, channel) {
	const release = {};

	release.entryId = new URL(url).pathname.match(/\d+$/)[0];

	release.title = query.cnt('.heading-detail h1');
	release.description = query.cnt('.heading-detail p:nth-child(3)');

	const details = query.all('.video-info-item').reduce((acc, detailEl) => {
		const key = detailEl.textContent.match(/(\w+):/)[1];

		return { ...acc, [slugify(key, '_')]: detailEl };
	}, {});

	const { date, precision } = query.dateAgo(details.date);

	release.date = date;
	release.datePrecision = precision;

	release.actors = query.cnts(details.actors, 'a').map((actor) => capitalize(actor, { uncapitalize: true }));
	release.duration = query.duration(details.duration);
	release.tags = query.cnts(details.genres, 'a');

	release.poster = query.img('#video-poster', 'data-poster', { origin: channel.url });
	release.photos = query.imgs('#gallery .photo-item img', 'data-src', { origin: channel.url });

	release.trailer = query.video();

	release.channel = slugify(query.q('.video-detail-logo img', 'alt'), '');

	return release;
}

function scrapeProfile({ query }, entity) {
	const profile = {};

	profile.avatar = query.img('.model-heading-photo img', 'src', { origin: entity.url });
	profile.releases = scrapeAll(qu.initAll(query.all('.product-item')), entity);

	return profile;
}

async function fetchLatest(channel, page = 1) {
	const url = page === 1 ? `${channel.url}/en/new-videos` : `${channel.url}/en/new-videos?do=next`;

	// pagination state is kept in session, and new each 'page' includes all previous pages
	const session = http.session();
	const headers = { 'X-Requested-With': 'XMLHttpRequest' };

	for (let i = 0; i < page - 1; i += 1) {
		await http.get(url, { headers, session }); // eslint-disable-line no-await-in-loop
	}

	const res = await http.get(url, { headers, session });

	if (res.ok) {
		const items = qu.extractAll(res.body.snippets?.['snippet--videoItems'] || res.body, '.product-item');

		return scrapeAll(items.slice((page - 1) * 16), channel);
	}

	return res.status;
}

async function fetchScene(url, channel) {
	const res = await qu.get(url, 'body > .container');

	if (res.ok) {
		return scrapeScene(res.item, url, channel);
	}

	return res.status;
}

async function fetchProfile(baseActor, entity) {
	const searchRes = await qu.getAll(`https://www.porncz.com/en/search-results?showModels=1&value=${baseActor.name}`, '.project-item');

	if (searchRes.ok) {
		const model = searchRes.items.find(({ query }) => query.cnt('h3 a') === baseActor.name);

		if (model) {
			const modelUrl = model.query.url('h3 a', 'href', { origin: 'https://www.porncz.com' });
			const modelRes = await qu.get(`${modelUrl}?do=nextDetail`); // get more videos

			if (modelRes.ok) {
				return scrapeProfile(modelRes.item, entity);
			}

			return modelRes.status;
		}

		return null;
	}

	return searchRes.status;
}

module.exports = {
	fetchLatest,
	fetchScene,
	fetchProfile,
};