// traxxx/src/scrapers/hookuphotshot.js

'use strict';

const qu = require('../utils/q');
const slugify = require('../utils/slugify');

/**
 * Turn every scene teaser of a listing page into a release object.
 *
 * Incomplete teasers degrade gracefully instead of throwing and aborting the
 * whole page: a missing thumbnail yields actors without avatars, a missing
 * photo count or base URL yields no photos, and a title or poster without
 * date information yields `date: null` and no `entryId`.
 *
 * @param {Array<{ query: object }>} scenes - one entry per teaser, each exposing a `query` helper bound to that teaser
 * @returns {object[]} one release per scene, in input order
 */
function scrapeAll(scenes) {
	// throttling options attached to every media source produced here
	const throttle = { interval: 1000, concurrency: 1 };

	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('.date-title a');

		// the teaser carries a single thumbnail; it is only attached to the actor named in its alt text
		const avatarEl = query.el('.girl-thumb-container img');
		const avatarAlt = avatarEl?.alt || '';
		const avatarSrc = avatarEl?.src;

		release.actors = query.all('.date-starring a').map((actorEl) => {
			const name = query.cnt(actorEl);
			// plain substring test: a name may contain regex metacharacters, which would break new RegExp(name)
			const ownsAvatar = Boolean(name && avatarSrc && avatarAlt.includes(name));

			return {
				name,
				gender: 'female',
				url: query.url(actorEl, null),
				...(ownsAvatar && {
					// try the source without the -WxH size suffix first, then the thumbnail as served
					avatar: [
						avatarSrc.replace(/-\d+x\d+/, ''),
						avatarSrc,
					].map((src) => ({ src, ...throttle })),
				}),
			};
		}).concat({
			name: 'Bryan Gozzling',
			gender: 'male',
		});

		release.duration = query.dur('.date-facts');
		release.stars = query.number('[data-rating]', null, 'data-rating');

		const photoCount = query.number('input[id*=count]', null, 'value');
		const photoPath = query.url('input[id*=baseurl]', 'value');

		release.poster = {
			src: query.img('.date-img-swap'),
			...throttle,
		};

		// Array(null) has length 1 and a missing base URL would produce 'null/01.jpg', so require both values
		const photoTotal = photoPath && Number.isInteger(photoCount) && photoCount > 0 ? photoCount : 0;

		release.photos = Array.from({ length: photoTotal }, (value, index) => ({
			src: `${photoPath}/${String(index + 1).padStart(2, '0')}.jpg`,
			...throttle,
		}));

		// dates appear to be manually curated
		const fullTitle = query.cnt('.date-title a');
		const [monthName, date, title] = fullTitle?.match(/(\w+)\.? (\d+)\s*-?\s*(.*)/)?.slice(1) || [];
		// the year is not part of the title; it is taken from the poster's uploads/<year>/<month> path
		const [year, month] = release.poster.src?.match(/uploads\/(\d+)\/(\d+)/)?.slice(1) || [];

		// keep the untouched title text when it carries no date prefix
		const rawTitle = title ?? fullTitle;
		release.title = typeof rawTitle === 'string'
			? rawTitle.replace(/behind the\.\.\./i, 'Behind the Scenes')
			: null;

		const monthPart = monthName || month;
		release.date = year && monthPart && date
			? qu.extractDate(`${year}-${monthPart}-${date}`, ['YYYY-MM-DD', 'YYYY-MMM-DD', 'YYYY-MMMM-DD'])
			: null;

		// release.entryId = new URL(release.url).pathname.split('/')[2];
		// the ID is derived from the date, so it is left unset when no date could be determined
		// NOTE(review): confirm how callers treat a release without entryId
		if (release.date) {
			release.entryId = `${release.date.getFullYear()}-${release.date.getMonth() + 1}-${release.date.getDate()}-${slugify(release.actors[0].name)}`;
		}

		release.tags = ['rough', ...(release.title?.match(/behind the scenes|anal/gi) || [])];

		return release;
	});
}

/**
 * Build an actor profile from a profile page.
 *
 * @param {{ query: object }} context - object exposing the `query` helper for the page
 * @returns {{ gender: string, description: string, avatar: * }} the scraped profile
 */
function scrapeProfile({ query }) {
	// the bio-facts paragraph is excluded from the description; the rest is joined with single spaces
	const aboutParagraphs = query.cnts('.girl-about p:not(.bio-facts)');

	// no deep scraping available, and not all scene details available here
	return {
		gender: 'female',
		description: aboutParagraphs.join(' '),
		avatar: query.img('.girl-pic'),
	};
}

/**
 * Fetch one page of the channel's scene listing and scrape its teasers.
 *
 * @param {{ url: string }} channel - channel whose `url` is the site base URL
 * @param {number} [page=1] - listing page number
 * @returns {Promise<*>} the result of `scrapeAll` when the request succeeded, otherwise the response status
 */
async function fetchLatest(channel, page = 1) {
	const listing = await qu.getAll(`${channel.url}/the-dates/page/${page}`, '#et-projects li');

	if (!listing.ok) {
		return listing.status;
	}

	return scrapeAll(listing.items, channel);
}

/**
 * Fetch an actor's profile page, addressed by the slugified actor name, and scrape it.
 *
 * @param {{ name: string }} actor - actor whose `name` is looked up
 * @param {{ url: string }} entity - entity whose `url` is the site base URL
 * @param {*} include - passed through to `scrapeProfile` unchanged
 * @returns {Promise<*>} the result of `scrapeProfile` when the request succeeded, otherwise the response status
 */
async function fetchProfile({ name: actorName }, entity, include) {
	const page = await qu.get(`${entity.url}/girls/${slugify(actorName)}`);

	if (!page.ok) {
		return page.status;
	}

	return scrapeProfile(page.item, actorName, entity, include);
}

// entry points exported by this scraper: latest-release listing and actor profile lookup
module.exports = {
	fetchLatest,
	fetchProfile,
};