traxxx/src/scrapers/aziani.js

146 lines
3.8 KiB
JavaScript

'use strict';
const slugify = require('../utils/slugify');
const { get, getAll, initAll, extractDate } = require('../utils/qu');
const { feetInchesToCm } = require('../utils/convert');
/**
 * Build the list of candidate URLs for an image, largest variant first.
 *
 * The `-1x.jpg` suffix of `source` is swapped for `-4x.jpg`, `-3x.jpg` and
 * `-2x.jpg` in that order, and the untouched source is appended last.
 *
 * @param {?string} source - Image URL, expected to contain `-1x.jpg`.
 * @returns {string[]} Candidate URLs in order of preference; empty when no
 *   source was supplied.
 */
function getFallbacks(source) {
  // Callers hand over the result of an image lookup unchecked; without this
  // guard a nullish source throws on `.replace` and aborts the whole scrape.
  if (!source) {
    return [];
  }

  return ['-4x.jpg', '-3x.jpg', '-2x.jpg']
    .map((suffix) => source.replace('-1x.jpg', suffix))
    .concat(source);
}
/**
 * Turn the scene tiles of a listing into release objects.
 *
 * @param {Array<{qu: Object}>} scenes - Query contexts, one per scene tile.
 * @param {{url: string}} [site] - Site whose `url` is passed to `qu.img` as
 *   its third argument. May be omitted.
 * @returns {Object[]} One release per tile, carrying entryId, url, title,
 *   date, actors, poster and photos.
 */
function scrapeAll(scenes, site) {
  // scrapeProfile calls this function without a site, so the URL has to be
  // read defensively instead of dereferencing `site` directly.
  const origin = site?.url;

  return scenes.map(({ qu }) => {
    const release = {};

    release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)[1];
    release.url = qu.url('a');
    release.title = qu.q('h5 a', true);
    release.date = qu.date('.icon-calendar + strong', 'MM/DD/YYYY');

    // A tile without a cast line yields an empty actor list rather than
    // aborting the whole listing; blank names are dropped.
    const cast = qu.q('h3', true);
    release.actors = cast
      ? cast.replace(/featuring:\s?/i, '').split(', ').filter(Boolean)
      : [];

    // `cnt` gives the number of images; they are looked up as src0_1x,
    // src1_1x, ... The first one found serves as poster, the rest as photos.
    const photoCount = qu.q('.stdimage', 'cnt');
    const sources = Array.from(
      { length: Number(photoCount) },
      (value, index) => qu.img('.stdimage', `src${index}_1x`, origin),
    );

    // Skip lookups that came back empty, getFallbacks needs a string.
    const [poster, ...photos] = sources
      .filter(Boolean)
      .map((source) => getFallbacks(source));

    release.poster = poster;
    release.photos = photos;

    return release;
  });
}
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{html: string, qu: Object}} context - Raw page HTML and the query
 *   helpers bound to the page.
 * @param {string} url - URL stored on the release.
 * @returns {Object} Release with url, entryId, title, description, date,
 *   actors, tags, duration, photos and, when one is found, poster.
 */
function scrapeScene({ html, qu }, url) {
  const release = { url };

  release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)[1];
  release.title = qu.q('h2', true);
  release.description = qu.q('p', true);

  // The date is pulled out of the raw HTML by pattern rather than by selector.
  release.date = extractDate(html, 'MM/DD/YYYY', /\b\d{2}\/\d{2}\/\d{4}\b/);

  release.actors = qu.all('h5:not(.video_categories) a').map((actor) => ({
    name: qu.q(actor, null, true),
    url: qu.url(actor, null),
  }));

  release.tags = qu.all('.video_categories a', true);
  release.duration = qu.dur('.video_categories + p');

  // getFallbacks needs a string; leave the poster off instead of failing
  // the whole scene when the page has none.
  const poster = qu.img('a img');
  if (poster) {
    release.poster = getFallbacks(poster);
  }

  release.photos = qu.imgs('.featured-video img', 'src0_1x')
    .filter(Boolean)
    .map((source) => getFallbacks(source));

  return release;
}
/**
 * Scrape a model page into a profile object.
 *
 * @param {{el: Object, qu: Object}} context - Page element and the query
 *   helpers bound to it.
 * @returns {Object} Profile holding whichever of ethnicity, age, height,
 *   bust, waist, hip, birthPlace, relationship, eyes and avatar could be
 *   read, plus the releases found on the page.
 */
function scrapeProfile({ el, qu }) {
  const profile = {};

  // Convert text to a number, or null when it is not numeric, so that NaN
  // never ends up on the profile.
  const toNumber = (text) => {
    const number = Number(text);
    return Number.isNaN(number) ? null : number;
  };

  // Bio facts are read as a STRONG label directly followed by a text node
  // holding the value. A page without the bio widget yields an empty bio.
  const bioNodes = Array.from(qu.q('.widget-content')?.childNodes ?? []);

  const bio = bioNodes.reduce((acc, node, index, nodes) => {
    const nextNode = nodes[index + 1];

    if (node.tagName === 'STRONG' && nextNode?.nodeType === 3) {
      acc[slugify(node.textContent, '_')] = nextNode.textContent.trim();
    }

    return acc;
  }, {});

  if (bio.ethnicity) profile.ethnicity = bio.ethnicity;

  const age = bio.age ? toNumber(bio.age) : null;
  if (age !== null) profile.age = age;

  if (bio.height) {
    // Take the run of three or more digits itself; grabbing the first digit
    // run would pick up a leading feet value such as the 5 in "5 ft, 170 cm".
    const metric = bio.height.match(/\d{3,}/);
    if (metric) profile.height = Number(metric[0]);

    // A feet/inches notation takes precedence over the metric value.
    if (/\d[;']\d/.test(bio.height)) profile.height = feetInchesToCm(bio.height);
  }

  if (bio.measurements) {
    const [bust, waist, hip] = bio.measurements.split('-');

    // Only accept a bust value that carries a cup letter.
    if (bust && /\d+[a-zA-Z]+/.test(bust)) profile.bust = bust;

    const waistSize = waist ? toNumber(waist) : null;
    if (waistSize !== null) profile.waist = waistSize;

    const hipSize = hip ? toNumber(hip) : null;
    if (hipSize !== null) profile.hip = hipSize;
  }

  if (bio.bust_size && !profile.bust) profile.bust = bio.bust_size.toUpperCase();
  if (bio.birth_location) profile.birthPlace = bio.birth_location;
  if (bio.status_married_or_single) profile.relationship = bio.status_married_or_single;
  if (bio.eye_color) profile.eyes = bio.eye_color;

  // getFallbacks needs a string; leave the avatar off when none is found.
  const avatar = qu.img('.tac img');
  if (avatar) profile.avatar = getFallbacks(avatar);

  // No site is available in this function, so scrapeAll is called without one.
  profile.releases = scrapeAll(initAll(el, '.featured-video'));

  return profile;
}
/**
 * Fetch one page of the movie listing and scrape its scene tiles.
 *
 * @param {{url: string}} site - Site whose URL prefixes the listing path.
 * @param {number|string} page - Page index interpolated into the listing URL.
 * @returns {Promise<Object[]|*>} Scraped releases, or the response status
 *   when the request was not OK.
 */
async function fetchLatest(site, page) {
  const res = await getAll(`${site.url}/tour/categories/movies_${page}_d.html`, '.featured-video');

  return res.ok
    ? scrapeAll(res.items, site)
    : res.status;
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Scene page URL.
 * @param {Object} site - Passed along to scrapeScene as third argument.
 * @returns {Promise<Object|*>} Scraped release, or the response status when
 *   the request was not OK.
 */
async function fetchScene(url, site) {
  const res = await get(url, '.page-content .row');

  if (!res.ok) {
    return res.status;
  }

  return scrapeScene(res.item, url, site);
}
/**
 * Fetch a model page by actor name and scrape the profile.
 *
 * @param {string} actorName - Name slugified with an empty delimiter to form
 *   the page file name.
 * @param {*} scraperSlug - Not used by this function.
 * @param {{url: string}} site - Site whose URL prefixes the model path.
 * @returns {Promise<Object|*>} Scraped profile, or the response status when
 *   the request was not OK.
 */
async function fetchProfile(actorName, scraperSlug, site) {
  const profileUrl = `${site.url}/tour/models/${slugify(actorName, '')}.html`;
  const res = await get(profileUrl, '.page-content .row');

  if (!res.ok) {
    return res.status;
  }

  return scrapeProfile(res.item);
}
// Entry points exposed by this scraper module; the scrape* helpers and
// getFallbacks stay internal.
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
};