// traxxx/src/scrapers/hitzefrei.js
'use strict';
const qu = require('../utils/qu');
const http = require('../utils/http');
const { lbsToKg, feetInchesToCm } = require('../utils/convert');
const slugify = require('../utils/slugify');
/**
 * Builds one release object per scene tile (used for listing pages and actor pages).
 *
 * @param {Array<{ query: object }>} scenes - Wrapped tile elements. Each `query` must offer the
 *   `url`, `cnt`, `cnts`, `date`, `dur`, `img`, `imgs`, `poster` and `video` helpers called below.
 * @returns {object[]} One release per tile with `url`, `entryId`, `title`, `date`, `duration`,
 *   `actors`, `poster`, `photos`, `teaser` and `channel`.
 * @throws {TypeError} When a tile's title link is not a parseable absolute URL, or its path
 *   has no `/view/<digits>` segment.
 */
function scrapeAll(scenes) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('.content-title a');
    // The digits following /view/ in the scene path serve as the entry ID.
    release.entryId = new URL(release.url).pathname.match(/\/view\/(\d+)/)[1];

    release.title = query.cnt('.content-title a');

    release.date = query.date('.content-date strong', 'DD/MM/YYYY');
    release.duration = query.dur('.content-date');

    release.actors = query.cnts('.content-models a');

    // Prefer a thumbnail image; fall back to the poster of a thumbnail video.
    release.poster = query.img('.content-thumbnail img, .large-thumbnail img')
      || query.poster('.content-thumbnail video, .large-thumbnail video');
    release.photos = query.imgs('.small-thumbnails img'); // actor page only

    const teaser = query.video('.vid-hover source');
    // The teaser wrapper is always set, even when no source was found (src is then falsy).
    release.teaser = { src: teaser };

    // Channel slug is the site name slugified with an empty delimiter.
    release.channel = slugify(query.cnt('.content-site a'), '');

    return release;
  });
}
/**
 * Extracts a single release from a scene detail page.
 *
 * @param {{ query: object }} page - Wrapped page; `query` must offer the `cnt`, `date`, `dur`,
 *   `number`, `all`, `q`, `img`, `poster`, `urls` and `video` helpers called below.
 * @param {string} url - Absolute scene URL; its path must contain `/view/<digits>`.
 * @returns {object} Release with `entryId`, `title`, `description`, `date`, `duration`, `likes`,
 *   `actors`, `poster`, `photos` and `trailer`.
 * @throws {TypeError} When `url` cannot be parsed or has no `/view/<digits>` path segment.
 */
function scrapeScene({ query }, url) {
  const release = {};

  // The digits following /view/ in the scene path serve as the entry ID.
  release.entryId = new URL(url).pathname.match(/\/view\/(\d+)/)[1];

  release.title = query.cnt('.content-title');
  release.description = query.cnt('.content-description p');

  // Duration, date and likes are read from the 2nd, 4th and 6th child of the meta row.
  release.date = query.date('.content-metas span:nth-child(4)', 'DD/MM/YYYY');
  release.duration = query.dur('.content-metas span:nth-child(2)');
  release.likes = query.number('.content-metas span:nth-child(6)');

  // Actor name comes from the thumbnail's alt text, avatar from its src.
  release.actors = query.all('.model-thumb img').map((el) => ({
    name: query.q(el, null, 'alt'),
    avatar: query.img(el, null, 'src'),
  }));

  release.poster = query.poster('.content-video video');

  // Each photo becomes a list of three URL variants derived from the carousel link:
  // the link with '/full' removed, the link itself, and '/full' swapped for '/thumbs'.
  release.photos = query.urls('#photo-carousel a').map((photo) => [
    photo.replace('/full', ''),
    photo,
    photo.replace('/full', '/thumbs'),
  ]);

  const trailer = query.video('.content-video source');
  release.trailer = { src: trailer };

  return release;
}
/**
 * Collects the scenes listed on an actor page and on every following page it links to.
 *
 * Pages are walked one after another: the current page's tiles are scraped, then the link in
 * the second-to-last pagination item is followed. Walking stops when that link is absent or
 * the request for it does not succeed.
 *
 * @param {{ query: object }} page - Wrapped actor page offering `query.all` and `query.url`.
 * @param {object[]} [accReleases=[]] - Releases gathered so far; not mutated.
 * @returns {Promise<object[]>} `accReleases` followed by the releases of every visited page.
 */
async function fetchActorScenes({ query }, accReleases = []) {
  let pageQuery = query;
  let collected = accReleases;

  for (;;) {
    const tiles = qu.initAll(pageQuery.all('.container-large-video-thumb'));
    const pageReleases = scrapeAll(tiles);
    const nextUrl = pageQuery.url('.pagination li:nth-last-child(2) a');

    collected = collected.concat(pageReleases);

    if (!nextUrl) {
      return collected;
    }

    const res = await qu.get(nextUrl);

    if (!res.ok) {
      return collected;
    }

    ({ query: pageQuery } = res.item);
  }
}
/**
 * Builds an actor profile from a model page.
 *
 * @param {{ query: object }} page - Wrapped model page; `query` must offer the `date`, `cnt`,
 *   `number` and `img` helpers called below.
 * @param {{ releases?: boolean }} [include] - When `include.releases` is truthy, the actor's
 *   scenes are fetched as well and stored on `profile.releases`.
 * @returns {Promise<object>} Profile with `dateOfBirth`, `birthPlace`, `height`, `weight`, `eyes`,
 *   `hair`, `description`, `avatar`, plus `bust`/`waist`/`hip` when measurements are listed and
 *   `releases` when requested.
 */
async function scrapeProfile({ query }, include) {
  const profile = {};

  // The first selector matches a misspelled class name ("birtdate"); the correct spelling is a fallback.
  profile.dateOfBirth = query.date('.col-birtdate .stat-value, .col-birthdate .stat-value', 'YYYY-MM-DD'); // sic
  profile.birthPlace = query.cnt('.col-birth .stat-value');

  // Measurements are optional; splitting a missing value would throw and lose the whole profile.
  const measurements = query.cnt('.col-measurements .stat-value');

  if (measurements) {
    // Dash-separated parts map to bust, waist and hip in that order.
    // NOTE(review): a part that is not purely numeric converts to NaN — confirm the site's format.
    [profile.bust, profile.waist, profile.hip] = measurements.split('-').map(Number);
  }

  profile.height = feetInchesToCm(query.cnt('.col-height .stat-value'));
  profile.weight = lbsToKg(query.number('.col-weight .stat-value'));

  profile.eyes = query.cnt('.col-eyes .stat-value');
  profile.hair = query.cnt('.col-hair .stat-value');

  profile.description = query.cnt('.model-profile .model-profile');
  profile.avatar = query.img('.model-thumbnail img');

  // Tolerate callers that omit `include` entirely.
  if (include && include.releases) {
    profile.releases = await fetchActorScenes({ query });
  }

  return profile;
}
/**
 * Fetches one page of the video listing for a channel and scrapes its scene tiles.
 *
 * @param {{ parameters: { siteId: (string|number) } }} channel - Channel whose `parameters.siteId`
 *   fills the `site` query parameter; also forwarded as the second argument to `scrapeAll`.
 * @param {number} [page=1] - Value for the `page` query parameter.
 * @returns {Promise<object[]|*>} Scraped releases when the request succeeded, otherwise the
 *   response's `status`.
 */
async function fetchLatest(channel, page = 1) {
  const listingUrl = `https://tour.hitzefrei.com/videos?site=${channel.parameters.siteId}&page=${page}`;
  const res = await qu.getAll(listingUrl, '.hitem');

  return res.ok
    ? scrapeAll(res.items, channel)
    : res.status;
}
/**
 * Fetches a scene page and scrapes it into a release.
 *
 * @param {string} url - Scene page URL; requested with the `#content-details` selector.
 * @returns {Promise<object|*>} The scraped release when the request succeeded, otherwise the
 *   response's `status`.
 */
async function fetchScene(url) {
  const { ok, item, status } = await qu.get(url, '#content-details');

  if (!ok) {
    return status;
  }

  return scrapeScene(item, url);
}
/**
 * Looks an actor up through the site's search preview and scrapes the matching model page.
 *
 * The actor name is posted as `q` to the search-preview endpoint with an `en-US`
 * Accept-Language header. Only a result of type `model` whose title equals the name exactly
 * is accepted.
 *
 * @param {{ name: string }} baseActor - Actor to look up.
 * @param {object} entity - Not used by this scraper.
 * @param {object} include - Passed through to `scrapeProfile`.
 * @returns {Promise<object|null|*>} The scraped profile; `null` when the search has no exact
 *   model match; otherwise the `status` of whichever request did not succeed.
 */
async function fetchProfile(baseActor, entity, include) {
  const searchRes = await http.post('https://tour.hitzefrei.com/search-preview', {
    q: baseActor.name,
  }, {
    headers: {
      'Accept-Language': 'en-US',
    },
  });

  if (!searchRes.ok) {
    return searchRes.status;
  }

  const actor = searchRes.body.find((result) => result.type === 'model' && result.title === baseActor.name);

  if (!actor) {
    return null;
  }

  const actorRes = await qu.get(actor.url);

  if (!actorRes.ok) {
    return actorRes.status;
  }

  return scrapeProfile(actorRes.item, include);
}
// Public entry points of this module; every other function in the file stays module-private.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
};