traxxx/src/scrapers/bamvisions.js

175 lines
4.9 KiB
JavaScript

'use strict';
const format = require('template-format');
const { get, geta, initAll, formatDate } = require('../utils/qu');
const slugify = require('../utils/slugify');
const { feetInchesToCm } = require('../utils/convert');
/**
 * Turn the scene tiles of a listing page into release objects.
 *
 * @param {Array<{ qu: object }>} scenes - one query-helper wrapper per scene tile
 * @param {{ url: string }} site - channel whose `url` is prefixed to media paths that do not start with "http"
 * @returns {object[]} one release per tile
 */
function scrapeAll(scenes, site) {
  const thumbAttributes = ['src0_3x', 'src0_2x', 'src0_1x'];
  const toAbsolute = (path) => (/^http/.test(path) ? path : `${site.url}${path}`);

  return scenes.map(({ qu }) => {
    const title = qu.q('h3 a', true);
    const url = qu.url('h3 a');
    const date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
    const duration = qu.dur('.item-meta li:nth-child(2)');
    const description = qu.q('.description', true);
    const actors = qu.all('a[href*="/models"]', true);

    const release = {
      title,
      url,
      date,
      duration,
      description,
      actors,
    };

    if (/bts/i.test(title)) {
      release.tags = ['behind the scenes'];
    }

    // Every thumbnail becomes its own list of candidate sources, read in the order 3x, 2x, 1x
    const thumbs = qu.all('.item-thumbs img').map((img) => {
      const candidates = [];

      for (const attribute of thumbAttributes) {
        const path = img.getAttribute(attribute);

        if (path) {
          candidates.push(toAbsolute(path));
        }
      }

      return candidates;
    });

    // The first thumbnail serves as the poster, the remaining ones as photos
    const [poster, ...photos] = thumbs;

    release.poster = poster;
    release.photos = photos;
    release.entryId = `${formatDate(date, 'YYYY-MM-DD')}-${slugify(title)}`;

    return release;
  });
}
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ html: string, qu: object }} page - raw markup of the page plus its query helpers
 * @param {string} url - address of the scene; stored on the release unchanged
 * @param {{ url: string }} site - channel whose `url` is prefixed to media paths that do not start with "http"
 * @returns {object} the release; `poster` and `trailer` are only set when found in the markup
 */
function scrapeScene({ html, qu }, url, site) {
  const release = { url };

  release.title = qu.q('.item-episode h4 a', true);
  release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
  release.duration = qu.dur('.item-meta li:nth-child(2)');
  release.description = qu.q('.description', true);
  release.actors = qu.all('.item-episode a[href*="/models"]', true);

  if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];

  // Stop at the attribute's closing quote and require a literal dot before the extension.
  // A greedy `.*` would swallow every following attribute up to the last matching extension on the line.
  const posterPath = html.match(/poster="([^"]*\.jpg)"/)?.[1];
  const trailerPath = html.match(/video src="([^"]*\.mp4)"/)?.[1];

  if (posterPath) {
    const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;

    // Candidate sources in order 3x, 2x, then the path as found in the markup
    release.poster = [
      poster.replace('-1x', '-3x'),
      poster.replace('-1x', '-2x'),
      poster,
    ];
  }

  if (trailerPath) {
    const trailer = /^http/.test(trailerPath) ? trailerPath : `${site.url}${trailerPath}`;

    release.trailer = { src: trailer };
  }

  release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;

  return release;
}
/**
 * Collect an actor's releases by walking the paginated sets listing.
 *
 * @param {string} actorId - identifier passed as the `id` query parameter
 * @param {{ url: string, parameters?: { sets?: string } }} site - channel; `parameters.sets` overrides the default `/sets.php` endpoint
 * @param {number} [page=1] - page to fetch in this call
 * @param {object[]} [accScenes=[]] - releases gathered from previous pages
 * @returns {Promise<object[]>} all releases gathered up to the last page that could be fetched
 */
async function fetchActorReleases(actorId, site, page = 1, accScenes = []) {
  const url = site.parameters?.sets
    ? `${site.parameters.sets}?id=${actorId}&page=${page}`
    : `${site.url}/sets.php?id=${actorId}&page=${page}`;

  const res = await get(url);

  // A failed page ends the walk, but releases already gathered from earlier pages are kept
  if (!res.ok) return accScenes;

  const quReleases = initAll(res.item.el, '.item-episode');
  const scenes = accScenes.concat(scrapeAll(quReleases, site));

  // Continue only while the page links to its successor
  const nextPage = res.item.qu.q(`a[href*="page=${page + 1}"]`);

  if (nextPage) {
    return fetchActorReleases(actorId, site, page + 1, scenes);
  }

  return scenes;
}
/**
 * Scrape a model profile page.
 *
 * @param {{ qu: object }} page - query helpers for the profile page
 * @param {{ url: string }} site - channel whose `url` is prefixed to avatar paths that do not start with "http"
 * @param {boolean} withScenes - when truthy, the actor's releases are fetched and attached as `releases`
 * @returns {Promise<object|null>} the profile, or null when the page has no `.content` element
 */
async function scrapeProfile({ qu }, site, withScenes) {
  if (!qu.exists('.content')) {
    // page probably returned a 404 with a 200 HTTP code
    return null;
  }

  const profile = {};
  const bio = {};

  for (const row of qu.all('.stats li', true)) {
    // Split on the first colon only, so a value that itself contains a colon stays intact,
    // and skip rows without a "key: value" shape rather than crashing on a missing value.
    const separatorIndex = row.indexOf(':');

    if (separatorIndex !== -1) {
      bio[slugify(row.slice(0, separatorIndex), '_')] = row.slice(separatorIndex + 1).trim();
    }
  }

  if (bio.height) profile.height = feetInchesToCm(bio.height);

  if (bio.measurements) {
    const [bust, waist, hip] = bio.measurements.split('-');

    // bust is kept as a string; waist and hip are converted to numbers
    if (bust) profile.bust = bust;
    if (waist) profile.waist = Number(waist);
    if (hip) profile.hip = Number(hip);
  }

  // Candidate avatar sources in order 3x, 2x, 1x; missing attributes are dropped
  profile.avatar = [
    qu.q('.profile-pic img', 'src0_3x'),
    qu.q('.profile-pic img', 'src0_2x'),
    qu.q('.profile-pic img', 'src0_1x'),
  ].filter(Boolean).map((source) => (/^http/.test(source) ? source : `${site.url}${source}`));

  if (withScenes) {
    // The actor ID is taken from the avatar's id attribute ("set-target-<digits>")
    const actorId = qu.q('.profile-pic img', 'id')?.match(/set-target-(\d+)/)?.[1];

    if (actorId) {
      profile.releases = await fetchActorReleases(actorId, site);
    }
  }

  return profile;
}
/**
 * Fetch one page of the latest scenes for a channel.
 *
 * @param {{ url: string, parameters?: { latest?: string } }} site - channel; `parameters.latest` is a URL template filled with `page`
 * @param {number} [page=1] - listing page to request
 * @returns {Promise<object[]|number>} the scraped releases, or the response status when the request failed
 */
async function fetchLatest(site, page = 1) {
  const template = site.parameters?.latest;
  let listingUrl;

  if (template) {
    listingUrl = format(template, { page });
  } else {
    listingUrl = `${site.url}/categories/movies/${page}/latest/`;
  }

  const response = await geta(listingUrl, '.item-episode');

  if (!response.ok) {
    return response.status;
  }

  return scrapeAll(response.items, site);
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - address of the scene page
 * @param {object} site - channel, handed through to the scene scraper
 * @returns {Promise<object|number>} the scraped release, or the response status when the request failed
 */
async function fetchScene(url, site) {
  const response = await get(url);

  if (response.ok) {
    return scrapeScene(response.item, url, site);
  }

  return response.status;
}
/**
 * Placeholder entry point for movies: it only logs its arguments and resolves to undefined.
 * NOTE(review): looks like an unfinished stub that is nevertheless exported — confirm whether
 * callers expect a list of releases here.
 *
 * @param {object} channel - logged as-is
 * @param {number} page - logged as-is
 */
async function fetchMovies(channel, page) {
console.log(channel, page);
}
/**
 * Look up an actor's profile page, trying two slug variants of the name.
 *
 * @param {{ name: string }} actor - actor to look up; only `name` is used
 * @param {{ site: object }} context - `site.url` is the default base; `site.parameters.profile` is an optional URL template filled with `slug`
 * @param {{ scenes: boolean }} include - `scenes` decides whether the actor's releases are fetched too
 * @returns {Promise<object|null|number>} the scraped profile, or the response status when no variant could be fetched
 */
async function fetchProfile({ name: actorName }, { site }, include) {
  const actorSlugA = slugify(actorName, '');
  const actorSlugB = slugify(actorName);

  const buildUrl = (slug) => (site.parameters?.profile
    ? format(site.parameters.profile, { slug })
    : `${site.url}/models/${slug}.html`);

  const urlA = buildUrl(actorSlugA);
  const urlB = buildUrl(actorSlugB);

  const resA = await get(urlA);

  // When both variants resolve to the same URL (presumably single-word names), requesting it
  // a second time would only repeat the failed request, so the fallback is skipped.
  const res = resA.ok || urlB === urlA ? resA : await get(urlB);

  return res.ok ? scrapeProfile(res.item, site, include.scenes) : res.status;
}
// Public interface of this scraper: only the fetch* entry points are exported;
// the scrape* helpers and fetchActorReleases stay private to the module.
module.exports = {
fetchLatest,
fetchMovies,
fetchScene,
fetchProfile,
};