Files
traxxx/src/scrapers/bamvisions.js
2026-01-19 03:31:40 +01:00

170 lines
4.4 KiB
JavaScript
Executable File

'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const tryUrls = require('../utils/try-urls');
const { convert } = require('../utils/convert');
/**
 * Turn a list of scene tile contexts into release objects.
 *
 * @param {Array<{ query: object }>} scenes - One context per scene tile; only `query` is used.
 * @param {{ url: string }} site - Site whose `url` is used to prefix thumbnail paths.
 * @returns {object[]} One release per tile, in the order the tiles were given.
 */
function scrapeAll(scenes, site) {
	// Listed from highest to lowest pixel density so the best candidate comes first.
	const thumbAttributes = ['src0_3x', 'src0_2x', 'src0_1x'];
	const toAbsolute = (path) => unprint.prefixUrl(path, site.url);

	return scenes.map((scene) => {
		const { query } = scene;

		const url = query.url('h3 a');
		const title = query.content('h3 a');
		const description = query.content('.description');
		const date = query.date('.item-meta li', 'MMMM D, YYYY');
		const duration = query.duration('.item-meta li:nth-child(2)');

		const actors = query.all('a[href*="/models"]').map((actorEl) => {
			const name = unprint.query.content(actorEl);
			const actorUrl = unprint.query.url(actorEl, null);

			return { name, url: actorUrl };
		});

		// Every thumbnail becomes a list of fallback sources; attributes that are not set are dropped.
		const thumbs = query.all('.item-thumbs img').map((imgEl) => thumbAttributes
			.map((attribute) => imgEl.getAttribute(attribute))
			.filter(Boolean)
			.map((path) => toAbsolute(path)));

		// The first thumbnail is used as the poster, the rest as photos.
		const [poster, ...photos] = thumbs;

		// No ID is read from the tile, so one is composed from the date and the title.
		const entryId = `${unprint.formatDate(date, 'YYYY-MM-DD')}-${slugify(title)}`;

		return {
			url,
			title,
			description,
			date,
			duration,
			actors,
			poster,
			photos,
			entryId,
		};
	});
}
/**
 * Fetch one page of the latest scenes for a site.
 *
 * @param {{ url: string }} site - Site whose `url` is the base of the listing URL.
 * @param {number} [page=1] - Page number inserted into the listing URL.
 * @returns {Promise<object[]|*>} Scraped releases, or the response status when the request was not ok.
 */
async function fetchLatest(site, page = 1) {
	const res = await unprint.get(`${site.url}/categories/movies/${page}/latest/`, {
		selectAll: '.item-episode',
	});

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.context, site);
}
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ html: string, query: object }} context - Page context; `html` is searched for the
 *   video poster and trailer, `query` is used for everything else.
 * @param {{ url: string }} site - Site whose `url` is used to prefix relative media paths.
 * @returns {object} Release with title, description, date, duration, actors, photoCount and
 *   entryId; `poster` and `trailer` are only set when found in the HTML.
 */
function scrapeScene({ html, query }, site) {
	const release = {};

	release.title = query.content('.item-episode h4 a');
	release.description = query.content('.description');

	release.date = query.date('.item-meta li', 'MMMM D, YYYY');
	release.duration = query.duration('.item-meta li:nth-child(2)');

	release.actors = query.all('.item-episode a[href*="/models"]').map((actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null),
	}));

	// Match within a single attribute value only: `[^"]*` cannot run past the closing quote,
	// and the escaped dot requires a real file extension. A greedy `.*` would swallow
	// everything up to the last `.jpg"` / `.mp4"` on the same line.
	const posterPath = html.match(/poster="([^"]*\.jpg)"/)?.[1];
	const trailerPath = html.match(/video src="([^"]*\.mp4)"/)?.[1];

	if (posterPath) {
		// Try the higher density variants first, falling back to the path found in the page.
		release.poster = [
			posterPath.replace('-1x', '-3x'),
			posterPath.replace('-1x', '-2x'),
			posterPath,
		].map((poster) => unprint.prefixUrl(poster, site.url));
	}

	if (trailerPath) {
		release.trailer = /^http/.test(trailerPath)
			? trailerPath
			: `${site.url}${trailerPath}`;
	}

	release.photoCount = query.number('.item-meta li:last-child');

	// No ID is read from the page, so one is composed from the date and the title.
	release.entryId = `${unprint.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;

	return release;
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - URL of the scene page.
 * @param {{ url: string }} site - Site passed through to the scene scraper.
 * @returns {Promise<object|*>} Scraped release, or the response status when the request was not ok.
 */
async function fetchScene(url, site) {
	const res = await unprint.get(url);

	return res.ok
		? scrapeScene(res.context, site)
		: res.status;
}
/**
 * Collect the scenes listed for an actor, following pagination for as long as a link to
 * the next page is present.
 *
 * @param {string} actorId - Actor ID inserted into the `sets.php` URL.
 * @param {{ url: string }} site - Site whose `url` is the base of the listing URL.
 * @param {number} [page=1] - Page to fetch; incremented on each recursive call.
 * @param {object[]} [accScenes=[]] - Releases gathered from earlier pages.
 * @returns {Promise<object[]>} All releases gathered so far; a failed request ends pagination
 *   and keeps what was already collected.
 */
async function fetchActorReleases(actorId, site, page = 1, accScenes = []) {
	const url = `${site.url}/sets.php?id=${actorId}&page=${page}`;

	// Fetch the whole page instead of using `selectAll`: the pagination link sits outside the
	// scene tiles, and a `selectAll` response is the list of tile contexts that scrapeAll
	// iterates over, which has no page-level `query`.
	const res = await unprint.get(url);

	if (!res.ok) {
		// Keep the scenes from earlier pages rather than discarding them.
		return accScenes;
	}

	// NOTE(review): contexts created here are built from bare elements; confirm relative
	// scene URLs are still resolved as expected by scrapeAll.
	const sceneContexts = unprint.initAll(res.context.query.all('.item-episode'));
	const scenes = accScenes.concat(scrapeAll(sceneContexts, site));

	const nextPage = res.context.query.url(`a[href*="page=${page + 1}"]`);

	if (nextPage) {
		return fetchActorReleases(actorId, site, page + 1, scenes);
	}

	return scenes;
}
/**
 * Scrape an actor page into a profile object.
 *
 * @param {{ query: object }} context - Page context; only `query` is used.
 * @param {string} url - URL the page was fetched from, stored on the profile.
 * @param {{ url: string }} site - Site used to prefix relative avatar paths and to fetch releases.
 * @param {boolean} withScenes - Whether to also fetch the actor's releases.
 * @returns {Promise<object|null>} The profile, or null when the page has no `.content` element.
 */
async function scrapeProfile({ query }, url, site, withScenes) {
	const hasContent = query.exists('.content');

	if (!hasContent) {
		// page probably returned a 404 with a 200 HTTP code
		return null;
	}

	// Each stat row is keyed by the slug of its <strong> label.
	const bioEntries = query.all('.stats li').map((bioEl) => {
		const key = slugify(unprint.query.content(bioEl, 'strong'), '_');
		const value = unprint.query.text(bioEl);

		return [key, value];
	});

	const bio = Object.fromEntries(bioEntries);

	const toAbsolute = (source) => {
		if (/^http/.test(source)) {
			return source;
		}

		return `${site.url}${source}`;
	};

	const profile = {
		url,
		height: convert(bio.height, 'cm'),
		measurements: bio.measurements,
		// Highest density first; attributes that yield nothing are dropped.
		avatar: ['src0_3x', 'src0_2x', 'src0_1x']
			.map((attribute) => query.img('.profile-pic img', { attribute }))
			.filter(Boolean)
			.map((source) => toAbsolute(source)),
	};

	if (!withScenes) {
		return profile;
	}

	// The numeric actor ID is taken from the id attribute of the profile picture.
	const actorId = query.attribute('.profile-pic img', 'id')?.match(/set-target-(\d+)/)?.[1];

	if (actorId) {
		profile.releases = await fetchActorReleases(actorId, site);
	}

	return profile;
}
/**
 * Locate and scrape an actor's profile page.
 *
 * @param {{ name: string, url: string }} actor - Actor name and, if known, profile URL.
 * @param {{ channel: { url: string } }} context - Only `channel` is used, as the base for candidate URLs.
 * @param {{ scenes: boolean }} include - `scenes` controls whether releases are fetched as well.
 * @returns {Promise<object|null|*>} Scraped profile, or the response status when the request was not ok.
 */
async function fetchProfile({ name: actorName, url: actorUrl }, { channel }, include) {
	const modelUrl = (delimiter) => `${channel.url}/models/${slugify(actorName, delimiter)}.html`;

	// The supplied URL is listed first, followed by the name slugged with and without dashes.
	const candidates = [actorUrl, modelUrl('-'), modelUrl('')];
	const { res, url } = await tryUrls(candidates);

	if (!res.ok) {
		return res.status;
	}

	return scrapeProfile(res.context, url, channel, include.scenes);
}
// Functions exported from this module; the scrape* helpers and fetchActorReleases stay internal.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
};