traxxx/src/scrapers/bang.js

210 lines
5.9 KiB
JavaScript
Executable File

'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
/*
Unused inverse of decodeId (hex ID to URL-safe base64), kept for reference only.
function encodeId(id) {
if (!id) {
return id;
}
return Buffer
.from(id, 'hex')
.toString('base64')
.replace(/\+/g, '-')
.replace(/\//g, '_')
.replace(/=/g, ',');
}
*/
/**
 * Convert the URL-safe ID found in video paths back into its hexadecimal form.
 *
 * The URL variant uses `-` for `+`, `_` for `/` and `,` for `=`; those
 * substitutions are reverted before the string is read as base64 and
 * re-encoded as hex.
 *
 * @param {string} id - URL-safe encoded ID.
 * @returns {string} Hex string; a falsy `id` is returned unchanged.
 */
function decodeId(id) {
	if (id) {
		const base64Chars = { '-': '+', _: '/', ',': '=' };
		const base64Id = id.replace(/[-_,]/g, (char) => base64Chars[char]);

		return Buffer.from(base64Id, 'base64').toString('hex');
	}

	return id;
}
/**
 * Build an ordered list of source candidates from an image URL.
 *
 * @param {string} url - Absolute image URL, possibly carrying a query string.
 * @returns {string[]|null} `[origin + pathname, url]`, i.e. the URL without its
 *   query string followed by the URL as given, or `null` when `url` cannot be
 *   parsed.
 */
function getAvatarFallback(url) {
	let parsedUrl;

	try {
		parsedUrl = new URL(url);
	} catch (error) {
		// missing or malformed image URL: there is nothing to fall back to
		return null;
	}

	const bareUrl = `${parsedUrl.origin}${parsedUrl.pathname}`;

	return [bareUrl, url];
}
/**
 * Scrape every release tile of a listing page.
 *
 * @param {Array<{ query: Object }>} scenes - One context per tile; only its `query` helper is used.
 * @param {{ url: string }} entity - Entity whose `url` is passed as the origin when resolving the release link.
 * @returns {Object[]} One release per tile with `url`, `entryId`, `title`, `date`,
 *   `actors`, `teaser` and, when the tile has a preview image, `poster`.
 */
function scrapeAll(scenes, entity) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('.video_preview_container > a', { origin: entity.url });
		// prefer the explicit ID attribute, fall back to the encoded ID in the URL path
		release.entryId = query.attribute(null, 'data-video-id') || decodeId(new URL(release.url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);

		release.title = query.content('.video_preview_container >a > span.block');
		release.date = query.date('.videoInfo .statistics span', 'MMM DD, YYYY');

		release.actors = query.elements('.videoInfo a[href*="/pornstar"]').map((el) => ({
			name: unprint.query.content(el),
			url: unprint.query.url(el, null, { origin: 'https://www.bang.com' }),
		}));

		const poster = query.img('img[data-videopreview-target="image"]');

		// Parse the poster only once we know the tile has one: new URL() throws
		// on a missing value, which would abort the scrape of the entire page.
		if (poster) {
			const posterUrl = new URL(poster);

			// URL without query string first, full URL as fallback
			release.poster = [
				`${posterUrl.origin}${posterUrl.pathname}`,
				posterUrl.href,
			];
		}

		release.teaser = query.video();

		return release;
	});
}
/**
 * Scrape a single scene page, preferring the page's JSON-LD block and falling
 * back to the rendered markup for each field the structured data lacks.
 *
 * @param {{ query: Object }} context - Page context; only its `query` helper is used.
 * @param {{ url: string, entity: Object }} options - Scene URL (used for the entry ID
 *   fallback) and the entity whose `url` and `children` are used to resolve actor
 *   links and the channel slug.
 * @returns {Promise<Object>} The scraped release.
 */
async function scrapeScene({ query }, { url, entity }) {
	const release = {};
	const data = query.json('script[type="application/ld+json"]');

	release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
	release.title = data?.name || query.content('.video-heading');
	release.description = data?.description || query.content('.expanded p.clear-both');

	release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD') || query.date('//p[contains(text(), "Date:")]', 'MMM DD, YYYY');
	release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span');

	// Accept the performer list under either `actor` or `actors`, and map the
	// same array that was checked, so the test and the mapping cannot disagree.
	const dataActors = [data?.actor, data?.actors].find((candidate) => Array.isArray(candidate));

	if (dataActors) {
		release.actors = dataActors.map((actor) => ({
			name: actor.name,
			url: actor.url,
			avatar: getAvatarFallback(query.img(`.video-actors img[alt="${actor.name}"]`)),
		}));
	} else {
		release.actors = query.elements('//div[contains(@class, "video-actors")]//a[img|picture]').map((element) => ({
			name: unprint.query.attribute(element, 'img', 'alt'),
			url: unprint.query.url(element, null, { origin: entity.url }),
			avatar: getAvatarFallback(unprint.query.img(element, 'img')),
		}));
	}

	release.tags = query.contents('.expanded .genres');

	release.poster = data?.thumbnailUrl || data?.contentUrl || query.attribute('meta[name*="og:image"]', 'content');
	release.teaser = query.video('video[data-videocontainer-target] source');

	const photosValue = query.attribute('[data-video-gallery-photos-value]', 'data-video-gallery-photos-value');

	release.photos = null;

	if (photosValue) {
		try {
			release.photos = JSON.parse(photosValue);
		} catch (error) {
			// The gallery is optional: a malformed attribute leaves photos empty
			// instead of failing the whole scene.
			release.photos = null;
		}
	}

	release.photoCount = query.number('[data-video-gallery-count-value]', { attribute: 'data-video-gallery-count-value' });

	const channelName = query.content('.expanded a[href*="?in="]')?.trim();

	if (channelName) {
		// NOTE(review): channel.name is used as a regular expression without escaping — confirm no channel name contains pattern characters
		release.channel = entity.children?.find((channel) => new RegExp(channel.name, 'i').test(channelName) || slugify(channelName) === channel.slug)?.slug;
	}

	return release;
}
/**
 * Collect an actor's scenes across result pages.
 *
 * Scrapes the tiles under `element`, then POSTs to `url` for the following
 * page and recurses until a page yields no scenes or a request fails.
 *
 * @param {Object} element - Root element of the current result page.
 * @param {string} url - URL the next page is requested from.
 * @param {Object} entity - Entity handed through to `scrapeAll`.
 * @param {number} [page=1] - Number of the page held by `element`.
 * @param {Object[]} [acc=[]] - Scenes gathered from earlier pages.
 * @returns {Promise<Object[]>} Scenes from earlier pages followed by those found from this page on.
 */
async function fetchActorScenes(element, url, entity, page = 1, acc = []) {
	const pageScenes = scrapeAll(unprint.initAll(element, '.search-grid li'), entity);
	const collected = acc.concat(pageScenes);

	// an empty page marks the end of the results
	if (!pageScenes.length) {
		return collected;
	}

	const nextPage = page + 1;
	const nextRes = await unprint.post(url, { page: nextPage });

	if (!nextRes.ok) {
		return collected;
	}

	return fetchActorScenes(nextRes.context.element, url, entity, nextPage, collected);
}
/**
 * Scrape an actor profile page.
 *
 * Each bio field is read from the first bold span following the text node
 * that contains its label.
 *
 * @param {{ query: Object, element: Object }} context - Page context of the profile.
 * @param {string} url - Profile URL, stored on the profile and used to page through scenes.
 * @param {Object} entity - Entity handed through to the scene scraper.
 * @param {{ scenes: boolean }} include - When `include.scenes` is truthy the actor's scenes are fetched as well.
 * @returns {Promise<Object>} The scraped profile.
 */
async function scrapeProfile({ query, element }, url, entity, include) {
	const profile = {
		url,
		dateOfBirth: query.date('//text()[contains(., "Born")]/following-sibling::span[contains(@class, "font-bold")][1]', 'MMMM D, YYYY'),
		birthPlace: query.content('//text()[contains(., "in")]/following-sibling::span[contains(@class, "font-bold")][1]'),
		ethnicity: query.content('//text()[contains(., "Ethnicity")]/following-sibling::span[contains(@class, "font-bold")][1]'),
		hairColor: query.content('//text()[contains(., "Hair Color")]/following-sibling::span[contains(@class, "font-bold")][1]'),
		eyes: query.content('//text()[contains(., "Eye Color")]/following-sibling::span[contains(@class, "font-bold")][1]'),
		avatar: getAvatarFallback(query.img('img[alt*="profile"][src*="https://i.bang.com/pornstars/"]')),
	};

	if (!include.scenes) {
		return profile;
	}

	profile.scenes = await fetchActorScenes(element, url, entity);

	return profile;
}
/**
 * Fetch one page of a channel's releases.
 *
 * The page number is appended as an extra query parameter, so `channel.url`
 * is expected to already contain a query string.
 *
 * @param {{ url: string }} channel - Channel to fetch; also passed to `scrapeAll` as the entity.
 * @param {number} [page=1] - Page number to request.
 * @returns {Promise<Object[]|number>} The scraped releases, or the response status when the request failed.
 */
async function fetchLatest(channel, page = 1) {
	const pageUrl = `${channel.url}&page=${page}`;
	const response = await unprint.get(pageUrl, { selectAll: '.search-grid li' });

	if (!response.ok) {
		return response.status;
	}

	return scrapeAll(response.context, channel);
}
/**
 * Fetch a channel's early-access releases.
 *
 * The early-access flag is appended as an extra query parameter, so
 * `channel.url` is expected to already contain a query string.
 *
 * @param {{ url: string }} channel - Channel to fetch; also passed to `scrapeAll` as the entity.
 * @returns {Promise<Object[]|number>} The scraped releases, or the response status when the request failed.
 */
async function fetchUpcoming(channel) {
	const earlyAccessUrl = `${channel.url}&early-access=true`;
	const response = await unprint.get(earlyAccessUrl, { selectAll: '.search-grid li' });

	if (!response.ok) {
		return response.status;
	}

	return scrapeAll(response.context, channel);
}
/**
 * Look an actor up through the site search and scrape their profile page.
 *
 * @param {{ name: string }} actor - Actor to look up; only `name` is used.
 * @param {{ entity: Object }} context - Carries the entity handed through to `scrapeProfile`.
 * @param {Object} include - Passed on to `scrapeProfile` unchanged.
 * @returns {Promise<Object|number|null>} The scraped profile, the response status of
 *   a failed request, or `null` when the search results hold no link for the actor.
 */
async function fetchProfile({ name: actorName }, { entity }, include) {
	const searchUrl = `https://www.bang.com/pornstars?term=${slugify(actorName, '+')}`;
	const searchRes = await unprint.get(searchUrl);

	if (!searchRes.ok) {
		return searchRes.status;
	}

	// first link whose span text contains the actor name
	const profileUrl = searchRes.context.query.url(`//a[contains(.//span, "${actorName}")]`);

	if (!profileUrl) {
		return null;
	}

	const profileRes = await unprint.get(profileUrl);

	return profileRes.ok
		? scrapeProfile(profileRes.context, profileUrl, entity, include)
		: profileRes.status;
}
// Entry points exposed by this scraper: the two listing fetchers, the profile
// fetcher and the scene page scraper.
module.exports = {
fetchLatest,
fetchUpcoming,
fetchProfile,
scrapeScene,
// NOTE(review): presumably tells the scraper runner to hand these functions unprint contexts — confirm against the caller
useUnprint: true,
};