'use strict'; const unprint = require('unprint'); const slugify = require('../utils/slugify'); /* function encodeId(id) { if (!id) { return id; } return Buffer .from(id, 'hex') .toString('base64') .replace(/\+/g, '-') .replace(/\//g, '_') .replace(/=/g, ','); } */ function decodeId(id) { if (!id) { return id; } const restoredId = id .replace(/-/g, '+') .replace(/_/g, '/') .replace(/,/g, '='); return Buffer .from(restoredId, 'base64') .toString('hex'); } function getAvatarFallback(url) { try { const { origin, pathname } = new URL(url); return [ `${origin}${pathname}`, url, ]; } catch (error) { return null; } } function scrapeAll(scenes, entity) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('a', { origin: entity.url }); release.entryId = query.dataset('a', 'videopreview-id-value') || decodeId(new URL(release.url).pathname.match(/\/video\/([\w-]+)\//)?.[1]); release.title = query.content('a > span.block'); release.date = query.date('a + div', 'MMM DD, YYYY'); release.duration = query.duration('[data-videopreview-target="duration"]', { attribute: 'data-content' }); release.actors = query.elements('a + div a[href*="/pornstar"]').map((el) => ({ name: unprint.query.content(el), url: unprint.query.url(el, null, { origin: 'https://www.bang.com' }), })); const poster = query.img('img[data-videopreview-target="image"]'); const posterUrl = new URL(poster); if (poster) { release.poster = [ `${posterUrl.origin}${posterUrl.pathname}`, posterUrl.href, ]; } const videoData = query.json('a', { attribute: 'data-videopreview-sources-value' }); if (videoData) { release.teaser = [ videoData.mp4_large, videoData.webm_large, videoData.mp4, videoData.webm, ]; } return release; }); } async function scrapeScene({ query }, { url, entity }) { const release = {}; const data = query.json('//script[contains(text(), "VideoObject")]'); release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]); // data title is not capitalized, prefer markup release.title = query.attribute('meta[property="og:title"]', 'content') || query.content('.video-container + div h1') || data?.name; release.description = data?.description || query.attribute('meta[property="og:description"]', 'content') || query.content('//div[contains(@class, "actions")]/preceding-sibling::p'); release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD') || query.date('//p[contains(text(), "Date:")]', 'MMM DD, YYYY'); release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span'); if (data?.actor) { release.actors = data.actor.map((actor) => ({ name: actor.name, url: actor.url, avatar: getAvatarFallback(query.img(`a[href*="/pornstar"] img[alt="${actor.name}"]`)), })); } else { release.actors = query.elements('//div[contains(@class, "video-actors")]//a[img|picture]').map((element) => ({ name: unprint.query.attribute(element, 'img', 'alt'), url: unprint.query.url(element, null, { origin: entity.url }), avatar: getAvatarFallback(unprint.query.img(element, 'img')), })); } release.tags = query.contents('.actions .genres'); const sourcesData = query.json('.video-container [data-videopreview-sources-value]', { attribute: 'data-videopreview-sources-value' }); release.poster = data?.thumbnailUrl || query.attribute('meta[property="og:image"]', 'content'); release.teaser = (sourcesData && [ sourcesData.mp4_large, sourcesData.webm_large, sourcesData.mp4, sourcesData.webm, ]) || data?.contentUrl || query.attribute('meta[property="og:video"]') || query.video('video[data-videocontainer-target] source'); release.photos = query.sourceSets('.photo-set img'); release.photoCount = query.number('//h2[contains(text(), "Photos")]/following-sibling::span'); const channelName = query.content('.video-container + div a[href*="?in="]')?.trim(); if (channelName) { release.channel = entity.children?.find((channel) => new RegExp(channel.name, 'i').test(channelName) || slugify(channelName) === channel.slug)?.slug; } return release; } async function fetchActorScenes(element, url, entity, page = 1, acc = []) { const scenes = scrapeAll(unprint.initAll(element, '.video_container'), entity); if (scenes.length) { const nextPageRes = await unprint.post(url, { page: page + 1 }); if (nextPageRes.ok) { return fetchActorScenes(nextPageRes.context.element, url, entity, page + 1, acc.concat(scenes)); } } return acc.concat(scenes); } async function scrapeProfile({ query, element }, url, entity, include) { const profile = { url }; profile.dateOfBirth = query.date('//text()[contains(., "Born")]/following-sibling::span[contains(@class, "font-bold")][1]', 'MMMM D, YYYY'); profile.birthPlace = query.content('//text()[contains(., "From")]/following-sibling::span[contains(@class, "font-bold")][1]'); profile.ethnicity = query.content('//text()[contains(., "Ethnicity")]/following-sibling::span[contains(@class, "font-bold")][1]'); profile.hairColor = query.content('//text()[contains(., "Hair Color")]/following-sibling::span[contains(@class, "font-bold")][1]'); profile.eyes = query.content('//text()[contains(., "Eye Color")]/following-sibling::span[contains(@class, "font-bold")][1]'); profile.avatar = getAvatarFallback(query.img('img[alt*="profile"][src*="https://i.bang.com/pornstars/"]')); if (include.scenes) { profile.scenes = await fetchActorScenes(element, url, entity); } return profile; } async function fetchLatest(channel, page = 1) { const url = `${channel.url}&by=date.desc&page=${page}`; const res = await unprint.get(url, { selectAll: '.video_container' }); if (res.ok) { return scrapeAll(res.context, channel); } return res.status; } async function fetchUpcoming(channel) { const url = `${channel.url}&by=date.desc&early-access=true`; const res = await unprint.get(url, { selectAll: '.video_container' }); if (res.ok) { return scrapeAll(res.context, channel); } return res.status; } async function fetchProfile({ name: actorName }, { entity }, include) { const searchRes = await unprint.get(`https://www.bang.com/pornstars?term=${slugify(actorName, '+')}`); if (!searchRes.ok) { return searchRes.status; } const url = searchRes.context.query.url(`//a[contains(.//span, "${actorName}")]`); if (!url) { return null; } const actorRes = await unprint.get(url); if (actorRes.ok) { return scrapeProfile(actorRes.context, url, entity, include); } return actorRes.status; } module.exports = { fetchLatest, fetchUpcoming, fetchProfile, scrapeScene, };