2020-01-06 04:19:38 +00:00
|
|
|
'use strict';
|
|
|
|
|
2022-11-27 03:22:58 +00:00
|
|
|
const unprint = require('unprint');
|
2020-01-07 03:23:28 +00:00
|
|
|
|
2022-11-27 03:22:58 +00:00
|
|
|
const slugify = require('../utils/slugify');
|
2020-02-09 22:25:54 +00:00
|
|
|
|
2022-11-27 03:22:58 +00:00
|
|
|
// inverse of decodeId (hex ID to URL-safe base64), kept for reference but disabled
/*
function encodeId(id) {
  if (!id) {
    return id;
  }

  return Buffer
    .from(id, 'hex')
    .toString('base64')
    .replace(/\+/g, '-')
    .replace(/\//g, '_')
    .replace(/=/g, ',');
}
*/
|
2020-01-06 04:19:38 +00:00
|
|
|
|
2020-01-07 03:23:28 +00:00
|
|
|
/**
 * Convert a URL-safe base64 identifier into its hexadecimal form.
 *
 * The incoming ID carries `-`, `_` and `,` where standard base64 uses
 * `+`, `/` and `=`; those characters are restored before decoding.
 *
 * @param {string} [id] - URL-safe base64 identifier.
 * @returns {string} Hex string, or the falsy input unchanged when no ID is given.
 */
function decodeId(id) {
  if (!id) {
    return id;
  }

  const restoredId = id
    .replace(/-/g, '+')
    .replace(/_/g, '/')
    .replace(/,/g, '=');

  return Buffer
    .from(restoredId, 'base64')
    .toString('hex');
}
|
2020-01-06 04:19:38 +00:00
|
|
|
|
2022-11-27 03:43:27 +00:00
|
|
|
/**
 * Build a list of candidate avatar URLs from a single image URL.
 *
 * The first candidate is the URL stripped of its query string and hash
 * (origin + path only), the second is the URL exactly as given.
 *
 * @param {string} url - Absolute image URL.
 * @returns {string[]|null} Candidate URLs, or null when `url` cannot be parsed.
 */
function getAvatarFallback(url) {
  try {
    const { origin, pathname } = new URL(url);

    return [
      `${origin}${pathname}`,
      url,
    ];
  } catch {
    // missing or malformed URL: there is no avatar to offer
    return null;
  }
}
|
|
|
|
|
2022-11-27 03:22:58 +00:00
|
|
|
/**
 * Scrape the scene tiles of a listing page into release objects.
 *
 * @param {Array<{ query: object }>} scenes - One query context per scene tile.
 * @param {{ url: string }} entity - Entity whose URL is used as origin for scene links.
 * @returns {object[]} One release per tile.
 */
function scrapeAll(scenes, entity) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('a', { origin: entity.url });

    // prefer the ID from the markup; otherwise decode it from the scene URL, if there is one
    release.entryId = query.dataset('a', 'videopreview-id-value')
      || decodeId(release.url && new URL(release.url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);

    release.title = query.content('a > span.block');
    release.date = query.date('a + div', 'MMM DD, YYYY');
    release.duration = query.duration('[data-videopreview-target="duration"]', { attribute: 'data-content' });

    release.actors = query.elements('a + div a[href*="/pornstar"]').map((el) => ({
      name: unprint.query.content(el),
      url: unprint.query.url(el, null, { origin: 'https://www.bang.com' }),
    }));

    const poster = query.img('img[data-videopreview-target="image"]');

    if (poster) {
      // only parse once we know a poster exists; new URL() throws on a missing value
      const posterUrl = new URL(poster);

      release.poster = [
        `${posterUrl.origin}${posterUrl.pathname}`, // without query parameters
        posterUrl.href,
      ];
    }

    const videoData = query.json('a', { attribute: 'data-videopreview-sources-value' });

    if (videoData) {
      release.teaser = [
        videoData.mp4_large,
        videoData.webm_large,
        videoData.mp4,
        videoData.webm,
      ];
    }

    return release;
  });
}
|
2020-02-09 22:25:54 +00:00
|
|
|
|
2022-11-27 03:22:58 +00:00
|
|
|
/**
 * Scrape a scene page into a release object.
 *
 * Values are taken from the page's "VideoObject" JSON script when present,
 * with fallbacks to meta tags and markup.
 *
 * @param {{ query: object }} context - Query context of the scene page.
 * @param {{ url: string, entity: object }} options - Scene URL and the entity being scraped;
 *   `entity.children` (if any) is searched to resolve the channel slug.
 * @returns {Promise<object>} The scraped release.
 */
async function scrapeScene({ query }, { url, entity }) {
  // escape regex metacharacters so a channel name is always matched literally
  const escapePattern = (text) => String(text ?? '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  const release = {};
  const data = query.json('//script[contains(text(), "VideoObject")]');

  release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);

  // data title is not capitalized, prefer markup
  release.title = query.attribute('meta[property="og:title"]', 'content')
    || query.content('.video-container + div h1')
    || data?.name;

  release.description = data?.description
    || query.attribute('meta[property="og:description"]', 'content')
    || query.content('//div[contains(@class, "actions")]/preceding-sibling::p');

  release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD')
    || query.date('//p[contains(text(), "Date:")]', 'MMM DD, YYYY');

  release.duration = unprint.extractTimestamp(data?.duration)
    || query.duration('//p[contains(text(), "Playtime:")]//span');

  if (data?.actor) {
    // structured data may list a single actor as a bare object instead of an array
    const actors = Array.isArray(data.actor) ? data.actor : [data.actor];

    release.actors = actors.map((actor) => ({
      name: actor.name,
      url: actor.url,
      avatar: getAvatarFallback(query.img(`a[href*="/pornstar"] img[alt="${actor.name}"]`)),
    }));
  } else {
    release.actors = query.elements('//div[contains(@class, "video-actors")]//a[img|picture]').map((element) => ({
      name: unprint.query.attribute(element, 'img', 'alt'),
      url: unprint.query.url(element, null, { origin: entity.url }),
      avatar: getAvatarFallback(unprint.query.img(element, 'img')),
    }));
  }

  release.tags = query.contents('.actions .genres');

  const sourcesData = query.json('.video-container [data-videopreview-sources-value]', { attribute: 'data-videopreview-sources-value' });

  release.poster = data?.thumbnailUrl || query.attribute('meta[property="og:image"]', 'content');

  release.teaser = (sourcesData && [
    sourcesData.mp4_large,
    sourcesData.webm_large,
    sourcesData.mp4,
    sourcesData.webm,
  ])
    || data?.contentUrl
    // read the `content` attribute, like the other og: meta lookups above
    || query.attribute('meta[property="og:video"]', 'content')
    || query.video('video[data-videocontainer-target] source');

  release.photos = query.sourceSets('.photo-set img');
  release.photoCount = query.number('//h2[contains(text(), "Photos")]/following-sibling::span');

  const channelName = query.content('.video-container + div a[href*="?in="]')?.trim();

  if (channelName) {
    // match on a case-insensitive name occurrence first, then on the slug
    release.channel = entity.children?.find((channel) => new RegExp(escapePattern(channel.name), 'i').test(channelName)
      || slugify(channelName) === channel.slug)?.slug;
  }

  return release;
}
|
|
|
|
|
2022-11-27 03:22:58 +00:00
|
|
|
/**
 * Collect an actor's scenes across all pages.
 *
 * Scrapes the scenes in `element`, then POSTs to `url` for the next page and
 * recurses. Stops when a page holds no scenes or the request fails.
 *
 * @param {object} element - Element containing the current page's scene tiles.
 * @param {string} url - Actor page URL, also the target of the pagination POST.
 * @param {object} entity - Entity passed through to scrapeAll.
 * @param {number} [page=1] - Number of the page held in `element`.
 * @param {object[]} [acc=[]] - Scenes gathered from previous pages.
 * @returns {Promise<object[]>} All scenes gathered so far, including this page.
 */
async function fetchActorScenes(element, url, entity, page = 1, acc = []) {
  const scenes = scrapeAll(unprint.initAll(element, '.video_container'), entity);
  const collected = acc.concat(scenes);

  if (scenes.length) {
    const nextPage = page + 1;
    const nextPageRes = await unprint.post(url, { page: nextPage });

    if (nextPageRes.ok) {
      return fetchActorScenes(nextPageRes.context.element, url, entity, nextPage, collected);
    }
  }

  return collected;
}
|
|
|
|
|
2022-11-27 03:22:58 +00:00
|
|
|
/**
 * Scrape an actor page into a profile object.
 *
 * @param {{ query: object, element: object }} context - Query context and root element of the actor page.
 * @param {string} url - Actor page URL; stored on the profile and used for scene pagination.
 * @param {object} entity - Entity passed through to fetchActorScenes.
 * @param {{ scenes?: boolean }} [include] - When `include.scenes` is truthy, the actor's scenes are fetched too.
 * @returns {Promise<object>} The scraped profile.
 */
async function scrapeProfile({ query, element }, url, entity, include) {
  // every bio value sits in the first bold span following its text label
  const bioSelector = (label) => `//text()[contains(., "${label}")]/following-sibling::span[contains(@class, "font-bold")][1]`;

  const profile = { url };

  profile.dateOfBirth = query.date(bioSelector('Born'), 'MMMM D, YYYY');
  profile.birthPlace = query.content(bioSelector('From'));

  profile.ethnicity = query.content(bioSelector('Ethnicity'));

  profile.hairColor = query.content(bioSelector('Hair Color'));
  profile.eyes = query.content(bioSelector('Eye Color'));

  profile.avatar = getAvatarFallback(query.img('img[alt*="profile"][src*="https://i.bang.com/pornstars/"]'));

  // tolerate callers that omit the include options altogether
  if (include?.scenes) {
    profile.scenes = await fetchActorScenes(element, url, entity);
  }

  return profile;
}
|
|
|
|
|
2022-11-27 03:22:58 +00:00
|
|
|
/**
 * Fetch a page of a channel's scenes, newest first.
 *
 * @param {{ url: string }} channel - Channel whose listing URL is requested.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request fails.
 */
async function fetchLatest(channel, page = 1) {
  // channel URLs normally carry a query string already; start one if they do not
  const separator = channel.url.includes('?') ? '&' : '?';
  const url = `${channel.url}${separator}by=date.desc&page=${page}`;
  const res = await unprint.get(url, { selectAll: '.video_container' });

  if (res.ok) {
    return scrapeAll(res.context, channel);
  }

  return res.status;
}
|
2024-10-31 02:01:13 +00:00
|
|
|
|
2023-01-03 22:32:52 +00:00
|
|
|
/**
 * Fetch a channel's early-access scenes, newest first.
 *
 * @param {{ url: string }} channel - Channel whose listing URL is requested.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request fails.
 */
async function fetchUpcoming(channel) {
  // channel URLs normally carry a query string already; start one if they do not
  const separator = channel.url.includes('?') ? '&' : '?';
  const url = `${channel.url}${separator}by=date.desc&early-access=true`;
  const res = await unprint.get(url, { selectAll: '.video_container' });

  if (res.ok) {
    return scrapeAll(res.context, channel);
  }

  return res.status;
}
|
2020-01-07 03:23:28 +00:00
|
|
|
|
2022-11-27 03:22:58 +00:00
|
|
|
/**
 * Look an actor up by name and scrape their profile page.
 *
 * @param {{ name: string }} actor - Actor to search for.
 * @param {{ entity: object }} context - Carries the entity passed on to scrapeProfile.
 * @param {object} include - Options passed on to scrapeProfile.
 * @returns {Promise<object|number|null>} The profile, the response status of a failed
 *   request, or null when the search results hold no link for the actor.
 */
async function fetchProfile({ name: actorName }, { entity }, include) {
  // XPath 1.0 has no escape sequences: choose a quote style the text does not contain,
  // or stitch the pieces together with concat() when it contains both
  const toXPathLiteral = (text) => {
    if (!text.includes('"')) {
      return `"${text}"`;
    }

    if (!text.includes("'")) {
      return `'${text}'`;
    }

    return `concat(${text.split('"').map((part) => `"${part}"`).join(', \'"\', ')})`;
  };

  const searchRes = await unprint.get(`https://www.bang.com/pornstars?term=${slugify(actorName, '+')}`);

  if (!searchRes.ok) {
    return searchRes.status;
  }

  const url = searchRes.context.query.url(`//a[contains(.//span, ${toXPathLiteral(actorName)})]`);

  if (!url) {
    return null;
  }

  const actorRes = await unprint.get(url);

  if (actorRes.ok) {
    return scrapeProfile(actorRes.context, url, entity, include);
  }

  return actorRes.status;
}
|
|
|
|
|
2020-01-06 04:19:38 +00:00
|
|
|
module.exports = {
|
2020-05-14 02:26:05 +00:00
|
|
|
fetchLatest,
|
2023-01-03 22:32:52 +00:00
|
|
|
fetchUpcoming,
|
2020-05-17 02:59:09 +00:00
|
|
|
fetchProfile,
|
2022-11-27 03:22:58 +00:00
|
|
|
scrapeScene,
|
2020-01-06 04:19:38 +00:00
|
|
|
};
|