/*
 * traxxx/src/scrapers/bang-legacy.js
 * 430 lines, 9.2 KiB, JavaScript
 */

'use strict';
const http = require('../utils/http');
const qu = require('../utils/qu');
const { extractDate } = require('../utils/qu');
const { inchesToCm } = require('../utils/convert');
const slugify = require('../utils/slugify');
const capitalize = require('../utils/capitalize');
// Cluster identifier; interpolated as the subdomain of the `*.us-east-1.aws.found.io`
// search endpoints that the fetch functions in this file call.
const clusterId = '617fb597b659459bafe6472470d9073a';
// Credential string sent verbatim as `Authorization: Basic <authKey>` on every search request.
// NOTE(review): this is a hard-coded credential in source; consider loading it from configuration.
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
// Translates the single-letter gender codes found on scene actors into full words.
const genderMap = {
M: 'male',
F: 'female',
};
/**
 * Build the URL of a single screenshot image belonging to a scene.
 *
 * @param {object} [item] - Screenshot entry; its `screenId` names the image file.
 * @param {object} scene - Scene record supplying `dvd.id`, `type` and `order` for the path.
 * @returns {string|null} The screenshot URL, or null when the DVD id or the screen id is missing.
 */
function getScreenUrl(item, scene) {
  const dvdId = scene.dvd?.id;
  const screenId = item?.screenId;

  // both path components are required, otherwise the URL would point nowhere
  if (dvdId && screenId) {
    return `https://i.bang.com/screenshots/${dvdId}/${scene.type}/${scene.order}/${screenId}.jpg`;
  }

  return null;
}
/**
 * Convert a hexadecimal ID into the URL-friendly form used in video URLs:
 * base64 with `+`, `/` and `=` swapped for `-`, `_` and `,`.
 *
 * @param {string} id - Hex-encoded identifier.
 * @returns {string} The encoded identifier.
 */
function encodeId(id) {
  const substitutes = { '+': '-', '/': '_', '=': ',' };
  const base64 = Buffer.from(id, 'hex').toString('base64');

  // the substitutes never collide with the characters being replaced, so one pass suffices
  return base64.replace(/[+\/=]/g, (char) => substitutes[char]);
}
/**
 * Reverse the URL-friendly ID encoding: map `-`, `_` and `,` back to the
 * base64 characters `+`, `/` and `=`, then decode the base64 into a hex string.
 *
 * @param {string} id - Encoded identifier as it appears in a video URL.
 * @returns {string} Hex-encoded identifier.
 */
function decodeId(id) {
  const originals = { '-': '+', _: '/', ',': '=' };
  const base64 = id.replace(/[-_,]/g, (char) => originals[char]);

  return Buffer.from(base64, 'base64').toString('hex');
}
/**
 * Request signed URLs for the numbered photo set of a scene.
 *
 * Photo paths are derived from the DVD id, the scene identifier and a
 * 1-based, zero-padded six digit index, then posted to the signing endpoint.
 *
 * @param {object} scene - Scene record; reads `dvd.id`, `identifier` and `photos` (photo count).
 * @returns {Promise<string[]|null>} Signed photo URLs prefixed with the photo host, or null
 *   when the scene has no DVD id, has no photos, or the signing request did not succeed.
 */
async function fetchPhotos(scene) {
  const dvdId = scene.dvd?.id;
  const photoCount = Number(scene.photos) || 0;

  // without a DVD id no valid path can be built, and without photos there is nothing to sign;
  // skip the request instead of throwing or posting an empty list
  if (!dvdId || photoCount < 1) {
    return null;
  }

  const photoPaths = Array.from(
    { length: photoCount },
    (value, index) => `/${dvdId}/${scene.identifier}/final/${String(index + 1).padStart(6, '0')}.jpg`,
  );

  const res = await http.post('https://www.bang.com/sign-images', {
    images: photoPaths,
  }, {
    encodeJSON: false,
  });

  if (res.ok && Array.isArray(res.body?.images)) {
    return res.body.images.map((image) => qu.prefixUrl(image, 'https://photos.bang.com'));
  }

  return null;
}
/**
 * Convert a raw scene record from the search API into a release object.
 *
 * Optional parts of the record (DVD, series, screenshots, genres, actions,
 * actors) are tolerated when absent: the corresponding release properties
 * are left out or empty instead of raising a TypeError.
 *
 * @param {object} scene - Raw scene record (the `_source` of a search hit).
 * @param {object} entity - Site or network the scene is scraped for; not read here, kept for the scraper interface.
 * @param {object} [options] - Scrape options; `includePhotos` additionally requests the signed photo set.
 * @returns {Promise<object>} The release.
 */
async function scrapeScene(scene, entity, options) {
  const tagSources = [...(scene.genres || []), ...(scene.actions || [])];

  const release = {
    entryId: scene.id,
    // untitled bonus scenes get a title derived from their DVD
    title: scene.name || (scene.dvd?.name && scene.type === 'bonus' && capitalize(`${scene.dvd.name} - Bonus Scene ${scene.order || 1}`)) || null,
    description: scene.description,
    tags: tagSources.map((genre) => genre.name),
    duration: scene.duration,
  };

  const slug = slugify(release.title);

  release.url = `https://www.bang.com/video/${encodeId(release.entryId)}/${slug}`;

  // keep only the calendar day of the release date, at UTC midnight
  const date = new Date(scene.releaseDate);

  release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));
  release.actors = (scene.actors || []).map((actor) => ({ name: actor.name, gender: genderMap[actor.gender] }));

  if (scene.is4k) release.tags.push('4k');
  if (scene.gay) release.tags.push('gay');

  const screenshots = scene.screenshots || [];
  const defaultPoster = screenshots.find((photo) => photo.default === true);
  const screens = screenshots.filter((photo) => photo.default === false);

  // when no screenshot is flagged as default, the first regular one is promoted to poster
  const remainingScreens = defaultPoster ? screens : screens.slice(1);
  const poster = defaultPoster || screens[0];

  release.poster = getScreenUrl(poster, scene);

  // getScreenUrl yields null for entries it cannot resolve; keep those out of the photo list
  release.photos = remainingScreens
    .map((photo) => getScreenUrl(photo, scene))
    .filter(Boolean);

  if (options?.includePhotos) {
    const photos = await fetchPhotos(scene);

    // prefer the signed photo set over the screenshots when available
    if (photos?.length > 0) {
      release.photos = photos;
    }
  }

  // the teaser path needs both the DVD id and the scene identifier
  if (scene.dvd?.id && scene.identifier) {
    release.teaser = `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`;
  }

  if (scene.series?.name) {
    release.channel = scene.series.name
      .replace(/[! .]/g, '')
      .replace('&', 'and');
  }

  return release;
}
/**
 * Scrape every hit of a search response concurrently.
 *
 * @param {object[]} scenes - Search hits; the raw scene record is read from each hit's `_source`.
 * @param {object} entity - Site or network handed through to `scrapeScene`.
 * @returns {Promise<object[]>} Releases in the same order as the hits.
 */
function scrapeAll(scenes, entity) {
  const pendingReleases = scenes.map((hit) => {
    const { _source: scene } = hit;

    return scrapeScene(scene, entity);
  });

  return Promise.all(pendingReleases);
}
/**
 * Fetch up to 50 of the most recent non-trailer scenes featuring an actor.
 *
 * @param {object} actor - Actor record; its `id` is matched against `actors.mongoId`.
 * @param {object} entity - Site or network handed through to the scene scraper.
 * @returns {Promise<object[]>} Scraped releases, newest first; an empty list when the
 *   search request did not succeed, so a failed lookup does not abort the profile.
 */
async function fetchActorReleases(actor, entity) {
  const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
    size: 50,
    query: {
      bool: {
        must: [
          {
            match: {
              status: 'ok',
            },
          },
          {
            nested: {
              path: 'actors',
              query: {
                bool: {
                  must: [
                    {
                      match: {
                        'actors.mongoId': {
                          operator: 'AND',
                          query: actor.id,
                        },
                      },
                    },
                  ],
                },
              },
            },
          },
        ],
        must_not: [
          {
            match: {
              type: 'trailer',
            },
          },
        ],
      },
    },
    sort: [
      {
        releaseDate: {
          order: 'desc',
        },
      },
    ],
  }, {
    encodeJSON: true,
    headers: {
      Authorization: `Basic ${authKey}`,
    },
  });

  // a failed request carries no hits; reading them would raise a TypeError
  if (!res.ok) {
    return [];
  }

  return scrapeAll(res.body.hits.hits, entity);
}
/**
 * Convert a raw actor record from the search API into a profile object.
 *
 * @param {object} actor - Raw actor record (the `_source` of a search hit).
 * @param {object} entity - Site or network handed through when releases are fetched.
 * @param {object} [include] - Scrape flags; `releases` additionally fetches the actor's scenes.
 * @returns {Promise<object>} The profile.
 */
async function scrapeProfile(actor, entity, include) {
  const profile = {};

  profile.aliases = actor.aliases;
  profile.dateOfBirth = extractDate(actor.birthDate);
  profile.gender = ({ F: 'female', M: 'male' })[actor.gender];

  profile.ethnicity = actor.ethnicity;
  profile.nationality = actor.nationality;

  // join only the parts that are present, so a missing city or country
  // yields neither the text "undefined" nor a dangling comma
  profile.birthPlace = [actor.birthCity, actor.birthCountry].filter(Boolean).join(', ') || null;

  profile.hair = actor.hairColor;
  profile.eyes = actor.eyeColor;
  profile.naturalBoobs = actor.naturalBreasts;

  if (actor.measurements) {
    const { cupSize, shoulder, chest, waist, height } = actor.measurements;

    if (height) profile.height = inchesToCm(height);
    if (cupSize) profile.cup = cupSize;

    // [SIC] deliberate mapping: shoulder -> bust, chest -> waist, waist -> hip.
    // NOTE(review): presumably the API's field names are shifted by one; verify before changing.
    if (shoulder) profile.bust = shoulder;
    if (chest) profile.waist = chest;
    if (waist) profile.hip = waist;
  }

  if (actor.twitter) profile.social = [`https://www.twitter.com/${actor.twitter}`];
  if (actor.image) profile.avatar = `https://i.bang.com/pornstars/${actor.identifier}.jpg`;

  // `include` may be omitted by the caller
  if (include?.releases) {
    profile.releases = await fetchActorReleases(actor, entity);
  }

  return profile;
}
/**
 * Fetch one page (50 scenes) of a site's released scenes, newest first.
 *
 * Only scenes with status `ok`, a release date up to now, belonging to the
 * site's series and not of type `trailer` are requested.
 *
 * @param {object} site - Site to scrape; `parameters.siteId` is matched against `series.id`.
 * @param {number} [page=1] - 1-based page number.
 * @returns {Promise<object[]|number>} Scraped releases, or the HTTP status when the request did not succeed.
 */
async function fetchLatest(site, page = 1) {
  const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
    size: 50,
    from: (page - 1) * 50,
    query: {
      bool: {
        must: [
          {
            match: {
              status: 'ok',
            },
          },
          {
            range: {
              releaseDate: {
                lte: 'now',
              },
            },
          },
          /*
           * global fetch
          {
            nested: {
              path: 'studio',
              query: {
                bool: {
                  must: [
                    {
                      match: {
                        'studio.name': {
                          operator: 'AND',
                          query: 'bang! originals',
                        },
                      },
                    },
                  ],
                },
              },
            },
          },
          */
          {
            nested: {
              path: 'series',
              query: {
                bool: {
                  must: [
                    {
                      match: {
                        'series.id': {
                          operator: 'AND',
                          query: site.parameters.siteId,
                        },
                      },
                    },
                  ],
                },
              },
            },
          },
        ],
        must_not: [
          {
            match: {
              type: 'trailer',
            },
          },
        ],
      },
    },
    sort: [
      {
        releaseDate: {
          order: 'desc',
        },
      },
    ],
  }, {
    encodeJSON: true,
    headers: {
      Authorization: `Basic ${authKey}`,
    },
  });

  // report the status of a failed request, like fetchProfile does, rather than
  // raising a TypeError on the missing hits
  if (!res.ok) {
    return res.status;
  }

  return scrapeAll(res.body.hits.hits, site);
}
/**
 * Fetch one page (50 scenes) of a site's scenes with a release date up to
 * seven days from now, newest first.
 *
 * NOTE(review): the date range only has an upper bound (`now+7d`), so scenes
 * that are already released match as well — confirm this is intended.
 *
 * @param {object} site - Site to scrape; `parameters.siteId` is matched against `series.id`.
 * @param {number} [page=1] - 1-based page number.
 * @returns {Promise<object[]|number>} Scraped releases, or the HTTP status when the request did not succeed.
 */
async function fetchUpcoming(site, page = 1) {
  const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
    size: 50,
    from: (page - 1) * 50,
    query: {
      bool: {
        must: [
          {
            match: {
              status: 'ok',
            },
          },
          {
            range: {
              releaseDate: {
                lte: 'now+7d',
              },
            },
          },
          {
            nested: {
              path: 'series',
              query: {
                bool: {
                  must: [
                    {
                      match: {
                        'series.id': {
                          operator: 'AND',
                          query: site.parameters.siteId,
                        },
                      },
                    },
                  ],
                },
              },
            },
          },
        ],
        must_not: [
          {
            match: {
              type: 'trailer',
            },
          },
        ],
      },
    },
    sort: [
      {
        releaseDate: {
          order: 'desc',
        },
      },
    ],
  }, {
    encodeJSON: true,
    headers: {
      Authorization: `Basic ${authKey}`,
    },
  });

  // report the status of a failed request, like fetchProfile does, rather than
  // raising a TypeError on the missing hits
  if (!res.ok) {
    return res.status;
  }

  return scrapeAll(res.body.hits.hits, site);
}
/**
 * Fetch and scrape a single scene by its public URL.
 *
 * @param {string} url - Scene URL; the encoded ID is taken from the second path segment.
 * @param {object} entity - Site or network handed through to the scene scraper.
 * @param {object} [baseRelease] - Release already scraped from an overview; returned as-is when it has an `entryId`.
 * @param {object} [options] - Scrape options handed through to the scene scraper.
 * @returns {Promise<object|number>} The release, or the HTTP status when the request did not succeed.
 */
async function fetchScene(url, entity, baseRelease, options) {
  if (baseRelease?.entryId) {
    // overview and deep data is the same, don't hit server unnecessarily
    return baseRelease;
  }

  const encodedId = new URL(url).pathname.split('/')[2];
  const entryId = decodeId(encodedId);

  const res = await http.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
    headers: {
      Authorization: `Basic ${authKey}`,
    },
  });

  // report the status of a failed request, like fetchProfile does, rather than
  // raising a TypeError on the missing document
  if (!res.ok) {
    return res.status;
  }

  return scrapeScene(res.body._source, entity, options); // eslint-disable-line no-underscore-dangle
}
/**
 * Look up an actor by name and scrape the profile of the exact match.
 *
 * The search returns the five best scoring actors with status `ok`; only a
 * hit whose name equals the requested name (ignoring case) is accepted.
 *
 * @param {object} actor - Actor to look up; only `name` is read.
 * @param {object} context - Scrape context; `entity` is handed through to the profile scraper.
 * @param {object} include - Scrape flags handed through to the profile scraper.
 * @returns {Promise<object|number|null>} The profile, null when no hit matches the name exactly,
 *   or the HTTP status when the request did not succeed.
 */
async function fetchProfile({ name: actorName }, context, include) {
  const searchUrl = `https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`;

  const nameClause = { match: { name: { query: actorName, operator: 'and' } } };
  const statusClause = { match: { status: 'ok' } };

  const payload = {
    size: 5,
    sort: [{ _score: { order: 'desc' } }],
    query: { bool: { must: [nameClause, statusClause] } },
  };

  const requestOptions = {
    headers: { Authorization: `Basic ${authKey}` },
    encodeJSON: true,
  };

  const res = await http.post(searchUrl, payload, requestOptions);

  if (!res.ok) {
    return res.status;
  }

  // the search is fuzzy, so insist on a case-insensitive exact name match
  const exactHit = res.body.hits.hits.find(({ _source: candidate }) => candidate.name.toLowerCase() === actorName.toLowerCase());

  if (!exactHit) {
    return null;
  }

  return scrapeProfile(exactHit._source, context.entity, include);
}
// Public scraper interface: only these four entry points are exported,
// every other function in this file stays internal to the module.
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
fetchUpcoming,
};