Refactored Bang! scraper to match new website, first use of unprint.

This commit is contained in:
DebaucheryLibrarian 2022-11-27 04:22:58 +01:00
parent 3cf8776ca5
commit 6edd62c337
61 changed files with 2371 additions and 1233 deletions

View File

@ -7,7 +7,7 @@ Use [nvm](https://github.com/creationix/nvm) to install NodeJS v16.8.0 or newer.
`npm install`
### Set up database
Install PostgreSQL, make sure password authentication is enabled (scram-sha-256) and create a database with a privileged user.
Install PostgreSQL and make sure password authentication (scram-sha-256) is enabled. Create a database with a fully privileged user, plus a visitor user without any privileges (the privileges the visitor needs will be provided by the migration).
### Configuration
Do not modify `config/default.js`, but instead create a copy at `config/local.js` containing the properties you wish to change. If you have set `NODE_ENV`, copy `assets/js/config/default.js` to `assets/js/config/[environment].js`. After setting up PostgreSQL and configuring the details, run the following commands to create and populate the tables, and build the project:

View File

@ -294,13 +294,13 @@ module.exports = {
},
bypass: {
browser: {
enable: true,
enable: false,
hostnames: [ // these can run in the same browser session
'www.kink.com',
],
},
cloudflare: {
enable: true,
enable: false,
auto: true, // try bypass when CF challenge is detected
path: 'http://localhost:8191/v1',
sharedHostnames: [ // these can run in the same browser session

2602
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -134,6 +134,7 @@
"tough-cookie": "^4.0.0",
"tunnel": "0.0.6",
"undici": "^4.13.0",
"unprint": "^0.7.2",
"url-pattern": "^1.0.3",
"v-tooltip": "^2.0.3",
"video.js": "^7.11.4",

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 166 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 172 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 166 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 166 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.4 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 378 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 416 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.4 KiB

BIN
public/img/logos/zerotolerance/lazy/addicted2girls.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.2 KiB

After

Width:  |  Height:  |  Size: 2.2 KiB

BIN
public/img/logos/zerotolerance/lazy/favicon.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.0 KiB

After

Width:  |  Height:  |  Size: 7.0 KiB

BIN
public/img/logos/zerotolerance/lazy/favicon_dark.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.0 KiB

After

Width:  |  Height:  |  Size: 7.0 KiB

BIN
public/img/logos/zerotolerance/lazy/favicon_light.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.0 KiB

After

Width:  |  Height:  |  Size: 7.0 KiB

BIN
public/img/logos/zerotolerance/lazy/genderx.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.0 KiB

After

Width:  |  Height:  |  Size: 2.0 KiB

BIN
public/img/logos/zerotolerance/lazy/genderxfilms.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

BIN
public/img/logos/zerotolerance/lazy/network.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.8 KiB

After

Width:  |  Height:  |  Size: 3.9 KiB

BIN
public/img/logos/zerotolerance/lazy/zerotolerance.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 2.2 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 89 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
public/img/logos/zerotolerance/thumbs/addicted2girls.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.5 KiB

After

Width:  |  Height:  |  Size: 9.6 KiB

BIN
public/img/logos/zerotolerance/thumbs/favicon.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.0 KiB

After

Width:  |  Height:  |  Size: 7.0 KiB

BIN
public/img/logos/zerotolerance/thumbs/favicon_dark.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.0 KiB

After

Width:  |  Height:  |  Size: 7.0 KiB

BIN
public/img/logos/zerotolerance/thumbs/favicon_light.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.0 KiB

After

Width:  |  Height:  |  Size: 7.0 KiB

BIN
public/img/logos/zerotolerance/thumbs/genderx.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.8 KiB

After

Width:  |  Height:  |  Size: 4.8 KiB

BIN
public/img/logos/zerotolerance/thumbs/genderxfilms.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.7 KiB

After

Width:  |  Height:  |  Size: 5.8 KiB

BIN
public/img/logos/zerotolerance/thumbs/network.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 36 KiB

After

Width:  |  Height:  |  Size: 36 KiB

BIN
public/img/logos/zerotolerance/thumbs/zerotolerance.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 16 KiB

View File

@ -906,7 +906,7 @@ const sites = [
{
name: 'Trickery',
slug: 'bangtrickery',
url: 'https://www.bang.com/original/4800/bang-trickery',
url: 'https://www.bang.com/videos?in=BANG%21+Trickery',
parameters: { siteId: 4800 },
parent: 'bang',
},
@ -914,23 +914,21 @@ const sites = [
name: 'Yngr',
slug: 'yngrcom',
alias: ['byngr'],
// url: 'https://www.bang.com/original/5010/bang-yngr',
url: 'https://yngr.com',
url: 'https://www.bang.com/videos?in=yngr.com',
parameters: { siteId: 5010 },
parent: 'bang',
},
{
name: 'Roadside XXX',
slug: 'bangroadsidexxx',
// url: 'https://www.bang.com/original/4864/roadside-xxx',
url: 'https://roadsidexxx.com',
url: 'https://www.bang.com/videos?in=Bang%21+Roadside+XXX',
parameters: { siteId: 4864 },
parent: 'bang',
},
{
name: 'Surprise',
slug: 'bangsurprise',
url: 'https://www.bang.com/original/5000/bang-surprise',
url: 'https://www.bang.com/videos?in=BANG%21+Surprise',
parameters: { siteId: 5000 },
parent: 'bang',
},
@ -938,37 +936,35 @@ const sites = [
name: 'Real Teens',
slug: 'bangrealteens',
alias: ['brealteens'],
url: 'https://www.bang.com/original/3366/bang-real-teens',
url: 'https://www.bang.com/videos?in=BANG%21+Real+Teens',
parameters: { siteId: 3366 },
parent: 'bang',
},
{
name: 'FCK.news',
slug: 'bangfakenews',
// url: 'https://www.bang.com/original/4998/bang-fckNews',
url: 'https://fck.news',
url: 'https://www.bang.com/videos?in=Bang%21+Fake+news',
parameters: { siteId: 4998 },
parent: 'bang',
},
{
name: 'Pretty & Raw',
slug: 'prettyandraw',
// url: 'https://www.bang.com/original/4792/bang-pretty-and-raw',
url: 'https://prettyandraw.com',
url: 'https://www.bang.com/videos?in=Pretty+%26+Raw',
parameters: { siteId: 4792 },
parent: 'bang',
},
{
name: 'Japan',
slug: 'bangjapan',
url: 'https://www.bang.com/original/3079/bang-japan',
url: 'https://www.bang.com/videos?in=BANG%21+Japan',
parameters: { siteId: 3079, ignore: true },
parent: 'bang',
},
{
name: 'Rammed',
slug: 'bangrammed',
url: 'https://www.bang.com/original/4836/bang-rammed',
url: 'https://www.bang.com/videos?in=BANG%21+Rammed',
parameters: { siteId: 4836 },
parent: 'bang',
},
@ -976,14 +972,14 @@ const sites = [
name: 'Glamkore',
slug: 'bangglamkore',
alias: ['bglamkore'],
url: 'https://www.bang.com/original/4586/bang-glamkore',
url: 'https://www.bang.com/videos?in=BANG%21+Glamkore',
parameters: { siteId: 4586 },
parent: 'bang',
},
{
name: 'Screw The Cops',
slug: 'screwthecops',
url: 'https://www.bang.com/original/4710/bang-screw-cops',
url: 'https://www.bang.com/videos?in=Screw+the+Cops',
parameters: { siteId: 4710 },
parent: 'bang',
},
@ -991,7 +987,7 @@ const sites = [
name: 'Real MILFs',
slug: 'bangrealmilfs',
alias: ['brealmilfs'],
url: 'https://www.bang.com/original/4448/bang-real-milfs',
url: 'https://www.bang.com/videos?in=BANG%21+Real+Milfs',
parameters: { siteId: 4448 },
parent: 'bang',
},
@ -999,7 +995,7 @@ const sites = [
name: 'Confessions',
slug: 'bangconfessions',
alias: ['bconfessions'],
url: 'https://www.bang.com/original/4308/bang-confessions',
url: 'https://www.bang.com/videos?in=BANG%21+Confessions',
parameters: { siteId: 4308 },
parent: 'bang',
},
@ -1007,14 +1003,14 @@ const sites = [
name: 'Casting',
slug: 'bangcasting',
alias: ['bcasting'],
url: 'https://www.bang.com/original/3261/bang-casting',
url: 'https://www.bang.com/videos?in=BANG%21+Casting',
parameters: { siteId: 3261 },
parent: 'bang',
},
{
name: 'Bang! Podcast',
slug: 'bangpodcast',
url: 'https://www.bang.com/videos?in=bang!%20podcast',
url: 'https://www.bang.com/videos?in=bang%21+podcast',
parameters: { siteId: 6305 },
parent: 'bang',
},
@ -11518,6 +11514,15 @@ const sites = [
movie: 'https://www.addicted2girls.com/en/dvd',
},
},
{
slug: '3rddegreefilms',
name: '3rd Degree Films',
url: 'https://www.3rddegreefilms.com',
parent: 'zerotolerance',
parameters: {
movie: 'https://www.genderxfilms.com/en/dvd',
},
},
{
slug: 'genderxfilms',
name: 'GenderXFilms',
@ -11525,7 +11530,8 @@ const sites = [
tags: ['transsexual'],
parent: 'zerotolerance',
parameters: {
movie: 'https://www.genderxfilms.com/en/dvd',
scene: 'https://www.3rddegreefilms.com/en/video/3rddegreefilms',
movie: 'https://www.3rddegreefilms.com/en/movie',
},
},
{

View File

@ -2,6 +2,7 @@
const config = require('config');
const util = require('util');
const unprint = require('unprint');
// const log = require('why-is-node-running');
const Inspector = require('inspector-api');
const fs = require('fs').promises;
@ -25,6 +26,13 @@ const getFileEntries = require('./utils/file-entries');
const inspector = new Inspector();
let done = false;
// Configure unprint once at startup, before any scraper runs.
// NOTE(review): presumably these act as defaults for every later unprint request, and the timeout is in milliseconds — confirm against unprint's documentation.
unprint.options({
timeout: 5000,
// Identify as a desktop Chrome browser rather than with a library default user agent.
headers: {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
},
});
/*
function logActive() {
setTimeout(() => {

View File

@ -2,6 +2,7 @@
const util = require('util');
const Promise = require('bluebird');
const unprint = require('unprint');
const { mergeAdvanced: merge } = require('object-merge-advanced');
const argv = require('./argv');
@ -54,12 +55,33 @@ function toBaseReleases(baseReleasesOrUrls, entity = null) {
.filter(Boolean);
}
/**
 * Fetch a release page with unprint and hand the parsed page to the scraper.
 *
 * @param {object} scraper - scraper module exposing `scrapeScene` and/or `scrapeMovie`
 * @param {string} url - page to fetch
 * @param {object} entity - entity the release belongs to, passed through to the scraper
 * @param {object} baseRelease - release data gathered so far, passed through to the scraper
 * @param {object} options - passed through to the scraper as its third argument
 * @param {string} type - 'movie' selects `scrapeMovie`, anything else selects `scrapeScene`
 * @returns {Promise<*>} whatever the scraper returns, or the response status when the request failed
 */
async function fetchUnprintScene(scraper, url, entity, baseRelease, options, type) {
	// NOTE(review): certificate verification is switched off for this request — confirm this is intentional.
	const response = await unprint.get(url, { rejectUnauthorized: false });

	if (!response.ok) {
		return response.status;
	}

	const scrapeMethod = type === 'movie' ? 'scrapeMovie' : 'scrapeScene';
	const details = {
		url,
		entity,
		baseRelease,
		headers: response.headers,
	};

	return scraper[scrapeMethod](response.context, details, options);
}
async function fetchScene(scraper, url, entity, baseRelease, options, type = 'scene') {
if ((type === 'scene' && scraper.fetchScene) || (type === 'movie' && scraper.fetchMovie)) {
return scraper[type === 'movie' ? 'fetchMovie' : 'fetchScene'](baseRelease.url, entity, baseRelease, options, null);
}
if ((type === 'scene' && scraper.scrapeScene) || (type === 'movie' && scraper.scrapeMovie)) {
if (scraper.useUnprint) {
return fetchUnprintScene(scraper, url, entity, baseRelease, options, type);
}
const session = qu.session();
const res = await qu.get(url, null, null, {

429
src/scrapers/bang-legacy.js Executable file
View File

@ -0,0 +1,429 @@
'use strict';
const http = require('../utils/http');
const qu = require('../utils/qu');
const { extractDate } = require('../utils/qu');
const { inchesToCm } = require('../utils/convert');
const slugify = require('../utils/slugify');
const capitalize = require('../utils/capitalize');
// Identifier of the search cluster; interpolated into the *.us-east-1.aws.found.io hostname every search request is sent to.
const clusterId = '617fb597b659459bafe6472470d9073a';
// Base64 credentials sent as `Authorization: Basic …` with every request to that cluster.
// NOTE(review): presumably a read-only key that the public site itself exposes — confirm before reusing elsewhere.
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
// Translates the single-letter gender codes on scene actors into the gender names stored on releases.
const genderMap = {
M: 'male',
F: 'female',
};
/**
 * Build the i.bang.com URL of a single screenshot.
 *
 * @param {object} [item] - screenshot entry; its `screenId` names the image file
 * @param {object} scene - scene the screenshot belongs to (`dvd.id`, `type`, `order`)
 * @returns {string|null} the screenshot URL, or null when the DVD id or screen id is missing
 */
function getScreenUrl(item, scene) {
	const dvdId = scene.dvd?.id;
	const screenId = item?.screenId;

	if (dvdId && screenId) {
		return `https://i.bang.com/screenshots/${dvdId}/${scene.type}/${scene.order}/${screenId}.jpg`;
	}

	return null;
}
/**
 * Convert a hexadecimal entry ID into the ID format used in video URLs:
 * base64 with `+`, `/` and `=` swapped for `-`, `_` and `,`.
 *
 * @param {string} id - hexadecimal ID
 * @returns {string} URL-safe encoded ID
 */
function encodeId(id) {
	const urlSafe = { '+': '-', '/': '_', '=': ',' };
	const base64 = Buffer.from(id, 'hex').toString('base64');

	return base64.replace(/[+/=]/g, (character) => urlSafe[character]);
}
/**
 * Convert an ID taken from a video URL back into its hexadecimal form,
 * reversing the `-`, `_` and `,` substitutions before base64-decoding.
 *
 * @param {string} id - URL-safe encoded ID
 * @returns {string} hexadecimal ID
 */
function decodeId(id) {
	const standard = { '-': '+', _: '/', ',': '=' };
	const base64 = id.replace(/[-_,]/g, (character) => standard[character]);

	return Buffer.from(base64, 'base64').toString('hex');
}
/**
 * Request the photo URLs of a scene from bang.com's sign-images endpoint.
 *
 * One path is generated per photo, numbered from 000001 up to `scene.photos`.
 *
 * @param {object} scene - scene data (`photos` count, `dvd.id`, `identifier`)
 * @returns {Promise<string[]|null>} photo URLs prefixed with the photos.bang.com origin,
 *   or null when the request failed or returned no images
 */
async function fetchPhotos(scene) {
	const toPhotoPath = (photoNumber) => `/${scene.dvd.id}/${scene.identifier}/final/${String(photoNumber).padStart(6, '0')}.jpg`;
	const photoPaths = Array.from({ length: scene.photos }).map((empty, index) => toPhotoPath(index + 1));

	const res = await http.post('https://www.bang.com/sign-images', { images: photoPaths }, { encodeJSON: false });

	if (!res.ok || !res.body.images) {
		return null;
	}

	return res.body.images.map((imagePath) => qu.prefixUrl(imagePath, 'https://photos.bang.com'));
}
/**
 * Convert a scene document from the search cluster into a release.
 *
 * Scenes without a DVD id get no teaser and no photo request: the title and
 * screenshot handling already tolerate a missing `scene.dvd`, and
 * dereferencing it for the teaser used to crash on such scenes.
 *
 * @param {object} scene - scene document (`_source` of a search hit)
 * @param {object} entity - entity being scraped (not used here)
 * @param {object} [options] - `includePhotos` requests the full photo set
 * @returns {Promise<object>} the release
 */
async function scrapeScene(scene, entity, options) {
	const release = {
		entryId: scene.id,
		// bonus scenes have no name of their own, derive one from the DVD
		title: scene.name || (scene.dvd?.name && scene.type === 'bonus' && capitalize(`${scene.dvd.name} - Bonus Scene ${scene.order || 1}`)) || null,
		description: scene.description,
		tags: scene.genres.concat(scene.actions).map((genre) => genre.name),
		duration: scene.duration,
	};

	const slug = slugify(release.title);
	release.url = `https://www.bang.com/video/${encodeId(release.entryId)}/${slug}`;

	// reduce the release timestamp to midnight UTC of the same day
	const date = new Date(scene.releaseDate);
	release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));

	release.actors = scene.actors.map((actor) => ({ name: actor.name, gender: genderMap[actor.gender] }));

	if (scene.is4k) release.tags.push('4k');
	if (scene.gay) release.tags.push('gay');

	const defaultPoster = scene.screenshots.find((photo) => photo.default === true);
	const screens = scene.screenshots.filter((photo) => photo.default === false);

	// without a designated poster the first screenshot is promoted to poster and removed from the photos
	const remainingScreens = defaultPoster ? screens : screens.slice(1);
	const poster = defaultPoster || screens[0];

	release.poster = getScreenUrl(poster, scene);
	// getScreenUrl returns null for screenshots it cannot address; keep those out of the photo list
	release.photos = remainingScreens
		.map((photo) => getScreenUrl(photo, scene))
		.filter(Boolean);

	// same truthiness test getScreenUrl applies to the DVD id
	const hasDvd = Boolean(scene.dvd?.id);

	if (options?.includePhotos && hasDvd) {
		const photos = await fetchPhotos(scene);

		if (photos?.length > 0) {
			release.photos = photos;
		}
	}

	release.teaser = hasDvd
		? `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`
		: null;

	release.channel = scene.series.name
		.replace(/[! .]/g, '')
		.replace('&', 'and');

	return release;
}
/**
 * Scrape every hit of a search response into a release.
 *
 * @param {object[]} scenes - search hits; the scene document is read from each hit's `_source`
 * @param {object} entity - entity passed along to `scrapeScene`
 * @returns {Promise<object[]>} the releases, in hit order
 */
function scrapeAll(scenes, entity) {
	const pendingReleases = scenes.map((hit) => scrapeScene(hit._source, entity)); // eslint-disable-line no-underscore-dangle

	return Promise.all(pendingReleases);
}
/**
 * Search the cluster for the newest scenes featuring an actor.
 *
 * @param {object} actor - actor document; its `id` is matched against `actors.mongoId`
 * @param {object} entity - entity passed along to `scrapeAll`
 * @returns {Promise<object[]>} up to 50 releases, newest first, trailers excluded
 */
async function fetchActorReleases(actor, entity) {
	const endpoint = `https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`;

	const isAvailable = { match: { status: 'ok' } };
	const isTrailer = { match: { type: 'trailer' } };

	const featuresActor = {
		nested: {
			path: 'actors',
			query: {
				bool: {
					must: [{
						match: {
							'actors.mongoId': { operator: 'AND', query: actor.id },
						},
					}],
				},
			},
		},
	};

	const search = {
		size: 50,
		query: {
			bool: {
				must: [isAvailable, featuresActor],
				must_not: [isTrailer],
			},
		},
		sort: [{ releaseDate: { order: 'desc' } }],
	};

	const res = await http.post(endpoint, search, {
		encodeJSON: true,
		headers: { Authorization: `Basic ${authKey}` },
	});

	return scrapeAll(res.body.hits.hits, entity);
}
/**
 * Convert an actor document from the search cluster into a profile.
 *
 * @param {object} actor - actor document (`_source` of a search hit)
 * @param {object} entity - entity passed along when fetching the actor's releases
 * @param {object} [include] - `releases` requests the actor's scenes as well
 * @returns {Promise<object>} the profile
 */
async function scrapeProfile(actor, entity, include) {
	const profile = {};

	profile.aliases = actor.aliases;
	profile.dateOfBirth = extractDate(actor.birthDate);
	profile.gender = ({ F: 'female', M: 'male' })[actor.gender];

	profile.ethnicity = actor.ethnicity;
	profile.nationality = actor.nationality;

	// only join the parts that are present, so a missing city or country
	// does not produce "undefined, " or a dangling ", "
	profile.birthPlace = [actor.birthCity, actor.birthCountry].filter(Boolean).join(', ') || null;

	profile.hair = actor.hairColor;
	profile.eyes = actor.eyeColor;

	profile.naturalBoobs = actor.naturalBreasts;

	if (actor.measurements) {
		const { cupSize, shoulder, chest, waist, height } = actor.measurements;

		if (height) profile.height = inchesToCm(height);
		if (cupSize) profile.cup = cupSize;

		// [SIC] the source labels are shifted: shoulder holds the bust, chest the waist and waist the hip
		if (shoulder) profile.bust = shoulder;
		if (chest) profile.waist = chest;
		if (waist) profile.hip = waist;
	}

	if (actor.twitter) profile.social = [`https://www.twitter.com/${actor.twitter}`];
	if (actor.image) profile.avatar = `https://i.bang.com/pornstars/${actor.identifier}.jpg`;

	// include is optional; omitting it skips the releases
	if (include?.releases) {
		profile.releases = await fetchActorReleases(actor, entity);
	}

	return profile;
}
/**
 * Fetch one page of already released scenes of a channel from the search cluster.
 *
 * @param {object} site - channel; `parameters.siteId` is matched against `series.id`
 * @param {number} [page=1] - 1-based page number, 50 scenes per page
 * @returns {Promise<object[]|number>} the releases, newest first, or the
 *   response status when the request failed
 */
async function fetchLatest(site, page = 1) {
	const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
		size: 50,
		from: (page - 1) * 50,
		query: {
			bool: {
				must: [
					{
						match: {
							status: 'ok',
						},
					},
					{
						range: {
							releaseDate: {
								lte: 'now',
							},
						},
					},
					/*
					 * global fetch
					{
						nested: {
							path: 'studio',
							query: {
								bool: {
									must: [
										{
											match: {
												'studio.name': {
													operator: 'AND',
													query: 'bang! originals',
												},
											},
										},
									],
								},
							},
						},
					},
					*/
					{
						nested: {
							path: 'series',
							query: {
								bool: {
									must: [
										{
											match: {
												'series.id': {
													operator: 'AND',
													query: site.parameters.siteId,
												},
											},
										},
									],
								},
							},
						},
					},
				],
				must_not: [
					{
						match: {
							type: 'trailer',
						},
					},
				],
			},
		},
		sort: [
			{
				releaseDate: {
					order: 'desc',
				},
			},
		],
	}, {
		encodeJSON: true,
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	// a failed request carries no hits; report the status like the other fetchers instead of throwing on res.body
	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.body.hits.hits, site);
}
/**
 * Fetch one page of scenes of a channel with a release date up to a week ahead.
 *
 * @param {object} site - channel; `parameters.siteId` is matched against `series.id`
 * @param {number} [page=1] - 1-based page number, 50 scenes per page
 * @returns {Promise<object[]|number>} the releases, newest first, or the
 *   response status when the request failed
 */
async function fetchUpcoming(site, page = 1) {
	const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
		size: 50,
		from: (page - 1) * 50,
		query: {
			bool: {
				must: [
					{
						match: {
							status: 'ok',
						},
					},
					{
						// NOTE(review): there is no lower bound, so scenes that are already released match as well — confirm this is intended
						range: {
							releaseDate: {
								lte: 'now+7d',
							},
						},
					},
					{
						nested: {
							path: 'series',
							query: {
								bool: {
									must: [
										{
											match: {
												'series.id': {
													operator: 'AND',
													query: site.parameters.siteId,
												},
											},
										},
									],
								},
							},
						},
					},
				],
				must_not: [
					{
						match: {
							type: 'trailer',
						},
					},
				],
			},
		},
		sort: [
			{
				releaseDate: {
					order: 'desc',
				},
			},
		],
	}, {
		encodeJSON: true,
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	// a failed request carries no hits; report the status like the other fetchers instead of throwing on res.body
	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.body.hits.hits, site);
}
/**
 * Fetch a single scene from the search cluster by the ID encoded in its URL.
 *
 * @param {string} url - scene URL; the ID is read from the second path segment
 * @param {object} entity - entity passed along to `scrapeScene`
 * @param {object} [baseRelease] - release from the overview; returned untouched when it has an `entryId`
 * @param {object} [options] - passed along to `scrapeScene`
 * @returns {Promise<object|number>} the release, or the response status when the request failed
 */
async function fetchScene(url, entity, baseRelease, options) {
	if (baseRelease?.entryId) {
		// overview and deep data is the same, don't hit server unnecessarily
		return baseRelease;
	}

	const encodedId = new URL(url).pathname.split('/')[2];
	const entryId = decodeId(encodedId);

	const res = await http.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	// a failed lookup has no document to scrape; report the status instead of throwing on res.body
	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.body._source, entity, options); // eslint-disable-line no-underscore-dangle
}
/**
 * Look an actor up by name in the search cluster and scrape the profile.
 *
 * The five best scoring actors are requested; only a hit whose name equals
 * the requested name, ignoring case, is accepted.
 *
 * @param {object} actor - actor to look up; only `name` is used
 * @param {object} context - scrape context; `entity` is passed along to `scrapeProfile`
 * @param {object} include - passed along to `scrapeProfile`
 * @returns {Promise<object|number|null>} the profile, null when no hit matches the
 *   name exactly, or the response status when the request failed
 */
async function fetchProfile({ name: actorName }, context, include) {
	const endpoint = `https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`;

	const nameMatches = { match: { name: { query: actorName, operator: 'and' } } };
	const isAvailable = { match: { status: 'ok' } };

	const search = {
		size: 5,
		sort: [{ _score: { order: 'desc' } }],
		query: {
			bool: {
				must: [nameMatches, isAvailable],
			},
		},
	};

	const res = await http.post(endpoint, search, {
		headers: { Authorization: `Basic ${authKey}` },
		encodeJSON: true,
	});

	if (!res.ok) {
		return res.status;
	}

	/* eslint-disable no-underscore-dangle */
	const exactHit = res.body.hits.hits.find((hit) => hit._source.name.toLowerCase() === actorName.toLowerCase());

	return exactHit
		? scrapeProfile(exactHit._source, context.entity, include)
		: null;
	/* eslint-enable no-underscore-dangle */
}
// Entry points of the legacy scraper: channel listings (latest and upcoming), single scene lookup and actor profile lookup.
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
fetchUpcoming,
};

View File

@ -1,29 +1,15 @@
'use strict';
const http = require('../utils/http');
const qu = require('../utils/qu');
const { extractDate } = require('../utils/qu');
const { inchesToCm } = require('../utils/convert');
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const capitalize = require('../utils/capitalize');
const clusterId = '617fb597b659459bafe6472470d9073a';
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
const genderMap = {
M: 'male',
F: 'female',
};
function getScreenUrl(item, scene) {
if (!scene.dvd?.id || !item?.screenId) {
return null;
/*
function encodeId(id) {
if (!id) {
return id;
}
return `https://i.bang.com/screenshots/${scene.dvd.id}/${scene.type}/${scene.order}/${item.screenId}.jpg`;
}
function encodeId(id) {
return Buffer
.from(id, 'hex')
.toString('base64')
@ -31,8 +17,13 @@ function encodeId(id) {
.replace(/\//g, '_')
.replace(/=/g, ',');
}
*/
function decodeId(id) {
if (!id) {
return id;
}
const restoredId = id
.replace(/-/g, '+')
.replace(/_/g, '/')
@ -43,387 +34,150 @@ function decodeId(id) {
.toString('hex');
}
async function fetchPhotos(scene) {
const photoPaths = Array.from({ length: scene.photos }, (value, index) => `/${scene.dvd.id}/${scene.identifier}/final/${String(index + 1).padStart(6, '0')}.jpg`);
function scrapeAll(scenes, entity) {
return scenes.map(({ query }) => {
const release = {};
const res = await http.post('https://www.bang.com/sign-images', {
images: photoPaths,
}, {
encodeJSON: false,
release.url = query.url('.video_preview_container > a', { origin: entity.url });
release.entryId = query.attribute(null, 'data-video-id') || decodeId(new URL(release.url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
release.title = query.content('.video_preview_container >a > span.block');
release.date = query.date('.videoInfo .statistics span', 'MMM DD, YYYY');
release.actors = query.elements('.videoInfo a[href*="/pornstar"]').map((el) => ({
name: unprint.query.content(el),
url: unprint.query.url(el, null, { origin: 'https://www.bang.com' }),
}));
const poster = query.img('img[data-videopreview-target="image"]');
const posterUrl = new URL(poster);
if (poster) {
release.poster = [
`${posterUrl.origin}${posterUrl.pathname}`,
posterUrl.href,
];
}
release.teaser = query.video();
return release;
});
if (res.ok && res.body.images) {
return res.body.images.map((image) => qu.prefixUrl(image, 'https://photos.bang.com'));
}
return null;
}
async function scrapeScene(scene, entity, options) {
const release = {
entryId: scene.id,
title: scene.name || (scene.dvd?.name && scene.type === 'bonus' && capitalize(`${scene.dvd.name} - Bonus Scene ${scene.order || 1}`)) || null,
description: scene.description,
tags: scene.genres.concat(scene.actions).map((genre) => genre.name),
duration: scene.duration,
};
async function scrapeScene({ query }, { url, entity }) {
const release = {};
const data = query.json('script[type="application/ld+json"]');
const slug = slugify(release.title);
release.url = `https://www.bang.com/video/${encodeId(release.entryId)}/${slug}`;
release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
const date = new Date(scene.releaseDate);
release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));
release.title = data?.name || query.content('.video-heading');
release.description = data?.description || query.content('.expanded p.clear-both');
release.actors = scene.actors.map((actor) => ({ name: actor.name, gender: genderMap[actor.gender] }));
release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD');
release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span');
if (scene.is4k) release.tags.push('4k');
if (scene.gay) release.tags.push('gay');
release.actors = data?.actor.map((actor) => ({
name: actor.name,
url: actor.url,
})) || query.contents('.expanded a[href*="/pornstar"]');
const defaultPoster = scene.screenshots.find((photo) => photo.default === true);
const screens = scene.screenshots.filter((photo) => photo.default === false);
release.tags = query.contents('.expanded .genres');
const remainingScreens = defaultPoster ? screens : screens.slice(1);
const poster = defaultPoster || screens[0];
release.poster = data?.thumbnailUrl || data?.contentUrl || query.attribute('meta[name*="og:image"]', 'content');
release.teaser = query.video('video[data-modal-target="videoImage"] source');
release.poster = getScreenUrl(poster, scene);
release.photos = remainingScreens.map((photo) => getScreenUrl(photo, scene));
release.photos = JSON.parse(query.attribute('[data-video-gallery-photos-value]', 'data-video-gallery-photos-value'));
release.photoCount = query.number('[data-video-gallery-count-value]', { attribute: 'data-video-gallery-count-value' });
if (options?.includePhotos) {
const photos = await fetchPhotos(scene);
const channelName = query.content('.expanded a[href*="?in="]')?.trim();
if (photos?.length > 0) {
release.photos = photos;
}
if (channelName) {
release.channel = entity.children?.find((channel) => new RegExp(channel.name, 'i').test(channelName) || slugify(channelName) === channel.slug)?.slug;
}
release.teaser = `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`;
release.channel = scene.series.name
.replace(/[! .]/g, '')
.replace('&', 'and');
return release;
}
function scrapeAll(scenes, entity) {
return Promise.all(scenes.map(({ _source: scene }) => scrapeScene(scene, entity)));
}
async function fetchActorScenes(element, url, entity, page = 1, acc = []) {
const scenes = scrapeAll(unprint.initAll(element, '.search-grid li'), entity);
async function fetchActorReleases(actor, entity) {
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
size: 50,
query: {
bool: {
must: [
{
match: {
status: 'ok',
},
},
{
nested: {
path: 'actors',
query: {
bool: {
must: [
{
match: {
'actors.mongoId': {
operator: 'AND',
query: actor.id,
},
},
},
],
},
},
},
},
],
must_not: [
{
match: {
type: 'trailer',
},
},
],
},
},
sort: [
{
releaseDate: {
order: 'desc',
},
},
],
}, {
encodeJSON: true,
headers: {
Authorization: `Basic ${authKey}`,
},
});
if (scenes.length) {
const nextPageRes = await unprint.post(url, { page: page + 1 });
return scrapeAll(res.body.hits.hits, entity);
}
async function scrapeProfile(actor, entity, include) {
const profile = {};
profile.aliases = actor.aliases;
profile.dateOfBirth = extractDate(actor.birthDate);
profile.gender = ({ F: 'female', M: 'male' })[actor.gender];
profile.ethnicity = actor.ethnicity;
profile.nationality = actor.nationality;
profile.birthPlace = `${actor.birthCity}, ${actor.birthCountry || ''}`;
profile.hair = actor.hairColor;
profile.eyes = actor.eyeColor;
profile.naturalBoobs = actor.naturalBreasts;
if (actor.measurements) {
const { cupSize, shoulder, chest, waist, height } = actor.measurements;
if (height) profile.height = inchesToCm(height);
if (cupSize) profile.cup = cupSize;
// [SIC]
if (shoulder) profile.bust = shoulder;
if (chest) profile.waist = chest;
if (waist) profile.hip = waist;
if (nextPageRes.ok) {
return fetchActorScenes(nextPageRes.context.element, url, entity, page + 1, acc.concat(scenes));
}
}
if (actor.twitter) profile.social = [`https://www.twitter.com/${actor.twitter}`];
if (actor.image) profile.avatar = `https://i.bang.com/pornstars/${actor.identifier}.jpg`;
return acc.concat(scenes);
}
if (include.releases) {
profile.releases = await fetchActorReleases(actor, entity);
/**
 * Scrape an actor profile from a parsed actor page.
 *
 * Each detail is read from the first bold span following the text node that
 * contains its label.
 *
 * @param {object} context - parsed page: `query` helpers and the root `element`
 * @param {string} url - actor page URL, passed along when fetching the actor's scenes
 * @param {object} entity - entity passed along when fetching the actor's scenes
 * @param {object} include - `scenes` requests the actor's scenes as well
 * @returns {Promise<object>} the profile
 */
async function scrapeProfile({ query, element }, url, entity, include) {
	const valueAfter = (label) => `//text()[contains(., "${label}")]/following-sibling::span[contains(@class, "font-bold")][1]`;

	const profile = {
		dateOfBirth: query.date(valueAfter('Born'), 'MMMM D, YYYY'),
		birthPlace: query.content(valueAfter('in')),
		ethnicity: query.content(valueAfter('Ethnicity')),
		hairColor: query.content(valueAfter('Hair Color')),
		eyes: query.content(valueAfter('Eye Color')),
	};

	const avatar = query.img('img[alt*="profile"][src*="https://i.bang.com/pornstars/"]');

	if (avatar) {
		const avatarUrl = new URL(avatar);

		profile.avatar = [
			`${avatarUrl.origin}${avatarUrl.pathname}`, // full size
			avatar,
		];
	}

	if (include.scenes) {
		profile.scenes = await fetchActorScenes(element, url, entity);
	}

	return profile;
}
async function fetchLatest(site, page = 1) {
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
size: 50,
from: (page - 1) * 50,
query: {
bool: {
must: [
{
match: {
status: 'ok',
},
},
{
range: {
releaseDate: {
lte: 'now',
},
},
},
/*
* global fetch
{
nested: {
path: 'studio',
query: {
bool: {
must: [
{
match: {
'studio.name': {
operator: 'AND',
query: 'bang! originals',
},
},
},
],
},
},
},
},
*/
{
nested: {
path: 'series',
query: {
bool: {
must: [
{
match: {
'series.id': {
operator: 'AND',
query: site.parameters.siteId,
},
},
},
],
},
},
},
},
],
must_not: [
{
match: {
type: 'trailer',
},
},
],
},
},
sort: [
{
releaseDate: {
order: 'desc',
},
},
],
}, {
encodeJSON: true,
headers: {
Authorization: `Basic ${authKey}`,
},
});
return scrapeAll(res.body.hits.hits, site);
}
async function fetchUpcoming(site, page = 1) {
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
size: 50,
from: (page - 1) * 50,
query: {
bool: {
must: [
{
match: {
status: 'ok',
},
},
{
range: {
releaseDate: {
lte: 'now+7d',
},
},
},
{
nested: {
path: 'series',
query: {
bool: {
must: [
{
match: {
'series.id': {
operator: 'AND',
query: site.parameters.siteId,
},
},
},
],
},
},
},
},
],
must_not: [
{
match: {
type: 'trailer',
},
},
],
},
},
sort: [
{
releaseDate: {
order: 'desc',
},
},
],
}, {
encodeJSON: true,
headers: {
Authorization: `Basic ${authKey}`,
},
});
return scrapeAll(res.body.hits.hits, site);
}
async function fetchScene(url, entity, baseRelease, options) {
if (baseRelease?.entryId) {
// overview and deep data is the same, don't hit server unnecessarily
return baseRelease;
}
const encodedId = new URL(url).pathname.split('/')[2];
const entryId = decodeId(encodedId);
const res = await http.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
headers: {
Authorization: `Basic ${authKey}`,
},
});
return scrapeScene(res.body._source, entity, options); // eslint-disable-line no-underscore-dangle
}
async function fetchProfile({ name: actorName }, context, include) {
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
size: 5,
sort: [{
_score: {
order: 'desc',
},
}],
query: {
bool: {
must: [
{
match: {
name: {
query: actorName,
operator: 'and',
},
},
},
{
match: {
status: 'ok',
},
},
],
},
},
}, {
headers: {
Authorization: `Basic ${authKey}`,
},
encodeJSON: true,
});
async function fetchLatest(channel, page = 1) {
const url = `${channel.url}&page=${page}`;
const res = await unprint.get(url, { selectAll: '.search-grid li' });
if (res.ok) {
const actor = res.body.hits.hits.find((hit) => hit._source.name.toLowerCase() === actorName.toLowerCase());
if (actor) {
return scrapeProfile(actor._source, context.entity, include);
}
return null;
return scrapeAll(res.context, channel);
}
return res.status;
}
/**
 * Quote a string for use as an XPath 1.0 string literal.
 *
 * XPath 1.0 has no escape sequences: a literal cannot contain its own
 * delimiter, so a value holding both quote kinds is assembled with concat().
 */
function toXPathLiteral(value) {
	const text = String(value);

	if (!text.includes('"')) {
		return `"${text}"`;
	}

	if (!text.includes("'")) {
		return `'${text}'`;
	}

	return `concat(${text.split('"').map((part) => `"${part}"`).join(', \'"\', ')})`;
}

/**
 * Find an actor through the site's pornstar search and scrape the profile page.
 *
 * @param {object} actor - actor to look up; only `name` is used
 * @param {object} context - scrape context; `entity` is passed along to `scrapeProfile`
 * @param {object} include - passed along to `scrapeProfile`
 * @returns {Promise<object|number|null>} the profile, null when the search results
 *   hold no link for the actor, or the status of the request that failed
 */
async function fetchProfile({ name: actorName }, { entity }, include) {
	const searchRes = await unprint.get(`https://www.bang.com/pornstars?term=${slugify(actorName, '+')}`);

	if (!searchRes.ok) {
		return searchRes.status;
	}

	// the name goes into an XPath string literal, quote it so names containing " stay valid expressions
	const url = searchRes.context.query.url(`//a[contains(.//span, ${toXPathLiteral(actorName)})]`);

	if (!url) {
		return null;
	}

	const actorRes = await unprint.get(url);

	if (actorRes.ok) {
		return scrapeProfile(actorRes.context, url, entity, include);
	}

	return actorRes.status;
}
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
fetchUpcoming,
scrapeScene,
useUnprint: true,
};

View File

@ -237,6 +237,10 @@ async function destroyBypassSession(sessionId) {
}
async function destroyBypassSessions() {
if (!config.bypass.cloudflare.enabled) {
return;
}
const sessionListRes = await limiters.bypass.schedule(async () => bhttp.post(config.bypass.cloudflare.path, {
cmd: 'sessions.list',
}, {