Refactored Bang! scraper to match new website, first use of unprint.
|
@ -7,7 +7,7 @@ Use [nvm](https://github.com/creationix/nvm) to install NodeJS v16.8.0 or newer.
|
|||
`npm install`
|
||||
|
||||
### Set up database
|
||||
Install PostgreSQL, make sure password authentication is enabled (scram-sha-256) and create a database with a privileged user.
|
||||
Install PostgreSQL, make sure password authentication is enabled (scram-sha-256). Create a database with a fully privileged user, and a visitor user without privileges (they will be provided by the migration).
|
||||
|
||||
### Configuration
|
||||
Do not modify `config/default.js`, but instead create a copy at `config/local.js` containing the properties you wish to change. If you have set `NODE_ENV`, copy `assets/js/config/default.js` to `assets/js/config/[environment].js`. After setting up PostgreSQL and configuring the details, run the following commands to create and populate the tables, and build the project:
|
||||
|
|
|
@ -294,13 +294,13 @@ module.exports = {
|
|||
},
|
||||
bypass: {
|
||||
browser: {
|
||||
enable: true,
|
||||
enable: false,
|
||||
hostnames: [ // these can run in the same browser session
|
||||
'www.kink.com',
|
||||
],
|
||||
},
|
||||
cloudflare: {
|
||||
enable: true,
|
||||
enable: false,
|
||||
auto: true, // try bypass when CF challenge is detected
|
||||
path: 'http://localhost:8191/v1',
|
||||
sharedHostnames: [ // these can run in the same browser session
|
||||
|
|
|
@ -134,6 +134,7 @@
|
|||
"tough-cookie": "^4.0.0",
|
||||
"tunnel": "0.0.6",
|
||||
"undici": "^4.13.0",
|
||||
"unprint": "^0.7.2",
|
||||
"url-pattern": "^1.0.3",
|
||||
"v-tooltip": "^2.0.3",
|
||||
"video.js": "^7.11.4",
|
||||
|
|
After Width: | Height: | Size: 41 KiB |
After Width: | Height: | Size: 41 KiB |
After Width: | Height: | Size: 30 KiB |
After Width: | Height: | Size: 41 KiB |
After Width: | Height: | Size: 30 KiB |
After Width: | Height: | Size: 36 KiB |
After Width: | Height: | Size: 30 KiB |
After Width: | Height: | Size: 35 KiB |
After Width: | Height: | Size: 64 KiB |
After Width: | Height: | Size: 15 KiB |
After Width: | Height: | Size: 6.3 KiB |
After Width: | Height: | Size: 4.8 KiB |
After Width: | Height: | Size: 166 KiB |
After Width: | Height: | Size: 172 KiB |
After Width: | Height: | Size: 166 KiB |
After Width: | Height: | Size: 166 KiB |
After Width: | Height: | Size: 34 KiB |
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 7.4 KiB |
After Width: | Height: | Size: 7.4 KiB |
After Width: | Height: | Size: 26 KiB |
After Width: | Height: | Size: 45 KiB |
After Width: | Height: | Size: 30 KiB |
After Width: | Height: | Size: 46 KiB |
After Width: | Height: | Size: 68 KiB |
After Width: | Height: | Size: 378 KiB |
After Width: | Height: | Size: 416 KiB |
After Width: | Height: | Size: 78 KiB |
After Width: | Height: | Size: 2.4 KiB |
Before Width: | Height: | Size: 2.2 KiB After Width: | Height: | Size: 2.2 KiB |
Before Width: | Height: | Size: 7.0 KiB After Width: | Height: | Size: 7.0 KiB |
Before Width: | Height: | Size: 7.0 KiB After Width: | Height: | Size: 7.0 KiB |
Before Width: | Height: | Size: 7.0 KiB After Width: | Height: | Size: 7.0 KiB |
Before Width: | Height: | Size: 2.0 KiB After Width: | Height: | Size: 2.0 KiB |
Before Width: | Height: | Size: 1.0 KiB After Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 3.8 KiB After Width: | Height: | Size: 3.9 KiB |
Before Width: | Height: | Size: 2.1 KiB After Width: | Height: | Size: 2.2 KiB |
Before Width: | Height: | Size: 2.1 KiB After Width: | Height: | Size: 2.2 KiB |
After Width: | Height: | Size: 89 KiB |
After Width: | Height: | Size: 20 KiB |
Before Width: | Height: | Size: 9.5 KiB After Width: | Height: | Size: 9.6 KiB |
Before Width: | Height: | Size: 7.0 KiB After Width: | Height: | Size: 7.0 KiB |
Before Width: | Height: | Size: 7.0 KiB After Width: | Height: | Size: 7.0 KiB |
Before Width: | Height: | Size: 7.0 KiB After Width: | Height: | Size: 7.0 KiB |
Before Width: | Height: | Size: 4.8 KiB After Width: | Height: | Size: 4.8 KiB |
Before Width: | Height: | Size: 5.7 KiB After Width: | Height: | Size: 5.8 KiB |
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 12 KiB |
Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 16 KiB |
|
@ -906,7 +906,7 @@ const sites = [
|
|||
{
|
||||
name: 'Trickery',
|
||||
slug: 'bangtrickery',
|
||||
url: 'https://www.bang.com/original/4800/bang-trickery',
|
||||
url: 'https://www.bang.com/videos?in=BANG%21+Trickery',
|
||||
parameters: { siteId: 4800 },
|
||||
parent: 'bang',
|
||||
},
|
||||
|
@ -914,23 +914,21 @@ const sites = [
|
|||
name: 'Yngr',
|
||||
slug: 'yngrcom',
|
||||
alias: ['byngr'],
|
||||
// url: 'https://www.bang.com/original/5010/bang-yngr',
|
||||
url: 'https://yngr.com',
|
||||
url: 'https://www.bang.com/videos?in=yngr.com',
|
||||
parameters: { siteId: 5010 },
|
||||
parent: 'bang',
|
||||
},
|
||||
{
|
||||
name: 'Roadside XXX',
|
||||
slug: 'bangroadsidexxx',
|
||||
// url: 'https://www.bang.com/original/4864/roadside-xxx',
|
||||
url: 'https://roadsidexxx.com',
|
||||
url: 'https://www.bang.com/videos?in=Bang%21+Roadside+XXX',
|
||||
parameters: { siteId: 4864 },
|
||||
parent: 'bang',
|
||||
},
|
||||
{
|
||||
name: 'Surprise',
|
||||
slug: 'bangsurprise',
|
||||
url: 'https://www.bang.com/original/5000/bang-surprise',
|
||||
url: 'https://www.bang.com/videos?in=BANG%21+Surprise',
|
||||
parameters: { siteId: 5000 },
|
||||
parent: 'bang',
|
||||
},
|
||||
|
@ -938,37 +936,35 @@ const sites = [
|
|||
name: 'Real Teens',
|
||||
slug: 'bangrealteens',
|
||||
alias: ['brealteens'],
|
||||
url: 'https://www.bang.com/original/3366/bang-real-teens',
|
||||
url: 'https://www.bang.com/videos?in=BANG%21+Real+Teens',
|
||||
parameters: { siteId: 3366 },
|
||||
parent: 'bang',
|
||||
},
|
||||
{
|
||||
name: 'FCK.news',
|
||||
slug: 'bangfakenews',
|
||||
// url: 'https://www.bang.com/original/4998/bang-fckNews',
|
||||
url: 'https://fck.news',
|
||||
url: 'https://www.bang.com/videos?in=Bang%21+Fake+news',
|
||||
parameters: { siteId: 4998 },
|
||||
parent: 'bang',
|
||||
},
|
||||
{
|
||||
name: 'Pretty & Raw',
|
||||
slug: 'prettyandraw',
|
||||
// url: 'https://www.bang.com/original/4792/bang-pretty-and-raw',
|
||||
url: 'https://prettyandraw.com',
|
||||
url: 'https://www.bang.com/videos?in=Pretty+%26+Raw',
|
||||
parameters: { siteId: 4792 },
|
||||
parent: 'bang',
|
||||
},
|
||||
{
|
||||
name: 'Japan',
|
||||
slug: 'bangjapan',
|
||||
url: 'https://www.bang.com/original/3079/bang-japan',
|
||||
url: 'https://www.bang.com/videos?in=BANG%21+Japan',
|
||||
parameters: { siteId: 3079, ignore: true },
|
||||
parent: 'bang',
|
||||
},
|
||||
{
|
||||
name: 'Rammed',
|
||||
slug: 'bangrammed',
|
||||
url: 'https://www.bang.com/original/4836/bang-rammed',
|
||||
url: 'https://www.bang.com/videos?in=BANG%21+Rammed',
|
||||
parameters: { siteId: 4836 },
|
||||
parent: 'bang',
|
||||
},
|
||||
|
@ -976,14 +972,14 @@ const sites = [
|
|||
name: 'Glamkore',
|
||||
slug: 'bangglamkore',
|
||||
alias: ['bglamkore'],
|
||||
url: 'https://www.bang.com/original/4586/bang-glamkore',
|
||||
url: 'https://www.bang.com/videos?in=BANG%21+Glamkore',
|
||||
parameters: { siteId: 4586 },
|
||||
parent: 'bang',
|
||||
},
|
||||
{
|
||||
name: 'Screw The Cops',
|
||||
slug: 'screwthecops',
|
||||
url: 'https://www.bang.com/original/4710/bang-screw-cops',
|
||||
url: 'https://www.bang.com/videos?in=Screw+the+Cops',
|
||||
parameters: { siteId: 4710 },
|
||||
parent: 'bang',
|
||||
},
|
||||
|
@ -991,7 +987,7 @@ const sites = [
|
|||
name: 'Real MILFs',
|
||||
slug: 'bangrealmilfs',
|
||||
alias: ['brealmilfs'],
|
||||
url: 'https://www.bang.com/original/4448/bang-real-milfs',
|
||||
url: 'https://www.bang.com/videos?in=BANG%21+Real+Milfs',
|
||||
parameters: { siteId: 4448 },
|
||||
parent: 'bang',
|
||||
},
|
||||
|
@ -999,7 +995,7 @@ const sites = [
|
|||
name: 'Confessions',
|
||||
slug: 'bangconfessions',
|
||||
alias: ['bconfessions'],
|
||||
url: 'https://www.bang.com/original/4308/bang-confessions',
|
||||
url: 'https://www.bang.com/videos?in=BANG%21+Confessions',
|
||||
parameters: { siteId: 4308 },
|
||||
parent: 'bang',
|
||||
},
|
||||
|
@ -1007,14 +1003,14 @@ const sites = [
|
|||
name: 'Casting',
|
||||
slug: 'bangcasting',
|
||||
alias: ['bcasting'],
|
||||
url: 'https://www.bang.com/original/3261/bang-casting',
|
||||
url: 'https://www.bang.com/videos?in=BANG%21+Casting',
|
||||
parameters: { siteId: 3261 },
|
||||
parent: 'bang',
|
||||
},
|
||||
{
|
||||
name: 'Bang! Podcast',
|
||||
slug: 'bangpodcast',
|
||||
url: 'https://www.bang.com/videos?in=bang!%20podcast',
|
||||
url: 'https://www.bang.com/videos?in=bang%21+podcast',
|
||||
parameters: { siteId: 6305 },
|
||||
parent: 'bang',
|
||||
},
|
||||
|
@ -11518,6 +11514,15 @@ const sites = [
|
|||
movie: 'https://www.addicted2girls.com/en/dvd',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: '3rddegreefilms',
|
||||
name: '3rd Degree Films',
|
||||
url: 'https://www.3rddegreefilms.com',
|
||||
parent: 'zerotolerance',
|
||||
parameters: {
|
||||
movie: 'https://www.genderxfilms.com/en/dvd',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'genderxfilms',
|
||||
name: 'GenderXFilms',
|
||||
|
@ -11525,7 +11530,8 @@ const sites = [
|
|||
tags: ['transsexual'],
|
||||
parent: 'zerotolerance',
|
||||
parameters: {
|
||||
movie: 'https://www.genderxfilms.com/en/dvd',
|
||||
scene: 'https://www.3rddegreefilms.com/en/video/3rddegreefilms',
|
||||
movie: 'https://www.3rddegreefilms.com/en/movie',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
const config = require('config');
|
||||
const util = require('util');
|
||||
const unprint = require('unprint');
|
||||
// const log = require('why-is-node-running');
|
||||
const Inspector = require('inspector-api');
|
||||
const fs = require('fs').promises;
|
||||
|
@ -25,6 +26,13 @@ const getFileEntries = require('./utils/file-entries');
|
|||
const inspector = new Inspector();
|
||||
let done = false;
|
||||
|
||||
unprint.options({
|
||||
timeout: 5000,
|
||||
headers: {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
|
||||
},
|
||||
});
|
||||
|
||||
/*
|
||||
function logActive() {
|
||||
setTimeout(() => {
|
||||
|
|
22
src/deep.js
|
@ -2,6 +2,7 @@
|
|||
|
||||
const util = require('util');
|
||||
const Promise = require('bluebird');
|
||||
const unprint = require('unprint');
|
||||
const { mergeAdvanced: merge } = require('object-merge-advanced');
|
||||
|
||||
const argv = require('./argv');
|
||||
|
@ -54,12 +55,33 @@ function toBaseReleases(baseReleasesOrUrls, entity = null) {
|
|||
.filter(Boolean);
|
||||
}
|
||||
|
||||
/**
 * Fetch a release page through unprint and hand the parsed context to the
 * scraper's `scrapeMovie` or `scrapeScene` method.
 *
 * @param {object} scraper - scraper module exposing scrapeScene and/or scrapeMovie
 * @param {string} url - page to request
 * @param {object} entity - entity passed through to the scraper
 * @param {object} baseRelease - release data passed through to the scraper
 * @param {object} options - passed to the scraper as its last argument
 * @param {string} type - 'movie' selects scrapeMovie, anything else scrapeScene
 * @returns {Promise<*>} the scraper's result, or the response status when the request was not ok
 */
async function fetchUnprintScene(scraper, url, entity, baseRelease, options, type) {
	const res = await unprint.get(url, { rejectUnauthorized: false });

	if (!res.ok) {
		return res.status;
	}

	const scrapeMethod = type === 'movie' ? 'scrapeMovie' : 'scrapeScene';
	const details = {
		url,
		entity,
		baseRelease,
		headers: res.headers,
	};

	return scraper[scrapeMethod](res.context, details, options);
}
|
||||
|
||||
async function fetchScene(scraper, url, entity, baseRelease, options, type = 'scene') {
|
||||
if ((type === 'scene' && scraper.fetchScene) || (type === 'movie' && scraper.fetchMovie)) {
|
||||
return scraper[type === 'movie' ? 'fetchMovie' : 'fetchScene'](baseRelease.url, entity, baseRelease, options, null);
|
||||
}
|
||||
|
||||
if ((type === 'scene' && scraper.scrapeScene) || (type === 'movie' && scraper.scrapeMovie)) {
|
||||
if (scraper.useUnprint) {
|
||||
return fetchUnprintScene(scraper, url, entity, baseRelease, options, type);
|
||||
}
|
||||
|
||||
const session = qu.session();
|
||||
|
||||
const res = await qu.get(url, null, null, {
|
||||
|
|
|
@ -0,0 +1,429 @@
|
|||
'use strict';
|
||||
|
||||
const http = require('../utils/http');
|
||||
const qu = require('../utils/qu');
|
||||
const { extractDate } = require('../utils/qu');
|
||||
const { inchesToCm } = require('../utils/convert');
|
||||
const slugify = require('../utils/slugify');
|
||||
const capitalize = require('../utils/capitalize');
|
||||
|
||||
const clusterId = '617fb597b659459bafe6472470d9073a';
|
||||
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
|
||||
|
||||
const genderMap = {
|
||||
M: 'male',
|
||||
F: 'female',
|
||||
};
|
||||
|
||||
/**
 * Build the screenshot URL for one screenshot item of a scene.
 *
 * @param {object} [item] - screenshot entry; its `screenId` is used
 * @param {object} scene - scene record; `dvd.id`, `type` and `order` are used
 * @returns {string|null} the screenshot URL, or null when the DVD ID or screen ID is missing
 */
function getScreenUrl(item, scene) {
	const dvdId = scene.dvd?.id;

	if (dvdId && item?.screenId) {
		return `https://i.bang.com/screenshots/${dvdId}/${scene.type}/${scene.order}/${item.screenId}.jpg`;
	}

	return null;
}
|
||||
|
||||
/**
 * Convert a hexadecimal ID into the URL-safe base64 variant used in video URLs:
 * standard base64 with `+` -> `-`, `/` -> `_` and `=` -> `,`.
 *
 * @param {string} id - hexadecimal ID
 * @returns {string} the encoded ID; a missing or empty ID is returned unchanged
 */
function encodeId(id) {
	// Buffer.from(undefined, 'hex') throws a TypeError; pass missing IDs through instead
	if (!id) {
		return id;
	}

	return Buffer
		.from(id, 'hex')
		.toString('base64')
		.replace(/\+/g, '-')
		.replace(/\//g, '_')
		.replace(/=/g, ',');
}
|
||||
|
||||
/**
 * Convert a URL-safe base64 ID (`-`, `_` and `,` standing in for `+`, `/` and `=`)
 * back into its hexadecimal form.
 *
 * @param {string} id - encoded ID as found in a video URL
 * @returns {string} the hexadecimal ID; a missing or empty ID is returned unchanged
 */
function decodeId(id) {
	// callers derive the ID from a URL path segment that may be absent
	if (!id) {
		return id;
	}

	const restoredId = id
		.replace(/-/g, '+')
		.replace(/_/g, '/')
		.replace(/,/g, '=');

	return Buffer
		.from(restoredId, 'base64')
		.toString('hex');
}
|
||||
|
||||
/**
 * Request signed URLs for the photo set of a scene.
 *
 * Photo paths are generated from `scene.dvd.id`, `scene.identifier` and a
 * 1-based, six-digit zero-padded index up to `scene.photos`.
 *
 * @param {object} scene - scene record; `photos` is the number of photos
 * @returns {Promise<string[]|null>} signed photo URLs, or null when the scene has
 *   no photos, lacks a DVD ID, or the signing request fails
 */
async function fetchPhotos(scene) {
	const photoCount = Number(scene.photos) || 0;

	// nothing to sign, or the paths cannot be built: don't hit the server needlessly
	if (photoCount <= 0 || !scene.dvd?.id) {
		return null;
	}

	const photoPaths = Array.from(
		{ length: photoCount },
		(value, index) => `/${scene.dvd.id}/${scene.identifier}/final/${String(index + 1).padStart(6, '0')}.jpg`,
	);

	const res = await http.post('https://www.bang.com/sign-images', {
		images: photoPaths,
	}, {
		encodeJSON: false,
	});

	if (res.ok && res.body.images) {
		return res.body.images.map((image) => qu.prefixUrl(image, 'https://photos.bang.com'));
	}

	return null;
}
|
||||
|
||||
/**
 * Convert a scene record from the search API into a release object.
 *
 * @param {object} scene - scene record (the `_source` of a search hit)
 * @param {object} entity - unused here; kept for the scraper call signature
 * @param {object} [options] - when `includePhotos` is set, signed photo set
 *   URLs are fetched and replace the screenshot list
 * @returns {Promise<object>} the release
 */
async function scrapeScene(scene, entity, options) {
	const release = {
		entryId: scene.id,
		// unnamed bonus scenes get a title derived from their DVD
		title: scene.name || (scene.dvd?.name && scene.type === 'bonus' && capitalize(`${scene.dvd.name} - Bonus Scene ${scene.order || 1}`)) || null,
		description: scene.description,
		tags: [...(scene.genres || []), ...(scene.actions || [])].map((genre) => genre.name),
		duration: scene.duration,
	};

	const slug = slugify(release.title);
	release.url = `https://www.bang.com/video/${encodeId(release.entryId)}/${slug}`;

	// keep only the calendar day of the release date, at UTC midnight
	const date = new Date(scene.releaseDate);
	release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));

	release.actors = (scene.actors || []).map((actor) => ({ name: actor.name, gender: genderMap[actor.gender] }));

	if (scene.is4k) release.tags.push('4k');
	if (scene.gay) release.tags.push('gay');

	const screenshots = scene.screenshots || [];
	const defaultPoster = screenshots.find((photo) => photo.default === true);
	const screens = screenshots.filter((photo) => photo.default === false);

	// without a default screenshot, the first regular one is used as the poster
	const remainingScreens = defaultPoster ? screens : screens.slice(1);
	const poster = defaultPoster || screens[0];

	release.poster = getScreenUrl(poster, scene);

	// getScreenUrl returns null for unusable items; keep those out of the photo list
	release.photos = remainingScreens
		.map((photo) => getScreenUrl(photo, scene))
		.filter(Boolean);

	if (options?.includePhotos && scene.dvd?.id) {
		const photos = await fetchPhotos(scene);

		if (photos?.length > 0) {
			release.photos = photos;
		}
	}

	// the title and screenshots already tolerate a missing DVD; do the same for the teaser
	if (scene.dvd?.id && scene.identifier) {
		release.teaser = `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`;
	}

	// NOTE(review): assumes scenes may come without a series, e.g. from actor searches - confirm
	if (scene.series?.name) {
		release.channel = scene.series.name
			.replace(/[! .]/g, '')
			.replace('&', 'and');
	}

	return release;
}
|
||||
|
||||
/**
 * Scrape every search hit into a release.
 *
 * @param {object[]} scenes - search hits; each hit's `_source` is the scene record
 * @param {object} entity - passed through to scrapeScene
 * @returns {Promise<object[]>} the scraped releases, in hit order
 */
function scrapeAll(scenes, entity) {
	// eslint-disable-next-line no-underscore-dangle
	const pendingReleases = scenes.map((hit) => scrapeScene(hit._source, entity));

	return Promise.all(pendingReleases);
}
|
||||
|
||||
/**
 * Fetch up to 50 of an actor's non-trailer videos, newest first.
 *
 * @param {object} actor - actor record; `id` is matched against `actors.mongoId`
 * @param {object} entity - passed through to scrapeAll
 * @returns {Promise<object[]>} scraped releases; empty when the request fails
 */
async function fetchActorReleases(actor, entity) {
	const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
		size: 50,
		query: {
			bool: {
				must: [
					{
						match: {
							status: 'ok',
						},
					},
					{
						nested: {
							path: 'actors',
							query: {
								bool: {
									must: [
										{
											match: {
												'actors.mongoId': {
													operator: 'AND',
													query: actor.id,
												},
											},
										},
									],
								},
							},
						},
					},
				],
				must_not: [
					{
						match: {
							type: 'trailer',
						},
					},
				],
			},
		},
		sort: [
			{
				releaseDate: {
					order: 'desc',
				},
			},
		],
	}, {
		encodeJSON: true,
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	// a failed request has no hits to read; the result lands in profile.releases, so return a list
	if (!res.ok || !res.body?.hits?.hits) {
		return [];
	}

	return scrapeAll(res.body.hits.hits, entity);
}
|
||||
|
||||
/**
 * Convert an actor record from the search API into a profile.
 *
 * @param {object} actor - actor record (the `_source` of a search hit)
 * @param {object} entity - passed through when fetching the actor's releases
 * @param {object} [include] - when `releases` is set, the actor's releases are fetched too
 * @returns {Promise<object>} the profile
 */
async function scrapeProfile(actor, entity, include) {
	const profile = {};

	profile.aliases = actor.aliases;
	profile.dateOfBirth = extractDate(actor.birthDate);
	profile.gender = ({ F: 'female', M: 'male' })[actor.gender];

	profile.ethnicity = actor.ethnicity;
	profile.nationality = actor.nationality;

	// join only the parts that exist, avoiding "undefined, " and a trailing ", "
	profile.birthPlace = [actor.birthCity, actor.birthCountry].filter(Boolean).join(', ') || null;

	profile.hair = actor.hairColor;
	profile.eyes = actor.eyeColor;

	profile.naturalBoobs = actor.naturalBreasts;

	if (actor.measurements) {
		const { cupSize, shoulder, chest, waist, height } = actor.measurements;

		if (height) profile.height = inchesToCm(height);
		if (cupSize) profile.cup = cupSize;

		// [SIC] the shifted mapping (shoulder -> bust, chest -> waist, waist -> hip) is
		// kept exactly as the original author marked it
		if (shoulder) profile.bust = shoulder;
		if (chest) profile.waist = chest;
		if (waist) profile.hip = waist;
	}

	if (actor.twitter) profile.social = [`https://www.twitter.com/${actor.twitter}`];
	if (actor.image) profile.avatar = `https://i.bang.com/pornstars/${actor.identifier}.jpg`;

	if (include?.releases) {
		profile.releases = await fetchActorReleases(actor, entity);
	}

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page (50 items) of a series' released, non-trailer videos, newest first.
 *
 * @param {object} site - channel; `parameters.siteId` is matched against `series.id`
 * @param {number} [page=1] - 1-based page number
 * @returns {Promise<object[]|number>} scraped releases, or the response status
 *   when the request was not ok
 */
async function fetchLatest(site, page = 1) {
	const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
		size: 50,
		from: (page - 1) * 50,
		query: {
			bool: {
				must: [
					{
						match: {
							status: 'ok',
						},
					},
					{
						range: {
							releaseDate: {
								lte: 'now',
							},
						},
					},
					/*
					 * global fetch
					{
						nested: {
							path: 'studio',
							query: {
								bool: {
									must: [
										{
											match: {
												'studio.name': {
													operator: 'AND',
													query: 'bang! originals',
												},
											},
										},
									],
								},
							},
						},
					},
					*/
					{
						nested: {
							path: 'series',
							query: {
								bool: {
									must: [
										{
											match: {
												'series.id': {
													operator: 'AND',
													query: site.parameters.siteId,
												},
											},
										},
									],
								},
							},
						},
					},
				],
				must_not: [
					{
						match: {
							type: 'trailer',
						},
					},
				],
			},
		},
		sort: [
			{
				releaseDate: {
					order: 'desc',
				},
			},
		],
	}, {
		encodeJSON: true,
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	// report a failed request by status, as fetchProfile does, instead of crashing on the body
	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.body.hits.hits, site);
}
|
||||
|
||||
/**
 * Fetch one page (50 items) of a series' non-trailer videos with a release
 * date up to seven days from now, newest first.
 *
 * @param {object} site - channel; `parameters.siteId` is matched against `series.id`
 * @param {number} [page=1] - 1-based page number
 * @returns {Promise<object[]|number>} scraped releases, or the response status
 *   when the request was not ok
 */
async function fetchUpcoming(site, page = 1) {
	const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
		size: 50,
		from: (page - 1) * 50,
		query: {
			bool: {
				must: [
					{
						match: {
							status: 'ok',
						},
					},
					{
						// NOTE(review): only an upper bound is set, so already released
						// videos match as well; confirm whether `gt: 'now'` was intended
						range: {
							releaseDate: {
								lte: 'now+7d',
							},
						},
					},
					{
						nested: {
							path: 'series',
							query: {
								bool: {
									must: [
										{
											match: {
												'series.id': {
													operator: 'AND',
													query: site.parameters.siteId,
												},
											},
										},
									],
								},
							},
						},
					},
				],
				must_not: [
					{
						match: {
							type: 'trailer',
						},
					},
				],
			},
		},
		sort: [
			{
				releaseDate: {
					order: 'desc',
				},
			},
		],
	}, {
		encodeJSON: true,
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	// report a failed request by status, as fetchProfile does, instead of crashing on the body
	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.body.hits.hits, site);
}
|
||||
|
||||
/**
 * Fetch a single scene by the encoded ID in its URL.
 *
 * @param {string} url - scene URL; the encoded ID is the second path segment
 * @param {object} entity - passed through to scrapeScene
 * @param {object} [baseRelease] - release from the overview; returned as-is when it has an entryId
 * @param {object} [options] - passed through to scrapeScene
 * @returns {Promise<object|number|null>} the release, the response status when
 *   the request was not ok, or null when no scene could be identified or found
 */
async function fetchScene(url, entity, baseRelease, options) {
	if (baseRelease?.entryId) {
		// overview and deep data is the same, don't hit server unnecessarily
		return baseRelease;
	}

	const encodedId = new URL(url).pathname.split('/')[2];

	// a URL without an ID segment cannot be resolved to a scene
	if (!encodedId) {
		return null;
	}

	const entryId = decodeId(encodedId);

	const res = await http.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	if (!res.ok) {
		return res.status;
	}

	const scene = res.body?._source; // eslint-disable-line no-underscore-dangle

	if (!scene) {
		return null;
	}

	return scrapeScene(scene, entity, options);
}
|
||||
|
||||
/**
 * Search the actor index by name and scrape the hit whose name matches
 * exactly, ignoring case.
 *
 * @param {object} actor - only `name` is used
 * @param {object} context - `context.entity` is passed on to scrapeProfile
 * @param {object} include - passed on to scrapeProfile
 * @returns {Promise<object|number|null>} the profile, null when no hit matches
 *   the name, or the response status when the request was not ok
 */
async function fetchProfile({ name: actorName }, context, include) {
	const searchUrl = `https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`;

	const nameMatch = {
		match: {
			name: {
				query: actorName,
				operator: 'and',
			},
		},
	};

	const statusMatch = {
		match: {
			status: 'ok',
		},
	};

	const searchBody = {
		size: 5,
		sort: [{
			_score: {
				order: 'desc',
			},
		}],
		query: {
			bool: {
				must: [nameMatch, statusMatch],
			},
		},
	};

	const res = await http.post(searchUrl, searchBody, {
		headers: {
			Authorization: `Basic ${authKey}`,
		},
		encodeJSON: true,
	});

	if (!res.ok) {
		return res.status;
	}

	/* eslint-disable no-underscore-dangle */
	const matchingHit = res.body.hits.hits.find((hit) => hit._source.name.toLowerCase() === actorName.toLowerCase());

	return matchingHit
		? scrapeProfile(matchingHit._source, context.entity, include)
		: null;
	/* eslint-enable no-underscore-dangle */
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
};
|
|
@ -1,29 +1,15 @@
|
|||
'use strict';
|
||||
|
||||
const http = require('../utils/http');
|
||||
const qu = require('../utils/qu');
|
||||
const { extractDate } = require('../utils/qu');
|
||||
const { inchesToCm } = require('../utils/convert');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
const capitalize = require('../utils/capitalize');
|
||||
|
||||
const clusterId = '617fb597b659459bafe6472470d9073a';
|
||||
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
|
||||
|
||||
const genderMap = {
|
||||
M: 'male',
|
||||
F: 'female',
|
||||
};
|
||||
|
||||
function getScreenUrl(item, scene) {
|
||||
if (!scene.dvd?.id || !item?.screenId) {
|
||||
return null;
|
||||
/*
|
||||
function encodeId(id) {
|
||||
if (!id) {
|
||||
return id;
|
||||
}
|
||||
|
||||
return `https://i.bang.com/screenshots/${scene.dvd.id}/${scene.type}/${scene.order}/${item.screenId}.jpg`;
|
||||
}
|
||||
|
||||
function encodeId(id) {
|
||||
return Buffer
|
||||
.from(id, 'hex')
|
||||
.toString('base64')
|
||||
|
@ -31,8 +17,13 @@ function encodeId(id) {
|
|||
.replace(/\//g, '_')
|
||||
.replace(/=/g, ',');
|
||||
}
|
||||
*/
|
||||
|
||||
function decodeId(id) {
|
||||
if (!id) {
|
||||
return id;
|
||||
}
|
||||
|
||||
const restoredId = id
|
||||
.replace(/-/g, '+')
|
||||
.replace(/_/g, '/')
|
||||
|
@ -43,387 +34,150 @@ function decodeId(id) {
|
|||
.toString('hex');
|
||||
}
|
||||
|
||||
async function fetchPhotos(scene) {
|
||||
const photoPaths = Array.from({ length: scene.photos }, (value, index) => `/${scene.dvd.id}/${scene.identifier}/final/${String(index + 1).padStart(6, '0')}.jpg`);
|
||||
function scrapeAll(scenes, entity) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
|
||||
const res = await http.post('https://www.bang.com/sign-images', {
|
||||
images: photoPaths,
|
||||
}, {
|
||||
encodeJSON: false,
|
||||
});
|
||||
release.url = query.url('.video_preview_container > a', { origin: entity.url });
|
||||
release.entryId = query.attribute(null, 'data-video-id') || decodeId(new URL(release.url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
|
||||
|
||||
if (res.ok && res.body.images) {
|
||||
return res.body.images.map((image) => qu.prefixUrl(image, 'https://photos.bang.com'));
|
||||
release.title = query.content('.video_preview_container >a > span.block');
|
||||
release.date = query.date('.videoInfo .statistics span', 'MMM DD, YYYY');
|
||||
|
||||
release.actors = query.elements('.videoInfo a[href*="/pornstar"]').map((el) => ({
|
||||
name: unprint.query.content(el),
|
||||
url: unprint.query.url(el, null, { origin: 'https://www.bang.com' }),
|
||||
}));
|
||||
|
||||
const poster = query.img('img[data-videopreview-target="image"]');
|
||||
const posterUrl = new URL(poster);
|
||||
|
||||
if (poster) {
|
||||
release.poster = [
|
||||
`${posterUrl.origin}${posterUrl.pathname}`,
|
||||
posterUrl.href,
|
||||
];
|
||||
}
|
||||
|
||||
return null;
|
||||
release.teaser = query.video();
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeScene(scene, entity, options) {
|
||||
const release = {
|
||||
entryId: scene.id,
|
||||
title: scene.name || (scene.dvd?.name && scene.type === 'bonus' && capitalize(`${scene.dvd.name} - Bonus Scene ${scene.order || 1}`)) || null,
|
||||
description: scene.description,
|
||||
tags: scene.genres.concat(scene.actions).map((genre) => genre.name),
|
||||
duration: scene.duration,
|
||||
};
|
||||
async function scrapeScene({ query }, { url, entity }) {
|
||||
const release = {};
|
||||
const data = query.json('script[type="application/ld+json"]');
|
||||
|
||||
const slug = slugify(release.title);
|
||||
release.url = `https://www.bang.com/video/${encodeId(release.entryId)}/${slug}`;
|
||||
release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
|
||||
|
||||
const date = new Date(scene.releaseDate);
|
||||
release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));
|
||||
release.title = data?.name || query.content('.video-heading');
|
||||
release.description = data?.description || query.content('.expanded p.clear-both');
|
||||
|
||||
release.actors = scene.actors.map((actor) => ({ name: actor.name, gender: genderMap[actor.gender] }));
|
||||
release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD');
|
||||
release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span');
|
||||
|
||||
if (scene.is4k) release.tags.push('4k');
|
||||
if (scene.gay) release.tags.push('gay');
|
||||
release.actors = data?.actor.map((actor) => ({
|
||||
name: actor.name,
|
||||
url: actor.url,
|
||||
})) || query.contents('.expanded a[href*="/pornstar"]');
|
||||
|
||||
const defaultPoster = scene.screenshots.find((photo) => photo.default === true);
|
||||
const screens = scene.screenshots.filter((photo) => photo.default === false);
|
||||
release.tags = query.contents('.expanded .genres');
|
||||
|
||||
const remainingScreens = defaultPoster ? screens : screens.slice(1);
|
||||
const poster = defaultPoster || screens[0];
|
||||
release.poster = data?.thumbnailUrl || data?.contentUrl || query.attribute('meta[name*="og:image"]', 'content');
|
||||
release.teaser = query.video('video[data-modal-target="videoImage"] source');
|
||||
|
||||
release.poster = getScreenUrl(poster, scene);
|
||||
release.photos = remainingScreens.map((photo) => getScreenUrl(photo, scene));
|
||||
release.photos = JSON.parse(query.attribute('[data-video-gallery-photos-value]', 'data-video-gallery-photos-value'));
|
||||
release.photoCount = query.number('[data-video-gallery-count-value]', { attribute: 'data-video-gallery-count-value' });
|
||||
|
||||
if (options?.includePhotos) {
|
||||
const photos = await fetchPhotos(scene);
|
||||
const channelName = query.content('.expanded a[href*="?in="]')?.trim();
|
||||
|
||||
if (photos?.length > 0) {
|
||||
release.photos = photos;
|
||||
if (channelName) {
|
||||
release.channel = entity.children?.find((channel) => new RegExp(channel.name, 'i').test(channelName) || slugify(channelName) === channel.slug)?.slug;
|
||||
}
|
||||
}
|
||||
|
||||
release.teaser = `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`;
|
||||
|
||||
release.channel = scene.series.name
|
||||
.replace(/[! .]/g, '')
|
||||
.replace('&', 'and');
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeAll(scenes, entity) {
|
||||
return Promise.all(scenes.map(({ _source: scene }) => scrapeScene(scene, entity)));
|
||||
}
|
||||
async function fetchActorScenes(element, url, entity, page = 1, acc = []) {
|
||||
const scenes = scrapeAll(unprint.initAll(element, '.search-grid li'), entity);
|
||||
|
||||
async function fetchActorReleases(actor, entity) {
|
||||
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||
size: 50,
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
status: 'ok',
|
||||
},
|
||||
},
|
||||
{
|
||||
nested: {
|
||||
path: 'actors',
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
'actors.mongoId': {
|
||||
operator: 'AND',
|
||||
query: actor.id,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
must_not: [
|
||||
{
|
||||
match: {
|
||||
type: 'trailer',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
sort: [
|
||||
{
|
||||
releaseDate: {
|
||||
order: 'desc',
|
||||
},
|
||||
},
|
||||
],
|
||||
}, {
|
||||
encodeJSON: true,
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
});
|
||||
if (scenes.length) {
|
||||
const nextPageRes = await unprint.post(url, { page: page + 1 });
|
||||
|
||||
return scrapeAll(res.body.hits.hits, entity);
|
||||
}
|
||||
|
||||
async function scrapeProfile(actor, entity, include) {
|
||||
const profile = {};
|
||||
|
||||
profile.aliases = actor.aliases;
|
||||
profile.dateOfBirth = extractDate(actor.birthDate);
|
||||
profile.gender = ({ F: 'female', M: 'male' })[actor.gender];
|
||||
|
||||
profile.ethnicity = actor.ethnicity;
|
||||
profile.nationality = actor.nationality;
|
||||
profile.birthPlace = `${actor.birthCity}, ${actor.birthCountry || ''}`;
|
||||
|
||||
profile.hair = actor.hairColor;
|
||||
profile.eyes = actor.eyeColor;
|
||||
|
||||
profile.naturalBoobs = actor.naturalBreasts;
|
||||
|
||||
if (actor.measurements) {
|
||||
const { cupSize, shoulder, chest, waist, height } = actor.measurements;
|
||||
|
||||
if (height) profile.height = inchesToCm(height);
|
||||
if (cupSize) profile.cup = cupSize;
|
||||
|
||||
// [SIC]
|
||||
if (shoulder) profile.bust = shoulder;
|
||||
if (chest) profile.waist = chest;
|
||||
if (waist) profile.hip = waist;
|
||||
if (nextPageRes.ok) {
|
||||
return fetchActorScenes(nextPageRes.context.element, url, entity, page + 1, acc.concat(scenes));
|
||||
}
|
||||
}
|
||||
|
||||
if (actor.twitter) profile.social = [`https://www.twitter.com/${actor.twitter}`];
|
||||
if (actor.image) profile.avatar = `https://i.bang.com/pornstars/${actor.identifier}.jpg`;
|
||||
return acc.concat(scenes);
|
||||
}
|
||||
|
||||
if (include.releases) {
|
||||
profile.releases = await fetchActorReleases(actor, entity);
|
||||
/**
 * Assemble an actor profile from a parsed performer page.
 *
 * @param {{ query: object, element: object }} context - page context; `query` must offer `date`, `content` and `img`, `element` is handed to fetchActorScenes
 * @param {string} url - profile page URL, forwarded to fetchActorScenes
 * @param {object} entity - forwarded to fetchActorScenes
 * @param {object} include - scenes are only collected when `include.scenes` is truthy
 * @returns {Promise<object>} the assembled profile
 */
async function scrapeProfile({ query, element }, url, entity, include) {
	// each bio value is read from the first bold span that follows its text label
	const afterLabel = (label) => `//text()[contains(., "${label}")]/following-sibling::span[contains(@class, "font-bold")][1]`;

	const profile = {
		dateOfBirth: query.date(afterLabel('Born'), 'MMMM D, YYYY'),
		birthPlace: query.content(afterLabel('in')),
		ethnicity: query.content(afterLabel('Ethnicity')),
		hairColor: query.content(afterLabel('Hair Color')),
		eyes: query.content(afterLabel('Eye Color')),
	};

	const thumbnail = query.img('img[alt*="profile"][src*="https://i.bang.com/pornstars/"]');

	if (thumbnail) {
		const source = new URL(thumbnail);

		// first candidate drops the query string (full size), the URL as found on the page is the fallback
		profile.avatar = [`${source.origin}${source.pathname}`, thumbnail];
	}

	if (include.scenes) {
		profile.scenes = await fetchActorScenes(element, url, entity);
	}

	return profile;
}
|
||||
|
||||
/**
 * Build the Elasticsearch search body shared by fetchLatest and fetchUpcoming.
 *
 * Matches videos with status 'ok' that belong to the site's series, excludes
 * trailers and sorts by release date, newest first. To search a whole studio
 * instead of one series, the nested `series` clause can be swapped for a nested
 * `studio` clause matching 'studio.name' against 'bang! originals' (operator 'AND').
 *
 * @param {object} site - entity whose `parameters.siteId` selects the series
 * @param {number} page - 1-based page number, 50 results per page
 * @param {string} releasedUntil - upper bound for `releaseDate` as an Elasticsearch date expression
 * @returns {object} request body for the `_search` endpoint
 */
function buildSeriesSearch(site, page, releasedUntil) {
	const pageSize = 50;

	return {
		size: pageSize,
		from: (page - 1) * pageSize,
		query: {
			bool: {
				must: [
					{
						match: {
							status: 'ok',
						},
					},
					{
						range: {
							releaseDate: {
								lte: releasedUntil,
							},
						},
					},
					{
						nested: {
							path: 'series',
							query: {
								bool: {
									must: [
										{
											match: {
												'series.id': {
													operator: 'AND',
													query: site.parameters.siteId,
												},
											},
										},
									],
								},
							},
						},
					},
				],
				must_not: [
					{
						match: {
							type: 'trailer',
						},
					},
				],
			},
		},
		sort: [
			{
				releaseDate: {
					order: 'desc',
				},
			},
		],
	};
}

/**
 * Run the series search and scrape the hits.
 *
 * @returns {Promise<*>} the result of scrapeAll, or the HTTP status when the request failed
 */
async function searchSeriesScenes(site, page, releasedUntil) {
	const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, buildSeriesSearch(site, page, releasedUntil), {
		encodeJSON: true,
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	// a failed request has no hits to read; hand back the status like the other fetchers in this file
	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.body.hits.hits, site);
}

/**
 * Fetch a page of scenes released up to now for the site's series.
 *
 * @param {object} site - entity with `parameters.siteId`
 * @param {number} [page=1] - 1-based page number
 * @returns {Promise<*>} scraped scenes, or the HTTP status on failure
 */
async function fetchLatest(site, page = 1) {
	return searchSeriesScenes(site, page, 'now');
}

/**
 * Fetch a page of scenes with a release date up to seven days from now.
 *
 * NOTE(review): the range has no lower bound, so already released scenes are
 * matched as well — confirm whether a `gt: 'now'` bound is intended.
 *
 * @param {object} site - entity with `parameters.siteId`
 * @param {number} [page=1] - 1-based page number
 * @returns {Promise<*>} scraped scenes, or the HTTP status on failure
 */
async function fetchUpcoming(site, page = 1) {
	return searchSeriesScenes(site, page, 'now+7d');
}
|
||||
|
||||
/**
 * Fetch and scrape a single scene by its page URL.
 *
 * @param {string} url - scene URL; the third path segment carries the encoded ID
 * @param {object} entity - passed through to scrapeScene
 * @param {object} [baseRelease] - release from the overview; returned untouched when it already has an entryId
 * @param {object} [options] - passed through to scrapeScene
 * @returns {Promise<*>} the base release, the scraped scene, or the HTTP status when the request failed
 */
async function fetchScene(url, entity, baseRelease, options) {
	if (baseRelease?.entryId) {
		// overview and deep data is the same, don't hit server unnecessarily
		return baseRelease;
	}

	const [, , encodedId] = new URL(url).pathname.split('/');
	const entryId = decodeId(encodedId);

	const res = await http.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	// without this guard a failed request surfaces as a TypeError on res.body
	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.body._source, entity, options); // eslint-disable-line no-underscore-dangle
}
|
||||
|
||||
async function fetchProfile({ name: actorName }, context, include) {
|
||||
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
|
||||
size: 5,
|
||||
sort: [{
|
||||
_score: {
|
||||
order: 'desc',
|
||||
},
|
||||
}],
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
name: {
|
||||
query: actorName,
|
||||
operator: 'and',
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
match: {
|
||||
status: 'ok',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}, {
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
encodeJSON: true,
|
||||
});
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
const url = `${channel.url}&page=${page}`;
|
||||
const res = await unprint.get(url, { selectAll: '.search-grid li' });
|
||||
|
||||
if (res.ok) {
|
||||
const actor = res.body.hits.hits.find((hit) => hit._source.name.toLowerCase() === actorName.toLowerCase());
|
||||
|
||||
if (actor) {
|
||||
return scrapeProfile(actor._source, context.entity, include);
|
||||
}
|
||||
|
||||
return null;
|
||||
return scrapeAll(res.context, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Quote a string as an XPath 1.0 string literal.
 *
 * XPath 1.0 has no escape sequences inside literals, so a value holding both
 * quote kinds has to be stitched together with concat().
 *
 * @param {string} value - raw text to embed in an expression
 * @returns {string} a literal, or a concat() call, that evaluates to `value`
 */
function toXPathLiteral(value) {
	if (!value.includes('"')) {
		return `"${value}"`;
	}

	if (!value.includes("'")) {
		return `'${value}'`;
	}

	return `concat(${value.split('"').map((part) => `"${part}"`).join(`, '"', `)})`;
}

/**
 * Look an actor up through the site's performer search and scrape their profile page.
 *
 * @param {{ name: string }} actor - only the name is used
 * @param {{ entity: object }} context - entity is passed through to scrapeProfile
 * @param {object} include - passed through to scrapeProfile
 * @returns {Promise<*>} the scraped profile, null when the search has no link for the name, or the HTTP status of a failed request
 */
async function fetchProfile({ name: actorName }, { entity }, include) {
	const searchRes = await unprint.get(`https://www.bang.com/pornstars?term=${slugify(actorName, '+')}`);

	if (!searchRes.ok) {
		return searchRes.status;
	}

	// quote the name properly, a raw double quote would otherwise terminate the literal and break the expression
	const url = searchRes.context.query.url(`//a[contains(.//span, ${toXPathLiteral(actorName)})]`);

	if (!url) {
		return null;
	}

	const actorRes = await unprint.get(url);

	if (actorRes.ok) {
		return scrapeProfile(actorRes.context, url, entity, include);
	}

	return actorRes.status;
}
|
||||
|
||||
// Module interface: exposes the fetch* entry points and scrapeScene, plus the useUnprint flag.
// NOTE(review): useUnprint presumably signals to the scraper loader that this module relies on unprint — confirm against the caller.
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
scrapeScene,
|
||||
useUnprint: true,
|
||||
};
|
||||
|
|
|
@ -237,6 +237,10 @@ async function destroyBypassSession(sessionId) {
|
|||
}
|
||||
|
||||
async function destroyBypassSessions() {
|
||||
if (!config.bypass.cloudflare.enabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
const sessionListRes = await limiters.bypass.schedule(async () => bhttp.post(config.bypass.cloudflare.path, {
|
||||
cmd: 'sessions.list',
|
||||
}, {
|
||||
|
|