Refactored Bang! scraper to match new website, first use of unprint.
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
const config = require('config');
|
||||
const util = require('util');
|
||||
const unprint = require('unprint');
|
||||
// const log = require('why-is-node-running');
|
||||
const Inspector = require('inspector-api');
|
||||
const fs = require('fs').promises;
|
||||
@@ -25,6 +26,13 @@ const getFileEntries = require('./utils/file-entries');
|
||||
const inspector = new Inspector();
|
||||
let done = false;
|
||||
|
||||
unprint.options({
|
||||
timeout: 5000,
|
||||
headers: {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
|
||||
},
|
||||
});
|
||||
|
||||
/*
|
||||
function logActive() {
|
||||
setTimeout(() => {
|
||||
|
||||
22
src/deep.js
22
src/deep.js
@@ -2,6 +2,7 @@
|
||||
|
||||
const util = require('util');
|
||||
const Promise = require('bluebird');
|
||||
const unprint = require('unprint');
|
||||
const { mergeAdvanced: merge } = require('object-merge-advanced');
|
||||
|
||||
const argv = require('./argv');
|
||||
@@ -54,12 +55,33 @@ function toBaseReleases(baseReleasesOrUrls, entity = null) {
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
/**
 * Fetch a release page with unprint and pass the parsed page to the scraper.
 *
 * @param {object} scraper - scraper module exposing scrapeScene and/or scrapeMovie
 * @param {string} url - page to request
 * @param {object} entity - entity handed through to the scraper
 * @param {object} baseRelease - base release handed through to the scraper
 * @param {object} options - options handed through to the scraper as third argument
 * @param {string} type - 'movie' selects scrapeMovie, anything else scrapeScene
 * @returns {Promise<*>} the scraper's result, or the response status when the request was not ok
 */
async function fetchUnprintScene(scraper, url, entity, baseRelease, options, type) {
	const scrapeMethod = type === 'movie' ? 'scrapeMovie' : 'scrapeScene';

	// certificate verification is explicitly switched off for this request
	const res = await unprint.get(url, { rejectUnauthorized: false });

	if (!res.ok) {
		return res.status;
	}

	const pageDetails = {
		url,
		entity,
		baseRelease,
		headers: res.headers,
	};

	return scraper[scrapeMethod](res.context, pageDetails, options);
}
|
||||
|
||||
async function fetchScene(scraper, url, entity, baseRelease, options, type = 'scene') {
|
||||
if ((type === 'scene' && scraper.fetchScene) || (type === 'movie' && scraper.fetchMovie)) {
|
||||
return scraper[type === 'movie' ? 'fetchMovie' : 'fetchScene'](baseRelease.url, entity, baseRelease, options, null);
|
||||
}
|
||||
|
||||
if ((type === 'scene' && scraper.scrapeScene) || (type === 'movie' && scraper.scrapeMovie)) {
|
||||
if (scraper.useUnprint) {
|
||||
return fetchUnprintScene(scraper, url, entity, baseRelease, options, type);
|
||||
}
|
||||
|
||||
const session = qu.session();
|
||||
|
||||
const res = await qu.get(url, null, null, {
|
||||
|
||||
429
src/scrapers/bang-legacy.js
Executable file
429
src/scrapers/bang-legacy.js
Executable file
@@ -0,0 +1,429 @@
|
||||
'use strict';
|
||||
|
||||
const http = require('../utils/http');
|
||||
const qu = require('../utils/qu');
|
||||
const { extractDate } = require('../utils/qu');
|
||||
const { inchesToCm } = require('../utils/convert');
|
||||
const slugify = require('../utils/slugify');
|
||||
const capitalize = require('../utils/capitalize');
|
||||
|
||||
const clusterId = '617fb597b659459bafe6472470d9073a';
|
||||
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
|
||||
|
||||
const genderMap = {
|
||||
M: 'male',
|
||||
F: 'female',
|
||||
};
|
||||
|
||||
/**
 * Build the screenshot URL for one screenshot entry of a scene.
 *
 * @param {object} [item] - screenshot entry; its screenId is used
 * @param {object} scene - scene supplying dvd.id, type and order
 * @returns {string|null} the URL, or null when the DVD ID or screen ID is missing
 */
function getScreenUrl(item, scene) {
	const dvdId = scene.dvd?.id;
	const screenId = item?.screenId;

	if (dvdId && screenId) {
		return `https://i.bang.com/screenshots/${dvdId}/${scene.type}/${scene.order}/${screenId}.jpg`;
	}

	return null;
}
|
||||
|
||||
/**
 * Convert a hex ID into the base64 variant used in video URLs,
 * where '+' becomes '-', '/' becomes '_' and '=' padding becomes ','.
 *
 * @param {string} id - hexadecimal ID
 * @returns {string} the encoded ID; a missing (falsy) ID is returned unchanged instead of throwing
 */
function encodeId(id) {
	if (!id) {
		return id;
	}

	return Buffer
		.from(id, 'hex')
		.toString('base64')
		.replace(/\+/g, '-')
		.replace(/\//g, '_')
		.replace(/=/g, ',');
}
|
||||
|
||||
/**
 * Reverse of encodeId: restore the standard base64 alphabet
 * ('-' to '+', '_' to '/', ',' to '=') and convert the result to hex.
 *
 * @param {string} id - encoded ID as found in a video URL
 * @returns {string} the hexadecimal ID; a missing (falsy) ID is returned unchanged instead of throwing
 */
function decodeId(id) {
	if (!id) {
		return id;
	}

	const restoredId = id
		.replace(/-/g, '+')
		.replace(/_/g, '/')
		.replace(/,/g, '=');

	return Buffer
		.from(restoredId, 'base64')
		.toString('hex');
}
|
||||
|
||||
/**
 * Request signed URLs for the photos of a scene.
 *
 * One path is generated per photo, numbered from 000001 up to scene.photos,
 * and the list is posted to the sign-images endpoint.
 *
 * @param {object} scene - scene supplying photos (count), dvd.id and identifier
 * @returns {Promise<string[]|null>} prefixed photo URLs, or null when the request
 *   was not ok or the response carries no images
 */
async function fetchPhotos(scene) {
	const toPhotoPath = (_, index) => {
		const photoNumber = String(index + 1).padStart(6, '0');

		return `/${scene.dvd.id}/${scene.identifier}/final/${photoNumber}.jpg`;
	};

	const images = Array.from({ length: scene.photos }, toPhotoPath);
	const res = await http.post('https://www.bang.com/sign-images', { images }, { encodeJSON: false });
	const signedImages = res.ok && res.body.images;

	if (!signedImages) {
		return null;
	}

	return signedImages.map((image) => qu.prefixUrl(image, 'https://photos.bang.com'));
}
|
||||
|
||||
/**
 * Convert a scene document from the search index into a release.
 *
 * @param {object} scene - scene document (the `_source` of a search hit)
 * @param {object} entity - accepted for interface compatibility; not read here
 * @param {object} [options] - when options.includePhotos is set, signed photos are
 *   fetched and replace the screenshot-based photos if any are returned
 * @returns {Promise<object>} the release
 */
async function scrapeScene(scene, entity, options) {
	const release = {
		entryId: scene.id,
		// unnamed bonus scenes get a title derived from the DVD name
		title: scene.name || (scene.dvd?.name && scene.type === 'bonus' && capitalize(`${scene.dvd.name} - Bonus Scene ${scene.order || 1}`)) || null,
		description: scene.description,
		// either list may be absent from a document; treat it as empty rather than throwing
		tags: [...(scene.genres || []), ...(scene.actions || [])].map((genre) => genre.name),
		duration: scene.duration,
	};

	const slug = slugify(release.title);
	release.url = `https://www.bang.com/video/${encodeId(release.entryId)}/${slug}`;

	// keep only the calendar date, at UTC midnight
	const date = new Date(scene.releaseDate);
	release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));

	release.actors = scene.actors.map((actor) => ({ name: actor.name, gender: genderMap[actor.gender] }));

	if (scene.is4k) release.tags.push('4k');
	if (scene.gay) release.tags.push('gay');

	const defaultPoster = scene.screenshots.find((photo) => photo.default === true);
	const screens = scene.screenshots.filter((photo) => photo.default === false);

	// without a default poster the first regular screenshot is promoted to poster
	const remainingScreens = defaultPoster ? screens : screens.slice(1);
	const poster = defaultPoster || screens[0];

	release.poster = getScreenUrl(poster, scene);
	release.photos = remainingScreens.map((photo) => getScreenUrl(photo, scene));

	if (options?.includePhotos) {
		const photos = await fetchPhotos(scene);

		if (photos?.length > 0) {
			release.photos = photos;
		}
	}

	// the DVD is optional elsewhere in this function, so don't crash on a scene without one
	if (scene.dvd?.id) {
		release.teaser = `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`;
	}

	release.channel = scene.series.name
		.replace(/[! .]/g, '')
		.replace('&', 'and');

	return release;
}
|
||||
|
||||
/**
 * Scrape every search hit into a release.
 *
 * @param {object[]} scenes - search hits; the scene document is read from each hit's `_source`
 * @param {object} entity - passed on to scrapeScene
 * @returns {Promise<object[]>} the scraped releases, in hit order
 */
function scrapeAll(scenes, entity) {
	const pendingReleases = scenes.map((hit) => {
		const { _source: sceneDocument } = hit;

		return scrapeScene(sceneDocument, entity);
	});

	return Promise.all(pendingReleases);
}
|
||||
|
||||
/**
 * Search the video index for the scenes an actor appears in, newest first.
 *
 * Matches videos with status 'ok' whose nested actors contain actor.id,
 * excluding trailers, limited to 50 results.
 *
 * @param {object} actor - actor document; its id is matched against actors.mongoId
 * @param {object} entity - passed on to scrapeAll
 * @returns {Promise<object[]>} scraped releases; an empty list when the search request fails,
 *   so a failed lookup does not take the whole profile down
 */
async function fetchActorReleases(actor, entity) {
	const actorFilter = {
		nested: {
			path: 'actors',
			query: {
				bool: {
					must: [
						{
							match: {
								'actors.mongoId': {
									operator: 'AND',
									query: actor.id,
								},
							},
						},
					],
				},
			},
		},
	};

	const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
		size: 50,
		query: {
			bool: {
				must: [
					{ match: { status: 'ok' } },
					actorFilter,
				],
				must_not: [
					{ match: { type: 'trailer' } },
				],
			},
		},
		sort: [
			{ releaseDate: { order: 'desc' } },
		],
	}, {
		encodeJSON: true,
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	if (!res.ok) {
		return [];
	}

	return scrapeAll(res.body.hits.hits, entity);
}
|
||||
|
||||
/**
 * Convert an actor document from the search index into a profile.
 *
 * @param {object} actor - actor document (the `_source` of a search hit)
 * @param {object} entity - passed on when fetching the actor's releases
 * @param {object} [include] - when include.releases is set, the actor's releases are fetched too
 * @returns {Promise<object>} the profile
 */
async function scrapeProfile(actor, entity, include) {
	const profile = {};

	profile.aliases = actor.aliases;
	profile.dateOfBirth = extractDate(actor.birthDate);
	profile.gender = genderMap[actor.gender];

	profile.ethnicity = actor.ethnicity;
	profile.nationality = actor.nationality;

	// only join the parts that are present, so a missing city or country
	// does not leave 'undefined' or a dangling comma in the result
	profile.birthPlace = [actor.birthCity, actor.birthCountry].filter(Boolean).join(', ') || null;

	profile.hair = actor.hairColor;
	profile.eyes = actor.eyeColor;

	profile.naturalBoobs = actor.naturalBreasts;

	if (actor.measurements) {
		const { cupSize, shoulder, chest, waist, height } = actor.measurements;

		if (height) profile.height = inchesToCm(height);
		if (cupSize) profile.cup = cupSize;

		// [SIC] the source fields are offset by one: shoulder holds the bust, chest the waist, waist the hip
		if (shoulder) profile.bust = shoulder;
		if (chest) profile.waist = chest;
		if (waist) profile.hip = waist;
	}

	if (actor.twitter) profile.social = [`https://www.twitter.com/${actor.twitter}`];
	if (actor.image) profile.avatar = `https://i.bang.com/pornstars/${actor.identifier}.jpg`;

	if (include?.releases) {
		profile.releases = await fetchActorReleases(actor, entity);
	}

	return profile;
}
|
||||
|
||||
/**
 * Fetch a page of already released scenes of a series, newest first.
 *
 * Matches videos with status 'ok', a releaseDate up to now and the series ID
 * from site.parameters.siteId, excluding trailers, 50 per page.
 *
 * @param {object} site - channel entity; site.parameters.siteId is matched against series.id
 * @param {number} [page=1] - 1-based page number
 * @returns {Promise<object[]|number>} scraped releases, or the response status when the request fails
 */
async function fetchLatest(site, page = 1) {
	// to fetch globally instead of per series, a nested filter on path 'studio'
	// matching 'studio.name' against 'bang! originals' can take the place of this one
	const seriesFilter = {
		nested: {
			path: 'series',
			query: {
				bool: {
					must: [
						{
							match: {
								'series.id': {
									operator: 'AND',
									query: site.parameters.siteId,
								},
							},
						},
					],
				},
			},
		},
	};

	const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
		size: 50,
		from: (page - 1) * 50,
		query: {
			bool: {
				must: [
					{ match: { status: 'ok' } },
					{ range: { releaseDate: { lte: 'now' } } },
					seriesFilter,
				],
				must_not: [
					{ match: { type: 'trailer' } },
				],
			},
		},
		sort: [
			{ releaseDate: { order: 'desc' } },
		],
	}, {
		encodeJSON: true,
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	// report the failure instead of throwing on a missing body
	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.body.hits.hits, site);
}
|
||||
|
||||
/**
 * Fetch a page of scenes of a series with a release date up to 7 days ahead, newest first.
 *
 * Matches videos with status 'ok' and the series ID from site.parameters.siteId,
 * excluding trailers, 50 per page.
 * NOTE(review): the date range has no lower bound, so already released scenes
 * are part of the result as well - confirm this is intended.
 *
 * @param {object} site - channel entity; site.parameters.siteId is matched against series.id
 * @param {number} [page=1] - 1-based page number
 * @returns {Promise<object[]|number>} scraped releases, or the response status when the request fails
 */
async function fetchUpcoming(site, page = 1) {
	const seriesFilter = {
		nested: {
			path: 'series',
			query: {
				bool: {
					must: [
						{
							match: {
								'series.id': {
									operator: 'AND',
									query: site.parameters.siteId,
								},
							},
						},
					],
				},
			},
		},
	};

	const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
		size: 50,
		from: (page - 1) * 50,
		query: {
			bool: {
				must: [
					{ match: { status: 'ok' } },
					{ range: { releaseDate: { lte: 'now+7d' } } },
					seriesFilter,
				],
				must_not: [
					{ match: { type: 'trailer' } },
				],
			},
		},
		sort: [
			{ releaseDate: { order: 'desc' } },
		],
	}, {
		encodeJSON: true,
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	// report the failure instead of throwing on a missing body
	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.body.hits.hits, site);
}
|
||||
|
||||
/**
 * Fetch a single scene document by the ID embedded in its URL and scrape it.
 *
 * @param {string} url - scene URL; the encoded ID is its second path segment
 * @param {object} entity - passed on to scrapeScene
 * @param {object} [baseRelease] - returned as-is when it already has an entryId
 * @param {object} [options] - passed on to scrapeScene
 * @returns {Promise<object|number>} the release, or the response status when the request fails
 */
async function fetchScene(url, entity, baseRelease, options) {
	if (baseRelease?.entryId) {
		// overview and deep data is the same, don't hit server unnecessarily
		return baseRelease;
	}

	const encodedId = new URL(url).pathname.split('/')[2];
	const entryId = decodeId(encodedId);

	const res = await http.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	// report the failure instead of throwing on a missing body
	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.body._source, entity, options); // eslint-disable-line no-underscore-dangle
}
|
||||
|
||||
/**
 * Look an actor up by name in the actor index and scrape the matching profile.
 *
 * The five best scoring actors with status 'ok' are requested; only a hit whose
 * name equals the requested name, ignoring case, is used.
 *
 * @param {object} actor - actor to look up; only its name is read
 * @param {object} context - context.entity is passed on to scrapeProfile
 * @param {object} include - passed on to scrapeProfile
 * @returns {Promise<object|number|null>} the profile, null when no hit has the exact name,
 *   or the response status when the request was not ok
 */
async function fetchProfile({ name: actorName }, context, include) {
	const searchBody = {
		size: 5,
		sort: [{ _score: { order: 'desc' } }],
		query: {
			bool: {
				must: [
					{ match: { name: { query: actorName, operator: 'and' } } },
					{ match: { status: 'ok' } },
				],
			},
		},
	};

	const requestOptions = {
		headers: {
			Authorization: `Basic ${authKey}`,
		},
		encodeJSON: true,
	};

	const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, searchBody, requestOptions);

	if (!res.ok) {
		return res.status;
	}

	/* eslint-disable no-underscore-dangle */
	const exactHit = res.body.hits.hits.find((hit) => hit._source.name.toLowerCase() === actorName.toLowerCase());

	return exactHit
		? scrapeProfile(exactHit._source, context.entity, include)
		: null;
	/* eslint-enable no-underscore-dangle */
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
};
|
||||
@@ -1,29 +1,15 @@
|
||||
'use strict';
|
||||
|
||||
const http = require('../utils/http');
|
||||
const qu = require('../utils/qu');
|
||||
const { extractDate } = require('../utils/qu');
|
||||
const { inchesToCm } = require('../utils/convert');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
const capitalize = require('../utils/capitalize');
|
||||
|
||||
const clusterId = '617fb597b659459bafe6472470d9073a';
|
||||
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
|
||||
|
||||
const genderMap = {
|
||||
M: 'male',
|
||||
F: 'female',
|
||||
};
|
||||
|
||||
function getScreenUrl(item, scene) {
|
||||
if (!scene.dvd?.id || !item?.screenId) {
|
||||
return null;
|
||||
/*
|
||||
function encodeId(id) {
|
||||
if (!id) {
|
||||
return id;
|
||||
}
|
||||
|
||||
return `https://i.bang.com/screenshots/${scene.dvd.id}/${scene.type}/${scene.order}/${item.screenId}.jpg`;
|
||||
}
|
||||
|
||||
function encodeId(id) {
|
||||
return Buffer
|
||||
.from(id, 'hex')
|
||||
.toString('base64')
|
||||
@@ -31,8 +17,13 @@ function encodeId(id) {
|
||||
.replace(/\//g, '_')
|
||||
.replace(/=/g, ',');
|
||||
}
|
||||
*/
|
||||
|
||||
function decodeId(id) {
|
||||
if (!id) {
|
||||
return id;
|
||||
}
|
||||
|
||||
const restoredId = id
|
||||
.replace(/-/g, '+')
|
||||
.replace(/_/g, '/')
|
||||
@@ -43,387 +34,150 @@ function decodeId(id) {
|
||||
.toString('hex');
|
||||
}
|
||||
|
||||
async function fetchPhotos(scene) {
|
||||
const photoPaths = Array.from({ length: scene.photos }, (value, index) => `/${scene.dvd.id}/${scene.identifier}/final/${String(index + 1).padStart(6, '0')}.jpg`);
|
||||
function scrapeAll(scenes, entity) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
|
||||
const res = await http.post('https://www.bang.com/sign-images', {
|
||||
images: photoPaths,
|
||||
}, {
|
||||
encodeJSON: false,
|
||||
release.url = query.url('.video_preview_container > a', { origin: entity.url });
|
||||
release.entryId = query.attribute(null, 'data-video-id') || decodeId(new URL(release.url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
|
||||
|
||||
release.title = query.content('.video_preview_container >a > span.block');
|
||||
release.date = query.date('.videoInfo .statistics span', 'MMM DD, YYYY');
|
||||
|
||||
release.actors = query.elements('.videoInfo a[href*="/pornstar"]').map((el) => ({
|
||||
name: unprint.query.content(el),
|
||||
url: unprint.query.url(el, null, { origin: 'https://www.bang.com' }),
|
||||
}));
|
||||
|
||||
const poster = query.img('img[data-videopreview-target="image"]');
|
||||
const posterUrl = new URL(poster);
|
||||
|
||||
if (poster) {
|
||||
release.poster = [
|
||||
`${posterUrl.origin}${posterUrl.pathname}`,
|
||||
posterUrl.href,
|
||||
];
|
||||
}
|
||||
|
||||
release.teaser = query.video();
|
||||
|
||||
return release;
|
||||
});
|
||||
|
||||
if (res.ok && res.body.images) {
|
||||
return res.body.images.map((image) => qu.prefixUrl(image, 'https://photos.bang.com'));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
async function scrapeScene(scene, entity, options) {
|
||||
const release = {
|
||||
entryId: scene.id,
|
||||
title: scene.name || (scene.dvd?.name && scene.type === 'bonus' && capitalize(`${scene.dvd.name} - Bonus Scene ${scene.order || 1}`)) || null,
|
||||
description: scene.description,
|
||||
tags: scene.genres.concat(scene.actions).map((genre) => genre.name),
|
||||
duration: scene.duration,
|
||||
};
|
||||
async function scrapeScene({ query }, { url, entity }) {
|
||||
const release = {};
|
||||
const data = query.json('script[type="application/ld+json"]');
|
||||
|
||||
const slug = slugify(release.title);
|
||||
release.url = `https://www.bang.com/video/${encodeId(release.entryId)}/${slug}`;
|
||||
release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
|
||||
|
||||
const date = new Date(scene.releaseDate);
|
||||
release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));
|
||||
release.title = data?.name || query.content('.video-heading');
|
||||
release.description = data?.description || query.content('.expanded p.clear-both');
|
||||
|
||||
release.actors = scene.actors.map((actor) => ({ name: actor.name, gender: genderMap[actor.gender] }));
|
||||
release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD');
|
||||
release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span');
|
||||
|
||||
if (scene.is4k) release.tags.push('4k');
|
||||
if (scene.gay) release.tags.push('gay');
|
||||
release.actors = data?.actor.map((actor) => ({
|
||||
name: actor.name,
|
||||
url: actor.url,
|
||||
})) || query.contents('.expanded a[href*="/pornstar"]');
|
||||
|
||||
const defaultPoster = scene.screenshots.find((photo) => photo.default === true);
|
||||
const screens = scene.screenshots.filter((photo) => photo.default === false);
|
||||
release.tags = query.contents('.expanded .genres');
|
||||
|
||||
const remainingScreens = defaultPoster ? screens : screens.slice(1);
|
||||
const poster = defaultPoster || screens[0];
|
||||
release.poster = data?.thumbnailUrl || data?.contentUrl || query.attribute('meta[name*="og:image"]', 'content');
|
||||
release.teaser = query.video('video[data-modal-target="videoImage"] source');
|
||||
|
||||
release.poster = getScreenUrl(poster, scene);
|
||||
release.photos = remainingScreens.map((photo) => getScreenUrl(photo, scene));
|
||||
release.photos = JSON.parse(query.attribute('[data-video-gallery-photos-value]', 'data-video-gallery-photos-value'));
|
||||
release.photoCount = query.number('[data-video-gallery-count-value]', { attribute: 'data-video-gallery-count-value' });
|
||||
|
||||
if (options?.includePhotos) {
|
||||
const photos = await fetchPhotos(scene);
|
||||
const channelName = query.content('.expanded a[href*="?in="]')?.trim();
|
||||
|
||||
if (photos?.length > 0) {
|
||||
release.photos = photos;
|
||||
}
|
||||
if (channelName) {
|
||||
release.channel = entity.children?.find((channel) => new RegExp(channel.name, 'i').test(channelName) || slugify(channelName) === channel.slug)?.slug;
|
||||
}
|
||||
|
||||
release.teaser = `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`;
|
||||
|
||||
release.channel = scene.series.name
|
||||
.replace(/[! .]/g, '')
|
||||
.replace('&', 'and');
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeAll(scenes, entity) {
|
||||
return Promise.all(scenes.map(({ _source: scene }) => scrapeScene(scene, entity)));
|
||||
}
|
||||
async function fetchActorScenes(element, url, entity, page = 1, acc = []) {
|
||||
const scenes = scrapeAll(unprint.initAll(element, '.search-grid li'), entity);
|
||||
|
||||
async function fetchActorReleases(actor, entity) {
|
||||
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||
size: 50,
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
status: 'ok',
|
||||
},
|
||||
},
|
||||
{
|
||||
nested: {
|
||||
path: 'actors',
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
'actors.mongoId': {
|
||||
operator: 'AND',
|
||||
query: actor.id,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
must_not: [
|
||||
{
|
||||
match: {
|
||||
type: 'trailer',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
sort: [
|
||||
{
|
||||
releaseDate: {
|
||||
order: 'desc',
|
||||
},
|
||||
},
|
||||
],
|
||||
}, {
|
||||
encodeJSON: true,
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
});
|
||||
if (scenes.length) {
|
||||
const nextPageRes = await unprint.post(url, { page: page + 1 });
|
||||
|
||||
return scrapeAll(res.body.hits.hits, entity);
|
||||
}
|
||||
|
||||
async function scrapeProfile(actor, entity, include) {
|
||||
const profile = {};
|
||||
|
||||
profile.aliases = actor.aliases;
|
||||
profile.dateOfBirth = extractDate(actor.birthDate);
|
||||
profile.gender = ({ F: 'female', M: 'male' })[actor.gender];
|
||||
|
||||
profile.ethnicity = actor.ethnicity;
|
||||
profile.nationality = actor.nationality;
|
||||
profile.birthPlace = `${actor.birthCity}, ${actor.birthCountry || ''}`;
|
||||
|
||||
profile.hair = actor.hairColor;
|
||||
profile.eyes = actor.eyeColor;
|
||||
|
||||
profile.naturalBoobs = actor.naturalBreasts;
|
||||
|
||||
if (actor.measurements) {
|
||||
const { cupSize, shoulder, chest, waist, height } = actor.measurements;
|
||||
|
||||
if (height) profile.height = inchesToCm(height);
|
||||
if (cupSize) profile.cup = cupSize;
|
||||
|
||||
// [SIC]
|
||||
if (shoulder) profile.bust = shoulder;
|
||||
if (chest) profile.waist = chest;
|
||||
if (waist) profile.hip = waist;
|
||||
if (nextPageRes.ok) {
|
||||
return fetchActorScenes(nextPageRes.context.element, url, entity, page + 1, acc.concat(scenes));
|
||||
}
|
||||
}
|
||||
|
||||
if (actor.twitter) profile.social = [`https://www.twitter.com/${actor.twitter}`];
|
||||
if (actor.image) profile.avatar = `https://i.bang.com/pornstars/${actor.identifier}.jpg`;
|
||||
return acc.concat(scenes);
|
||||
}
|
||||
|
||||
if (include.releases) {
|
||||
profile.releases = await fetchActorReleases(actor, entity);
|
||||
async function scrapeProfile({ query, element }, url, entity, include) {
|
||||
const profile = {};
|
||||
|
||||
profile.dateOfBirth = query.date('//text()[contains(., "Born")]/following-sibling::span[contains(@class, "font-bold")][1]', 'MMMM D, YYYY');
|
||||
profile.birthPlace = query.content('//text()[contains(., "in")]/following-sibling::span[contains(@class, "font-bold")][1]');
|
||||
|
||||
profile.ethnicity = query.content('//text()[contains(., "Ethnicity")]/following-sibling::span[contains(@class, "font-bold")][1]');
|
||||
|
||||
profile.hairColor = query.content('//text()[contains(., "Hair Color")]/following-sibling::span[contains(@class, "font-bold")][1]');
|
||||
profile.eyes = query.content('//text()[contains(., "Eye Color")]/following-sibling::span[contains(@class, "font-bold")][1]');
|
||||
|
||||
const avatar = query.img('img[alt*="profile"][src*="https://i.bang.com/pornstars/"]');
|
||||
|
||||
if (avatar) {
|
||||
const { origin, pathname } = new URL(avatar);
|
||||
|
||||
profile.avatar = [
|
||||
`${origin}${pathname}`, // full size
|
||||
avatar,
|
||||
];
|
||||
}
|
||||
|
||||
if (include.scenes) {
|
||||
profile.scenes = await fetchActorScenes(element, url, entity);
|
||||
}
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||
size: 50,
|
||||
from: (page - 1) * 50,
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
status: 'ok',
|
||||
},
|
||||
},
|
||||
{
|
||||
range: {
|
||||
releaseDate: {
|
||||
lte: 'now',
|
||||
},
|
||||
},
|
||||
},
|
||||
/*
|
||||
* global fetch
|
||||
{
|
||||
nested: {
|
||||
path: 'studio',
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
'studio.name': {
|
||||
operator: 'AND',
|
||||
query: 'bang! originals',
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
*/
|
||||
{
|
||||
nested: {
|
||||
path: 'series',
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
'series.id': {
|
||||
operator: 'AND',
|
||||
query: site.parameters.siteId,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
must_not: [
|
||||
{
|
||||
match: {
|
||||
type: 'trailer',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
sort: [
|
||||
{
|
||||
releaseDate: {
|
||||
order: 'desc',
|
||||
},
|
||||
},
|
||||
],
|
||||
}, {
|
||||
encodeJSON: true,
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
});
|
||||
|
||||
return scrapeAll(res.body.hits.hits, site);
|
||||
}
|
||||
|
||||
async function fetchUpcoming(site, page = 1) {
|
||||
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||
size: 50,
|
||||
from: (page - 1) * 50,
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
status: 'ok',
|
||||
},
|
||||
},
|
||||
{
|
||||
range: {
|
||||
releaseDate: {
|
||||
lte: 'now+7d',
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
nested: {
|
||||
path: 'series',
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
'series.id': {
|
||||
operator: 'AND',
|
||||
query: site.parameters.siteId,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
must_not: [
|
||||
{
|
||||
match: {
|
||||
type: 'trailer',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
sort: [
|
||||
{
|
||||
releaseDate: {
|
||||
order: 'desc',
|
||||
},
|
||||
},
|
||||
],
|
||||
}, {
|
||||
encodeJSON: true,
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
});
|
||||
|
||||
return scrapeAll(res.body.hits.hits, site);
|
||||
}
|
||||
|
||||
async function fetchScene(url, entity, baseRelease, options) {
|
||||
if (baseRelease?.entryId) {
|
||||
// overview and deep data is the same, don't hit server unnecessarily
|
||||
return baseRelease;
|
||||
}
|
||||
|
||||
const encodedId = new URL(url).pathname.split('/')[2];
|
||||
const entryId = decodeId(encodedId);
|
||||
|
||||
const res = await http.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
});
|
||||
|
||||
return scrapeScene(res.body._source, entity, options); // eslint-disable-line no-underscore-dangle
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }, context, include) {
|
||||
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
|
||||
size: 5,
|
||||
sort: [{
|
||||
_score: {
|
||||
order: 'desc',
|
||||
},
|
||||
}],
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
name: {
|
||||
query: actorName,
|
||||
operator: 'and',
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
match: {
|
||||
status: 'ok',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}, {
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
encodeJSON: true,
|
||||
});
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
const url = `${channel.url}&page=${page}`;
|
||||
const res = await unprint.get(url, { selectAll: '.search-grid li' });
|
||||
|
||||
if (res.ok) {
|
||||
const actor = res.body.hits.hits.find((hit) => hit._source.name.toLowerCase() === actorName.toLowerCase());
|
||||
|
||||
if (actor) {
|
||||
return scrapeProfile(actor._source, context.entity, include);
|
||||
}
|
||||
|
||||
return null;
|
||||
return scrapeAll(res.context, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Quote a value as an XPath 1.0 string literal.
 *
 * XPath 1.0 has no escape character: a value containing both quote types
 * has to be assembled with concat().
 */
function toXPathLiteral(value) {
	const text = String(value);

	if (!text.includes('"')) {
		return `"${text}"`;
	}

	if (!text.includes("'")) {
		return `'${text}'`;
	}

	return `concat(${text.split('"').map((part) => `"${part}"`).join(', \'"\', ')})`;
}

/**
 * Find an actor through the site's search page and scrape the profile page.
 *
 * @param {object} actor - actor to look up; only its name is read
 * @param {object} context - its entity is passed on to scrapeProfile
 * @param {object} include - passed on to scrapeProfile
 * @returns {Promise<object|number|null>} the profile, null when the search page has no link
 *   containing the actor name, or the status of the request that failed
 */
async function fetchProfile({ name: actorName }, { entity }, include) {
	const searchRes = await unprint.get(`https://www.bang.com/pornstars?term=${slugify(actorName, '+')}`);

	if (!searchRes.ok) {
		return searchRes.status;
	}

	// quote the name safely, a name containing a double quote would otherwise break the expression
	const url = searchRes.context.query.url(`//a[contains(.//span, ${toXPathLiteral(actorName)})]`);

	if (!url) {
		return null;
	}

	const actorRes = await unprint.get(url);

	if (actorRes.ok) {
		return scrapeProfile(actorRes.context, url, entity, include);
	}

	return actorRes.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
scrapeScene,
|
||||
useUnprint: true,
|
||||
};
|
||||
|
||||
@@ -237,6 +237,10 @@ async function destroyBypassSession(sessionId) {
|
||||
}
|
||||
|
||||
async function destroyBypassSessions() {
|
||||
if (!config.bypass.cloudflare.enabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
const sessionListRes = await limiters.bypass.schedule(async () => bhttp.post(config.bypass.cloudflare.path, {
|
||||
cmd: 'sessions.list',
|
||||
}, {
|
||||
|
||||
Reference in New Issue
Block a user