// traxxx/src/scrapers/vixen.js
//
// 564 lines
// 12 KiB
// JavaScript
// Executable File

'use strict';
/* eslint-disable newline-per-chained-call */
const Promise = require('bluebird');
const moment = require('moment');
const unprint = require('unprint');
const qu = require('../utils/qu');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
// Lookup from the single-letter `sex` code on a model record (see scrapeProfile) to a gender label.
const genderMap = {
	F: 'female',
	M: 'male',
	// not yet observed
	T: 'transsexual',
};
/**
 * Flattens a list of image variants into candidate URLs, tallest image first.
 *
 * For every image the 3x high-DPI URL comes before the 2x URL, which comes before the plain `src`.
 * Variants that are not present on an image are left out of the result.
 *
 * @param {Array<{height: number, src: string, highdpi?: Object}>} avatar - image variants
 * @returns {string[]} candidate URLs in order of preference; empty when no images are given
 */
function getAvatarFallbacks(avatar) {
	if (!Array.isArray(avatar)) {
		return [];
	}

	// sort a copy, Array.prototype.sort works in place and would reorder the caller's data
	return [...avatar]
		.sort((imageA, imageB) => imageB.height - imageA.height)
		.flatMap((image) => [image.highdpi?.['3x'], image.highdpi?.['2x'], image.src])
		.filter(Boolean);
}
/**
 * Normalizes a list of media sources and orders them by preference.
 *
 * Sources are ordered by pixel count (largest first) and then re-ordered by how close their
 * aspect ratio, rounded to one decimal, is to 1.8; the pixel order is kept between sources
 * with the same rounded deviation.
 *
 * @param {Array<{src: string, width: number, height: number, type?: string}>} sources
 * @param {string} [type='image/jpeg'] - type used for sources without one, and expected for binary/octet-stream
 * @returns {Array<Object>|null} curated sources, or null when no sources are given
 */
function curateSources(sources, type = 'image/jpeg') {
	if (!sources) {
		return null;
	}

	const pixelCount = (source) => source.width * source.height;
	const aspectDeviation = (source) => Math.abs(1.8 - Number((source.width / source.height).toFixed(1))); // approximation to 16:9

	const curated = sources.map(({ src, width, height, type: sourceType }) => ({
		src,
		width,
		height,
		type: sourceType || type,
		expectType: {
			'binary/octet-stream': type,
		},
	}));

	curated.sort((resA, resB) => pixelCount(resB) - pixelCount(resA)); // number of pixels
	curated.sort((resA, resB) => aspectDeviation(resA) - aspectDeviation(resB));

	return curated;
}
/**
 * Converts a list of scene records into base releases.
 *
 * @param {Array<Object>} scenes - scene records with videoId, slug, title, releaseDate, etc.
 * @param {{url: string}|null} channel - channel whose URL is the base for scene and model links
 * @param {string} [origin] - base URL used when no channel is given; callers in this file
 *   pass `null` as channel together with an origin, which used to throw on `channel.url`
 * @returns {Array<Object>} one release per scene
 */
function scrapeAll(scenes, channel, origin) {
	const baseUrl = channel?.url || origin;

	return scenes.map((data) => {
		const release = {};

		release.entryId = data.videoId;
		release.url = `${baseUrl}/videos/${data.slug}`;

		release.title = data.title;
		release.date = qu.extractDate(data.releaseDate);

		// not every record is guaranteed to carry models, images or previews
		release.actors = (data.modelsSlugged || []).map((model) => ({
			name: model.name,
			url: `${baseUrl}/models/${model.slugged}`,
		}));

		release.poster = curateSources(data.images?.listing);
		release.teaser = curateSources(data.previews?.listing, 'video/mp4');

		release.stars = data.rating;

		return release;
	});
}
/**
 * Converts the next scheduled scene into a single-item release list.
 *
 * @param {Object|null} scene - next scene record; skipped when absent or flagged `isPreReleasePeriod`
 * @param {{url: string}} site - channel whose URL is the base for the scene link
 * @returns {Array<Object>|null} list holding the upcoming release, or null when the scene is skipped
 */
function scrapeUpcoming(scene, site) {
	if (!scene || scene.isPreReleasePeriod) {
		return null;
	}

	const release = {};

	release.entryId = scene.videoId;
	release.url = `${site.url}/videos/${scene.slug}`;

	// the title is derived from the slug by capitalizing each dash-separated component
	release.title = scene.slug
		.split('-')
		.map((component) => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
		.join(' ');

	release.date = moment.utc(scene.releaseDate).toDate();
	release.datePrecision = 'minute';

	release.actors = (scene.models || []).map((model) => model.name);

	release.poster = curateSources(scene.images?.poster);
	// previews are videos; without an explicit type they were curated as image/jpeg
	release.teaser = curateSources(scene.previews?.poster, 'video/mp4');

	return [release];
}
/**
 * Requests trailer tokens for a video from the channel's GraphQL endpoint and turns them
 * into a list of sources, highest quality first.
 *
 * @param {string} videoId - ID passed as `$videoId` to the `getToken` query
 * @param {{url: string}} channel - channel whose URL hosts the `/graphql` endpoint
 * @param {string} url - scene URL, sent as referer
 * @returns {Promise<Array<{src: string, quality: number}>|null>} available sources, or null
 *   when the request fails or no resolution carries a token
 */
async function getTrailer(videoId, channel, url) {
	const res = await http.post(`${channel.url}/graphql`, {
		operationName: 'getToken',
		variables: {
			videoId,
			device: 'trailer',
		},
		query: `
query getToken($videoId: ID!, $device: Device!) {
generateVideoToken(input: {videoId: $videoId, device: $device}) {
p270 {
token
cdn
__typename
}
p360 {
token
cdn
__typename
}
p480 {
token
cdn
__typename
}
p480l {
token
cdn
__typename
}
p720 {
token
cdn
__typename
}
p1080 {
token
cdn
__typename
}
p2160 {
token
cdn
__typename
}
hls {
token
cdn
__typename
}
__typename
}
}
`,
	}, {
		headers: {
			referer: url,
			origin: channel.url,
		},
	});

	const tokens = res.ok && res.body?.data?.generateVideoToken;

	if (!tokens) {
		return null;
	}

	// the `token` field is used directly as the source, as before; resolutions the API
	// did not return are dropped instead of being emitted with an undefined src
	const sources = [2160, 1080, 720, 480, 360, 270]
		.map((quality) => ({
			src: tokens[`p${quality}`]?.token,
			quality,
		}))
		.filter((source) => source.src);

	return sources.length > 0 ? sources : null;
}
/**
 * Builds a release from the page props embedded in a scene page.
 *
 * @param {{video: Object, galleryImages?: Array<{src: string}>}} data - page props of the scene page
 * @param {string} url - scene URL
 * @param {{url: string}} channel - channel whose URL is the base for model links and the trailer request
 * @param {{includeTrailers?: boolean}} [options] - the trailer is only requested when `includeTrailers` is set
 * @returns {Promise<Object>} the scraped release
 */
async function scrapeScene(data, url, channel, options) {
	const { video } = data;

	const release = {
		url,
		// NOTE(review): the previous code assigned `videoId || newId` and then unconditionally
		// overwrote it with `newId`; `newId` stays the preferred ID, with `videoId` as fallback
		entryId: video.newId || video.videoId,
		title: video.title,
		description: video.description,
		director: video.directorNames,
		duration: qu.durationToSeconds(video.runLength),
		stars: video.rating,
	};

	release.date = qu.extractDate(video.releaseDate);

	// prefer the slugged models for their profile URL, fall back to the plain model list
	release.actors = video.modelsSlugged?.map((model) => ({
		name: model.name,
		url: `${channel.url}/models/${model.slugged}`,
	})) || video.models;

	release.poster = curateSources(video.images?.poster) || video.videoImage?.src;

	release.photos = data.galleryImages?.length > 0
		? data.galleryImages.map((image) => image.src)
		: video.carousel?.map((photo) => photo.main?.[0]?.src).filter(Boolean);

	if (options?.includeTrailers) {
		release.trailer = await getTrailer(release.entryId, channel, release.url);
	}

	// the optional fields are guarded themselves; guarding `video` here had no effect
	// because it is already dereferenced above
	release.qualities = video.downloadResolutions?.map((quality) => Number(quality.width)).filter(Boolean); // width property is actually the height
	release.channel = video.id?.split(':')[0];

	return release;
}
/**
 * Builds a release from a video node returned by the `searchVideos` GraphQL query.
 *
 * Lists that are missing or null on the node are left undefined on the release.
 *
 * @param {Object} data - video node (videoId, slug, title, models, directors, categories, chapters, ...)
 * @param {{url: string}} channel - channel whose URL is the base for the scene link and the trailer request
 * @param {{includeTrailers?: boolean}} [options] - the trailer is only requested when `includeTrailers` is set
 * @returns {Promise<Object>} the scraped release
 */
async function scrapeSceneData(data, channel, options) {
	const release = {};

	release.entryId = data.videoId;
	release.url = `${channel.url}/videos/${data.slug}`;

	release.title = data.title;
	release.description = data.description;

	// new Date(null) would silently yield the Unix epoch
	release.date = data.releaseDate ? new Date(data.releaseDate) : null;
	release.duration = unprint.extractDuration(data.runLength);

	release.actors = data.models;
	release.directors = data.directors?.map((director) => ({
		entryId: director.directorId,
		name: director.name,
	}));

	release.poster = curateSources(data.images?.poster);
	release.photos = data.carousel?.map((photo) => photo.main?.[0]?.src).filter(Boolean);

	if (options?.includeTrailers) {
		release.trailer = await getTrailer(release.entryId, channel, release.url);
	}

	release.tags = data.categories?.map((category) => category.name);
	release.qualities = data.downloadResolutions?.map((quality) => Number(quality.width)).filter(Boolean); // width property is actually the height

	release.chapters = data.chapters?.video?.map((chapter) => ({
		time: chapter.seconds,
		tags: [chapter.title],
	}));

	release.channel = data.site;
	release.stars = data.rating;

	return release;
}
/**
 * Looks up a scene through the channel's `searchVideos` GraphQL query, using words from
 * the scene's URL slug as the search query.
 *
 * @param {{url: string}|null} release - release whose URL contains `/videos/<slug>`
 * @param {{url: string, slug: string}} channel - channel hosting the `/graphql` endpoint; its
 *   upper-cased slug is sent as the `site` variable
 * @returns {Promise<Object|null>} the video node whose slug matches exactly, or null when the
 *   URL has no slug, the request fails, or no matching node is returned
 */
async function fetchGraphqlScene(release, channel) {
	if (!release?.url) {
		return null;
	}

	const slug = new URL(release.url).pathname.match(/\/videos\/(.*)/)?.[1];

	if (!slug) {
		return null;
	}

	// the API won't reliably return results when the query is over ~30 characters for some reason
	// it may still occasionally fail to return the relevant result first, such as Blacked Raw - After the Show
	// a word is only appended when the query, including its leading space, stays within 30 characters
	const query = slug.split('-').reduce((acc, word) => {
		const newAcc = `${acc} ${word}`;

		if (newAcc.length > 30) {
			return acc;
		}

		return newAcc;
	}, '').trim();

	const res = await http.post(`${channel.url}/graphql`, {
		operationName: 'searchVideos',
		variables: {
			site: channel.slug.toUpperCase(),
			query,
		},
		// ranking can be weird, use higher limit to increase likelihood of finding scene
		query: `
query searchVideos($site: Site!, $query: String!) {
searchVideos(input: {
query: $query
site: $site
first: 50
}) {
edges {
node {
videoId
title
slug
description
releaseDate
runLength
site
rating
models {
name
}
directors {
directorId
name
}
categories {
name
}
chapters {
video {
title
seconds
}
}
downloadResolutions {
width
}
carousel {
main {
src
}
}
images {
poster {
...ImageInfo
}
}
}
}
}
}
fragment ImageInfo on Image {
src
width
height
highdpi {
double
}
}
`,
	}, {
		headers: {
			referer: release.url,
			origin: channel.url,
		},
	});

	if (!res.ok) {
		return null;
	}

	// `data` may be null on an otherwise successful response, every level is optional
	const edges = res.body?.data?.searchVideos?.edges;

	return edges?.find((edge) => edge?.node?.slug === slug)?.node || null;
}
/**
 * Fetches a scene, preferring the GraphQL search and falling back to the data embedded
 * in the scene page (`#__NEXT_DATA__`).
 *
 * @param {string} url - scene URL
 * @param {{url: string, slug: string}} channel - channel the scene belongs to
 * @param {Object|null} baseRelease - previously scraped release, if any; its URL is used for the GraphQL lookup
 * @param {Object} options - scrape options passed on to the scene scrapers
 * @returns {Promise<Object|number|null>} the release, the HTTP status when the page request
 *   fails, or null when the page carries no usable scene data
 */
async function fetchScene(url, channel, baseRelease, options) {
	// nothing visible here guarantees that baseRelease exists or carries a URL, fall back to the page URL
	const graphqlData = await fetchGraphqlScene({ ...baseRelease, url: baseRelease?.url || url }, channel);

	if (graphqlData) {
		return scrapeSceneData(graphqlData, channel, options);
	}

	const session = qu.session();
	const res = await qu.get(url, null, null, { session });

	if (!res.ok) {
		return res.status;
	}

	const dataString = res.item.query.html('#__NEXT_DATA__');
	const pageProps = dataString && JSON.parse(dataString)?.props?.pageProps;

	// scrapeScene dereferences pageProps.video, so bail out when the page does not embed it
	if (!pageProps?.video) {
		return null;
	}

	// the session is not passed on, scrapeScene takes no such argument
	return scrapeScene(pageProps, url, channel, options);
}
/**
 * Fetches the given pages of a model's videos and scrapes them into releases.
 *
 * @param {number[]} pages - page numbers to request
 * @param {{targetUrl: string}} model - model record; `targetUrl` is appended to `<origin>/api`
 * @param {string} origin - base URL of the site
 * @returns {Promise<Array<Object>>} releases of all pages; pages that fail or lack videos contribute nothing
 */
async function fetchActorReleases(pages, model, origin) {
	const releasesPerPage = await Promise.map(pages, async (page) => {
		const url = `${origin}/api${model.targetUrl}?page=${page}`;
		const res = await http.get(url);

		const videos = res.status === 200 && res.body?.data?.videos?.videos;

		if (!videos) {
			return [];
		}

		// scrapeAll reads the base URL from its channel argument; passing null as channel
		// threw on `channel.url`, so the origin is wrapped in a channel-like object
		return scrapeAll(videos, { url: origin });
	}, { concurrency: 3 });

	return releasesPerPage.flat();
}
/**
 * Builds an actor profile from the model API response.
 *
 * @param {{model: Object, videos?: {count: number, videos: Array<Object>}}} data - API response data
 * @param {string} origin - base URL of the site
 * @param {boolean} withReleases - when set, the remaining video pages are fetched as well
 * @returns {Promise<Object>} the profile, including the releases found
 */
async function scrapeProfile(data, origin, withReleases) {
	const { model } = data;
	const profile = {};

	// new Date(null) is the Unix epoch; only keep a birthdate that is present and parses
	const birthdate = model.dateOfBirth ? new Date(model.dateOfBirth) : null;

	if (birthdate && !Number.isNaN(birthdate.getTime())) {
		profile.birthdate = birthdate;
	}

	profile.gender = genderMap[model.sex];
	profile.hair = model.hairColour;
	profile.nationality = model.nationality;

	if (model.biography?.trim().length > 0) profile.description = model.biography;

	// property names are spelled as the record spells them
	if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
	if (model.waistMeasurment) profile.waist = model.waistMeasurment;
	if (model.hipMeasurment) profile.hip = model.hipMeasurment;

	const images = model.images || {};

	if (images.listing) profile.avatar = getAvatarFallbacks(images.listing);
	if (images.profile) profile.poster = getAvatarFallbacks(images.profile);
	if (images.poster) profile.banner = getAvatarFallbacks(images.poster);

	// scrapeAll reads the base URL from its channel argument; passing null as channel
	// threw on `channel.url`, so the origin is wrapped in a channel-like object
	const releases = scrapeAll(data.videos?.videos || [], { url: origin });

	if (withReleases) {
		// the first page is already part of `data`; 6 videos per page is carried over as-is
		const pageCount = Math.ceil((data.videos?.count || 0) / 6);
		const otherPages = Array.from({ length: Math.max(pageCount - 1, 0) }, (value, index) => index + 2);
		const otherReleases = await fetchActorReleases(otherPages, model, origin);

		profile.releases = [...releases, ...otherReleases];
	} else {
		profile.releases = releases;
	}

	return profile;
}
/**
 * Fetches a page of the site's video overview and scrapes the scenes embedded in its
 * `#__NEXT_DATA__` element.
 *
 * @param {{url: string}} site - site to fetch the videos of
 * @param {number} [page=1] - overview page number
 * @returns {Promise<Array<Object>|number>} scraped releases, an empty list when the page embeds
 *   no edges, or the HTTP status when the request fails
 */
async function fetchLatest(site, page = 1) {
	const res = await qu.get(`${site.url}/videos?page=${page}`);

	if (!res.ok) {
		return res.status;
	}

	const embedded = res.item.query.html('#__NEXT_DATA__');
	const parsed = embedded && JSON.parse(embedded);
	const edges = parsed?.props.pageProps.edges;

	if (!edges) {
		return [];
	}

	const nodes = edges.map(({ node }) => node);

	return scrapeAll(nodes, site);
}
/**
 * Fetches the next scheduled scene of a channel through the `getNextScene` GraphQL query.
 *
 * @param {{url: string, slug: string}} channel - channel hosting the `/graphql` endpoint; its
 *   upper-cased slug is sent as the `site` variable
 * @returns {Promise<Array<Object>|number>} list with the upcoming release, an empty list when
 *   there is nothing to report, or the HTTP status when the request fails
 */
async function fetchUpcoming(channel) {
	const query = `
query getNextScene($site: Site!) {
nextScene: findNextReleaseVideo(input: { site: $site }) {
videoId
slug
isPreReleasePeriod
releaseDate
models {
name
__typename
}
images {
countdown {
...ImageInfo
__typename
}
poster {
...ImageInfo
__typename
}
__typename
}
previews {
countdown {
...PreviewInfo
__typename
}
poster {
...PreviewInfo
__typename
}
__typename
}
__typename
}
}
fragment ImageInfo on Image {
src
placeholder
width
height
highdpi {
double
triple
__typename
}
webp {
src
placeholder
highdpi {
double
triple
__typename
}
__typename
}
}
fragment PreviewInfo on Preview {
src
width
height
type
}
`;

	const res = await http.post(`${channel.url}/graphql`, {
		operationName: 'getNextScene',
		query,
		variables: {
			site: channel.slug.toUpperCase(),
		},
	});

	if (!res.ok) {
		return res.status;
	}

	// `data` may be null on an otherwise successful response
	const nextScene = res.body?.data?.nextScene;

	if (!nextScene) {
		return [];
	}

	// scrapeUpcoming returns null for scenes it skips, keep the result a list either way
	return scrapeUpcoming(nextScene, channel) || [];
}
/**
 * Fetches an actor's profile from the site's API, addressed by the slugified actor name.
 *
 * @param {{name: string}} actor - actor to look up
 * @param {{site: {url: string}}} context - scrape context holding the site
 * @param {{scenes: boolean}} include - `scenes` decides whether all releases are fetched too
 * @returns {Promise<Object|number|null>} the profile, null when the response holds no data,
 *   or the HTTP status when the request fails
 */
async function fetchProfile({ name: actorName }, { site }, include) {
	const { url: origin } = site;
	const res = await http.get(`${origin}/api/${slugify(actorName)}`);

	if (!res.ok) {
		return res.status;
	}

	const profileData = res.body.data;

	return profileData
		? scrapeProfile(profileData, origin, include.scenes)
		: null;
}
// Entry points exposed by this scraper module.
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
fetchProfile,
};