traxxx/src/scrapers/vixen.js

695 lines
14 KiB
JavaScript
Executable File

'use strict';
/* eslint-disable newline-per-chained-call */
const moment = require('moment');
const unprint = require('unprint');
const argv = require('../argv');
const qu = require('../utils/qu');
const http = require('../utils/http');
/** Translates the one-letter `sex` code found on a model record into a gender label. */
const genderMap = {
  F: 'female',
  M: 'male',
  T: 'transsexual', // not yet observed
};
/**
 * Flattens a list of image variants into an ordered list of fallback sources.
 *
 * Images are ordered tallest first; within each image the high-DPI URLs
 * (3x/triple, then 2x/double) come before the plain `src`. Missing URLs are dropped.
 *
 * @param {Array<{height: number, src: string, highdpi: Object}>} [avatar] - image variants
 * @returns {?Array<{src: string, expectType: Object}>} fallback sources, or null when no list was given
 */
function getAvatarFallbacks(avatar) {
  if (!avatar) {
    return null;
  }

  // sort() works in place; copy first so the caller's array keeps its original order
  return [...avatar]
    .sort((imageA, imageB) => imageB.height - imageA.height)
    .flatMap((image) => [
      image.highdpi?.['3x'],
      image.highdpi?.triple,
      image.highdpi?.['2x'],
      image.highdpi?.double,
      image.src,
    ])
    .filter(Boolean)
    .map((source) => ({
      src: source,
      expectType: {
        'binary/octet-stream': 'image/jpeg',
      },
    }));
}
/**
 * Normalizes a list of media sources and ranks them by preference.
 *
 * Sources whose aspect ratio (rounded to one decimal) is closest to 1.8 come first;
 * sources that tie on aspect ratio are ordered by descending pixel count.
 *
 * @param {Array<Object>} [sources] - raw sources with `src`, `width`, `height` and optionally `type`
 * @param {string} [type='image/jpeg'] - type used when a source has none, and for binary/octet-stream responses
 * @returns {?Array<Object>} ranked sources, or null when no list was given
 */
function curateSources(sources, type = 'image/jpeg') {
  if (!sources) {
    return null;
  }

  const pixelCount = ({ width, height }) => width * height;
  // approximation to 16:9
  const aspectDeviation = ({ width, height }) => Math.abs(1.8 - Number((width / height).toFixed(1)));

  const curated = sources.map(({ src, width, height, type: sourceType }) => ({
    src,
    width,
    height,
    type: sourceType || type,
    expectType: {
      'binary/octet-stream': type,
    },
  }));

  // two passes on purpose: the second sort is stable, so the pixel order survives as the tie-breaker
  curated.sort((resA, resB) => pixelCount(resB) - pixelCount(resA));
  curated.sort((resA, resB) => aspectDeviation(resA) - aspectDeviation(resB));

  return curated;
}
/**
 * Converts scene nodes from a listing into base releases.
 *
 * @param {Array<Object>} scenes - scene nodes
 * @param {Object} channel - channel whose `url` is used to build scene and model links
 * @returns {Array<Object>} one release per scene
 */
function scrapeAll(scenes, channel) {
  return scenes.map((data) => {
    // prefer the slugged model list, it allows linking to the model page
    const performers = data.modelsSlugged || data.models;

    return {
      entryId: data.videoId,
      url: `${channel.url}/videos/${data.slug}`,
      title: data.title,
      date: qu.extractDate(data.releaseDate),
      actors: performers?.map((model) => ({
        name: model.name,
        url: model.slugged && `${channel.url}/models/${model.slugged}`,
      })),
      poster: curateSources(data.images.listing),
      teaser: curateSources(data.previews.listing, 'video/mp4'),
      stars: data.rating,
    };
  });
}
/**
 * Converts upcoming scene data into releases.
 *
 * Scenes that are missing or still in their pre-release period are skipped.
 * The upcoming data carries no title here, so one is derived from the slug.
 *
 * @param {Object|Array<Object>} scenes - a single upcoming scene or a list of them
 * @param {Object} site - channel whose `url` is used to build scene links
 * @returns {Array<Object>} releases for the scenes that were kept
 */
function scrapeUpcoming(scenes, site) {
  // NOTE(review): the caller passes the `nextScene` field as-is; tolerate both a single
  // object and a list so neither response shape crashes on .map()
  const sceneList = Array.isArray(scenes) ? scenes : [scenes];

  return sceneList
    .filter((scene) => scene && !scene.isPreReleasePeriod)
    .map((scene) => {
      const release = {};

      release.entryId = scene.videoId;
      release.url = `${site.url}/videos/${scene.slug}`;

      release.title = scene.slug
        .split('-')
        .map((component) => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
        .join(' ');

      release.date = moment.utc(scene.releaseDate).toDate();
      release.datePrecision = 'minute';

      release.actors = scene.models.map((model) => model.name);

      release.poster = curateSources(scene.images.poster);
      // previews are video clips, so fall back to a video type like the listing teaser does
      release.teaser = curateSources(scene.previews.poster, 'video/mp4');

      return release;
    });
}
/**
 * Requests trailer tokens for a video through the channel's `getToken` GraphQL operation.
 *
 * The query also asks for the `p480l` and `hls` variants, but only the progressive
 * qualities listed below end up in the result.
 *
 * @param {string} videoId - ID of the video to request tokens for
 * @param {Object} channel - channel whose `url` hosts the GraphQL endpoint
 * @param {string} url - scene URL, sent as referer
 * @returns {Promise<?Array<{src: (string|undefined), quality: number}>>} one entry per quality,
 *   highest first (`src` is undefined when the response lacks that quality), or null when the
 *   request failed or returned no token data
 */
async function getTrailer(videoId, channel, url) {
  const payload = {
    operationName: 'getToken',
    variables: {
      videoId,
      device: 'trailer',
    },
    query: `
query getToken($videoId: ID!, $device: Device!) {
generateVideoToken(input: {videoId: $videoId, device: $device}) {
p270 {
token
cdn
__typename
}
p360 {
token
cdn
__typename
}
p480 {
token
cdn
__typename
}
p480l {
token
cdn
__typename
}
p720 {
token
cdn
__typename
}
p1080 {
token
cdn
__typename
}
p2160 {
token
cdn
__typename
}
hls {
token
cdn
__typename
}
__typename
}
}
`,
  };

  const requestOptions = {
    headers: {
      referer: url,
      origin: channel.url,
    },
  };

  const res = await http.post(`${channel.url}/graphql`, payload, requestOptions);
  const tokens = res.ok && res.body.data?.generateVideoToken;

  if (!tokens) {
    return null;
  }

  // highest quality first; each quality maps onto the response's `p<quality>` field
  return [2160, 1080, 720, 480, 360, 270].map((quality) => ({
    src: tokens[`p${quality}`]?.token,
    quality,
  }));
}
/**
 * Builds a release from the page props of a scene page.
 *
 * @param {Object} data - page props; must contain `video`, may contain `galleryImages`
 * @param {string} url - URL of the scene page
 * @param {Object} channel - channel whose `url` is used for model links and the trailer request
 * @param {Object} options - scrape options; `includeTrailers` enables the trailer request
 * @returns {Promise<Object>} the scraped release
 */
async function scrapeScene(data, url, channel, options) {
  const { video } = data;

  const release = {
    url,
    // newId keeps precedence, as it was the value that ended up on the release before;
    // videoId is only used when newId is missing, instead of leaving the release without an ID
    entryId: video.newId || video.videoId,
    title: video.title,
    description: video.description,
    director: video.directorNames,
    duration: qu.durationToSeconds(video.runLength),
    stars: video.rating,
  };

  release.date = qu.extractDate(video.releaseDate);

  // prefer the slugged list for profile links, fall back to the plain model list
  release.actors = video.modelsSlugged?.map((model) => ({
    name: model.name,
    url: `${channel.url}/models/${model.slugged}`,
  })) ?? video.models;

  release.poster = curateSources(video.images?.poster) || video.videoImage?.src;

  release.photos = data.galleryImages?.length > 0
    ? data.galleryImages.map((image) => image.src)
    : video.carousel?.map((photo) => photo.main[0]?.src).filter(Boolean);

  if (options.includeTrailers) {
    release.trailer = await getTrailer(release.entryId, channel, release.url);
  }

  // guard the optional fields themselves; `video` was already dereferenced above
  release.qualities = video.downloadResolutions?.map((quality) => Number(quality.width)).filter(Boolean); // width property is actually the height
  release.channel = video.id?.split(':')[0];

  return release;
}
/**
 * Builds a release from a video object returned by the GraphQL API.
 *
 * @param {Object} data - video object
 * @param {Object} channel - channel whose `url` is used for the scene link and the trailer request
 * @param {Object} options - scrape options; `includeTrailers` enables the trailer request
 * @returns {Promise<Object>} the scraped release
 */
async function scrapeSceneData(data, channel, options) {
  const entryId = data.videoId;
  const url = `${channel.url}/videos/${data.slug}`;

  const release = {
    entryId,
    url,
    title: data.title,
    description: data.description,
    date: new Date(data.releaseDate),
    duration: unprint.extractDuration(data.runLength),
    actors: data.models,
    directors: data.directors?.map(({ directorId, name }) => ({
      entryId: directorId,
      name,
    })),
    poster: curateSources(data.images?.poster),
    photos: data.carousel?.map((photo) => photo.main[0]?.src).filter(Boolean),
  };

  if (options.includeTrailers) {
    release.trailer = await getTrailer(entryId, channel, url);
  }

  return Object.assign(release, {
    tags: data.categories?.map((category) => category.name),
    // width property is actually the height
    qualities: data.downloadResolutions?.map((quality) => Number(quality.width)).filter(Boolean),
    chapters: data.chapters?.video?.map((chapter) => ({
      time: chapter.seconds,
      tags: [chapter.title],
    })),
    channel: data.site,
    stars: data.rating,
  });
}
// GraphQL selection set for a single video, interpolated into the findOneVideo queries.
// It spreads ...ImageInfo, so any query embedding it must also define that fragment.
const videoFields = `
videoId
title
slug
description
releaseDate
runLength
site
rating
models {
name
}
directors {
directorId
name
}
categories {
name
}
chapters {
video {
title
seconds
}
}
downloadResolutions {
width
}
carousel {
main {
src
}
}
images {
poster {
...ImageInfo
}
}
`;
// Definition of the ImageInfo GraphQL fragment, including the double and triple high-DPI URLs.
// Appended to queries that spread ...ImageInfo, such as the model query in fetchProfile.
const imageFragment = `
fragment ImageInfo on Image {
src
width
height
highdpi {
double
triple
}
}
`;
/**
 * Determines the slug of a release.
 *
 * @param {Object} release - release with an optional `slug` and/or `url`
 * @returns {?string|undefined} the release's own slug if set; otherwise the part of the URL path
 *   after `/videos/` (undefined when the path has no such part); null when there is neither slug nor URL
 */
function getSlug(release) {
  const { slug, url } = release;

  if (slug) {
    return slug;
  }

  if (!url) {
    return null;
  }

  const { pathname } = new URL(url);

  return pathname.match(/\/videos\/(.*)/)?.[1];
}
/**
 * Looks up a single video through the channel's GraphQL API.
 *
 * The video is queried by ID when one is known (the `entryId` command line argument takes
 * precedence over the release's own), and by slug otherwise.
 *
 * @param {Object} release - base release providing `entryId`, `slug` and/or `url`
 * @param {Object} channel - channel whose `url` hosts the GraphQL endpoint
 * @returns {Promise<?Object>} the video object, or null when there is nothing to query by,
 *   the request failed, or the response contained no video
 */
async function fetchGraphqlScene(release, channel) {
  const slug = getSlug(release);
  const entryId = argv.entryId || release.entryId;

  if (!entryId && !slug) {
    return null;
  }

  const query = entryId
    ? `
query searchVideos($videoId: ID!) {
video: findOneVideo(input: { videoId: $videoId }) {
${videoFields}
}
}
`
    : `
query searchVideos($slug: String!) {
video: findOneVideo(input: { slug: $slug }) {
${videoFields}
}
}
`;

  const res = await http.post(`${channel.url}/graphql`, {
    operationName: 'searchVideos',
    variables: {
      videoId: entryId,
      slug,
    },
    query: `
${query}
fragment ImageInfo on Image {
src
width
height
highdpi {
double
}
}
`,
  }, {
    headers: {
      referer: release.url,
      origin: channel.url,
    },
  });

  if (!res.ok) {
    return null;
  }

  // a GraphQL error response can be HTTP 200 with `data: null`; treat it as "not found" rather than crash
  return res.body?.data?.video ?? null;
}
/**
 * Fetches and scrapes a single scene.
 *
 * The GraphQL API is tried first; when it yields no video, the scene page is fetched
 * and scraped from its embedded `#__NEXT_DATA__` JSON instead.
 *
 * @param {string} url - URL of the scene page
 * @param {Object} channel - channel the scene belongs to
 * @param {Object} baseRelease - release data known so far, used for the GraphQL lookup
 * @param {Object} options - scrape options, passed on to the scene scrapers
 * @returns {Promise<?Object|number>} the scraped release, null when the page carries no
 *   video data, or the HTTP status when the page request failed
 */
async function fetchScene(url, channel, baseRelease, options) {
  const graphqlData = await fetchGraphqlScene(baseRelease, channel);

  if (graphqlData) {
    return scrapeSceneData(graphqlData, channel, options);
  }

  const session = qu.session();
  const res = await qu.get(url, null, null, { session });

  if (!res.ok) {
    return res.status;
  }

  const dataString = res.item.query.html('#__NEXT_DATA__');
  const pageProps = dataString && JSON.parse(dataString)?.props?.pageProps;

  // the page may lack the embedded data (or the video in it); report "nothing found" instead of throwing
  if (!pageProps?.video) {
    return null;
  }

  return scrapeScene(pageProps, url, channel, options);
}
/**
 * Builds an actor profile from a GraphQL model response.
 *
 * @param {Object} data - response data containing `model`
 * @param {Object} channel - channel passed on when scraping the model's scenes
 * @returns {Promise<Object>} the profile; optional details are only set when present on the model
 */
async function scrapeProfile(data, channel) {
  const { model } = data;
  const profile = {};

  // most details seemingly unavailable in graphql
  // check the model, not the still empty profile, or the birthdate is never set
  if (model.dateOfBirth) profile.birthdate = new Date(model.dateOfBirth);

  profile.gender = genderMap[model.sex];
  profile.hair = model.hairColour;
  profile.nationality = model.nationality;

  // biography may be absent; only keep it when it has visible content
  if (model.biography?.trim().length > 0) profile.description = model.biography;

  // the misspelled *Measurment keys are the field names read from the model
  if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
  if (model.waistMeasurment) profile.waist = model.waistMeasurment;
  if (model.hipMeasurment) profile.hip = model.hipMeasurment;

  profile.avatar = getAvatarFallbacks(model.images?.listing);
  profile.poster = getAvatarFallbacks(model.images?.profile);
  profile.banner = getAvatarFallbacks(model.images?.poster);

  if (model.videos) {
    profile.scenes = scrapeAll(model.videos.edges.map((edge) => edge.node), channel);
  }

  return profile;
}
/**
 * Fetches a page of the channel's video listing and scrapes the scenes embedded in it.
 *
 * @param {Object} site - channel whose `url` hosts the listing
 * @param {number} [page=1] - listing page to fetch
 * @returns {Promise<Array<Object>|number>} the scraped releases (empty when the page holds
 *   no scene data), or the HTTP status when the request failed
 */
async function fetchLatest(site, page = 1) {
  const res = await qu.get(`${site.url}/videos?page=${page}`);

  if (!res.ok) {
    return res.status;
  }

  // the listing is embedded in the page as JSON
  const dataString = res.item.query.html('#__NEXT_DATA__');
  const data = dataString && JSON.parse(dataString);
  const edges = data?.props.pageProps.edges;

  if (!edges) {
    return [];
  }

  return scrapeAll(edges.map((edge) => edge.node), site);
}
/**
 * Queries the channel's GraphQL API for its next release and scrapes it.
 *
 * @param {Object} channel - channel whose `url` hosts the GraphQL endpoint and whose
 *   upper-cased `slug` is sent as the `site` variable
 * @returns {Promise<Array<Object>|number>} the upcoming releases (empty when none was returned),
 *   or the HTTP status when the request failed
 */
async function fetchUpcoming(channel) {
  const query = `
query getNextScene($site: Site!) {
nextScene: findNextReleaseVideo(input: { site: $site }) {
videoId
slug
isPreReleasePeriod
releaseDate
models {
name
__typename
}
images {
countdown {
...ImageInfo
__typename
}
poster {
...ImageInfo
__typename
}
__typename
}
previews {
countdown {
...PreviewInfo
__typename
}
poster {
...PreviewInfo
__typename
}
__typename
}
__typename
}
}
fragment ImageInfo on Image {
src
placeholder
width
height
highdpi {
double
triple
__typename
}
webp {
src
placeholder
highdpi {
double
triple
__typename
}
__typename
}
}
fragment PreviewInfo on Preview {
src
width
height
type
}
`;

  const res = await http.post(`${channel.url}/graphql`, {
    operationName: 'getNextScene',
    query,
    variables: {
      site: channel.slug.toUpperCase(),
    },
  });

  if (!res.ok) {
    return res.status;
  }

  // a GraphQL error response can be HTTP 200 with `data: null`; treat it as "nothing upcoming"
  const nextScene = res.body?.data?.nextScene;

  if (nextScene) {
    return scrapeUpcoming(nextScene, channel);
  }

  return [];
}
/**
 * Looks up a model by slug through the channel's GraphQL API and scrapes the profile.
 *
 * @param {Object} actor - actor whose `slug` identifies the model
 * @param {Object} context - context providing the `channel` to query
 * @returns {Promise<?Object>} the scraped profile, or null when the request failed or no model was returned
 */
async function fetchProfile(actor, { channel }) {
  const query = `
query searchModels(
$slug: String!
$site: Site!
) {
model: findOneModel(input: { slug: $slug, site: $site }) {
name
biography
images {
listing {
...ImageInfo
}
profile {
...ImageInfo
}
poster {
...ImageInfo
}
}
videos {
edges {
node {
videoId
title
slug
releaseDate
runLength
site
rating
models {
name
}
carousel {
main {
src
}
}
previews {
listing {
src
}
}
images {
poster {
...ImageInfo
}
}
}
}
}
}
}
${imageFragment}
`;

  const payload = {
    operationName: 'searchModels',
    variables: {
      slug: actor.slug,
      site: channel.slug.toUpperCase(),
    },
    query,
  };

  const requestOptions = {
    headers: {
      referer: channel.url,
      origin: channel.url,
    },
  };

  const res = await http.post(`${channel.url}/graphql`, payload, requestOptions);
  const hasModel = res.ok && res.body.data?.model;

  return hasModel ? scrapeProfile(res.body.data, channel) : null;
}
// The functions this module exports.
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
fetchProfile,
};
/* less reliable search API in case direct video query becomes unavailable
async function findGraphqlScene(release, channel) {
const slug = new URL(release.url).pathname.match(/\/videos\/(.*)/)?.[1];
if (!slug) {
return null;
}
// the API won't reliably return results when the query is over ~30 characters for some reason
// it may still occasionally fail to return the relevant result first, e.g. for Blacked Raw - After the Show
const query = slug.split('-').reduce((acc, word) => {
const newAcc = `${acc} ${word}`;
if (newAcc.length > 30) {
return acc;
}
return newAcc;
}, '').trim();
const res = await http.post(`${channel.url}/graphql`, {
operationName: 'searchVideos',
variables: {
site: channel.slug.toUpperCase(),
query,
},
// ranking can be weird, use higher limit to increase likelihood of finding scene
query: `
query searchVideos($site: Site!, $query: String!) {
searchVideos(input: {
query: $query
site: $site
first: 50
}) {
edges {
node {
${videoFields}
}
}
}
}
fragment ImageInfo on Image {
src
width
height
highdpi {
double
}
}
`,
}, {
headers: {
referer: release.url,
origin: channel.url,
},
});
if (res.ok) {
return res.body.data?.searchVideos?.edges?.find((edge) => edge.node.slug === slug)?.node || null;
}
return null;
}
*/