traxxx/src/scrapers/vixen.js

377 lines
8.3 KiB
JavaScript

'use strict';
/* eslint-disable newline-per-chained-call */
const Promise = require('bluebird');
const moment = require('moment');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
// Lookup from the one-letter code read from a model's `sex` field to the
// gender label stored on the scraped profile.
const genderMap = {
F: 'female',
M: 'male',
T: 'transsexual', // not yet observed
};
/**
 * Build the ordered list of poster source candidates for a scene.
 *
 * Only variants whose name contains "landscape" (case-insensitive) are used,
 * ordered from tallest to shortest. Each variant contributes its plain source
 * plus any 2x/3x high DPI renditions; renditions that are not present are
 * skipped instead of producing entries with an undefined `src`.
 *
 * @param {Array<Object>} [poster] - image variants with `name`, `height`, `src` and an optional `highdpi` map
 * @returns {Array<{ src: string, expectType: Object }>} sources in order of preference, empty when there is no poster list
 */
function getPosterFallbacks(poster) {
  if (!Array.isArray(poster)) {
    return [];
  }

  return poster
    .filter((image) => /landscape/i.test(image.name))
    // filter() returned a fresh array, so sorting here does not touch the caller's list
    .sort((imageA, imageB) => imageB.height - imageA.height)
    .flatMap((image) => {
      const sources = [image.src, image.highdpi?.['2x'], image.highdpi?.['3x']];

      // high DPI images for full HD source are huge, only prefer for smaller fallback sources
      return image.height === 1080 ? sources : sources.reverse();
    })
    // variants without a 2x/3x rendition leave undefined slots behind
    .filter(Boolean)
    .map((src) => ({
      src,
      expectType: {
        'binary/octet-stream': 'image/jpeg',
      },
    }));
}
/**
 * Convert teaser video variants into source descriptors.
 *
 * Only variants whose name contains "landscape" (case-insensitive) are kept.
 *
 * @param {Array<Object>} teaser - video variants with `name`, `src`, `type` and `height`
 * @returns {Array<{ src: *, type: *, quality: number }>} one descriptor per landscape variant, in input order
 */
function getTeaserFallbacks(teaser) {
  const landscapeVideos = teaser.filter(({ name }) => /landscape/i.test(name));

  return landscapeVideos.map(({ src, type, height }) => {
    // a height containing 353 is reported as 360
    const quality = Number(String(height).replace('353', '360'));

    return { src, type, quality };
  });
}
/**
 * Build the ordered list of image URLs for an actor image, best candidate first.
 *
 * Variants are ordered from tallest to shortest; within a variant the 3x
 * rendition is preferred over 2x, which is preferred over the plain source.
 * Renditions that are not present are left out of the result.
 *
 * @param {Array<Object>} [avatar] - image variants with `height`, `src` and an optional `highdpi` map
 * @returns {Array<string>} URLs in order of preference, empty when there is no image list
 */
function getAvatarFallbacks(avatar) {
  if (!Array.isArray(avatar)) {
    return [];
  }

  // sort() works in place, so sort a copy to leave the caller's array in its original order
  return [...avatar]
    .sort((imageA, imageB) => imageB.height - imageA.height)
    .flatMap((image) => [image.highdpi?.['3x'], image.highdpi?.['2x'], image.src])
    .filter(Boolean);
}
/**
 * Request trailer video tokens for a scene from the channel's GraphQL endpoint.
 *
 * The `getToken` operation is posted with the scene's `newId` and the
 * `trailer` device. The query also asks for `p480l` and `hls`, but only the
 * numbered qualities below are returned.
 *
 * @param {Object} scene - scene data; `newId` is sent as the video ID
 * @param {Object} channel - channel entity; `url` is the GraphQL base and the origin header
 * @param {string} url - scene page URL, sent as the referer header
 * @returns {Promise<Array<{ src: *, quality: number }>|null>} one entry per quality from 2160 down to 270
 *   (`src` is undefined when no token was returned for it), or null when the request failed or carried no tokens
 */
async function getTrailer(scene, channel, url) {
  const query = `
query getToken($videoId: ID!, $device: Device!) {
generateVideoToken(input: {videoId: $videoId, device: $device}) {
p270 {
token
cdn
__typename
}
p360 {
token
cdn
__typename
}
p480 {
token
cdn
__typename
}
p480l {
token
cdn
__typename
}
p720 {
token
cdn
__typename
}
p1080 {
token
cdn
__typename
}
p2160 {
token
cdn
__typename
}
hls {
token
cdn
__typename
}
__typename
}
}
`;

  const payload = {
    operationName: 'getToken',
    variables: {
      videoId: scene.newId,
      device: 'trailer',
    },
    query,
  };

  const requestOptions = {
    headers: {
      referer: url,
      origin: channel.url,
    },
  };

  const res = await http.post(`${channel.url}/graphql`, payload, requestOptions);
  const tokens = res.ok && res.body.data?.generateVideoToken;

  if (!tokens) {
    return null;
  }

  // highest quality first; each token field is named after its quality, e.g. p1080
  return [2160, 1080, 720, 480, 360, 270].map((quality) => ({
    src: tokens[`p${quality}`]?.token,
    quality,
  }));
}
/**
 * Collect photo URLs for a scene from the `__APOLLO_STATE__` object on its page.
 *
 * The page is fetched with `runScripts: 'dangerously'` (presumably so the
 * page's own scripts populate `window.__APOLLO_STATE__` - TODO confirm against
 * the http utility). The first `ROOT_QUERY` entry carrying a `__ref` names the
 * state record whose `carousel` holds the photos.
 *
 * @param {string} url - scene page URL
 * @returns {Promise<Array<string>>} photo URLs, empty when the state or carousel is unavailable
 */
async function getPhotos(url) {
  const htmlRes = await http.get(url, {
    extract: {
      runScripts: 'dangerously',
    },
  });

  const state = htmlRes?.window?.__APOLLO_STATE__;

  if (!state?.ROOT_QUERY) {
    return [];
  }

  const key = Object.values(state.ROOT_QUERY).find((query) => query?.__ref)?.__ref;
  const data = state[key];

  if (!Array.isArray(data?.carousel)) {
    return [];
  }

  // the first carousel item is skipped (presumably it duplicates the poster - TODO confirm)
  return data.carousel
    .slice(1)
    .map((photo) => photo?.main?.[0]?.src)
    .filter(Boolean);
}
/**
 * Map a list of scene records from the API onto release objects.
 *
 * @param {Array<Object>} scenes - scene records
 * @param {Object|null} site - channel entity; its `url` is the base for scene URLs when set
 * @param {string} [origin] - base URL used when `site` has no `url`
 * @returns {Array<Object>} one release per scene, in input order
 */
function scrapeAll(scenes, site, origin) {
  const baseUrl = site?.url || origin;

  return scenes.map((scene) => ({
    title: scene.title,
    entryId: String(scene.newId),
    url: `${baseUrl}/videos${scene.targetUrl}`,
    date: moment.utc(scene.releaseDate).toDate(),
    datePrecision: 'minute',
    actors: scene.models,
    // textRating is halved (presumably a 10-point score mapped onto 5 stars - TODO confirm)
    stars: Number(scene.textRating) / 2,
    poster: getPosterFallbacks(scene.images.poster),
    teaser: getTeaserFallbacks(scene.previews.poster),
  }));
}
/**
 * Turn the upcoming scene record into a single-item release list.
 *
 * The title is rebuilt from the scene's URL slug, and the entry ID is the
 * first run of digits following a slash in the first poster (or, without a
 * poster, the first teaser) source.
 *
 * @param {Object} scene - upcoming scene record
 * @param {Object} site - channel entity; `url` is the base for the scene URL
 * @returns {Array<Object>|null} list holding the release, or null when there is no scene
 *   or it is flagged `isPreReleasePeriod`
 */
function scrapeUpcoming(scene, site) {
  if (!scene || scene.isPreReleasePeriod) {
    return null;
  }

  const capitalize = (word) => `${word.charAt(0).toUpperCase()}${word.slice(1)}`;

  const release = {
    // "/some-new-scene" -> "Some New Scene"
    title: scene.targetUrl.slice(1).split('-').map((word) => capitalize(word)).join(' '),
    url: `${site.url}/videos${scene.targetUrl}`,
    date: moment.utc(scene.releaseDate).toDate(),
    datePrecision: 'minute',
    actors: scene.models,
    poster: getPosterFallbacks(scene.images.poster),
    teaser: getTeaserFallbacks(scene.previews.poster),
  };

  const firstMedia = release.poster[0] || release.teaser[0];

  release.entryId = firstMedia?.src?.match(/\/(\d+)/)?.[1];

  return [release];
}
/**
 * Build a full release from the scene API response.
 *
 * @param {Object} data - API response data; the scene lives under `video`
 * @param {string} url - scene page URL, stored on the release and used for the photo and trailer requests
 * @param {Object} site - channel entity, passed on to the trailer request
 * @param {Object} [baseRelease] - previously scraped release; its `actors` take precedence when set
 * @param {Object} options - scrape options; photos are only fetched when `includePhotos` is truthy
 * @returns {Promise<Object>} the scraped release
 */
async function scrapeScene(data, url, site, baseRelease, options) {
  const { video } = data;

  const release = {
    url,
    title: video.title,
    description: video.description,
    actors: baseRelease?.actors || video.models,
    director: video.directorNames,
    duration: video.runLength,
    stars: video.totalRateVal,
    tags: video.tags,
    entryId: video.newId,
    date: moment.utc(video.releaseDate).toDate(),
    productionDate: moment.utc(video.shootDate).toDate(),
    datePrecision: 'minute',
    poster: getPosterFallbacks(video.images.poster),
  };

  // photos are read from the scene page via getPhotos, not from data.pictureset
  if (options.includePhotos) {
    release.photos = await getPhotos(url);
  }

  release.teaser = getTeaserFallbacks(video.previews.poster);

  const trailer = await getTrailer(video, site, url);

  if (trailer) {
    release.trailer = trailer;
  }

  release.chapters = video.chapters?.video.map(({ title, seconds }) => ({
    tags: [title],
    time: seconds,
  }));

  return release;
}
/**
 * Fetch and scrape the given pages of an actor's scene listing.
 *
 * Pages are requested through `Promise.map` with a concurrency of 3. A page
 * whose response status is not 200 contributes no releases.
 *
 * @param {Array<number>} pages - page numbers to request
 * @param {Object} model - actor record; `targetUrl` is the actor's path
 * @param {string} origin - site base URL
 * @returns {Promise<Array<Object>>} releases of all pages, flattened
 */
async function fetchActorReleases(pages, model, origin) {
  const fetchPage = async (page) => {
    const res = await http.get(`${origin}/api${model.targetUrl}?page=${page}`);

    return res.status === 200
      ? scrapeAll(res.body.data.videos.videos, null, origin)
      : [];
  };

  const releasesPerPage = await Promise.map(pages, fetchPage, { concurrency: 3 });

  return releasesPerPage.flat();
}
/**
 * Build an actor profile from the actor API response.
 *
 * @param {Object} data - API response data with the actor under `model` and the first page of scenes under `videos`
 * @param {string} origin - site base URL
 * @param {boolean} [withReleases] - when truthy, the remaining pages of the actor's scenes are fetched as well
 * @returns {Promise<Object>} the scraped profile; `releases` always holds at least the first page
 */
async function scrapeProfile(data, origin, withReleases) {
  // page count is derived from the total count divided by this page size
  const releasesPerPage = 6;

  const { model } = data;
  const profile = {};

  // new Date(null) yields 1970-01-01, so a missing or unparsable birth date is left unset instead
  if (model.dateOfBirth) {
    const birthdate = new Date(model.dateOfBirth);

    if (!Number.isNaN(birthdate.getTime())) {
      profile.birthdate = birthdate;
    }
  }

  profile.gender = genderMap[model.sex];
  profile.hair = model.hairColour;
  profile.nationality = model.nationality;

  // the biography may be absent; only keep it when it has visible content
  if (model.biography?.trim().length > 0) profile.description = model.biography;

  // property names follow the API's spelling ("Measurment")
  if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
  if (model.waistMeasurment) profile.waist = model.waistMeasurment;
  if (model.hipMeasurment) profile.hip = model.hipMeasurment;

  profile.avatar = getAvatarFallbacks(model.images.listing);
  profile.poster = getAvatarFallbacks(model.images.profile);
  profile.banner = getAvatarFallbacks(model.images.poster);

  const releases = scrapeAll(data.videos.videos, null, origin);

  if (withReleases) {
    const pageCount = Math.ceil(data.videos.count / releasesPerPage);
    // page 1 is already in `data`, so request pages 2..pageCount
    const otherPages = Array.from({ length: Math.max(pageCount - 1, 0) }, (value, index) => index + 2);
    const otherReleases = await fetchActorReleases(otherPages, model, origin);

    profile.releases = [...releases, ...otherReleases];
  } else {
    profile.releases = releases;
  }

  return profile;
}
/**
 * Fetch one page of a channel's latest scenes.
 *
 * @param {Object} site - channel entity; `url` is the API base
 * @param {number} [page=1] - page number to request
 * @returns {Promise<Array<Object>|number>} scraped releases, an empty list when the response
 *   holds no videos, or the HTTP status when the request was not OK
 */
async function fetchLatest(site, page = 1) {
  const res = await http.get(`${site.url}/api/videos?page=${page}`);

  if (!res.ok) {
    return res.status;
  }

  const { videos } = res.body.data;

  return videos ? scrapeAll(videos, site) : [];
}
/**
 * Fetch a channel's upcoming scene from the API root.
 *
 * @param {Object} site - channel entity; `url` is the API base
 * @returns {Promise<Array<Object>|null|number>} the result of scrapeUpcoming for `nextScene`,
 *   an empty list when the response has no `nextScene`, or the HTTP status when the request was not OK
 */
async function fetchUpcoming(site) {
  const res = await http.get(`${site.url}/api`);

  if (!res.ok) {
    return res.status;
  }

  const { nextScene } = res.body.data;

  return nextScene ? scrapeUpcoming(nextScene, site) : [];
}
/**
 * Fetch a single scene through the API and scrape it.
 *
 * The API path is built from the last segment of the scene URL's path.
 *
 * @param {string} url - scene page URL
 * @param {Object} site - channel entity, passed on to the scraper
 * @param {Object} [baseRelease] - previously scraped release, passed on to the scraper
 * @param {Object} options - scrape options, passed on to the scraper
 * @returns {Promise<Object|null|number>} the scraped release, null when the response has no data,
 *   or the HTTP status when the request was not OK
 */
async function fetchScene(url, site, baseRelease, options) {
  const sceneUrl = new URL(url);
  const slug = sceneUrl.pathname.split('/').pop();

  const res = await http.get(`${sceneUrl.origin}/api/${slug}`, {
    extract: {
      runScripts: 'dangerously',
    },
  });

  if (!res.ok) {
    return res.status;
  }

  const { data } = res.body;

  return data ? scrapeScene(data, url, site, baseRelease, options) : null;
}
/**
 * Fetch an actor's profile through the API, addressed by the slug of the actor's name.
 *
 * @param {Object} actor - actor descriptor; only `name` is used
 * @param {Object} context - scrape context; `site.url` is the API base
 * @param {Object} include - inclusion flags; `scenes` decides whether all of the actor's releases are fetched
 * @returns {Promise<Object|null|number>} the scraped profile, null when the response has no data,
 *   or the HTTP status when the request was not OK
 */
async function fetchProfile({ name: actorName }, { site }, include) {
  const origin = site.url;
  const res = await http.get(`${origin}/api/${slugify(actorName)}`);

  if (!res.ok) {
    return res.status;
  }

  const { data } = res.body;

  return data ? scrapeProfile(data, origin, include.scenes) : null;
}
// Exported scraper entry points; the scrape* and get* helpers in this file stay private.
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
fetchProfile,
};