// Source: traxxx/src/scrapers/vixen.js (433 lines, 9.6 KiB, JavaScript)
// Copied from the repository web view ("Raw", "Normal View", "History").

'use strict';
/* eslint-disable newline-per-chained-call */
const Promise = require('bluebird');
const moment = require('moment');
const qu = require('../utils/qu');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
// Translates the single-letter `sex` code found on a model record into the gender label
// stored on the scraped profile (looked up as `genderMap[model.sex]` in scrapeProfile).
const genderMap = {
F: 'female',
M: 'male',
T: 'transsexual', // not yet observed
};
/**
 * Builds an ordered list of poster sources from a list of poster image variants.
 *
 * Only variants whose name contains "landscape" (case-insensitive) are used, tallest first.
 * Variants that lack a high-DPI URL no longer produce entries with an undefined `src`,
 * and a missing variant list yields an empty array instead of throwing.
 *
 * @param {Array<{name: string, height: number, src: string, highdpi?: Object}>} [poster] image variants
 * @returns {Array<{src: string, expectType: Object}>} sources in order of preference
 */
function getPosterFallbacks(poster) {
	if (!Array.isArray(poster)) {
		return [];
	}

	return poster
		.filter((image) => /landscape/i.test(image.name)) // filter returns a copy, so the sort below does not touch the input
		.sort((imageA, imageB) => imageB.height - imageA.height)
		.flatMap((image) => {
			const sources = [image.src, image.highdpi?.['2x'], image.highdpi?.['3x']];

			// high DPI images for full HD source are huge, only prefer for smaller fallback sources
			return image.height === 1080 ? sources : sources.reverse();
		})
		.filter(Boolean) // drop high-DPI slots the variant does not provide
		.map((src) => ({
			src,
			expectType: {
				'binary/octet-stream': 'image/jpeg',
			},
		}));
}
/**
 * Collects teaser video sources from a list of preview variants.
 *
 * Only variants whose name contains "landscape" (case-insensitive) are kept, in their
 * original order. The quality is taken from the variant height, with the text "353"
 * rewritten to "360" before conversion to a number.
 *
 * @param {Array<{name: string, src: string, type: string, height: number}>} teaser preview variants
 * @returns {Array<{src: string, type: string, quality: number}>} teaser sources
 */
function getTeaserFallbacks(teaser) {
	const landscapePattern = /landscape/i;

	return teaser.reduce((fallbacks, video) => {
		if (!landscapePattern.test(video.name)) {
			return fallbacks;
		}

		const normalizedHeight = String(video.height).replace('353', '360');

		fallbacks.push({
			src: video.src,
			type: video.type,
			quality: Number(normalizedHeight),
		});

		return fallbacks;
	}, []);
}
/**
 * Flattens a list of avatar image variants into an ordered list of source URLs.
 *
 * Variants are ordered tallest first; within a variant the 3x high-DPI URL is preferred,
 * then 2x, then the base `src`. The input array is left untouched (it is copied before
 * sorting), URLs a variant does not provide are skipped, and a missing variant list
 * yields an empty array instead of throwing.
 *
 * @param {Array<{height: number, src: string, highdpi?: Object}>} [avatar] image variants
 * @returns {string[]} source URLs in order of preference
 */
function getAvatarFallbacks(avatar) {
	if (!Array.isArray(avatar)) {
		return [];
	}

	return [...avatar] // Array.prototype.sort works in place; do not reorder the caller's data
		.sort((imageA, imageB) => imageB.height - imageA.height)
		.flatMap((image) => [image.highdpi?.['3x'], image.highdpi?.['2x'], image.src])
		.filter(Boolean);
}
/**
 * Normalizes a list of media sources and orders them by preference.
 *
 * Each source is reduced to `src`, `width`, `height`, `type` (falling back to the given
 * default type) and an `expectType` mapping from 'binary/octet-stream' to the default type.
 * The list is first ordered by pixel count (largest first) and then by how close the aspect
 * ratio, rounded to one decimal, is to 1.8; the second pass keeps the pixel order among
 * sources with the same ratio distance because Array.prototype.sort is stable.
 *
 * @param {Array<{src: string, width: number, height: number, type?: string}>} sources raw sources
 * @param {string} [type='image/jpeg'] default media type
 * @returns {Array<Object>|null} curated sources, or null when no sources were given
 */
function curateSources(sources, type = 'image/jpeg') {
	if (!sources) {
		return null;
	}

	const pixelCount = (source) => source.width * source.height; // number of pixels
	const ratioOffset = (source) => Math.abs(1.8 - Number((source.width / source.height).toFixed(1))); // approximation to 16:9

	const curated = sources.map((source) => {
		const { src, width, height } = source;

		return {
			src,
			width,
			height,
			type: source.type || type,
			expectType: {
				'binary/octet-stream': type,
			},
		};
	});

	curated.sort((resA, resB) => pixelCount(resB) - pixelCount(resA));
	curated.sort((resA, resB) => ratioOffset(resA) - ratioOffset(resB));

	return curated;
}
/**
 * Requests trailer video tokens for a scene from the channel's GraphQL endpoint.
 *
 * Posts the `getToken` operation with the scene's `newId` and device 'trailer', sending the
 * scene URL as referer and the channel URL as origin.
 *
 * @param {{newId: string}} scene scene data providing the video ID
 * @param {{url: string}} channel channel whose `/graphql` endpoint is queried
 * @param {string} url scene page URL, sent as the referer header
 * @returns {Promise<Array<{src: (string|undefined), quality: number}>|null>} one entry per quality
 *   from 2160 down to 270, with `src` set to the returned token (undefined when the quality is
 *   absent), or null when the request failed or returned no token data
 */
async function getTrailer(scene, channel, url) {
	const tokenQuery = `
query getToken($videoId: ID!, $device: Device!) {
generateVideoToken(input: {videoId: $videoId, device: $device}) {
p270 {
token
cdn
__typename
}
p360 {
token
cdn
__typename
}
p480 {
token
cdn
__typename
}
p480l {
token
cdn
__typename
}
p720 {
token
cdn
__typename
}
p1080 {
token
cdn
__typename
}
p2160 {
token
cdn
__typename
}
hls {
token
cdn
__typename
}
__typename
}
}
`;

	const payload = {
		operationName: 'getToken',
		variables: {
			videoId: scene.newId,
			device: 'trailer',
		},
		query: tokenQuery,
	};

	const res = await http.post(`${channel.url}/graphql`, payload, {
		headers: {
			referer: url,
			origin: channel.url,
		},
	});

	if (!res.ok) {
		return null;
	}

	const tokens = res.body.data?.generateVideoToken;

	if (!tokens) {
		return null;
	}

	// highest quality first; p480l and hls are requested but not used as sources
	return [2160, 1080, 720, 480, 360, 270].map((quality) => ({
		src: tokens[`p${quality}`]?.token,
		quality,
	}));
}
// (blame timestamp from the repository web view: 2020-04-26 02:21:57 +00:00)
/**
 * Converts a list of scene records into release objects.
 *
 * Callers in this file pass either a channel (`scrapeAll(scenes, site)`) or no channel plus a
 * base URL (`scrapeAll(videos, null, origin)`). The base URL used for scene and model links is
 * the explicit `origin` when given, otherwise the channel's `url`; previously the third
 * argument was ignored and a null channel caused a TypeError.
 *
 * @param {Array<Object>} scenes scene records (videoId, slug, title, releaseDate, modelsSlugged,
 *   images.listing, previews.listing, rating)
 * @param {{url: string}|null} channel channel the scenes belong to, may be null when origin is given
 * @param {string} [origin] base URL for links, defaults to `channel.url`
 * @returns {Array<Object>} releases with entryId, url, title, date, actors, poster, teaser and stars
 */
function scrapeAll(scenes, channel, origin = channel?.url) {
	return scenes.map((data) => {
		const release = {};

		release.entryId = data.videoId;
		release.url = `${origin}/videos/${data.slug}`;
		release.title = data.title;
		release.date = qu.extractDate(data.releaseDate);

		release.actors = data.modelsSlugged?.map((model) => ({
			name: model.name,
			url: `${origin}/models/${model.slugged}`,
		}));

		// curateSources returns null for missing lists, so absent media does not abort the whole page
		release.poster = curateSources(data.images?.listing);
		release.teaser = curateSources(data.previews?.listing, 'video/mp4');

		release.stars = data.rating;

		return release;
	});
}
/**
 * Converts the site's upcoming-scene record into a single-element list of releases.
 *
 * The title is derived from the scene's target URL by dropping the leading character,
 * splitting on dashes and capitalizing each part. The entry ID is the first run of digits
 * that follows a slash in the first poster or teaser source that actually has a `src`.
 *
 * @param {Object} scene upcoming scene record (targetUrl, releaseDate, models, images.poster,
 *   previews.poster, isPreReleasePeriod)
 * @param {{url: string}} site site the scene belongs to
 * @returns {Array<Object>|null} list holding the release, or null when there is no scene or
 *   the scene is flagged `isPreReleasePeriod`
 */
function scrapeUpcoming(scene, site) {
	if (!scene || scene.isPreReleasePeriod) return null;

	const release = {};

	release.title = scene.targetUrl
		.slice(1)
		.split('-')
		.map((component) => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
		.join(' ');

	release.url = `${site.url}/videos${scene.targetUrl}`;

	release.date = moment.utc(scene.releaseDate).toDate();
	release.datePrecision = 'minute';

	release.actors = scene.models;

	release.poster = getPosterFallbacks(scene.images.poster);
	release.teaser = getTeaserFallbacks(scene.previews.poster);

	// the leading fallback may lack a src (e.g. a missing high-DPI variant), so look for the first usable one
	const firstSource = [...release.poster, ...release.teaser].find((source) => source?.src);

	release.entryId = firstSource?.src.match(/\/(\d+)/)?.[1];

	return [release];
}
/**
 * Builds a release from the page props of a scene page.
 *
 * @param {{video: Object, galleryImages?: Array<{src: string}>}} data scene page props
 * @param {string} url scene page URL
 * @param {{url: string}} channel channel the scene belongs to, used for model links and the trailer request
 * @param {{includeTrailers?: boolean}} [options] scrape options; trailers are only requested when
 *   `includeTrailers` is set
 * @returns {Promise<Object>} the scraped release
 */
async function scrapeScene(data, url, channel, options) {
	const { video } = data;

	const release = {
		url,
		// NOTE(review): the scene ID comes from `newId` here while scrapeAll uses `videoId` — confirm both refer to the same entry
		entryId: video.newId,
		title: video.title,
		description: video.description,
		date: qu.extractDate(video.releaseDate),
		// prefer the slugged models so actors get a profile URL, fall back to the plain model list
		actors: video.modelsSlugged?.map((model) => ({
			name: model.name,
			url: `${channel.url}/models/${model.slugged}`,
		})) ?? video.models,
		director: video.directorNames,
		duration: qu.durationToSeconds(video.runLength),
		stars: video.rating,
	};

	release.poster = curateSources(video.images?.poster) || video.videoImage?.src;

	release.photos = data.galleryImages?.length > 0
		? data.galleryImages.map((image) => image.src)
		: video.carousel?.map((photo) => photo.main[0]?.src).filter(Boolean);

	if (options?.includeTrailers) {
		const trailer = await getTrailer(video, channel, url);

		if (trailer) {
			release.trailer = trailer;
		}
	}

	release.qualities = video.downloadResolutions?.map((quality) => Number(quality.width)).filter(Boolean); // width property is actually the height

	return release;
}
/**
 * Fetches additional pages of a model's videos from the site API and scrapes them into releases.
 *
 * Pages are requested with a concurrency of 3; pages that do not answer with status 200
 * contribute no releases.
 *
 * @param {number[]} pages page numbers to fetch
 * @param {{targetUrl: string}} model model record providing the API path
 * @param {string} origin site base URL
 * @returns {Promise<Array<Object>>} releases from all fetched pages, flattened
 */
async function fetchActorReleases(pages, model, origin) {
	const releasesPerPage = await Promise.map(pages, async (page) => {
		const url = `${origin}/api${model.targetUrl}?page=${page}`;
		const res = await http.get(url);

		if (res.status === 200) {
			// scrapeAll builds links from channel.url, so it cannot be handed a null channel
			return scrapeAll(res.body.data.videos.videos, { url: origin }, origin);
		}

		return [];
	}, { concurrency: 3 });

	return releasesPerPage.flat();
}
/**
 * Builds an actor profile from the site API's model response.
 *
 * @param {{model: Object, videos: {count: number, videos: Array<Object>}}} data API response data
 * @param {string} origin site base URL
 * @param {boolean} withReleases whether to fetch the remaining pages of the model's videos
 * @returns {Promise<Object>} profile with personal details, images and releases
 */
async function scrapeProfile(data, origin, withReleases) {
	const { model } = data;
	const profile = {};

	// new Date(null) is 1970-01-01 and unparsable input is an Invalid Date; only keep a real birthdate
	const birthdate = model.dateOfBirth ? new Date(model.dateOfBirth) : null;

	if (birthdate && !Number.isNaN(birthdate.getTime())) profile.birthdate = birthdate;

	profile.gender = genderMap[model.sex];
	profile.hair = model.hairColour;
	profile.nationality = model.nationality;

	if (model.biography?.trim().length > 0) profile.description = model.biography;

	// the "Measurment" spelling is the field name as it is read from the response
	if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
	if (model.waistMeasurment) profile.waist = model.waistMeasurment;
	if (model.hipMeasurment) profile.hip = model.hipMeasurment;

	profile.avatar = getAvatarFallbacks(model.images.listing);
	profile.poster = getAvatarFallbacks(model.images.profile);
	profile.banner = getAvatarFallbacks(model.images.poster);

	// scrapeAll builds links from channel.url, so it cannot be handed a null channel
	const releases = scrapeAll(data.videos.videos, { url: origin }, origin);

	if (withReleases) {
		// NOTE(review): assumes the API returns 6 videos per page — confirm
		const pageCount = Math.ceil(data.videos.count / 6);
		// the first page is already part of this response, fetch pages 2..pageCount
		const remainingPages = Array.from({ length: pageCount - 1 }, (value, index) => index + 2);
		const otherReleases = await fetchActorReleases(remainingPages, model, origin);

		profile.releases = [...releases, ...otherReleases];
	} else {
		profile.releases = releases;
	}

	return profile;
}
/**
 * Experimental GraphQL search request; it only logs the response and returns nothing.
 *
 * The function is not exported as a scraper method (its export is commented out) and the
 * search term is hard-coded. Both the query and the variables are now percent-encoded with
 * encodeURIComponent; before, the variables JSON was placed in the URL unencoded.
 *
 * @param {{url: string, slug: string}} channel channel to query; the upper-cased slug is sent as the site
 * @param {number} [page=1] currently unused
 * @returns {Promise<void>}
 */
async function fetchLatestGraphql(channel, page = 1) {
	const query = `
query($query: String!, $site: Site!) {
searchVideos(input: {
query: $query
site: $site
}) {
edges {
node {
title
slug
description
releaseDate
categories {
name
}
chapters {
video {
title
seconds
}
}
models {
name
}
images {
poster {
...ImageInfo
}
}
}
}
}
}
fragment ImageInfo on Image {
src
highdpi {
double
}
}
`;

	const variables = JSON.stringify({
		site: channel.slug.toUpperCase(),
		query: 'alone at last',
	});

	const res = await http.get(`${channel.url}/graphql?query=${encodeURIComponent(query)}&variables=${encodeURIComponent(variables)}`);

	// debugging output is the only result of this probe
	console.log(res.body);
	console.log(res.body.errors);
	console.log(res.body.data?.searchVideos?.edges.map((edge) => edge.node));
}
/**
 * Fetches a page of the site's video listing and scrapes the scenes embedded in it.
 *
 * The scene data is read from the JSON inside the page's `#__NEXT_DATA__` element.
 *
 * @param {{url: string}} site site to fetch from
 * @param {number} [page=1] listing page number
 * @returns {Promise<Array<Object>|number>} scraped releases, an empty array when the page holds
 *   no scene data, or the response status when the request failed
 */
async function fetchLatest(site, page = 1) {
	const res = await qu.get(`${site.url}/videos?page=${page}`);

	if (!res.ok) {
		return res.status;
	}

	const nextData = res.item.query.html('#__NEXT_DATA__');
	const parsed = nextData && JSON.parse(nextData);
	const edges = parsed?.props.pageProps.edges;

	if (!edges) {
		return [];
	}

	return scrapeAll(edges.map(({ node }) => node), site);
}
/**
 * Fetches the site's API root and scrapes the upcoming scene it announces, if any.
 *
 * @param {{url: string}} site site to fetch from
 * @returns {Promise<Array<Object>|null|number>} the result of scrapeUpcoming for the announced
 *   scene, an empty array when no next scene is announced, or the response status when the
 *   request failed
 */
async function fetchUpcoming(site) {
	const res = await http.get(`${site.url}/api`);

	if (!res.ok) {
		return res.status;
	}

	const { nextScene } = res.body.data;

	return nextScene ? scrapeUpcoming(nextScene, site) : [];
}
/**
 * Fetches a scene page and scrapes the scene data embedded in it.
 *
 * The scene data is read from the JSON inside the page's `#__NEXT_DATA__` element. A page
 * without that element, or without a `video` in its page props, now yields null instead of
 * throwing a TypeError.
 *
 * @param {string} url scene page URL
 * @param {{url: string}} channel channel the scene belongs to
 * @param {Object} baseRelease currently unused
 * @param {Object} options scrape options passed on to scrapeScene
 * @returns {Promise<Object|null|number>} the scraped release, null when the page holds no scene
 *   data, or the response status when the request failed
 */
async function fetchScene(url, channel, baseRelease, options) {
	const res = await qu.get(url);

	if (!res.ok) {
		return res.status;
	}

	const dataString = res.item.query.html('#__NEXT_DATA__');
	const data = dataString && JSON.parse(dataString);
	const pageProps = data?.props?.pageProps;

	// scrapeScene reads everything from pageProps.video, so there is nothing to scrape without it
	if (!pageProps?.video) {
		return null;
	}

	return scrapeScene(pageProps, url, channel, options);
}
/**
 * Fetches an actor's record from the site API and scrapes it into a profile.
 *
 * The API path is the slugified actor name under `/api/`.
 *
 * @param {{name: string}} actor actor to look up
 * @param {{site: {url: string}}} context context providing the site
 * @param {{scenes: boolean}} include what to include; `scenes` is passed to scrapeProfile
 * @returns {Promise<Object|null|number>} the scraped profile, null when the response holds no
 *   data, or the response status when the request failed
 */
async function fetchProfile({ name: actorName }, { site }, include) {
	const { url: origin } = site;
	const res = await http.get(`${origin}/api/${slugify(actorName)}`);

	if (!res.ok) {
		return res.status;
	}

	if (!res.body.data) {
		return null;
	}

	return scrapeProfile(res.body.data, origin, include.scenes);
}
// Scraper methods exposed by this module.
module.exports = {
// GraphQL-based listing, currently disabled in favour of the page-based fetchLatest below:
// fetchLatest: fetchLatestGraphql,
fetchLatest,
fetchUpcoming,
fetchScene,
fetchProfile,
};