Using better video API for Vixen deep scrape.

This commit is contained in:
DebaucheryLibrarian 2023-07-07 02:20:07 +02:00
parent 10ba67fde1
commit 6f4608ba23
1 changed files with 142 additions and 67 deletions

View File

@ -5,6 +5,7 @@ const Promise = require('bluebird');
const moment = require('moment');
const unprint = require('unprint');
const argv = require('../argv');
const qu = require('../utils/qu');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
@ -259,82 +260,90 @@ async function scrapeSceneData(data, channel, options) {
return release;
}
async function fetchGraphqlScene(release, channel) {
const slug = new URL(release.url).pathname.match(/\/videos\/(.*)/)?.[1];
// GraphQL field selection for a video, interpolated into the query text sent to the channel's /graphql endpoint.
// The `...ImageInfo` spread requires an `ImageInfo` fragment to be defined in the same query document.
const videoFields = `
videoId
title
slug
description
releaseDate
runLength
site
rating
models {
name
}
directors {
directorId
name
}
categories {
name
}
chapters {
video {
title
seconds
}
}
downloadResolutions {
width
}
carousel {
main {
src
}
}
images {
poster {
...ImageInfo
}
}
`;
if (!slug) {
/**
 * Resolve the slug identifying a scene.
 *
 * An explicit `release.slug` takes precedence; otherwise the slug is everything
 * following `/videos/` in the path of `release.url`.
 *
 * @param {object} release - release carrying an optional `slug` and/or an absolute `url`
 * @returns {string|null} the slug, or null when neither property yields one
 * @throws {TypeError} when `release.url` is set but cannot be parsed by `URL`
 */
function getSlug(release) {
	if (release.slug) {
		return release.slug;
	}

	if (release.url) {
		// match() returns null when the path has no /videos/ segment, leaving the optional chain undefined;
		// normalize to null so every "no slug" outcome returns the same value
		return new URL(release.url).pathname.match(/\/videos\/(.*)/)?.[1] ?? null;
	}

	return null;
}
async function fetchGraphqlScene(release, channel) {
const slug = getSlug(release);
const entryId = argv.entryId || release.entryId;
if (!entryId && !slug) {
return null;
}
// the API won't reliably return results when the query is over ~30 characters for some reason
// it may still occasionally fail to return the relevant result first, such as for Blacked Raw - After the Show
const query = slug.split('-').reduce((acc, word) => {
const newAcc = `${acc} ${word}`;
if (newAcc.length > 30) {
return acc;
}
return newAcc;
}, '').trim();
const query = entryId
? `
query searchVideos($videoId: ID!) {
video: findOneVideo(input: { videoId: $videoId }) {
${videoFields}
}
}
`
: `
query searchVideos($slug: String!) {
video: findOneVideo(input: { slug: $slug }) {
${videoFields}
}
}
`;
const res = await http.post(`${channel.url}/graphql`, {
operationName: 'searchVideos',
variables: {
site: channel.slug.toUpperCase(),
query,
videoId: entryId,
slug,
},
// ranking can be weird, use higher limit to increase likelihood of finding scene
query: `
query searchVideos($site: Site!, $query: String!) {
searchVideos(input: {
query: $query
site: $site
first: 50
}) {
edges {
node {
videoId
title
slug
description
releaseDate
runLength
site
rating
models {
name
}
directors {
directorId
name
}
categories {
name
}
chapters {
video {
title
seconds
}
}
downloadResolutions {
width
}
carousel {
main {
src
}
}
images {
poster {
...ImageInfo
}
}
}
}
}
}
${query}
fragment ImageInfo on Image {
src
@ -353,7 +362,7 @@ async function fetchGraphqlScene(release, channel) {
});
if (res.ok) {
return res.body.data?.searchVideos?.edges?.find((edge) => edge.node.slug === slug)?.node || null;
return res.body.data.video;
}
return null;
@ -556,3 +565,69 @@ module.exports = {
fetchScene,
fetchProfile,
};
/* less reliable search API in case direct video query becomes unavailable
async function findGraphqlScene(release, channel) {
const slug = new URL(release.url).pathname.match(/\/videos\/(.*)/)?.[1];
if (!slug) {
return null;
}
// the API won't reliably return results when the query is over ~30 characters for some reason
// it may still occasionally fail to return the relevant result first, such as for Blacked Raw - After the Show
const query = slug.split('-').reduce((acc, word) => {
const newAcc = `${acc} ${word}`;
if (newAcc.length > 30) {
return acc;
}
return newAcc;
}, '').trim();
const res = await http.post(`${channel.url}/graphql`, {
operationName: 'searchVideos',
variables: {
site: channel.slug.toUpperCase(),
query,
},
// ranking can be weird, use higher limit to increase likelihood of finding scene
query: `
query searchVideos($site: Site!, $query: String!) {
searchVideos(input: {
query: $query
site: $site
first: 50
}) {
edges {
node {
${videoFields}
}
}
}
}
fragment ImageInfo on Image {
src
width
height
highdpi {
double
}
}
`,
}, {
headers: {
referer: release.url,
origin: channel.url,
},
});
if (res.ok) {
return res.body.data?.searchVideos?.edges?.find((edge) => edge.node.slug === slug)?.node || null;
}
return null;
}
*/