Using better video API for Vixen deep scrape.
This commit is contained in:
parent
10ba67fde1
commit
6f4608ba23
|
@ -5,6 +5,7 @@ const Promise = require('bluebird');
|
|||
const moment = require('moment');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const argv = require('../argv');
|
||||
const qu = require('../utils/qu');
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
@ -259,82 +260,90 @@ async function scrapeSceneData(data, channel, options) {
|
|||
return release;
|
||||
}
|
||||
|
||||
async function fetchGraphqlScene(release, channel) {
|
||||
const slug = new URL(release.url).pathname.match(/\/videos\/(.*)/)?.[1];
|
||||
// GraphQL selection set for a video node, interpolated via ${videoFields} into the query documents built in this file.
// It spreads the ImageInfo fragment, so any document that embeds it must also define `fragment ImageInfo on Image`.
const videoFields = `
|
||||
videoId
|
||||
title
|
||||
slug
|
||||
description
|
||||
releaseDate
|
||||
runLength
|
||||
site
|
||||
rating
|
||||
models {
|
||||
name
|
||||
}
|
||||
directors {
|
||||
directorId
|
||||
name
|
||||
}
|
||||
categories {
|
||||
name
|
||||
}
|
||||
chapters {
|
||||
video {
|
||||
title
|
||||
seconds
|
||||
}
|
||||
}
|
||||
downloadResolutions {
|
||||
width
|
||||
}
|
||||
carousel {
|
||||
main {
|
||||
src
|
||||
}
|
||||
}
|
||||
images {
|
||||
poster {
|
||||
...ImageInfo
|
||||
}
|
||||
}
|
||||
`;
|
||||
|
||||
if (!slug) {
|
||||
/**
 * Resolve the video slug for a release.
 *
 * An explicit `release.slug` takes precedence; otherwise the slug is taken
 * from the part of the `release.url` path that follows `/videos/`.
 *
 * @param {{ slug?: string, url?: string }} release - release to resolve the slug for
 * @returns {string|null} the slug, or null when neither a slug nor a matching URL is available
 * @throws {TypeError} when `release.url` is set but cannot be parsed by `new URL()`
 */
function getSlug(release) {
	if (release.slug) {
		return release.slug;
	}

	if (release.url) {
		const match = new URL(release.url).pathname.match(/\/videos\/(.*)/);

		// normalize a non-matching path to null, so callers always get string|null rather than undefined
		return match?.[1] ?? null;
	}

	return null;
}
|
||||
|
||||
async function fetchGraphqlScene(release, channel) {
|
||||
const slug = getSlug(release);
|
||||
const entryId = argv.entryId || release.entryId;
|
||||
|
||||
if (!entryId && !slug) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// the API won't reliably return results when the query is over ~30 characters for some reason
|
||||
// it may still occasionally fail to return the relevant result first, such as for Blacked Raw - After the Show
|
||||
const query = slug.split('-').reduce((acc, word) => {
|
||||
const newAcc = `${acc} ${word}`;
|
||||
|
||||
if (newAcc.length > 30) {
|
||||
return acc;
|
||||
}
|
||||
|
||||
return newAcc;
|
||||
}, '').trim();
|
||||
const query = entryId
|
||||
? `
|
||||
query searchVideos($videoId: ID!) {
|
||||
video: findOneVideo(input: { videoId: $videoId }) {
|
||||
${videoFields}
|
||||
}
|
||||
}
|
||||
`
|
||||
: `
|
||||
query searchVideos($slug: String!) {
|
||||
video: findOneVideo(input: { slug: $slug }) {
|
||||
${videoFields}
|
||||
}
|
||||
}
|
||||
`;
|
||||
|
||||
const res = await http.post(`${channel.url}/graphql`, {
|
||||
operationName: 'searchVideos',
|
||||
variables: {
|
||||
site: channel.slug.toUpperCase(),
|
||||
query,
|
||||
videoId: entryId,
|
||||
slug,
|
||||
},
|
||||
// ranking can be weird, use higher limit to increase likelihood of finding scene
|
||||
query: `
|
||||
query searchVideos($site: Site!, $query: String!) {
|
||||
searchVideos(input: {
|
||||
query: $query
|
||||
site: $site
|
||||
first: 50
|
||||
}) {
|
||||
edges {
|
||||
node {
|
||||
videoId
|
||||
title
|
||||
slug
|
||||
description
|
||||
releaseDate
|
||||
runLength
|
||||
site
|
||||
rating
|
||||
models {
|
||||
name
|
||||
}
|
||||
directors {
|
||||
directorId
|
||||
name
|
||||
}
|
||||
categories {
|
||||
name
|
||||
}
|
||||
chapters {
|
||||
video {
|
||||
title
|
||||
seconds
|
||||
}
|
||||
}
|
||||
downloadResolutions {
|
||||
width
|
||||
}
|
||||
carousel {
|
||||
main {
|
||||
src
|
||||
}
|
||||
}
|
||||
images {
|
||||
poster {
|
||||
...ImageInfo
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
${query}
|
||||
|
||||
fragment ImageInfo on Image {
|
||||
src
|
||||
|
@ -353,7 +362,7 @@ async function fetchGraphqlScene(release, channel) {
|
|||
});
|
||||
|
||||
if (res.ok) {
|
||||
return res.body.data?.searchVideos?.edges?.find((edge) => edge.node.slug === slug)?.node || null;
|
||||
return res.body.data.video;
|
||||
}
|
||||
|
||||
return null;
|
||||
|
@ -556,3 +565,69 @@ module.exports = {
|
|||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
/* less reliable search API in case direct video query becomes unavailable
|
||||
async function findGraphqlScene(release, channel) {
|
||||
const slug = new URL(release.url).pathname.match(/\/videos\/(.*)/)?.[1];
|
||||
|
||||
if (!slug) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// the API won't reliably return results when the query is over ~30 characters for some reason
|
||||
// it may still occasionally fail to return the relevant result first, such as for Blacked Raw - After the Show
|
||||
const query = slug.split('-').reduce((acc, word) => {
|
||||
const newAcc = `${acc} ${word}`;
|
||||
|
||||
if (newAcc.length > 30) {
|
||||
return acc;
|
||||
}
|
||||
|
||||
return newAcc;
|
||||
}, '').trim();
|
||||
|
||||
const res = await http.post(`${channel.url}/graphql`, {
|
||||
operationName: 'searchVideos',
|
||||
variables: {
|
||||
site: channel.slug.toUpperCase(),
|
||||
query,
|
||||
},
|
||||
// ranking can be weird, use higher limit to increase likelihood of finding scene
|
||||
query: `
|
||||
query searchVideos($site: Site!, $query: String!) {
|
||||
searchVideos(input: {
|
||||
query: $query
|
||||
site: $site
|
||||
first: 50
|
||||
}) {
|
||||
edges {
|
||||
node {
|
||||
${videoFields}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fragment ImageInfo on Image {
|
||||
src
|
||||
width
|
||||
height
|
||||
highdpi {
|
||||
double
|
||||
}
|
||||
}
|
||||
`,
|
||||
}, {
|
||||
headers: {
|
||||
referer: release.url,
|
||||
origin: channel.url,
|
||||
},
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return res.body.data?.searchVideos?.edges?.find((edge) => edge.node.slug === slug)?.node || null;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue