Refactored upcoming scenes in Vixen scraper.

This commit is contained in:
DebaucheryLibrarian 2022-05-24 00:22:33 +02:00
parent 1a92cd79f7
commit 0fa36b17bf
1 changed file with 165 additions and 107 deletions

View File

@ -14,34 +14,6 @@ const genderMap = {
T: 'transsexual', // not yet observed T: 'transsexual', // not yet observed
}; };
/**
 * Build an ordered list of poster source candidates from a list of image variants.
 *
 * Only variants whose name matches /landscape/i are used, tallest first. Each
 * variant contributes its base `src` plus its `highdpi` '2x' and '3x' URLs when
 * they exist; variants without a given high DPI URL simply contribute fewer entries.
 *
 * @param {Array<{name: string, height: number, src: string, highdpi?: Object}>} poster - image variants
 * @returns {Array<{src: string, expectType: Object}>} fallback sources in preference order;
 *   an empty array when `poster` is not an array
 */
function getPosterFallbacks(poster) {
  if (!Array.isArray(poster)) {
    return [];
  }

  return poster
    .filter((image) => /landscape/i.test(image.name))
    .sort((imageA, imageB) => imageB.height - imageA.height)
    .flatMap((image) => {
      const sources = [image.src, image.highdpi?.['2x'], image.highdpi?.['3x']];

      // high DPI images for full HD source are huge, only prefer for smaller fallback sources
      return image.height === 1080 ? sources : sources.reverse();
    })
    // a variant without high DPI URLs would otherwise yield entries with an undefined src
    .filter(Boolean)
    .map((src) => ({
      src,
      // NOTE(review): presumably lets the consumer accept a generic binary content type as JPEG — confirm against the media handling code
      expectType: {
        'binary/octet-stream': 'image/jpeg',
      },
    }));
}
/**
 * Convert teaser video variants into a list of video sources.
 *
 * Variants whose name does not match /landscape/i are dropped. The quality is
 * derived from the variant height, with '353' rewritten to '360' before the
 * numeric conversion.
 *
 * @param {Array<{name: string, src: string, type: string, height: (number|string)}>} teaser - video variants
 * @returns {Array<{src: string, type: string, quality: number}>} sources in input order
 */
function getTeaserFallbacks(teaser) {
  const landscapePattern = /landscape/i;

  return teaser.reduce((fallbacks, clip) => {
    if (!landscapePattern.test(clip.name)) {
      return fallbacks;
    }

    // heights reported as 353 are labelled as 360
    const normalizedHeight = String(clip.height).replace('353', '360');

    fallbacks.push({
      src: clip.src,
      type: clip.type,
      quality: Number(normalizedHeight),
    });

    return fallbacks;
  }, []);
}
function getAvatarFallbacks(avatar) { function getAvatarFallbacks(avatar) {
return avatar return avatar
.sort((imageA, imageB) => imageB.height - imageA.height) .sort((imageA, imageB) => imageB.height - imageA.height)
@ -181,42 +153,101 @@ function scrapeAll(scenes, channel) {
release.stars = data.rating; release.stars = data.rating;
console.log(data);
console.log(release);
return release; return release;
}); });
} }
function scrapeUpcoming(scene, site) { function scrapeUpcoming(scene, site) {
if (!scene || scene.isPreReleasePeriod) return null; if (!scene || scene.isPreReleasePeriod) {
return null;
}
const release = {}; const release = {};
release.title = scene.targetUrl release.entryId = scene.videoId;
.slice(1) release.url = `${site.url}/videos/${scene.slug}`;
release.title = scene.slug
.split('-') .split('-')
.map((component) => `${component.charAt(0).toUpperCase()}${component.slice(1)}`) .map((component) => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
.join(' '); .join(' ');
release.url = `${site.url}/videos${scene.targetUrl}`;
release.date = moment.utc(scene.releaseDate).toDate(); release.date = moment.utc(scene.releaseDate).toDate();
release.datePrecision = 'minute'; release.datePrecision = 'minute';
release.actors = scene.models; release.actors = scene.models.map((model) => model.name);
release.poster = getPosterFallbacks(scene.images.poster); release.poster = curateSources(scene.images.poster);
release.teaser = getTeaserFallbacks(scene.previews.poster); release.teaser = curateSources(scene.previews.poster);
release.entryId = (release.poster[0] || release.teaser[0])?.src?.match(/\/(\d+)/)?.[1];
console.log('upcoming', scene);
return [release]; return [release];
} }
async function scrapeScene(data, url, channel, options) { async function fetchGraphqlDetails(release, channel, session) {
const query = `
query($query: String!, $site: Site!) {
searchVideos(input: {
query: $query
site: $site
}) {
edges {
node {
videoId
title
slug
description
releaseDate
categories {
name
}
chapters {
video {
title
seconds
}
}
models {
name
}
images {
poster {
...ImageInfo
}
}
}
}
}
}
fragment ImageInfo on Image {
src
highdpi {
double
}
}
`;
const variables = JSON.stringify({
site: channel.slug.toUpperCase(),
query: release.title,
});
const res = await http.get(`${channel.url}/graphql?query=${encodeURI(query)}&variables=${variables}`, {
session,
headers: {
referer: channel.url,
accept: '*/*',
},
});
if (res.ok) {
return res.body.data?.searchVideos?.edges?.find((edge) => edge.node.videoId === release.entryId)?.node || null;
}
return null;
}
async function scrapeScene(data, url, channel, options, session) {
const release = { const release = {
url, url,
entryId: data.video.videoId || data.video.newId, entryId: data.video.videoId || data.video.newId,
@ -251,7 +282,17 @@ async function scrapeScene(data, url, channel, options) {
release.qualities = data.video?.downloadResolutions.map((quality) => Number(quality.width)).filter(Boolean); // width property is actually the height release.qualities = data.video?.downloadResolutions.map((quality) => Number(quality.width)).filter(Boolean); // width property is actually the height
console.log(release); const graphqlDetails = await fetchGraphqlDetails(release, channel, session);
if (graphqlDetails) {
release.tags = graphqlDetails.categories?.map((category) => category.name);
release.chapters = graphqlDetails.chapters?.video?.map((chapter) => ({
time: chapter.seconds,
tags: [chapter.title],
}));
}
release.channel = data.video?.id.split(':')[0];
return release; return release;
} }
@ -305,61 +346,6 @@ async function scrapeProfile(data, origin, withReleases) {
return profile; return profile;
} }
/**
 * Debugging helper: runs a fixed `searchVideos` GraphQL query against the
 * channel's /graphql endpoint and logs the response to the console.
 *
 * The search term is hard-coded and nothing is returned, so this is not a
 * working replacement for a paginated latest-releases fetch.
 *
 * @param {{url: string, slug: string}} channel - channel whose URL hosts the endpoint; the upper-cased slug is sent as the `site` variable
 * @param {number} [page=1] - currently unused
 * @returns {Promise<void>}
 */
async function fetchLatestGraphql(channel, page = 1) {
  const query = `
query($query: String!, $site: Site!) {
searchVideos(input: {
query: $query
site: $site
}) {
edges {
node {
title
slug
description
releaseDate
categories {
name
}
chapters {
video {
title
seconds
}
}
models {
name
}
images {
poster {
...ImageInfo
}
}
}
}
}
}
fragment ImageInfo on Image {
src
highdpi {
double
}
}
`;

  const variables = JSON.stringify({
    site: channel.slug.toUpperCase(),
    query: 'alone at last',
  });

  // both values are query string components: the JSON contains quotes, braces and spaces,
  // and encodeURI would leave delimiters such as & and = unescaped
  const res = await http.get(`${channel.url}/graphql?query=${encodeURIComponent(query)}&variables=${encodeURIComponent(variables)}`);

  // a failed request may come back without a parsed body; keep the logging from throwing
  console.log(res.body);
  console.log(res.body?.errors);
  console.log(res.body?.data?.searchVideos?.edges?.map((edge) => edge.node));
}
async function fetchLatest(site, page = 1) { async function fetchLatest(site, page = 1) {
const url = `${site.url}/videos?page=${page}`; const url = `${site.url}/videos?page=${page}`;
const res = await qu.get(url); const res = await qu.get(url);
@ -378,13 +364,85 @@ async function fetchLatest(site, page = 1) {
return res.status; return res.status;
} }
async function fetchUpcoming(site) { async function fetchUpcoming(channel) {
const apiUrl = `${site.url}/api`; const query = `
const res = await http.get(apiUrl); query getNextScene($site: Site!) {
nextScene: findNextReleaseVideo(input: { site: $site }) {
videoId
slug
isPreReleasePeriod
releaseDate
models {
name
__typename
}
images {
countdown {
...ImageInfo
__typename
}
poster {
...ImageInfo
__typename
}
__typename
}
previews {
countdown {
...PreviewInfo
__typename
}
poster {
...PreviewInfo
__typename
}
__typename
}
__typename
}
}
fragment ImageInfo on Image {
src
placeholder
width
height
highdpi {
double
triple
__typename
}
webp {
src
placeholder
highdpi {
double
triple
__typename
}
__typename
}
}
fragment PreviewInfo on Preview {
src
width
height
type
}
`;
const res = await http.post(`${channel.url}/graphql`, {
operationName: 'getNextScene',
query,
variables: {
site: channel.slug.toUpperCase(),
},
});
if (res.ok) { if (res.ok) {
if (res.body.data.nextScene) { if (res.body.data.nextScene) {
return scrapeUpcoming(res.body.data.nextScene, site); return scrapeUpcoming(res.body.data.nextScene, channel);
} }
return []; return [];
@ -394,13 +452,14 @@ async function fetchUpcoming(site) {
} }
async function fetchScene(url, channel, baseRelease, options) { async function fetchScene(url, channel, baseRelease, options) {
const res = await qu.get(url); const session = qu.session();
const res = await qu.get(url, null, null, { session });
if (res.ok) { if (res.ok) {
const dataString = res.item.query.html('#__NEXT_DATA__'); const dataString = res.item.query.html('#__NEXT_DATA__');
const data = dataString && JSON.parse(dataString); const data = dataString && JSON.parse(dataString);
return scrapeScene(data.props.pageProps, url, channel, options); return scrapeScene(data.props.pageProps, url, channel, options, session);
} }
return res.status; return res.status;
@ -424,7 +483,6 @@ async function fetchProfile({ name: actorName }, { site }, include) {
} }
module.exports = { module.exports = {
// fetchLatest: fetchLatestGraphql,
fetchLatest, fetchLatest,
fetchUpcoming, fetchUpcoming,
fetchScene, fetchScene,