From 0fa36b17bf0195cb6069f67d0d62f26fed7ad42d Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Tue, 24 May 2022 00:22:33 +0200 Subject: [PATCH] Refactored upcoming scenes in Vixen scraper. --- src/scrapers/vixen.js | 272 +++++++++++++++++++++++++----------------- 1 file changed, 165 insertions(+), 107 deletions(-) diff --git a/src/scrapers/vixen.js b/src/scrapers/vixen.js index 588a5ea9..3116551b 100644 --- a/src/scrapers/vixen.js +++ b/src/scrapers/vixen.js @@ -14,34 +14,6 @@ const genderMap = { T: 'transsexual', // not yet observed }; -function getPosterFallbacks(poster) { - return poster - .filter((image) => /landscape/i.test(image.name)) - .sort((imageA, imageB) => imageB.height - imageA.height) - .map((image) => { - const sources = [image.src, image.highdpi?.['2x'], image.highdpi?.['3x']]; - // high DPI images for full HD source are huge, only prefer for smaller fallback sources - return image.height === 1080 ? sources : sources.reverse(); - }) - .flat() - .map((src) => ({ - src, - expectType: { - 'binary/octet-stream': 'image/jpeg', - }, - })); -} - -function getTeaserFallbacks(teaser) { - return teaser - .filter((video) => /landscape/i.test(video.name)) - .map((video) => ({ - src: video.src, - type: video.type, - quality: Number(String(video.height).replace('353', '360')), - })); -} - function getAvatarFallbacks(avatar) { return avatar .sort((imageA, imageB) => imageB.height - imageA.height) @@ -181,42 +153,101 @@ function scrapeAll(scenes, channel) { release.stars = data.rating; - console.log(data); - console.log(release); - return release; }); } function scrapeUpcoming(scene, site) { - if (!scene || scene.isPreReleasePeriod) return null; + if (!scene || scene.isPreReleasePeriod) { + return null; + } const release = {}; - release.title = scene.targetUrl - .slice(1) + release.entryId = scene.videoId; + release.url = `${site.url}/videos/${scene.slug}`; + + release.title = scene.slug .split('-') .map((component) => `${component.charAt(0).toUpperCase()}${component.slice(1)}`) .join(' '); - release.url = `${site.url}/videos${scene.targetUrl}`; - release.date = moment.utc(scene.releaseDate).toDate(); release.datePrecision = 'minute'; - release.actors = scene.models; + release.actors = scene.models.map((model) => model.name); - release.poster = getPosterFallbacks(scene.images.poster); - release.teaser = getTeaserFallbacks(scene.previews.poster); - - release.entryId = (release.poster[0] || release.teaser[0])?.src?.match(/\/(\d+)/)?.[1]; - - console.log('upcoming', scene); + release.poster = curateSources(scene.images.poster); + release.teaser = curateSources(scene.previews.poster); return [release]; } -async function scrapeScene(data, url, channel, options) { +async function fetchGraphqlDetails(release, channel, session) { + const query = ` + query($query: String!, $site: Site!) { + searchVideos(input: { + query: $query + site: $site + }) { + edges { + node { + videoId + title + slug + description + releaseDate + categories { + name + } + chapters { + video { + title + seconds + } + } + models { + name + } + images { + poster { + ...ImageInfo + } + } + } + } + } + } + + fragment ImageInfo on Image { + src + highdpi { + double + } + } + `; + + const variables = JSON.stringify({ + site: channel.slug.toUpperCase(), + query: release.title, + }); + + const res = await http.get(`${channel.url}/graphql?query=${encodeURI(query)}&variables=${variables}`, { + session, + headers: { + referer: channel.url, + accept: '*/*', + }, + }); + + if (res.ok) { + return res.body.data?.searchVideos?.edges?.find((edge) => edge.node.videoId === release.entryId)?.node || null; + } + + return null; +} + +async function scrapeScene(data, url, channel, options, session) { const release = { url, entryId: data.video.videoId || data.video.newId, @@ -251,7 +282,17 @@ async function scrapeScene(data, url, channel, options) { release.qualities = data.video?.downloadResolutions.map((quality) => Number(quality.width)).filter(Boolean); // width property is actually the height - console.log(release); + const graphqlDetails = await fetchGraphqlDetails(release, channel, session); + + if (graphqlDetails) { + release.tags = graphqlDetails.categories?.map((category) => category.name); + release.chapters = graphqlDetails.chapters?.video?.map((chapter) => ({ + time: chapter.seconds, + tags: [chapter.title], + })); + } + + release.channel = data.video?.id.split(':')[0]; return release; } @@ -305,61 +346,6 @@ async function scrapeProfile(data, origin, withReleases) { return profile; } -async function fetchLatestGraphql(channel, page = 1) { - const query = ` - query($query: String!, $site: Site!) { - searchVideos(input: { - query: $query - site: $site - }) { - edges { - node { - title - slug - description - releaseDate - categories { - name - } - chapters { - video { - title - seconds - } - } - models { - name - } - images { - poster { - ...ImageInfo - } - } - } - } - } - } - - fragment ImageInfo on Image { - src - highdpi { - double - } - } - `; - - const variables = JSON.stringify({ - site: channel.slug.toUpperCase(), - query: 'alone at last', - }); - - const res = await http.get(`${channel.url}/graphql?query=${encodeURI(query)}&variables=${variables}`); - - console.log(res.body); - console.log(res.body.errors); - console.log(res.body.data?.searchVideos?.edges.map((edge) => edge.node)); -} - async function fetchLatest(site, page = 1) { const url = `${site.url}/videos?page=${page}`; const res = await qu.get(url); @@ -378,13 +364,85 @@ async function fetchLatest(site, page = 1) { return res.status; } -async function fetchUpcoming(site) { - const apiUrl = `${site.url}/api`; - const res = await http.get(apiUrl); +async function fetchUpcoming(channel) { + const query = ` + query getNextScene($site: Site!) { + nextScene: findNextReleaseVideo(input: { site: $site }) { + videoId + slug + isPreReleasePeriod + releaseDate + models { + name + __typename + } + images { + countdown { + ...ImageInfo + __typename + } + poster { + ...ImageInfo + __typename + } + __typename + } + previews { + countdown { + ...PreviewInfo + __typename + } + poster { + ...PreviewInfo + __typename + } + __typename + } + __typename + } + } + + fragment ImageInfo on Image { + src + placeholder + width + height + highdpi { + double + triple + __typename + } + webp { + src + placeholder + highdpi { + double + triple + __typename + } + __typename + } + } + + fragment PreviewInfo on Preview { + src + width + height + type + } + `; + + const res = await http.post(`${channel.url}/graphql`, { + operationName: 'getNextScene', + query, + variables: { + site: channel.slug.toUpperCase(), + }, + }); if (res.ok) { if (res.body.data.nextScene) { - return scrapeUpcoming(res.body.data.nextScene, site); + return scrapeUpcoming(res.body.data.nextScene, channel); } return []; @@ -394,13 +452,14 @@ async function fetchUpcoming(site) { } async function fetchScene(url, channel, baseRelease, options) { - const res = await qu.get(url); + const session = qu.session(); + const res = await qu.get(url, null, null, { session }); if (res.ok) { const dataString = res.item.query.html('#__NEXT_DATA__'); const data = dataString && JSON.parse(dataString); - return scrapeScene(data.props.pageProps, url, channel, options); + return scrapeScene(data.props.pageProps, url, channel, options, session); } return res.status; @@ -424,7 +483,6 @@ async function fetchProfile({ name: actorName }, { site }, include) { } module.exports = { - // fetchLatest: fetchLatestGraphql, fetchLatest, fetchUpcoming, fetchScene,