From 5e1a1005f15744d3fa97ef53b651f70c3e1c3cac Mon Sep 17 00:00:00 2001
From: Niels Simenon
Date: Sat, 22 Feb 2020 05:29:02 +0100
Subject: [PATCH] Refactored Vixen scene scraper. Using better poster source for Vixen. Returning video as teaser instead of trailer.

---
 src/scrapers/vixen.js | 100 ++++++++++++++++++++----------------------
 1 file changed, 48 insertions(+), 52 deletions(-)

diff --git a/src/scrapers/vixen.js b/src/scrapers/vixen.js
index cd10adec..b6030bbd 100644
--- a/src/scrapers/vixen.js
+++ b/src/scrapers/vixen.js
@@ -5,6 +5,18 @@ const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 const moment = require('moment');
 
+function getPosterFallbacks(poster) {
+    return poster
+        .filter(image => /landscape/i.test(image.name))
+        .sort((imageA, imageB) => imageB.height - imageA.height)
+        .map((image) => {
+            const sources = [image.src, image.highdpi?.['2x'], image.highdpi?.['3x']];
+            // high DPI images for full HD source are huge, only prefer for smaller fallback sources
+            return image.height === 1080 ? sources : sources.reverse();
+        })
+        .flat();
+}
+
 function scrapeLatest(html, site) {
     const $ = cheerio.load(html, { normalizeWhitespace: true });
 
@@ -53,7 +65,7 @@ function scrapeUpcoming(html, site) {
     const data = JSON.parse(stateString);
 
     const scene = data.page.data['/'].data?.nextScene;
-    if (!scene) return null;
+    if (!scene || scene.isPreReleasePeriod) return null;
 
     const release = {};
 
@@ -68,16 +80,7 @@ function scrapeUpcoming(html, site) {
 
     release.actors = scene.models;
 
-    release.poster = scene.images.poster
-        .filter(image => /landscape/i.test(image.name))
-        .sort((imageA, imageB) => imageB.height - imageA.height)
-        .map((image) => {
-            const sources = [image.src, image.highdpi?.['2x'], image.highdpi?.['3x']];
-            // high DPI images for full HD source are huge, only prefer for smaller fallback sources
-            return image.height === 1080 ? sources : sources.reverse();
-        })
-        .flat();
-
+    release.poster = getPosterFallbacks(scene.images.poster);
     release.teaser = scene.previews.poster
         .filter(teaser => /landscape/i.test(teaser.name))
         .map(teaser => ({
@@ -91,56 +94,49 @@ function scrapeUpcoming(html, site) {
     return [release];
 }
 
-async function scrapeScene(html, url, site) {
+async function scrapeScene(html, url) {
     const $ = cheerio.load(html, { normalizeWhitespace: true });
 
     const stateObject = $('script:contains("INITIAL_STATE")');
     const data = JSON.parse(stateObject.html().trim().slice(27, -1));
 
     const pageData = data.page.data[data.location.pathname].data;
-    const entryId = pageData.video;
-    const scene = data.videos.find(video => video.newId === entryId);
+    const scene = data.videos.find(video => video.newId === pageData.video);
 
-    console.log(scene, data, pageData);
-
-    const poster = scene.rotatingThumbsUrlSizes[0]['1040w'];
-    const photos = pageData.pictureset.map(photo => photo.main[0].src);
-    const trailer = scene.previews.listing.find(preview => preview.height === 353) || null;
-
-    const {
-        title,
-        description,
-        models: actors,
-        totalRateVal: stars,
-        runLength: duration,
-        directorNames: director,
-        tags,
-    } = scene;
-
-    const date = new Date(scene.releaseDate);
-
-    return {
+    const release = {
         url,
-        entryId,
-        title,
-        description,
-        actors,
-        director,
-        date,
-        duration,
-        tags,
-        photos,
-        poster,
-        trailer: trailer && {
-            src: trailer.src,
-            type: trailer.type,
-            quality: 353,
-        },
-        rating: {
-            stars,
-        },
-        site,
+        title: scene.title,
+        description: scene.description,
+        actors: scene.models,
+        director: scene.directorNames,
+        duration: scene.runLength,
+        stars: scene.totalRateVal,
+        tags: scene.tags,
     };
+
+    release.entryId = pageData.video;
+    release.actors = scene.models;
+
+    // landscape poster images, largest first, replace the former rotating thumb (1040w) as poster source
+    release.poster = getPosterFallbacks(scene.images.poster);
+    release.photos = pageData.pictureset.map(photo => photo.main[0].src);
+
+    const trailer = scene.previews.listing.find(preview => preview.height === 353);
+    if (trailer) release.trailer = { src: trailer.src };
+
+    // trailer must exist!
+
+    release.teaser = scene.previews.poster
+        .filter(teaser => /landscape/i.test(teaser.name))
+        .map(teaser => ({
+            src: teaser.src,
+            type: teaser.type,
+            quality: Number(String(teaser.height).replace('353', '360')),
+        }));
+
+    release.date = new Date(scene.releaseDate);
+
+    return release;
 }
 
 async function fetchLatest(site, page = 1) {