From 0b101dde3c36463341dcf5006e6a1ca1f34aa925 Mon Sep 17 00:00:00 2001
From: DebaucheryLibrarian
Date: Fri, 21 Jul 2023 23:55:30 +0200
Subject: [PATCH] Updated Vixen model scraper.

---
Review notes:
* scrapeProfile: the birthdate guard now tests model.dateOfBirth. It used to
  test profile.dateOfBirth, but profile is created as {} earlier in the same
  hunk, so the guard could never pass and birthdate was never assigned.
* Line breaks were restored from the hunk line counts. Tab indentation is
  assumed; confirm against the target tree before applying.
* The post-image blob id on the vixen.js index line predates the first note.

 src/scrapers/teamskeet.js |   2 -
 src/scrapers/vixen.js     | 139 ++++++++++++++++++++++++++------------
 2 files changed, 96 insertions(+), 45 deletions(-)

diff --git a/src/scrapers/teamskeet.js b/src/scrapers/teamskeet.js
index 11ef71f7..1dbaf680 100755
--- a/src/scrapers/teamskeet.js
+++ b/src/scrapers/teamskeet.js
@@ -62,8 +62,6 @@ function scrapeAll(scenes, channel) {
 function scrapeProfile(actor, entity) {
 	const profile = {};
 
-	console.log(actor);
-
 	if (actor.bio.about && !/\band\b/.test(actor.bio.about)) {
 		const bio = actor.bio.about.split(/\n/).filter(Boolean).reduce((acc, item) => {
 			const [key, value] = item.match(/(.+): (.+)/).slice(1);
diff --git a/src/scrapers/vixen.js b/src/scrapers/vixen.js
index de3e06b7..7ee01bd0 100755
--- a/src/scrapers/vixen.js
+++ b/src/scrapers/vixen.js
@@ -1,14 +1,12 @@
 'use strict';
 
 /* eslint-disable newline-per-chained-call */
-const Promise = require('bluebird');
 const moment = require('moment');
 const unprint = require('unprint');
 
 const argv = require('../argv');
 const qu = require('../utils/qu');
 const http = require('../utils/http');
-const slugify = require('../utils/slugify');
 
 const genderMap = {
 	F: 'female',
@@ -17,10 +15,15 @@ const genderMap = {
 };
 
 function getAvatarFallbacks(avatar) {
+	if (!avatar) {
+		return null;
+	}
+
 	return avatar
 		.sort((imageA, imageB) => imageB.height - imageA.height)
-		.map((image) => [image.highdpi?.['3x'], image.highdpi?.['2x'], image.src])
-		.flat();
+		.map((image) => [image.highdpi?.['3x'], image.highdpi?.triple, image.highdpi?.['2x'], image.highdpi?.double, image.src])
+		.flat()
+		.filter(Boolean);
 }
 
 function curateSources(sources, type = 'image/jpeg') {
@@ -52,9 +55,9 @@ function scrapeAll(scenes, channel) {
 		release.title = data.title;
 		release.date = qu.extractDate(data.releaseDate);
 
-		release.actors = data.modelsSlugged.map((model) => ({
+		release.actors = (data.modelsSlugged || data.models)?.map((model) => ({
 			name: model.name,
-			url: `${channel.url}/models/${model.slugged}`,
+			url: model.slugged && `${channel.url}/models/${model.slugged}`,
 		}));
 
 		release.poster = curateSources(data.images.listing);
@@ -300,6 +303,18 @@ const videoFields = `
 	}
 `;
 
+const imageFragment = `
+	fragment ImageInfo on Image {
+		src
+		width
+		height
+		highdpi {
+			double
+			triple
+		}
+	}
+`;
+
 function getSlug(release) {
 	if (release.slug) {
 		return release.slug;
@@ -388,26 +403,12 @@ async function fetchScene(url, channel, baseRelease, options) {
 	return res.status;
 }
 
-async function fetchActorReleases(pages, model, origin) {
-	const releasesPerPage = await Promise.map(pages, async (page) => {
-		const url = `${origin}/api${model.targetUrl}?page=${page}`;
-		const res = await http.get(url);
-
-		if (res.status === 200) {
-			return scrapeAll(res.body.data.videos.videos, null, origin);
-		}
-
-		return [];
-	}, { concurrency: 3 });
-
-	return releasesPerPage.flat();
-}
-
-async function scrapeProfile(data, origin, withReleases) {
+async function scrapeProfile(data, channel) {
 	const model = data.model;
 	const profile = {};
 
-	profile.birthdate = new Date(model.dateOfBirth);
+	// most details seemingly unavailable in graphql
+	if (model.dateOfBirth) profile.birthdate = new Date(model.dateOfBirth);
 	profile.gender = genderMap[model.sex];
 	profile.hair = model.hairColour;
 
@@ -423,15 +424,8 @@ async function scrapeProfile(data, origin, withReleases) {
 	profile.poster = getAvatarFallbacks(model.images.profile);
 	profile.banner = getAvatarFallbacks(model.images.poster);
 
-	const releases = scrapeAll(data.videos.videos, null, origin);
-
-	if (withReleases) {
-		const pageCount = Math.ceil(data.videos.count / 6);
-		const otherReleases = await fetchActorReleases((Array.from({ length: pageCount - 1 }, (value, index) => index + 2)), model, origin);
-
-		profile.releases = [...releases, ...otherReleases];
-	} else {
-		profile.releases = releases;
+	if (model.videos) {
+		profile.scenes = scrapeAll(model.videos.edges.map((edge) => edge.node), channel);
 	}
 
 	return profile;
@@ -542,21 +536,80 @@ async function fetchUpcoming(channel) {
 	return res.status;
 }
 
-async function fetchProfile({ name: actorName }, { site }, include) {
-	const origin = site.url;
-	const actorSlug = slugify(actorName);
-	const url = `${origin}/api/${actorSlug}`;
-	const res = await http.get(url);
+async function fetchProfile(actor, { channel }) {
+	const res = await http.post(`${channel.url}/graphql`, {
+		operationName: 'searchModels',
+		variables: {
+			slug: actor.slug,
+			site: channel.slug.toUpperCase(),
+		},
+		query: `
+			query searchModels(
+				$slug: String!
+				$site: Site!
+			) {
+				model: findOneModel(input: { slug: $slug, site: $site }) {
+					name
+					biography
+					images {
+						listing {
+							...ImageInfo
+						}
+						profile {
+							...ImageInfo
+						}
+						poster {
+							...ImageInfo
+						}
+					}
+					videos {
+						edges {
+							node {
+								videoId
+								title
+								slug
+								releaseDate
+								runLength
+								site
+								rating
+								models {
+									name
+								}
+								carousel {
+									main {
+										src
+									}
+								}
+								previews {
+									listing {
+										src
+									}
+								}
+								images {
+									poster {
+										...ImageInfo
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+
+			${imageFragment}
+		`,
+	}, {
+		headers: {
+			referer: channel.url,
+			origin: channel.url,
+		},
+	});
 
 	if (res.ok) {
-		if (res.body.data) {
-			return scrapeProfile(res.body.data, origin, include.scenes);
-		}
-
-		return null;
+		return scrapeProfile(res.body.data, channel);
 	}
 
-	return res.status;
+	return null;
 }
 
 module.exports = {