Updated Vixen model scraper.

This commit is contained in:
DebaucheryLibrarian 2023-07-21 23:55:30 +02:00
parent 4d20dae079
commit 0b101dde3c
2 changed files with 96 additions and 45 deletions

View File

@ -62,8 +62,6 @@ function scrapeAll(scenes, channel) {
function scrapeProfile(actor, entity) { function scrapeProfile(actor, entity) {
const profile = {}; const profile = {};
console.log(actor);
if (actor.bio.about && !/\band\b/.test(actor.bio.about)) { if (actor.bio.about && !/\band\b/.test(actor.bio.about)) {
const bio = actor.bio.about.split(/\n/).filter(Boolean).reduce((acc, item) => { const bio = actor.bio.about.split(/\n/).filter(Boolean).reduce((acc, item) => {
const [key, value] = item.match(/(.+): (.+)/).slice(1); const [key, value] = item.match(/(.+): (.+)/).slice(1);

View File

@ -1,14 +1,12 @@
'use strict'; 'use strict';
/* eslint-disable newline-per-chained-call */ /* eslint-disable newline-per-chained-call */
const Promise = require('bluebird');
const moment = require('moment'); const moment = require('moment');
const unprint = require('unprint'); const unprint = require('unprint');
const argv = require('../argv'); const argv = require('../argv');
const qu = require('../utils/qu'); const qu = require('../utils/qu');
const http = require('../utils/http'); const http = require('../utils/http');
const slugify = require('../utils/slugify');
const genderMap = { const genderMap = {
F: 'female', F: 'female',
@ -17,10 +15,15 @@ const genderMap = {
}; };
/**
 * Build a flat list of candidate image URLs for an avatar, largest image first.
 *
 * For every image the high-DPI variants are listed before the plain `src`,
 * checking both the `3x`/`2x` and the `triple`/`double` key spellings.
 * Missing variants are dropped from the result.
 *
 * @param {Array<object>|null|undefined} avatar - image descriptors with `height`, `src` and optional `highdpi`.
 * @returns {Array<string>|null} ordered fallback URLs, or null when no avatar is given.
 */
function getAvatarFallbacks(avatar) {
  if (!avatar) {
    return null;
  }

  // Array.prototype.sort works in place; copy first so the caller's image list keeps its order
  return [...avatar]
    .sort((imageA, imageB) => imageB.height - imageA.height)
    .flatMap((image) => [
      image.highdpi?.['3x'],
      image.highdpi?.triple,
      image.highdpi?.['2x'],
      image.highdpi?.double,
      image.src,
    ])
    .filter(Boolean);
}
function curateSources(sources, type = 'image/jpeg') { function curateSources(sources, type = 'image/jpeg') {
@ -52,9 +55,9 @@ function scrapeAll(scenes, channel) {
release.title = data.title; release.title = data.title;
release.date = qu.extractDate(data.releaseDate); release.date = qu.extractDate(data.releaseDate);
release.actors = data.modelsSlugged.map((model) => ({ release.actors = (data.modelsSlugged || data.models)?.map((model) => ({
name: model.name, name: model.name,
url: `${channel.url}/models/${model.slugged}`, url: model.slugged && `${channel.url}/models/${model.slugged}`,
})); }));
release.poster = curateSources(data.images.listing); release.poster = curateSources(data.images.listing);
@ -300,6 +303,18 @@ const videoFields = `
} }
`; `;
// GraphQL fragment named ImageInfo, declared on the Image type. It selects
// src, width, height and the double/triple entries of highdpi. The string is
// interpolated into the query text built in fetchProfile, which spreads
// ...ImageInfo into its image selections.
const imageFragment = `
fragment ImageInfo on Image {
src
width
height
highdpi {
double
triple
}
}
`;
function getSlug(release) { function getSlug(release) {
if (release.slug) { if (release.slug) {
return release.slug; return release.slug;
@ -388,26 +403,12 @@ async function fetchScene(url, channel, baseRelease, options) {
return res.status; return res.status;
} }
async function fetchActorReleases(pages, model, origin) { async function scrapeProfile(data, channel) {
const releasesPerPage = await Promise.map(pages, async (page) => {
const url = `${origin}/api${model.targetUrl}?page=${page}`;
const res = await http.get(url);
if (res.status === 200) {
return scrapeAll(res.body.data.videos.videos, null, origin);
}
return [];
}, { concurrency: 3 });
return releasesPerPage.flat();
}
async function scrapeProfile(data, origin, withReleases) {
const model = data.model; const model = data.model;
const profile = {}; const profile = {};
profile.birthdate = new Date(model.dateOfBirth); // most details seemingly unavailable in graphql
if (profile.dateOfBirth) profile.birthdate = new Date(model.dateOfBirth);
profile.gender = genderMap[model.sex]; profile.gender = genderMap[model.sex];
profile.hair = model.hairColour; profile.hair = model.hairColour;
@ -423,15 +424,8 @@ async function scrapeProfile(data, origin, withReleases) {
profile.poster = getAvatarFallbacks(model.images.profile); profile.poster = getAvatarFallbacks(model.images.profile);
profile.banner = getAvatarFallbacks(model.images.poster); profile.banner = getAvatarFallbacks(model.images.poster);
const releases = scrapeAll(data.videos.videos, null, origin); if (model.videos) {
profile.scenes = scrapeAll(model.videos.edges.map((edge) => edge.node), channel);
if (withReleases) {
const pageCount = Math.ceil(data.videos.count / 6);
const otherReleases = await fetchActorReleases((Array.from({ length: pageCount - 1 }, (value, index) => index + 2)), model, origin);
profile.releases = [...releases, ...otherReleases];
} else {
profile.releases = releases;
} }
return profile; return profile;
@ -542,21 +536,80 @@ async function fetchUpcoming(channel) {
return res.status; return res.status;
} }
/**
 * Fetch a model's profile through the channel's GraphQL endpoint and scrape it.
 *
 * Posts a `searchModels` query to `${channel.url}/graphql`, looking the model
 * up by `actor.slug` on the site named by the upper-cased `channel.slug`.
 *
 * @param {object} actor - actor descriptor; only `actor.slug` is read here.
 * @param {object} context - only `context.channel` is read (`url` and `slug`).
 * @returns {Promise<object|null>} whatever scrapeProfile returns, or null when
 *   the request fails or the response carries no model.
 */
async function fetchProfile(actor, { channel }) {
  const res = await http.post(`${channel.url}/graphql`, {
    operationName: 'searchModels',
    variables: {
      slug: actor.slug,
      site: channel.slug.toUpperCase(),
    },
    query: `
      query searchModels(
        $slug: String!
        $site: Site!
      ) {
        model: findOneModel(input: { slug: $slug, site: $site }) {
          name
          biography
          images {
            listing {
              ...ImageInfo
            }
            profile {
              ...ImageInfo
            }
            poster {
              ...ImageInfo
            }
          }
          videos {
            edges {
              node {
                videoId
                title
                slug
                releaseDate
                runLength
                site
                rating
                models {
                  name
                }
                carousel {
                  main {
                    src
                  }
                }
                previews {
                  listing {
                    src
                  }
                }
                images {
                  poster {
                    ...ImageInfo
                  }
                }
              }
            }
          }
        }
      }
      ${imageFragment}
    `,
  }, {
    headers: {
      referer: channel.url,
      origin: channel.url,
    },
  });

  // scrapeProfile dereferences data.model without checking it, so an OK
  // response with no data or a null model (e.g. an unknown slug - TODO confirm
  // the API's behaviour) must be treated as "no profile" rather than passed on
  if (res.ok && res.body?.data?.model) {
    return scrapeProfile(res.body.data, channel);
  }

  return null;
}
module.exports = { module.exports = {