Updated Vixen model scraper.
This commit is contained in:
parent
4d20dae079
commit
0b101dde3c
|
@ -62,8 +62,6 @@ function scrapeAll(scenes, channel) {
|
|||
function scrapeProfile(actor, entity) {
|
||||
const profile = {};
|
||||
|
||||
console.log(actor);
|
||||
|
||||
if (actor.bio.about && !/\band\b/.test(actor.bio.about)) {
|
||||
const bio = actor.bio.about.split(/\n/).filter(Boolean).reduce((acc, item) => {
|
||||
const [key, value] = item.match(/(.+): (.+)/).slice(1);
|
||||
|
|
|
@ -1,14 +1,12 @@
|
|||
'use strict';
|
||||
|
||||
/* eslint-disable newline-per-chained-call */
|
||||
const Promise = require('bluebird');
|
||||
const moment = require('moment');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const argv = require('../argv');
|
||||
const qu = require('../utils/qu');
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
const genderMap = {
|
||||
F: 'female',
|
||||
|
@ -17,10 +15,15 @@ const genderMap = {
|
|||
};
|
||||
|
||||
/**
 * Build an ordered list of candidate image URLs for an avatar, best first.
 *
 * Images are ordered by descending `height`; within each image the high-DPI
 * variants come before the plain `src`. Both key spellings seen for the
 * high-DPI variants are read (`3x`/`2x` and `triple`/`double`), and missing
 * entries are dropped.
 *
 * @param {Array<{src?: string, height?: number, highdpi?: Object}>|null|undefined} avatar
 *   Image descriptors; may be absent.
 * @returns {string[]|null} Flat list of URLs, or null when no avatar was given.
 */
function getAvatarFallbacks(avatar) {
	if (!avatar) {
		return null;
	}

	// sort() reorders in place, so work on a copy to leave the caller's array untouched
	return [...avatar]
		.sort((imageA, imageB) => imageB.height - imageA.height)
		.flatMap((image) => [
			image.highdpi?.['3x'],
			image.highdpi?.triple,
			image.highdpi?.['2x'],
			image.highdpi?.double,
			image.src,
		])
		.filter(Boolean);
}
|
||||
|
||||
function curateSources(sources, type = 'image/jpeg') {
|
||||
|
@ -52,9 +55,9 @@ function scrapeAll(scenes, channel) {
|
|||
release.title = data.title;
|
||||
|
||||
release.date = qu.extractDate(data.releaseDate);
|
||||
release.actors = data.modelsSlugged.map((model) => ({
|
||||
release.actors = (data.modelsSlugged || data.models)?.map((model) => ({
|
||||
name: model.name,
|
||||
url: `${channel.url}/models/${model.slugged}`,
|
||||
url: model.slugged && `${channel.url}/models/${model.slugged}`,
|
||||
}));
|
||||
|
||||
release.poster = curateSources(data.images.listing);
|
||||
|
@ -300,6 +303,18 @@ const videoFields = `
|
|||
}
|
||||
`;
|
||||
|
||||
// Shared GraphQL fragment selecting the image fields (source URL, dimensions
// and high-DPI variants); interpolated into queries that spread `...ImageInfo`.
const imageFragment = `
	fragment ImageInfo on Image {
		src
		width
		height
		highdpi {
			double
			triple
		}
	}
`;
|
||||
|
||||
function getSlug(release) {
|
||||
if (release.slug) {
|
||||
return release.slug;
|
||||
|
@ -388,26 +403,12 @@ async function fetchScene(url, channel, baseRelease, options) {
|
|||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchActorReleases(pages, model, origin) {
|
||||
const releasesPerPage = await Promise.map(pages, async (page) => {
|
||||
const url = `${origin}/api${model.targetUrl}?page=${page}`;
|
||||
const res = await http.get(url);
|
||||
|
||||
if (res.status === 200) {
|
||||
return scrapeAll(res.body.data.videos.videos, null, origin);
|
||||
}
|
||||
|
||||
return [];
|
||||
}, { concurrency: 3 });
|
||||
|
||||
return releasesPerPage.flat();
|
||||
}
|
||||
|
||||
async function scrapeProfile(data, origin, withReleases) {
|
||||
async function scrapeProfile(data, channel) {
|
||||
const model = data.model;
|
||||
const profile = {};
|
||||
|
||||
profile.birthdate = new Date(model.dateOfBirth);
|
||||
// most details seemingly unavailable in graphql
|
||||
// Guard on the source field: `profile` starts out empty, so testing
// profile.dateOfBirth would never be truthy and the birthdate would never be set.
if (model.dateOfBirth) profile.birthdate = new Date(model.dateOfBirth);
|
||||
profile.gender = genderMap[model.sex];
|
||||
|
||||
profile.hair = model.hairColour;
|
||||
|
@ -423,15 +424,8 @@ async function scrapeProfile(data, origin, withReleases) {
|
|||
profile.poster = getAvatarFallbacks(model.images.profile);
|
||||
profile.banner = getAvatarFallbacks(model.images.poster);
|
||||
|
||||
const releases = scrapeAll(data.videos.videos, null, origin);
|
||||
|
||||
if (withReleases) {
|
||||
const pageCount = Math.ceil(data.videos.count / 6);
|
||||
const otherReleases = await fetchActorReleases((Array.from({ length: pageCount - 1 }, (value, index) => index + 2)), model, origin);
|
||||
|
||||
profile.releases = [...releases, ...otherReleases];
|
||||
} else {
|
||||
profile.releases = releases;
|
||||
if (model.videos) {
|
||||
profile.scenes = scrapeAll(model.videos.edges.map((edge) => edge.node), channel);
|
||||
}
|
||||
|
||||
return profile;
|
||||
|
@ -542,23 +536,82 @@ async function fetchUpcoming(channel) {
|
|||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Fetch a model's profile through the channel's GraphQL endpoint.
 *
 * Posts a `searchModels` query to `${channel.url}/graphql`, looking the model
 * up by `actor.slug` on the site named by the upper-cased `channel.slug`, and
 * passes the response data to `scrapeProfile`.
 *
 * @param {{slug: string}} actor - Actor to look up; only `slug` is read here.
 * @param {{channel: {url: string, slug: string}}} context - Holds the channel
 *   whose URL and slug address the GraphQL endpoint and site.
 * @returns {Promise<Object|null|number>} The scraped profile, null when the
 *   response carries no model, or the HTTP status when the request failed.
 */
async function fetchProfile(actor, { channel }) {
	const res = await http.post(`${channel.url}/graphql`, {
		operationName: 'searchModels',
		variables: {
			slug: actor.slug,
			site: channel.slug.toUpperCase(),
		},
		query: `
			query searchModels(
				$slug: String!
				$site: Site!
			) {
				model: findOneModel(input: { slug: $slug, site: $site }) {
					name
					biography
					images {
						listing {
							...ImageInfo
						}
						profile {
							...ImageInfo
						}
						poster {
							...ImageInfo
						}
					}
					videos {
						edges {
							node {
								videoId
								title
								slug
								releaseDate
								runLength
								site
								rating
								models {
									name
								}
								carousel {
									main {
										src
									}
								}
								previews {
									listing {
										src
									}
								}
								images {
									poster {
										...ImageInfo
									}
								}
							}
						}
					}
				}
			}

			${imageFragment}
		`,
	}, {
		headers: {
			referer: channel.url,
			origin: channel.url,
		},
	});

	if (!res.ok) {
		return res.status;
	}

	// scrapeProfile dereferences data.model, so a response whose `model` is
	// null or missing is treated as "no profile" instead of throwing
	if (res.body.data?.model) {
		return scrapeProfile(res.body.data, channel);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchUpcoming,
|
||||
|
|
Loading…
Reference in New Issue