Refactored the Vixen scraper to use the API endpoint, and added an actor profile and releases scraper. The release scraper now returns the base release when one is present and the 'deep' argument is false.
This commit is contained in:
parent
d977a5e712
commit
915eb75719
|
@ -140,6 +140,7 @@ function initActorActions(store, _router) {
|
|||
url
|
||||
title
|
||||
date
|
||||
slug
|
||||
${releaseActorsFragment}
|
||||
${releaseTagsFragment}
|
||||
${releasePosterFragment}
|
||||
|
|
|
@ -43,6 +43,14 @@ module.exports = {
|
|||
'burningangel',
|
||||
'brazzers',
|
||||
'milehighmedia',
|
||||
[
|
||||
'vixen',
|
||||
'tushy',
|
||||
'blacked',
|
||||
'tushyraw',
|
||||
'blackedraw',
|
||||
'deeper',
|
||||
],
|
||||
[
|
||||
// Nubiles
|
||||
'nubiles',
|
||||
|
|
|
@ -381,7 +381,7 @@ async function scrapeActors(actorNames) {
|
|||
|
||||
logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);
|
||||
|
||||
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug);
|
||||
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, argv.withReleases);
|
||||
|
||||
if (profile) {
|
||||
logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);
|
||||
|
|
|
@ -153,7 +153,7 @@ function curateReleases(releases) {
|
|||
}
|
||||
|
||||
async function attachChannelSite(release) {
|
||||
if (!release.site.isFallback) {
|
||||
if (!release.site?.isFallback) {
|
||||
return release;
|
||||
}
|
||||
|
||||
|
|
|
@ -45,6 +45,13 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene', prefli
|
|||
throw new Error(`Could not find site ${url} in database`);
|
||||
}
|
||||
|
||||
if (!argv.deep && release) {
|
||||
return {
|
||||
...release,
|
||||
site,
|
||||
};
|
||||
}
|
||||
|
||||
const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
|
||||
|
||||
if (!scraper) {
|
||||
|
|
|
@ -114,9 +114,15 @@ module.exports = {
|
|||
},
|
||||
actors: {
|
||||
'21sextury': sextury,
|
||||
analbbc: fullpornnetwork,
|
||||
analized: fullpornnetwork,
|
||||
analviolation: fullpornnetwork,
|
||||
anilos: nubiles,
|
||||
babes,
|
||||
baddaddypov: fullpornnetwork,
|
||||
bangbros,
|
||||
blacked: vixen,
|
||||
blackedraw: vixen,
|
||||
blowpass,
|
||||
boobpedia,
|
||||
brattysis: nubiles,
|
||||
|
@ -124,47 +130,47 @@ module.exports = {
|
|||
burningangel,
|
||||
cherrypimps,
|
||||
ddfnetwork,
|
||||
deeper: vixen,
|
||||
deeplush: nubiles,
|
||||
digitalplayground,
|
||||
dtfsluts: fullpornnetwork,
|
||||
evilangel,
|
||||
fakehub,
|
||||
famedigital,
|
||||
freeones,
|
||||
freeonesLegacy,
|
||||
girlfaction: fullpornnetwork,
|
||||
hergape: fullpornnetwork,
|
||||
homemadeanalwhores: fullpornnetwork,
|
||||
hotcrazymess: nubiles,
|
||||
iconmale,
|
||||
jamesdeen: fullpornnetwork,
|
||||
julesjordan,
|
||||
kellymadison,
|
||||
legalporno,
|
||||
men,
|
||||
analbbc: fullpornnetwork,
|
||||
analized: fullpornnetwork,
|
||||
analviolation: fullpornnetwork,
|
||||
baddaddypov: fullpornnetwork,
|
||||
dtfsluts: fullpornnetwork,
|
||||
girlfaction: fullpornnetwork,
|
||||
hergape: fullpornnetwork,
|
||||
homemadeanalwhores: fullpornnetwork,
|
||||
jamesdeen: fullpornnetwork,
|
||||
mugfucked: fullpornnetwork,
|
||||
onlyprince: fullpornnetwork,
|
||||
pervertgallery: fullpornnetwork,
|
||||
povperverts: fullpornnetwork,
|
||||
metrohd,
|
||||
milehighmedia,
|
||||
mofos,
|
||||
mugfucked: fullpornnetwork,
|
||||
naughtyamerica,
|
||||
nfbusty: nubiles,
|
||||
nubilefilms: nubiles,
|
||||
nubiles,
|
||||
nubilesporn: nubiles,
|
||||
onlyprince: fullpornnetwork,
|
||||
pervertgallery: fullpornnetwork,
|
||||
pimpxxx: cherrypimps,
|
||||
pornhub,
|
||||
povperverts: fullpornnetwork,
|
||||
realitykings,
|
||||
score,
|
||||
thatsitcomshow: nubiles,
|
||||
transangels,
|
||||
tushy: vixen,
|
||||
tushyraw: vixen,
|
||||
twistys,
|
||||
vixen,
|
||||
wicked,
|
||||
xempire,
|
||||
},
|
||||
|
|
|
@ -1,10 +1,18 @@
|
|||
'use strict';
|
||||
|
||||
/* eslint-disable newline-per-chained-call */
|
||||
const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
const Promise = require('bluebird');
|
||||
const moment = require('moment');
|
||||
|
||||
const { get, post } = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
const genderMap = {
|
||||
F: 'female',
|
||||
M: 'male',
|
||||
T: 'transsexual', // not yet observed
|
||||
};
|
||||
|
||||
function getPosterFallbacks(poster) {
|
||||
return poster
|
||||
.filter(image => /landscape/i.test(image.name))
|
||||
|
@ -17,54 +25,46 @@ function getPosterFallbacks(poster) {
|
|||
.flat();
|
||||
}
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
/**
 * Build the list of teaser video sources for a release from a list of previews.
 *
 * Only previews whose `name` contains "landscape" (case-insensitive) are kept.
 * Each kept preview is reduced to its `src`, `type` and a numeric `quality`
 * derived from its `height`.
 *
 * @param {Array<{name: string, src: string, type: string, height: (number|string)}>} teaser
 *   Preview entries; anything that is not an array is treated as "no previews".
 * @returns {Array<{src: string, type: string, quality: number}>}
 *   Teaser candidates in their original order; empty when nothing matches.
 */
function getTeaserFallbacks(teaser) {
  // Guard against a missing preview list so a scene without previews
  // yields no teasers instead of throwing on `.filter`.
  if (!Array.isArray(teaser)) {
    return [];
  }

  return teaser
    .filter(video => /landscape/i.test(video.name))
    .map(video => ({
      src: video.src,
      type: video.type,
      // A reported height of 353 is normalised to a quality of 360.
      quality: Number(String(video.height).replace('353', '360')),
    }));
}
|
||||
|
||||
const stateScript = $('script:contains("INITIAL_STATE")').html();
|
||||
const { videos: scenes } = JSON.parse(stateScript.slice(stateScript.indexOf('{'), stateScript.indexOf('};') + 1));
|
||||
/**
 * Build an ordered list of image URLs to try for an actor image.
 *
 * Images are ordered from tallest to shortest; for each image the 3x and 2x
 * high-DPI variants (when present) are listed before the plain `src`.
 *
 * @param {Array<{height: number, src: string, highdpi?: Object<string, string>}>} avatar
 *   Image entries; anything that is not an array is treated as "no images".
 * @returns {string[]} Candidate URLs, best first, without empty entries.
 */
function getAvatarFallbacks(avatar) {
  if (!Array.isArray(avatar)) {
    return [];
  }

  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the caller's image list.
  return [...avatar]
    .sort((imageA, imageB) => imageB.height - imageA.height)
    .flatMap(image => [image.highdpi?.['3x'], image.highdpi?.['2x'], image.src])
    // Images without high-DPI variants would leave `undefined` holes in the list.
    .filter(Boolean);
}
|
||||
|
||||
function scrapeAll(scenes, site, origin) {
|
||||
return scenes.map((scene) => {
|
||||
const entryId = String(scene.newId);
|
||||
const release = {};
|
||||
|
||||
const {
|
||||
title,
|
||||
models: actors,
|
||||
} = scene;
|
||||
release.title = scene.title;
|
||||
|
||||
const url = `${site.url}${scene.targetUrl}`;
|
||||
const date = moment.utc(scene.releaseDateFormatted, 'MMMM DD, YYYY').toDate();
|
||||
const stars = Number(scene.textRating) / 2;
|
||||
release.entryId = String(scene.newId);
|
||||
release.url = `${site?.url || origin}${scene.targetUrl}`;
|
||||
|
||||
// largest thumbnail. poster is the same image but bigger, too large for storage space efficiency
|
||||
const poster = scene.images.listing.slice(-1)[0].src;
|
||||
const teaser = scene.previews.listing.slice(-1)[0];
|
||||
release.date = moment.utc(scene.releaseDate).toDate();
|
||||
release.shootDate = moment.utc(scene.shootDate).toDate();
|
||||
|
||||
return {
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
poster,
|
||||
teaser: {
|
||||
src: teaser.src,
|
||||
type: teaser.type,
|
||||
quality: teaser.height,
|
||||
},
|
||||
rating: {
|
||||
stars,
|
||||
},
|
||||
site,
|
||||
};
|
||||
release.actors = scene.models;
|
||||
release.stars = Number(scene.textRating) / 2;
|
||||
|
||||
release.poster = getPosterFallbacks(scene.images.poster);
|
||||
release.teaser = getTeaserFallbacks(scene.previews.poster);
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeUpcoming(html, site) {
|
||||
const statePrefix = html.indexOf('__INITIAL_STATE__');
|
||||
const stateString = html.slice(html.indexOf('{', statePrefix), html.indexOf('};', statePrefix) + 1);
|
||||
const data = JSON.parse(stateString);
|
||||
|
||||
const scene = data.page.data['/'].data?.nextScene;
|
||||
function scrapeUpcoming(scene, site) {
|
||||
if (!scene || scene.isPreReleasePeriod) return null;
|
||||
|
||||
const release = {};
|
||||
|
@ -75,33 +75,23 @@ function scrapeUpcoming(html, site) {
|
|||
.map(component => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
|
||||
.join(' ');
|
||||
|
||||
release.date = moment.utc(scene.releaseDate).toDate();
|
||||
release.url = `${site.url}${scene.targetUrl}`;
|
||||
|
||||
release.date = moment.utc(scene.releaseDate).toDate();
|
||||
release.shootDate = moment.utc(scene.shootDate).toDate();
|
||||
|
||||
release.actors = scene.models;
|
||||
|
||||
release.poster = getPosterFallbacks(scene.images.poster);
|
||||
release.teaser = scene.previews.poster
|
||||
.filter(teaser => /landscape/i.test(teaser.name))
|
||||
.map(teaser => ({
|
||||
src: teaser.src,
|
||||
type: teaser.type,
|
||||
quality: Number(String(teaser.height).replace('353', '360')),
|
||||
}));
|
||||
release.teaser = getTeaserFallbacks(scene.previews.poster);
|
||||
|
||||
release.entryId = (release.poster[0] || release.teaser[0])?.match(/\/(\d+)/)?.[1];
|
||||
|
||||
return [release];
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
const stateObject = $('script:contains("INITIAL_STATE")');
|
||||
const data = JSON.parse(stateObject.html().trim().slice(27, -1));
|
||||
|
||||
const pageData = data.page.data[data.location.pathname].data;
|
||||
const scene = data.videos.find(video => video.newId === pageData.video);
|
||||
async function scrapeScene(data, url, site, baseRelease) {
|
||||
const scene = data.video;
|
||||
|
||||
const release = {
|
||||
url,
|
||||
|
@ -114,64 +104,132 @@ async function scrapeScene(html, url) {
|
|||
tags: scene.tags,
|
||||
};
|
||||
|
||||
release.entryId = pageData.video;
|
||||
release.actors = scene.models;
|
||||
release.entryId = scene.newId;
|
||||
|
||||
release.date = moment.utc(scene.releaseDate).toDate();
|
||||
release.shootDate = moment.utc(scene.shootDate).toDate();
|
||||
|
||||
release.actors = baseRelease?.actors || scene.models;
|
||||
|
||||
// release.poster = scene.rotatingThumbsUrlSizes[0]['1040w'];
|
||||
release.poster = getPosterFallbacks(scene.images.poster);
|
||||
release.photos = pageData.pictureset.map(photo => photo.main[0].src);
|
||||
release.photos = data.pictureset.map(photo => photo.main[0].src);
|
||||
|
||||
const trailer = scene.previews.listing.find(preview => preview.height === 353);
|
||||
if (trailer) release.trailer = { src: trailer };
|
||||
release.teaser = getTeaserFallbacks(scene.previews.poster);
|
||||
|
||||
// trailer must exist!
|
||||
const qualities = [360, 480, 720, 1080, 2160];
|
||||
const trailersUrl = `${site.url}/api/__tkn/${scene.previewVideoUrl1080P}/trailer/${qualities.join('+')}`;
|
||||
const trailersRes = await post(trailersUrl, null, { headers: { referer: url } });
|
||||
|
||||
release.teaser = scene.previews.poster
|
||||
.filter(teaser => /landscape/i.test(teaser.name))
|
||||
.map(teaser => ({
|
||||
src: teaser.src,
|
||||
type: teaser.type,
|
||||
quality: Number(String(teaser.height).replace('353', '360')),
|
||||
}));
|
||||
|
||||
release.date = new Date(scene.releaseDate);
|
||||
if (trailersRes.code === 200) {
|
||||
release.trailer = qualities.map(quality => (trailersRes.body[quality] ? {
|
||||
src: trailersRes.body[quality].token,
|
||||
quality,
|
||||
} : null)).filter(Boolean);
|
||||
}
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = `${site.url}/videos?page=${page}&size=7`;
|
||||
const res = await bhttp.get(url);
|
||||
async function fetchActorReleases(pages, model, origin) {
|
||||
const releasesPerPage = await Promise.map(pages, async (page) => {
|
||||
const url = `${origin}/api${model.targetUrl}?page=${page}`;
|
||||
const res = await get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
if (res.code === 200) {
|
||||
return scrapeAll(res.body.data.videos.videos, null, origin);
|
||||
}
|
||||
|
||||
return [];
|
||||
}, { concurrency: 3 });
|
||||
|
||||
return releasesPerPage.flat();
|
||||
}
|
||||
|
||||
async function scrapeProfile(data, origin, withReleases) {
|
||||
const model = data.model;
|
||||
const profile = {};
|
||||
|
||||
profile.birthdate = new Date(model.dateOfBirth);
|
||||
profile.gender = genderMap[model.sex];
|
||||
|
||||
profile.hair = model.hairColour;
|
||||
profile.nationality = model.nationality;
|
||||
|
||||
if (model.biography.trim().length > 0) profile.description = model.biography;
|
||||
|
||||
if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
|
||||
if (model.waistMeasurment) profile.waist = model.waistMeasurment;
|
||||
if (model.hipMeasurment) profile.hip = model.hipMeasurment;
|
||||
|
||||
profile.avatar = getAvatarFallbacks(model.images.listing);
|
||||
profile.poster = getAvatarFallbacks(model.images.profile);
|
||||
profile.banner = getAvatarFallbacks(model.images.poster);
|
||||
|
||||
const releases = scrapeAll(data.videos.videos, null, origin);
|
||||
|
||||
if (withReleases) {
|
||||
const pageCount = Math.ceil(data.videos.count / 6);
|
||||
const otherReleases = await fetchActorReleases((Array.from({ length: pageCount - 1 }, (value, index) => index + 2)), model, origin);
|
||||
|
||||
profile.releases = [...releases, ...otherReleases];
|
||||
} else {
|
||||
profile.releases = releases;
|
||||
}
|
||||
|
||||
return res.statusCode;
|
||||
return profile;
|
||||
}
|
||||
|
||||
/**
 * Fetch one page of a site's videos from its `/api/videos` endpoint.
 *
 * @param {{url: string}} site - Site whose `url` is used as the API base.
 * @param {number} [page=1] - Page number passed as the `page` query parameter.
 * @returns {Promise<Array|number>} The releases produced by `scrapeAll` when
 *   the response code is 200, otherwise the response code itself.
 */
async function fetchLatest(site, page = 1) {
  const res = await get(`${site.url}/api/videos?page=${page}`);

  // Anything but a 200 is handed back to the caller as the bare status code.
  if (res.code !== 200) {
    return res.code;
  }

  return scrapeAll(res.body.data.videos, site);
}
|
||||
|
||||
async function fetchUpcoming(site) {
|
||||
const res = await bhttp.get(site.url);
|
||||
const apiUrl = `${site.url}/api`;
|
||||
const res = await get(apiUrl);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeUpcoming(res.body.toString(), site);
|
||||
if (res.code === 200) {
|
||||
return scrapeUpcoming(res.body.data.nextScene, site);
|
||||
}
|
||||
|
||||
return res.statusCode;
|
||||
return res.code;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await bhttp.get(url);
|
||||
async function fetchScene(url, site, baseRelease) {
|
||||
const { origin, pathname } = new URL(url);
|
||||
const apiUrl = `${origin}/api${pathname}`;
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
const res = await get(apiUrl);
|
||||
|
||||
if (res.code === 200) {
|
||||
return scrapeScene(res.body.data, url, site, baseRelease);
|
||||
}
|
||||
|
||||
throw new Error(`Vixen response not OK for scene (${url}): ${res.statusCode}`);
|
||||
return res.code;
|
||||
}
|
||||
|
||||
/**
 * Look up an actor profile through a site's API.
 *
 * The site origin is built as `https://www.<scraperSlug>.com` and the actor is
 * requested at `<origin>/api/<slugified actor name>`.
 *
 * @param {string} actorName - Actor name; slugified before being put in the URL.
 * @param {string} scraperSlug - Site slug used as the domain name.
 * @param {boolean} withReleases - Passed through to `scrapeProfile`.
 * @returns {Promise<Object|null>} The result of `scrapeProfile` when the
 *   response code is 200, otherwise `null`.
 */
async function fetchProfile(actorName, scraperSlug, withReleases) {
  const origin = `https://www.${scraperSlug}.com`;
  const res = await get(`${origin}/api/${slugify(actorName)}`);

  return res.code === 200
    ? scrapeProfile(res.body.data, origin, withReleases)
    : null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue