Refactored the Vixen scraper to use the API endpoint, and added an actor profile and releases scraper. The release scraper now returns the base release when one is present and the 'deep' argument is false.
This commit is contained in:
parent
d977a5e712
commit
915eb75719
|
@ -140,6 +140,7 @@ function initActorActions(store, _router) {
|
||||||
url
|
url
|
||||||
title
|
title
|
||||||
date
|
date
|
||||||
|
slug
|
||||||
${releaseActorsFragment}
|
${releaseActorsFragment}
|
||||||
${releaseTagsFragment}
|
${releaseTagsFragment}
|
||||||
${releasePosterFragment}
|
${releasePosterFragment}
|
||||||
|
|
|
@ -43,6 +43,14 @@ module.exports = {
|
||||||
'burningangel',
|
'burningangel',
|
||||||
'brazzers',
|
'brazzers',
|
||||||
'milehighmedia',
|
'milehighmedia',
|
||||||
|
[
|
||||||
|
'vixen',
|
||||||
|
'tushy',
|
||||||
|
'blacked',
|
||||||
|
'tushyraw',
|
||||||
|
'blackedraw',
|
||||||
|
'deeper',
|
||||||
|
],
|
||||||
[
|
[
|
||||||
// Nubiles
|
// Nubiles
|
||||||
'nubiles',
|
'nubiles',
|
||||||
|
|
|
@ -381,7 +381,7 @@ async function scrapeActors(actorNames) {
|
||||||
|
|
||||||
logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);
|
logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);
|
||||||
|
|
||||||
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug);
|
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, argv.withReleases);
|
||||||
|
|
||||||
if (profile) {
|
if (profile) {
|
||||||
logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);
|
logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);
|
||||||
|
|
|
@ -153,7 +153,7 @@ function curateReleases(releases) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function attachChannelSite(release) {
|
async function attachChannelSite(release) {
|
||||||
if (!release.site.isFallback) {
|
if (!release.site?.isFallback) {
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -45,6 +45,13 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene', prefli
|
||||||
throw new Error(`Could not find site ${url} in database`);
|
throw new Error(`Could not find site ${url} in database`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!argv.deep && release) {
|
||||||
|
return {
|
||||||
|
...release,
|
||||||
|
site,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
|
const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
|
||||||
|
|
||||||
if (!scraper) {
|
if (!scraper) {
|
||||||
|
|
|
@ -114,9 +114,15 @@ module.exports = {
|
||||||
},
|
},
|
||||||
actors: {
|
actors: {
|
||||||
'21sextury': sextury,
|
'21sextury': sextury,
|
||||||
|
analbbc: fullpornnetwork,
|
||||||
|
analized: fullpornnetwork,
|
||||||
|
analviolation: fullpornnetwork,
|
||||||
anilos: nubiles,
|
anilos: nubiles,
|
||||||
babes,
|
babes,
|
||||||
|
baddaddypov: fullpornnetwork,
|
||||||
bangbros,
|
bangbros,
|
||||||
|
blacked: vixen,
|
||||||
|
blackedraw: vixen,
|
||||||
blowpass,
|
blowpass,
|
||||||
boobpedia,
|
boobpedia,
|
||||||
brattysis: nubiles,
|
brattysis: nubiles,
|
||||||
|
@ -124,47 +130,47 @@ module.exports = {
|
||||||
burningangel,
|
burningangel,
|
||||||
cherrypimps,
|
cherrypimps,
|
||||||
ddfnetwork,
|
ddfnetwork,
|
||||||
|
deeper: vixen,
|
||||||
deeplush: nubiles,
|
deeplush: nubiles,
|
||||||
digitalplayground,
|
digitalplayground,
|
||||||
|
dtfsluts: fullpornnetwork,
|
||||||
evilangel,
|
evilangel,
|
||||||
fakehub,
|
fakehub,
|
||||||
famedigital,
|
famedigital,
|
||||||
freeones,
|
freeones,
|
||||||
freeonesLegacy,
|
freeonesLegacy,
|
||||||
|
girlfaction: fullpornnetwork,
|
||||||
|
hergape: fullpornnetwork,
|
||||||
|
homemadeanalwhores: fullpornnetwork,
|
||||||
hotcrazymess: nubiles,
|
hotcrazymess: nubiles,
|
||||||
iconmale,
|
iconmale,
|
||||||
|
jamesdeen: fullpornnetwork,
|
||||||
julesjordan,
|
julesjordan,
|
||||||
kellymadison,
|
kellymadison,
|
||||||
legalporno,
|
legalporno,
|
||||||
men,
|
men,
|
||||||
analbbc: fullpornnetwork,
|
|
||||||
analized: fullpornnetwork,
|
|
||||||
analviolation: fullpornnetwork,
|
|
||||||
baddaddypov: fullpornnetwork,
|
|
||||||
dtfsluts: fullpornnetwork,
|
|
||||||
girlfaction: fullpornnetwork,
|
|
||||||
hergape: fullpornnetwork,
|
|
||||||
homemadeanalwhores: fullpornnetwork,
|
|
||||||
jamesdeen: fullpornnetwork,
|
|
||||||
mugfucked: fullpornnetwork,
|
|
||||||
onlyprince: fullpornnetwork,
|
|
||||||
pervertgallery: fullpornnetwork,
|
|
||||||
povperverts: fullpornnetwork,
|
|
||||||
metrohd,
|
metrohd,
|
||||||
milehighmedia,
|
milehighmedia,
|
||||||
mofos,
|
mofos,
|
||||||
|
mugfucked: fullpornnetwork,
|
||||||
naughtyamerica,
|
naughtyamerica,
|
||||||
nfbusty: nubiles,
|
nfbusty: nubiles,
|
||||||
nubilefilms: nubiles,
|
nubilefilms: nubiles,
|
||||||
nubiles,
|
nubiles,
|
||||||
nubilesporn: nubiles,
|
nubilesporn: nubiles,
|
||||||
|
onlyprince: fullpornnetwork,
|
||||||
|
pervertgallery: fullpornnetwork,
|
||||||
pimpxxx: cherrypimps,
|
pimpxxx: cherrypimps,
|
||||||
pornhub,
|
pornhub,
|
||||||
|
povperverts: fullpornnetwork,
|
||||||
realitykings,
|
realitykings,
|
||||||
score,
|
score,
|
||||||
thatsitcomshow: nubiles,
|
thatsitcomshow: nubiles,
|
||||||
transangels,
|
transangels,
|
||||||
|
tushy: vixen,
|
||||||
|
tushyraw: vixen,
|
||||||
twistys,
|
twistys,
|
||||||
|
vixen,
|
||||||
wicked,
|
wicked,
|
||||||
xempire,
|
xempire,
|
||||||
},
|
},
|
||||||
|
|
|
@ -1,10 +1,18 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
/* eslint-disable newline-per-chained-call */
|
/* eslint-disable newline-per-chained-call */
|
||||||
const bhttp = require('bhttp');
|
const Promise = require('bluebird');
|
||||||
const cheerio = require('cheerio');
|
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
|
const { get, post } = require('../utils/http');
|
||||||
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
|
const genderMap = {
|
||||||
|
F: 'female',
|
||||||
|
M: 'male',
|
||||||
|
T: 'transsexual', // not yet observed
|
||||||
|
};
|
||||||
|
|
||||||
function getPosterFallbacks(poster) {
|
function getPosterFallbacks(poster) {
|
||||||
return poster
|
return poster
|
||||||
.filter(image => /landscape/i.test(image.name))
|
.filter(image => /landscape/i.test(image.name))
|
||||||
|
@ -17,54 +25,46 @@ function getPosterFallbacks(poster) {
|
||||||
.flat();
|
.flat();
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeLatest(html, site) {
|
function getTeaserFallbacks(teaser) {
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
return teaser
|
||||||
|
.filter(video => /landscape/i.test(video.name))
|
||||||
|
.map(video => ({
|
||||||
|
src: video.src,
|
||||||
|
type: video.type,
|
||||||
|
quality: Number(String(video.height).replace('353', '360')),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
const stateScript = $('script:contains("INITIAL_STATE")').html();
|
function getAvatarFallbacks(avatar) {
|
||||||
const { videos: scenes } = JSON.parse(stateScript.slice(stateScript.indexOf('{'), stateScript.indexOf('};') + 1));
|
return avatar
|
||||||
|
.sort((imageA, imageB) => imageB.height - imageA.height)
|
||||||
|
.map(image => [image.highdpi?.['3x'], image.highdpi?.['2x'], image.src])
|
||||||
|
.flat();
|
||||||
|
}
|
||||||
|
|
||||||
|
function scrapeAll(scenes, site, origin) {
|
||||||
return scenes.map((scene) => {
|
return scenes.map((scene) => {
|
||||||
const entryId = String(scene.newId);
|
const release = {};
|
||||||
|
|
||||||
const {
|
release.title = scene.title;
|
||||||
title,
|
|
||||||
models: actors,
|
|
||||||
} = scene;
|
|
||||||
|
|
||||||
const url = `${site.url}${scene.targetUrl}`;
|
release.entryId = String(scene.newId);
|
||||||
const date = moment.utc(scene.releaseDateFormatted, 'MMMM DD, YYYY').toDate();
|
release.url = `${site?.url || origin}${scene.targetUrl}`;
|
||||||
const stars = Number(scene.textRating) / 2;
|
|
||||||
|
|
||||||
// largest thumbnail. poster is the same image but bigger, too large for storage space efficiency
|
release.date = moment.utc(scene.releaseDate).toDate();
|
||||||
const poster = scene.images.listing.slice(-1)[0].src;
|
release.shootDate = moment.utc(scene.shootDate).toDate();
|
||||||
const teaser = scene.previews.listing.slice(-1)[0];
|
|
||||||
|
|
||||||
return {
|
release.actors = scene.models;
|
||||||
url,
|
release.stars = Number(scene.textRating) / 2;
|
||||||
entryId,
|
|
||||||
title,
|
release.poster = getPosterFallbacks(scene.images.poster);
|
||||||
actors,
|
release.teaser = getTeaserFallbacks(scene.previews.poster);
|
||||||
date,
|
|
||||||
poster,
|
return release;
|
||||||
teaser: {
|
|
||||||
src: teaser.src,
|
|
||||||
type: teaser.type,
|
|
||||||
quality: teaser.height,
|
|
||||||
},
|
|
||||||
rating: {
|
|
||||||
stars,
|
|
||||||
},
|
|
||||||
site,
|
|
||||||
};
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeUpcoming(html, site) {
|
function scrapeUpcoming(scene, site) {
|
||||||
const statePrefix = html.indexOf('__INITIAL_STATE__');
|
|
||||||
const stateString = html.slice(html.indexOf('{', statePrefix), html.indexOf('};', statePrefix) + 1);
|
|
||||||
const data = JSON.parse(stateString);
|
|
||||||
|
|
||||||
const scene = data.page.data['/'].data?.nextScene;
|
|
||||||
if (!scene || scene.isPreReleasePeriod) return null;
|
if (!scene || scene.isPreReleasePeriod) return null;
|
||||||
|
|
||||||
const release = {};
|
const release = {};
|
||||||
|
@ -75,33 +75,23 @@ function scrapeUpcoming(html, site) {
|
||||||
.map(component => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
|
.map(component => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
|
||||||
.join(' ');
|
.join(' ');
|
||||||
|
|
||||||
release.date = moment.utc(scene.releaseDate).toDate();
|
|
||||||
release.url = `${site.url}${scene.targetUrl}`;
|
release.url = `${site.url}${scene.targetUrl}`;
|
||||||
|
|
||||||
|
release.date = moment.utc(scene.releaseDate).toDate();
|
||||||
|
release.shootDate = moment.utc(scene.shootDate).toDate();
|
||||||
|
|
||||||
release.actors = scene.models;
|
release.actors = scene.models;
|
||||||
|
|
||||||
release.poster = getPosterFallbacks(scene.images.poster);
|
release.poster = getPosterFallbacks(scene.images.poster);
|
||||||
release.teaser = scene.previews.poster
|
release.teaser = getTeaserFallbacks(scene.previews.poster);
|
||||||
.filter(teaser => /landscape/i.test(teaser.name))
|
|
||||||
.map(teaser => ({
|
|
||||||
src: teaser.src,
|
|
||||||
type: teaser.type,
|
|
||||||
quality: Number(String(teaser.height).replace('353', '360')),
|
|
||||||
}));
|
|
||||||
|
|
||||||
release.entryId = (release.poster[0] || release.teaser[0])?.match(/\/(\d+)/)?.[1];
|
release.entryId = (release.poster[0] || release.teaser[0])?.match(/\/(\d+)/)?.[1];
|
||||||
|
|
||||||
return [release];
|
return [release];
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeScene(html, url) {
|
async function scrapeScene(data, url, site, baseRelease) {
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
const scene = data.video;
|
||||||
|
|
||||||
const stateObject = $('script:contains("INITIAL_STATE")');
|
|
||||||
const data = JSON.parse(stateObject.html().trim().slice(27, -1));
|
|
||||||
|
|
||||||
const pageData = data.page.data[data.location.pathname].data;
|
|
||||||
const scene = data.videos.find(video => video.newId === pageData.video);
|
|
||||||
|
|
||||||
const release = {
|
const release = {
|
||||||
url,
|
url,
|
||||||
|
@ -114,64 +104,132 @@ async function scrapeScene(html, url) {
|
||||||
tags: scene.tags,
|
tags: scene.tags,
|
||||||
};
|
};
|
||||||
|
|
||||||
release.entryId = pageData.video;
|
release.entryId = scene.newId;
|
||||||
release.actors = scene.models;
|
|
||||||
|
release.date = moment.utc(scene.releaseDate).toDate();
|
||||||
|
release.shootDate = moment.utc(scene.shootDate).toDate();
|
||||||
|
|
||||||
|
release.actors = baseRelease?.actors || scene.models;
|
||||||
|
|
||||||
// release.poster = scene.rotatingThumbsUrlSizes[0]['1040w'];
|
|
||||||
release.poster = getPosterFallbacks(scene.images.poster);
|
release.poster = getPosterFallbacks(scene.images.poster);
|
||||||
release.photos = pageData.pictureset.map(photo => photo.main[0].src);
|
release.photos = data.pictureset.map(photo => photo.main[0].src);
|
||||||
|
|
||||||
const trailer = scene.previews.listing.find(preview => preview.height === 353);
|
release.teaser = getTeaserFallbacks(scene.previews.poster);
|
||||||
if (trailer) release.trailer = { src: trailer };
|
|
||||||
|
|
||||||
// trailer must exist!
|
const qualities = [360, 480, 720, 1080, 2160];
|
||||||
|
const trailersUrl = `${site.url}/api/__tkn/${scene.previewVideoUrl1080P}/trailer/${qualities.join('+')}`;
|
||||||
|
const trailersRes = await post(trailersUrl, null, { headers: { referer: url } });
|
||||||
|
|
||||||
release.teaser = scene.previews.poster
|
if (trailersRes.code === 200) {
|
||||||
.filter(teaser => /landscape/i.test(teaser.name))
|
release.trailer = qualities.map(quality => (trailersRes.body[quality] ? {
|
||||||
.map(teaser => ({
|
src: trailersRes.body[quality].token,
|
||||||
src: teaser.src,
|
quality,
|
||||||
type: teaser.type,
|
} : null)).filter(Boolean);
|
||||||
quality: Number(String(teaser.height).replace('353', '360')),
|
}
|
||||||
}));
|
|
||||||
|
|
||||||
release.date = new Date(scene.releaseDate);
|
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(site, page = 1) {
|
async function fetchActorReleases(pages, model, origin) {
|
||||||
const url = `${site.url}/videos?page=${page}&size=7`;
|
const releasesPerPage = await Promise.map(pages, async (page) => {
|
||||||
const res = await bhttp.get(url);
|
const url = `${origin}/api${model.targetUrl}?page=${page}`;
|
||||||
|
const res = await get(url);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.code === 200) {
|
||||||
return scrapeLatest(res.body.toString(), site);
|
return scrapeAll(res.body.data.videos.videos, null, origin);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.statusCode;
|
return [];
|
||||||
|
}, { concurrency: 3 });
|
||||||
|
|
||||||
|
return releasesPerPage.flat();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Build an actor profile from the API payload of a model page.
 *
 * @param {Object} data - API response data; reads `data.model` for the actor
 *   fields, `data.videos.videos` for the scenes included in this payload and
 *   `data.videos.count` for the total number of scenes.
 * @param {string} origin - Site origin, passed on to build release URLs and
 *   to request the remaining video pages.
 * @param {boolean} [withReleases] - When truthy, video pages 2 and up are
 *   fetched and appended to the releases from this payload.
 * @returns {Promise<Object>} Profile with gender, hair, nationality, avatar,
 *   poster, banner and releases; birthdate, description, bust, waist and hip
 *   are only set when the payload provides usable values.
 */
async function scrapeProfile(data, origin, withReleases) {
  const { model } = data;
  const profile = {};

  // new Date(null) is the Unix epoch and new Date(undefined) is an Invalid Date,
  // so only store the birthdate when the payload has a parseable value
  const birthdate = model.dateOfBirth ? new Date(model.dateOfBirth) : null;

  if (birthdate && !Number.isNaN(birthdate.getTime())) profile.birthdate = birthdate;

  profile.gender = genderMap[model.sex];

  profile.hair = model.hairColour;
  profile.nationality = model.nationality;

  // guard against a missing biography, so a sparse payload does not throw on .trim()
  if (typeof model.biography === 'string' && model.biography.trim().length > 0) profile.description = model.biography;

  // the misspelled '*Measurment' keys are the field names used by the payload
  if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
  if (model.waistMeasurment) profile.waist = model.waistMeasurment;
  if (model.hipMeasurment) profile.hip = model.hipMeasurment;

  profile.avatar = getAvatarFallbacks(model.images.listing);
  profile.poster = getAvatarFallbacks(model.images.profile);
  profile.banner = getAvatarFallbacks(model.images.poster);

  const releases = scrapeAll(data.videos.videos, null, origin);

  if (!withReleases) {
    profile.releases = releases;

    return profile;
  }

  // NOTE(review): assumes the API serves 6 videos per page — confirm against the endpoint
  const pageCount = Math.ceil(data.videos.count / 6);
  // the first page is already part of this payload, so continue from page 2
  const remainingPages = Array.from({ length: Math.max(pageCount - 1, 0) }, (value, index) => index + 2);
  const otherReleases = await fetchActorReleases(remainingPages, model, origin);

  profile.releases = [...releases, ...otherReleases];

  return profile;
}
|
||||||
|
|
||||||
|
async function fetchLatest(site, page = 1) {
|
||||||
|
const url = `${site.url}/api/videos?page=${page}`;
|
||||||
|
const res = await get(url);
|
||||||
|
|
||||||
|
if (res.code === 200) {
|
||||||
|
return scrapeAll(res.body.data.videos, site);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res.code;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchUpcoming(site) {
|
async function fetchUpcoming(site) {
|
||||||
const res = await bhttp.get(site.url);
|
const apiUrl = `${site.url}/api`;
|
||||||
|
const res = await get(apiUrl);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.code === 200) {
|
||||||
return scrapeUpcoming(res.body.toString(), site);
|
return scrapeUpcoming(res.body.data.nextScene, site);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.statusCode;
|
return res.code;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site, baseRelease) {
|
||||||
const res = await bhttp.get(url);
|
const { origin, pathname } = new URL(url);
|
||||||
|
const apiUrl = `${origin}/api${pathname}`;
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
const res = await get(apiUrl);
|
||||||
return scrapeScene(res.body.toString(), url, site);
|
|
||||||
|
if (res.code === 200) {
|
||||||
|
return scrapeScene(res.body.data, url, site, baseRelease);
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new Error(`Vixen response not OK for scene (${url}): ${res.statusCode}`);
|
return res.code;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fetchProfile(actorName, scraperSlug, withReleases) {
|
||||||
|
const origin = `https://www.${scraperSlug}.com`;
|
||||||
|
const actorSlug = slugify(actorName);
|
||||||
|
const url = `${origin}/api/${actorSlug}`;
|
||||||
|
const res = await get(url);
|
||||||
|
|
||||||
|
if (res.code === 200) {
|
||||||
|
return scrapeProfile(res.body.data, origin, withReleases);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
fetchUpcoming,
|
fetchUpcoming,
|
||||||
fetchScene,
|
fetchScene,
|
||||||
|
fetchProfile,
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue