Added actor scene URL parameter to Gamma scraper to phase out release URL function.

This commit is contained in:
DebaucheryLibrarian
2021-01-30 01:12:42 +01:00
parent d3d08b9c21
commit bfb5006e95
4 changed files with 29 additions and 19 deletions

View File

@@ -19,17 +19,9 @@ async function fetchSceneWrapper(url, site, baseRelease, options) {
return release;
}
/**
 * Build the Blowpass listing URL for one page of an actor's releases.
 *
 * @param {string} actorPath - Path fragment appended directly after the
 *   trailing "0" of the listing URL (presumably starts with "/"; verify
 *   against the caller).
 * @param {number} [page=1] - Page number appended as the last URL segment.
 * @returns {string} The listing URL for the requested page.
 */
function getActorReleasesUrl(actorPath, page = 1) {
  const listingBase = 'https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0';

  return `${listingBase}${actorPath}/${page}`;
}
/**
 * Network-level profile fetcher: delegates to the shared `fetchProfile`,
 * supplying this network's `getActorReleasesUrl` builder and no alternative
 * search URL.
 *
 * NOTE(review): the arguments are passed in the order
 * (actor, context, altSearchUrl, getActorReleasesUrl, include); confirm this
 * matches the signature of the `fetchProfile` that is in scope.
 *
 * @param {{ name: string }} actor - Only the `name` property is forwarded.
 * @param {object} context - Forwarded unchanged to `fetchProfile`.
 * @param {object} include - Forwarded unchanged to `fetchProfile`.
 * @returns {Promise<*>} Whatever `fetchProfile` resolves to.
 */
async function networkFetchProfile({ name: actorName }, context, include) {
  const actor = { name: actorName };
  const altSearchUrl = null;

  return fetchProfile(actor, context, altSearchUrl, getActorReleasesUrl, include);
}
// Public interface of this scraper module.
// NOTE(review): `fetchProfile` is listed twice below (once mapped to
// networkFetchProfile, once as shorthand). These look like the before/after
// lines of the same change; if both were kept in one object literal, the later
// shorthand entry would win and the networkFetchProfile mapping would be dead.
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchProfile,
fetchUpcoming,
// Scene fetching goes through the local wrapper rather than a shared function.
fetchScene: fetchSceneWrapper,
};

View File

@@ -5,6 +5,7 @@ const util = require('util');
const { JSDOM } = require('jsdom');
const cheerio = require('cheerio');
const moment = require('moment');
const format = require('template-format');
const logger = require('../logger')(__filename);
const qu = require('../utils/qu');
@@ -376,26 +377,34 @@ function scrapeActorSearch(html, url, actorName) {
return actorLink ? actorLink.href : null;
}
/*
 * Collects an actor's releases from a paginated listing, recursing one page at
 * a time and carrying the releases gathered so far in `accReleases`.
 *
 * NOTE(review): this span appears to show both the pre-change and post-change
 * version of several lines (two signatures, two `url` declarations, two
 * recursive returns); the comments below describe the variant that takes
 * `context`.
 *
 * Parameters:
 *   profileUrl          - actor profile URL; its origin and last two path
 *                         segments are used.
 *   getActorReleasesUrl - optional fallback URL builder, called with
 *                         (profilePath, page).
 *   page                - page number to fetch, defaults to 1.
 *   accReleases         - releases accumulated from earlier pages, defaults to [].
 *   context             - read for `context.parameters.actorScenes`. It is
 *                         declared after the defaulted parameters, so callers
 *                         must pass `page` and `accReleases` explicitly to
 *                         supply it.
 *
 * Returns an array of releases; an empty array when no URL can be built or the
 * request is not ok.
 */
async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, accReleases = []) {
async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, accReleases = [], context) {
const { origin, pathname } = new URL(profileUrl);
// Keep only the last two path segments of the profile URL, with a leading slash.
const profilePath = `/${pathname.split('/').slice(-2).join('/')}`;
const url = getActorReleasesUrl(profilePath, page);
// Prefer the `actorScenes` template from the context parameters, passed through
// `format` with { path, page }; otherwise fall back to the URL builder when one
// was provided (optional call).
// NOTE(review): `context.parameters` is dereferenced without a guard, so this
// throws if `context` or `context.parameters` is missing — confirm every caller
// passes a context with parameters.
const url = (context.parameters.actorScenes && format(context.parameters.actorScenes, { path: profilePath, page }))
|| getActorReleasesUrl?.(profilePath, page);
// Neither a template nor a builder produced a URL.
if (!url) {
return [];
}
const res = await qu.get(url);
if (!res.ok) return [];
// NOTE(review): a failed request returns an empty array rather than
// `accReleases`, so releases gathered from earlier pages are discarded when a
// later page fails — confirm this is intended.
if (!res.ok) {
return [];
}
const releases = scrapeAll(res.item.html, null, origin);
// A "next" link in the paginator means there is another page to fetch.
const nextPage = res.item.query.url('.Gamma_Paginator a.next');
if (nextPage) {
return fetchActorReleases(profileUrl, getActorReleasesUrl, page + 1, accReleases.concat(releases));
// Pass `context` along so later pages resolve their URL the same way.
return fetchActorReleases(profileUrl, getActorReleasesUrl, page + 1, accReleases.concat(releases), context);
}
// Last page: return everything accumulated plus this page's releases.
return accReleases.concat(releases);
}
async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases) {
async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases, context) {
const { query } = qu.extract(html);
const avatar = query.el('img.actorPicture');
@@ -429,8 +438,8 @@ async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUr
if (alias) profile.aliases = alias.split(':')[1].trim().split(', ');
if (nationality) profile.nationality = nationality.split(':')[1].trim();
if (getActorReleasesUrl && withReleases) {
profile.releases = await fetchActorReleases(url, getActorReleasesUrl);
if ((getActorReleasesUrl || context.parameters.actorScenes) && withReleases) {
profile.releases = await fetchActorReleases(url, getActorReleasesUrl, 1, [], context);
}
return profile;
@@ -661,7 +670,7 @@ async function fetchActorScenes(actorName, apiUrl, siteSlug) {
return [];
}
async function fetchProfile({ name: actorName }, context, altSearchUrl, getActorReleasesUrl, include) {
async function fetchProfile({ name: actorName }, context, include, altSearchUrl, getActorReleasesUrl) {
const siteSlug = context.entity.slug || context.site?.slug || context.network?.slug;
const actorSlug = actorName.toLowerCase().replace(/\s+/, '+');
@@ -684,7 +693,7 @@ async function fetchProfile({ name: actorName }, context, altSearchUrl, getActor
return null;
}
return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl, include.scenes);
return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl, include.scenes, context);
}
return null;