From bfb5006e9514b97d67fc94c7f0e6476783eab346 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sat, 30 Jan 2021 01:12:42 +0100 Subject: [PATCH] Added actor scene URL parameter to Gamma scraper to phase out release URL function. --- seeds/01_networks.js | 1 + src/actors.js | 10 +++++++++- src/scrapers/blowpass.js | 10 +--------- src/scrapers/gamma.js | 27 ++++++++++++++++++--------- 4 files changed, 29 insertions(+), 19 deletions(-) diff --git a/seeds/01_networks.js b/seeds/01_networks.js index 28be0286..d54eeec6 100644 --- a/seeds/01_networks.js +++ b/seeds/01_networks.js @@ -125,6 +125,7 @@ const networks = [ description: 'Welcome to Blowpass.com, your ultimate source for deepthroat porn, MILF and teen blowjob videos, big cumshots and any and everything oral!', parameters: { mobile: 'https://m.blowpass.com/en/video/v/%d', // v can be any string, %d will be scene ID + actorScenes: 'https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0{path}/{page}', }, parent: 'gamma', }, diff --git a/src/actors.js b/src/actors.js index 06868b35..42832eb8 100644 --- a/src/actors.js +++ b/src/actors.js @@ -622,7 +622,10 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy const entity = entitiesBySlug[scraperSlug] || null; const scraper = scrapers[scraperSlug]; - const layoutScraper = scraper?.[entity.parameters?.layout] || scraper?.[entity.parent?.parameters?.layout] || scraper?.[entity.parent?.parent?.parameters?.layout] || scraper; + const layoutScraper = scraper?.[entity.parameters?.layout] + || scraper?.[entity.parent?.parameters?.layout] + || scraper?.[entity.parent?.parent?.parameters?.layout] + || scraper; const context = { ...entity, @@ -631,6 +634,11 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy network: entity?.parent, entity, scraper: scraperSlug, + parameters: { + ...entity?.parent?.parent?.parameters, + ...entity?.parent?.parameters, + ...entity?.parameters, + }, }; const label = context.entity?.name; diff --git a/src/scrapers/blowpass.js b/src/scrapers/blowpass.js index 37f6ea0b..47565c94 100644 --- a/src/scrapers/blowpass.js +++ b/src/scrapers/blowpass.js @@ -19,17 +19,9 @@ async function fetchSceneWrapper(url, site, baseRelease, options) { return release; } -function getActorReleasesUrl(actorPath, page = 1) { - return `https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0${actorPath}/${page}`; -} - -async function networkFetchProfile({ name: actorName }, context, include) { - return fetchProfile({ name: actorName }, context, null, getActorReleasesUrl, include); -} - module.exports = { fetchLatest, - fetchProfile: networkFetchProfile, + fetchProfile, fetchUpcoming, fetchScene: fetchSceneWrapper, }; diff --git a/src/scrapers/gamma.js b/src/scrapers/gamma.js index 898b50d0..6dd47066 100644 --- a/src/scrapers/gamma.js +++ b/src/scrapers/gamma.js @@ -5,6 +5,7 @@ const util = require('util'); const { JSDOM } = require('jsdom'); const cheerio = require('cheerio'); const moment = require('moment'); +const format = require('template-format'); const logger = require('../logger')(__filename); const qu = require('../utils/qu'); @@ -376,26 +377,34 @@ function scrapeActorSearch(html, url, actorName) { return actorLink ? actorLink.href : null; } -async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, accReleases = []) { +async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, accReleases = [], context) { const { origin, pathname } = new URL(profileUrl); const profilePath = `/${pathname.split('/').slice(-2).join('/')}`; - const url = getActorReleasesUrl(profilePath, page); + const url = (context.parameters.actorScenes && format(context.parameters.actorScenes, { path: profilePath, page })) + || getActorReleasesUrl?.(profilePath, page); + + if (!url) { + return []; + } + const res = await qu.get(url); - if (!res.ok) return []; + if (!res.ok) { + return []; + } const releases = scrapeAll(res.item.html, null, origin); const nextPage = res.item.query.url('.Gamma_Paginator a.next'); if (nextPage) { - return fetchActorReleases(profileUrl, getActorReleasesUrl, page + 1, accReleases.concat(releases)); + return fetchActorReleases(profileUrl, getActorReleasesUrl, page + 1, accReleases.concat(releases), context); } return accReleases.concat(releases); } -async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases) { +async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases, context) { const { query } = qu.extract(html); const avatar = query.el('img.actorPicture'); @@ -429,8 +438,8 @@ async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUr if (alias) profile.aliases = alias.split(':')[1].trim().split(', '); if (nationality) profile.nationality = nationality.split(':')[1].trim(); - if (getActorReleasesUrl && withReleases) { - profile.releases = await fetchActorReleases(url, getActorReleasesUrl); + if ((getActorReleasesUrl || context.parameters.actorScenes) && withReleases) { + profile.releases = await fetchActorReleases(url, getActorReleasesUrl, 1, [], context); } return profile; @@ -661,7 +670,7 @@ async function fetchActorScenes(actorName, apiUrl, siteSlug) { return []; } -async function fetchProfile({ name: actorName }, context, altSearchUrl, getActorReleasesUrl, include) { +async function fetchProfile({ name: actorName }, context, include, altSearchUrl, getActorReleasesUrl) { const siteSlug = context.entity.slug || context.site?.slug || context.network?.slug; const actorSlug = actorName.toLowerCase().replace(/\s+/, '+'); @@ -684,7 +693,7 @@ async function fetchProfile({ name: actorName }, context, altSearchUrl, getActor return null; } - return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl, include.scenes); + return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl, include.scenes, context); } return null;