diff --git a/src/actors.js b/src/actors.js
index aad1fdb2..d26c86b8 100644
--- a/src/actors.js
+++ b/src/actors.js
@@ -8,6 +8,7 @@ const moment = require('moment');
 const logger = require('./logger')(__filename);
 const knex = require('./knex');
 const argv = require('./argv');
+const include = require('./utils/argv-include')(argv);
 const scrapers = require('./scrapers/scrapers');
 const whereOr = require('./utils/where-or');
 const resolvePlace = require('./utils/resolve-place');
@@ -391,7 +392,7 @@ async function scrapeProfiles(sources, actorName, actorEntry, sitesBySlug) {
 			logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);
 
 			const site = sitesBySlug[scraperSlug] || null;
-			const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, argv.withReleases);
+			const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, include);
 
 			if (profile) {
 				logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);
diff --git a/src/scrapers/bamvisions.js b/src/scrapers/bamvisions.js
index 365e4501..9fae0f01 100644
--- a/src/scrapers/bamvisions.js
+++ b/src/scrapers/bamvisions.js
@@ -1,9 +1,11 @@
 'use strict';
 
-const { get, geta, formatDate } = require('../utils/qu');
+const { get, geta, initAll, formatDate } = require('../utils/qu');
 const slugify = require('../utils/slugify');
 
-function scrapeLatest(scenes, site) {
+const { feetInchesToCm } = require('../utils/convert');
+
+function scrapeAll(scenes, site) {
 	return scenes.map(({ qu }) => {
 		const release = {};
 
@@ -62,11 +64,88 @@ function scrapeScene({ html, qu }, url, site) {
 	return release;
 }
 
+/**
+ * Collects an actor's releases from the paginated `sets.php` listing, following
+ * the link to the next page for as long as one is found.
+ *
+ * @param {string} actorId - ID used in the listing URL's `id` query parameter
+ * @param {Object} site - site entry; `site.url` is the base of the listing URL
+ * @param {number} [page=1] - listing page to fetch
+ * @param {Array} [accScenes=[]] - releases gathered from previous pages
+ * @returns {Promise<Array>} releases from every page that was fetched successfully
+ */
+async function fetchActorReleases(actorId, site, page = 1, accScenes = []) {
+	const url = `${site.url}/sets.php?id=${actorId}&page=${page}`;
+	const res = await get(url);
+
+	// a failed page ends pagination, but releases from earlier pages are kept
+	if (!res.ok) return accScenes;
+
+	const quReleases = initAll(res.item.el, '.item-episode');
+	const releases = scrapeAll(quReleases, site);
+
+	const nextPage = res.item.qu.q(`a[href*="page=${page + 1}"]`);
+
+	if (nextPage) {
+		return fetchActorReleases(actorId, site, page + 1, accScenes.concat(releases));
+	}
+
+	return accScenes.concat(releases);
+}
+
+/**
+ * Builds an actor profile from a model page.
+ *
+ * @param {Object} item - query context of the page; only its `qu` helper is used
+ * @param {Object} site - site entry; `site.url` prefixes avatar sources that are not absolute
+ * @param {boolean} withScenes - when truthy, the actor's releases are fetched as well
+ * @returns {Promise<Object>} profile with `avatar` and, when available, height, bust, waist, hip and releases
+ */
+async function scrapeProfile({ qu }, site, withScenes) {
+	const profile = {};
+
+	const bio = qu.all('.stats li', true).reduce((acc, row) => {
+		const [key, ...rest] = row.split(':');
+
+		// rows without a colon carry no value; skip them instead of throwing on undefined
+		if (rest.length === 0) return acc;
+
+		// re-join so a value that itself contains a colon is not truncated
+		return { ...acc, [slugify(key, { delimiter: '_' })]: rest.join(':').trim() };
+	}, {});
+
+	if (bio.height) profile.height = feetInchesToCm(bio.height);
+	if (bio.measurements) {
+		const [bust, waist, hip] = bio.measurements.split('-');
+
+		if (bust) profile.bust = bust;
+		if (waist) profile.waist = Number(waist);
+		if (hip) profile.hip = Number(hip);
+	}
+
+	// candidate sources in 3x, 2x, 1x order; sources not starting with http get the site URL prepended
+	profile.avatar = [
+		qu.q('.profile-pic img', 'src0_3x'),
+		qu.q('.profile-pic img', 'src0_2x'),
+		qu.q('.profile-pic img', 'src0_1x'),
+	].filter(Boolean).map(source => (/^http/.test(source) ? source : `${site.url}${source}`));
+
+	if (withScenes) {
+		const actorId = qu.q('.profile-pic img', 'id')?.match(/set-target-(\d+)/)?.[1];
+
+		if (actorId) {
+			profile.releases = await fetchActorReleases(actorId, site);
+		}
+	}
+
+	return profile;
+}
+
 async function fetchLatest(site, page = 1) {
 	const url = `${site.url}/categories/movies/${page}/latest/`;
 	const res = await geta(url, '.item-episode');
 
-	return res.ok ? scrapeLatest(res.items, site) : res.status;
+	return res.ok ? scrapeAll(res.items, site) : res.status;
 }
 
 async function fetchScene(url, site) {
@@ -75,7 +154,28 @@ async function fetchScene(url, site) {
 	return res.ok ? scrapeScene(res.item, url, site) : res.status;
 }
 
+/**
+ * Looks up an actor's model page, trying the name slugified with an empty
+ * delimiter first and with slugify's default options second.
+ *
+ * @param {string} actorName - actor name to look up
+ * @param {string} scraperSlug - not used by this scraper; kept for the shared fetchProfile call signature
+ * @param {Object} site - site entry; `site.url` is the base of the model page URL
+ * @param {Object} include - inclusion flags; `include.scenes` enables fetching releases
+ * @returns {Promise<*>} the scraped profile, or `res.status` when neither page could be fetched
+ */
+async function fetchProfile(actorName, scraperSlug, site, include) {
+	const actorSlugA = slugify(actorName, { delimiter: '' });
+	const actorSlugB = slugify(actorName);
+
+	const resA = await get(`${site.url}/models/${actorSlugA}.html`);
+	const res = resA.ok ? resA : await get(`${site.url}/models/${actorSlugB}.html`);
+
+	return res.ok ? scrapeProfile(res.item, site, include.scenes) : res.status;
+}
+
 module.exports = {
 	fetchLatest,
 	fetchScene,
+	fetchProfile,
 };
diff --git a/src/scrapers/blowpass.js b/src/scrapers/blowpass.js
index ccc3249a..72d3d3b5 100644
--- a/src/scrapers/blowpass.js
+++ b/src/scrapers/blowpass.js
@@ -25,8 +25,8 @@ function getActorReleasesUrl(actorPath, page = 1) {
 	return `https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0${actorPath}/${page}`;
 }
 
-async function networkFetchProfile(actorName, siteSlug) {
-	return fetchProfile(actorName, siteSlug, null, getActorReleasesUrl);
+async function networkFetchProfile(actorName, scraperSlug, site, include) {
+	return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
 }
 
 module.exports = {
diff --git a/src/scrapers/famedigital.js b/src/scrapers/famedigital.js
index 8e60b063..627e7cec 100644
--- a/src/scrapers/famedigital.js
+++ b/src/scrapers/famedigital.js
@@ -84,12 +84,12 @@ async function fetchClassicProfile(actorName, siteSlug) {
 	return null;
 }
 
-async function networkFetchProfile(actorName) {
+async function networkFetchProfile(actorName, scraperSlug, site, include) {
 	// not all Fame Digital sites offer Gamma actors
 	const [devils, rocco, peter, silvia] = await Promise.all([
 		fetchApiProfile(actorName, 'devilsfilm', true),
 		fetchApiProfile(actorName, 'roccosiffredi'),
-		argv.withReleases ? fetchProfile(actorName, 'peternorth', true, getActorReleasesUrl) : [],
+		argv.withReleases ? fetchProfile(actorName, 'peternorth', true, getActorReleasesUrl, include) : [],
 		argv.withReleases ? fetchClassicProfile(actorName, 'silviasaint') : [],
 		argv.withReleases ? fetchClassicProfile(actorName, 'silverstonedvd') : [],
 	]);
diff --git a/src/scrapers/gamma.js b/src/scrapers/gamma.js
index d60ba89e..b54cddad 100644
--- a/src/scrapers/gamma.js
+++ b/src/scrapers/gamma.js
@@ -7,7 +7,6 @@ const { JSDOM } = require('jsdom');
 const cheerio = require('cheerio');
 const moment = require('moment');
 
-const argv = require('../argv');
 const logger = require('../logger')(__filename);
 const { ex, get } = require('../utils/q');
 const slugify = require('../utils/slugify');
@@ -312,7 +311,7 @@ async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, acc
 	return accReleases.concat(releases);
 }
 
-async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl) {
+async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases) {
 	const { q } = ex(html);
 
 	const avatar = q('img.actorPicture');
@@ -346,7 +345,7 @@ async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUr
 	if (alias) profile.aliases = alias.split(':')[1].trim().split(', ');
 	if (nationality) profile.nationality = nationality.split(':')[1].trim();
 
-	if (getActorReleasesUrl && argv.withReleases) {
+	if (getActorReleasesUrl && withReleases) {
 		profile.releases = await fetchActorReleases(url, getActorReleasesUrl);
 	}
 
@@ -553,7 +552,7 @@ async function fetchActorScenes(actorName, apiUrl, siteSlug) {
 	return [];
 }
 
-async function fetchProfile(actorName, siteSlug, altSearchUrl, getActorReleasesUrl) {
+async function fetchProfile(actorName, siteSlug, altSearchUrl, getActorReleasesUrl, include) {
 	const actorSlug = actorName.toLowerCase().replace(/\s+/, '+');
 	const searchUrl = altSearchUrl
 		? `https://www.${siteSlug}.com/en/search/${actorSlug}/1/actor`
@@ -574,7 +573,8 @@ async function fetchProfile(actorName, siteSlug, altSearchUrl, getActorReleasesU
 			return null;
 		}
 
-		return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl);
+		// tolerate callers that do not pass include
+		return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl, include && include.scenes);
 	}
 
 	return null;
diff --git a/src/scrapers/score.js b/src/scrapers/score.js
index 3bd4e1cf..b6be7bdd 100644
--- a/src/scrapers/score.js
+++ b/src/scrapers/score.js
@@ -217,7 +217,7 @@ async function fetchScene(url, site) {
 	return null;
 }
 
-async function fetchProfile(actorName, scraperSlug, site, withReleases, page = 1, source = 0) {
+async function fetchProfile(actorName, scraperSlug, site, include, page = 1, source = 0) {
 	const letter = actorName.charAt(0).toUpperCase();
 
 	const sources = [
@@ -238,17 +238,17 @@ async function fetchProfile(actorName, scraperSlug, site, withReleases, page = 1
 			const actorRes = await bhttp.get(actorUrl);
 
 			if (actorRes.statusCode === 200) {
-				return scrapeProfile(actorRes.body.toString(), actorUrl, withReleases);
+				return scrapeProfile(actorRes.body.toString(), actorUrl, include.scenes);
 			}
 
 			return null;
 		}
 
-		return fetchProfile(actorName, scraperSlug, site, withReleases, page + 1, source);
+		return fetchProfile(actorName, scraperSlug, site, include, page + 1, source);
 	}
 
 	if (sources[source + 1]) {
-		return fetchProfile(actorName, scraperSlug, site, withReleases, 1, source + 1);
+		return fetchProfile(actorName, scraperSlug, site, include, 1, source + 1);
 	}
 
 	return null;
diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js
index f372298a..81e6f885 100644
--- a/src/scrapers/scrapers.js
+++ b/src/scrapers/scrapers.js
@@ -134,6 +134,7 @@ module.exports = {
 		anilos: nubiles,
 		babes,
 		baddaddypov: fullpornnetwork,
+		bamvisions,
 		bangbros,
 		blacked: vixen,
 		blackedraw: vixen,
diff --git a/src/scrapers/xempire.js b/src/scrapers/xempire.js
index 0a8e89c2..bbcf744a 100644
--- a/src/scrapers/xempire.js
+++ b/src/scrapers/xempire.js
@@ -23,8 +23,8 @@ function getActorReleasesUrl(actorPath, page = 1) {
 	return `https://www.xempire.com/en/videos/xempire/latest/${page}/All-Categories/0${actorPath}`;
 }
 
-async function networkFetchProfile(actorName, siteSlug) {
-	return fetchProfile(actorName, siteSlug, null, getActorReleasesUrl);
+async function networkFetchProfile(actorName, scraperSlug, site, include) {
+	return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
 }
 
 module.exports = {
diff --git a/src/utils/argv-include.js b/src/utils/argv-include.js
index 35221bdc..65935932 100644
--- a/src/utils/argv-include.js
+++ b/src/utils/argv-include.js
@@ -7,6 +7,8 @@ function include(argv) {
 		photos: argv.media && argv.photos,
 		poster: argv.media && argv.posters,
 		posters: argv.media && argv.posters,
+		releases: argv.withReleases,
+		scenes: argv.withReleases,
 		teaser: argv.media && argv.videos && argv.teasers,
 		teasers: argv.media && argv.videos && argv.teasers,
 		trailer: argv.media && argv.videos && argv.trailers,