Added profile scraper with scenes to BAM Visions. Passing 'includes' object instead of withReleases boolean to Gamma.

This commit is contained in:
ThePendulum 2020-03-12 02:19:45 +01:00
parent 152813730e
commit 370605554b
9 changed files with 87 additions and 19 deletions

View File

@ -8,6 +8,7 @@ const moment = require('moment');
const logger = require('./logger')(__filename);
const knex = require('./knex');
const argv = require('./argv');
const include = require('./utils/argv-include')(argv);
const scrapers = require('./scrapers/scrapers');
const whereOr = require('./utils/where-or');
const resolvePlace = require('./utils/resolve-place');
@ -391,7 +392,7 @@ async function scrapeProfiles(sources, actorName, actorEntry, sitesBySlug) {
logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);
const site = sitesBySlug[scraperSlug] || null;
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, argv.withReleases);
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, include);
if (profile) {
logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);

View File

@ -1,9 +1,11 @@
'use strict';
const { get, geta, formatDate } = require('../utils/qu');
const { get, geta, initAll, formatDate } = require('../utils/qu');
const slugify = require('../utils/slugify');
function scrapeLatest(scenes, site) {
const { feetInchesToCm } = require('../utils/convert');
function scrapeAll(scenes, site) {
return scenes.map(({ qu }) => {
const release = {};
@ -62,11 +64,63 @@ function scrapeScene({ html, qu }, url, site) {
return release;
}
/**
 * Recursively collects an actor's scenes from the paginated set listing.
 *
 * @param {string} actorId - Numeric model ID taken from the profile page.
 * @param {Object} site - Site entry; site.url is used as the base URL.
 * @param {number} [page=1] - Listing page to fetch.
 * @param {Array} [accScenes=[]] - Releases accumulated from earlier pages.
 * @returns {Promise<Array>} All releases found across the visited pages.
 */
async function fetchActorReleases(actorId, site, page = 1, accScenes = []) {
  const url = `${site.url}/sets.php?id=${actorId}&page=${page}`;
  const res = await get(url);

  // Keep what earlier pages already yielded instead of discarding it when a
  // later page fails to load (previously returned [], losing accScenes).
  if (!res.ok) return accScenes;

  const quReleases = initAll(res.item.el, '.item-episode');
  const releases = scrapeAll(quReleases, site);

  // A link to the next page number signals more results; recurse with the
  // accumulator so the call stack stays shallow and order is preserved.
  const nextPage = res.item.qu.q(`a[href*="page=${page + 1}"]`);

  if (nextPage) {
    return fetchActorReleases(actorId, site, page + 1, accScenes.concat(releases));
  }

  return accScenes.concat(releases);
}
/**
 * Scrapes an actor profile page into a profile object.
 *
 * @param {Object} context - Query context; qu wraps the profile document.
 * @param {Object} site - Site entry; site.url prefixes relative photo paths.
 * @param {boolean} withScenes - When truthy, also fetch the actor's scenes.
 * @returns {Promise<Object>} Profile with bio fields, avatar candidates and,
 *   optionally, releases.
 */
async function scrapeProfile({ qu }, site, withScenes) {
  const profile = {};

  // Bio rows are "Key: Value" list items. Skip rows without a value so a
  // malformed row cannot crash on .trim() of undefined, and rejoin on ':'
  // in case the value itself contains a colon.
  const bio = qu.all('.stats li', true).reduce((acc, row) => {
    const [key, ...rest] = row.split(':');
    if (!key || rest.length === 0) return acc;
    return { ...acc, [slugify(key, { delimiter: '_' })]: rest.join(':').trim() };
  }, {});

  if (bio.height) profile.height = feetInchesToCm(bio.height);

  if (bio.measurements) {
    const [bust, waist, hip] = bio.measurements.split('-');

    // Bust stays a string (it may carry a cup size); waist/hip are numeric.
    if (bust) profile.bust = bust;
    if (waist) profile.waist = Number(waist);
    if (hip) profile.hip = Number(hip);
  }

  // Prefer the highest-resolution srcset candidate; make relative URLs absolute.
  profile.avatar = [
    qu.q('.profile-pic img', 'src0_3x'),
    qu.q('.profile-pic img', 'src0_2x'),
    qu.q('.profile-pic img', 'src0_1x'),
  ].filter(Boolean).map(source => (/^http/.test(source) ? source : `${site.url}${source}`));

  if (withScenes) {
    // The actor ID is embedded in the avatar's id attribute ("set-target-<id>").
    const actorId = qu.q('.profile-pic img', 'id')?.match(/set-target-(\d+)/)?.[1];

    if (actorId) {
      profile.releases = await fetchActorReleases(actorId, site);
    }
  }

  return profile;
}
/**
 * Fetches one page of the site's latest scenes.
 *
 * @param {Object} site - Site entry; site.url is the base URL.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<Array|number>} Scraped releases, or the HTTP status on failure.
 */
async function fetchLatest(site, page = 1) {
  const url = `${site.url}/categories/movies/${page}/latest/`;
  const res = await geta(url, '.item-episode');

  // scrapeLatest was renamed to scrapeAll; the stale duplicate return line
  // referencing the old (now undefined) name has been removed.
  return res.ok ? scrapeAll(res.items, site) : res.status;
}
async function fetchScene(url, site) {
@ -75,7 +129,18 @@ async function fetchScene(url, site) {
return res.ok ? scrapeScene(res.item, url, site) : res.status;
}
/**
 * Fetches and scrapes an actor's profile page, trying both slug styles used
 * across the site ("firstlast" first, then "first-last").
 *
 * @param {string} actorName - Actor display name.
 * @param {string} scraperSlug - Scraper/site slug (unused here; kept for the
 *   shared fetchProfile signature).
 * @param {Object} site - Site entry; site.url is the base URL.
 * @param {Object} include - Include flags; include.scenes toggles scraping
 *   the actor's releases.
 * @returns {Promise<Object|number>} Profile object, or the HTTP status on failure.
 */
async function fetchProfile(actorName, scraperSlug, site, include) {
  const actorSlugA = slugify(actorName, { delimiter: '' });
  const actorSlugB = slugify(actorName);

  const resA = await get(`${site.url}/models/${actorSlugA}.html`);

  // Only fall back to the hyphenated slug when it actually differs; a
  // single-word name would otherwise trigger an identical second request.
  const res = resA.ok || actorSlugA === actorSlugB
    ? resA
    : await get(`${site.url}/models/${actorSlugB}.html`);

  return res.ok ? scrapeProfile(res.item, site, include.scenes) : res.status;
}
// Scraper interface: latest-release listing, single-scene scrape, and actor
// profile lookup.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@ -25,8 +25,8 @@ function getActorReleasesUrl(actorPath, page = 1) {
return `https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0${actorPath}/${page}`;
}
/**
 * Network-level profile scrape for Blowpass: no per-site entry is passed
 * down (null), and the network's release-listing URL builder plus the
 * include flags are forwarded to the shared fetchProfile.
 *
 * Resolves the diff residue that left both the old (no include) and new
 * definitions in place; only the include-forwarding version is kept.
 */
async function networkFetchProfile(actorName, scraperSlug, site, include) {
  return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
}
module.exports = {

View File

@ -84,12 +84,12 @@ async function fetchClassicProfile(actorName, siteSlug) {
return null;
}
async function networkFetchProfile(actorName) {
async function networkFetchProfile(actorName, scraperSlug, site, include) {
// not all Fame Digital sites offer Gamma actors
const [devils, rocco, peter, silvia] = await Promise.all([
fetchApiProfile(actorName, 'devilsfilm', true),
fetchApiProfile(actorName, 'roccosiffredi'),
argv.withReleases ? fetchProfile(actorName, 'peternorth', true, getActorReleasesUrl) : [],
argv.withReleases ? fetchProfile(actorName, 'peternorth', true, getActorReleasesUrl, include) : [],
argv.withReleases ? fetchClassicProfile(actorName, 'silviasaint') : [],
argv.withReleases ? fetchClassicProfile(actorName, 'silverstonedvd') : [],
]);

View File

@ -7,7 +7,6 @@ const { JSDOM } = require('jsdom');
const cheerio = require('cheerio');
const moment = require('moment');
const argv = require('../argv');
const logger = require('../logger')(__filename);
const { ex, get } = require('../utils/q');
const slugify = require('../utils/slugify');
@ -312,7 +311,7 @@ async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, acc
return accReleases.concat(releases);
}
async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl) {
async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases) {
const { q } = ex(html);
const avatar = q('img.actorPicture');
@ -346,7 +345,7 @@ async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUr
if (alias) profile.aliases = alias.split(':')[1].trim().split(', ');
if (nationality) profile.nationality = nationality.split(':')[1].trim();
if (getActorReleasesUrl && argv.withReleases) {
if (getActorReleasesUrl && withReleases) {
profile.releases = await fetchActorReleases(url, getActorReleasesUrl);
}
@ -553,7 +552,7 @@ async function fetchActorScenes(actorName, apiUrl, siteSlug) {
return [];
}
async function fetchProfile(actorName, siteSlug, altSearchUrl, getActorReleasesUrl) {
async function fetchProfile(actorName, siteSlug, altSearchUrl, getActorReleasesUrl, include) {
const actorSlug = actorName.toLowerCase().replace(/\s+/, '+');
const searchUrl = altSearchUrl
? `https://www.${siteSlug}.com/en/search/${actorSlug}/1/actor`
@ -574,7 +573,7 @@ async function fetchProfile(actorName, siteSlug, altSearchUrl, getActorReleasesU
return null;
}
return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl);
return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl, include.scenes);
}
return null;

View File

@ -217,7 +217,7 @@ async function fetchScene(url, site) {
return null;
}
async function fetchProfile(actorName, scraperSlug, site, withReleases, page = 1, source = 0) {
async function fetchProfile(actorName, scraperSlug, site, include, page = 1, source = 0) {
const letter = actorName.charAt(0).toUpperCase();
const sources = [
@ -238,17 +238,17 @@ async function fetchProfile(actorName, scraperSlug, site, withReleases, page = 1
const actorRes = await bhttp.get(actorUrl);
if (actorRes.statusCode === 200) {
return scrapeProfile(actorRes.body.toString(), actorUrl, withReleases);
return scrapeProfile(actorRes.body.toString(), actorUrl, include.scenes);
}
return null;
}
return fetchProfile(actorName, scraperSlug, site, withReleases, page + 1, source);
return fetchProfile(actorName, scraperSlug, site, include, page + 1, source);
}
if (sources[source + 1]) {
return fetchProfile(actorName, scraperSlug, site, withReleases, 1, source + 1);
return fetchProfile(actorName, scraperSlug, site, include, 1, source + 1);
}
return null;

View File

@ -134,6 +134,7 @@ module.exports = {
anilos: nubiles,
babes,
baddaddypov: fullpornnetwork,
bamvisions,
bangbros,
blacked: vixen,
blackedraw: vixen,

View File

@ -23,8 +23,8 @@ function getActorReleasesUrl(actorPath, page = 1) {
return `https://www.xempire.com/en/videos/xempire/latest/${page}/All-Categories/0${actorPath}`;
}
/**
 * Network-level profile scrape for XEmpire: no per-site entry is passed
 * down (null), and the network's release-listing URL builder plus the
 * include flags are forwarded to the shared fetchProfile.
 *
 * Resolves the diff residue that left both the old (no include) and new
 * definitions in place; only the include-forwarding version is kept.
 */
async function networkFetchProfile(actorName, scraperSlug, site, include) {
  return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
}
module.exports = {

View File

@ -7,6 +7,8 @@ function include(argv) {
photos: argv.media && argv.photos,
poster: argv.media && argv.posters,
posters: argv.media && argv.posters,
releases: argv.withReleases,
scenes: argv.withReleases,
teaser: argv.media && argv.videos && argv.teasers,
teasers: argv.media && argv.videos && argv.teasers,
trailer: argv.media && argv.videos && argv.trailers,