Added profile scraper with scenes to BAM Visions. Passing the 'include' object instead of the withReleases boolean to Gamma.

This commit is contained in:
ThePendulum 2020-03-12 02:19:45 +01:00
parent 152813730e
commit 370605554b
9 changed files with 87 additions and 19 deletions

View File

@ -8,6 +8,7 @@ const moment = require('moment');
const logger = require('./logger')(__filename); const logger = require('./logger')(__filename);
const knex = require('./knex'); const knex = require('./knex');
const argv = require('./argv'); const argv = require('./argv');
const include = require('./utils/argv-include')(argv);
const scrapers = require('./scrapers/scrapers'); const scrapers = require('./scrapers/scrapers');
const whereOr = require('./utils/where-or'); const whereOr = require('./utils/where-or');
const resolvePlace = require('./utils/resolve-place'); const resolvePlace = require('./utils/resolve-place');
@ -391,7 +392,7 @@ async function scrapeProfiles(sources, actorName, actorEntry, sitesBySlug) {
logger.verbose(`Searching '${actorName}' on ${scraperSlug}`); logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);
const site = sitesBySlug[scraperSlug] || null; const site = sitesBySlug[scraperSlug] || null;
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, argv.withReleases); const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, include);
if (profile) { if (profile) {
logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`); logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);

View File

@ -1,9 +1,11 @@
'use strict'; 'use strict';
const { get, geta, formatDate } = require('../utils/qu'); const { get, geta, initAll, formatDate } = require('../utils/qu');
const slugify = require('../utils/slugify'); const slugify = require('../utils/slugify');
function scrapeLatest(scenes, site) { const { feetInchesToCm } = require('../utils/convert');
function scrapeAll(scenes, site) {
return scenes.map(({ qu }) => { return scenes.map(({ qu }) => {
const release = {}; const release = {};
@ -62,11 +64,63 @@ function scrapeScene({ html, qu }, url, site) {
return release; return release;
} }
/**
 * Collect an actor's releases by walking the paginated `sets.php` listing.
 *
 * Each page is fetched with `get`, its `.item-episode` elements are wrapped
 * with `initAll` and turned into releases by `scrapeAll`. The function recurses
 * for as long as the current page links to `page + 1`.
 *
 * @param {string|number} actorId - value placed in the `id` query parameter
 * @param {object} site - site entry; only `site.url` is read here, the object is passed on to `scrapeAll`
 * @param {number} [page=1] - page to fetch
 * @param {Array} [accScenes=[]] - releases gathered from previous pages
 * @returns {Promise<Array>} every release gathered up to the last page that could be fetched
 */
async function fetchActorReleases(actorId, site, page = 1, accScenes = []) {
  const url = `${site.url}/sets.php?id=${actorId}&page=${page}`;
  const res = await get(url);

  // A failing page must not throw away what earlier pages already produced;
  // on the first page the accumulator is still empty, so the result is [] as before.
  if (!res.ok) return accScenes;

  const quReleases = initAll(res.item.el, '.item-episode');
  const collected = accScenes.concat(scrapeAll(quReleases, site));

  // The listing is considered to continue only when it links to the next page number.
  const nextPage = res.item.qu.q(`a[href*="page=${page + 1}"]`);

  return nextPage
    ? fetchActorReleases(actorId, site, page + 1, collected)
    : collected;
}
/**
 * Build an actor profile from a parsed model page.
 *
 * Reads the `.stats li` rows as `key: value` pairs, derives height and
 * measurements from them, collects the avatar sources from `.profile-pic img`
 * and, when requested, attaches the actor's releases.
 *
 * @param {object} page - parsed page; only its `qu` query helper is used
 * @param {object} site - site entry; `site.url` prefixes relative avatar paths
 * @param {boolean} withScenes - when truthy, releases are fetched via `fetchActorReleases`
 * @returns {Promise<object>} profile with optional `height`, `bust`, `waist`, `hip`, `releases`, and an `avatar` array
 */
async function scrapeProfile({ qu }, site, withScenes) {
  const profile = {};
  const bio = {};

  // presumably qu.all(selector, true) yields the rows' text content; verify against utils/qu
  for (const row of qu.all('.stats li', true)) {
    const separatorIndex = row.indexOf(':');

    // Rows without a colon carry no key/value pair; skipping them keeps one
    // odd list item from throwing and discarding the whole profile.
    if (separatorIndex !== -1) {
      const key = slugify(row.slice(0, separatorIndex), { delimiter: '_' });

      // Split on the first colon only, so a value containing ':' stays intact.
      bio[key] = row.slice(separatorIndex + 1).trim();
    }
  }

  if (bio.height) profile.height = feetInchesToCm(bio.height);

  if (bio.measurements) {
    const [bust, waist, hip] = bio.measurements.split('-');
    const waistNumber = Number(waist);
    const hipNumber = Number(hip);

    // bust is kept as text; waist and hip are only stored when they are real numbers
    if (bust) profile.bust = bust;
    if (waist && !Number.isNaN(waistNumber)) profile.waist = waistNumber;
    if (hip && !Number.isNaN(hipNumber)) profile.hip = hipNumber;
  }

  // Candidate sources listed from 3x down to 1x; relative paths are prefixed with the site URL.
  profile.avatar = [
    qu.q('.profile-pic img', 'src0_3x'),
    qu.q('.profile-pic img', 'src0_2x'),
    qu.q('.profile-pic img', 'src0_1x'),
  ].filter(Boolean).map(source => (/^http/.test(source) ? source : `${site.url}${source}`));

  if (withScenes) {
    // The actor ID is taken from the avatar's id attribute, e.g. "set-target-123".
    const actorId = qu.q('.profile-pic img', 'id')?.match(/set-target-(\d+)/)?.[1];

    if (actorId) {
      profile.releases = await fetchActorReleases(actorId, site);
    }
  }

  return profile;
}
async function fetchLatest(site, page = 1) { async function fetchLatest(site, page = 1) {
const url = `${site.url}/categories/movies/${page}/latest/`; const url = `${site.url}/categories/movies/${page}/latest/`;
const res = await geta(url, '.item-episode'); const res = await geta(url, '.item-episode');
return res.ok ? scrapeLatest(res.items, site) : res.status; return res.ok ? scrapeAll(res.items, site) : res.status;
} }
async function fetchScene(url, site) { async function fetchScene(url, site) {
@ -75,7 +129,18 @@ async function fetchScene(url, site) {
return res.ok ? scrapeScene(res.item, url, site) : res.status; return res.ok ? scrapeScene(res.item, url, site) : res.status;
} }
/**
 * Look up an actor's model page and scrape their profile.
 *
 * Two URL variants are tried under `/models/`: the name slugified with an empty
 * delimiter first, then the name slugified with the default delimiter.
 *
 * @param {string} actorName - actor name used to build the page slug
 * @param {string} scraperSlug - not used here; kept for the shared fetchProfile signature
 * @param {object} site - site entry; `site.url` is the base of the model page URL
 * @param {object} [include] - include flags; only `include.scenes` is read
 * @returns {Promise<object|*>} the scraped profile, or the `status` of the last failed response
 */
async function fetchProfile(actorName, scraperSlug, site, include) {
  const compactSlug = slugify(actorName, { delimiter: '' });
  const defaultSlug = slugify(actorName);

  let res = await get(`${site.url}/models/${compactSlug}.html`);

  // For single-word names both slugs are identical; retrying would only
  // repeat the request that just failed.
  if (!res.ok && defaultSlug !== compactSlug) {
    res = await get(`${site.url}/models/${defaultSlug}.html`);
  }

  // include may be omitted by callers that do not pass flags; treat that as "no scenes".
  return res.ok ? scrapeProfile(res.item, site, include?.scenes) : res.status;
}
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchScene, fetchScene,
fetchProfile,
}; };

View File

@ -25,8 +25,8 @@ function getActorReleasesUrl(actorPath, page = 1) {
return `https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0${actorPath}/${page}`; return `https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0${actorPath}/${page}`;
} }
async function networkFetchProfile(actorName, siteSlug) { async function networkFetchProfile(actorName, scraperSlug, site, include) {
return fetchProfile(actorName, siteSlug, null, getActorReleasesUrl); return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
} }
module.exports = { module.exports = {

View File

@ -84,12 +84,12 @@ async function fetchClassicProfile(actorName, siteSlug) {
return null; return null;
} }
async function networkFetchProfile(actorName) { async function networkFetchProfile(actorName, scraperSlug, site, include) {
// not all Fame Digital sites offer Gamma actors // not all Fame Digital sites offer Gamma actors
const [devils, rocco, peter, silvia] = await Promise.all([ const [devils, rocco, peter, silvia] = await Promise.all([
fetchApiProfile(actorName, 'devilsfilm', true), fetchApiProfile(actorName, 'devilsfilm', true),
fetchApiProfile(actorName, 'roccosiffredi'), fetchApiProfile(actorName, 'roccosiffredi'),
argv.withReleases ? fetchProfile(actorName, 'peternorth', true, getActorReleasesUrl) : [], argv.withReleases ? fetchProfile(actorName, 'peternorth', true, getActorReleasesUrl, include) : [],
argv.withReleases ? fetchClassicProfile(actorName, 'silviasaint') : [], argv.withReleases ? fetchClassicProfile(actorName, 'silviasaint') : [],
argv.withReleases ? fetchClassicProfile(actorName, 'silverstonedvd') : [], argv.withReleases ? fetchClassicProfile(actorName, 'silverstonedvd') : [],
]); ]);

View File

@ -7,7 +7,6 @@ const { JSDOM } = require('jsdom');
const cheerio = require('cheerio'); const cheerio = require('cheerio');
const moment = require('moment'); const moment = require('moment');
const argv = require('../argv');
const logger = require('../logger')(__filename); const logger = require('../logger')(__filename);
const { ex, get } = require('../utils/q'); const { ex, get } = require('../utils/q');
const slugify = require('../utils/slugify'); const slugify = require('../utils/slugify');
@ -312,7 +311,7 @@ async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, acc
return accReleases.concat(releases); return accReleases.concat(releases);
} }
async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl) { async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases) {
const { q } = ex(html); const { q } = ex(html);
const avatar = q('img.actorPicture'); const avatar = q('img.actorPicture');
@ -346,7 +345,7 @@ async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUr
if (alias) profile.aliases = alias.split(':')[1].trim().split(', '); if (alias) profile.aliases = alias.split(':')[1].trim().split(', ');
if (nationality) profile.nationality = nationality.split(':')[1].trim(); if (nationality) profile.nationality = nationality.split(':')[1].trim();
if (getActorReleasesUrl && argv.withReleases) { if (getActorReleasesUrl && withReleases) {
profile.releases = await fetchActorReleases(url, getActorReleasesUrl); profile.releases = await fetchActorReleases(url, getActorReleasesUrl);
} }
@ -553,7 +552,7 @@ async function fetchActorScenes(actorName, apiUrl, siteSlug) {
return []; return [];
} }
async function fetchProfile(actorName, siteSlug, altSearchUrl, getActorReleasesUrl) { async function fetchProfile(actorName, siteSlug, altSearchUrl, getActorReleasesUrl, include) {
const actorSlug = actorName.toLowerCase().replace(/\s+/, '+'); const actorSlug = actorName.toLowerCase().replace(/\s+/, '+');
const searchUrl = altSearchUrl const searchUrl = altSearchUrl
? `https://www.${siteSlug}.com/en/search/${actorSlug}/1/actor` ? `https://www.${siteSlug}.com/en/search/${actorSlug}/1/actor`
@ -574,7 +573,7 @@ async function fetchProfile(actorName, siteSlug, altSearchUrl, getActorReleasesU
return null; return null;
} }
return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl); return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl, include.scenes);
} }
return null; return null;

View File

@ -217,7 +217,7 @@ async function fetchScene(url, site) {
return null; return null;
} }
async function fetchProfile(actorName, scraperSlug, site, withReleases, page = 1, source = 0) { async function fetchProfile(actorName, scraperSlug, site, include, page = 1, source = 0) {
const letter = actorName.charAt(0).toUpperCase(); const letter = actorName.charAt(0).toUpperCase();
const sources = [ const sources = [
@ -238,17 +238,17 @@ async function fetchProfile(actorName, scraperSlug, site, withReleases, page = 1
const actorRes = await bhttp.get(actorUrl); const actorRes = await bhttp.get(actorUrl);
if (actorRes.statusCode === 200) { if (actorRes.statusCode === 200) {
return scrapeProfile(actorRes.body.toString(), actorUrl, withReleases); return scrapeProfile(actorRes.body.toString(), actorUrl, include.scenes);
} }
return null; return null;
} }
return fetchProfile(actorName, scraperSlug, site, withReleases, page + 1, source); return fetchProfile(actorName, scraperSlug, site, include, page + 1, source);
} }
if (sources[source + 1]) { if (sources[source + 1]) {
return fetchProfile(actorName, scraperSlug, site, withReleases, 1, source + 1); return fetchProfile(actorName, scraperSlug, site, include, 1, source + 1);
} }
return null; return null;

View File

@ -134,6 +134,7 @@ module.exports = {
anilos: nubiles, anilos: nubiles,
babes, babes,
baddaddypov: fullpornnetwork, baddaddypov: fullpornnetwork,
bamvisions,
bangbros, bangbros,
blacked: vixen, blacked: vixen,
blackedraw: vixen, blackedraw: vixen,

View File

@ -23,8 +23,8 @@ function getActorReleasesUrl(actorPath, page = 1) {
return `https://www.xempire.com/en/videos/xempire/latest/${page}/All-Categories/0${actorPath}`; return `https://www.xempire.com/en/videos/xempire/latest/${page}/All-Categories/0${actorPath}`;
} }
async function networkFetchProfile(actorName, siteSlug) { async function networkFetchProfile(actorName, scraperSlug, site, include) {
return fetchProfile(actorName, siteSlug, null, getActorReleasesUrl); return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
} }
module.exports = { module.exports = {

View File

@ -7,6 +7,8 @@ function include(argv) {
photos: argv.media && argv.photos, photos: argv.media && argv.photos,
poster: argv.media && argv.posters, poster: argv.media && argv.posters,
posters: argv.media && argv.posters, posters: argv.media && argv.posters,
releases: argv.withReleases,
scenes: argv.withReleases,
teaser: argv.media && argv.videos && argv.teasers, teaser: argv.media && argv.videos && argv.teasers,
teasers: argv.media && argv.videos && argv.teasers, teasers: argv.media && argv.videos && argv.teasers,
trailer: argv.media && argv.videos && argv.trailers, trailer: argv.media && argv.videos && argv.trailers,