Added Filthy Family and 'legacy' scraper to Bang Bros. Added trans generic avatar. Added pagination support to site actions.
This commit is contained in:
@@ -169,7 +169,7 @@ function curateActor(actor, withDetails = false) {
|
||||
height: actor.height,
|
||||
weight: actor.weight,
|
||||
eyes: actor.eyes,
|
||||
hair: actor.hair,
|
||||
hairColor: actor.hair_color,
|
||||
hasTattoos: actor.has_tattoos,
|
||||
hasPiercings: actor.has_piercings,
|
||||
tattoos: actor.tattoos,
|
||||
@@ -245,7 +245,7 @@ function curateProfileEntry(profile) {
|
||||
natural_boobs: profile.naturalBoobs,
|
||||
height: profile.height,
|
||||
weight: profile.weight,
|
||||
hair: profile.hair,
|
||||
hair_color: profile.hairColor,
|
||||
eyes: profile.eyes,
|
||||
has_tattoos: profile.hasTattoos,
|
||||
has_piercings: profile.hasPiercings,
|
||||
@@ -284,7 +284,7 @@ async function curateProfile(profile) {
|
||||
curatedProfile.nationality = profile.nationality?.trim() || null; // used to derive country when country not available
|
||||
|
||||
curatedProfile.ethnicity = ethnicities[profile.ethnicity?.trim().toLowerCase()] || null;
|
||||
curatedProfile.hair = hairColors[profile.hair?.trim().toLowerCase()] || null;
|
||||
curatedProfile.hairColor = hairColors[(profile.hairColor || profile.hair)?.trim().toLowerCase()] || null;
|
||||
curatedProfile.eyes = eyeColors[profile.eyes?.trim().toLowerCase()] || null;
|
||||
|
||||
curatedProfile.tattoos = profile.tattoos?.trim() || null;
|
||||
@@ -352,7 +352,7 @@ async function curateProfile(profile) {
|
||||
curatedProfile.releases = toBaseReleases(profile.releases);
|
||||
|
||||
if (profile.ethnicity && !curatedProfile.ethnicity) logger.warn(`Unrecognized ethnicity returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.ethnicity}`);
|
||||
if (profile.hair && !curatedProfile.hair) logger.warn(`Unrecognized hair color returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.hair}`);
|
||||
if ((profile.hairColor || profile.hair) && !curatedProfile.hairColor) logger.warn(`Unrecognized hair color returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.hairColor || profile.hair}`);
|
||||
if (profile.eyes && !curatedProfile.eyes) logger.warn(`Unrecognized eye color returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.eyes}`);
|
||||
|
||||
return curatedProfile;
|
||||
@@ -417,7 +417,7 @@ async function interpolateProfiles(actors) {
|
||||
'hip',
|
||||
'natural_boobs',
|
||||
'height',
|
||||
'hair',
|
||||
'hair_color',
|
||||
'eyes',
|
||||
'has_tattoos',
|
||||
'has_piercings',
|
||||
|
||||
@@ -7,7 +7,7 @@ const moment = require('moment');
|
||||
|
||||
const logger = require('../logger')(__filename);
|
||||
const slugify = require('../utils/slugify');
|
||||
const { ex } = require('../utils/q');
|
||||
const { get, getAll, ex } = require('../utils/q');
|
||||
|
||||
function scrape(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
@@ -16,7 +16,9 @@ function scrape(html, site) {
|
||||
return sceneElements.map((element) => {
|
||||
const sceneLinkElement = $(element).find('.thmb_lnk');
|
||||
const title = sceneLinkElement.attr('title');
|
||||
// Scene links are site-relative; legacy sites are prefixed with their own
// host instead of bangbros.com.
// NOTE(review): elsewhere in this file the flag is read as
// `site.parameters?.legacy` and `site.url` is used as a complete base URL
// (protocol included) — confirm whether `site.legacy` and the extra
// `https://` prefix are intended here.
const url = site.legacy
  ? `https://${site.url}${sceneLinkElement.attr('href')}`
  : `https://bangbros.com${sceneLinkElement.attr('href')}`;
|
||||
const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
|
||||
const entryId = url.split('/')[3].slice(5);
|
||||
|
||||
@@ -50,6 +52,26 @@ function scrape(html, site) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Build releases from the scene entries of a legacy site's listing page.
 *
 * @param {Array<{ qu: object }>} scenes - One query context per listed scene.
 *   Each `qu` must provide the `url`, `q`, `date`, `dur`, `img` and `imgs`
 *   helpers called below.
 * @param {{ url: string }} site - Site being scraped; `site.url` is prepended
 *   to each scene's path to form the release URL.
 * @returns {object[]} One release object per scene, in listing order.
 */
function scrapeLegacy(scenes, site) {
  return scenes.map(({ qu }) => {
    const release = {};

    const pathname = qu.url('.mainplayer a, .palyer a'); // sic

    // A scene without a player link must not throw and abort the whole
    // listing, nor produce a URL ending in "null" or "undefined".
    release.url = pathname != null ? `${site.url}${pathname}` : null;
    release.entryId = pathname?.match(/video\d+/)?.[0];

    release.title = qu.q('h2', true);
    // The regex narrows the matched element's text to the date portion
    // before it is parsed with the given format.
    release.date = qu.date('div:not(.videoDisc)', 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
    release.description = qu.q('div + .videoDisc p', true);
    release.duration = qu.dur('.videoTag .title');

    release.poster = qu.img('.mainplayer img, .palyer img'); // sic
    // Collect both the default image source and the `data-original`
    // attribute of each article image, dropping empty entries.
    release.photos = qu.imgs('article img').concat(qu.imgs('article img', 'data-original')).filter(Boolean);

    return release;
  });
}
|
||||
|
||||
/* no dates available, breaks database
|
||||
function scrapeUpcoming(html, site) {
|
||||
const { document } = ex(html);
|
||||
@@ -114,6 +136,20 @@ function scrapeScene(html, url, _site) {
|
||||
return release;
|
||||
}
|
||||
|
||||
/**
 * Build a release from a legacy site's scene page.
 *
 * @param {{ qu: object }} item - Query context for the scene page; `qu` must
 *   provide the `q`, `dur` and `img` helpers called below.
 * @param {string} url - Absolute URL of the scene page; the entry ID is taken
 *   from its path.
 * @returns {object} Release with `entryId`, `title`, `description`,
 *   `duration` and `poster`.
 * @throws {TypeError} If `url` cannot be parsed by the `URL` constructor.
 */
function scrapeSceneLegacy({ qu }, url) {
  const release = {};

  // The entry ID is the `video<digits>` part of the path; it is left
  // undefined when the path contains no such part.
  release.entryId = new URL(url).pathname.match(/video\d+/)?.[0];

  release.title = qu.q('h1', true);
  release.description = qu.q('.videoDetail', true);
  release.duration = qu.dur('.tags p span');

  release.poster = qu.img('#video_container + div img, .videoOverlay img');

  return release;
}
|
||||
|
||||
function scrapeProfile(html) {
|
||||
const { q } = ex(html);
|
||||
const profile = {};
|
||||
@@ -134,9 +170,24 @@ function scrapeProfileSearch(html, actorName) {
|
||||
}
|
||||
|
||||
/**
 * Fetch one page of a site's latest scenes and scrape it into releases.
 *
 * Sites flagged with `parameters.legacy` are listed under `/videos/<page>`
 * and scraped per `.videoList` element with `scrapeLegacy`; all other sites
 * are listed under `/<page>` and scraped from the page HTML with `scrape`.
 *
 * @param {object} site - Site to fetch; `site.url` is the base of the listing
 *   URL and `site.parameters?.legacy` selects the legacy path.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<object[]|number>} Scraped releases, or the response
 *   status when the request did not succeed.
 */
async function fetchLatest(site, page = 1) {
  if (site.parameters?.legacy) {
    const url = `${site.url}/videos/${page}`;
    const res = await getAll(url, '.videoList');

    if (res.ok) {
      return scrapeLegacy(res.items, site);
    }

    return res.status;
  }

  const res = await get(`${site.url}/${page}`);

  if (res.ok) {
    return scrape(res.item.html, site);
  }

  return res.status;
}
|
||||
|
||||
/*
|
||||
@@ -153,13 +204,21 @@ async function fetchScene(url, site, release) {
|
||||
}
|
||||
|
||||
const { origin } = new URL(url);
|
||||
const res = await bhttp.get(url);
|
||||
const res = await get(url);
|
||||
|
||||
if (!res.ok) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
if (site.parameters?.legacy) {
|
||||
return scrapeSceneLegacy(res.item, url, site);
|
||||
}
|
||||
|
||||
if (!/https?:\/\/(www.)?bangbros.com\/?$/.test(origin)) {
|
||||
throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
|
||||
}
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
return scrapeScene(res.item.html, url, site);
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
|
||||
Reference in New Issue
Block a user