Added Filthy Family and 'legacy' scraper to Bang Bros. Added trans generic avatar. Added pagination support to site actions.

This commit is contained in:
2020-05-24 03:54:29 +02:00
parent 75d49517b7
commit 9843023c1f
137 changed files with 12892 additions and 270 deletions

View File

@@ -169,7 +169,7 @@ function curateActor(actor, withDetails = false) {
height: actor.height,
weight: actor.weight,
eyes: actor.eyes,
hair: actor.hair,
hairColor: actor.hair_color,
hasTattoos: actor.has_tattoos,
hasPiercings: actor.has_piercings,
tattoos: actor.tattoos,
@@ -245,7 +245,7 @@ function curateProfileEntry(profile) {
natural_boobs: profile.naturalBoobs,
height: profile.height,
weight: profile.weight,
hair: profile.hair,
hair_color: profile.hairColor,
eyes: profile.eyes,
has_tattoos: profile.hasTattoos,
has_piercings: profile.hasPiercings,
@@ -284,7 +284,7 @@ async function curateProfile(profile) {
curatedProfile.nationality = profile.nationality?.trim() || null; // used to derive country when country not available
curatedProfile.ethnicity = ethnicities[profile.ethnicity?.trim().toLowerCase()] || null;
curatedProfile.hair = hairColors[profile.hair?.trim().toLowerCase()] || null;
curatedProfile.hairColor = hairColors[(profile.hairColor || profile.hair)?.trim().toLowerCase()] || null;
curatedProfile.eyes = eyeColors[profile.eyes?.trim().toLowerCase()] || null;
curatedProfile.tattoos = profile.tattoos?.trim() || null;
@@ -352,7 +352,7 @@ async function curateProfile(profile) {
curatedProfile.releases = toBaseReleases(profile.releases);
if (profile.ethnicity && !curatedProfile.ethnicity) logger.warn(`Unrecognized ethnicity returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.ethnicity}`);
if (profile.hair && !curatedProfile.hair) logger.warn(`Unrecognized hair color returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.hair}`);
if ((profile.hairColor || profile.hair) && !curatedProfile.hairColor) logger.warn(`Unrecognized hair color returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.hairColor || profile.hair}`);
if (profile.eyes && !curatedProfile.eyes) logger.warn(`Unrecognized eye color returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.eyes}`);
return curatedProfile;
@@ -417,7 +417,7 @@ async function interpolateProfiles(actors) {
'hip',
'natural_boobs',
'height',
'hair',
'hair_color',
'eyes',
'has_tattoos',
'has_piercings',

View File

@@ -7,7 +7,7 @@ const moment = require('moment');
const logger = require('../logger')(__filename);
const slugify = require('../utils/slugify');
const { ex } = require('../utils/q');
const { get, getAll, ex } = require('../utils/q');
function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
@@ -16,7 +16,9 @@ function scrape(html, site) {
return sceneElements.map((element) => {
const sceneLinkElement = $(element).find('.thmb_lnk');
const title = sceneLinkElement.attr('title');
const url = `https://bangbros.com${sceneLinkElement.attr('href')}`;
// Legacy sites link to scenes on their own domain; everything else lives on bangbros.com.
// site.url is used as a complete base URL (scheme included) by the fetchers in this file,
// so it must not be prefixed with another "https://" — the entry ID lookup that follows
// relies on the scheme://host/path shape.
// NOTE(review): the fetchers test site.parameters?.legacy; confirm site.legacy is the intended flag here.
const url = site.legacy
  ? `${site.url}${sceneLinkElement.attr('href')}`
  : `https://bangbros.com${sceneLinkElement.attr('href')}`;
const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
const entryId = url.split('/')[3].slice(5);
@@ -50,6 +52,26 @@ function scrape(html, site) {
});
}
/**
 * Build release objects from the scene items of a legacy listing page.
 *
 * @param {Array<{qu: object}>} scenes - Items exposing the `qu` query helpers for one scene each.
 * @param {object} site - Site entry; `site.url` is prepended to each scene path.
 * @returns {Array<object>} One release per scene, in input order.
 */
function scrapeLegacy(scenes, site) {
  return scenes.map(({ qu }) => {
    const release = {};

    // 'palyer' is a misspelling that must be matched verbatim (sic).
    const pathname = qu.url('.mainplayer a, .palyer a');

    // A scene without a player link must not abort the whole page or yield a "...null" URL.
    release.url = pathname == null ? null : `${site.url}${pathname}`;
    release.entryId = pathname?.match(/video\d+/)?.[0];

    release.title = qu.q('h2', true);
    release.date = qu.date('div:not(.videoDisc)', 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
    release.description = qu.q('div + .videoDisc p', true);

    release.duration = qu.dur('.videoTag .title');

    release.poster = qu.img('.mainplayer img, .palyer img'); // sic
    // Images are collected from both the default source and 'data-original'; empty entries are dropped.
    release.photos = qu.imgs('article img').concat(qu.imgs('article img', 'data-original')).filter(Boolean);

    return release;
  });
}
/* no dates available, breaks database
function scrapeUpcoming(html, site) {
const { document } = ex(html);
@@ -114,6 +136,20 @@ function scrapeScene(html, url, _site) {
return release;
}
/**
 * Build a release object from a legacy scene page.
 *
 * @param {{qu: object}} item - Page item exposing the `qu` query helpers.
 * @param {string} url - Absolute scene URL; the entry ID is the `video<digits>` part of its path.
 * @returns {object} Release with entryId, title, description, duration and poster.
 */
function scrapeSceneLegacy({ qu }, url) {
  const { pathname: scenePath } = new URL(url);
  const idMatch = scenePath.match(/video\d+/);

  return {
    // Left undefined when the path carries no video ID.
    entryId: idMatch?.[0],
    title: qu.q('h1', true),
    description: qu.q('.videoDetail', true),
    duration: qu.dur('.tags p span'),
    poster: qu.img('#video_container + div img, .videoOverlay img'),
  };
}
function scrapeProfile(html) {
const { q } = ex(html);
const profile = {};
@@ -134,9 +170,24 @@ function scrapeProfileSearch(html, actorName) {
}
/**
 * Fetch one page of the latest releases for a site.
 *
 * Sites flagged with `parameters.legacy` are listed under `<site.url>/videos/<page>` and
 * scraped per `.videoList` item; all other sites are listed under `<site.url>/<page>` and
 * scraped from the page HTML.
 *
 * @param {object} site - Site entry; `url` is the request base, `parameters.legacy` selects the layout.
 * @param {number} [page=1] - Page number appended to the listing URL.
 * @returns {Promise<*>} The scraped releases, or the response's `status` when the request was not ok.
 */
async function fetchLatest(site, page = 1) {
  if (site.parameters?.legacy) {
    const url = `${site.url}/videos/${page}`;
    const res = await getAll(url, '.videoList');

    if (res.ok) {
      return scrapeLegacy(res.items, site);
    }

    return res.status;
  }

  const res = await get(`${site.url}/${page}`);

  if (res.ok) {
    return scrape(res.item.html, site);
  }

  return res.status;
}
/*
@@ -153,13 +204,21 @@ async function fetchScene(url, site, release) {
}
const { origin } = new URL(url);
const res = await bhttp.get(url);
const res = await get(url);
if (!res.ok) {
return res.status;
}
if (site.parameters?.legacy) {
return scrapeSceneLegacy(res.item, url, site);
}
if (!/https?:\/\/(www.)?bangbros.com\/?$/.test(origin)) {
throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
}
return scrapeScene(res.body.toString(), url, site);
return scrapeScene(res.item.html, url, site);
}
async function fetchProfile(actorName) {