Added Filthy Family and 'legacy' scraper to Bang Bros. Added trans generic avatar. Added pagination support to site actions.

This commit is contained in:
2020-05-24 03:54:29 +02:00
parent 75d49517b7
commit 9843023c1f
137 changed files with 12892 additions and 270 deletions

View File

@@ -7,7 +7,7 @@ const moment = require('moment');
const logger = require('../logger')(__filename);
const slugify = require('../utils/slugify');
const { ex } = require('../utils/q');
const { get, getAll, ex } = require('../utils/q');
function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
@@ -16,7 +16,9 @@ function scrape(html, site) {
return sceneElements.map((element) => {
const sceneLinkElement = $(element).find('.thmb_lnk');
const title = sceneLinkElement.attr('title');
const url = `https://bangbros.com${sceneLinkElement.attr('href')}`;
const url = site.legacy
? `https://${site.url}{sceneLinkElement.attr('href')}`
: `https://bangbros.com${sceneLinkElement.attr('href')}`;
const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
const entryId = url.split('/')[3].slice(5);
@@ -50,6 +52,26 @@ function scrape(html, site) {
});
}
/**
 * Build release objects from the scene tiles of a 'legacy' site listing page.
 *
 * @param {Array<{qu: object}>} scenes - one entry per scene tile; each exposes a
 *   `qu` query helper with `url`, `q`, `date`, `dur`, `img` and `imgs` methods.
 * @param {{url: string}} site - site whose `url` is prepended to the scene path.
 * @returns {object[]} one release per tile, in the same order as `scenes`.
 */
function scrapeLegacy(scenes, site) {
  return scenes.map(({ qu }) => {
    const release = {};

    // '.palyer' is a misspelling the original author deliberately kept (sic).
    const pathname = qu.url('.mainplayer a, .palyer a'); // sic

    // A tile without a player link used to throw on `pathname.match` and abort
    // the whole page; leave url and entryId unset for that tile instead.
    if (pathname) {
      release.url = `${site.url}${pathname}`;
      release.entryId = pathname.match(/video\d+/)?.[0];
    }

    release.title = qu.q('h2', true);
    release.date = qu.date('div:not(.videoDisc)', 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
    release.description = qu.q('div + .videoDisc p', true);
    release.duration = qu.dur('.videoTag .title');

    release.poster = qu.img('.mainplayer img, .palyer img'); // sic

    // Collect images from both the default attribute and 'data-original',
    // then drop empty entries.
    release.photos = qu.imgs('article img')
      .concat(qu.imgs('article img', 'data-original'))
      .filter(Boolean);

    return release;
  });
}
/* no dates available, breaks database
function scrapeUpcoming(html, site) {
const { document } = ex(html);
@@ -114,6 +136,20 @@ function scrapeScene(html, url, _site) {
return release;
}
/**
 * Build a release object from a 'legacy' site scene page.
 *
 * @param {{qu: object}} item - page wrapper exposing a `qu` query helper with
 *   `q`, `dur` and `img` methods.
 * @param {string} url - absolute scene URL; the entry ID is the first
 *   `video<digits>` token found in its path.
 * @returns {object} release with entryId, title, description, duration and poster.
 * @throws {TypeError} if `url` is not a valid absolute URL.
 */
function scrapeSceneLegacy({ qu }, url) {
  const { pathname } = new URL(url);
  // No match yields an empty list, so entryId ends up undefined.
  const [entryId] = pathname.match(/video\d+/) || [];

  return {
    entryId,
    title: qu.q('h1', true),
    description: qu.q('.videoDetail', true),
    duration: qu.dur('.tags p span'),
    poster: qu.img('#video_container + div img, .videoOverlay img'),
  };
}
function scrapeProfile(html) {
const { q } = ex(html);
const profile = {};
@@ -134,9 +170,24 @@ function scrapeProfileSearch(html, actorName) {
}
/**
 * Fetch one page of the latest scenes for a site and scrape it.
 *
 * Sites flagged with `parameters.legacy` are listed under `/videos/<page>` and
 * parsed with scrapeLegacy; all other sites are listed under `/<page>` and
 * parsed with scrape.
 *
 * @param {object} site - site record; reads `site.url` and `site.parameters?.legacy`.
 * @param {number} [page=1] - page number appended to the listing URL.
 * @returns {Promise<object[]|*>} the scraped releases when the response is ok,
 *   otherwise `res.status` as reported by the request helper.
 */
async function fetchLatest(site, page = 1) {
  if (site.parameters?.legacy) {
    const url = `${site.url}/videos/${page}`;
    // NOTE(review): getAll presumably yields one item per '.videoList' match
    // in `res.items` — confirm against ../utils/q.
    const res = await getAll(url, '.videoList');

    if (res.ok) {
      return scrapeLegacy(res.items, site);
    }

    return res.status;
  }

  const res = await get(`${site.url}/${page}`);

  if (res.ok) {
    return scrape(res.item.html, site);
  }

  return res.status;
}
/*
@@ -153,13 +204,21 @@ async function fetchScene(url, site, release) {
}
const { origin } = new URL(url);
const res = await bhttp.get(url);
const res = await get(url);
if (!res.ok) {
return res.status;
}
if (site.parameters?.legacy) {
return scrapeSceneLegacy(res.item, url, site);
}
if (!/https?:\/\/(www.)?bangbros.com\/?$/.test(origin)) {
throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
}
return scrapeScene(res.body.toString(), url, site);
return scrapeScene(res.item.html, url, site);
}
async function fetchProfile(actorName) {