forked from DebaucheryLibrarian/traxxx
Added Filthy Family and 'legacy' scraper to Bang Bros. Added trans generic avatar. Added pagination support to site actions.
This commit is contained in:
@@ -7,7 +7,7 @@ const moment = require('moment');
|
||||
|
||||
const logger = require('../logger')(__filename);
|
||||
const slugify = require('../utils/slugify');
|
||||
const { ex } = require('../utils/q');
|
||||
const { get, getAll, ex } = require('../utils/q');
|
||||
|
||||
function scrape(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
@@ -16,7 +16,9 @@ function scrape(html, site) {
|
||||
return sceneElements.map((element) => {
|
||||
const sceneLinkElement = $(element).find('.thmb_lnk');
|
||||
const title = sceneLinkElement.attr('title');
|
||||
const url = `https://bangbros.com${sceneLinkElement.attr('href')}`;
|
||||
const url = site.legacy
|
||||
? `https://${site.url}{sceneLinkElement.attr('href')}`
|
||||
: `https://bangbros.com${sceneLinkElement.attr('href')}`;
|
||||
const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
|
||||
const entryId = url.split('/')[3].slice(5);
|
||||
|
||||
@@ -50,6 +52,26 @@ function scrape(html, site) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Builds one release object for every item in `scenes`.
 *
 * @param {Array<{ qu: Object }>} scenes - Items exposing a `qu` query helper. This
 *   function calls its `url`, `q`, `date`, `dur`, `img` and `imgs` methods.
 * @param {{ url: string }} site - Site whose `url` is prefixed to each scene path.
 * @returns {Object[]} One release per item, in input order. `url` and `entryId`
 *   are left unset when no player link is found for an item.
 */
function scrapeLegacy(scenes, site) {
  return scenes.map(({ qu }) => {
    const release = {};

    // '.palyer' is spelled this way on purpose (marked "sic" by the original author).
    const pathname = qu.url('.mainplayer a, .palyer a'); // sic

    // Guard against items without a player link: calling .match() on a missing
    // pathname would throw and abort the whole page, and the URL would end in
    // "null"/"undefined".
    if (pathname) {
      release.url = `${site.url}${pathname}`;
      release.entryId = pathname.match(/video\d+/)?.[0];
    }

    // NOTE(review): the second argument `true` presumably asks the helper for
    // trimmed text; confirm against utils/q.
    release.title = qu.q('h2', true);
    // NOTE(review): arguments look like (selector, date format, extraction regex);
    // confirm against utils/q.
    release.date = qu.date('div:not(.videoDisc)', 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
    release.description = qu.q('div + .videoDisc p', true);
    release.duration = qu.dur('.videoTag .title');

    release.poster = qu.img('.mainplayer img, .palyer img'); // sic

    // Collect images from the default attribute and from 'data-original', then
    // drop empty entries.
    release.photos = qu.imgs('article img')
      .concat(qu.imgs('article img', 'data-original'))
      .filter(Boolean);

    return release;
  });
}
|
||||
|
||||
/* no dates available, breaks database
|
||||
function scrapeUpcoming(html, site) {
|
||||
const { document } = ex(html);
|
||||
@@ -114,6 +136,20 @@ function scrapeScene(html, url, _site) {
|
||||
return release;
|
||||
}
|
||||
|
||||
/**
 * Builds a release object from a single scene page.
 *
 * @param {{ qu: Object }} item - Page wrapper exposing a `qu` query helper. This
 *   function calls its `q`, `dur` and `img` methods.
 * @param {string} url - Absolute URL of the scene page. `new URL(url)` throws on
 *   a value that is not a valid absolute URL.
 * @returns {Object} Release with `entryId`, `title`, `description`, `duration`
 *   and `poster`. `entryId` is undefined when the path has no `video<digits>` part.
 */
function scrapeSceneLegacy({ qu }, url) {
  const release = {};

  // The entry ID is the `video<digits>` segment of the URL path.
  release.entryId = new URL(url).pathname.match(/video\d+/)?.[0];

  // NOTE(review): the second argument `true` presumably asks the helper for
  // trimmed text; confirm against utils/q.
  release.title = qu.q('h1', true);
  release.description = qu.q('.videoDetail', true);
  release.duration = qu.dur('.tags p span');

  release.poster = qu.img('#video_container + div img, .videoOverlay img');

  return release;
}
|
||||
|
||||
function scrapeProfile(html) {
|
||||
const { q } = ex(html);
|
||||
const profile = {};
|
||||
@@ -134,9 +170,24 @@ function scrapeProfileSearch(html, actorName) {
|
||||
}
|
||||
|
||||
/**
 * Fetches one page of latest releases for a site and scrapes it.
 *
 * Sites with `parameters.legacy` set are fetched from `<site.url>/videos/<page>`
 * and handed to `scrapeLegacy`; all other sites are fetched from
 * `<site.url>/<page>` and handed to `scrape`.
 *
 * @param {Object} site - Site record; `url` and optional `parameters.legacy` are read.
 * @param {number} [page=1] - Page number appended to the listing URL.
 * @returns {Promise<Object[]|number>} Scraped releases when the response is ok,
 *   otherwise the response's `status` value.
 */
async function fetchLatest(site, page = 1) {
  if (site.parameters?.legacy) {
    const url = `${site.url}/videos/${page}`;
    // NOTE(review): getAll presumably yields one item per '.videoList' match in
    // `res.items`; confirm against utils/q.
    const res = await getAll(url, '.videoList');

    if (res.ok) {
      return scrapeLegacy(res.items, site);
    }

    return res.status;
  }

  const res = await get(`${site.url}/${page}`);

  if (res.ok) {
    return scrape(res.item.html, site);
  }

  return res.status;
}
|
||||
|
||||
/*
|
||||
@@ -153,13 +204,21 @@ async function fetchScene(url, site, release) {
|
||||
}
|
||||
|
||||
const { origin } = new URL(url);
|
||||
const res = await bhttp.get(url);
|
||||
const res = await get(url);
|
||||
|
||||
if (!res.ok) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
if (site.parameters?.legacy) {
|
||||
return scrapeSceneLegacy(res.item, url, site);
|
||||
}
|
||||
|
||||
if (!/https?:\/\/(www.)?bangbros.com\/?$/.test(origin)) {
|
||||
throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
|
||||
}
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
return scrapeScene(res.item.html, url, site);
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
|
||||
Reference in New Issue
Block a user