forked from DebaucheryLibrarian/traxxx
Added avatar and actor releases to Bang Bros scraper.
This commit is contained in:
@@ -30,7 +30,12 @@ async function findSite(url, release) {
|
||||
return null;
|
||||
}
|
||||
|
||||
async function scrapeRelease(url, release, type = 'scene') {
|
||||
async function scrapeRelease(source, basicRelease = null, type = 'scene') {
|
||||
// profile scraper may return either URLs or pre-scraped scenes
|
||||
const sourceIsUrl = typeof source === 'string';
|
||||
const url = sourceIsUrl ? source : source.url;
|
||||
const release = sourceIsUrl ? basicRelease : source;
|
||||
|
||||
const site = await findSite(url, release);
|
||||
|
||||
if (!site) {
|
||||
@@ -61,12 +66,13 @@ async function scrapeRelease(url, release, type = 'scene') {
|
||||
|
||||
return {
|
||||
...scrapedRelease,
|
||||
...release,
|
||||
site,
|
||||
};
|
||||
}
|
||||
|
||||
async function scrapeReleases(urls, release, type = 'scene') {
|
||||
const scrapedReleases = await Promise.map(urls, async url => scrapeRelease(url, release, type), {
|
||||
async function scrapeReleases(sources, release = null, type = 'scene') {
|
||||
const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, release, type), {
|
||||
concurrency: 5,
|
||||
});
|
||||
|
||||
|
||||
@@ -5,7 +5,10 @@ const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const slugify = require('../utils/slugify');
|
||||
const { ex } = require('../utils/q');
|
||||
|
||||
function scrape(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const sceneElements = $('.echThumb').toArray();
|
||||
|
||||
@@ -27,6 +30,7 @@ function scrapeLatest(html, site) {
|
||||
const photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);
|
||||
|
||||
const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
|
||||
const channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0];
|
||||
|
||||
return {
|
||||
url,
|
||||
@@ -40,11 +44,41 @@ function scrapeLatest(html, site) {
|
||||
photos,
|
||||
rating: null,
|
||||
site,
|
||||
channel,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, site) {
|
||||
/* no dates available, breaks database
|
||||
function scrapeUpcoming(html, site) {
|
||||
const { document } = ex(html);
|
||||
|
||||
return ctxa(document, 'a[id*="upcoming-videos"]').map(({ element, q }) => {
|
||||
const release = {};
|
||||
[release.shootId] = element.id.split('-').slice(-1);
|
||||
const siteCode = release.shootId.match(/[a-z]+/)[0];
|
||||
|
||||
if (siteCode !== site.parameters.code) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const posterEl = q('img');
|
||||
|
||||
[release.entryId] = element.href.split('/')[1].match(/\d+/);
|
||||
release.url = `https://bangbros.com${element.href}`;
|
||||
release.title = posterEl.alt;
|
||||
release.poster = `https:${posterEl.src}`;
|
||||
|
||||
release.actors = q('.castName', true).split(/ in/g).slice(0, -1).map(actorName => actorName.trim());
|
||||
|
||||
console.log(release);
|
||||
|
||||
return release;
|
||||
}).filter(Boolean);
|
||||
}
|
||||
*/
|
||||
|
||||
function scrapeScene(html, url, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const sceneElement = $('.playerSection');
|
||||
|
||||
@@ -88,24 +122,77 @@ async function scrapeScene(html, url, site) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Build an actor profile from the markup of a Bang Bros actor page.
 *
 * @param {string} html - Markup of the actor page.
 * @returns {Object} Profile with `releases` (whatever `scrape` extracts from
 *   the same markup) and, when a profile picture is present, an absolute
 *   `avatar` URL.
 */
function scrapeProfile(html) {
    const { q } = ex(html);
    const profile = {};

    const avatar = q('.profilePic img', 'src');

    if (avatar) {
        // Protocol-relative sources (//host/path) only need a scheme. Anything
        // else (already absolute, or a site-relative path) is resolved against
        // the site origin instead of being blindly prefixed with `https:`.
        profile.avatar = avatar.startsWith('//')
            ? `https:${avatar}`
            : new URL(avatar, 'https://bangbros.com').href;
    }

    // No site is passed here, so the scraped releases carry an undefined `site`.
    profile.releases = scrape(html);

    return profile;
}
|
||||
|
||||
/**
 * Find the actor page link in a Bang Bros search result page.
 *
 * @param {string} html - Markup of the search result page.
 * @param {string} actorName - Actor name, matched exactly against link titles.
 * @returns {string|null} Absolute actor page URL, or null when no link has
 *   the actor's name as its title.
 */
function scrapeProfileSearch(html, actorName) {
    const { q } = ex(html);

    // The name ends up inside a double-quoted attribute selector; an unescaped
    // quote or backslash would terminate or corrupt the selector string.
    const escapedName = String(actorName).replace(/["\\]/g, '\\$&');
    const actorLink = q(`a[title="${escapedName}"]`, 'href');

    return actorLink ? `https://bangbros.com${actorLink}` : null;
}
|
||||
|
||||
/**
 * Fetch one page of a site's scene listing and scrape its scenes.
 *
 * @param {Object} site - Site record; `site.url` is the listing base URL.
 * @param {number} [page=1] - Page number appended to the listing URL.
 * @returns {Promise<*>} Whatever `scrape` extracts from the listing markup.
 */
async function fetchLatest(site, page = 1) {
    const res = await bhttp.get(`${site.url}/${page}`);

    // Single return through `scrape`; the earlier `scrapeLatest` call was a
    // leftover that made this line unreachable.
    return scrape(res.body.toString(), site);
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
/*
|
||||
async function fetchUpcoming(site) {
|
||||
const res = await bhttp.get('https://www.bangbros.com');
|
||||
|
||||
return scrapeUpcoming(res.body.toString(), site);
|
||||
}
|
||||
*/
|
||||
|
||||
/**
 * Fetch and scrape a single Bang Bros scene page.
 *
 * @param {string} url - Scene page URL; must be on bangbros.com (with or
 *   without `www.`).
 * @param {Object} site - Site record handed through to `scrapeScene`.
 * @param {Object} [release] - Previously scraped release; must carry a `date`.
 * @returns {Promise<*>} Whatever `scrapeScene` extracts from the page.
 * @throws {Error} When `release` has no date, or the URL is on another host.
 * @throws {TypeError} When `url` cannot be parsed as a URL.
 */
async function fetchScene(url, site, release) {
    if (!release?.date) {
        throw new Error(`Cannot fetch Bang Bros scenes from argument URL, as scene pages do not have release dates: ${url}`);
    }

    const { origin } = new URL(url);

    // Validate before making any request. `origin` never has a trailing slash,
    // and the dots are escaped so lookalike hosts cannot slip through.
    if (!/^https?:\/\/(www\.)?bangbros\.com$/.test(origin)) {
        throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
    }

    const res = await bhttp.get(url);

    return scrapeScene(res.body.toString(), url, site);
}
|
||||
|
||||
/**
 * Look an actor up through the Bang Bros search page and scrape their profile.
 *
 * @param {string} actorName - Actor name; slugified for the search URL and
 *   passed as-is to the search result matcher.
 * @returns {Promise<Object|null>} Scraped profile, or null when the search
 *   fails, no matching actor link is found, or the actor page does not
 *   respond with status 200.
 */
async function fetchProfile(actorName) {
    const searchUrl = `https://bangbros.com/search/${slugify(actorName)}`;
    const searchRes = await bhttp.get(searchUrl);

    if (searchRes.statusCode !== 200) {
        return null;
    }

    const profileUrl = scrapeProfileSearch(searchRes.body.toString(), actorName);

    if (!profileUrl) {
        return null;
    }

    const profileRes = await bhttp.get(profileUrl);

    return profileRes.statusCode === 200
        ? scrapeProfile(profileRes.body.toString())
        : null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
// fetchUpcoming, no dates available
|
||||
};
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
// releases
|
||||
const babes = require('./babes');
|
||||
const bang = require('./bang');
|
||||
const bangbros = require('./bangbros');
|
||||
const dogfart = require('./dogfart');
|
||||
const digitalplayground = require('./digitalplayground');
|
||||
const fakehub = require('./fakehub');
|
||||
@@ -23,6 +22,7 @@ const vixen = require('./vixen');
|
||||
const vogov = require('./vogov');
|
||||
|
||||
// releases and profiles
|
||||
const bangbros = require('./bangbros');
|
||||
const blowpass = require('./blowpass');
|
||||
const brazzers = require('./brazzers');
|
||||
const ddfnetwork = require('./ddfnetwork');
|
||||
@@ -104,6 +104,7 @@ module.exports = {
|
||||
boobpedia,
|
||||
legalporno,
|
||||
kellymadison,
|
||||
bangbros,
|
||||
pornhub,
|
||||
freeones,
|
||||
freeonesLegacy,
|
||||
|
||||
@@ -7,5 +7,5 @@ const SitePlugins = require('./sites');
|
||||
module.exports = {
|
||||
ActorPlugins,
|
||||
SitePlugins,
|
||||
ReleasePlugins: [],
|
||||
// ReleasePlugins,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user