Added avatar and actor releases to Bang Bros scraper.

This commit is contained in:
ThePendulum 2020-01-31 21:43:16 +01:00
parent c8671afe47
commit b7f51a8deb
5 changed files with 273 additions and 179 deletions

View File

@ -261,420 +261,420 @@ function getSites(networksMap) {
},
// BANGBROS
{
slug: 'assparade',
network_id: networksMap.bangbros,
name: 'Ass Parade',
url: 'https://bangbros.com/websites/assparade',
slug: 'assparade',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'ap' }),
},
{
slug: 'avaspice',
network_id: networksMap.bangbros,
name: 'AvaSpice',
url: 'https://bangbros.com/websites/avaspice',
slug: 'avaspice',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'av' }),
},
{
slug: 'backroomfacials',
network_id: networksMap.bangbros,
name: 'Back Room Facials',
url: 'https://bangbros.com/websites/backroomfacials',
slug: 'backroomfacials',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'brf' }),
},
{
slug: 'backroommilf',
network_id: networksMap.bangbros,
name: 'Backroom MILF',
url: 'https://bangbros.com/websites/backroommilf',
slug: 'backroommilf',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'mf' }),
},
{
slug: 'ballhoneys',
network_id: networksMap.bangbros,
name: 'Ball Honeys',
url: 'https://bangbros.com/websites/ballhoneys',
slug: 'ballhoneys',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'es' }),
},
{
slug: 'bangbros18',
network_id: networksMap.bangbros,
name: 'BangBros 18',
url: 'https://bangbros.com/websites/bangbros18',
slug: 'bangbros18',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'bbe' }),
},
{
slug: 'bangbrosangels',
network_id: networksMap.bangbros,
name: 'BangBros Angels',
url: 'https://bangbros.com/websites/bangbrosangels',
slug: 'bangbrosangels',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'bng' }),
},
{
slug: 'bangbrosclips',
network_id: networksMap.bangbros,
name: 'Bangbros Clips',
url: 'https://bangbros.com/websites/bangbrosclips',
slug: 'bangbrosclips',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'bbc' }),
},
{
slug: 'bangbrosremastered',
network_id: networksMap.bangbros,
name: 'BangBros Remastered',
url: 'https://bangbros.com/websites/bangbrosremastered',
url: 'https://bangbros.com/websites/remaster',
slug: 'bangbrosremastered',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'rm' }),
},
{
slug: 'bangbus',
network_id: networksMap.bangbros,
name: 'Bang Bus',
url: 'https://bangbros.com/websites/bangbus',
slug: 'bangbus',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'bb' }),
},
{
slug: 'bangbroscasting',
network_id: networksMap.bangbros,
name: 'Bang Casting',
url: 'https://bangbros.com/websites/bangcasting',
slug: 'bangbroscasting',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'hih' }),
},
{
slug: 'bangpov',
network_id: networksMap.bangbros,
name: 'Bang POV',
url: 'https://bangbros.com/websites/bangpov',
slug: 'bangpov',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'bpov' }),
},
{
slug: 'bangtryouts',
network_id: networksMap.bangbros,
name: 'Bang Tryouts',
url: 'https://bangbros.com/websites/bangtryouts',
slug: 'bangtryouts',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'bto' }),
},
{
slug: 'bigmouthfuls',
network_id: networksMap.bangbros,
name: 'Big Mouthfuls',
url: 'https://bangbros.com/websites/bigmouthfuls',
slug: 'bigmouthfuls',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'bmf' }),
},
{
slug: 'bigtitcreampie',
network_id: networksMap.bangbros,
name: 'Big Tit Cream Pie',
url: 'https://bangbros.com/websites/bigtitcreampie',
slug: 'bigtitcreampie',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'btcp' }),
},
{
slug: 'bigtitsroundasses',
network_id: networksMap.bangbros,
name: 'Big Tits, Round Asses',
url: 'https://bangbros.com/websites/bigtitsroundasses',
slug: 'bigtitsroundasses',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'btra' }),
},
{
slug: 'blowjobfridays',
network_id: networksMap.bangbros,
name: 'BlowJob Fridays',
url: 'https://bangbros.com/websites/blowjobfridays',
slug: 'blowjobfridays',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'bj' }),
},
{
slug: 'blowjobninjas',
network_id: networksMap.bangbros,
name: 'Blowjob Ninjas',
url: 'https://bangbros.com/websites/blowjobninjas',
slug: 'blowjobninjas',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'aa' }),
},
{
slug: 'boobsquad',
network_id: networksMap.bangbros,
name: 'Boob Squad',
url: 'https://bangbros.com/websites/boobsquad',
slug: 'boobsquad',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'bs' }),
},
{
slug: 'brownbunnies',
network_id: networksMap.bangbros,
name: 'Brown Bunnies',
url: 'https://bangbros.com/websites/brownbunnies',
slug: 'brownbunnies',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'bkb' }),
},
{
slug: 'canhescore',
network_id: networksMap.bangbros,
name: 'Can He Score?',
url: 'https://bangbros.com/websites/canhescore',
slug: 'canhescore',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'bd' }),
},
{
slug: 'casting',
network_id: networksMap.bangbros,
name: 'Casting',
url: 'https://bangbros.com/websites/casting',
slug: 'casting',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'ca' }),
},
{
slug: 'chongas',
network_id: networksMap.bangbros,
name: 'Chongas',
url: 'https://bangbros.com/websites/chongas',
slug: 'chongas',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'ch' }),
},
{
slug: 'colombiafuckfest',
network_id: networksMap.bangbros,
name: 'Colombia Fuck Fest',
url: 'https://bangbros.com/websites/colombiafuckfest',
slug: 'colombiafuckfest',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'cff' }),
},
{
slug: 'dirtyworldtour',
network_id: networksMap.bangbros,
name: 'Dirty World Tour',
url: 'https://bangbros.com/websites/dirtyworldtour',
slug: 'dirtyworldtour',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'bf' }),
},
{
slug: 'dorminvasion',
network_id: networksMap.bangbros,
name: 'Dorm Invasion',
url: 'https://bangbros.com/websites/dorminvasion',
slug: 'dorminvasion',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'di' }),
},
{
slug: 'facialfest',
network_id: networksMap.bangbros,
name: 'Facial Fest',
url: 'https://bangbros.com/websites/facialfest',
slug: 'facialfest',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'ff' }),
},
{
slug: 'fuckteamfive',
network_id: networksMap.bangbros,
name: 'Fuck Team Five',
url: 'https://bangbros.com/websites/fuckteamfive',
slug: 'fuckteamfive',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'bbw' }),
},
{
slug: 'gloryholeloads',
network_id: networksMap.bangbros,
name: 'Glory Hole Loads',
url: 'https://bangbros.com/websites/gloryholeloads',
slug: 'gloryholeloads',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'ghl' }),
},
{
slug: 'latinarampage',
network_id: networksMap.bangbros,
name: 'Latina Rampage',
url: 'https://bangbros.com/websites/latinarampage',
slug: 'latinarampage',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'lrp' }),
},
{
slug: 'livingwithanna',
network_id: networksMap.bangbros,
name: 'Living With Anna',
url: 'https://bangbros.com/websites/livingwithanna',
slug: 'livingwithanna',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'lr' }),
},
{
slug: 'magicalfeet',
network_id: networksMap.bangbros,
name: 'Magical Feet',
url: 'https://bangbros.com/websites/magicalfeet',
slug: 'magicalfeet',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'fj' }),
},
{
slug: 'milflessons',
network_id: networksMap.bangbros,
name: 'MILF Lessons',
url: 'https://bangbros.com/websites/milflessons',
description: null,
parameters: null,
},
{
slug: 'milfsoup',
network_id: networksMap.bangbros,
name: 'Milf Soup',
url: 'https://bangbros.com/websites/milfsoup',
slug: 'milfsoup',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'ms' }),
},
{
slug: 'momishorny',
network_id: networksMap.bangbros,
name: 'MomIsHorny',
url: 'https://bangbros.com/websites/momishorny',
slug: 'momishorny',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'mih' }),
},
{
slug: 'monstersofcock',
network_id: networksMap.bangbros,
name: 'Monsters of Cock',
url: 'https://bangbros.com/websites/monstersofcock',
slug: 'monstersofcock',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'mc' }),
},
{
slug: 'mranal',
network_id: networksMap.bangbros,
name: 'Mr. Anal',
url: 'https://bangbros.com/websites/mranal',
description: null,
parameters: null,
},
{
slug: 'mrcameltoe',
network_id: networksMap.bangbros,
name: 'Mr CamelToe',
url: 'https://bangbros.com/websites/mrcameltoe',
slug: 'mrcameltoe',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'ct' }),
},
{
slug: 'mydirtymaid',
network_id: networksMap.bangbros,
name: 'My Dirty Maid',
url: 'https://bangbros.com/websites/mydirtymaid',
slug: 'mydirtymaid',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'mda' }),
},
{
slug: 'mylifeinbrazil',
network_id: networksMap.bangbros,
name: 'My Life In Brazil',
url: 'https://bangbros.com/websites/mylifeinbrazil',
slug: 'mylifeinbrazil',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'mb' }),
},
{
slug: 'newbieblack',
network_id: networksMap.bangbros,
name: 'Newbie Black',
url: 'https://bangbros.com/websites/newbieblack',
slug: 'newbieblack',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'blkg' }),
},
{
slug: 'partyofthree',
network_id: networksMap.bangbros,
name: 'Party of Three',
url: 'https://bangbros.com/websites/partyof3',
url: 'https://bangbros.com/websites/partyofthree',
slug: 'partyofthree',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'ls' }),
},
{
slug: 'pawg',
network_id: networksMap.bangbros,
name: 'Pawg',
url: 'https://bangbros.com/websites/pawg',
slug: 'pawg',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'pwg' }),
},
{
slug: 'pennyshow',
network_id: networksMap.bangbros,
name: 'Penny Show',
url: 'https://bangbros.com/websites/pennyshow',
slug: 'pennyshow',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'ps' }),
},
{
slug: 'pornstarspa',
network_id: networksMap.bangbros,
name: 'Porn Star Spa',
url: 'https://bangbros.com/websites/pornstarspa',
slug: 'pornstarspa',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'pos' }),
},
{
slug: 'powermunch',
network_id: networksMap.bangbros,
name: 'Power Munch',
url: 'https://bangbros.com/websites/powermunch',
slug: 'powermunch',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'pm' }),
},
{
slug: 'publicbang',
network_id: networksMap.bangbros,
name: 'Public Bang',
url: 'https://bangbros.com/websites/publicbang',
slug: 'publicbang',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'pb' }),
},
{
slug: 'sluttywhitegirls',
network_id: networksMap.bangbros,
name: 'Slutty White Girls',
url: 'https://bangbros.com/websites/sluttywhitegirls',
slug: 'sluttywhitegirls',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'swg' }),
},
{
slug: 'stepmomvideos',
network_id: networksMap.bangbros,
name: 'Stepmom Videos',
url: 'https://bangbros.com/websites/stepmomvideos',
slug: 'stepmomvideos',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'smv' }),
},
{
slug: 'streetranger',
network_id: networksMap.bangbros,
name: 'Street Ranger',
url: 'https://bangbros.com/websites/streetranger',
url: 'https://bangbros.com/websites/thewheeler',
slug: 'streetranger',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'sg' }),
},
{
slug: 'tugjobs',
network_id: networksMap.bangbros,
name: 'Tugjobs',
url: 'https://bangbros.com/websites/tugjobs',
slug: 'tugjobs',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'hj' }),
},
{
slug: 'workinglatinas',
network_id: networksMap.bangbros,
name: 'Working Latinas',
url: 'https://bangbros.com/websites/workinglatinas',
slug: 'workinglatinas',
description: null,
parameters: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'lw' }),
},
{
name: 'MILF Lessons',
url: 'https://bangbros.com/websites/milflessons',
slug: 'milflessons',
description: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'ml' }),
},
{
name: 'Mr. Anal',
url: 'https://bangbros.com/websites/mranal',
slug: 'mranal',
description: null,
network_id: networksMap.bangbros,
parameters: JSON.stringify({ code: 'ma' }),
},
// BLOWPASS
{

View File

@ -30,7 +30,12 @@ async function findSite(url, release) {
return null;
}
async function scrapeRelease(url, release, type = 'scene') {
async function scrapeRelease(source, basicRelease = null, type = 'scene') {
// profile scraper may return either URLs or pre-scraped scenes
const sourceIsUrl = typeof source === 'string';
const url = sourceIsUrl ? source : source.url;
const release = sourceIsUrl ? basicRelease : source;
const site = await findSite(url, release);
if (!site) {
@ -61,12 +66,13 @@ async function scrapeRelease(url, release, type = 'scene') {
return {
...scrapedRelease,
...release,
site,
};
}
async function scrapeReleases(urls, release, type = 'scene') {
const scrapedReleases = await Promise.map(urls, async url => scrapeRelease(url, release, type), {
async function scrapeReleases(sources, release = null, type = 'scene') {
const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, release, type), {
concurrency: 5,
});

View File

@ -5,7 +5,10 @@ const bhttp = require('bhttp');
const cheerio = require('cheerio');
const moment = require('moment');
function scrapeLatest(html, site) {
const slugify = require('../utils/slugify');
const { ex } = require('../utils/q');
function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const sceneElements = $('.echThumb').toArray();
@ -27,6 +30,7 @@ function scrapeLatest(html, site) {
const photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);
const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
const channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0];
return {
url,
@ -40,11 +44,41 @@ function scrapeLatest(html, site) {
photos,
rating: null,
site,
channel,
};
});
}
async function scrapeScene(html, url, site) {
/* no dates available, breaks database
function scrapeUpcoming(html, site) {
const { document } = ex(html);
return ctxa(document, 'a[id*="upcoming-videos"]').map(({ element, q }) => {
const release = {};
[release.shootId] = element.id.split('-').slice(-1);
const siteCode = release.shootId.match(/[a-z]+/)[0];
if (siteCode !== site.parameters.code) {
return null;
}
const posterEl = q('img');
[release.entryId] = element.href.split('/')[1].match(/\d+/);
release.url = `https://bangbros.com${element.href}`;
release.title = posterEl.alt;
release.poster = `https:${posterEl.src}`;
release.actors = q('.castName', true).split(/ in/g).slice(0, -1).map(actorName => actorName.trim());
console.log(release);
return release;
}).filter(Boolean);
}
*/
function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const sceneElement = $('.playerSection');
@ -88,24 +122,77 @@ async function scrapeScene(html, url, site) {
};
}
/**
 * Build an actor profile from the markup of a Bang Bros actor page.
 *
 * @param {string} html - markup of the actor page
 * @returns {Object} profile with `releases` (whatever `scrape` extracts from
 *   the same markup) and, only when a profile picture source is found, `avatar`
 */
function scrapeProfile(html) {
    const { q: query } = ex(html);
    const avatarSrc = query('.profilePic img', 'src');

    // the picture source is prefixed with a scheme; presumably it is protocol-relative - verify against the live markup
    const avatarFields = avatarSrc ? { avatar: `https:${avatarSrc}` } : {};

    return {
        ...avatarFields,
        // no site is passed on: the scenes on an actor page are scraped without a site context
        releases: scrape(html),
    };
}
/**
 * Find the actor page URL in the markup of a Bang Bros search result page.
 *
 * The link is looked up by its `title` attribute, which must equal the actor
 * name exactly.
 *
 * @param {string} html - markup of the search result page
 * @param {string} actorName - actor name to match against the link title
 * @returns {?string} absolute actor page URL, or null when no link matches
 */
function scrapeProfileSearch(html, actorName) {
    const { q } = ex(html);

    // the name ends up inside a double-quoted selector string, so quotes and
    // backslashes must be escaped or the selector becomes malformed
    const escapedName = String(actorName).replace(/["\\]/g, '\\$&');
    const actorLink = q(`a[title="${escapedName}"]`, 'href');

    return actorLink ? `https://bangbros.com${actorLink}` : null;
}
/**
 * Fetch one page of a site's scene overview and scrape the scenes on it.
 *
 * @param {Object} site - site record; `site.url` is the overview base URL
 * @param {number} [page=1] - page number appended to the site URL
 * @returns {Promise<*>} whatever `scrape` extracts from the fetched markup
 */
async function fetchLatest(site, page = 1) {
    const res = await bhttp.get(`${site.url}/${page}`);

    // the overview scraper is named `scrape`; the former `scrapeLatest` call was stale and made this line unreachable
    return scrape(res.body.toString(), site);
}
async function fetchScene(url, site) {
/*
async function fetchUpcoming(site) {
const res = await bhttp.get('https://www.bangbros.com');
return scrapeUpcoming(res.body.toString(), site);
}
*/
/**
 * Fetch and scrape a single Bang Bros scene page.
 *
 * @param {string} url - absolute scene URL on bangbros.com (with or without `www.`)
 * @param {Object} site - site record handed through to `scrapeScene`
 * @param {Object} release - previously scraped release; must carry a `date`
 * @returns {Promise<*>} whatever `scrapeScene` extracts from the page
 * @throws {Error} when `release` has no date, or when the URL is not on bangbros.com
 * @throws {TypeError} when `url` cannot be parsed as a URL
 */
async function fetchScene(url, site, release) {
    if (!release?.date) {
        throw new Error(`Cannot fetch Bang Bros scenes from argument URL, as scene pages do not have release dates: ${url}`);
    }

    const { origin } = new URL(url);

    // validate before requesting, so a foreign URL never triggers an HTTP call;
    // dots are escaped and the pattern is anchored so look-alike hosts are rejected
    if (!/^https?:\/\/(www\.)?bangbros\.com$/.test(origin)) {
        throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
    }

    const res = await bhttp.get(url);

    return scrapeScene(res.body.toString(), url, site);
}
/**
 * Look an actor up through the Bang Bros search page and scrape their profile.
 *
 * @param {string} actorName - actor name; slugified for the search URL and
 *   passed unchanged to the search result scraper
 * @returns {Promise<?Object>} the scraped profile, or null when the search
 *   request fails, no matching actor link is found, or the actor page request fails
 */
async function fetchProfile(actorName) {
    const searchUrl = `https://bangbros.com/search/${slugify(actorName)}`;
    const searchRes = await bhttp.get(searchUrl);

    if (searchRes.statusCode !== 200) {
        return null;
    }

    const profileUrl = scrapeProfileSearch(searchRes.body.toString(), actorName);

    if (!profileUrl) {
        return null;
    }

    const profileRes = await bhttp.get(profileUrl);

    if (profileRes.statusCode !== 200) {
        return null;
    }

    return scrapeProfile(profileRes.body.toString());
}
// Entry points exposed by the Bang Bros scraper: latest scenes per site,
// single scenes, and actor profiles.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
// fetchUpcoming stays unexported: upcoming scenes have no dates available
// fetchUpcoming, no dates available
};

View File

@ -3,7 +3,6 @@
// releases
const babes = require('./babes');
const bang = require('./bang');
const bangbros = require('./bangbros');
const dogfart = require('./dogfart');
const digitalplayground = require('./digitalplayground');
const fakehub = require('./fakehub');
@ -23,6 +22,7 @@ const vixen = require('./vixen');
const vogov = require('./vogov');
// releases and profiles
const bangbros = require('./bangbros');
const blowpass = require('./blowpass');
const brazzers = require('./brazzers');
const ddfnetwork = require('./ddfnetwork');
@ -104,6 +104,7 @@ module.exports = {
boobpedia,
legalporno,
kellymadison,
bangbros,
pornhub,
freeones,
freeonesLegacy,

View File

@ -7,5 +7,5 @@ const SitePlugins = require('./sites');
module.exports = {
ActorPlugins,
SitePlugins,
ReleasePlugins: [],
// ReleasePlugins,
};