'use strict'; /* eslint-disable newline-per-chained-call */ const bhttp = require('bhttp'); const cheerio = require('cheerio'); const moment = require('moment'); const logger = require('../logger')(__filename); const slugify = require('../utils/slugify'); const { ex } = require('../utils/q'); function scrape(html, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const sceneElements = $('.echThumb').toArray(); return sceneElements.map((element) => { const sceneLinkElement = $(element).find('.thmb_lnk'); const title = sceneLinkElement.attr('title'); const url = `https://bangbros.com${sceneLinkElement.attr('href')}`; const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1]; const entryId = url.split('/')[3].slice(5); const date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate(); const actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray(); const photoElement = $(element).find('.rollover-image'); const poster = `https:${photoElement.attr('data-original')}`; const photosUrl = photoElement.attr('data-rollover-url'); const photosMaxIndex = photoElement.attr('data-rollover-max-index'); const photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`); const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds(); const channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0]; return { url, entryId, shootId, title, actors, date, duration, poster, photos, rating: null, site, channel, }; }); } /* no dates available, breaks database function scrapeUpcoming(html, site) { const { document } = ex(html); return ctxa(document, 'a[id*="upcoming-videos"]').map(({ element, q }) => { const release = {}; [release.shootId] = element.id.split('-').slice(-1); const siteCode = release.shootId.match(/[a-z]+/)[0]; if (siteCode !== site.parameters.code) { return null; } const posterEl = q('img'); [release.entryId] = element.href.split('/')[1].match(/\d+/); release.url = `https://bangbros.com${element.href}`; release.title = posterEl.alt; release.poster = `https:${posterEl.src}`; release.actors = q('.castName', true).split(/ in/g).slice(0, -1).map(actorName => actorName.trim()); console.log(release); return release; }).filter(Boolean); } */ function scrapeScene(html, url, _site) { const { qu } = ex(html, '.playerSection'); const release = {}; [release.shootId] = qu.q('.vdoTags + .vdoCast', true).match(/\w+$/); [release.entryId] = url.split('/')[3].match(/\d+$/); release.title = qu.q('.ps-vdoHdd h1', true); release.description = qu.q('.vdoDesc', true); release.actors = qu.all('a[href*="/model"]', true); release.tags = qu.all('.vdoTags a', true); release.stars = Number(qu.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20; const poster = qu.img('img#player-overlay-image'); release.poster = [ poster, poster.replace('/big_trailer', '/members/450x340'), // load error fallback ]; release.trailer = { src: qu.trailer() }; // all scenes seem to have 12 album photos available, not always included on the page const firstPhotoUrl = ex(html).qu.img('img[data-slider-index="1"]'); release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`)); const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/); if (channel === 'bangcasting') release.channel = 'bangbroscasting'; if (channel === 'remaster') release.channel = 'bangbrosremastered'; else release.channel = channel; return release; } function scrapeProfile(html) { const { q } = ex(html); const profile = {}; const avatar = q('.profilePic img', 'src'); if (avatar) profile.avatar = `https:${avatar}`; profile.releases = scrape(html); return profile; } function scrapeProfileSearch(html, actorName) { const { qu } = ex(html); const actorLink = qu.url(`a[title="${actorName}" i][href*="model"]`); return actorLink ? `https://bangbros.com${actorLink}` : null; } async function fetchLatest(site, page = 1) { const res = await bhttp.get(`${site.url}/${page}`); return scrape(res.body.toString(), site); } /* async function fetchUpcoming(site) { const res = await bhttp.get('https://www.bangbros.com'); return scrapeUpcoming(res.body.toString(), site); } */ async function fetchScene(url, site, release) { if (!release?.date) { logger.warn(`Scraping Bang Bros scene from URL without release date: ${url}`); } const { origin } = new URL(url); const res = await bhttp.get(url); if (!/https?:\/\/(www.)?bangbros.com\/?$/.test(origin)) { throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.'); } return scrapeScene(res.body.toString(), url, site); } async function fetchProfile(actorName) { const actorSlug = slugify(actorName); const url = `https://bangbros.com/search/${actorSlug}`; const res = await bhttp.get(url); if (res.statusCode === 200) { const actorUrl = scrapeProfileSearch(res.body.toString(), actorName); if (actorUrl) { const actorRes = await bhttp.get(actorUrl); if (actorRes.statusCode === 200) { return scrapeProfile(actorRes.body.toString()); } } } return null; } module.exports = { fetchLatest, fetchScene, fetchProfile, // fetchUpcoming, no dates available };