'use strict'; /* eslint-disable newline-per-chained-call */ const cheerio = require('cheerio'); const moment = require('moment'); const logger = require('../logger')(__filename); const slugify = require('../utils/slugify'); const http = require('../utils/http'); const qu = require('../utils/qu'); const args = require('../argv'); function scrape(html, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const sceneElements = $('.echThumb').toArray(); return sceneElements.map((element) => { const release = {}; const sceneLinkElement = $(element).find('.thmb_lnk'); release.title = sceneLinkElement.attr('title'); release.url = site.parameters?.legacy || !site.parent ? `${site.url}${sceneLinkElement.attr('href')}` : `${site.parent.url}${sceneLinkElement.attr('href')}`; release.shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1]; release.entryId = new URL(release.url).pathname.match(/video(\d+)/)?.[1]; release.date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate(); release.actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray(); const photoElement = $(element).find('.rollover-image'); const photosUrl = photoElement.attr('data-rollover-url'); const photosMaxIndex = photoElement.attr('data-rollover-max-index'); release.poster = `https:${photoElement.attr('data-original')}`; release.photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`); release.duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds(); release.channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0]; return release; }); } function scrapeAllLegacy(scenes, site) { return scenes.map(({ query }) => { const release = {}; const pathname = query.url('.mainplayer a, .palyer a'); // sic release.url = `${site.url}${pathname}`; release.entryId = pathname.match(/video(\d+)/)?.[1]; release.title = query.q('h2', true); release.date = query.date('div:not(.videoDisc)', 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/); release.description = query.q('div + .videoDisc p', true); release.duration = query.dur('.videoTag .title'); release.poster = query.img('.mainplayer img, .palyer img'); // sic release.photos = query.imgs('article img').concat(qu.imgs('article img', 'data-original')).filter(Boolean); return release; }); } function scrapeAllMembers(scenes, _channel) { return scenes.map(({ query, el }) => { const release = {}; const data = JSON.parse(query.q(el, null, 'data-shoot')); release.entryId = data?.id || query.url('a.etLnk')?.match(/\d+$/)?.[0]; release.shootId = data?.code; release.url = data.url ? qu.prefixUrl(data.url, 'https://members.bangbros.com') : query.url('a.etLnk'); release.title = data?.title || query.cnt('.etl-hdd'); release.description = data?.description || query.cnt('.etl-desc'); release.date = query.date('.etl-dt', 'MMM DD, YYYY', /\w{3} \d{1,2}, \d{4}/); release.actors = data?.model.map((actor) => ({ name: actor.name, url: qu.prefixUrl(actor.url, 'https://members.bangbros.com'), })); const rolloverUrl = query.q('.rollover-image', 'data-rollover-url'); release.poster = data?.image || query.img('.rollover-image', 'data-initial-image-url'); if (rolloverUrl) { release.photos = Array.from({ length: 15 }, (value, index) => `${rolloverUrl}${index + 1}.jpg`); } release.trailer = data?.trailer; release.tags = data?.tag.map((tag) => tag.name); return release; }); } /* no dates available, breaks database function scrapeUpcoming(html, site) { const { document } = ex(html); return ctxa(document, 'a[id*="upcoming-videos"]').map(({ element, q }) => { const release = {}; [release.shootId] = element.id.split('-').slice(-1); const siteCode = release.shootId.match(/[a-z]+/)[0]; if (siteCode !== site.parameters.code) { return null; } const posterEl = q('img'); [release.entryId] = element.href.split('/')[1].match(/\d+/); release.url = `https://bangbros.com${element.href}`; release.title = posterEl.alt; release.poster = `https:${posterEl.src}`; release.actors = q('.castName', true).split(/ in/g).slice(0, -1).map(actorName => actorName.trim()); console.log(release); return release; }).filter(Boolean); } */ function scrapeScene(html, url, _site) { const { query } = qu.ex(html, '.playerSection'); const release = {}; const { pathname } = new URL(url); [release.shootId] = query.cnt('.vdoTags + .vdoCast')?.match(/\w+$/) || []; release.entryId = pathname.match(/video(\d+)/)?.[1]; release.title = query.cnt('.ps-vdoHdd h1'); release.description = query.cnt('.vdoDesc'); release.actors = query.all('a[href*="/model"]', true); release.tags = query.all('.vdoTags a', true); release.stars = Number(query.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20; const poster = query.img('img#player-overlay-image, img.playerPic'); if (poster) { release.poster = [ poster.replace('//big_trailer', '/big_trailer'), poster.replace(/\/?\/big_trailer/, '/members/450x340'), // load error fallback ]; } release.trailer = [ query.video('video source[type="video/mp4"]'), query.video('video source[type="application/x-mpegURL"]'), ]; // all scenes seem to have 12 album photos available, not always included on the page const firstPhotoUrl = qu.ex(html).query.img('img[data-slider-index="1"]'); release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`)); const [channel] = query.url('a[href*="/websites"]').match(/\w+$/); if (channel === 'bangcasting') release.channel = 'bangbroscasting'; if (channel === 'remaster') release.channel = 'bangbrosremastered'; else release.channel = channel; return release; } function scrapeSceneLegacy({ query }, url) { const release = {}; release.entryId = new URL(url).pathname.match(/video\d+/)?.[0]; release.title = query.q('h1', true); release.description = query.q('.videoDetail', true); release.duration = query.dur('.tags p span'); release.poster = query.img('#video_container + div img, .videoOverlay img'); return release; } function scrapeSceneMembers({ query }, url) { const release = {}; release.entryId = new URL(url).pathname.match(/(\d+)\/?$/)[1]; release.shootId = query.img('.player img')?.match(/\/shoots\/(\w+)\//)?.[1]; release.title = query.cnt('.vdo-hdd1'); release.description = query.cnt('.ndcp'); release.actors = query.all('.vdsc a[href*="/model"]').map((actorEl) => ({ name: query.cnt(actorEl, 'span'), url: query.url(actorEl, null, 'href', { origin: 'https://members.bangbros.com' }), avatar: query.img(actorEl, 'img'), })); release.date = query.date('.ran:nth-child(2)', 'MMM DD, YYYY', /\w{3} \d{1,2}, \d{4}/); release.duration = query.duration('.ran:nth-child(3)'); release.tags = query.cnts('.tag a[href*="/tags"]'); release.channel = slugify(query.cnt('.tag a[href*="/site"]'), ''); return release; } function scrapeProfile(html, scope) { const { query } = qu.ex(html); const profile = {}; const avatar = query.q('.profilePic img', 'src'); if (avatar) profile.avatar = `https:${avatar}`; profile.releases = scrape(html, scope.entity); return profile; } function scrapeProfileSearch(html, actorName) { const { query } = qu.ex(html); const actorLink = query.url(`a[title="${actorName}" i][href*="model"]`); return actorLink ? `https://bangbros.com${actorLink}` : null; } async function fetchLatest(site, page = 1) { const res = await qu.get(`${site.parameters?.latest || site.url}/${page}`); if (res.ok) { return scrape(res.item.html, site); } return res.status; } async function fetchLatestMembers(channel, page = 1, { parameters }) { if (!parameters.product) { throw new Error(`No member area product ID known for '${channel.name}'`); } if (!args.cookie) { throw new Error(`Please specifiy --cookie "PHPSESSID=xxx" to access the '${channel.name}' members area.`); } const url = `https://members.bangbros.com/product/${parameters.product}/videos/latest/${page}`; const res = await qu.getAll(url, '.thumbHolder .echThumb', { cookie: args.cookie, }); if (res.ok) { return scrapeAllMembers(res.items, channel); } return res.status; } async function fetchLatestLegacy(site, page = 1) { const url = `${site.parameters?.latest || site.url}/videos/${page}`; const res = await qu.getAll(url, '.videoList'); if (res.ok) { return scrapeAllLegacy(res.items, site); } return res.status; } /* async function fetchUpcoming(site) { const res = await http.get('https://www.bangbros.com'); return scrapeUpcoming(res.body.toString(), site); } */ async function fetchScene(url, site, release) { if (!release?.date) { logger.warn(`Scraping Bang Bros scene from URL without release date: ${url}`); } const { origin } = new URL(url); const res = await qu.get(url); if (!res.ok) { return res.status; } if (site.parameters?.legacy) { return scrapeSceneLegacy(res.item, url, site); } if (!/https?:\/\/(www.)?(bangbros|gaywire).com\/?$/.test(origin)) { throw new Error('Cannot fetch from this URL. Please find the scene on Bang Bros or Gaywire and try again.'); } return scrapeScene(res.item.html, url, site); } async function fetchSceneMembers(url, baseRelease, channel, { parameters }) { if (!parameters.product) { throw new Error(`No member area product ID known for '${channel.name}'`); } if (!args.cookie) { throw new Error(`Please specifiy --cookie "PHPSESSID=xxx" to access the '${channel.name}' members area.`); } const res = await qu.get(url, null, { cookie: args.cookie, }); if (res.ok) { return scrapeSceneMembers(res.item, url, channel); } return res.status; } async function fetchProfile({ name: actorName }, scope) { const actorSlug = slugify(actorName); const url = `https://bangbros.com/search/${actorSlug}`; const res = await http.get(url); if (res.statusCode === 200) { const actorUrl = scrapeProfileSearch(res.body.toString(), actorName); if (actorUrl) { const actorRes = await http.get(actorUrl); if (actorRes.statusCode === 200) { return scrapeProfile(actorRes.body.toString(), scope); } } } return null; } module.exports = { fetchLatest, fetchScene, fetchProfile, legacy: { fetchLatest: fetchLatestLegacy, }, members: { fetchLatest: fetchLatestMembers, fetchScene: fetchSceneMembers, }, // fetchUpcoming, no dates available };