Added Bang Bros members page scraper (not for full scenes).

This commit is contained in:
DebaucheryLibrarian
2022-01-29 01:21:41 +01:00
parent a22c62d893
commit 5c5a11fdca
122 changed files with 134 additions and 13 deletions

View File

@@ -8,6 +8,7 @@ const logger = require('../logger')(__filename);
const slugify = require('../utils/slugify');
const http = require('../utils/http');
const qu = require('../utils/qu');
const args = require('../argv');
function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
@@ -43,7 +44,7 @@ function scrape(html, site) {
});
}
function scrapeLegacy(scenes, site) {
function scrapeAllLegacy(scenes, site) {
return scenes.map(({ query }) => {
const release = {};
@@ -63,6 +64,38 @@ function scrapeLegacy(scenes, site) {
});
}
/**
 * Scrape every scene thumbnail of a members-area listing page.
 *
 * Each thumbnail may carry a JSON payload in its `data-shoot` attribute. Values from that
 * payload are preferred; the rendered markup is used as fallback wherever the payload, or
 * one of its fields, is missing.
 *
 * @param {Array<{ query: object, el: object }>} scenes - One entry per thumbnail: the query
 *   helper for that thumbnail and the thumbnail element itself.
 * @param {object} _channel - Unused.
 * @returns {object[]} One release object per thumbnail.
 * @throws {SyntaxError} When a non-empty `data-shoot` attribute does not contain valid JSON.
 */
function scrapeAllMembers(scenes, _channel) {
	const membersOrigin = 'https://members.bangbros.com';

	return scenes.map(({ query, el }) => {
		const release = {};

		// A missing or empty attribute leaves `data` null, so every read below falls through to the markup.
		const rawData = query.q(el, null, 'data-shoot');
		const data = rawData ? JSON.parse(rawData) : null;

		release.entryId = data?.id || query.url('a.etLnk')?.match(/\d+$/)?.[0];
		release.shootId = data?.code;

		release.url = data?.url
			? qu.prefixUrl(data.url, membersOrigin)
			: query.url('a.etLnk');

		release.title = data?.title || query.cnt('.etl-hdd');
		release.description = data?.description || query.cnt('.etl-desc');

		release.date = query.date('.etl-dt', 'MMM DD, YYYY', /\w{3} \d{1,2}, \d{4}/);

		// The payload may lack a model list; leave actors undefined rather than crash.
		release.actors = data?.model?.map((actor) => ({
			name: actor.name,
			url: qu.prefixUrl(actor.url, membersOrigin),
		}));

		const rolloverUrl = query.q('.rollover-image', 'data-rollover-url');

		release.poster = data?.image || query.img('.rollover-image', 'data-initial-image-url');

		if (rolloverUrl) {
			// Rollover frames are addressed as <rolloverUrl>1.jpg through <rolloverUrl>15.jpg.
			release.photos = Array.from({ length: 15 }, (_value, index) => `${rolloverUrl}${index + 1}.jpg`);
		}

		release.trailer = data?.trailer;
		release.tags = data?.tag?.map((tag) => tag.name);

		return release;
	});
}
/* no dates available, breaks database
function scrapeUpcoming(html, site) {
const { document } = ex(html);
@@ -147,6 +180,30 @@ function scrapeSceneLegacy({ query }, url) {
return release;
}
/**
 * Scrape a single scene page of the members area.
 *
 * @param {{ query: object }} item - Fetched page; only its `query` helper is used.
 * @param {string} url - Absolute URL of the scene page; the entry ID is the trailing number of its path.
 * @returns {object} Release with entryId, shootId, title, description, actors, date, duration,
 *   tags and channel. `entryId` is undefined when the URL path does not end in digits.
 * @throws {TypeError} When `url` is not a valid absolute URL (raised by the `URL` constructor).
 */
function scrapeSceneMembers({ query }, url) {
	const release = {};

	// Guard the match like the shoot ID below: a path without a trailing number yields undefined instead of a TypeError.
	release.entryId = new URL(url).pathname.match(/(\d+)\/?$/)?.[1];
	// The shoot code is taken from the /shoots/<code>/ segment of the player image URL.
	release.shootId = query.img('.player img')?.match(/\/shoots\/(\w+)\//)?.[1];

	release.title = query.cnt('.vdo-hdd1');
	release.description = query.cnt('.ndcp');

	release.actors = query.all('.vdsc a[href*="/model"]').map((actorEl) => ({
		name: query.cnt(actorEl, 'span'),
		url: query.url(actorEl, null, 'href', { origin: 'https://members.bangbros.com' }),
		avatar: query.img(actorEl, 'img'),
	}));

	release.date = query.date('.ran:nth-child(2)', 'MMM DD, YYYY', /\w{3} \d{1,2}, \d{4}/);
	release.duration = query.duration('.ran:nth-child(3)');

	release.tags = query.cnts('.tag a[href*="/tags"]');
	release.channel = slugify(query.cnt('.tag a[href*="/site"]'), '');

	return release;
}
function scrapeProfile(html, scope) {
const { query } = qu.ex(html);
const profile = {};
@@ -167,17 +224,6 @@ function scrapeProfileSearch(html, actorName) {
}
async function fetchLatest(site, page = 1) {
if (site.parameters?.legacy) {
const url = `${site.parameters?.latest || site.url}/videos/${page}`;
const res = await qu.getAll(url, '.videoList');
if (res.ok) {
return scrapeLegacy(res.items, site);
}
return res.status;
}
const res = await qu.get(`${site.parameters?.latest || site.url}/${page}`);
if (res.ok) {
@@ -187,6 +233,39 @@ async function fetchLatest(site, page = 1) {
return res.status;
}
/**
 * Fetch one page of the latest videos from the members area and scrape its thumbnails.
 *
 * @param {object} channel - Channel being scraped; `channel.name` is used in error messages.
 * @param {number} [page=1] - Page number appended to the listing URL.
 * @param {object} [options] - Scraper options.
 * @param {object} [options.parameters] - Must provide `product`, the members-area product ID.
 * @returns {Promise<object[]|number>} Scraped releases, or `res.status` when the response is not ok.
 * @throws {Error} When no product ID is configured or no `--cookie` argument was given.
 */
async function fetchLatestMembers(channel, page = 1, { parameters } = {}) {
	// Defaulting the options keeps a missing third argument on the descriptive error path below.
	if (!parameters?.product) {
		throw new Error(`No member area product ID known for '${channel.name}'`);
	}

	if (!args.cookie) {
		throw new Error(`Please specify --cookie "PHPSESSID=xxx" to access the '${channel.name}' members area.`);
	}

	const url = `https://members.bangbros.com/product/${parameters.product}/videos/latest/${page}`;

	const res = await qu.getAll(url, '.thumbHolder .echThumb', {
		cookie: args.cookie,
	});

	if (res.ok) {
		return scrapeAllMembers(res.items, channel);
	}

	return res.status;
}
/**
 * Fetch one page of the legacy video listing and scrape its `.videoList` items.
 *
 * The listing lives under `site.parameters.latest` when that is set, otherwise under `site.url`.
 *
 * @param {object} site - Site being scraped.
 * @param {number} [page=1] - Page number appended to the listing URL.
 * @returns {Promise<*>} Result of `scrapeAllLegacy` on success, otherwise `res.status`.
 */
async function fetchLatestLegacy(site, page = 1) {
	const baseUrl = site.parameters?.latest || site.url;
	const res = await qu.getAll(`${baseUrl}/videos/${page}`, '.videoList');

	return res.ok
		? scrapeAllLegacy(res.items, site)
		: res.status;
}
/*
async function fetchUpcoming(site) {
const res = await http.get('https://www.bangbros.com');
@@ -218,6 +297,26 @@ async function fetchScene(url, site, release) {
return scrapeScene(res.item.html, url, site);
}
/**
 * Fetch a scene page from the members area and scrape it.
 *
 * @param {string} url - URL of the scene page.
 * @param {object} baseRelease - Unused.
 * @param {object} channel - Channel the scene belongs to; `channel.name` is used in error messages.
 * @param {object} [options] - Scraper options.
 * @param {object} [options.parameters] - Must provide `product`, the members-area product ID.
 * @returns {Promise<object|number>} Scraped release, or `res.status` when the response is not ok.
 * @throws {Error} When no product ID is configured or no `--cookie` argument was given.
 */
async function fetchSceneMembers(url, baseRelease, channel, { parameters } = {}) {
	// Defaulting the options keeps a missing fourth argument on the descriptive error path below.
	if (!parameters?.product) {
		throw new Error(`No member area product ID known for '${channel.name}'`);
	}

	if (!args.cookie) {
		throw new Error(`Please specify --cookie "PHPSESSID=xxx" to access the '${channel.name}' members area.`);
	}

	const res = await qu.get(url, null, {
		cookie: args.cookie,
	});

	if (res.ok) {
		return scrapeSceneMembers(res.item, url, channel);
	}

	return res.status;
}
async function fetchProfile({ name: actorName }, scope) {
const actorSlug = slugify(actorName);
const url = `https://bangbros.com/search/${actorSlug}`;
@@ -242,5 +341,12 @@ module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
legacy: {
fetchLatest: fetchLatestLegacy,
},
members: {
fetchLatest: fetchLatestMembers,
fetchScene: fetchSceneMembers,
},
// fetchUpcoming, no dates available
};