Added Bang Bros members page scraper (not for full scenes).
This commit is contained in:
@@ -8,6 +8,7 @@ const logger = require('../logger')(__filename);
|
||||
const slugify = require('../utils/slugify');
|
||||
const http = require('../utils/http');
|
||||
const qu = require('../utils/qu');
|
||||
const args = require('../argv');
|
||||
|
||||
function scrape(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
@@ -43,7 +44,7 @@ function scrape(html, site) {
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeLegacy(scenes, site) {
|
||||
function scrapeAllLegacy(scenes, site) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
|
||||
@@ -63,6 +64,38 @@ function scrapeLegacy(scenes, site) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Decode the JSON payload stored in a tile's `data-shoot` attribute.
 *
 * @param {?string} rawShoot - raw attribute value, possibly missing
 * @returns {?object} the parsed payload, or null when it is absent or not valid JSON
 */
function parseMemberShootData(rawShoot) {
	if (!rawShoot) {
		return null;
	}

	try {
		return JSON.parse(rawShoot);
	} catch (error) {
		// A broken payload must not abort the whole page; the caller falls back to the tile markup.
		return null;
	}
}

/**
 * Scrape every scene tile of a members-area listing page.
 *
 * Values are taken from the tile's `data-shoot` JSON payload when available, and from the
 * tile's markup otherwise. Fields that have no markup fallback (shoot ID, actors, trailer,
 * tags) are left undefined when the payload does not provide them.
 *
 * @param {Array<{ query: object, el: object }>} scenes - listing items, each with its query helpers and root element
 * @param {object} _channel - channel the listing belongs to (not used by this scraper)
 * @returns {object[]} one release object per tile
 */
function scrapeAllMembers(scenes, _channel) {
	return scenes.map(({ query, el }) => {
		const release = {};
		const data = parseMemberShootData(query.q(el, null, 'data-shoot'));

		release.entryId = data?.id || query.url('a.etLnk')?.match(/\d+$/)?.[0];
		release.shootId = data?.code;

		// The payload may be absent entirely, so guard it like every other field.
		release.url = data?.url ? qu.prefixUrl(data.url, 'https://members.bangbros.com') : query.url('a.etLnk');

		release.title = data?.title || query.cnt('.etl-hdd');
		release.description = data?.description || query.cnt('.etl-desc');

		release.date = query.date('.etl-dt', 'MMM DD, YYYY', /\w{3} \d{1,2}, \d{4}/);

		// A payload without a model list yields no actors rather than a crash.
		release.actors = data?.model?.map((actor) => ({
			name: actor.name,
			url: qu.prefixUrl(actor.url, 'https://members.bangbros.com'),
		}));

		const rolloverUrl = query.q('.rollover-image', 'data-rollover-url');

		release.poster = data?.image || query.img('.rollover-image', 'data-initial-image-url');

		if (rolloverUrl) {
			// Builds 15 numbered frame URLs (1.jpg … 15.jpg) from the rollover base URL.
			release.photos = Array.from({ length: 15 }, (value, index) => `${rolloverUrl}${index + 1}.jpg`);
		}

		release.trailer = data?.trailer;
		release.tags = data?.tag?.map((tag) => tag.name);

		return release;
	});
}
|
||||
|
||||
/* no dates available, breaks database
|
||||
function scrapeUpcoming(html, site) {
|
||||
const { document } = ex(html);
|
||||
@@ -147,6 +180,30 @@ function scrapeSceneLegacy({ query }, url) {
|
||||
return release;
|
||||
}
|
||||
|
||||
/**
 * Scrape a single scene page from the members area.
 *
 * @param {{ query: object }} page - page item exposing the query helpers bound to the scene document
 * @param {string} url - absolute URL of the scene page; its trailing number becomes the entry ID
 * @returns {object} the scraped release
 * @throws {TypeError} when `url` is not a valid absolute URL
 */
function scrapeSceneMembers({ query }, url) {
	const release = {};

	// Leave the entry ID undefined instead of crashing when the path has no trailing number.
	release.entryId = new URL(url).pathname.match(/(\d+)\/?$/)?.[1];
	release.shootId = query.img('.player img')?.match(/\/shoots\/(\w+)\//)?.[1];

	release.title = query.cnt('.vdo-hdd1');
	release.description = query.cnt('.ndcp');

	release.actors = query.all('.vdsc a[href*="/model"]').map((actorEl) => ({
		name: query.cnt(actorEl, 'span'),
		url: query.url(actorEl, null, 'href', { origin: 'https://members.bangbros.com' }),
		avatar: query.img(actorEl, 'img'),
	}));

	release.date = query.date('.ran:nth-child(2)', 'MMM DD, YYYY', /\w{3} \d{1,2}, \d{4}/);
	release.duration = query.duration('.ran:nth-child(3)');

	release.tags = query.cnts('.tag a[href*="/tags"]');
	// The site link text is slugified with an empty delimiter to form the channel identifier.
	release.channel = slugify(query.cnt('.tag a[href*="/site"]'), '');

	return release;
}
|
||||
|
||||
function scrapeProfile(html, scope) {
|
||||
const { query } = qu.ex(html);
|
||||
const profile = {};
|
||||
@@ -167,17 +224,6 @@ function scrapeProfileSearch(html, actorName) {
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
if (site.parameters?.legacy) {
|
||||
const url = `${site.parameters?.latest || site.url}/videos/${page}`;
|
||||
const res = await qu.getAll(url, '.videoList');
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeLegacy(res.items, site);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
const res = await qu.get(`${site.parameters?.latest || site.url}/${page}`);
|
||||
|
||||
if (res.ok) {
|
||||
@@ -187,6 +233,39 @@ async function fetchLatest(site, page = 1) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Fetch one page of the latest scenes from a channel's members area.
 *
 * @param {object} channel - channel being scraped; `channel.name` is used in error messages
 * @param {number} [page=1] - listing page number
 * @param {{ parameters?: object }} [options] - scraper options; `parameters.product` is the members-area product ID
 * @returns {Promise<object[]|*>} the scraped releases, or the response status when the request failed
 * @throws {Error} when no product ID is configured or no `--cookie` argument was supplied
 */
async function fetchLatestMembers(channel, page = 1, { parameters } = {}) {
	// Tolerate a missing options object or parameters so the caller gets the descriptive error below.
	if (!parameters?.product) {
		throw new Error(`No member area product ID known for '${channel.name}'`);
	}

	if (!args.cookie) {
		throw new Error(`Please specify --cookie "PHPSESSID=xxx" to access the '${channel.name}' members area.`);
	}

	const url = `https://members.bangbros.com/product/${parameters.product}/videos/latest/${page}`;

	// The session cookie from the command line is passed along with the request.
	const res = await qu.getAll(url, '.thumbHolder .echThumb', {
		cookie: args.cookie,
	});

	if (res.ok) {
		return scrapeAllMembers(res.items, channel);
	}

	return res.status;
}
|
||||
|
||||
/**
 * Fetch one page of the latest scenes from a site that uses the legacy layout.
 *
 * @param {object} site - site being scraped; `site.parameters.latest` overrides `site.url` as the base URL
 * @param {number} [page=1] - listing page number
 * @returns {Promise<object[]|*>} the scraped releases, or the response status when the request failed
 */
async function fetchLatestLegacy(site, page = 1) {
	const baseUrl = site.parameters?.latest || site.url;
	const response = await qu.getAll(`${baseUrl}/videos/${page}`, '.videoList');

	return response.ok
		? scrapeAllLegacy(response.items, site)
		: response.status;
}
|
||||
|
||||
/*
|
||||
async function fetchUpcoming(site) {
|
||||
const res = await http.get('https://www.bangbros.com');
|
||||
@@ -218,6 +297,26 @@ async function fetchScene(url, site, release) {
|
||||
return scrapeScene(res.item.html, url, site);
|
||||
}
|
||||
|
||||
/**
 * Fetch and scrape a single scene page from the members area.
 *
 * @param {string} url - absolute URL of the scene page
 * @param {object} baseRelease - release data gathered so far (not used by this fetcher)
 * @param {object} channel - channel being scraped; `channel.name` is used in error messages
 * @param {{ parameters?: object }} [options] - scraper options; `parameters.product` is the members-area product ID
 * @returns {Promise<object|*>} the scraped release, or the response status when the request failed
 * @throws {Error} when no product ID is configured or no `--cookie` argument was supplied
 */
async function fetchSceneMembers(url, baseRelease, channel, { parameters } = {}) {
	// Tolerate a missing options object or parameters so the caller gets the descriptive error below.
	// NOTE(review): the product ID is only checked here, not used to build the request — confirm the check is intended.
	if (!parameters?.product) {
		throw new Error(`No member area product ID known for '${channel.name}'`);
	}

	if (!args.cookie) {
		throw new Error(`Please specify --cookie "PHPSESSID=xxx" to access the '${channel.name}' members area.`);
	}

	// The session cookie from the command line is passed along with the request.
	const res = await qu.get(url, null, {
		cookie: args.cookie,
	});

	if (res.ok) {
		return scrapeSceneMembers(res.item, url, channel);
	}

	return res.status;
}
|
||||
|
||||
async function fetchProfile({ name: actorName }, scope) {
|
||||
const actorSlug = slugify(actorName);
|
||||
const url = `https://bangbros.com/search/${actorSlug}`;
|
||||
@@ -242,5 +341,12 @@ module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
legacy: {
|
||||
fetchLatest: fetchLatestLegacy,
|
||||
},
|
||||
members: {
|
||||
fetchLatest: fetchLatestMembers,
|
||||
fetchScene: fetchSceneMembers,
|
||||
},
|
||||
// fetchUpcoming, no dates available
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user