Added media support to Bang Bros scraper. Added untracked files.

This commit is contained in:
2019-10-31 01:53:26 +01:00
parent 78e9f73ea0
commit 2198c7ceb0
4 changed files with 169 additions and 4 deletions

View File

@@ -22,6 +22,13 @@ function scrapeLatest(html, site) {
const date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
const actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();
const photoElement = $(element).find('.rollover-image');
const poster = `https:${photoElement.attr('data-original')}`;
const photosUrl = photoElement.attr('data-rollover-url');
const photosMaxIndex = photoElement.attr('data-rollover-max-index');
const photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);
const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
return {
@@ -32,6 +39,8 @@ function scrapeLatest(html, site) {
actors,
date,
duration,
poster,
photos,
rating: null,
site,
};
@@ -50,11 +59,18 @@ async function scrapeScene(html, url, site) {
const [siteName, ...actors] = sceneElement.find('.vdoCast a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
const siteId = siteName.replace(/[\s']+/g, '').toLowerCase();
const poster = `https:${$('img#player-overlay-image').attr('src')}`;
const trailer = `https:${$('source[type="video/mp4"]').attr('src')}`;
const firstPhotoUrl = `https:${$('img[data-slider-index="1"]').attr('src')}`;
// all scenes seem to have 12 album photos available, not always included on the page
const photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
const rawTags = $('.vdoTags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
const [channelSite, tags] = await Promise.all([
knex('sites')
.where({ id: siteId })
.where({ slug: siteId })
.orWhere({ name: siteName })
.first(),
matchTags(rawTags),
@@ -70,6 +86,11 @@ async function scrapeScene(html, url, site) {
description,
actors,
tags,
poster,
photos,
trailer: {
src: trailer,
},
rating: {
stars,
},
@@ -78,7 +99,7 @@ async function scrapeScene(html, url, site) {
}
/**
 * Fetch one page of a site's release listing and scrape it.
 *
 * @param {Object} site - Site record; its `slug` is used as the path segment of the listing URL.
 * @param {number} [page=1] - Listing page number appended to the URL.
 * @returns {Promise<*>} Whatever `scrapeLatest` returns for the fetched HTML and `site`.
 */
async function fetchLatest(site, page = 1) {
    // Request the listing once, addressed by the site's slug (not its id).
    const res = await bhttp.get(`https://bangbros.com/websites/${site.slug}/${page}`);

    return scrapeLatest(res.body.toString(), site);
}