Added Bang! deep scrape. Improved network page layout. Added Bang Bros logos.

2020-01-07 04:23:28 +01:00
parent 89064e9e0c
commit 0a19f2e624
71 changed files with 194 additions and 116 deletions


@@ -5,9 +5,6 @@ const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 const moment = require('moment');
-const knex = require('../knex');
-const { matchTags } = require('../tags');
 function scrapeLatest(html, site) {
   const $ = cheerio.load(html, { normalizeWhitespace: true });
   const sceneElements = $('.echThumb').toArray();
@@ -57,7 +54,7 @@ async function scrapeScene(html, url, site) {
   const description = sceneElement.find('.vdoDesc').text().trim();
   const [siteName, ...actors] = sceneElement.find('.vdoCast a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
-  const siteId = siteName.replace(/[\s']+/g, '').toLowerCase();
+  const siteSlug = siteName.replace(/[\s']+/g, '').toLowerCase();
   const poster = `https:${$('img#player-overlay-image').attr('src')}`;
   const trailer = `https:${$('source[type="video/mp4"]').attr('src')}`;
@@ -66,17 +63,7 @@ async function scrapeScene(html, url, site) {
   // all scenes seem to have 12 album photos available, not always included on the page
   const photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
-  const rawTags = $('.vdoTags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
-  const [channelSite, tags] = await Promise.all([
-    site.isFallback
-      ? knex('sites')
-        .where({ slug: siteId })
-        .orWhere({ name: siteName })
-        .first()
-      : site,
-    matchTags(rawTags),
-  ]);
+  const tags = $('.vdoTags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
   const stars = Number(sceneElement.find('.bVdPl_it_like .bVdPl_txt').text().replace('% like', '')) / 20;
@@ -96,12 +83,13 @@ async function scrapeScene(html, url, site) {
     rating: {
       stars,
     },
-    site: channelSite || site,
+    site,
+    channel: siteSlug === 'bangcasting' ? 'bangbroscasting' : siteSlug,
   };
 }
 async function fetchLatest(site, page = 1) {
-  const res = await bhttp.get(`https://bangbros.com/websites/${site.slug}/${page}`);
+  const res = await bhttp.get(`${site.url}/${page}`);
   return scrapeLatest(res.body.toString(), site);
 }
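
The fetchLatest change replaces the hardcoded bangbros.com listing URL with the site's own url field, so each Bang Bros channel can be fetched from whatever listing path is stored on its site record. A minimal usage sketch under that assumption; the module path, the export, and the exact shape of the site object are not shown in this diff and are illustrative only:

const { fetchLatest } = require('./bangbros'); // hypothetical path to this scraper module

// Hypothetical site record: only `url` (listing base) and `slug`
// (used for the bangcasting -> bangbroscasting channel mapping) are
// referenced by the code shown above.
const site = {
  slug: 'bangcasting',
  url: 'https://bangbros.com/websites/bangcasting',
};

// Fetch page 1 of the channel's latest scenes and log whatever scrapeLatest returns.
fetchLatest(site, 1)
  .then((scenes) => console.log(scenes))
  .catch((error) => console.error(error));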