Fixed qu issues. Fixed media issues. Simplified and expanded date component in search query.

2020-03-10 00:17:57 +01:00
parent 61a795d634
commit 5c55750c0c
9 changed files with 113 additions and 116 deletions
--- a/src/scrapers/bangbros.js
+++ b/src/scrapers/bangbros.js
@@ -5,6 +5,7 @@ const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 const moment = require('moment');

+const logger = require('../logger')(__filename);
 const slugify = require('../utils/slugify');
 const { ex } = require('../utils/q');

@@ -105,7 +106,10 @@ function scrapeScene(html, url, _site) {
    release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));

    const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/);
-    release.channel = channel === 'bangcasting' ? 'bangbroscasting' : channel;
+
+    if (channel === 'bangcasting') release.channel = 'bangbroscasting';
+    if (channel === 'remaster') release.channel = 'bangbrosremastered';
+    else release.channel = channel;

    return release;
 }
@@ -123,8 +127,8 @@ function scrapeProfile(html) {
 }

 function scrapeProfileSearch(html, actorName) { // builds an absolute bangbros.com URL from the link matched for actorName in html, or null
-    const { q } = ex(html);
-    const actorLink = q(`a[title="${actorName}"]`, 'href');
+    const { qu } = ex(html);
+    const actorLink = qu.url(`a[title="${actorName}" i][href*="model"]`); // title compared case-insensitively (`i` flag); href must contain "model". NOTE(review): actorName is interpolated unescaped, and qu.url presumably returns a site-relative href - confirm

     return actorLink ? `https://bangbros.com${actorLink}` : null; // null when actorLink is falsy (no matching anchor)
 }
@@ -145,7 +149,7 @@ async function fetchUpcoming(site) {

 async function fetchScene(url, site, release) {
    if (!release?.date) {
-        throw new Error(`Cannot fetch Bang Bros scenes from argument URL, as scene pages do not have release dates: ${url}`);
+        logger.warn(`Scraping Bang Bros scene from URL without release date: ${url}`);
    }

    const { origin } = new URL(url);