From 32a0188b722a08c79c4376d1625689f6e77a41a7 Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Mon, 27 Jan 2020 03:07:06 +0100 Subject: [PATCH] Fixed Boobpedia scraper. Catching non-OK responses for Vixen scraper. --- src/scrape-sites.js | 6 +----- src/scrapers/boobpedia.js | 24 +++++++++++++++++------- src/scrapers/vixen.js | 15 ++++++++++++--- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/scrape-sites.js b/src/scrape-sites.js index 24079ff2..bb941526 100644 --- a/src/scrape-sites.js +++ b/src/scrape-sites.js @@ -126,11 +126,7 @@ async function scrapeSites() { try { return await scrapeSiteReleases(scraper, site); } catch (error) { - if (argv.debug) { - logger.error(`${site.name}: Failed to scrape releases`, error); - } - - logger.warn(`${site.id}: Failed to scrape releases`); + logger.error(`${site.name}: Failed to scrape releases: ${error.message}`); return []; } diff --git a/src/scrapers/boobpedia.js b/src/scrapers/boobpedia.js index e8e50971..2d5d3fea 100644 --- a/src/scrapers/boobpedia.js +++ b/src/scrapers/boobpedia.js @@ -24,15 +24,22 @@ function scrapeProfile(html) { if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity; if (bio.Measurements) { - const [bust, waist, hip] = bio.Measurements + const measurements = bio.Measurements .match(/\d+(\w+)?-\d+-\d+/g) - .slice(-1)[0] // allow for both '34C-25-36' and '86-64-94 cm / 34-25-37 in' + ?.slice(-1)[0] // allow for both '34C-25-36' and '86-64-94 cm / 34-25-37 in' .split('-'); - if (/[a-zA-Z]/.test(bust)) profile.bust = bust; // only use bust if cup size is included + // account for measurements being just e.g. 
'32EE' + if (measurements) { + const [bust, waist, hip] = measurements; - profile.waist = Number(waist); - profile.hip = Number(hip); + if (/[a-zA-Z]/.test(bust)) profile.bust = bust; // only use bust if cup size is included + + profile.waist = Number(waist); + profile.hip = Number(hip); + } + + if (/^\d+\w+$/.test(bio.Measurements)) profile.bust = bio.Measurements; } if (bio.Bra_cup_size) { @@ -56,9 +63,12 @@ function scrapeProfile(html) { if (avatars.length > 0) { const [avatarThumbPath] = avatars; - const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', ''); - profile.avatar = `http://www.boobpedia.com${avatarPath}`; + if (!/NoImageAvailable/.test(avatarThumbPath)) { + const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', ''); + + profile.avatar = `http://www.boobpedia.com${avatarPath}`; + } } profile.social = qu('.infobox a.external'); diff --git a/src/scrapers/vixen.js b/src/scrapers/vixen.js index 5c40a809..a618dad8 100644 --- a/src/scrapers/vixen.js +++ b/src/scrapers/vixen.js @@ -110,15 +110,24 @@ async function scrapeScene(html, url, site) { } async function fetchLatest(site, page = 1) { - const res = await bhttp.get(`${site.url}/videos?page=${page}&size=7`); + const url = `${site.url}/videos?page=${page}&size=7`; + const res = await bhttp.get(url); - return scrapeLatest(res.body.toString(), site); + if (res.statusCode === 200) { + return scrapeLatest(res.body.toString(), site); + } + + throw new Error(`Vixen response not OK for latest: ${res.statusCode}`); } async function fetchScene(url, site) { const res = await bhttp.get(url); - return scrapeScene(res.body.toString(), url, site); + if (res.statusCode === 200) { + return scrapeScene(res.body.toString(), url, site); + } + + throw new Error(`Vixen response not OK for scene (${url}): ${res.statusCode}`); } module.exports = {