Fixed Boobpedia scraper. Catching non-OK responses for Vixen scraper.

This commit is contained in:
ThePendulum 2020-01-27 03:07:06 +01:00
parent 77b214f1dc
commit 32a0188b72
3 changed files with 30 additions and 15 deletions

View File

@ -126,11 +126,7 @@ async function scrapeSites() {
try { try {
return await scrapeSiteReleases(scraper, site); return await scrapeSiteReleases(scraper, site);
} catch (error) { } catch (error) {
if (argv.debug) { logger.error(`${site.name}: Failed to scrape releases: ${error.message}`);
logger.error(`${site.name}: Failed to scrape releases`, error);
}
logger.warn(`${site.id}: Failed to scrape releases`);
return []; return [];
} }

View File

@ -24,17 +24,24 @@ function scrapeProfile(html) {
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity; if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
if (bio.Measurements) { if (bio.Measurements) {
const [bust, waist, hip] = bio.Measurements const measurements = bio.Measurements
.match(/\d+(\w+)?-\d+-\d+/g) .match(/\d+(\w+)?-\d+-\d+/g)
.slice(-1)[0] // allow for both '34C-25-36' and '86-64-94 cm / 34-25-37 in' ?.slice(-1)[0] // allow for both '34C-25-36' and '86-64-94 cm / 34-25-37 in'
.split('-'); .split('-');
// account for measurements being just e.g. '32EE'
if (measurements) {
const [bust, waist, hip] = measurements;
if (/[a-zA-Z]/.test(bust)) profile.bust = bust; // only use bust if cup size is included if (/[a-zA-Z]/.test(bust)) profile.bust = bust; // only use bust if cup size is included
profile.waist = Number(waist); profile.waist = Number(waist);
profile.hip = Number(hip); profile.hip = Number(hip);
} }
if (/^\d+\w+$/.test(bio.Measurements)) profile.bust = bio.Measurements;
}
if (bio.Bra_cup_size) { if (bio.Bra_cup_size) {
const bust = bio.Bra_cup_size.match(/^\d+\w+/); const bust = bio.Bra_cup_size.match(/^\d+\w+/);
if (bust) [profile.bust] = bust; if (bust) [profile.bust] = bust;
@ -56,10 +63,13 @@ function scrapeProfile(html) {
if (avatars.length > 0) { if (avatars.length > 0) {
const [avatarThumbPath] = avatars; const [avatarThumbPath] = avatars;
if (!/NoImageAvailable/.test(avatarThumbPath)) {
const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', ''); const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', '');
profile.avatar = `http://www.boobpedia.com${avatarPath}`; profile.avatar = `http://www.boobpedia.com${avatarPath}`;
} }
}
profile.social = qu('.infobox a.external'); profile.social = qu('.infobox a.external');

View File

@ -110,17 +110,26 @@ async function scrapeScene(html, url, site) {
} }
async function fetchLatest(site, page = 1) { async function fetchLatest(site, page = 1) {
const res = await bhttp.get(`${site.url}/videos?page=${page}&size=7`); const url = `${site.url}/videos?page=${page}&size=7`;
const res = await bhttp.get(url);
if (res.statusCode === 200) {
return scrapeLatest(res.body.toString(), site); return scrapeLatest(res.body.toString(), site);
} }
throw new Error(`Vixen response not OK for latest: ${res.statusCode}`);
}
async function fetchScene(url, site) { async function fetchScene(url, site) {
const res = await bhttp.get(url); const res = await bhttp.get(url);
if (res.statusCode === 200) {
return scrapeScene(res.body.toString(), url, site); return scrapeScene(res.body.toString(), url, site);
} }
throw new Error(`Vixen response not OK for scene (${url}): ${res.statusCode}`);
}
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchScene, fetchScene,