traxxx/src/scrapers/boobpedia.js

93 lines
2.7 KiB
JavaScript
Raw Normal View History

'use strict';
const { ex } = require('../utils/q');
const http = require('../utils/http');
/**
 * Collect the infobox rows of a profile page into a plain key/value object.
 *
 * Each row's `td` texts are read as a [label, value] pair. The last character
 * of the label is dropped (presumably the trailing colon — confirm against the
 * live markup) and every whitespace, '+', '|' or '/' character in it becomes an
 * underscore, so that e.g. 'Bra/cup size:' is stored as `Bra_cup_size`.
 *
 * @param {Object} qu - query helper returned by `ex(html)`
 * @returns {Object} infobox values keyed by normalised label
 */
function scrapeBio(qu) {
	const entries = qu.all('.infobox tr[valign="top"]')
		.map((detail) => qu.all(detail, 'td', true) || [])
		// rows without a label cell would otherwise throw on key.slice()
		.filter(([key]) => typeof key === 'string')
		.map(([key, value]) => [key.slice(0, -1).replace(/[\s+|/]/g, '_'), value]);

	return Object.fromEntries(entries);
}

/**
 * Build an actor profile from a Boobpedia page.
 *
 * Fields are only set when the matching infobox row is present and parseable;
 * `birthdate`, `description` and `social` are always assigned from whatever the
 * query helper returns.
 *
 * @param {string} html - page markup, handed to `ex()` as-is
 * @returns {Object} profile with any of: birthdate, description, birthPlace,
 *   ethnicity, bust, waist, hip, naturalBoobs, height, weight, eyes, hair,
 *   blood, aliases, avatar, social
 */
function scrapeProfile(html) {
	const { qu } = ex(html); /* eslint-disable-line object-curly-newline */
	const profile = {};
	const bio = scrapeBio(qu);

	// gender is deliberately not derived from the category links (#mw-normal-catlinks);
	// matching /shemale|transgender/i against them proved unreliable, see: Syren De Mer

	profile.birthdate = qu.date('.bday', 'YYYY-MM-DD');
	profile.description = qu.q('#mw-content-text > p', true);

	// keep whatever follows the last ')' and drop the whitespace left behind by the slice
	if (bio.Born) profile.birthPlace = bio.Born.slice(bio.Born.lastIndexOf(')') + 1).trim();
	if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;

	if (bio.Measurements) {
		// take the last triple to allow for both '34C-25-36' and '86-64-94 cm / 34-25-37 in'
		const measurements = bio.Measurements
			.match(/\d+(\w+)?-\d+-\d+/g)
			?.slice(-1)[0]
			.split('-');

		// account for measurements being just e.g. '32EE', in which case there is no triple
		if (measurements) {
			const [bust, waist, hip] = measurements;

			if (/[a-zA-Z]/.test(bust)) profile.bust = bust; // only use bust if cup size is included

			profile.waist = Number(waist);
			profile.hip = Number(hip);
		}

		if (/^\d+\w+$/.test(bio.Measurements)) profile.bust = bio.Measurements;
	}

	// a dedicated cup size row takes precedence over the bust found in the measurements
	if (bio.Bra_cup_size) {
		const bust = bio.Bra_cup_size.match(/^\d+\w+/);

		if (bust) [profile.bust] = bust;
	}

	if (bio.Boobs === 'Enhanced') profile.naturalBoobs = false;
	if (bio.Boobs === 'Natural') profile.naturalBoobs = true;

	if (bio.Height) {
		// the last decimal number is taken to be metres (assumption about the infobox format);
		// a value without any decimal number is skipped instead of throwing
		const metres = bio.Height.match(/\d+\.\d+/g)?.slice(-1)[0];

		// round, since e.g. 1.15 * 100 is 114.99999999999999 in floating point
		if (metres) profile.height = Math.round(Number(metres) * 100);
	}

	if (bio.Weight) {
		// the second number is used, presumably the kilograms in e.g. '110 lb (50 kg)' — confirm;
		// values with fewer than two numbers are skipped rather than stored as NaN
		const kilograms = bio.Weight.match(/\d+/g)?.[1];

		if (kilograms) profile.weight = Number(kilograms);
	}

	if (bio.Eye_color) profile.eyes = bio.Eye_color;
	if (bio.Hair) [profile.hair] = bio.Hair.split(',');
	if (bio.Blood_group) profile.blood = bio.Blood_group;
	if (bio.Also_known_as) profile.aliases = bio.Also_known_as.split(', ');

	const avatarThumbPath = qu.img('.image img');

	if (avatarThumbPath && !/NoImageAvailable/.test(avatarThumbPath)) {
		// drop the final path segment and the first 'thumb/' to get from the thumbnail path to its parent image path
		const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', '');

		profile.avatar = {
			src: `http://www.boobpedia.com${avatarPath}`,
			credit: null,
		};
	}

	profile.social = qu.urls('.infobox a.external');

	return profile;
}
/**
 * Fetch an actor's Boobpedia page and scrape it into a profile.
 *
 * @param {{ name: string }} actor - only `name` is read
 * @returns {Promise<Object|null>} the scraped profile, or null when the
 *   response status is anything other than 200
 */
async function fetchProfile({ name: actorName }) {
	// replace every whitespace run, not just the first one, so that names of
	// three or more words such as 'Syren De Mer' become 'Syren_De_Mer'
	const actorSlug = actorName.replace(/\s+/g, '_');
	const res = await http.get(`http://www.boobpedia.com/boobs/${actorSlug}`);

	if (res.statusCode === 200) {
		return scrapeProfile(res.body.toString());
	}

	return null;
}
// public interface of this scraper: profile lookup by actor name
module.exports = { fetchProfile };