Improved scraping and association behavior.

This commit is contained in:
2019-11-19 04:36:15 +01:00
parent 7e9fd19c2a
commit 3ec7b15886
10 changed files with 166 additions and 94 deletions

View File

@@ -19,7 +19,9 @@ async function scrapeActorFrontpage(html, url, name) {
const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});
const birthdateString = bio['Date of Birth:'];
const birthdate = moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate();
const birthdate = birthdateString && birthdateString !== 'Unknown (Add)'
? moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate()
: null;
const boobsSizeString = bio['Measurements:'];
const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
@@ -74,8 +76,9 @@ async function scrapeActorBio(html, frontpageBio, url, name) {
const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});
const birthdateString = bio['Date of Birth:'];
const birthdate = moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate();
const active = bio['Career Status:'].trim() === 'Active';
const birthdate = birthdateString && birthdateString !== 'Unknown'
? moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate()
: null;
const boobsSizeString = bio['Measurements:'];
const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
@@ -114,7 +117,6 @@ async function scrapeActorBio(html, frontpageBio, url, name) {
eyes,
piercings,
tattoos,
active,
social,
};
}
@@ -124,11 +126,16 @@ async function fetchActor(actorName) {
const frontpageUrl = `https://freeones.com/html/v_links/${slug}`;
const resFrontpage = await bhttp.get(frontpageUrl);
const { url, bio } = await scrapeActorFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);
const resBio = await bhttp.get(url);
if (resFrontpage.statusCode === 200) {
const { url, bio } = await scrapeActorFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);
return scrapeActorBio(resBio.body.toString(), bio, url, actorName);
const resBio = await bhttp.get(url);
return scrapeActorBio(resBio.body.toString(), bio, url, actorName);
}
return null;
}
module.exports = {