Resolving actor birth and residence place before storage. Layout improvements.
This commit is contained in:
@@ -4,8 +4,6 @@ const bhttp = require('bhttp');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const knex = require('../knex');
|
||||
|
||||
// Lookup table from an ethnicity label ('White') to the term 'Caucasian'.
// NOTE(review): the code that reads this map is not visible in this diff excerpt —
// presumably it normalises the scraped ethnicity field; confirm against the full file.
const ethnicityMap = {
|
||||
White: 'Caucasian',
|
||||
};
|
||||
@@ -14,10 +12,6 @@ const hairMap = {
|
||||
Brunette: 'brown',
|
||||
};
|
||||
|
||||
// Country-name aliases: the key is the country name taken from the last
// comma-separated segment of the scraped birth-place string, the value is the
// name used when querying the `countries` table. Names without an entry here
// are used for the lookup unchanged.
const countryMap = {
|
||||
'United States of America': 'United States',
|
||||
};
|
||||
|
||||
async function scrapeProfile(html, _url, actorName) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
@@ -28,9 +22,7 @@ async function scrapeProfile(html, _url, actorName) {
|
||||
name: actorName,
|
||||
};
|
||||
|
||||
const descriptionString = document.querySelector('div[itemprop="description"]');
|
||||
const birthPlaceString = bio['Birth Place'] || bio.Birthplace;
|
||||
const residencePlaceString = bio['City and Country'];
|
||||
const descriptionString = document.querySelector('div[itemprop="description"]') || document.querySelector('.longBio');
|
||||
const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img');
|
||||
|
||||
if (bio.Gender) profile.gender = bio.Gender.toLowerCase();
|
||||
@@ -38,35 +30,20 @@ async function scrapeProfile(html, _url, actorName) {
|
||||
|
||||
if (descriptionString) profile.description = descriptionString.textContent;
|
||||
|
||||
if (bio.Birthday) bio.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate();
|
||||
if (bio.Born) bio.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();
|
||||
if (bio.Birthday) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate();
|
||||
if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();
|
||||
|
||||
if (birthPlaceString) {
|
||||
const birthPlaceSegments = birthPlaceString.split(',');
|
||||
const birthCountryName = birthPlaceSegments.slice(-1)[0].trim();
|
||||
const birthCountryEntry = await knex('countries').where('name', countryMap[birthCountryName] || birthCountryName).first();
|
||||
profile.birthPlace = bio['Birth Place'] || bio.Birthplace;
|
||||
profile.residencePlace = bio['City and Country'];
|
||||
|
||||
profile.birthPlace = birthPlaceSegments.slice(0, -1).join(',').trim();
|
||||
profile.birthCountry = birthCountryEntry ? birthCountryEntry.alpha2 : null;
|
||||
}
|
||||
|
||||
if (residencePlaceString) {
|
||||
const residencePlaceSegments = residencePlaceString.split(',');
|
||||
const residenceCountryAlpha2 = residencePlaceSegments.slice(-1)[0].trim();
|
||||
const residenceCountryEntry = await knex('countries').where('alpha2', residenceCountryAlpha2).first();
|
||||
|
||||
profile.residencePlace = residencePlaceSegments.slice(0, -1).join(',').trim();
|
||||
profile.residenceCountry = residenceCountryEntry ? residenceCountryEntry.alpha2 : null;
|
||||
}
|
||||
|
||||
if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-').map(measurement => parseInt(measurement, 10) || null);
|
||||
if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
|
||||
if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No';
|
||||
|
||||
if (bio.Height) profile.height = Number(bio.Height.match(/\(\d+/)[0].slice(1));
|
||||
if (bio.Weight) profile.weight = Number(bio.Weight.match(/\(\d+/)[0].slice(1));
|
||||
if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
|
||||
if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes';
|
||||
if (bio.Tattoos) profile.hasTattoos = bio.hasTattoos === 'Yes';
|
||||
if (bio.Tattoos) profile.hasTattoos = bio.Tattoos === 'Yes';
|
||||
|
||||
if (avatarEl) profile.avatar = avatarEl.src;
|
||||
profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href).filter(link => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason
|
||||
|
||||
Reference in New Issue
Block a user