From 747c2e16371218f5345894831f86a14602a72eee Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Thu, 23 Jul 2020 04:39:12 +0200 Subject: [PATCH] Checking nationality against alpha3 and alpha2. Improved Team Skeet profile scraper. --- src/actors.js | 10 +++++++--- src/scrapers/teamskeet.js | 5 +++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/actors.js b/src/actors.js index d3e879e8..09547f34 100644 --- a/src/actors.js +++ b/src/actors.js @@ -332,12 +332,16 @@ async function curateProfile(profile) { if (!curatedProfile.placeOfBirth && curatedProfile.nationality) { const country = await knex('countries') .where('nationality', 'ilike', `%${curatedProfile.nationality}%`) + .orWhere('alpha3', 'ilike', `%${curatedProfile.nationality}%`) + .orWhere('alpha2', 'ilike', `%${curatedProfile.nationality}%`) .orderBy('priority', 'desc') .first(); - curatedProfile.placeOfBirth = { - country: country.alpha2, - }; + if (country) { + curatedProfile.placeOfBirth = { + country: country.alpha2, + }; + } } curatedProfile.social = Array.isArray(profile.social) diff --git a/src/scrapers/teamskeet.js b/src/scrapers/teamskeet.js index daac7fae..84d74f9d 100644 --- a/src/scrapers/teamskeet.js +++ b/src/scrapers/teamskeet.js @@ -55,7 +55,7 @@ function scrapeScene(scene) { ]; release.channel = slugify(scene.site.name, '') - .replace('hobybuchanon', 'tshobybuchanon'); // slug collision with his own site + .replace('hobybuchanon', 'tshobybuchanon'); // slug collision with his own site if (scene.video) { release.trailer = { stream: `https://videodelivery.net/${scene.video}/manifest/video.mpd` }; @@ -67,7 +67,8 @@ function scrapeScene(scene) { function scrapeProfile(actor) { const profile = {}; - if (actor.bio.about) { + // TODO: split bio https://store.psmcdn.net/ts-organic-iiiokv9kyo/modelsContent/valerie-white.json + if (actor.bio.about && !/\band\b/.test(actor.bio.about)) { // birthdate seems never/rarely correct const measurements = 
actor.bio.about.match(/Measurements: (\d+)(\w+)-(\d+)-(\d+)/i);