Checking nationality against alpha3 and alpha2. Improved Team Skeet profile scraper.

This commit is contained in:
DebaucheryLibrarian 2020-07-23 04:39:12 +02:00
parent 23e4f87af0
commit 747c2e1637
2 changed files with 10 additions and 5 deletions

View File

@ -332,12 +332,16 @@ async function curateProfile(profile) {
if (!curatedProfile.placeOfBirth && curatedProfile.nationality) { if (!curatedProfile.placeOfBirth && curatedProfile.nationality) {
const country = await knex('countries') const country = await knex('countries')
.where('nationality', 'ilike', `%${curatedProfile.nationality}%`) .where('nationality', 'ilike', `%${curatedProfile.nationality}%`)
.orWhere('alpha3', 'ilike', `%${curatedProfile.nationality}%`)
.orWhere('alpha2', 'ilike', `%${curatedProfile.nationality}%`)
.orderBy('priority', 'desc') .orderBy('priority', 'desc')
.first(); .first();
curatedProfile.placeOfBirth = { if (country) {
country: country.alpha2, curatedProfile.placeOfBirth = {
}; country: country.alpha2,
};
}
} }
curatedProfile.social = Array.isArray(profile.social) curatedProfile.social = Array.isArray(profile.social)

View File

@ -55,7 +55,7 @@ function scrapeScene(scene) {
]; ];
release.channel = slugify(scene.site.name, '') release.channel = slugify(scene.site.name, '')
.replace('hobybuchanon', 'tshobybuchanon'); // slug collision with his own site .replace('hobybuchanon', 'tshobybuchanon'); // slug collision with his own site
if (scene.video) { if (scene.video) {
release.trailer = { stream: `https://videodelivery.net/${scene.video}/manifest/video.mpd` }; release.trailer = { stream: `https://videodelivery.net/${scene.video}/manifest/video.mpd` };
@ -67,7 +67,8 @@ function scrapeScene(scene) {
function scrapeProfile(actor) { function scrapeProfile(actor) {
const profile = {}; const profile = {};
if (actor.bio.about) { // TODO: split bio https://store.psmcdn.net/ts-organic-iiiokv9kyo/modelsContent/valerie-white.json
if (actor.bio.about && !/\band\b/.test(actor.bio.about)) {
// birthdate seems never/rarely correct // birthdate seems never/rarely correct
const measurements = actor.bio.about.match(/Measurements: (\d+)(\w+)-(\d+)-(\d+)/i); const measurements = actor.bio.about.match(/Measurements: (\d+)(\w+)-(\d+)-(\d+)/i);