Showing all unique descriptions on profile with network logo. Fixed Fame Digital scraper.

This commit is contained in:
2020-05-19 04:46:49 +02:00
parent 9883c3d9c2
commit c0898b84d6
23 changed files with 131 additions and 27 deletions

View File

@@ -3,6 +3,12 @@
const config = require('config');
const Promise = require('bluebird');
const moment = require('moment');
const blake2 = require('blake2');
const DOMPurify = require('dompurify');
const { JSDOM } = require('jsdom');
// Bind DOMPurify to the window of an empty JSDOM document; the resulting
// instance is what this module uses to sanitize scraped profile descriptions.
const { window } = new JSDOM('');
const domPurify = DOMPurify(window);
// const logger = require('./logger')(__filename);
const knex = require('./knex');
@@ -148,6 +154,7 @@ function curateProfileEntry(profile) {
gender: profile.gender,
ethnicity: profile.ethnicity,
description: profile.description,
description_hash: profile.descriptionHash,
birth_city: profile.placeOfBirth?.city || null,
birth_state: profile.placeOfBirth?.state || null,
birth_country_alpha2: profile.placeOfBirth?.country || null,
@@ -189,7 +196,14 @@ async function curateProfile(profile) {
update: profile.update,
};
curatedProfile.description = profile.description?.trim() || null;
curatedProfile.description = domPurify.sanitize(profile.description, { ALLOWED_TAGS: [] }).trim() || null;
const hasher = curatedProfile.description && blake2
.createHash('blake2b')
.update(Buffer.from(slugify(curatedProfile.description)));
curatedProfile.descriptionHash = curatedProfile.description && hasher.digest('hex');
curatedProfile.nationality = profile.nationality?.trim() || null; // used to derive country when country not available
curatedProfile.ethnicity = ethnicities[profile.ethnicity?.trim().toLowerCase()] || null;