Showing all unique descriptions on profile with network logo. Fixed Fame Digital scraper.
This commit is contained in:
@@ -3,6 +3,12 @@
|
||||
const config = require('config');
|
||||
const Promise = require('bluebird');
|
||||
const moment = require('moment');
|
||||
const blake2 = require('blake2');
|
||||
const DOMPurify = require('dompurify');
|
||||
const { JSDOM } = require('jsdom');
|
||||
|
||||
const { window } = new JSDOM('');
|
||||
const domPurify = DOMPurify(window);
|
||||
|
||||
// const logger = require('./logger')(__filename);
|
||||
const knex = require('./knex');
|
||||
@@ -148,6 +154,7 @@ function curateProfileEntry(profile) {
|
||||
gender: profile.gender,
|
||||
ethnicity: profile.ethnicity,
|
||||
description: profile.description,
|
||||
description_hash: profile.descriptionHash,
|
||||
birth_city: profile.placeOfBirth?.city || null,
|
||||
birth_state: profile.placeOfBirth?.state || null,
|
||||
birth_country_alpha2: profile.placeOfBirth?.country || null,
|
||||
@@ -189,7 +196,14 @@ async function curateProfile(profile) {
|
||||
update: profile.update,
|
||||
};
|
||||
|
||||
curatedProfile.description = profile.description?.trim() || null;
|
||||
curatedProfile.description = domPurify.sanitize(profile.description, { ALLOWED_TAGS: [] }).trim() || null;
|
||||
|
||||
const hasher = curatedProfile.description && blake2
|
||||
.createHash('blake2b')
|
||||
.update(Buffer.from(slugify(curatedProfile.description)));
|
||||
|
||||
curatedProfile.descriptionHash = curatedProfile.description && hasher.digest('hex');
|
||||
|
||||
curatedProfile.nationality = profile.nationality?.trim() || null; // used to derive country when country not available
|
||||
|
||||
curatedProfile.ethnicity = ethnicities[profile.ethnicity?.trim().toLowerCase()] || null;
|
||||
|
||||
@@ -39,9 +39,8 @@ function decodeId(id) {
|
||||
.toString('hex');
|
||||
}
|
||||
|
||||
function scrapeScene(scene, site) {
|
||||
function scrapeScene(scene) {
|
||||
const release = {
|
||||
site,
|
||||
entryId: scene.id,
|
||||
title: scene.name,
|
||||
description: scene.description,
|
||||
@@ -80,11 +79,11 @@ function scrapeScene(scene, site) {
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeAll(scenes, site) {
|
||||
return scenes.map(({ _source: scene }) => scrapeScene(scene, site));
|
||||
function scrapeAll(scenes) {
|
||||
return scenes.map(({ _source: scene }) => scrapeScene(scene));
|
||||
}
|
||||
|
||||
async function fetchActorReleases(actor, site) {
|
||||
async function fetchActorReleases(actor) {
|
||||
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||
size: 50,
|
||||
query: {
|
||||
@@ -138,11 +137,10 @@ async function fetchActorReleases(actor, site) {
|
||||
},
|
||||
});
|
||||
|
||||
return scrapeAll(res.body.hits.hits, site);
|
||||
return scrapeAll(res.body.hits.hits);
|
||||
}
|
||||
|
||||
|
||||
async function scrapeProfile(actor, site, include) {
|
||||
async function scrapeProfile(actor, include) {
|
||||
const profile = {};
|
||||
|
||||
profile.aliases = actor.aliases;
|
||||
@@ -174,7 +172,7 @@ async function scrapeProfile(actor, site, include) {
|
||||
if (actor.image) profile.avatar = `https://i.bang.com/pornstars/${actor.identifier}.jpg`;
|
||||
|
||||
if (include.releases) {
|
||||
profile.releases = await fetchActorReleases(actor, site);
|
||||
profile.releases = await fetchActorReleases(actor);
|
||||
}
|
||||
|
||||
return profile;
|
||||
@@ -267,7 +265,7 @@ async function fetchLatest(site, page = 1) {
|
||||
return scrapeAll(res.body.hits.hits, site);
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
async function fetchScene(url) {
|
||||
const encodedId = new URL(url).pathname.split('/')[2];
|
||||
const entryId = decodeId(encodedId);
|
||||
|
||||
@@ -277,10 +275,10 @@ async function fetchScene(url, site) {
|
||||
},
|
||||
});
|
||||
|
||||
return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle
|
||||
return scrapeScene(res.body._source); // eslint-disable-line no-underscore-dangle
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName, actorSlug, site, include) {
|
||||
async function fetchProfile(actorName, context, include) {
|
||||
const res = await post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
|
||||
size: 5,
|
||||
sort: [{
|
||||
@@ -315,7 +313,7 @@ async function fetchProfile(actorName, actorSlug, site, include) {
|
||||
const actor = res.body.hits.hits.find(hit => hit._source.name.toLowerCase() === actorName.toLowerCase());
|
||||
|
||||
if (actor) {
|
||||
return scrapeProfile(actor._source, site, include);
|
||||
return scrapeProfile(actor._source, include);
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
@@ -91,8 +91,8 @@ async function fetchClassicProfile(actorName, { site }) {
|
||||
}
|
||||
|
||||
async function networkFetchProfile(actorName, context, include) {
|
||||
const profile = await ((context.site.parameters.api && fetchApiProfile(actorName, context, include))
|
||||
|| (context.site.parameters.classic && include.scenes && fetchClassicProfile(actorName, context, include)) // classic profiles only have scenes, no bio
|
||||
const profile = await ((context.site.parameters?.api && fetchApiProfile(actorName, context, include))
|
||||
|| (context.site.parameters?.classic && include.scenes && fetchClassicProfile(actorName, context, include)) // classic profiles only have scenes, no bio
|
||||
|| fetchProfile(actorName, context, true, getActorReleasesUrl, include));
|
||||
|
||||
return profile;
|
||||
|
||||
Reference in New Issue
Block a user