forked from DebaucheryLibrarian/traxxx
Fixed profile location interpolation. Generalized ethnicity, hair color and eye color values.
This commit is contained in:
198
src/actors.js
198
src/actors.js
@@ -20,11 +20,56 @@ const slugify = require('./utils/slugify');
|
||||
const capitalize = require('./utils/capitalize');
|
||||
const resolvePlace = require('./utils/resolve-place');
|
||||
|
||||
// Lookup table from a lowercase hair color label to a normalized hair color name.
// curateProfile indexes this table with the trimmed, lowercased profile.hair value;
// a label without an entry here resolves to null there.
const hairColors = {
|
||||
'jet-black': 'black',
|
||||
'red-head': 'red',
|
||||
'soft-black': 'black',
|
||||
black: 'black',
|
||||
blonde: 'blonde',
|
||||
blondie: 'blonde',
|
||||
brown: 'brown',
|
||||
brunette: 'brown',
|
||||
fair: 'blonde',
|
||||
raven: 'black',
|
||||
red: 'red',
|
||||
redhead: 'red',
|
||||
};
|
||||
|
||||
// Lookup table from a lowercase eye color label to a normalized eye color name.
// curateProfile indexes this table with the trimmed, lowercased profile.eyes value;
// a label without an entry here resolves to null there.
// Both 'gray' and 'grey' spellings normalize to 'gray'.
const eyeColors = {
|
||||
blue: 'blue',
|
||||
brown: 'brown',
|
||||
dark: 'brown',
|
||||
gray: 'gray',
|
||||
green: 'green',
|
||||
grey: 'gray',
|
||||
hazel: 'hazel',
|
||||
};
|
||||
|
||||
// Lookup table from a lowercase ethnicity label to a normalized ethnicity name.
// curateProfile indexes this table with the trimmed, lowercased profile.ethnicity
// value; a label without an entry here resolves to null there and is logged as
// unrecognized. Every normalized value is also accepted as an input label.
const ethnicities = {
	'african american': 'black',
	'african-american': 'black',
	'native american': 'native american',
	african: 'black',
	arabic: 'arabic',
	aravic: 'arabic', // misspelled label, kept so lookups that relied on it keep resolving
	asian: 'asian',
	black: 'black',
	caucasian: 'white',
	european: 'white',
	hispanic: 'latina',
	indian: 'indian',
	japanese: 'japanese',
	latina: 'latina',
	latino: 'latino',
	white: 'white',
};
|
||||
|
||||
function getMostFrequent(items) {
|
||||
const { mostFrequent } = items.reduce((acc, item) => {
|
||||
acc.counts[item] = (acc.counts[item] || 0) + 1;
|
||||
const slug = slugify(item);
|
||||
|
||||
if (!acc.mostFrequent || acc.counts[item] > acc.counts[acc.mostFrequent]) {
|
||||
acc.counts[slug] = (acc.counts[slug] || 0) + 1;
|
||||
|
||||
if (!acc.mostFrequent || acc.counts[slug] > acc.counts[slugify(acc.mostFrequent)]) {
|
||||
acc.mostFrequent = item;
|
||||
}
|
||||
|
||||
@@ -144,9 +189,11 @@ async function curateProfile(profile) {
|
||||
|
||||
curatedProfile.description = profile.description?.trim() || null;
|
||||
curatedProfile.nationality = profile.nationality?.trim() || null; // used to derive country when country not available
|
||||
curatedProfile.ethnicity = profile.ethnicity?.trim() || null;
|
||||
curatedProfile.hair = profile.hair?.trim() || null;
|
||||
curatedProfile.eyes = profile.eyes?.trim() || null;
|
||||
|
||||
curatedProfile.ethnicity = ethnicities[profile.ethnicity?.trim().toLowerCase()] || null;
|
||||
curatedProfile.hair = hairColors[profile.hair?.trim().toLowerCase()] || null;
|
||||
curatedProfile.eyes = eyeColors[profile.eyes?.trim().toLowerCase()] || null;
|
||||
|
||||
curatedProfile.tattoos = profile.tattoos?.trim() || null;
|
||||
curatedProfile.piercings = profile.piercings?.trim() || null;
|
||||
|
||||
@@ -211,6 +258,10 @@ async function curateProfile(profile) {
|
||||
|
||||
curatedProfile.releases = toBaseReleases(profile.releases);
|
||||
|
||||
if (profile.ethnicity && !curatedProfile.ethnicity) logger.warn(`Unrecognized ethnicity returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.ethnicity}`);
|
||||
if (profile.hair && !curatedProfile.hair) logger.warn(`Unrecognized hair color returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.hair}`);
|
||||
if (profile.eyes && !curatedProfile.eyes) logger.warn(`Unrecognized eye color returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.eyes}`);
|
||||
|
||||
return curatedProfile;
|
||||
} catch (error) {
|
||||
logger.error(`Failed to curate '${profile.name}': ${error.message}`);
|
||||
@@ -234,15 +285,28 @@ async function interpolateProfiles(actors) {
|
||||
}), {});
|
||||
|
||||
const interpolatedProfiles = Object.entries(profilesByActorId).map(([actorId, actorProfiles]) => {
|
||||
// group values from each profile
|
||||
const valuesByProperty = actorProfiles.reduce((acc, profile) => Object
|
||||
.entries(profile)
|
||||
.reduce((profileAcc, [property, value]) => ({
|
||||
...profileAcc,
|
||||
[property]: [
|
||||
...(acc[property] || []),
|
||||
...(value === null ? [] : [value]),
|
||||
...(value === null ? [] : Array.from({ length: profile.priority }, () => value)), // multiply by priority, increasing the odds of being the most frequent value
|
||||
],
|
||||
}), {}), {});
|
||||
}), {
|
||||
// bundle location values so they can be assessed together, ensuring the most frequent city lies within the most frequent state, which in turn lies within the most frequent country
|
||||
origin: [...acc.origin || [], {
|
||||
...(profile.birth_country_alpha2 && { country: profile.birth_country_alpha2 }),
|
||||
...(profile.birth_state && { state: profile.birth_state }),
|
||||
...(profile.birth_city && { city: profile.birth_city }),
|
||||
}].filter(location => Object.keys(location).length > 0),
|
||||
residence: [...acc.residence || [], {
|
||||
...(profile.residence_country_alpha2 && { country: profile.residence_country_alpha2 }),
|
||||
...(profile.residence_state && { state: profile.residence_state }),
|
||||
...(profile.residence_city && { city: profile.residence_city }),
|
||||
}].filter(location => Object.keys(location).length > 0),
|
||||
}), {});
|
||||
|
||||
const avatars = actorProfiles.map(profile => profile.avatar_media_id && ({
|
||||
id: profile.avatar_media_id,
|
||||
@@ -251,39 +315,50 @@ async function interpolateProfiles(actors) {
|
||||
size: profile.avatar_size,
|
||||
})).filter(Boolean);
|
||||
|
||||
const mostFrequentValues = [
|
||||
'gender',
|
||||
'ethnicity',
|
||||
'cup',
|
||||
'bust',
|
||||
'waist',
|
||||
'hip',
|
||||
'natural_boobs',
|
||||
'height',
|
||||
'hair',
|
||||
'eyes',
|
||||
'has_tattoos',
|
||||
'has_piercings',
|
||||
].reduce((acc, property) => ({
|
||||
...acc,
|
||||
[property]: getMostFrequent(valuesByProperty[property]),
|
||||
}), {});
|
||||
|
||||
const profile = {
|
||||
id: actorId,
|
||||
...mostFrequentValues,
|
||||
};
|
||||
|
||||
profile.gender = getMostFrequent(valuesByProperty.gender);
|
||||
profile.ethnicity = getMostFrequent(valuesByProperty.ethnicity.map(ethnicity => ethnicity.toLowerCase()));
|
||||
|
||||
profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
|
||||
profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);
|
||||
|
||||
// TODO: fix city, state and country not matching
|
||||
profile.birth_city = getMostFrequent(valuesByProperty.birth_city);
|
||||
profile.birth_state = getMostFrequent(valuesByProperty.birth_state);
|
||||
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.birth_country_alpha2);
|
||||
// ensure most frequent country, city and state match up
|
||||
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.origin.map(location => location.country));
|
||||
const remainingOriginCountries = valuesByProperty.origin.filter(location => location.country === profile.birth_country_alpha2);
|
||||
|
||||
profile.residence_city = getMostFrequent(valuesByProperty.residence_city);
|
||||
profile.residence_state = getMostFrequent(valuesByProperty.residence_state);
|
||||
profile.residence_country_alpha2 = getMostFrequent(valuesByProperty.residence_country_alpha2);
|
||||
profile.birth_state = getMostFrequent(remainingOriginCountries.map(location => location.state));
|
||||
const remainingOriginStates = remainingOriginCountries.filter(location => !profile.birth_state || location.state === profile.birth_state);
|
||||
|
||||
profile.cup = getMostFrequent(valuesByProperty.cup);
|
||||
profile.bust = getMostFrequent(valuesByProperty.bust);
|
||||
profile.waist = getMostFrequent(valuesByProperty.waist);
|
||||
profile.hip = getMostFrequent(valuesByProperty.hip);
|
||||
profile.natural_boobs = getMostFrequent(valuesByProperty.natural_boobs);
|
||||
profile.birth_city = getMostFrequent(remainingOriginStates.map(location => location.city));
|
||||
|
||||
profile.hair = getMostFrequent(valuesByProperty.hair.map(hair => hair.toLowerCase()));
|
||||
profile.eyes = getMostFrequent(valuesByProperty.eyes.map(eyes => eyes.toLowerCase()));
|
||||
profile.residence_country_alpha2 = getMostFrequent(valuesByProperty.residence.map(location => location.country));
|
||||
const remainingResidenceCountries = valuesByProperty.residence.filter(location => location.country === profile.residence_country_alpha2);
|
||||
|
||||
profile.residence_state = getMostFrequent(remainingResidenceCountries.map(location => location.state));
|
||||
const remainingResidenceStates = remainingResidenceCountries.filter(location => !profile.residence_state || location.state === profile.residence_state);
|
||||
|
||||
profile.residence_city = getMostFrequent(remainingResidenceStates.map(location => location.city));
|
||||
|
||||
profile.weight = getAverage(valuesByProperty.weight);
|
||||
profile.height = getMostFrequent(valuesByProperty.height);
|
||||
|
||||
profile.has_tattoos = getMostFrequent(valuesByProperty.has_tattoos);
|
||||
profile.has_piercings = getMostFrequent(valuesByProperty.has_piercings);
|
||||
|
||||
profile.tattoos = getLongest(valuesByProperty.tattoos);
|
||||
profile.piercings = getLongest(valuesByProperty.piercings);
|
||||
@@ -366,38 +441,47 @@ async function upsertProfiles(profiles) {
|
||||
async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug) {
|
||||
const profiles = Promise.map(sources, async (source) => {
|
||||
try {
|
||||
// config may group sources to try until success
|
||||
return await [].concat(source).reduce(async (outcome, scraperSlug) => outcome.catch(async () => {
|
||||
const scraper = scrapers[scraperSlug];
|
||||
const context = {
|
||||
site: sitesBySlug[scraperSlug] || null,
|
||||
network: networksBySlug[scraperSlug] || sitesBySlug[scraperSlug]?.network || null,
|
||||
scraper: scraperSlug,
|
||||
};
|
||||
try {
|
||||
const scraper = scrapers[scraperSlug];
|
||||
const context = {
|
||||
site: sitesBySlug[scraperSlug] || null,
|
||||
network: networksBySlug[scraperSlug] || sitesBySlug[scraperSlug]?.network || null,
|
||||
scraper: scraperSlug,
|
||||
};
|
||||
|
||||
if (!scraper?.fetchProfile) {
|
||||
logger.warn(`No profile profile scraper available for ${scraperSlug}`);
|
||||
throw new Error(`No profile profile scraper available for ${scraperSlug}`);
|
||||
if (!scraper?.fetchProfile) {
|
||||
logger.warn(`No profile profile scraper available for ${scraperSlug}`);
|
||||
throw new Error(`No profile profile scraper available for ${scraperSlug}`);
|
||||
}
|
||||
|
||||
if (!context.site && !context.network) {
|
||||
logger.warn(`No site or network found for ${scraperSlug}`);
|
||||
throw new Error(`No site or network found for ${scraperSlug}`);
|
||||
}
|
||||
|
||||
logger.verbose(`Searching profile for '${actor.name}' on '${scraperSlug}'`);
|
||||
|
||||
const profile = await scraper.fetchProfile(actor.name, context, include);
|
||||
|
||||
if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
|
||||
logger.verbose(`Profile for '${actor.name}' not available on ${context.site?.name || context.network?.name || context.scraper}, scraper returned ${profile}`);
|
||||
throw Object.assign(new Error(`Profile for '${actor.name}' not available on ${context.site?.name || context.network?.name || context.scraper}`), { code: 'PROFILE_NOT_AVAILABLE' });
|
||||
}
|
||||
|
||||
return {
|
||||
...actor,
|
||||
...profile,
|
||||
...context,
|
||||
};
|
||||
} catch (error) {
|
||||
if (error.code !== 'PROFILE_NOT_AVAILABLE') {
|
||||
logger.error(`Failed to fetch profile for '${actor.name}' from '${scraperSlug}': ${error.message}`);
|
||||
}
|
||||
|
||||
throw error;
|
||||
}
|
||||
|
||||
if (!context.site && !context.network) {
|
||||
logger.warn(`No site or network found for ${scraperSlug}`);
|
||||
throw new Error(`No site or network found for ${scraperSlug}`);
|
||||
}
|
||||
|
||||
logger.verbose(`Searching profile for '${actor.name}' on '${scraperSlug}'`);
|
||||
|
||||
const profile = await scraper.fetchProfile(actor.name, context, include);
|
||||
|
||||
if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
|
||||
logger.verbose(`Profile for '${actor.name}' not available on ${scraperSlug}, scraper returned ${profile}`);
|
||||
throw Object.assign(new Error(`Profile for '${actor.name}' not available on ${scraperSlug}`), { code: 'PROFILE_NOT_AVAILABLE' });
|
||||
}
|
||||
|
||||
return {
|
||||
...actor,
|
||||
...profile,
|
||||
...context,
|
||||
};
|
||||
}), Promise.reject(new Error()));
|
||||
} catch (error) {
|
||||
if (error.code !== 'PROFILE_NOT_AVAILABLE') {
|
||||
|
||||
@@ -119,7 +119,6 @@ async function getPhotos(entryId, site, type = 'highres', page = 1) {
|
||||
}
|
||||
|
||||
function getEntryId(html) {
|
||||
// TODO: not working for https://www.julesjordan.com/members/scenes/jada-stevens-anal-ass-gets-oiled-up-for-james-deens-cock_vids.html
|
||||
const entryId = html.match(/showtagform\((\d+)\)/);
|
||||
|
||||
if (entryId) {
|
||||
|
||||
@@ -9,7 +9,7 @@ const slugify = require('../utils/slugify');
|
||||
|
||||
function extractTitle(originalTitle) {
|
||||
const titleComponents = originalTitle.split(' ');
|
||||
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes
|
||||
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OTS)\d+/); // detect studio prefixes
|
||||
const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
|
||||
const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ function slugify(string, delimiter = '-', {
|
||||
encode = false,
|
||||
limit = 1000,
|
||||
} = {}) {
|
||||
if (!string) {
|
||||
if (!string || typeof string !== 'string') {
|
||||
return string;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user