Fixed profile location interpolation. Generalized ethnicity, hair color and eye color.

This commit is contained in:
ThePendulum 2020-05-19 01:10:32 +02:00
parent 4826ae8571
commit 0c4628677f
16 changed files with 1976 additions and 1862 deletions

View File

@ -190,6 +190,22 @@
</span>
</li>
<li
v-if="actor.eyes"
class="bio-item eyes hideable"
>
<dfn class="bio-label"><Icon icon="eye" />Eyes</dfn>
<span>{{ actor.eyes }}</span>
</li>
<li
v-if="actor.hair"
class="bio-item hair hideable"
>
<dfn class="bio-label"><Icon icon="haircut" />Hair</dfn>
<span>{{ actor.hair }}</span>
</li>
<li
v-if="actor.hasTattoos"
class="bio-item tattoos hideable"
@ -513,7 +529,9 @@ export default {
transform: scaleX(-1);
}
.ethnicity {
.ethnicity,
.hair,
.eyes {
text-transform: capitalize;
}

View File

@ -5,13 +5,15 @@
class="summary"
>Searching...</span>
<div v-if="!loading && actors.length > 0">
<span
v-if="!loading"
class="summary"
>Found {{ actors.length }} actors for '{{ query }}'</span>
<div class="tiles">
<div
v-if="!loading && actors.length > 0"
class="tiles"
>
<Actor
v-for="actor in actors"
:key="`actor-${actor.id}`"
@ -19,16 +21,16 @@
:alias="actor.aliasFor && actor"
/>
</div>
</div>
<div v-if="!loading && actors.length > 0">
<span
v-if="!loading"
class="summary"
>Found {{ releases.length }} releases for '{{ query }}'</span>
<Releases :releases="releases" />
</div>
<Releases
v-if="!loading && releases.length > 0"
:releases="releases"
/>
</div>
</template>

View File

@ -0,0 +1,6 @@
<!-- Generated by IcoMoon.io -->
<svg version="1.1" xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 16 16">
<title>haircut</title>
<path d="M7 10c-0.364 0-0.706 0.098-1 0.269v-2.769c0-0.024-0.002-0.047-0.005-0.071l-1-7c-0.035-0.246-0.246-0.429-0.495-0.429s-0.46 0.183-0.495 0.429l-1 7c-0.003 0.023-0.005 0.047-0.005 0.071v2.769c-0.294-0.171-0.636-0.269-1-0.269-1.103 0-2 0.897-2 2s0.897 2 2 2 2-0.897 2-2v-4c0-0.276 0.224-0.5 0.5-0.5s0.5 0.224 0.5 0.5v4c0 0.801 0.474 1.494 1.156 1.813-0.1 0.214-0.156 0.449-0.156 0.687 0 0.827 0.673 1.5 1.5 1.5 0.276 0 0.5-0.224 0.5-0.5s-0.224-0.5-0.5-0.5c-0.276 0-0.5-0.224-0.5-0.5 0-0.197 0.115-0.41 0.277-0.52 0.972-0.135 1.723-0.972 1.723-1.98 0-1.103-0.897-2-2-2zM2 13c-0.551 0-1-0.449-1-1s0.449-1 1-1 1 0.449 1 1-0.449 1-1 1zM7 13c-0.551 0-1-0.449-1-1s0.449-1 1-1 1 0.449 1 1-0.449 1-1 1z"></path>
<path d="M15 0h-3.5c-0.276 0-0.5 0.224-0.5 0.5s0.224 0.5 0.5 0.5h2.5v1h-2.5c-0.276 0-0.5 0.224-0.5 0.5s0.224 0.5 0.5 0.5h2.5v1h-2.5c-0.276 0-0.5 0.224-0.5 0.5s0.224 0.5 0.5 0.5h2.5v1h-2.5c-0.276 0-0.5 0.224-0.5 0.5s0.224 0.5 0.5 0.5h2.5v1h-2.5c-0.276 0-0.5 0.224-0.5 0.5s0.224 0.5 0.5 0.5h2.5v5c0 0.552 0.448 1 1 1s1-0.448 1-1v-13c0-0.552-0.448-1-1-1z"></path>
</svg>

After

Width:  |  Height:  |  Size: 1.2 KiB

View File

@ -47,6 +47,8 @@ function initActorActions(store, _router) {
heightImperial: height(units:IMPERIAL)
weightMetric: weight(units:METRIC)
weightImperial: weight(units:IMPERIAL)
hair
eyes
hasTattoos
hasPiercings
tattoos

View File

@ -14,6 +14,7 @@ exports.up = knex => Promise.resolve()
table.integer('code', 3);
table.string('nationality');
table.integer('priority', 2)
.defaultTo(0);
}))
@ -344,6 +345,8 @@ exports.up = knex => Promise.resolve()
.inTable('sites');
table.unique(['actor_id', 'network_id', 'site_id']);
table.integer('priority', 4)
.defaultTo(1);
table.string('real_name');

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

View File

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

View File

@ -77,7 +77,7 @@ const countries = [
code: 32,
alpha2: 'AR',
alpha3: 'ARG',
nationality: 'Argentine',
nationality: 'Argentine, Argentinian',
},
{
name: 'Armenia',

View File

@ -20,11 +20,56 @@ const slugify = require('./utils/slugify');
const capitalize = require('./utils/capitalize');
const resolvePlace = require('./utils/resolve-place');
// Lookup table from a hair color description (lowercased and trimmed by the
// caller) to the generalized hair color stored on a curated profile.
// Descriptions that are not listed resolve to `undefined`.
//
// Built on a null-prototype object because the keys used for lookup come from
// scraped data: on a plain object literal, inputs such as 'constructor' or
// 'toString' would resolve to inherited Object.prototype members (truthy
// functions) and be mistaken for a recognized color.
const hairColors = Object.assign(Object.create(null), {
  'jet-black': 'black',
  'red-head': 'red',
  'soft-black': 'black',
  black: 'black',
  blonde: 'blonde',
  blondie: 'blonde',
  brown: 'brown',
  brunette: 'brown',
  fair: 'blonde',
  raven: 'black',
  red: 'red',
  redhead: 'red',
});
// Lookup table from an eye color description (lowercased and trimmed by the
// caller) to the generalized eye color stored on a curated profile.
// Descriptions that are not listed resolve to `undefined`.
//
// Built on a null-prototype object because the keys used for lookup come from
// scraped data: on a plain object literal, inputs such as 'constructor' or
// 'valueof' style names would resolve to inherited Object.prototype members
// and be mistaken for a recognized color.
const eyeColors = Object.assign(Object.create(null), {
  blue: 'blue',
  brown: 'brown',
  dark: 'brown',
  gray: 'gray',
  green: 'green',
  grey: 'gray',
  hazel: 'hazel',
});
// Lookup table from an ethnicity description (lowercased and trimmed by the
// caller) to the generalized ethnicity stored on a curated profile.
// Descriptions that are not listed resolve to `undefined`.
//
// Built on a null-prototype object because the keys used for lookup come from
// scraped data: on a plain object literal, inputs such as 'constructor' would
// resolve to inherited Object.prototype members and be mistaken for a
// recognized ethnicity.
const ethnicities = Object.assign(Object.create(null), {
  'african american': 'black',
  'african-american': 'black',
  'native american': 'native american',
  african: 'black',
  // every other generalized value maps to itself; without this entry the
  // correctly spelled 'arabic' would be rejected as unrecognized
  arabic: 'arabic',
  aravic: 'arabic', // misspelling kept so existing inputs keep resolving
  asian: 'asian',
  black: 'black',
  caucasian: 'white',
  european: 'white',
  hispanic: 'latina',
  indian: 'indian',
  japanese: 'japanese',
  latina: 'latina',
  latino: 'latino',
  white: 'white',
});
function getMostFrequent(items) {
const { mostFrequent } = items.reduce((acc, item) => {
acc.counts[item] = (acc.counts[item] || 0) + 1;
const slug = slugify(item);
if (!acc.mostFrequent || acc.counts[item] > acc.counts[acc.mostFrequent]) {
acc.counts[slug] = (acc.counts[slug] || 0) + 1;
if (!acc.mostFrequent || acc.counts[slug] > acc.counts[slugify(acc.mostFrequent)]) {
acc.mostFrequent = item;
}
@ -144,9 +189,11 @@ async function curateProfile(profile) {
curatedProfile.description = profile.description?.trim() || null;
curatedProfile.nationality = profile.nationality?.trim() || null; // used to derive country when country not available
curatedProfile.ethnicity = profile.ethnicity?.trim() || null;
curatedProfile.hair = profile.hair?.trim() || null;
curatedProfile.eyes = profile.eyes?.trim() || null;
curatedProfile.ethnicity = ethnicities[profile.ethnicity?.trim().toLowerCase()] || null;
curatedProfile.hair = hairColors[profile.hair?.trim().toLowerCase()] || null;
curatedProfile.eyes = eyeColors[profile.eyes?.trim().toLowerCase()] || null;
curatedProfile.tattoos = profile.tattoos?.trim() || null;
curatedProfile.piercings = profile.piercings?.trim() || null;
@ -211,6 +258,10 @@ async function curateProfile(profile) {
curatedProfile.releases = toBaseReleases(profile.releases);
if (profile.ethnicity && !curatedProfile.ethnicity) logger.warn(`Unrecognized ethnicity returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.ethnicity}`);
if (profile.hair && !curatedProfile.hair) logger.warn(`Unrecognized hair color returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.hair}`);
if (profile.eyes && !curatedProfile.eyes) logger.warn(`Unrecognized eye color returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.eyes}`);
return curatedProfile;
} catch (error) {
logger.error(`Failed to curate '${profile.name}': ${error.message}`);
@ -234,15 +285,28 @@ async function interpolateProfiles(actors) {
}), {});
const interpolatedProfiles = Object.entries(profilesByActorId).map(([actorId, actorProfiles]) => {
// group values from each profile
const valuesByProperty = actorProfiles.reduce((acc, profile) => Object
.entries(profile)
.reduce((profileAcc, [property, value]) => ({
...profileAcc,
[property]: [
...(acc[property] || []),
...(value === null ? [] : [value]),
...(value === null ? [] : Array.from({ length: profile.priority }, () => value)), // multiply by priority, increasing the odds of being the most frequent value
],
}), {}), {});
}), {
// bundle location values so they can be assessed together, ensuring the most frequent city lies within the most frequent state, which in turn lies within the most frequent country
origin: [...acc.origin || [], {
...(profile.birth_country_alpha2 && { country: profile.birth_country_alpha2 }),
...(profile.birth_state && { state: profile.birth_state }),
...(profile.birth_city && { city: profile.birth_city }),
}].filter(location => Object.keys(location).length > 0),
residence: [...acc.residence || [], {
...(profile.residence_country_alpha2 && { country: profile.residence_country_alpha2 }),
...(profile.residence_state && { state: profile.residence_state }),
...(profile.residence_city && { city: profile.residence_city }),
}].filter(location => Object.keys(location).length > 0),
}), {});
const avatars = actorProfiles.map(profile => profile.avatar_media_id && ({
id: profile.avatar_media_id,
@ -251,39 +315,50 @@ async function interpolateProfiles(actors) {
size: profile.avatar_size,
})).filter(Boolean);
const mostFrequentValues = [
'gender',
'ethnicity',
'cup',
'bust',
'waist',
'hip',
'natural_boobs',
'height',
'hair',
'eyes',
'has_tattoos',
'has_piercings',
].reduce((acc, property) => ({
...acc,
[property]: getMostFrequent(valuesByProperty[property]),
}), {});
const profile = {
id: actorId,
...mostFrequentValues,
};
profile.gender = getMostFrequent(valuesByProperty.gender);
profile.ethnicity = getMostFrequent(valuesByProperty.ethnicity.map(ethnicity => ethnicity.toLowerCase()));
profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);
// TODO: fix city, state and country not matching
profile.birth_city = getMostFrequent(valuesByProperty.birth_city);
profile.birth_state = getMostFrequent(valuesByProperty.birth_state);
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.birth_country_alpha2);
// ensure most frequent country, city and state match up
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.origin.map(location => location.country));
const remainingOriginCountries = valuesByProperty.origin.filter(location => location.country === profile.birth_country_alpha2);
profile.residence_city = getMostFrequent(valuesByProperty.residence_city);
profile.residence_state = getMostFrequent(valuesByProperty.residence_state);
profile.residence_country_alpha2 = getMostFrequent(valuesByProperty.residence_country_alpha2);
profile.birth_state = getMostFrequent(remainingOriginCountries.map(location => location.state));
const remainingOriginStates = remainingOriginCountries.filter(location => !profile.birth_state || location.state === profile.birth_state);
profile.cup = getMostFrequent(valuesByProperty.cup);
profile.bust = getMostFrequent(valuesByProperty.bust);
profile.waist = getMostFrequent(valuesByProperty.waist);
profile.hip = getMostFrequent(valuesByProperty.hip);
profile.natural_boobs = getMostFrequent(valuesByProperty.natural_boobs);
profile.birth_city = getMostFrequent(remainingOriginStates.map(location => location.city));
profile.hair = getMostFrequent(valuesByProperty.hair.map(hair => hair.toLowerCase()));
profile.eyes = getMostFrequent(valuesByProperty.eyes.map(eyes => eyes.toLowerCase()));
profile.residence_country_alpha2 = getMostFrequent(valuesByProperty.residence.map(location => location.country));
const remainingResidenceCountries = valuesByProperty.residence.filter(location => location.country === profile.residence_country_alpha2);
profile.residence_state = getMostFrequent(remainingResidenceCountries.map(location => location.state));
const remainingResidenceStates = remainingResidenceCountries.filter(location => !profile.residence_state || location.state === profile.residence_state);
profile.residence_city = getMostFrequent(remainingResidenceStates.map(location => location.city));
profile.weight = getAverage(valuesByProperty.weight);
profile.height = getMostFrequent(valuesByProperty.height);
profile.has_tattoos = getMostFrequent(valuesByProperty.has_tattoos);
profile.has_piercings = getMostFrequent(valuesByProperty.has_piercings);
profile.tattoos = getLongest(valuesByProperty.tattoos);
profile.piercings = getLongest(valuesByProperty.piercings);
@ -366,7 +441,9 @@ async function upsertProfiles(profiles) {
async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug) {
const profiles = Promise.map(sources, async (source) => {
try {
// config may group sources to try until success
return await [].concat(source).reduce(async (outcome, scraperSlug) => outcome.catch(async () => {
try {
const scraper = scrapers[scraperSlug];
const context = {
site: sitesBySlug[scraperSlug] || null,
@ -389,8 +466,8 @@ async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug) {
const profile = await scraper.fetchProfile(actor.name, context, include);
if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
logger.verbose(`Profile for '${actor.name}' not available on ${scraperSlug}, scraper returned ${profile}`);
throw Object.assign(new Error(`Profile for '${actor.name}' not available on ${scraperSlug}`), { code: 'PROFILE_NOT_AVAILABLE' });
logger.verbose(`Profile for '${actor.name}' not available on ${context.site?.name || context.network?.name || context.scraper}, scraper returned ${profile}`);
throw Object.assign(new Error(`Profile for '${actor.name}' not available on ${context.site?.name || context.network?.name || context.scraper}`), { code: 'PROFILE_NOT_AVAILABLE' });
}
return {
@ -398,6 +475,13 @@ async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug) {
...profile,
...context,
};
} catch (error) {
if (error.code !== 'PROFILE_NOT_AVAILABLE') {
logger.error(`Failed to fetch profile for '${actor.name}' from '${scraperSlug}': ${error.message}`);
}
throw error;
}
}), Promise.reject(new Error()));
} catch (error) {
if (error.code !== 'PROFILE_NOT_AVAILABLE') {

View File

@ -119,7 +119,6 @@ async function getPhotos(entryId, site, type = 'highres', page = 1) {
}
function getEntryId(html) {
// TODO: not working for https://www.julesjordan.com/members/scenes/jada-stevens-anal-ass-gets-oiled-up-for-james-deens-cock_vids.html
const entryId = html.match(/showtagform\((\d+)\)/);
if (entryId) {

View File

@ -9,7 +9,7 @@ const slugify = require('../utils/slugify');
function extractTitle(originalTitle) {
const titleComponents = originalTitle.split(' ');
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OTS)\d+/); // detect studio prefixes
const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;

View File

@ -4,7 +4,7 @@ function slugify(string, delimiter = '-', {
encode = false,
limit = 1000,
} = {}) {
if (!string) {
if (!string || typeof string !== 'string') {
return string;
}