Added actor interpolation.
This commit is contained in:
parent
351bd046ba
commit
5085d24b70
|
@ -0,0 +1,11 @@
|
||||||
|
{
|
||||||
|
"presets": [
|
||||||
|
[
|
||||||
|
"@babel/preset-env"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"plugins": [
|
||||||
|
"@babel/plugin-transform-optional-chaining",
|
||||||
|
"@babel/plugin-syntax-import-attributes"
|
||||||
|
],
|
||||||
|
}
|
|
@ -0,0 +1,14 @@
|
||||||
|
# top-most EditorConfig file
|
||||||
|
root = true
|
||||||
|
|
||||||
|
# Unix-style newlines with a newline ending every file
|
||||||
|
[*]
|
||||||
|
end_of_line = lf
|
||||||
|
insert_final_newline = true
|
||||||
|
indent_style = tab
|
||||||
|
indent_size = 4
|
||||||
|
|
||||||
|
# Matches multiple files with brace expansion notation
|
||||||
|
# Set default charset
|
||||||
|
[*.js]
|
||||||
|
charset = utf-8
|
|
@ -0,0 +1,32 @@
|
||||||
|
{
|
||||||
|
"root": true,
|
||||||
|
"extends": [
|
||||||
|
"airbnb-base"
|
||||||
|
],
|
||||||
|
"parserOptions": {
|
||||||
|
"parser": "@babel/eslint-parser",
|
||||||
|
"ecmaVersion": 2022,
|
||||||
|
"sourceType": "module"
|
||||||
|
},
|
||||||
|
"rules": {
|
||||||
|
"default-param-last": 0,
|
||||||
|
"import/no-extraneous-dependencies": ["error", {"devDependencies": true}],
|
||||||
|
"import/prefer-default-export": 0,
|
||||||
|
"no-underscore-dangle": 0,
|
||||||
|
"import/extensions": [2, "always"],
|
||||||
|
"indent": ["error", "tab"],
|
||||||
|
"max-len": 0,
|
||||||
|
"no-console": 0,
|
||||||
|
"no-param-reassign": ["error", {
|
||||||
|
"props": true,
|
||||||
|
"ignorePropertyModificationsFor": ["state", "acc"]
|
||||||
|
}],
|
||||||
|
"no-tabs": "off",
|
||||||
|
"no-unused-vars": ["error", {"argsIgnorePattern": "^_"}],
|
||||||
|
"prefer-destructuring": 0,
|
||||||
|
"template-curly-spacing": "off",
|
||||||
|
},
|
||||||
|
"globals": {
|
||||||
|
"CONFIG": true
|
||||||
|
}
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
12
package.json
12
package.json
|
@ -16,5 +16,15 @@
|
||||||
"adult"
|
"adult"
|
||||||
],
|
],
|
||||||
"author": "DebaucheryLibrarian",
|
"author": "DebaucheryLibrarian",
|
||||||
"license": "ISC"
|
"license": "ISC",
|
||||||
|
"devDependencies": {
|
||||||
|
"@babel/cli": "^7.25.7",
|
||||||
|
"@babel/core": "^7.25.8",
|
||||||
|
"@babel/eslint-parser": "^7.25.8",
|
||||||
|
"@babel/plugin-syntax-import-attributes": "^7.25.7",
|
||||||
|
"@babel/plugin-transform-optional-chaining": "^7.25.8",
|
||||||
|
"@babel/preset-env": "^7.25.8",
|
||||||
|
"eslint": "^8.57.1",
|
||||||
|
"eslint-config-airbnb-base": "^15.0.0"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,276 @@
|
||||||
|
/**
 * Returns the value that occurs most often in `items`.
 *
 * Values are grouped by their slug, so differently formatted spellings of the
 * same value are counted together. `null` and `undefined` items are ignored.
 * On a tie, the value that reached the highest count first wins, and the first
 * spelling seen for a slug is the one returned.
 *
 * @param {Array} items - Values to count; a non-array yields `null`.
 * @param {Object} [context] - Dependencies.
 * @param {Function} [context.slugify] - Maps a value to its grouping key; defaults to the identity function.
 * @returns {*} The most frequent item, or `null` when there is none.
 */
function getMostFrequent(items, { slugify = (value) => value } = {}) {
	if (!Array.isArray(items)) {
		return null;
	}

	const counts = new Map();

	let mostFrequent = null;
	let mostFrequentKey = null;
	let highestCount = 0;

	items.forEach((item) => {
		if (item === undefined || item === null) {
			return;
		}

		// stringify the key so that e.g. 1 and '1' are counted together, like plain object keys would be
		const key = String(slugify(item));
		const count = (counts.get(key) || 0) + 1;

		counts.set(key, count);

		if (key === mostFrequentKey) {
			// same value as the current leader, keep its first spelling
			highestCount = count;
			return;
		}

		// compare counts rather than testing the leader for truthiness, so that 0, false and '' can win
		if (count > highestCount) {
			mostFrequent = item;
			mostFrequentKey = key;
			highestCount = count;
		}
	});

	return mostFrequent;
}
|
||||||
|
|
||||||
|
/**
 * Builds a date from the most frequent year, month and day of month in `dates`.
 *
 * The three components are selected independently of each other, so the
 * result does not necessarily equal any single input date.
 *
 * @param {Date[]} dates - Dates to assess; a missing or empty list yields `null`.
 * @param {Object} context - Dependencies; passed on to getMostFrequent as a whole.
 * @param {Function} context.moment - Moment factory, called with `{ year, month, date }`.
 * @returns {Date|null} The composed date, or `null` when no component could be determined.
 */
function getMostFrequentDate(dates, context) {
	if (!Array.isArray(dates) || dates.length === 0) {
		return null;
	}

	const { moment } = context;

	// getMostFrequent needs the context as well, it throws when called without it
	const year = getMostFrequent(dates.map((dateX) => dateX.getFullYear()), context);
	const month = getMostFrequent(dates.map((dateX) => dateX.getMonth()), context);
	const date = getMostFrequent(dates.map((dateX) => dateX.getDate()), context);

	if (year === null || month === null || date === null) {
		return null;
	}

	return moment({ year, month, date }).toDate();
}
|
||||||
|
|
||||||
|
/**
 * Returns the highest value in `items`.
 *
 * `null`, `undefined` and `NaN` entries are skipped. Values are compared with
 * each other rather than against `null`, so zero and negative values are
 * valid results.
 *
 * @param {Array} items - Values to compare; a non-array yields `null`.
 * @returns {*} The highest value, or `null` when there is none.
 */
function getHighest(items) {
	if (!Array.isArray(items)) {
		return null;
	}

	return items.reduce((highest, item) => {
		if (item === undefined || item === null || Number.isNaN(item)) {
			return highest;
		}

		return highest === null || item > highest ? item : highest;
	}, null);
}
|
||||||
|
|
||||||
|
/**
 * Returns the item with the greatest `length`, without reordering `items`.
 *
 * When several items share the greatest length, the first one is returned.
 * `null` and `undefined` entries are skipped.
 *
 * @param {Array} items - Strings or other values with a `length`; a non-array yields `null`.
 * @returns {*} The longest item, or `null` when there is none or the longest item is falsy (e.g. '').
 */
function getLongest(items) {
	if (!Array.isArray(items)) {
		return null;
	}

	// reduce instead of sort, sorting in place would reorder the caller's array
	const longest = items.reduce((currentLongest, item) => {
		if (item === undefined || item === null) {
			return currentLongest;
		}

		return currentLongest === null || item.length > currentLongest.length ? item : currentLongest;
	}, null);

	return longest || null;
}
|
||||||
|
|
||||||
|
/**
 * Returns the arithmetic mean of `items`, rounded to the nearest integer.
 *
 * @param {number[]} items - Values to average; a missing or empty list yields `null`.
 * @returns {number|null} The rounded average, or `null` when it is 0 or not a number.
 */
function getAverage(items) {
	if (!Array.isArray(items) || items.length === 0) {
		return null;
	}

	const total = items.reduce((sum, item) => sum + item, 0);

	// a result of 0 or NaN is reported as null, as before
	return Math.round(total / items.length) || null;
}
|
||||||
|
|
||||||
|
/**
 * Maps a camelCased profile object to a snake_cased entry.
 *
 * The returned keys are also used by interpolateProfiles to derive the list
 * of columns to clear.
 *
 * @param {Object} profile - Profile to convert.
 * @returns {Object|null} The entry, or `null` when the profile is missing or has no id.
 */
function curateProfileEntry(profile) {
	// a missing profile is treated like a profile without an id
	if (!profile?.id) {
		return null;
	}

	const curatedProfileEntry = {
		// `update` holds the id of the entry to overwrite; only `update: false` leaves the id out
		...(profile.update !== false && { id: profile.update }),
		actor_id: profile.id,
		entity_id: profile.entity?.id || null,
		date_of_birth: profile.dateOfBirth,
		date_of_death: profile.dateOfDeath,
		age: profile.age,
		url: profile.url,
		gender: profile.gender,
		orientation: profile.orientation,
		ethnicity: profile.ethnicity,
		description: profile.description,
		description_hash: profile.descriptionHash,
		birth_city: profile.placeOfBirth?.city || null,
		birth_state: profile.placeOfBirth?.state || null,
		birth_country_alpha2: profile.placeOfBirth?.country || null,
		residence_city: profile.placeOfResidence?.city || null,
		residence_state: profile.placeOfResidence?.state || null,
		residence_country_alpha2: profile.placeOfResidence?.country || null,
		cup: profile.cup,
		bust: profile.bust,
		waist: profile.waist,
		leg: profile.leg,
		thigh: profile.thigh,
		foot: profile.foot,
		hip: profile.hip,
		penis_length: profile.penisLength,
		penis_girth: profile.penisGirth,
		circumcised: profile.circumcised,
		natural_boobs: profile.naturalBoobs,
		height: profile.height,
		weight: profile.weight,
		shoe_size: profile.shoeSize,
		hair_color: profile.hairColor,
		hair_type: profile.hairType,
		eyes: profile.eyes,
		has_tattoos: profile.hasTattoos,
		has_piercings: profile.hasPiercings,
		piercings: profile.piercings,
		tattoos: profile.tattoos,
		blood_type: profile.bloodType,
		avatar_media_id: profile.avatarMediaId || null,
	};

	return curatedProfileEntry;
}
|
||||||
|
|
||||||
|
/**
 * Queries actor profiles, joined with the actor name and the avatar media row (as JSON).
 *
 * @param {Array<number|string>} [actorIdsOrNames] - Numbers are matched against the profile's
 * actor id, strings against the names of actors without an entity. All profiles are
 * selected when omitted.
 * @param {Object} context - Dependencies.
 * @param {Function} context.knex - Knex instance.
 * @returns {Promise<Object[]>} Resolves to the matching profile rows.
 */
async function fetchProfiles(actorIdsOrNames, { knex }) {
	const restrictToActors = (query) => {
		if (!actorIdsOrNames) {
			return;
		}

		const actorIds = actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number');
		const actorNames = actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string');

		query
			.whereIn('actor_id', actorIds)
			.orWhere((nameQuery) => {
				nameQuery
					.whereIn('actors.name', actorNames)
					.whereNull('actors.entity_id');
			});
	};

	const profileQuery = knex('actors_profiles')
		.select(knex.raw('actors_profiles.*, actors.name, row_to_json(media) as avatar'))
		.leftJoin('actors', 'actors.id', 'actors_profiles.actor_id')
		.modify(restrictToActors)
		.leftJoin('media', 'actors_profiles.avatar_media_id', 'media.id');

	return profileQuery;
}
|
||||||
|
|
||||||
|
/**
 * Overrides interpolated values with the values set on the main profile.
 *
 * Only the keys of `profile` are considered. A main profile value takes
 * precedence unless it is `null` or `undefined`, or the key is preserved (`id`).
 *
 * @param {Object} profile - Interpolated profile.
 * @param {Object} [mainProfile] - Main profile; when missing, a copy of `profile` is returned.
 * @returns {Object} A new, merged profile object.
 */
function mergeMainProfile(profile, mainProfile) {
	if (!mainProfile) {
		return { ...profile };
	}

	const preservedKeys = ['id'];

	return Object.fromEntries(Object.entries(profile).map(([key, value]) => {
		const mainValue = mainProfile[key];

		// a key that is absent from the main profile must not wipe the interpolated value
		const keepInterpolated = preservedKeys.includes(key) || mainValue === null || mainValue === undefined;

		return [key, keepInterpolated ? value : mainValue];
	}));
}
|
||||||
|
|
||||||
|
// properties for which the most frequently reported value is used as-is
const mostFrequentProperties = [
	'gender',
	'orientation',
	'ethnicity',
	'cup',
	'bust',
	'waist',
	'hip',
	'leg',
	'thigh',
	'foot',
	'shoe_size',
	'penis_length',
	'penis_girth',
	'circumcised',
	'hair_color',
	'eyes',
	'has_tattoos',
	'has_piercings',
	'blood_type',
];

// Groups profile rows into lists keyed by actor id.
function groupProfilesByActorId(profiles) {
	return profiles.reduce((acc, profile) => {
		if (!acc[profile.actor_id]) {
			acc[profile.actor_id] = [];
		}

		acc[profile.actor_id].push(profile);

		return acc;
	}, {});
}

// Bundles the truthy parts of a location into one object, so they can be assessed together.
function bundleLocation(country, state, city) {
	return {
		...(country && { country }),
		...(state && { state }),
		...(city && { city }),
	};
}

// Collects the values of all non-main profiles per property, plus bundled `origin` and `residence` locations.
function groupValuesByProperty(actorProfiles) {
	return actorProfiles
		.filter((profile) => profile.entity_id !== null) // main profile is merged in separately at the end
		.reduce((acc, profile) => {
			Object.entries(profile).forEach(([property, value]) => {
				// multiply by priority, increasing the odds of being the most frequent value
				const weightedValues = value === null ? [] : Array.from({ length: profile.priority }, () => value);

				acc[property] = [...(acc[property] || []), ...weightedValues];
			});

			const origin = bundleLocation(profile.birth_country_alpha2, profile.birth_state, profile.birth_city);
			const residence = bundleLocation(profile.residence_country_alpha2, profile.residence_state, profile.residence_city);

			if (Object.keys(origin).length > 0) {
				acc.origin.push(origin);
			}

			if (Object.keys(residence).length > 0) {
				acc.residence.push(residence);
			}

			return acc;
		}, { origin: [], residence: [] });
}

// Resolves the most frequent country, then the most frequent state in that country, then the most frequent city in that state.
function getMostFrequentLocation(locations, context) {
	const country = getMostFrequent(locations.map((location) => location.country), context);
	const locationsInCountry = locations.filter((location) => location.country === country);

	const state = getMostFrequent(locationsInCountry.map((location) => location.state), context);
	const locationsInState = locationsInCountry.filter((location) => !state || location.state === state);

	const city = getMostFrequent(locationsInState.map((location) => location.city), context);

	return { country, state, city };
}

// Returns the id of the avatar with the greatest height, or null when there is none.
function getLargestAvatarId(avatars) {
	return [...avatars].sort((avatarA, avatarB) => avatarB.height - avatarA.height)[0]?.id || null;
}

// Builds the interpolated profile for one actor from all of its profiles.
function interpolateActorProfile(actorId, actorProfiles, context) {
	const valuesByProperty = groupValuesByProperty(actorProfiles);

	// an actor without any entity profile has no collected values at all
	const valuesOf = (property) => valuesByProperty[property] || [];

	const profile = mostFrequentProperties.reduce((acc, property) => {
		acc[property] = getMostFrequent(valuesOf(property), context);

		return acc;
	}, { id: actorId });

	profile.height = getMostFrequent(valuesOf('height').filter((height) => height > 50 && height < 300), context); // remove unlikely values

	profile.date_of_birth = getMostFrequentDate(valuesOf('date_of_birth'), context);
	profile.date_of_death = getMostFrequentDate(valuesOf('date_of_death'), context);
	profile.age = getHighest(valuesOf('age'));

	profile.natural_boobs = profile.gender === 'male' ? null : getMostFrequent(valuesOf('natural_boobs'), context);

	// ensure most frequent country, city and state match up
	const origin = getMostFrequentLocation(valuesOf('origin'), context);

	profile.birth_country_alpha2 = origin.country;
	profile.birth_state = origin.state;
	profile.birth_city = origin.city;

	const residence = getMostFrequentLocation(valuesOf('residence'), context);

	profile.residence_country_alpha2 = residence.country;
	profile.residence_state = residence.state;
	profile.residence_city = residence.city;

	profile.weight = getAverage(valuesOf('weight'));

	profile.tattoos = getLongest(valuesOf('tattoos'));
	profile.piercings = getLongest(valuesOf('piercings'));

	const avatars = actorProfiles
		.map((actorProfile) => actorProfile.avatar)
		.filter((avatar) => avatar);

	// prefer avatars of unknown or sufficient entropy, otherwise settle for a low quality avatar
	profile.avatar_media_id = getLargestAvatarId(avatars.filter((avatar) => avatar.entropy === null || avatar.entropy > 5.5))
		|| getLargestAvatarId(avatars);

	const mainProfile = actorProfiles.find((actorProfile) => actorProfile.entity_id === null);

	// not every actor has a main profile to merge
	return mainProfile ? mergeMainProfile(profile, mainProfile) : profile;
}

// Limits an actors query to the given ids (numbers) or names of actors without an entity (strings).
function restrictToActors(builder, actorIdsOrNames) {
	if (!actorIdsOrNames) {
		return;
	}

	builder
		.whereIn('id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
		.orWhere((whereBuilder) => {
			whereBuilder
				.whereIn('name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string'))
				.whereNull('entity_id');
		});
}

/**
 * Interpolates one profile per actor from all of its profiles, and writes the
 * result to the `actors` table in a single transaction.
 *
 * The profile columns of the targeted actors are cleared first, then the
 * interpolated values are written. On failure the transaction is rolled back
 * and the error is rethrown.
 *
 * @param {Array<number|string>} [actorIdsOrNames] - Actor ids (numbers) or names (strings); all actors when omitted.
 * @param {Object} context - Dependencies: `knex`, `logger`, `omit`, and whatever the interpolation
 * helpers read from it (`slugify`, `moment`). `omit` is presumably lodash-style `omit(object, keys)`.
 * @returns {Promise<void>}
 * @throws Rethrows any error raised while fetching or writing.
 */
export async function interpolateProfiles(actorIdsOrNames, context) {
	const profiles = await fetchProfiles(actorIdsOrNames, context);
	const profilesByActorId = groupProfilesByActorId(profiles);

	context.logger.info(`Interpolating ${profiles.length} profiles from ${Object.keys(profilesByActorId).length} actors`);

	const interpolatedProfiles = Object.entries(profilesByActorId)
		.map(([actorId, actorProfiles]) => interpolateActorProfile(actorId, actorProfiles, context));

	// every curated column except the identifying ones, set to null
	const emptyProfile = Object
		.keys(context.omit(curateProfileEntry({ id: 1 }), ['id', 'actor_id', 'entity_id', 'url', 'description_hash']))
		.reduce((acc, key) => ({ ...acc, [key]: null }), {});

	const transaction = await context.knex.transaction();

	try {
		// clear existing interpolated data
		await context.knex('actors')
			.modify((modifyBuilder) => restrictToActors(modifyBuilder, actorIdsOrNames))
			.update(emptyProfile)
			.transacting(transaction);

		// insert new interpolated data
		await Promise.all(interpolatedProfiles.map((profile) => context.knex('actors')
			.where('id', profile.id)
			.update(profile)
			.transacting(transaction)));

		await transaction.commit();
	} catch (error) {
		// roll back, but let the caller know the interpolation failed
		await transaction.rollback(error);

		throw error;
	}
}
|
|
@ -0,0 +1,79 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const substitutes = {
|
||||||
|
à: 'a',
|
||||||
|
á: 'a',
|
||||||
|
ä: 'a',
|
||||||
|
å: 'a',
|
||||||
|
ã: 'a',
|
||||||
|
æ: 'ae',
|
||||||
|
ç: 'c',
|
||||||
|
è: 'e',
|
||||||
|
é: 'e',
|
||||||
|
ë: 'e',
|
||||||
|
ẽ: 'e',
|
||||||
|
ì: 'i',
|
||||||
|
í: 'i',
|
||||||
|
ï: 'i',
|
||||||
|
ĩ: 'i',
|
||||||
|
ǹ: 'n',
|
||||||
|
ń: 'n',
|
||||||
|
ñ: 'n',
|
||||||
|
ò: 'o',
|
||||||
|
ó: 'o',
|
||||||
|
ö: 'o',
|
||||||
|
õ: 'o',
|
||||||
|
ø: 'o',
|
||||||
|
œ: 'oe',
|
||||||
|
ß: 'ss',
|
||||||
|
ù: 'u',
|
||||||
|
ú: 'u',
|
||||||
|
ü: 'u',
|
||||||
|
ũ: 'u',
|
||||||
|
ỳ: 'y',
|
||||||
|
ý: 'y',
|
||||||
|
ÿ: 'y',
|
||||||
|
ỹ: 'y',
|
||||||
|
};
|
||||||
|
|
||||||
|
function slugify(strings, delimiter = '-', {
|
||||||
|
encode = false,
|
||||||
|
removeAccents = true,
|
||||||
|
removePunctuation = false,
|
||||||
|
limit = 1000,
|
||||||
|
} = {}) {
|
||||||
|
if (!strings || (typeof strings !== 'string' && !Array.isArray(strings))) {
|
||||||
|
return strings;
|
||||||
|
}
|
||||||
|
|
||||||
|
const slugComponents = []
|
||||||
|
.concat(strings)
|
||||||
|
.filter(Boolean)
|
||||||
|
.flatMap((string) => string
|
||||||
|
.trim()
|
||||||
|
.toLowerCase()
|
||||||
|
.replace(removePunctuation && /[.,:;'"_-]/g, '')
|
||||||
|
.match(/[A-Za-zÀ-ÖØ-öø-ÿ0-9]+/g));
|
||||||
|
|
||||||
|
if (!slugComponents) {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
|
const slug = slugComponents.reduce((acc, component, index) => {
|
||||||
|
const accSlug = `${acc}${index > 0 ? delimiter : ''}${component}`;
|
||||||
|
|
||||||
|
if (accSlug.length < limit) {
|
||||||
|
if (removeAccents) {
|
||||||
|
return accSlug.replace(/[à-ÿ]/g, (match) => substitutes[match] || '');
|
||||||
|
}
|
||||||
|
|
||||||
|
return accSlug;
|
||||||
|
}
|
||||||
|
|
||||||
|
return acc;
|
||||||
|
}, '');
|
||||||
|
|
||||||
|
return encode ? encodeURI(slug) : slug;
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = slugify;
|
Loading…
Reference in New Issue