/**
 * Pick the value that occurs most often in a list.
 *
 * Values are compared by their slug, so variants that slugify to the same
 * string are counted together. On a tie, the value that reached the winning
 * count first is kept, and the first occurrence of a slug is the one returned.
 *
 * @param {Array<*>} items - Candidate values; `null` and `undefined` are skipped.
 * @param {{ slugify: function(*): * }} context - Provides the slug function used as comparison key.
 * @returns {*} The most frequent value, or `null` when there are no usable values.
 */
function getMostFrequent(items, { slugify }) {
  // A Map avoids clashes with inherited object properties such as 'constructor'.
  const counts = new Map();
  let mostFrequent = null;
  let mostFrequentSlug = null;

  for (const item of items) {
    if (item === undefined || item === null) {
      continue;
    }

    const slug = String(slugify(item));
    const count = (counts.get(slug) ?? 0) + 1;

    counts.set(slug, count);

    // Compare against null explicitly: 0 (e.g. January), false and '' are valid winners.
    if (mostFrequent === null || count > counts.get(mostFrequentSlug)) {
      mostFrequent = item;
      mostFrequentSlug = slug;
    }
  }

  return mostFrequent;
}

/**
 * Build a date from the most frequent year, month and day-of-month, each
 * determined independently across the given dates.
 *
 * @param {Date[]} dates - Dates to assess.
 * @param {object} context - Needs `slugify` and `moment`; `moment` is called with
 *   `{ year, month, date }` and its result must expose `toDate()`.
 * @returns {Date|null} The composed date, or `null` when any component is missing.
 */
function getMostFrequentDate(dates, context) {
  const year = getMostFrequent(dates.map((dateX) => dateX.getFullYear()), context);
  const month = getMostFrequent(dates.map((dateX) => dateX.getMonth()), context);
  const date = getMostFrequent(dates.map((dateX) => dateX.getDate()), context);

  if (year === null || month === null || date === null) {
    return null;
  }

  // NOTE(review): the components are picked independently, so they may form a
  // day that does not exist in the month (e.g. February 31st) — confirm how
  // context.moment handles that.
  return context.moment({ year, month, date }).toDate();
}

/**
 * Return the largest item according to the `>` operator.
 *
 * @param {Array<*>} items - Values to compare.
 * @returns {*} The largest item, or `null` when no item compares greater than `null`.
 */
function getHighest(items) {
  return items.reduce((prevItem, item) => (item > prevItem ? item : prevItem), null);
}

/**
 * Return the item with the greatest `length`, without reordering the input.
 *
 * @param {Array<string|Array>} items - Values that expose a `length`.
 * @returns {*} The longest item, or `null` when the list is empty or the longest item is falsy.
 */
function getLongest(items) {
  // Sort a copy; Array.prototype.sort would otherwise reorder the caller's array.
  return [...items].sort((itemA, itemB) => itemB.length - itemA.length)[0] || null;
}

/**
 * Return the rounded arithmetic mean of a list of numbers.
 *
 * @param {number[]} items - Numbers to average.
 * @returns {number|null} The rounded mean, or `null` when the list is empty or the mean rounds to 0.
 */
function getAverage(items) {
  return Math.round(items.reduce((acc, item) => acc + item, 0) / items.length) || null;
}

/**
 * Restrict a query to the given actor IDs (numbers) or, for actors without an
 * entity, actor names (strings). Does nothing when no list is given.
 *
 * @param {object} query - Query builder exposing `whereIn`, `orWhere` and `whereNull`.
 * @param {Array<number|string>|null|undefined} actorIdsOrNames - Mixed list of IDs and names.
 * @param {{ id: string, name: string, entityId: string }} columnNames - Columns to match against.
 * @returns {void}
 */
function whereActorIdsOrNames(query, actorIdsOrNames, { id, name, entityId }) {
  if (!actorIdsOrNames) {
    return;
  }

  const ids = actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number');
  const names = actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string');

  query
    .whereIn(id, ids)
    .orWhere((builder) => {
      builder
        .whereIn(name, names)
        .whereNull(entityId);
    });
}

/**
 * Fetch the profiles of the given actors, joined with the actor name and the
 * avatar media row (as JSON in `avatar`).
 *
 * @param {Array<number|string>|null|undefined} actorIdsOrNames - Actor IDs and/or names; all profiles when omitted.
 * @param {{ knex: function }} context - Provides the query builder.
 * @returns {Promise<object[]>} The matching profile rows.
 */
async function fetchProfiles(actorIdsOrNames, { knex }) {
  return knex('actors_profiles')
    .select(knex.raw('actors_profiles.*, actors.name, row_to_json(media) as avatar'))
    .leftJoin('actors', 'actors.id', 'actors_profiles.actor_id')
    .modify((query) => {
      whereActorIdsOrNames(query, actorIdsOrNames, {
        id: 'actor_id',
        name: 'actors.name',
        entityId: 'actors.entity_id',
      });
    })
    .leftJoin('media', 'actors_profiles.avatar_media_id', 'media.id');
}

/**
 * Overlay the main profile on an interpolated profile: every value the main
 * profile defines wins, except for preserved keys.
 *
 * @param {object} profile - Interpolated profile.
 * @param {object|null|undefined} mainProfile - Main profile, if the actor has one.
 * @returns {object} `profile` itself when there is no main profile, otherwise a new merged object.
 */
function mergeMainProfile(profile, mainProfile) {
  if (!mainProfile) {
    return profile;
  }

  const preservedKeys = ['id'];

  return Object.fromEntries(Object.entries(profile).map(([key, value]) => [
    key,
    // ?? also covers keys missing from the main profile, so the interpolated
    // value is kept instead of being replaced by undefined.
    preservedKeys.includes(key) ? value : mainProfile[key] ?? value,
  ]));
}

/**
 * Bundle the country, state and city columns sharing a prefix into one
 * location object, leaving out the parts that are not set.
 *
 * @param {object} profile - Profile row.
 * @param {string} prefix - Column prefix, `birth` or `residence`.
 * @returns {{ country?: string, state?: string, city?: string }} The location; empty when nothing is set.
 */
function buildProfileLocation(profile, prefix) {
  const country = profile[`${prefix}_country_alpha2`];
  const state = profile[`${prefix}_state`];
  const city = profile[`${prefix}_city`];

  return {
    ...(country && { country }),
    ...(state && { state }),
    ...(city && { city }),
  };
}

/**
 * Collect the values of every property across the given profiles.
 *
 * Each non-null value is repeated `profile.priority` times, increasing the
 * odds of it being the most frequent value. Locations are additionally
 * bundled under `origin` and `residence` so country, state and city can be
 * assessed together.
 *
 * @param {object[]} profiles - Profile rows to group.
 * @returns {Object<string, Array>} Lists of values keyed by property name.
 */
function groupProfileValues(profiles) {
  const valuesByProperty = { origin: [], residence: [] };

  for (const profile of profiles) {
    for (const [property, value] of Object.entries(profile)) {
      valuesByProperty[property] ??= [];

      if (value !== null) {
        valuesByProperty[property].push(...Array.from({ length: profile.priority }, () => value));
      }
    }

    const origin = buildProfileLocation(profile, 'birth');
    const residence = buildProfileLocation(profile, 'residence');

    if (Object.keys(origin).length > 0) {
      valuesByProperty.origin.push(origin);
    }

    if (Object.keys(residence).length > 0) {
      valuesByProperty.residence.push(residence);
    }
  }

  return valuesByProperty;
}

/**
 * Determine the most frequent location, ensuring the most frequent city lies
 * in the most frequent state, which lies in the most frequent country.
 *
 * @param {Array<{ country?: string, state?: string, city?: string }>} locations - Bundled locations.
 * @param {object} context - Passed on to `getMostFrequent`.
 * @returns {{ country: *, state: *, city: * }} The selected parts; `null` where undetermined.
 */
function getMostFrequentLocation(locations, context) {
  const country = getMostFrequent(locations.map((location) => location.country), context);
  const locationsInCountry = locations.filter((location) => location.country === country);

  const state = getMostFrequent(locationsInCountry.map((location) => location.state), context);
  // When no state is known, every location in the country remains eligible.
  const locationsInState = locationsInCountry.filter((location) => !state || location.state === state);

  const city = getMostFrequent(locationsInState.map((location) => location.city), context);

  return { country, state, city };
}

/**
 * Return the ID of the avatar with the greatest height among those accepted
 * by the predicate.
 *
 * @param {Array<object|null>} avatars - Avatar media rows.
 * @param {function(object): *} isEligible - Decides whether an avatar may be selected.
 * @returns {*} The avatar ID, or `null` when none qualifies.
 */
function getLargestAvatarId(avatars, isEligible) {
  return avatars
    .filter((avatar) => isEligible(avatar))
    .sort((avatarA, avatarB) => avatarB.height - avatarA.height)[0]?.id || null;
}

/**
 * Interpolate a single actor's profile from all of that actor's profiles.
 *
 * @param {string} actorId - Actor ID.
 * @param {object[]} actorProfiles - All profile rows of the actor.
 * @param {string[]} columns - Column names of the `actors` table.
 * @param {string[]} preservedKeys - Columns that generic interpolation must not touch.
 * @param {object} context - Needs `slugify` and `moment`.
 * @returns {object} Column values to write to the actor row, including `id`.
 */
function interpolateActorProfile(actorId, actorProfiles, columns, preservedKeys, context) {
  const mainProfile = actorProfiles.find((actorProfile) => actorProfile.entity_id === null);

  if (mainProfile && actorProfiles.length === 1) {
    // no other profiles to interpolate
    // NOTE(review): this copies every actors column from the profile row,
    // including the preserved ones such as entity_id — confirm this is intended.
    return {
      ...Object.fromEntries(columns.map((key) => [key, mainProfile[key]])),
      id: actorId,
    };
  }

  // main profile is merged in separately at the end
  const valuesByProperty = groupProfileValues(actorProfiles.filter((actorProfile) => actorProfile.entity_id !== null));
  // Properties no profile contributed to are treated as having no values.
  const valuesOf = (property) => valuesByProperty[property] ?? [];

  const origin = getMostFrequentLocation(valuesOf('origin'), context);
  const residence = getMostFrequentLocation(valuesOf('residence'), context);
  const avatars = actorProfiles.map((actorProfile) => actorProfile.avatar);

  const profile = {
    id: actorId,
  };

  profile.height = getMostFrequent(valuesOf('height').filter((height) => height > 50 && height < 300), context); // remove unlikely values
  profile.date_of_birth = getMostFrequentDate(valuesOf('date_of_birth'), context);
  profile.date_of_death = getMostFrequentDate(valuesOf('date_of_death'), context);
  profile.age = getHighest(valuesOf('age'));

  profile.birth_country_alpha2 = origin.country;
  profile.birth_state = origin.state;
  profile.birth_city = origin.city;

  profile.residence_country_alpha2 = residence.country;
  profile.residence_state = residence.state;
  profile.residence_city = residence.city;

  profile.weight = getAverage(valuesOf('weight'));
  profile.tattoos = getLongest(valuesOf('tattoos'));
  profile.piercings = getLongest(valuesOf('piercings'));

  // prefer the largest avatar with unknown or sufficient entropy, otherwise settle for a low quality avatar
  profile.avatar_media_id = getLargestAvatarId(avatars, (avatar) => avatar && (avatar.entropy === null || avatar.entropy > 5.5))
    || getLargestAvatarId(avatars, (avatar) => avatar);

  // generic handling for remaining properties
  columns.forEach((key) => {
    if (Object.hasOwn(valuesByProperty, key) && !Object.hasOwn(profile, key) && !preservedKeys.includes(key)) {
      profile[key] = getMostFrequent(valuesByProperty[key], context);
    }
  });

  return mergeMainProfile(profile, mainProfile);
}

/**
 * Derive each actor's data from the profiles collected for that actor and
 * write the result to the `actors` table.
 *
 * Inside one transaction, the non-preserved columns of the targeted actors are
 * first cleared, then the interpolated values are written. If a write or the
 * commit fails, the error is logged and the transaction rolled back.
 *
 * @param {Array<number|string>|null|undefined} actorIdsOrNames - Actor IDs and/or names; all actors when omitted.
 * @param {object} context - Needs `knex`, `logger`, `slugify` and `moment`.
 * @param {{ refreshView?: boolean }} [options] - Set `refreshView` to refresh the `actors_meta` materialized view afterwards.
 * @returns {Promise<void>}
 * @throws Rejects when fetching the profiles or clearing the existing data fails.
 */
export async function interpolateProfiles(actorIdsOrNames, context, options = {}) {
  const profiles = await fetchProfiles(actorIdsOrNames, context);
  const columns = Object.keys(await context.knex.table('actors').columnInfo());
  const preservedKeys = ['id', 'name', 'slug', 'entity_id', 'entry_id'];

  const profilesByActorId = {};

  for (const profile of profiles) {
    (profilesByActorId[profile.actor_id] ??= []).push(profile);
  }

  context.logger.info(`Interpolating ${profiles.length} profiles from ${Object.keys(profilesByActorId).length} actors`);

  const interpolatedProfiles = Object.entries(profilesByActorId)
    .map(([actorId, actorProfiles]) => interpolateActorProfile(actorId, actorProfiles, columns, preservedKeys, context));

  const transaction = await context.knex.transaction();

  // clear existing interpolated data
  const emptyProfile = Object.fromEntries(columns.filter((key) => !preservedKeys.includes(key)).map((key) => [key, null]));

  try {
    await context.knex('actors')
      .modify((modifyBuilder) => {
        whereActorIdsOrNames(modifyBuilder, actorIdsOrNames, {
          id: 'id',
          name: 'name',
          entityId: 'entity_id',
        });
      })
      .update(emptyProfile)
      .transacting(transaction);
  } catch (error) {
    // Do not leave the transaction open; the caller still sees the failure.
    await transaction.rollback();
    throw error;
  }

  // insert new interpolated data
  try {
    await Promise.all(interpolatedProfiles.map((profile) => context.knex('actors')
      .where('id', profile.id)
      .update(profile)
      .transacting(transaction)));

    await transaction.commit();
  } catch (error) {
    context.logger.error(error);
    await transaction.rollback();
  }

  if (options.refreshView) {
    await context.knex.schema.refreshMaterializedView('actors_meta');
  }
}