/**
 * Return the item that occurs most often in a list.
 *
 * Items are grouped by `slugify(item)`, so items that slugify to the same
 * value count as the same candidate. `null` and `undefined` items are skipped.
 * On a tie, the item that reached the highest count first wins.
 *
 * @param {Array|undefined|null} items - values to count; a missing list is treated as empty
 * @param {{ slugify: Function }} context - provides the `slugify` function used for grouping
 * @returns {*} the most frequent item, or `null` when there is no countable item
 */
function getMostFrequent(items, { slugify }) {
  // null-prototype object so slugs such as 'constructor' cannot collide with inherited properties
  const counts = Object.create(null);

  let mostFrequent = null;
  let mostFrequentSlug = null;
  // tracked separately from the value itself: 0 and false are legitimate winners
  let hasCandidate = false;

  for (const item of items ?? []) {
    if (item === undefined || item === null) {
      continue;
    }

    const slug = slugify(item);

    counts[slug] = (counts[slug] || 0) + 1;

    if (!hasCandidate || counts[slug] > counts[mostFrequentSlug]) {
      mostFrequent = item;
      mostFrequentSlug = slug;
      hasCandidate = true;
    }
  }

  return mostFrequent;
}

/**
 * Build a date from the most frequent year, month and day of month.
 *
 * Each component is selected independently of the others.
 *
 * @param {Date[]|undefined|null} dates - dates to assess; a missing list is treated as empty
 * @param {object} context - provides `slugify` (for counting) and `moment` (for building the date)
 * @returns {Date|null} the assembled date, or `null` when any component is unavailable
 */
function getMostFrequentDate(dates, context) {
  const knownDates = dates ?? [];

  const year = getMostFrequent(knownDates.map((dateX) => dateX.getFullYear()), context);
  const month = getMostFrequent(knownDates.map((dateX) => dateX.getMonth()), context);
  const date = getMostFrequent(knownDates.map((dateX) => dateX.getDate()), context);

  if (year === null || month === null || date === null) {
    return null;
  }

  return context.moment({ year, month, date }).toDate();
}

/**
 * Return the greatest item according to the `>` operator.
 *
 * @param {Array|undefined|null} items - values to compare; a missing list is treated as empty
 * @returns {*} the greatest item, or `null` when no item compares greater than `null`
 */
function getHighest(items) {
  return (items ?? []).reduce((prevItem, item) => (item > prevItem ? item : prevItem), null);
}

/**
 * Return the item with the greatest `length`.
 *
 * @param {Array|undefined|null} items - values with a `length`; a missing list is treated as empty
 * @returns {*} the longest item, or `null` when the list is empty or the longest item is falsy
 */
function getLongest(items) {
  // sort a copy, Array.prototype.sort would otherwise reorder the caller's array
  const byLengthDescending = [...(items ?? [])].sort((itemA, itemB) => itemB.length - itemA.length);

  return byLengthDescending[0] || null;
}

/**
 * Return the rounded arithmetic mean of a list of numbers.
 *
 * @param {number[]|undefined|null} items - numbers to average; a missing list is treated as empty
 * @returns {number|null} the rounded mean, or `null` when the list is empty or the mean rounds to 0
 */
function getAverage(items) {
  const values = items ?? [];

  return Math.round(values.reduce((acc, item) => acc + item, 0) / values.length) || null;
}

/**
 * Convert a camelCase profile object into an entry with snake_case keys.
 *
 * The `id` key is only included when `profile.update` is not `false`.
 *
 * @param {object} profile - profile with camelCase properties
 * @returns {object|null} the curated entry, or `null` when the profile has no `id`
 */
function curateProfileEntry(profile) {
  if (!profile.id) {
    return null;
  }

  const curatedProfileEntry = {
    ...(profile.update !== false && { id: profile.update }),
    actor_id: profile.id,
    entity_id: profile.entity?.id || null,
    date_of_birth: profile.dateOfBirth,
    date_of_death: profile.dateOfDeath,
    age: profile.age,
    url: profile.url,
    gender: profile.gender,
    orientation: profile.orientation,
    ethnicity: profile.ethnicity,
    description: profile.description,
    description_hash: profile.descriptionHash,
    birth_city: profile.placeOfBirth?.city || null,
    birth_state: profile.placeOfBirth?.state || null,
    birth_country_alpha2: profile.placeOfBirth?.country || null,
    residence_city: profile.placeOfResidence?.city || null,
    residence_state: profile.placeOfResidence?.state || null,
    residence_country_alpha2: profile.placeOfResidence?.country || null,
    cup: profile.cup,
    bust: profile.bust,
    waist: profile.waist,
    leg: profile.leg,
    thigh: profile.thigh,
    foot: profile.foot,
    hip: profile.hip,
    penis_length: profile.penisLength,
    penis_girth: profile.penisGirth,
    circumcised: profile.circumcised,
    natural_boobs: profile.naturalBoobs,
    boobs_volume: profile.boobsVolume,
    boobs_implant: profile.boobsImplant,
    boobs_placement: profile.boobsPlacement,
    boobs_surgeon: profile.boobsSurgeon,
    natural_butt: profile.naturalButt,
    butt_volume: profile.buttVolume,
    butt_implant: profile.buttImplant,
    natural_lips: profile.naturalLips,
    lips_volume: profile.lipsVolume,
    height: profile.height,
    weight: profile.weight,
    shoe_size: profile.shoeSize,
    hair_color: profile.hairColor,
    hair_type: profile.hairType,
    eyes: profile.eyes,
    has_tattoos: profile.hasTattoos,
    has_piercings: profile.hasPiercings,
    piercings: profile.piercings,
    tattoos: profile.tattoos,
    agency: profile.agency,
    blood_type: profile.bloodType,
    avatar_media_id: profile.avatarMediaId || null,
  };

  return curatedProfileEntry;
}

/**
 * Fetch rows from `actors_profiles`, joined with the actor name and the avatar media row as JSON.
 *
 * When `actorIdsOrNames` is given, numeric entries are matched against `actor_id`, and string
 * entries against `actors.name` for actors whose `entity_id` is null. Without it, no filter
 * is applied.
 *
 * @param {Array<number|string>|undefined|null} actorIdsOrNames - actor IDs and/or names to select
 * @param {{ knex: Function }} context - provides the knex instance
 * @returns {Promise<object[]>} the matching profile rows
 */
async function fetchProfiles(actorIdsOrNames, { knex }) {
  return knex('actors_profiles')
    .select(knex.raw('actors_profiles.*, actors.name, row_to_json(media) as avatar'))
    .leftJoin('actors', 'actors.id', 'actors_profiles.actor_id')
    .modify((query) => {
      if (actorIdsOrNames) {
        query
          .whereIn('actor_id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
          .orWhere((builder) => {
            builder
              .whereIn('actors.name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string'))
              .whereNull('actors.entity_id');
          });
      }
    })
    .leftJoin('media', 'actors_profiles.avatar_media_id', 'media.id');
}

/**
 * Overlay the main profile on top of an interpolated profile.
 *
 * For every key of `profile`, the main profile's value is used unless it is `null`.
 * The `id` key always keeps the interpolated profile's value.
 *
 * @param {object} profile - interpolated profile
 * @param {object|undefined} mainProfile - main profile row, if the actor has one
 * @returns {object} a new object with the merged values
 */
function mergeMainProfile(profile, mainProfile) {
  if (!mainProfile) {
    // no main profile to overlay, the interpolated values stand on their own
    return { ...profile };
  }

  const preservedKeys = ['id'];

  const mergedProfile = Object.fromEntries(Object.entries(profile).map(([key, value]) => [
    key,
    mainProfile[key] === null || preservedKeys.includes(key)
      ? value
      : mainProfile[key],
  ]));

  return mergedProfile;
}

// properties for which the most frequent value across an actor's profiles is selected as-is
const mostFrequentProfileProperties = [
  'gender',
  'orientation',
  'ethnicity',
  'cup',
  'bust',
  'waist',
  'hip',
  'leg',
  'thigh',
  'foot',
  'shoe_size',
  'penis_length',
  'penis_girth',
  'circumcised',
  'natural_boobs',
  'boobs_volume',
  'boobs_implant',
  'boobs_placement',
  'boobs_surgeon',
  'natural_butt',
  'butt_volume',
  'butt_implant',
  'natural_lips',
  'lips_volume',
  'hair_color',
  'eyes',
  'has_tattoos',
  'has_piercings',
  'agency',
  'blood_type',
];

// Bundle the truthy parts of a location into one object, so they can be assessed together.
function bundleLocation(country, state, city) {
  return {
    ...(country && { country }),
    ...(state && { state }),
    ...(city && { city }),
  };
}

// Collect the values of every property across the given profiles, each value repeated `priority` times.
function collectValuesByProperty(sourceProfiles) {
  const hasKeys = (location) => Object.keys(location).length > 0;

  return sourceProfiles.reduce((acc, profile) => {
    const collected = {
      // bundle location values so they can be assessed together, to ensure the most frequent city is in the most frequent state is in the most frequent country
      origin: [
        ...(acc.origin || []),
        bundleLocation(profile.birth_country_alpha2, profile.birth_state, profile.birth_city),
      ].filter(hasKeys),
      residence: [
        ...(acc.residence || []),
        bundleLocation(profile.residence_country_alpha2, profile.residence_state, profile.residence_city),
      ].filter(hasKeys),
    };

    for (const [property, value] of Object.entries(profile)) {
      // multiply by priority, increasing the odds of being the most frequent value
      const weightedValues = value === null
        ? []
        : Array.from({ length: profile.priority }, () => value);

      collected[property] = [...(acc[property] || []), ...weightedValues];
    }

    return collected;
  }, {});
}

// Select the most frequent country, then the most frequent state within it, then the most frequent city within that.
function getMostFrequentLocation(locations, context) {
  const country = getMostFrequent(locations.map((location) => location.country), context);
  const locationsInCountry = locations.filter((location) => location.country === country);

  const state = getMostFrequent(locationsInCountry.map((location) => location.state), context);
  const locationsInState = locationsInCountry.filter((location) => !state || location.state === state);

  const city = getMostFrequent(locationsInState.map((location) => location.city), context);

  return { country, state, city };
}

// Return the ID of the avatar with the greatest height, or null when there is none.
function getTallestAvatarId(avatars) {
  return [...avatars].sort((avatarA, avatarB) => avatarB.height - avatarA.height)[0]?.id || null;
}

// Prefer the tallest avatar with unknown or sufficient entropy, and settle for any avatar otherwise.
function selectAvatarMediaId(actorProfiles) {
  const avatars = actorProfiles
    .map((actorProfile) => actorProfile.avatar)
    .filter((avatar) => avatar);

  const highQualityAvatars = avatars.filter((avatar) => avatar.entropy === null || avatar.entropy > 5.5);

  // try to settle for low quality avatar
  return getTallestAvatarId(highQualityAvatars) || getTallestAvatarId(avatars);
}

// Interpolate a single actor's profiles into one profile, with the main profile taking precedence.
function interpolateActorProfile(actorId, actorProfiles, context) {
  // group values from each profile; main profile is merged separately at the end
  const valuesByProperty = collectValuesByProperty(actorProfiles.filter((profile) => profile.entity_id !== null));

  const profile = { id: actorId };

  for (const property of mostFrequentProfileProperties) {
    profile[property] = getMostFrequent(valuesByProperty[property], context);
  }

  // remove unlikely values
  profile.height = getMostFrequent((valuesByProperty.height ?? []).filter((height) => height > 50 && height < 300), context);

  profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth, context);
  profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death, context);

  profile.age = getHighest(valuesByProperty.age);

  // ensure most frequent country, city and state match up
  const origin = getMostFrequentLocation(valuesByProperty.origin ?? [], context);

  profile.birth_country_alpha2 = origin.country;
  profile.birth_state = origin.state;
  profile.birth_city = origin.city;

  const residence = getMostFrequentLocation(valuesByProperty.residence ?? [], context);

  profile.residence_country_alpha2 = residence.country;
  profile.residence_state = residence.state;
  profile.residence_city = residence.city;

  profile.weight = getAverage(valuesByProperty.weight);

  profile.tattoos = getLongest(valuesByProperty.tattoos);
  profile.piercings = getLongest(valuesByProperty.piercings);

  profile.avatar_media_id = selectAvatarMediaId(actorProfiles);

  const mainProfile = actorProfiles.find((actorProfile) => actorProfile.entity_id === null);

  return mergeMainProfile(profile, mainProfile);
}

/**
 * Interpolate the profiles of the selected actors and write the result to the `actors` table.
 *
 * Profiles are fetched and grouped per actor, combined into one profile per actor, and written
 * in a single transaction that first nulls the existing interpolated columns of the selected
 * actors. Afterwards the `actors_meta` materialized view is refreshed.
 *
 * @param {Array<number|string>|undefined|null} actorIdsOrNames - actor IDs and/or names; all actors when omitted
 * @param {object} context - provides `knex`, `logger`, `slugify`, `moment` and `omit`
 * @returns {Promise<void>}
 * @throws rethrows any error from the database writes, after rolling the transaction back
 */
export async function interpolateProfiles(actorIdsOrNames, context) {
  const profiles = await fetchProfiles(actorIdsOrNames, context);

  const profilesByActorId = {};

  for (const profile of profiles) {
    if (!profilesByActorId[profile.actor_id]) {
      profilesByActorId[profile.actor_id] = [];
    }

    profilesByActorId[profile.actor_id].push(profile);
  }

  context.logger.info(`Interpolating ${profiles.length} profiles from ${Object.keys(profilesByActorId).length} actors`);

  const interpolatedProfiles = Object
    .entries(profilesByActorId)
    .map(([actorId, actorProfiles]) => interpolateActorProfile(actorId, actorProfiles, context));

  // computed before the transaction is opened, so a failure here cannot leave it dangling
  const interpolatedKeys = Object.keys(context.omit(
    curateProfileEntry({ id: 1 }),
    ['id', 'actor_id', 'entity_id', 'url', 'description_hash'],
  ));

  const emptyProfile = Object.fromEntries(interpolatedKeys.map((key) => [key, null]));

  const transaction = await context.knex.transaction();

  try {
    // clear existing interpolated data
    await context.knex('actors')
      .modify((modifyBuilder) => {
        if (actorIdsOrNames) {
          modifyBuilder
            .whereIn('id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
            .orWhere((whereBuilder) => {
              whereBuilder
                .whereIn('name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string'))
                .whereNull('entity_id');
            });
        }
      })
      .update(emptyProfile)
      .transacting(transaction);

    // insert new interpolated data
    await Promise.all(interpolatedProfiles.map((profile) => context.knex('actors')
      .where('id', profile.id)
      .update(profile)
      .transacting(transaction)));

    await transaction.commit();
  } catch (error) {
    // undo the partial write and surface the failure, rather than refreshing the view as if it had succeeded
    await transaction.rollback(error);

    throw error;
  }

  await context.knex.schema.refreshMaterializedView('actors_meta');
}