// Properties whose interpolated value is simply the most frequent one across an actor's profiles.
const MOST_FREQUENT_PROPERTIES = [
  'gender',
  'orientation',
  'ethnicity',
  'cup',
  'bust',
  'waist',
  'hip',
  'leg',
  'thigh',
  'foot',
  'shoe_size',
  'penis_length',
  'penis_girth',
  'circumcised',
  'natural_boobs',
  'boobs_volume',
  'boobs_implant',
  'boobs_placement',
  'boobs_surgeon',
  'natural_butt',
  'butt_volume',
  'butt_implant',
  'natural_lips',
  'lips_volume',
  'hair_color',
  'eyes',
  'has_tattoos',
  'has_piercings',
  'blood_type',
];

/**
 * Pick the value that occurs most often in a list.
 *
 * Values are compared by `slugify(value)`, `null`/`undefined` entries are skipped, and on a tie
 * the value that reached the winning count first is kept. The returned value is the first
 * occurrence of the winning slug, not the slug itself.
 *
 * @param {Array} [items] - Candidate values; a missing list is treated as empty.
 * @param {{ slugify: Function }} context - Provides the `slugify` normaliser.
 * @returns {*} The most frequent value, or `null` when there is none.
 */
function getMostFrequent(items = [], { slugify }) {
  // prototype-less, so slugs such as 'constructor' are counted like any other key
  const counts = Object.create(null);

  let mostFrequent = null;
  let mostFrequentSlug = null;

  for (const item of items) {
    if (item === undefined || item === null) {
      continue;
    }

    const slug = slugify(item);

    counts[slug] = (counts[slug] || 0) + 1;

    // compare against null explicitly: 0 (January) and false are legitimate leaders
    if (mostFrequent === null || counts[slug] > counts[mostFrequentSlug]) {
      mostFrequent = item;
      mostFrequentSlug = slug;
    }
  }

  return mostFrequent;
}

/**
 * Build a date from the most frequent year, month and day-of-month, each picked independently.
 *
 * @param {Date[]} [dates] - Candidate dates; a missing list is treated as empty.
 * @param {{ slugify: Function, moment: Function }} context - `moment` is called with
 *   `{ year, month, date }` and its result's `toDate()` is returned.
 * @returns {Date|null} The composed date, or `null` when any component is unavailable.
 */
function getMostFrequentDate(dates = [], context) {
  const year = getMostFrequent(dates.map((dateX) => dateX.getFullYear()), context);
  const month = getMostFrequent(dates.map((dateX) => dateX.getMonth()), context);
  const date = getMostFrequent(dates.map((dateX) => dateX.getDate()), context);

  if (year === null || month === null || date === null) {
    return null;
  }

  return context.moment({ year, month, date }).toDate();
}

/**
 * Return the largest item, or `null` for an empty list.
 *
 * The running maximum starts at `null`, which compares as 0, so values that are not greater
 * than 0 never replace it.
 *
 * @param {number[]} [items] - Candidate values; a missing list is treated as empty.
 * @returns {number|null}
 */
function getHighest(items = []) {
  return items.reduce((prevItem, item) => (item > prevItem ? item : prevItem), null);
}

/**
 * Return the item with the greatest `length`; the earliest one wins a tie.
 * Does not modify `items`.
 *
 * @param {Array<string>} [items] - Candidate values; a missing list is treated as empty.
 * @returns {string|null} The longest item, or `null` when the list is empty or the winner is falsy.
 */
function getLongest(items = []) {
  const longest = items.reduce((currentLongest, item) => (
    currentLongest === undefined || item.length > currentLongest.length ? item : currentLongest
  ), undefined);

  return longest || null;
}

/**
 * Return the rounded arithmetic mean of the items.
 *
 * @param {number[]} [items] - Candidate values; a missing list is treated as empty.
 * @returns {number|null} The rounded mean, or `null` when the list is empty or the mean rounds to 0.
 */
function getAverage(items = []) {
  return Math.round(items.reduce((acc, item) => acc + item, 0) / items.length) || null;
}

/**
 * Map a camelCase profile object onto the snake_case column names used for storage.
 *
 * @param {object} profile - Scraped or curated profile; `profile.id` becomes `actor_id`.
 * @returns {object|null} The column-keyed entry, or `null` when the profile has no `id`.
 */
function curateProfileEntry(profile) {
  if (!profile.id) {
    return null;
  }

  const curatedProfileEntry = {
    // `update` carries the id of the row to overwrite; `false` means no `id` key is emitted
    ...(profile.update !== false && { id: profile.update }),
    actor_id: profile.id,
    entity_id: profile.entity?.id || null,
    date_of_birth: profile.dateOfBirth,
    date_of_death: profile.dateOfDeath,
    age: profile.age,
    url: profile.url,
    gender: profile.gender,
    orientation: profile.orientation,
    ethnicity: profile.ethnicity,
    description: profile.description,
    description_hash: profile.descriptionHash,
    birth_city: profile.placeOfBirth?.city || null,
    birth_state: profile.placeOfBirth?.state || null,
    birth_country_alpha2: profile.placeOfBirth?.country || null,
    residence_city: profile.placeOfResidence?.city || null,
    residence_state: profile.placeOfResidence?.state || null,
    residence_country_alpha2: profile.placeOfResidence?.country || null,
    cup: profile.cup,
    bust: profile.bust,
    waist: profile.waist,
    leg: profile.leg,
    thigh: profile.thigh,
    foot: profile.foot,
    hip: profile.hip,
    penis_length: profile.penisLength,
    penis_girth: profile.penisGirth,
    circumcised: profile.circumcised,
    natural_boobs: profile.naturalBoobs,
    boobs_volume: profile.boobsVolume,
    boobs_implant: profile.boobsImplant,
    boobs_placement: profile.boobsPlacement,
    boobs_surgeon: profile.boobsSurgeon,
    natural_butt: profile.naturalButt,
    butt_volume: profile.buttVolume,
    butt_implant: profile.buttImplant,
    natural_lips: profile.naturalLips,
    lips_volume: profile.lipsVolume,
    height: profile.height,
    weight: profile.weight,
    shoe_size: profile.shoeSize,
    hair_color: profile.hairColor,
    hair_type: profile.hairType,
    eyes: profile.eyes,
    has_tattoos: profile.hasTattoos,
    has_piercings: profile.hasPiercings,
    piercings: profile.piercings,
    tattoos: profile.tattoos,
    blood_type: profile.bloodType,
    avatar_media_id: profile.avatarMediaId || null,
  };

  return curatedProfileEntry;
}

/**
 * Query `actors_profiles` rows joined with the actor name and the avatar media row (as JSON).
 *
 * @param {Array<number|string>} [actorIdsOrNames] - Numbers are matched against `actor_id`,
 *   strings against `actors.name` for actors without an entity. When omitted, no filter is applied.
 * @param {{ knex: Function }} context - Provides the knex instance.
 * @returns {Promise<object[]>} The matching profile rows.
 */
async function fetchProfiles(actorIdsOrNames, { knex }) {
  return knex('actors_profiles')
    .select(knex.raw('actors_profiles.*, actors.name, row_to_json(media) as avatar'))
    .leftJoin('actors', 'actors.id', 'actors_profiles.actor_id')
    .modify((query) => {
      if (actorIdsOrNames) {
        query
          .whereIn('actor_id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
          .orWhere((builder) => {
            builder
              .whereIn('actors.name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string'))
              .whereNull('actors.entity_id');
          });
      }
    })
    .leftJoin('media', 'actors_profiles.avatar_media_id', 'media.id');
}

/**
 * Overlay the main profile on an interpolated profile.
 *
 * Every non-null value of the main profile wins over the interpolated value, except for the
 * preserved `id` key. Keys the main profile does not define keep their interpolated value.
 *
 * @param {object} profile - Interpolated profile; its keys determine the keys of the result.
 * @param {object} [mainProfile] - Main profile row; when absent, a copy of `profile` is returned.
 * @returns {object} A new merged profile object.
 */
function mergeMainProfile(profile, mainProfile) {
  if (!mainProfile) {
    return { ...profile };
  }

  const preservedKeys = ['id'];

  return Object.fromEntries(Object.entries(profile).map(([key, value]) => {
    const mainValue = mainProfile[key];
    const keepInterpolated = preservedKeys.includes(key) || mainValue === null || mainValue === undefined;

    return [key, keepInterpolated ? value : mainValue];
  }));
}

// Bundle the country, state and city columns sharing a prefix into one object, omitting empty parts.
function bundleLocation(profile, prefix) {
  const country = profile[`${prefix}_country_alpha2`];
  const state = profile[`${prefix}_state`];
  const city = profile[`${prefix}_city`];

  return {
    ...(country && { country }),
    ...(state && { state }),
    ...(city && { city }),
  };
}

// Collect the non-null values of every column across profiles, repeating each value `priority` times.
function groupValuesByProperty(profiles) {
  const valuesByProperty = {};
  const origin = [];
  const residence = [];

  for (const profile of profiles) {
    for (const [property, value] of Object.entries(profile)) {
      const values = valuesByProperty[property] || [];

      if (value !== null) {
        // multiply by priority, increasing the odds of being the most frequent value
        values.push(...Array.from({ length: profile.priority }, () => value));
      }

      valuesByProperty[property] = values;
    }

    // bundle location values so they can be assessed together, to ensure the most frequent
    // city is in the most frequent state, which is in the most frequent country
    const birthplace = bundleLocation(profile, 'birth');
    const home = bundleLocation(profile, 'residence');

    if (Object.keys(birthplace).length > 0) {
      origin.push(birthplace);
    }

    if (Object.keys(home).length > 0) {
      residence.push(home);
    }
  }

  return { ...valuesByProperty, origin, residence };
}

// Pick the most frequent country, then the most frequent state in it, then the most frequent city in that.
function getMostFrequentLocation(locations, context) {
  const country = getMostFrequent(locations.map((location) => location.country), context);
  const inCountry = locations.filter((location) => location.country === country);

  const state = getMostFrequent(inCountry.map((location) => location.state), context);
  const inState = inCountry.filter((location) => !state || location.state === state);

  const city = getMostFrequent(inState.map((location) => location.city), context);

  return { country, state, city };
}

// Pick the tallest avatar with unknown or sufficient entropy, settling for any avatar otherwise.
function selectAvatarId(actorProfiles) {
  const byHeightDescending = (avatarA, avatarB) => avatarB.height - avatarA.height;
  const avatars = actorProfiles
    .map((actorProfile) => actorProfile.avatar)
    .filter((avatar) => avatar);

  const highQualityId = avatars
    .filter((avatar) => avatar.entropy === null || avatar.entropy > 5.5)
    .sort(byHeightDescending)[0]?.id || null;

  if (highQualityId) {
    return highQualityId;
  }

  // try to settle for low quality avatar
  return avatars.sort(byHeightDescending)[0]?.id || null;
}

// Derive a single actor row from all of the actor's profiles.
function interpolateActorProfile(actorId, actorProfiles, context) {
  // the main profile (no entity) is merged separately at the end
  const valuesByProperty = groupValuesByProperty(actorProfiles.filter((profile) => profile.entity_id !== null));
  // an actor may have no value list for a property, e.g. when it only has a main profile
  const valuesOf = (property) => valuesByProperty[property] || [];

  const profile = { id: actorId };

  for (const property of MOST_FREQUENT_PROPERTIES) {
    profile[property] = getMostFrequent(valuesOf(property), context);
  }

  profile.height = getMostFrequent(valuesOf('height').filter((height) => height > 50 && height < 300), context); // remove unlikely values

  profile.date_of_birth = getMostFrequentDate(valuesOf('date_of_birth'), context);
  profile.date_of_death = getMostFrequentDate(valuesOf('date_of_death'), context);
  profile.age = getHighest(valuesOf('age'));

  // ensure most frequent country, city and state match up
  const birthplace = getMostFrequentLocation(valuesByProperty.origin, context);

  profile.birth_country_alpha2 = birthplace.country;
  profile.birth_state = birthplace.state;
  profile.birth_city = birthplace.city;

  const home = getMostFrequentLocation(valuesByProperty.residence, context);

  profile.residence_country_alpha2 = home.country;
  profile.residence_state = home.state;
  profile.residence_city = home.city;

  profile.weight = getAverage(valuesOf('weight'));

  profile.tattoos = getLongest(valuesOf('tattoos'));
  profile.piercings = getLongest(valuesOf('piercings'));

  profile.avatar_media_id = selectAvatarId(actorProfiles);

  const mainProfile = actorProfiles.find((actorProfile) => actorProfile.entity_id === null);

  return mergeMainProfile(profile, mainProfile);
}

/**
 * Recompute the aggregated `actors` rows from their `actors_profiles` rows.
 *
 * For each actor the profiles are combined into one row; see the helpers for the rule applied
 * per property. Inside one transaction the interpolated columns of the targeted actors are
 * cleared and then rewritten. After a successful commit the `actors_meta` materialized view is
 * refreshed.
 *
 * @param {Array<number|string>} [actorIdsOrNames] - Actor ids (numbers) and/or names (strings)
 *   to limit the run to. When omitted, no filter is applied.
 * @param {object} context - Provides `knex`, `logger`, `slugify`, `moment` and `omit`.
 * @returns {Promise<void>}
 * @throws Rethrows any error raised while clearing or writing rows, after rolling back the
 *   transaction; the materialized view is not refreshed in that case.
 */
export async function interpolateProfiles(actorIdsOrNames, context) {
  const profiles = await fetchProfiles(actorIdsOrNames, context);

  const profilesByActorId = {};

  for (const profile of profiles) {
    const actorProfiles = profilesByActorId[profile.actor_id] || [];

    actorProfiles.push(profile);
    profilesByActorId[profile.actor_id] = actorProfiles;
  }

  context.logger.info(`Interpolating ${profiles.length} profiles from ${Object.keys(profilesByActorId).length} actors`);

  const interpolatedProfiles = Object.entries(profilesByActorId)
    .map(([actorId, actorProfiles]) => interpolateActorProfile(actorId, actorProfiles, context));

  // every interpolated column set to null, used to clear existing interpolated data
  const emptyProfile = Object
    .keys(context.omit(curateProfileEntry({ id: 1 }), ['id', 'actor_id', 'entity_id', 'url', 'description_hash']))
    .reduce((acc, key) => ({ ...acc, [key]: null }), {});

  const transaction = await context.knex.transaction();

  try {
    // clear existing interpolated data
    await context.knex('actors')
      .modify((modifyBuilder) => {
        if (actorIdsOrNames) {
          modifyBuilder
            .whereIn('id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
            .orWhere((whereBuilder) => {
              whereBuilder
                .whereIn('name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string'))
                .whereNull('entity_id');
            });
        }
      })
      .update(emptyProfile)
      .transacting(transaction);

    // insert new interpolated data
    await Promise.all(interpolatedProfiles.map((profile) => context.knex('actors')
      .where('id', profile.id)
      .update(profile)
      .transacting(transaction)));

    await transaction.commit();
  } catch (error) {
    // release the transaction and surface the failure instead of reporting success
    await transaction.rollback(error);

    throw error;
  }

  await context.knex.schema.refreshMaterializedView('actors_meta');
}