diff --git a/actors.cjs b/actors.cjs deleted file mode 100644 index 51ba3b3..0000000 --- a/actors.cjs +++ /dev/null @@ -1,301 +0,0 @@ -function getMostFrequent(items, { slugify }) { - const { mostFrequent } = items.reduce((acc, item) => { - if (item === undefined || item === null) { - return acc; - } - - const slug = slugify(item); - - acc.counts[slug] = (acc.counts[slug] || 0) + 1; - - if (!acc.mostFrequent || acc.counts[slug] > acc.counts[slugify(acc.mostFrequent)]) { - acc.mostFrequent = item; - } - - return acc; - }, { - counts: {}, - mostFrequent: null, - }); - - return mostFrequent; -} - -function getMostFrequentDate(dates, context) { - const year = getMostFrequent(dates.map((dateX) => dateX.getFullYear()), context); - const month = getMostFrequent(dates.map((dateX) => dateX.getMonth()), context); - const date = getMostFrequent(dates.map((dateX) => dateX.getDate()), context); - - if (year === null || month === null || date === null) { - return null; - } - - return context.moment({ year, month, date }).toDate(); -} - -function getHighest(items) { - return items.reduce((prevItem, item) => (item > prevItem ? item : prevItem), null); -} - -function getLongest(items) { - return items.sort((itemA, itemB) => itemB.length - itemA.length)[0] || null; -} - -function getAverage(items) { - return Math.round(items.reduce((acc, item) => acc + item, 0) / items.length) || null; -} - -function curateProfileEntry(profile) { - if (!profile.id) { - return null; - } - - const curatedProfileEntry = { - ...(profile.update !== false && { id: profile.update }), - actor_id: profile.id, - entity_id: profile.entity?.id || null, - date_of_birth: profile.dateOfBirth, - date_of_death: profile.dateOfDeath, - age: profile.age, - url: profile.url, - gender: profile.gender, - orientation: profile.orientation, - ethnicity: profile.ethnicity, - description: profile.description, - description_hash: profile.descriptionHash, - birth_city: profile.placeOfBirth?.city || null, - birth_state: profile.placeOfBirth?.state || null, - birth_country_alpha2: profile.placeOfBirth?.country || null, - residence_city: profile.placeOfResidence?.city || null, - residence_state: profile.placeOfResidence?.state || null, - residence_country_alpha2: profile.placeOfResidence?.country || null, - cup: profile.cup, - bust: profile.bust, - waist: profile.waist, - leg: profile.leg, - thigh: profile.thigh, - foot: profile.foot, - hip: profile.hip, - penis_length: profile.penisLength, - penis_girth: profile.penisGirth, - circumcised: profile.circumcised, - natural_boobs: profile.naturalBoobs, - boobs_volume: profile.boobsVolume, - boobs_implant: profile.boobsImplant, - boobs_placement: profile.boobsPlacement, - boobs_surgeon: profile.boobsSurgeon, - natural_butt: profile.naturalButt, - butt_volume: profile.buttVolume, - butt_implant: profile.buttImplant, - natural_lips: profile.naturalLips, - lips_volume: profile.lipsVolume, - height: profile.height, - weight: profile.weight, - shoe_size: profile.shoeSize, - hair_color: profile.hairColor, - hair_type: profile.hairType, - eyes: profile.eyes, - has_tattoos: profile.hasTattoos, - has_piercings: profile.hasPiercings, - piercings: profile.piercings, - tattoos: profile.tattoos, - blood_type: profile.bloodType, - avatar_media_id: profile.avatarMediaId || null, - }; - - return curatedProfileEntry; -} - -async function fetchProfiles(actorIdsOrNames, { knex }) { - return knex('actors_profiles') - .select(knex.raw('actors_profiles.*, actors.name, row_to_json(media) as avatar')) - .leftJoin('actors', 'actors.id', 'actors_profiles.actor_id') - .modify((query) => { - if (actorIdsOrNames) { - query - .whereIn('actor_id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number')) - .orWhere((builder) => { - builder - .whereIn('actors.name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string')) - .whereNull('actors.entity_id'); - }); - } - }) - .leftJoin('media', 'actors_profiles.avatar_media_id', 'media.id'); -} - -function mergeMainProfile(profile, mainProfile) { - const preservedKeys = ['id']; - - const mergedProfile = Object.fromEntries(Object.entries(profile).map(([key, value]) => [key, mainProfile[key] === null || preservedKeys.includes(key) - ? value - : mainProfile[key]])); - - return mergedProfile; -} - -async function interpolateProfiles(actorIdsOrNames, context) { - const profiles = await fetchProfiles(actorIdsOrNames, context); - - const profilesByActorId = profiles.reduce((acc, profile) => ({ - ...acc, - [profile.actor_id]: [ - ...(acc[profile.actor_id] || []), - profile, - ], - }), {}); - - context.logger.info(`Interpolating ${profiles.length} profiles from ${Object.keys(profilesByActorId).length} actors`); - - const interpolatedProfiles = Object.entries(profilesByActorId).map(([actorId, actorProfiles]) => { - // group values from each profile - const valuesByProperty = actorProfiles - .filter((profile) => profile.entity_id !== null) // main profile is interpolated separately at the end - .reduce((acc, profile) => Object - .entries(profile) - .reduce((profileAcc, [property, value]) => ({ - ...profileAcc, - [property]: [ - ...(acc[property] || []), - ...(value === null ? [] : Array.from({ length: profile.priority }, () => value)), // multiply by priority, increasing the odds of being the most frequent value - ], - }), { - // bundle location values so they can be assessed together, to ensure the most frequent city is in the most frequent state is in most frequent country - origin: [...acc.origin || [], { - ...(profile.birth_country_alpha2 && { country: profile.birth_country_alpha2 }), - ...(profile.birth_state && { state: profile.birth_state }), - ...(profile.birth_city && { city: profile.birth_city }), - }].filter((location) => Object.keys(location).length > 0), - residence: [...acc.residence || [], { - ...(profile.residence_country_alpha2 && { country: profile.residence_country_alpha2 }), - ...(profile.residence_state && { state: profile.residence_state }), - ...(profile.residence_city && { city: profile.residence_city }), - }].filter((location) => Object.keys(location).length > 0), - }), {}); - - const mostFrequentValues = [ - 'gender', - 'orientation', - 'ethnicity', - 'cup', - 'bust', - 'waist', - 'hip', - 'leg', - 'thigh', - 'foot', - 'shoe_size', - 'penis_length', - 'penis_girth', - 'circumcised', - 'natural_boobs', - 'boobs_volume', - 'boobs_implant', - 'boobs_placement', - 'boobs_surgeon', - 'natural_butt', - 'butt_volume', - 'butt_implant', - 'natural_lips', - 'lips_volume', - 'hair_color', - 'eyes', - 'has_tattoos', - 'has_piercings', - 'blood_type', - ].reduce((acc, property) => ({ - ...acc, - [property]: getMostFrequent(valuesByProperty[property], context), - }), {}); - - const profile = { - id: actorId, - ...mostFrequentValues, - }; - - profile.height = getMostFrequent(valuesByProperty.height.filter((height) => height > 50 && height < 300), context); // remove unlikely values - - profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth, context); - profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death, context); - profile.age = getHighest(valuesByProperty.age); - - // profile.natural_boobs = profile.gender === 'male' ? null : getMostFrequent(valuesByProperty.natural_boobs, context); - - // ensure most frequent country, city and state match up - profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.origin.map((location) => location.country), context); - const remainingOriginCountries = valuesByProperty.origin.filter((location) => location.country === profile.birth_country_alpha2); - - profile.birth_state = getMostFrequent(remainingOriginCountries.map((location) => location.state), context); - const remainingOriginStates = remainingOriginCountries.filter((location) => !profile.birth_state || location.state === profile.birth_state); - - profile.birth_city = getMostFrequent(remainingOriginStates.map((location) => location.city), context); - - profile.residence_country_alpha2 = getMostFrequent(valuesByProperty.residence.map((location) => location.country), context); - const remainingResidenceCountries = valuesByProperty.residence.filter((location) => location.country === profile.residence_country_alpha2); - - profile.residence_state = getMostFrequent(remainingResidenceCountries.map((location) => location.state), context); - const remainingResidenceStates = remainingResidenceCountries.filter((location) => !profile.residence_state || location.state === profile.residence_state); - - profile.residence_city = getMostFrequent(remainingResidenceStates.map((location) => location.city), context); - - profile.weight = getAverage(valuesByProperty.weight); - - profile.tattoos = getLongest(valuesByProperty.tattoos); - profile.piercings = getLongest(valuesByProperty.piercings); - - profile.avatar_media_id = actorProfiles - .map((actorProfile) => actorProfile.avatar) - .filter((avatar) => avatar && (avatar.entropy === null || avatar.entropy > 5.5)) - .sort((avatarA, avatarB) => avatarB.height - avatarA.height)[0]?.id || null; - - if (!profile.avatar_media_id) { - // try to settle for low quality avatar - profile.avatar_media_id = actorProfiles - .map((actorProfile) => actorProfile.avatar) - .filter((avatar) => avatar) - .sort((avatarA, avatarB) => avatarB.height - avatarA.height)[0]?.id || null; - } - - const mainProfile = actorProfiles.find((actorProfile) => actorProfile.entity_id === null); - - return mergeMainProfile(profile, mainProfile); - }); - - const transaction = await context.knex.transaction(); - - // clear existing interpolated data - const emptyProfile = Object - .keys(context.omit(curateProfileEntry({ id: 1 }), ['id', 'actor_id', 'entity_id', 'url', 'description_hash'])) - .reduce((acc, key) => ({ ...acc, [key]: null }), {}); - - await context.knex('actors') - .modify((modifyBuilder) => { - if (actorIdsOrNames) { - modifyBuilder - .whereIn('id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number')) - .orWhere((whereBuilder) => { - whereBuilder - .whereIn('name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string')) - .whereNull('entity_id'); - }); - } - }) - .update(emptyProfile) - .transacting(transaction); - - // insert new interpolated data - const queries = interpolatedProfiles.map(async (profile) => context.knex('actors') - .where('id', profile.id) - .update(profile) - .transacting(transaction)); - - await Promise.all(queries) - .then(transaction.commit) - .catch(transaction.rollback); - - await context.knex.schema.refreshMaterializedView('actors_meta'); -} - -module.exports = { - interpolateProfiles, -};