Updated dependencies. Added periodic memory logger.
This commit is contained in:
104
src/actors.js
104
src/actors.js
@@ -124,9 +124,9 @@ function getMostFrequent(items) {
|
||||
}
|
||||
|
||||
function getMostFrequentDate(dates) {
|
||||
const year = getMostFrequent(dates.map(dateX => dateX.getFullYear()));
|
||||
const month = getMostFrequent(dates.map(dateX => dateX.getMonth()));
|
||||
const date = getMostFrequent(dates.map(dateX => dateX.getDate()));
|
||||
const year = getMostFrequent(dates.map((dateX) => dateX.getFullYear()));
|
||||
const month = getMostFrequent(dates.map((dateX) => dateX.getMonth()));
|
||||
const date = getMostFrequent(dates.map((dateX) => dateX.getDate()));
|
||||
|
||||
if (year === null || month === null || date === null) {
|
||||
return null;
|
||||
@@ -153,7 +153,7 @@ function toBaseActors(actorsOrNames, release) {
|
||||
}
|
||||
|
||||
const baseActors = actorsOrNames
|
||||
.filter(actorOrName => actorOrName && (typeof actorOrName === 'string' || actorOrName.name))
|
||||
.filter((actorOrName) => actorOrName && (typeof actorOrName === 'string' || actorOrName.name))
|
||||
.map((actorOrName) => {
|
||||
const [baseName, entryId] = (actorOrName.name || actorOrName).split(':');
|
||||
|
||||
@@ -265,7 +265,7 @@ function curateActor(actor, withDetails = false, isProfile = false) {
|
||||
size: actor.avatar.size,
|
||||
source: actor.avatar.source,
|
||||
},
|
||||
...(actor.profiles && { profiles: actor.profiles?.map(profile => curateActor(profile, true, true)) }),
|
||||
...(actor.profiles && { profiles: actor.profiles?.map((profile) => curateActor(profile, true, true)) }),
|
||||
}),
|
||||
};
|
||||
|
||||
@@ -285,7 +285,7 @@ function curateActorEntry(baseActor, batchId) {
|
||||
}
|
||||
|
||||
function curateActorEntries(baseActors, batchId) {
|
||||
return baseActors.map(baseActor => curateActorEntry(baseActor, batchId));
|
||||
return baseActors.map((baseActor) => curateActorEntry(baseActor, batchId));
|
||||
}
|
||||
|
||||
function curateProfileEntry(profile) {
|
||||
@@ -448,7 +448,7 @@ async function curateProfile(profile, actor) {
|
||||
curatedProfile.scenes = toBaseReleases(profile.scenes || profile.releases, profile.entity, actor)
|
||||
// attach actor to base scene, in case it was not scraped
|
||||
.map((scene) => {
|
||||
if (actor && !scene.actors?.find(sceneActor => slugify(sceneActor) === actor.slug || slugify(sceneActor.name) === actor.slug)) {
|
||||
if (actor && !scene.actors?.find((sceneActor) => slugify(sceneActor) === actor.slug || slugify(sceneActor.name) === actor.slug)) {
|
||||
return {
|
||||
...scene,
|
||||
actors: [actor, ...(scene.actors || [])],
|
||||
@@ -477,10 +477,10 @@ async function fetchProfiles(actorIdsOrNames) {
|
||||
.modify((query) => {
|
||||
if (actorIdsOrNames) {
|
||||
query
|
||||
.whereIn('actor_id', actorIdsOrNames.filter(idOrName => typeof idOrName === 'number'))
|
||||
.whereIn('actor_id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
|
||||
.orWhere((builder) => {
|
||||
builder
|
||||
.whereIn('actors.name', actorIdsOrNames.filter(idOrName => typeof idOrName === 'string'))
|
||||
.whereIn('actors.name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string'))
|
||||
.whereNull('actors.entity_id');
|
||||
});
|
||||
}
|
||||
@@ -517,12 +517,12 @@ async function interpolateProfiles(actorIdsOrNames) {
|
||||
...(profile.birth_country_alpha2 && { country: profile.birth_country_alpha2 }),
|
||||
...(profile.birth_state && { state: profile.birth_state }),
|
||||
...(profile.birth_city && { city: profile.birth_city }),
|
||||
}].filter(location => Object.keys(location).length > 0),
|
||||
}].filter((location) => Object.keys(location).length > 0),
|
||||
residence: [...acc.residence || [], {
|
||||
...(profile.residence_country_alpha2 && { country: profile.residence_country_alpha2 }),
|
||||
...(profile.residence_state && { state: profile.residence_state }),
|
||||
...(profile.residence_city && { city: profile.residence_city }),
|
||||
}].filter(location => Object.keys(location).length > 0),
|
||||
}].filter((location) => Object.keys(location).length > 0),
|
||||
}), {});
|
||||
|
||||
const mostFrequentValues = [
|
||||
@@ -549,7 +549,7 @@ async function interpolateProfiles(actorIdsOrNames) {
|
||||
...mostFrequentValues,
|
||||
};
|
||||
|
||||
profile.height = getMostFrequent(valuesByProperty.height.filter(height => height > 50 && height < 300)); // remove unlikely values
|
||||
profile.height = getMostFrequent(valuesByProperty.height.filter((height) => height > 50 && height < 300)); // remove unlikely values
|
||||
|
||||
profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
|
||||
profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);
|
||||
@@ -558,21 +558,21 @@ async function interpolateProfiles(actorIdsOrNames) {
|
||||
profile.natural_boobs = profile.gender === 'male' ? null : getMostFrequent(valuesByProperty.natural_boobs);
|
||||
|
||||
// ensure most frequent country, city and state match up
|
||||
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.origin.map(location => location.country));
|
||||
const remainingOriginCountries = valuesByProperty.origin.filter(location => location.country === profile.birth_country_alpha2);
|
||||
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.origin.map((location) => location.country));
|
||||
const remainingOriginCountries = valuesByProperty.origin.filter((location) => location.country === profile.birth_country_alpha2);
|
||||
|
||||
profile.birth_state = getMostFrequent(remainingOriginCountries.map(location => location.state));
|
||||
const remainingOriginStates = remainingOriginCountries.filter(location => !profile.birth_state || location.state === profile.birth_state);
|
||||
profile.birth_state = getMostFrequent(remainingOriginCountries.map((location) => location.state));
|
||||
const remainingOriginStates = remainingOriginCountries.filter((location) => !profile.birth_state || location.state === profile.birth_state);
|
||||
|
||||
profile.birth_city = getMostFrequent(remainingOriginStates.map(location => location.city));
|
||||
profile.birth_city = getMostFrequent(remainingOriginStates.map((location) => location.city));
|
||||
|
||||
profile.residence_country_alpha2 = getMostFrequent(valuesByProperty.residence.map(location => location.country));
|
||||
const remainingResidenceCountries = valuesByProperty.residence.filter(location => location.country === profile.residence_country_alpha2);
|
||||
profile.residence_country_alpha2 = getMostFrequent(valuesByProperty.residence.map((location) => location.country));
|
||||
const remainingResidenceCountries = valuesByProperty.residence.filter((location) => location.country === profile.residence_country_alpha2);
|
||||
|
||||
profile.residence_state = getMostFrequent(remainingResidenceCountries.map(location => location.state));
|
||||
const remainingResidenceStates = remainingResidenceCountries.filter(location => !profile.residence_state || location.state === profile.residence_state);
|
||||
profile.residence_state = getMostFrequent(remainingResidenceCountries.map((location) => location.state));
|
||||
const remainingResidenceStates = remainingResidenceCountries.filter((location) => !profile.residence_state || location.state === profile.residence_state);
|
||||
|
||||
profile.residence_city = getMostFrequent(remainingResidenceStates.map(location => location.city));
|
||||
profile.residence_city = getMostFrequent(remainingResidenceStates.map((location) => location.city));
|
||||
|
||||
profile.weight = getAverage(valuesByProperty.weight);
|
||||
|
||||
@@ -580,8 +580,8 @@ async function interpolateProfiles(actorIdsOrNames) {
|
||||
profile.piercings = getLongest(valuesByProperty.piercings);
|
||||
|
||||
profile.avatar_media_id = actorProfiles
|
||||
.map(actorProfile => actorProfile.avatar)
|
||||
.filter(avatar => avatar && (avatar.entropy === null || avatar.entropy > 5.5))
|
||||
.map((actorProfile) => actorProfile.avatar)
|
||||
.filter((avatar) => avatar && (avatar.entropy === null || avatar.entropy > 5.5))
|
||||
.sort((avatarA, avatarB) => avatarB.height - avatarA.height)[0]?.id || null;
|
||||
|
||||
return profile;
|
||||
@@ -598,10 +598,10 @@ async function interpolateProfiles(actorIdsOrNames) {
|
||||
.modify((modifyBuilder) => {
|
||||
if (actorIdsOrNames) {
|
||||
modifyBuilder
|
||||
.whereIn('id', actorIdsOrNames.filter(idOrName => typeof idOrName === 'number'))
|
||||
.whereIn('id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
|
||||
.orWhere((whereBuilder) => {
|
||||
whereBuilder
|
||||
.whereIn('name', actorIdsOrNames.filter(idOrName => typeof idOrName === 'string'))
|
||||
.whereIn('name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string'))
|
||||
.whereNull('entity_id');
|
||||
});
|
||||
}
|
||||
@@ -610,7 +610,7 @@ async function interpolateProfiles(actorIdsOrNames) {
|
||||
.transacting(transaction);
|
||||
|
||||
// insert new interpolated data
|
||||
const queries = interpolatedProfiles.map(profile => knex('actors')
|
||||
const queries = interpolatedProfiles.map((profile) => knex('actors')
|
||||
.where('id', profile.id)
|
||||
.update(profile)
|
||||
.transacting(transaction));
|
||||
@@ -621,8 +621,8 @@ async function interpolateProfiles(actorIdsOrNames) {
|
||||
}
|
||||
|
||||
async function upsertProfiles(profiles) {
|
||||
const newProfileEntries = profiles.filter(profile => !profile.update).map(profile => curateProfileEntry(profile)).filter(Boolean);
|
||||
const updatingProfileEntries = profiles.filter(profile => profile.update).map(profile => curateProfileEntry(profile)).filter(Boolean);
|
||||
const newProfileEntries = profiles.filter((profile) => !profile.update).map((profile) => curateProfileEntry(profile)).filter(Boolean);
|
||||
const updatingProfileEntries = profiles.filter((profile) => profile.update).map((profile) => curateProfileEntry(profile)).filter(Boolean);
|
||||
|
||||
if (newProfileEntries.length > 0) {
|
||||
await bulkInsert('actors_profiles', newProfileEntries);
|
||||
@@ -632,7 +632,7 @@ async function upsertProfiles(profiles) {
|
||||
|
||||
if (argv.force && updatingProfileEntries.length > 0) {
|
||||
const transaction = await knex.transaction();
|
||||
const queries = updatingProfileEntries.map(profileEntry => knex('actors_profiles')
|
||||
const queries = updatingProfileEntries.map((profileEntry) => knex('actors_profiles')
|
||||
.where('id', profileEntry.id)
|
||||
.update(profileEntry)
|
||||
.returning(['id', 'actor_id'])
|
||||
@@ -647,7 +647,7 @@ async function upsertProfiles(profiles) {
|
||||
}
|
||||
|
||||
async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId) {
|
||||
const validSources = actor.entity ? sources.filter(source => source === actor.entity.slug) : sources;
|
||||
const validSources = actor.entity ? sources.filter((source) => source === actor.entity.slug) : sources;
|
||||
|
||||
const profiles = Promise.map(validSources, async (source) => {
|
||||
try {
|
||||
@@ -748,12 +748,12 @@ async function getActorNames(actorNames) {
|
||||
)
|
||||
`, [argv.actorsUpdate || new Date()]);
|
||||
|
||||
return actorsWithoutProfiles.rows.map(actor => actor.name);
|
||||
return actorsWithoutProfiles.rows.map((actor) => actor.name);
|
||||
}
|
||||
|
||||
async function storeProfiles(profiles) {
|
||||
const profilesWithAvatarIds = await associateAvatars(profiles);
|
||||
const actorIds = Array.from(new Set(profiles.map(profile => profile.id)));
|
||||
const actorIds = Array.from(new Set(profiles.map((profile) => profile.id)));
|
||||
|
||||
await upsertProfiles(profilesWithAvatarIds);
|
||||
await interpolateProfiles(actorIds);
|
||||
@@ -772,7 +772,7 @@ async function scrapeActors(argNames) {
|
||||
fetchEntitiesBySlug(entitySlugs, 'desc'),
|
||||
knex('actors')
|
||||
.select(knex.raw('actors.id, actors.name, actors.slug, actors.entry_id, actors.entity_id, row_to_json(entities) as entity'))
|
||||
.whereIn('actors.slug', baseActors.map(baseActor => baseActor.slug))
|
||||
.whereIn('actors.slug', baseActors.map((baseActor) => baseActor.slug))
|
||||
.whereNull('actors.alias_for')
|
||||
.leftJoin('entities', 'entities.id', 'actors.entity_id')
|
||||
.groupBy('actors.id', 'entities.id'),
|
||||
@@ -786,7 +786,7 @@ async function scrapeActors(argNames) {
|
||||
},
|
||||
}), {});
|
||||
|
||||
const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlugAndEntryId[baseActor.slug]?.[baseActor.entryId]);
|
||||
const newBaseActors = baseActors.filter((baseActor) => !existingActorEntriesBySlugAndEntryId[baseActor.slug]?.[baseActor.entryId]);
|
||||
|
||||
const [batchId] = newBaseActors.length > 0 ? await knex('batches').insert({ comment: null }).returning('id') : [null];
|
||||
const curatedActorEntries = batchId && curateActorEntries(newBaseActors, batchId);
|
||||
@@ -799,7 +799,7 @@ async function scrapeActors(argNames) {
|
||||
|
||||
const existingProfiles = await knex('actors_profiles')
|
||||
.select(knex.raw('actors_profiles.*, row_to_json(avatars) as avatar'))
|
||||
.whereIn('actor_id', actors.map(actor => actor.id))
|
||||
.whereIn('actor_id', actors.map((actor) => actor.id))
|
||||
.leftJoin('media as avatars', 'avatars.id', 'actors_profiles.avatar_media_id');
|
||||
|
||||
const existingProfilesByActorEntityId = existingProfiles.reduce((acc, profile) => ({
|
||||
@@ -812,7 +812,7 @@ async function scrapeActors(argNames) {
|
||||
|
||||
const profilesPerActor = await Promise.map(
|
||||
actors,
|
||||
async actor => scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId),
|
||||
async (actor) => scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId),
|
||||
{ concurrency: 10 },
|
||||
);
|
||||
|
||||
@@ -833,7 +833,7 @@ async function scrapeActors(argNames) {
|
||||
|
||||
async function getOrCreateActors(baseActors, batchId) {
|
||||
// WHERE IN causes stack depth error and performance issues with a large amount of values, no knex VALUES helper available
|
||||
const actorValues = baseActors.map(actor => knex.raw('(:slug, :entityId, :entryId, :collisionLikely)', {
|
||||
const actorValues = baseActors.map((actor) => knex.raw('(:slug, :entityId, :entryId, :collisionLikely)', {
|
||||
slug: actor.slug,
|
||||
entityId: actor.entity.id,
|
||||
entryId: actor.entryId,
|
||||
@@ -867,7 +867,7 @@ async function getOrCreateActors(baseActors, batchId) {
|
||||
},
|
||||
}), {});
|
||||
|
||||
const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.entity.id]?.[baseActor.entryId]?.[baseActor.slug] && !existingActorSlugs.null?.null?.[baseActor.slug]);
|
||||
const uniqueBaseActors = baseActors.filter((baseActor) => !existingActorSlugs[baseActor.entity.id]?.[baseActor.entryId]?.[baseActor.slug] && !existingActorSlugs.null?.null?.[baseActor.slug]);
|
||||
const curatedActorEntries = curateActorEntries(uniqueBaseActors, batchId);
|
||||
|
||||
const newActors = await bulkInsert('actors', curatedActorEntries);
|
||||
@@ -884,13 +884,13 @@ async function getOrCreateActors(baseActors, batchId) {
|
||||
}), {});
|
||||
|
||||
const newActorProfiles = await Promise.all(baseActors
|
||||
.filter(actor => actor.hasProfile)
|
||||
.map(actor => ({
|
||||
.filter((actor) => actor.hasProfile)
|
||||
.map((actor) => ({
|
||||
...actor,
|
||||
id: newActorIdsByEntityIdEntryIdAndSlug[actor.entity?.id]?.[actor.entryId]?.[actor.slug] || newActorIdsByEntityIdEntryIdAndSlug.null?.null?.[actor.slug],
|
||||
}))
|
||||
.filter(actor => !!actor.id)
|
||||
.map(actor => curateProfile(actor)));
|
||||
.filter((actor) => !!actor.id)
|
||||
.map((actor) => curateProfile(actor)));
|
||||
|
||||
await storeProfiles(newActorProfiles);
|
||||
|
||||
@@ -950,16 +950,16 @@ async function associatePeople(releases, batchId, type = 'actor') {
|
||||
|
||||
const releaseActorAssociations = Object.entries(baseActorsByReleaseId)
|
||||
.map(([releaseId, releaseActors]) => releaseActors
|
||||
.map(releaseActor => ({
|
||||
.map((releaseActor) => ({
|
||||
release_id: releaseId,
|
||||
...(actorIdsByEntityIdEntryIdAndSlug[releaseActor.entity?.id]?.[releaseActor.entryId]?.[releaseActor.slug] || actorIdsByEntityIdEntryIdAndSlug.null.null[releaseActor.slug]),
|
||||
})))
|
||||
.flat();
|
||||
|
||||
const validReleaseActorAssociations = releaseActorAssociations.filter(association => association.release_id && association[personKey]);
|
||||
const validReleaseActorAssociations = releaseActorAssociations.filter((association) => association.release_id && association[personKey]);
|
||||
|
||||
if (releaseActorAssociations.length > validReleaseActorAssociations.length) {
|
||||
const invalidReleaseActorAssociations = releaseActorAssociations.filter(association => !association.release_id || !association[personKey]);
|
||||
const invalidReleaseActorAssociations = releaseActorAssociations.filter((association) => !association.release_id || !association[personKey]);
|
||||
|
||||
logger.error(invalidReleaseActorAssociations);
|
||||
}
|
||||
@@ -1021,15 +1021,15 @@ async function searchActors(query) {
|
||||
.from(knex.raw('search_actors(?) as actors', [query]))
|
||||
.limit(100);
|
||||
|
||||
return actors.map(actor => curateActor(actor));
|
||||
return actors.map((actor) => curateActor(actor));
|
||||
}
|
||||
|
||||
async function flushProfiles(actorIdsOrNames) {
|
||||
const profiles = await fetchProfiles(actorIdsOrNames);
|
||||
const actorNames = Array.from(new Set(profiles.map(profile => profile.actor.name)));
|
||||
const actorNames = Array.from(new Set(profiles.map((profile) => profile.actor.name)));
|
||||
|
||||
const deleteCount = await knex('actors_profiles')
|
||||
.whereIn('id', profiles.map(profile => profile.id))
|
||||
.whereIn('id', profiles.map((profile) => profile.id))
|
||||
.delete();
|
||||
|
||||
await interpolateProfiles(actorIdsOrNames);
|
||||
@@ -1050,14 +1050,14 @@ async function flushProfiles(actorIdsOrNames) {
|
||||
|
||||
async function deleteActors(actorIdsOrNames) {
|
||||
const actors = await knex('actors')
|
||||
.whereIn('id', actorIdsOrNames.filter(idOrName => typeof idOrName === 'number'))
|
||||
.whereIn('id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
|
||||
.orWhere((builder) => {
|
||||
builder
|
||||
.whereIn('name', actorIdsOrNames.filter(idOrName => typeof idOrName === 'string'))
|
||||
.whereIn('name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string'))
|
||||
.whereNull('entity_id');
|
||||
});
|
||||
|
||||
const actorIds = actors.map(actor => actor.id);
|
||||
const actorIds = actors.map((actor) => actor.id);
|
||||
|
||||
const sceneIds = await knex('releases_actors')
|
||||
.select('releases.id')
|
||||
|
||||
Reference in New Issue
Block a user