Updated dependencies. Added periodic memory logger.

This commit is contained in:
DebaucheryLibrarian
2021-11-20 23:59:15 +01:00
parent a867817dc1
commit 26539b74a5
109 changed files with 10238 additions and 10833 deletions

View File

@@ -124,9 +124,9 @@ function getMostFrequent(items) {
}
function getMostFrequentDate(dates) {
const year = getMostFrequent(dates.map(dateX => dateX.getFullYear()));
const month = getMostFrequent(dates.map(dateX => dateX.getMonth()));
const date = getMostFrequent(dates.map(dateX => dateX.getDate()));
const year = getMostFrequent(dates.map((dateX) => dateX.getFullYear()));
const month = getMostFrequent(dates.map((dateX) => dateX.getMonth()));
const date = getMostFrequent(dates.map((dateX) => dateX.getDate()));
if (year === null || month === null || date === null) {
return null;
@@ -153,7 +153,7 @@ function toBaseActors(actorsOrNames, release) {
}
const baseActors = actorsOrNames
.filter(actorOrName => actorOrName && (typeof actorOrName === 'string' || actorOrName.name))
.filter((actorOrName) => actorOrName && (typeof actorOrName === 'string' || actorOrName.name))
.map((actorOrName) => {
const [baseName, entryId] = (actorOrName.name || actorOrName).split(':');
@@ -265,7 +265,7 @@ function curateActor(actor, withDetails = false, isProfile = false) {
size: actor.avatar.size,
source: actor.avatar.source,
},
...(actor.profiles && { profiles: actor.profiles?.map(profile => curateActor(profile, true, true)) }),
...(actor.profiles && { profiles: actor.profiles?.map((profile) => curateActor(profile, true, true)) }),
}),
};
@@ -285,7 +285,7 @@ function curateActorEntry(baseActor, batchId) {
}
/**
 * Curate a list of base actors into actor entries for a single batch.
 *
 * Each base actor is passed to `curateActorEntry` together with the same
 * `batchId`; the explicit arrow wrapper keeps `Array.prototype.map`'s index
 * and array arguments from leaking into that call.
 *
 * @param {Array<object>} baseActors - base actors to curate
 * @param {*} batchId - batch identifier forwarded unchanged to every entry
 * @returns {Array<*>} one curated entry per base actor, in input order
 */
function curateActorEntries(baseActors, batchId) {
	return baseActors.map((baseActor) => curateActorEntry(baseActor, batchId));
}
function curateProfileEntry(profile) {
@@ -448,7 +448,7 @@ async function curateProfile(profile, actor) {
curatedProfile.scenes = toBaseReleases(profile.scenes || profile.releases, profile.entity, actor)
// attach actor to base scene, in case it was not scraped
.map((scene) => {
if (actor && !scene.actors?.find(sceneActor => slugify(sceneActor) === actor.slug || slugify(sceneActor.name) === actor.slug)) {
if (actor && !scene.actors?.find((sceneActor) => slugify(sceneActor) === actor.slug || slugify(sceneActor.name) === actor.slug)) {
return {
...scene,
actors: [actor, ...(scene.actors || [])],
@@ -477,10 +477,10 @@ async function fetchProfiles(actorIdsOrNames) {
.modify((query) => {
if (actorIdsOrNames) {
query
.whereIn('actor_id', actorIdsOrNames.filter(idOrName => typeof idOrName === 'number'))
.whereIn('actor_id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
.orWhere((builder) => {
builder
.whereIn('actors.name', actorIdsOrNames.filter(idOrName => typeof idOrName === 'string'))
.whereIn('actors.name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string'))
.whereNull('actors.entity_id');
});
}
@@ -517,12 +517,12 @@ async function interpolateProfiles(actorIdsOrNames) {
...(profile.birth_country_alpha2 && { country: profile.birth_country_alpha2 }),
...(profile.birth_state && { state: profile.birth_state }),
...(profile.birth_city && { city: profile.birth_city }),
}].filter(location => Object.keys(location).length > 0),
}].filter((location) => Object.keys(location).length > 0),
residence: [...acc.residence || [], {
...(profile.residence_country_alpha2 && { country: profile.residence_country_alpha2 }),
...(profile.residence_state && { state: profile.residence_state }),
...(profile.residence_city && { city: profile.residence_city }),
}].filter(location => Object.keys(location).length > 0),
}].filter((location) => Object.keys(location).length > 0),
}), {});
const mostFrequentValues = [
@@ -549,7 +549,7 @@ async function interpolateProfiles(actorIdsOrNames) {
...mostFrequentValues,
};
profile.height = getMostFrequent(valuesByProperty.height.filter(height => height > 50 && height < 300)); // remove unlikely values
profile.height = getMostFrequent(valuesByProperty.height.filter((height) => height > 50 && height < 300)); // remove unlikely values
profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);
@@ -558,21 +558,21 @@ async function interpolateProfiles(actorIdsOrNames) {
profile.natural_boobs = profile.gender === 'male' ? null : getMostFrequent(valuesByProperty.natural_boobs);
// ensure most frequent country, city and state match up
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.origin.map(location => location.country));
const remainingOriginCountries = valuesByProperty.origin.filter(location => location.country === profile.birth_country_alpha2);
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.origin.map((location) => location.country));
const remainingOriginCountries = valuesByProperty.origin.filter((location) => location.country === profile.birth_country_alpha2);
profile.birth_state = getMostFrequent(remainingOriginCountries.map(location => location.state));
const remainingOriginStates = remainingOriginCountries.filter(location => !profile.birth_state || location.state === profile.birth_state);
profile.birth_state = getMostFrequent(remainingOriginCountries.map((location) => location.state));
const remainingOriginStates = remainingOriginCountries.filter((location) => !profile.birth_state || location.state === profile.birth_state);
profile.birth_city = getMostFrequent(remainingOriginStates.map(location => location.city));
profile.birth_city = getMostFrequent(remainingOriginStates.map((location) => location.city));
profile.residence_country_alpha2 = getMostFrequent(valuesByProperty.residence.map(location => location.country));
const remainingResidenceCountries = valuesByProperty.residence.filter(location => location.country === profile.residence_country_alpha2);
profile.residence_country_alpha2 = getMostFrequent(valuesByProperty.residence.map((location) => location.country));
const remainingResidenceCountries = valuesByProperty.residence.filter((location) => location.country === profile.residence_country_alpha2);
profile.residence_state = getMostFrequent(remainingResidenceCountries.map(location => location.state));
const remainingResidenceStates = remainingResidenceCountries.filter(location => !profile.residence_state || location.state === profile.residence_state);
profile.residence_state = getMostFrequent(remainingResidenceCountries.map((location) => location.state));
const remainingResidenceStates = remainingResidenceCountries.filter((location) => !profile.residence_state || location.state === profile.residence_state);
profile.residence_city = getMostFrequent(remainingResidenceStates.map(location => location.city));
profile.residence_city = getMostFrequent(remainingResidenceStates.map((location) => location.city));
profile.weight = getAverage(valuesByProperty.weight);
@@ -580,8 +580,8 @@ async function interpolateProfiles(actorIdsOrNames) {
profile.piercings = getLongest(valuesByProperty.piercings);
profile.avatar_media_id = actorProfiles
.map(actorProfile => actorProfile.avatar)
.filter(avatar => avatar && (avatar.entropy === null || avatar.entropy > 5.5))
.map((actorProfile) => actorProfile.avatar)
.filter((avatar) => avatar && (avatar.entropy === null || avatar.entropy > 5.5))
.sort((avatarA, avatarB) => avatarB.height - avatarA.height)[0]?.id || null;
return profile;
@@ -598,10 +598,10 @@ async function interpolateProfiles(actorIdsOrNames) {
.modify((modifyBuilder) => {
if (actorIdsOrNames) {
modifyBuilder
.whereIn('id', actorIdsOrNames.filter(idOrName => typeof idOrName === 'number'))
.whereIn('id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
.orWhere((whereBuilder) => {
whereBuilder
.whereIn('name', actorIdsOrNames.filter(idOrName => typeof idOrName === 'string'))
.whereIn('name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string'))
.whereNull('entity_id');
});
}
@@ -610,7 +610,7 @@ async function interpolateProfiles(actorIdsOrNames) {
.transacting(transaction);
// insert new interpolated data
const queries = interpolatedProfiles.map(profile => knex('actors')
const queries = interpolatedProfiles.map((profile) => knex('actors')
.where('id', profile.id)
.update(profile)
.transacting(transaction));
@@ -621,8 +621,8 @@ async function interpolateProfiles(actorIdsOrNames) {
}
async function upsertProfiles(profiles) {
const newProfileEntries = profiles.filter(profile => !profile.update).map(profile => curateProfileEntry(profile)).filter(Boolean);
const updatingProfileEntries = profiles.filter(profile => profile.update).map(profile => curateProfileEntry(profile)).filter(Boolean);
const newProfileEntries = profiles.filter((profile) => !profile.update).map((profile) => curateProfileEntry(profile)).filter(Boolean);
const updatingProfileEntries = profiles.filter((profile) => profile.update).map((profile) => curateProfileEntry(profile)).filter(Boolean);
if (newProfileEntries.length > 0) {
await bulkInsert('actors_profiles', newProfileEntries);
@@ -632,7 +632,7 @@ async function upsertProfiles(profiles) {
if (argv.force && updatingProfileEntries.length > 0) {
const transaction = await knex.transaction();
const queries = updatingProfileEntries.map(profileEntry => knex('actors_profiles')
const queries = updatingProfileEntries.map((profileEntry) => knex('actors_profiles')
.where('id', profileEntry.id)
.update(profileEntry)
.returning(['id', 'actor_id'])
@@ -647,7 +647,7 @@ async function upsertProfiles(profiles) {
}
async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId) {
const validSources = actor.entity ? sources.filter(source => source === actor.entity.slug) : sources;
const validSources = actor.entity ? sources.filter((source) => source === actor.entity.slug) : sources;
const profiles = Promise.map(validSources, async (source) => {
try {
@@ -748,12 +748,12 @@ async function getActorNames(actorNames) {
)
`, [argv.actorsUpdate || new Date()]);
return actorsWithoutProfiles.rows.map(actor => actor.name);
return actorsWithoutProfiles.rows.map((actor) => actor.name);
}
async function storeProfiles(profiles) {
const profilesWithAvatarIds = await associateAvatars(profiles);
const actorIds = Array.from(new Set(profiles.map(profile => profile.id)));
const actorIds = Array.from(new Set(profiles.map((profile) => profile.id)));
await upsertProfiles(profilesWithAvatarIds);
await interpolateProfiles(actorIds);
@@ -772,7 +772,7 @@ async function scrapeActors(argNames) {
fetchEntitiesBySlug(entitySlugs, 'desc'),
knex('actors')
.select(knex.raw('actors.id, actors.name, actors.slug, actors.entry_id, actors.entity_id, row_to_json(entities) as entity'))
.whereIn('actors.slug', baseActors.map(baseActor => baseActor.slug))
.whereIn('actors.slug', baseActors.map((baseActor) => baseActor.slug))
.whereNull('actors.alias_for')
.leftJoin('entities', 'entities.id', 'actors.entity_id')
.groupBy('actors.id', 'entities.id'),
@@ -786,7 +786,7 @@ async function scrapeActors(argNames) {
},
}), {});
const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlugAndEntryId[baseActor.slug]?.[baseActor.entryId]);
const newBaseActors = baseActors.filter((baseActor) => !existingActorEntriesBySlugAndEntryId[baseActor.slug]?.[baseActor.entryId]);
const [batchId] = newBaseActors.length > 0 ? await knex('batches').insert({ comment: null }).returning('id') : [null];
const curatedActorEntries = batchId && curateActorEntries(newBaseActors, batchId);
@@ -799,7 +799,7 @@ async function scrapeActors(argNames) {
const existingProfiles = await knex('actors_profiles')
.select(knex.raw('actors_profiles.*, row_to_json(avatars) as avatar'))
.whereIn('actor_id', actors.map(actor => actor.id))
.whereIn('actor_id', actors.map((actor) => actor.id))
.leftJoin('media as avatars', 'avatars.id', 'actors_profiles.avatar_media_id');
const existingProfilesByActorEntityId = existingProfiles.reduce((acc, profile) => ({
@@ -812,7 +812,7 @@ async function scrapeActors(argNames) {
const profilesPerActor = await Promise.map(
actors,
async actor => scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId),
async (actor) => scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId),
{ concurrency: 10 },
);
@@ -833,7 +833,7 @@ async function scrapeActors(argNames) {
async function getOrCreateActors(baseActors, batchId) {
// WHERE IN causes stack depth error and performance issues with a large amount of values, no knex VALUES helper available
const actorValues = baseActors.map(actor => knex.raw('(:slug, :entityId, :entryId, :collisionLikely)', {
const actorValues = baseActors.map((actor) => knex.raw('(:slug, :entityId, :entryId, :collisionLikely)', {
slug: actor.slug,
entityId: actor.entity.id,
entryId: actor.entryId,
@@ -867,7 +867,7 @@ async function getOrCreateActors(baseActors, batchId) {
},
}), {});
const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.entity.id]?.[baseActor.entryId]?.[baseActor.slug] && !existingActorSlugs.null?.null?.[baseActor.slug]);
const uniqueBaseActors = baseActors.filter((baseActor) => !existingActorSlugs[baseActor.entity.id]?.[baseActor.entryId]?.[baseActor.slug] && !existingActorSlugs.null?.null?.[baseActor.slug]);
const curatedActorEntries = curateActorEntries(uniqueBaseActors, batchId);
const newActors = await bulkInsert('actors', curatedActorEntries);
@@ -884,13 +884,13 @@ async function getOrCreateActors(baseActors, batchId) {
}), {});
const newActorProfiles = await Promise.all(baseActors
.filter(actor => actor.hasProfile)
.map(actor => ({
.filter((actor) => actor.hasProfile)
.map((actor) => ({
...actor,
id: newActorIdsByEntityIdEntryIdAndSlug[actor.entity?.id]?.[actor.entryId]?.[actor.slug] || newActorIdsByEntityIdEntryIdAndSlug.null?.null?.[actor.slug],
}))
.filter(actor => !!actor.id)
.map(actor => curateProfile(actor)));
.filter((actor) => !!actor.id)
.map((actor) => curateProfile(actor)));
await storeProfiles(newActorProfiles);
@@ -950,16 +950,16 @@ async function associatePeople(releases, batchId, type = 'actor') {
const releaseActorAssociations = Object.entries(baseActorsByReleaseId)
.map(([releaseId, releaseActors]) => releaseActors
.map(releaseActor => ({
.map((releaseActor) => ({
release_id: releaseId,
...(actorIdsByEntityIdEntryIdAndSlug[releaseActor.entity?.id]?.[releaseActor.entryId]?.[releaseActor.slug] || actorIdsByEntityIdEntryIdAndSlug.null.null[releaseActor.slug]),
})))
.flat();
const validReleaseActorAssociations = releaseActorAssociations.filter(association => association.release_id && association[personKey]);
const validReleaseActorAssociations = releaseActorAssociations.filter((association) => association.release_id && association[personKey]);
if (releaseActorAssociations.length > validReleaseActorAssociations.length) {
const invalidReleaseActorAssociations = releaseActorAssociations.filter(association => !association.release_id || !association[personKey]);
const invalidReleaseActorAssociations = releaseActorAssociations.filter((association) => !association.release_id || !association[personKey]);
logger.error(invalidReleaseActorAssociations);
}
@@ -1021,15 +1021,15 @@ async function searchActors(query) {
.from(knex.raw('search_actors(?) as actors', [query]))
.limit(100);
return actors.map(actor => curateActor(actor));
return actors.map((actor) => curateActor(actor));
}
async function flushProfiles(actorIdsOrNames) {
const profiles = await fetchProfiles(actorIdsOrNames);
const actorNames = Array.from(new Set(profiles.map(profile => profile.actor.name)));
const actorNames = Array.from(new Set(profiles.map((profile) => profile.actor.name)));
const deleteCount = await knex('actors_profiles')
.whereIn('id', profiles.map(profile => profile.id))
.whereIn('id', profiles.map((profile) => profile.id))
.delete();
await interpolateProfiles(actorIdsOrNames);
@@ -1050,14 +1050,14 @@ async function flushProfiles(actorIdsOrNames) {
async function deleteActors(actorIdsOrNames) {
const actors = await knex('actors')
.whereIn('id', actorIdsOrNames.filter(idOrName => typeof idOrName === 'number'))
.whereIn('id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
.orWhere((builder) => {
builder
.whereIn('name', actorIdsOrNames.filter(idOrName => typeof idOrName === 'string'))
.whereIn('name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string'))
.whereNull('entity_id');
});
const actorIds = actors.map(actor => actor.id);
const actorIds = actors.map((actor) => actor.id);
const sceneIds = await knex('releases_actors')
.select('releases.id')