Improved actor entity and entry ID storage.

This commit is contained in:
DebaucheryLibrarian 2021-02-16 03:37:52 +01:00
parent b26a029f66
commit 67055bf920
5 changed files with 52 additions and 24 deletions

View File

@ -16,13 +16,13 @@
>{{ actor.name }}</span>
<router-link
v-if="actor.network"
v-tooltip="actor.network.name"
:to="{ name: 'network', params: { networkSlug: actor.network.slug } }"
v-if="actor.entity"
v-tooltip="actor.entity.name"
:to="{ name: actor.entity.type, params: { entitySlug: actor.entity.slug, range: 'new', pageNumber: 1 } }"
class="favicon"
>
<img
:src="`/img/logos/${actor.network.slug}/favicon.png`"
:src="`/img/logos/${actor.entity.slug}/favicon_dark.png`"
class="favicon-icon"
>
</router-link>

View File

@ -61,7 +61,7 @@ const actorFields = `
lazy
}
}
network: entity {
entity {
id
name
slug

View File

@ -180,6 +180,11 @@ function toBaseActors(actorsOrNames, release) {
return baseActors;
}
/**
 * Tells whether an actor's name is likely to collide with another actor's name.
 *
 * A name is treated as collision-prone when it consists of exactly one word,
 * where a word is a run of `\w` characters (ASCII letters, digits, underscore).
 *
 * @param {{ name?: string }} actor - actor whose `name` is inspected
 * @returns {boolean} true when the name contains exactly one word, false otherwise
 *   (including when the actor or its name is missing, or the name has no word at all)
 */
function getCollisionLikely(actor) {
	// match() with a global pattern yields null, not an empty array, when nothing
	// matches; fall back to an empty list so such names do not throw on `.length`
	const words = actor?.name?.match(/\w+/g) ?? [];

	// actor with single name
	return words.length === 1;
}
function curateActor(actor, withDetails = false, isProfile = false) {
if (!actor) {
return null;
@ -260,11 +265,13 @@ function curateActor(actor, withDetails = false, isProfile = false) {
}
/**
 * Builds the plain object for one actor entry from a base actor.
 *
 * The entity and entry IDs are only stored when getCollisionLikely() reports
 * the actor as collision-prone; otherwise both are stored as null.
 *
 * @param {object} baseActor - reads `name`, `slug`, `entryId` and `entity.id`
 * @param {*} batchId - stored unchanged as `batch_id`
 * @returns {object} entry with `name`, `slug`, `entity_id`, `entry_id` and `batch_id`
 */
function curateActorEntry(baseActor, batchId) {
const collisionLikely = getCollisionLikely(baseActor);
return {
name: baseActor.name,
slug: baseActor.slug,
// NOTE(review): the next two keys are repeated below; in an object literal the
// later duplicate wins, so these two values never reach the result. They look like
// the pre-change lines of this diff hunk — confirm against the actual file.
entity_id: null,
entry_id: baseActor.entryId,
// effective values: keep the IDs only for collision-prone actors
// (`baseActor.entity` is dereferenced only when collisionLikely is true)
entity_id: collisionLikely ? baseActor.entity.id : null,
entry_id: collisionLikely ? baseActor.entryId : null,
batch_id: batchId,
};
}
@ -641,6 +648,11 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
const scraper = scrapers[scraperSlug];
const layoutScraper = resolveLayoutScraper(entity, scraper);
if (!layoutScraper?.fetchProfile) {
logger.warn(`No profile profile scraper available for ${scraperSlug}`);
throw new Error(`No profile profile scraper available for ${scraperSlug}`);
}
const context = {
...entity,
// legacy
@ -653,11 +665,6 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
const label = context.entity?.name;
if (!layoutScraper?.fetchProfile) {
logger.warn(`No profile profile scraper available for ${scraperSlug}`);
throw new Error(`No profile profile scraper available for ${scraperSlug}`);
}
if (!context.entity) {
logger.warn(`No entity found for ${scraperSlug}`);
throw new Error(`No entity found for ${scraperSlug}`);
@ -813,33 +820,53 @@ async function scrapeActors(argNames) {
async function getOrCreateActors(baseActors, batchId) {
// WHERE IN causes stack depth error and performance issues with a large amount of values, no knex VALUES helper available
const actorValues = baseActors.map(actor => knex.raw('(:slug, :entityId)', { slug: actor.slug, entityId: actor.entity.id })).join(', ');
const actorValues = baseActors.map(actor => knex.raw('(:slug, :entityId, :entryId, :collisionLikely)', {
slug: actor.slug,
entityId: actor.entity.id,
entryId: actor.entryId,
collisionLikely: getCollisionLikely(actor),
})).join(', ');
const existingActors = await knex
.select('actors.*')
.from(knex.raw(`actors, (VALUES ${actorValues}) AS base_actors (slug, entity_id)`))
.whereRaw('actors.slug = base_actors.slug AND actors.entity_id IS NULL')
.orWhereRaw('actors.slug = base_actors.slug AND actors.entity_id = base_actors.entity_id');
.from(knex.raw(`actors, (VALUES ${actorValues}) AS base_actors (slug, entity_id, entry_id, collision_likely)`))
.whereRaw(`
actors.slug = base_actors.slug
AND actors.entity_id IS NULL
AND NOT base_actors.collision_likely
`)
.orWhereRaw(`
actors.slug = base_actors.slug
AND actors.entity_id = base_actors.entity_id
AND ((actors.entry_id IS NULL AND base_actors.entry_id IS NULL)
OR actors.entry_id = base_actors.entry_id)
`);
// const existingActorSlugs = new Set(existingActors.map(actor => actor.slug));
const existingActorSlugs = existingActors.reduce((acc, actor) => ({
...acc,
[actor.entity_id]: {
...acc[actor.entity_id],
[actor.slug]: true,
[actor.entry_id]: {
...acc[actor.entity_id]?.[actor.entry_id],
[actor.slug]: true,
},
},
}), {});
const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.entity.id]?.[baseActor.slug] && !existingActorSlugs.null?.[baseActor.slug]);
const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.entity.id]?.[baseActor.entryId]?.[baseActor.slug] && !existingActorSlugs.null?.null?.[baseActor.slug]);
const curatedActorEntries = curateActorEntries(uniqueBaseActors, batchId);
const newActors = await bulkInsert('actors', curatedActorEntries);
const newActorIdsByEntityIdAndSlug = newActors.reduce((acc, actor) => ({
const newActorIdsByEntityIdEntryIdAndSlug = newActors.reduce((acc, actor) => ({
...acc,
[actor.entity_id]: {
...acc[actor.entity_id],
[actor.slug]: actor.id,
[actor.entry_id]: {
...acc[actor.entity_id]?.[actor.entry_id],
[actor.slug]: actor.id,
},
},
}), {});
@ -847,7 +874,7 @@ async function getOrCreateActors(baseActors, batchId) {
.filter(actor => actor.hasProfile)
.map(actor => ({
...actor,
id: newActorIdsByEntityIdAndSlug[actor.entity?.id]?.[actor.slug] || newActorIdsByEntityIdAndSlug.null?.[actor.slug],
id: newActorIdsByEntityIdEntryIdAndSlug[actor.entity?.id]?.[actor.entryId]?.[actor.slug] || newActorIdsByEntityIdEntryIdAndSlug.null?.null?.[actor.slug],
}))
.filter(actor => !!actor.id)
.map(actor => curateProfile(actor)));

View File

@ -19,7 +19,7 @@ function resolveLayoutScraper(entity, scraper) {
return scraper[entity.parameters.layout];
}
if (entity.parent) {
if (entity?.parent) {
return resolveLayoutScraper(entity.parent, scraper);
}

View File

@ -258,7 +258,8 @@ async function fetchLatest(entity, page, options) {
.limit(faker.random.number({ min: 2, max: 15 }))
.pluck('name');
release.actors = [...actors(release), null]; // include empty actor to ensure proper handling
// release.actors = [...actors(release), null]; // include empty actor to ensure proper handling
release.actors = ['Amber'];
release.title = title(release);
return release;