Improved actor entity and entry ID storage.

DebaucheryLibrarian 2021-02-16 03:37:52 +01:00
parent b26a029f66
commit 67055bf920
5 changed files with 52 additions and 24 deletions


@@ -16,13 +16,13 @@
     >{{ actor.name }}</span>
     <router-link
-      v-if="actor.network"
-      v-tooltip="actor.network.name"
-      :to="{ name: 'network', params: { networkSlug: actor.network.slug } }"
+      v-if="actor.entity"
+      v-tooltip="actor.entity.name"
+      :to="{ name: actor.entity.type, params: { entitySlug: actor.entity.slug, range: 'new', pageNumber: 1 } }"
       class="favicon"
     >
       <img
-        :src="`/img/logos/${actor.network.slug}/favicon.png`"
+        :src="`/img/logos/${actor.entity.slug}/favicon_dark.png`"
        class="favicon-icon"
      >
    </router-link>
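
For illustration, the :to binding above now derives the route from the actor's own entity instead of the fixed 'network' route. A minimal sketch of what it evaluates to, assuming a hypothetical entity that is not taken from this commit:

    // hypothetical entity, for illustration only
    const entity = { type: 'channel', slug: 'example', name: 'Example' };

    // what the :to binding above produces for that entity
    const to = {
      name: entity.type, // the route name now follows the entity type
      params: { entitySlug: entity.slug, range: 'new', pageNumber: 1 },
    };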


@@ -61,7 +61,7 @@ const actorFields = `
       lazy
     }
   }
-  network: entity {
+  entity {
     id
     name
     slug


@@ -180,6 +180,11 @@ function toBaseActors(actorsOrNames, release) {
   return baseActors;
 }
 
+function getCollisionLikely(actor) {
+  // actor with single name
+  return actor.name.match(/\w+/g).length === 1;
+}
+
 function curateActor(actor, withDetails = false, isProfile = false) {
   if (!actor) {
     return null;
@@ -260,11 +265,13 @@ function curateActor(actor, withDetails = false, isProfile = false) {
 }
 
 function curateActorEntry(baseActor, batchId) {
+  const collisionLikely = getCollisionLikely(baseActor);
+
   return {
     name: baseActor.name,
     slug: baseActor.slug,
-    entity_id: null,
-    entry_id: baseActor.entryId,
+    entity_id: collisionLikely ? baseActor.entity.id : null,
+    entry_id: collisionLikely ? baseActor.entryId : null,
     batch_id: batchId,
   };
 }
@@ -641,6 +648,11 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
   const scraper = scrapers[scraperSlug];
   const layoutScraper = resolveLayoutScraper(entity, scraper);
 
+  if (!layoutScraper?.fetchProfile) {
+    logger.warn(`No profile scraper available for ${scraperSlug}`);
+    throw new Error(`No profile scraper available for ${scraperSlug}`);
+  }
+
   const context = {
     ...entity,
     // legacy
@@ -653,11 +665,6 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
 
   const label = context.entity?.name;
 
-  if (!layoutScraper?.fetchProfile) {
-    logger.warn(`No profile scraper available for ${scraperSlug}`);
-    throw new Error(`No profile scraper available for ${scraperSlug}`);
-  }
-
   if (!context.entity) {
     logger.warn(`No entity found for ${scraperSlug}`);
     throw new Error(`No entity found for ${scraperSlug}`);
@@ -813,41 +820,61 @@ async function scrapeActors(argNames) {
 async function getOrCreateActors(baseActors, batchId) {
   // WHERE IN causes stack depth error and performance issues with a large amount of values, no knex VALUES helper available
-  const actorValues = baseActors.map(actor => knex.raw('(:slug, :entityId)', { slug: actor.slug, entityId: actor.entity.id })).join(', ');
+  const actorValues = baseActors.map(actor => knex.raw('(:slug, :entityId, :entryId, :collisionLikely)', {
+    slug: actor.slug,
+    entityId: actor.entity.id,
+    entryId: actor.entryId,
+    collisionLikely: getCollisionLikely(actor),
+  })).join(', ');
 
   const existingActors = await knex
     .select('actors.*')
-    .from(knex.raw(`actors, (VALUES ${actorValues}) AS base_actors (slug, entity_id)`))
-    .whereRaw('actors.slug = base_actors.slug AND actors.entity_id IS NULL')
-    .orWhereRaw('actors.slug = base_actors.slug AND actors.entity_id = base_actors.entity_id');
+    .from(knex.raw(`actors, (VALUES ${actorValues}) AS base_actors (slug, entity_id, entry_id, collision_likely)`))
+    .whereRaw(`
+      actors.slug = base_actors.slug
+      AND actors.entity_id IS NULL
+      AND NOT base_actors.collision_likely
+    `)
+    .orWhereRaw(`
+      actors.slug = base_actors.slug
+      AND actors.entity_id = base_actors.entity_id
+      AND ((actors.entry_id IS NULL AND base_actors.entry_id IS NULL)
+        OR actors.entry_id = base_actors.entry_id)
+    `);
 
   // const existingActorSlugs = new Set(existingActors.map(actor => actor.slug));
   const existingActorSlugs = existingActors.reduce((acc, actor) => ({
     ...acc,
     [actor.entity_id]: {
       ...acc[actor.entity_id],
+      [actor.entry_id]: {
+        ...acc[actor.entity_id]?.[actor.entry_id],
       [actor.slug]: true,
       },
+    },
   }), {});
 
-  const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.entity.id]?.[baseActor.slug] && !existingActorSlugs.null?.[baseActor.slug]);
+  const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.entity.id]?.[baseActor.entryId]?.[baseActor.slug] && !existingActorSlugs.null?.null?.[baseActor.slug]);
 
   const curatedActorEntries = curateActorEntries(uniqueBaseActors, batchId);
   const newActors = await bulkInsert('actors', curatedActorEntries);
 
-  const newActorIdsByEntityIdAndSlug = newActors.reduce((acc, actor) => ({
+  const newActorIdsByEntityIdEntryIdAndSlug = newActors.reduce((acc, actor) => ({
     ...acc,
     [actor.entity_id]: {
       ...acc[actor.entity_id],
+      [actor.entry_id]: {
+        ...acc[actor.entity_id]?.[actor.entry_id],
       [actor.slug]: actor.id,
       },
+    },
   }), {});
 
   const newActorProfiles = await Promise.all(baseActors
     .filter(actor => actor.hasProfile)
     .map(actor => ({
       ...actor,
-      id: newActorIdsByEntityIdAndSlug[actor.entity?.id]?.[actor.slug] || newActorIdsByEntityIdAndSlug.null?.[actor.slug],
+      id: newActorIdsByEntityIdEntryIdAndSlug[actor.entity?.id]?.[actor.entryId]?.[actor.slug] || newActorIdsByEntityIdEntryIdAndSlug.null?.null?.[actor.slug],
     }))
     .filter(actor => !!actor.id)
     .map(actor => curateProfile(actor)));
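
Taken together, the changes above only scope an actor to its entity and entry when a name collision is likely (single-word names), and the lookup tables gain an extra entry-ID level. A rough sketch of both, assuming the same shapes used in this diff; the sample data is hypothetical:

    // single-word names are treated as collision-likely (same check as above)
    function getCollisionLikely(actor) {
      return actor.name.match(/\w+/g).length === 1;
    }

    getCollisionLikely({ name: 'Jane Doe' }); // false, stored without entity/entry scope
    getCollisionLikely({ name: 'Amber' });    // true, stored with entity and entry IDs

    // hypothetical lookup shape built by the reduce calls above:
    // entity_id -> entry_id -> slug -> actor id, with "null" keys for unscoped actors
    const actorIds = {
      null: { null: { 'jane-doe': 101 } },
      5: { abc123: { amber: 102 } },
    };

    // mirrors the id resolution used for newActorProfiles
    const id = actorIds[5]?.abc123?.amber || actorIds.null?.null?.amber; // 102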


@@ -19,7 +19,7 @@ function resolveLayoutScraper(entity, scraper) {
     return scraper[entity.parameters.layout];
   }
 
-  if (entity.parent) {
+  if (entity?.parent) {
     return resolveLayoutScraper(entity.parent, scraper);
   }
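
The optional chaining added above simply guards resolveLayoutScraper against being called without an entity; a minimal sketch of the difference, using a hypothetical call that is not part of this commit:

    const entity = undefined;      // e.g. a scraper source with no matching entity
    // entity.parent               // would throw a TypeError
    const parent = entity?.parent; // undefined, so the function can fall through safely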


@@ -258,7 +258,8 @@ async function fetchLatest(entity, page, options) {
     .limit(faker.random.number({ min: 2, max: 15 }))
     .pluck('name');
 
-  release.actors = [...actors(release), null]; // include empty actor to ensure proper handling
+  // release.actors = [...actors(release), null]; // include empty actor to ensure proper handling
+  release.actors = ['Amber'];
 
   release.title = title(release);
   return release;