Refactoring to use entities over sites and networks.

This commit is contained in:
2020-06-17 04:07:24 +02:00
parent 1907ce1e54
commit f0a89df6ab
7 changed files with 93 additions and 97 deletions

View File

@@ -144,7 +144,7 @@ function curateActor(actor, withDetails = false) {
name: actor.name,
slug: actor.slug,
gender: actor.gender,
networkId: actor.network_id,
networkId: actor.entity_id,
aliasFor: actor.alias_for,
dateOfBirth: actor.date_of_birth,
birthCountry: actor.birth_country_alpha2,
@@ -211,7 +211,7 @@ function curateActorEntry(baseActor, batchId) {
return {
name: baseActor.name,
slug: baseActor.slug,
network_id: null,
entity_id: null,
batch_id: batchId,
};
}
@@ -225,7 +225,7 @@ function curateProfileEntry(profile) {
...(profile.update !== false && { id: profile.update }),
actor_id: profile.id,
site_id: profile.site?.id || null,
network_id: profile.network?.id || null,
entity_id: profile.network?.id || null,
date_of_birth: profile.dateOfBirth,
date_of_death: profile.dateOfDeath,
gender: profile.gender,
@@ -577,14 +577,17 @@ async function scrapeActors(actorNames) {
const siteSlugs = sources.flat();
const [networks, sites, existingActorEntries] = await Promise.all([
knex('networks').whereIn('slug', siteSlugs),
knex('sites')
knex('entities')
.where('type', 2)
.whereIn('slug', siteSlugs),
knex('entities')
.select(
'sites.*',
'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
'entities.*',
'parents.name as network_name', 'parents.slug as network_slug', 'parents.url as network_url', 'parents.description as network_description', 'parents.parameters as network_parameters',
)
.whereIn('sites.slug', siteSlugs)
.leftJoin('networks', 'sites.network_id', 'networks.id'),
.where('type', 2)
.whereIn('entities.slug', siteSlugs)
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id'),
knex('actors')
.select(['id', 'name', 'slug'])
.modify((queryBuilder) => {
@@ -612,8 +615,8 @@ async function scrapeActors(actorNames) {
...acc,
[profile.actor_id]: {
...acc[profile.actor_id],
[profile.network_id]: {
...acc[profile.network_id],
[profile.entity_id]: {
...acc[profile.entity_id],
[profile.site_id]: profile,
},
},
@@ -644,17 +647,19 @@ async function scrapeActors(actorNames) {
}
async function getOrCreateActors(baseActors, batchId) {
console.log(baseActors);
const existingActors = await knex('actors')
.select('id', 'alias_for', 'name', 'slug', 'network_id')
.select('id', 'alias_for', 'name', 'slug', 'entity_id')
.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
.whereNull('network_id')
.orWhereIn(['slug', 'network_id'], baseActors.map(baseActor => [baseActor.slug, baseActor.network.id]));
.whereNull('entity_id')
.orWhereIn(['slug', 'entity_id'], baseActors.map(baseActor => [baseActor.slug, baseActor.network.id]));
// const existingActorSlugs = new Set(existingActors.map(actor => actor.slug));
const existingActorSlugs = existingActors.reduce((acc, actor) => ({
...acc,
[actor.network_id]: {
...acc[actor.network_id],
[actor.entity_id]: {
...acc[actor.entity_id],
[actor.slug]: true,
},
}), {});
@@ -662,7 +667,7 @@ async function getOrCreateActors(baseActors, batchId) {
const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.network.id]?.[baseActor.slug] && !existingActorSlugs.null?.[baseActor.slug]);
const curatedActorEntries = curateActorEntries(uniqueBaseActors, batchId);
const newActors = await knex('actors').insert(curatedActorEntries, ['id', 'alias_for', 'name', 'slug', 'network_id']);
const newActors = await knex('actors').insert(curatedActorEntries, ['id', 'alias_for', 'name', 'slug', 'entity_id']);
if (Array.isArray(newActors)) {
return newActors.concat(existingActors);
@@ -732,7 +737,7 @@ async function fetchActor(actorId) {
queryBuilder.where('actors.id', actorId);
})
.leftJoin('actors as actor_alias', 'actor_alias.id', 'actors.alias_for')
.leftJoin('networks', 'networks.id', 'actors.network_id')
.leftJoin('networks', 'networks.id', 'actors.entity_id')
.leftJoin('countries as birth_country', 'birth_country.alpha2', 'actors.birth_country_alpha2')
.leftJoin('countries as residence_country', 'residence_country.alpha2', 'actors.residence_country_alpha2')
.leftJoin('media', 'media.id', 'actors.avatar_media_id')