Refactored deep and store modules to use entities.

This commit is contained in:
2020-06-25 02:26:25 +02:00
parent f0a89df6ab
commit 4959dfd14f
14 changed files with 132 additions and 164 deletions

View File

@@ -8,7 +8,7 @@ const knex = require('./knex');
const slugify = require('./utils/slugify');
const { associateActors, scrapeActors } = require('./actors');
const { associateReleaseTags } = require('./tags');
const { curateSite } = require('./sites');
const { curateEntity } = require('./entities');
const { associateReleaseMedia } = require('./media');
function curateReleaseEntry(release, batchId, existingRelease) {
@@ -20,9 +20,9 @@ function curateReleaseEntry(release, batchId, existingRelease) {
const curatedRelease = {
title: release.title,
entry_id: release.entryId || null,
entity_id: release.site?.id,
shoot_id: release.shootId || null,
entity_id: release.entity.id,
studio_id: release.studio?.id || null,
shoot_id: release.shootId || null,
url: release.url,
date: Number(release.date) ? release.date : null,
slug,
@@ -45,51 +45,47 @@ function curateReleaseEntry(release, batchId, existingRelease) {
return curatedRelease;
}
/**
 * Resolve each release's `channel` slug to an entity row and attach it as `release.entity`.
 *
 * - A release whose `channel` matches a looked-up type-2 entity gets that entity, passed
 *   through `curateEntity`; the joined parent row is available on the raw row as `parent`.
 * - A release without such a match that already carries an `entity` is returned unchanged.
 * - Any other release is logged as unmatched and dropped from the result.
 *
 * @param {Object[]} releases - scraped releases; `channel`, `entity` and `url` are read
 * @returns {Promise<Object[]>} the releases that ended up with an entity, in input order
 */
async function attachChannelEntities(releases) {
	// The previous predicate, `!release.entity && release.entity.type !== 1`, dereferenced
	// `release.entity` exactly when it was falsy, so it threw for every channel release
	// lacking an entity and could never select anything otherwise.
	// NOTE(review): the lookup below only returns type-2 entities, so a release is treated
	// as needing one when its entity is missing or is not type 2 - confirm this matches the
	// intended meaning of the original `type !== 1` check.
	const releasesWithoutEntity = releases.filter(release => release.channel
		&& (!release.entity || release.entity.type !== 2));

	const channelEntities = await knex('entities')
		.select(knex.raw('entities.*, row_to_json(parents) as parent'))
		.whereIn('entities.slug', releasesWithoutEntity.map(release => release.channel))
		.where('entities.type', 2)
		.leftJoin('entities AS parents', 'parents.id', 'entities.parent_id');

	// Index by slug in one pass; for duplicate slugs the last row wins.
	const channelEntitiesBySlug = Object.fromEntries(channelEntities.map(entity => [entity.slug, entity]));

	const releasesWithChannelEntity = await Promise.all(releases
		.map(async (release) => {
			if (release.channel && channelEntitiesBySlug[release.channel]) {
				const curatedEntity = await curateEntity(channelEntitiesBySlug[release.channel]);

				return {
					...release,
					entity: curatedEntity,
				};
			}

			if (release.entity) {
				return release;
			}

			logger.error(`Unable to match channel '${release.channel?.slug || release.channel}' from generic URL ${release.url}`);

			return null;
		}));

	// Drop the releases that could not be matched to any entity.
	return releasesWithChannelEntity.filter(Boolean);
}
async function attachStudios(releases) {
const studioSlugs = releases.map(release => release.studio).filter(Boolean);
const studios = await knex('studios').whereIn('slug', studioSlugs);
const studios = await knex('entities')
.whereIn('slug', studioSlugs)
.where('type', 3);
const studioBySlug = studios.reduce((acc, studio) => ({ ...acc, [studio.slug]: studio }), {});
const releasesWithStudio = releases.map((release) => {
@@ -111,7 +107,7 @@ async function attachStudios(releases) {
}
function attachReleaseIds(releases, storedReleases) {
const storedReleaseIdsBySiteIdAndEntryId = storedReleases.reduce((acc, release) => {
const storedReleaseIdsByEntityIdAndEntryId = storedReleases.reduce((acc, release) => {
if (!acc[release.entity_id]) acc[release.entity_id] = {};
acc[release.entity_id][release.entry_id] = release.id;
@@ -120,29 +116,29 @@ function attachReleaseIds(releases, storedReleases) {
const releasesWithId = releases.map(release => ({
...release,
id: storedReleaseIdsBySiteIdAndEntryId[release.site.id][release.entryId],
id: storedReleaseIdsByEntityIdAndEntryId[release.entity.id][release.entryId],
}));
return releasesWithId;
}
/**
 * Remove duplicates within a single batch of releases.
 *
 * Two releases are duplicates when they share both `entity.id` and `entryId`; the one that
 * appears last in the input is kept. Releases without an `entity` cannot be keyed and are
 * left out of the result.
 *
 * @param {Object[]} releases - releases to deduplicate; `entity.id` and `entryId` are read
 * @returns {Object[]} one release per (entity ID, entry ID) pair, grouped by entity ID
 */
function filterInternalDuplicateReleases(releases) {
	const releasesByEntityIdAndEntryId = releases.reduce((acc, release) => {
		if (!release.entity) {
			return acc;
		}

		if (!acc[release.entity.id]) {
			acc[release.entity.id] = {};
		}

		// A later release with the same key overwrites the earlier one.
		acc[release.entity.id][release.entryId] = release;

		return acc;
	}, {});

	return Object.values(releasesByEntityIdAndEntryId)
		.flatMap(entityReleases => Object.values(entityReleases));
}
@@ -150,17 +146,17 @@ async function filterDuplicateReleases(releases) {
const internalUniqueReleases = filterInternalDuplicateReleases(releases);
const duplicateReleaseEntries = await knex('releases')
.whereIn(['entry_id', 'entity_id'], internalUniqueReleases.map(release => [release.entryId, release.site.id]));
.whereIn(['entry_id', 'entity_id'], internalUniqueReleases.map(release => [release.entryId, release.entity.id]));
const duplicateReleasesBySiteIdAndEntryId = duplicateReleaseEntries.reduce((acc, release) => {
const duplicateReleasesByEntityIdAndEntryId = duplicateReleaseEntries.reduce((acc, release) => {
if (!acc[release.entity_id]) acc[release.entity_id] = {};
acc[release.entity_id][release.entry_id] = true;
return acc;
}, {});
const duplicateReleases = internalUniqueReleases.filter(release => duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]);
const uniqueReleases = internalUniqueReleases.filter(release => !duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]);
const duplicateReleases = internalUniqueReleases.filter(release => duplicateReleasesByEntityIdAndEntryId[release.entity.id]?.[release.entryId]);
const uniqueReleases = internalUniqueReleases.filter(release => !duplicateReleasesByEntityIdAndEntryId[release.entity.id]?.[release.entryId]);
return {
uniqueReleases,
@@ -216,10 +212,10 @@ async function storeReleases(releases) {
const [batchId] = await knex('batches').insert({ comment: null }).returning('id');
const releasesWithSites = await attachChannelSites(releases);
const releasesWithStudios = await attachStudios(releasesWithSites);
const releasesWithChannels = await attachChannelEntities(releases);
const releasesWithStudios = await attachStudios(releasesWithChannels);
// uniqueness is site ID + entry ID, filter uniques after adding sites
// uniqueness is entity ID + entry ID, filter uniques after adding entities
const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios);
const curatedNewReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));