'use strict'; const config = require('config'); const Promise = require('bluebird'); // const logger = require('./logger')(__filename); const knex = require('./knex'); const scrapers = require('./scrapers/scrapers'); const argv = require('./argv'); const slugify = require('./utils/slugify'); const capitalize = require('./utils/capitalize'); function toBaseActors(actorsOrNames, release) { return actorsOrNames.map((actorOrName) => { const name = capitalize(actorOrName.name || actorOrName); const slug = slugify(name); const baseActor = { name, slug, network: release?.site.network, }; if (actorOrName.name) { return { ...actorOrName, ...baseActor, }; } return baseActor; }); } function curateActorEntry(baseActor, batchId) { return { name: baseActor.name, slug: baseActor.slug, network_id: null, batch_id: batchId, }; } function curateActorEntries(baseActors, batchId) { return baseActors.map(baseActor => curateActorEntry(baseActor, batchId)); } async function scrapeActors(actorNames) { const baseActors = toBaseActors(actorNames); const sources = argv.sources || config.profiles || Object.keys(scrapers.actors); const siteSlugs = sources.flat(); const [networks, sites, existingActorEntries] = await Promise.all([ knex('networks').whereIn('slug', siteSlugs), knex('sites').whereIn('slug', siteSlugs), knex('actors') .select(['id', 'name', 'slug']) .whereIn('slug', baseActors.map(baseActor => baseActor.slug)) .whereNull('network_id'), ]); const existingActorEntriesBySlug = existingActorEntries.reduce((acc, actorEntry) => ({ ...acc, [actorEntry.slug]: actorEntry }), {}); const networksBySlug = networks.reduce((acc, network) => ({ ...acc, [network.slug]: { ...network, isNetwork: true } }), {}); const sitesBySlug = sites.reduce((acc, site) => ({ ...acc, [site.slug]: site }), {}); const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlug[baseActor.slug]); const [batchId] = newBaseActors.length > 0 ? await knex('batches').insert({ comment: null }).returning('id') : [null]; const curatedActorEntries = batchId && curateActorEntries(newBaseActors, batchId); const newActorEntries = batchId && await knex('actors').insert(curatedActorEntries).returning(['id', 'name', 'slug']); const actorEntries = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []); console.log(actorEntries, newActorEntries, actorEntries); } async function getOrCreateActors(baseActors, batchId) { const existingActors = await knex('actors') .select('id', 'alias_for', 'name', 'slug', 'network_id') .whereIn('slug', baseActors.map(baseActor => baseActor.slug)) .whereNull('network_id') .orWhereIn(['slug', 'network_id'], baseActors.map(baseActor => [baseActor.slug, baseActor.network.id])); // const existingActorSlugs = new Set(existingActors.map(actor => actor.slug)); const existingActorSlugs = existingActors.reduce((acc, actor) => ({ ...acc, [actor.network_id]: { ...acc[actor.network_id], [actor.slug]: true, }, }), {}); const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.network.id]?.[baseActor.slug] && !existingActorSlugs.null?.[baseActor.slug]); const curatedActorEntries = curateActorEntries(uniqueBaseActors, batchId); const newActors = await knex('actors').insert(curatedActorEntries, ['id', 'alias_for', 'name', 'slug', 'network_id']); if (Array.isArray(newActors)) { return newActors.concat(existingActors); } return existingActors; } async function associateActors(releases, batchId) { const baseActorsByReleaseId = releases.reduce((acc, release) => { if (release.actors) { acc[release.id] = toBaseActors(release.actors, release); } return acc; }, {}); const baseActors = Object.values(baseActorsByReleaseId).flat(); if (baseActors.length === 0) { return; } const baseActorsBySlugAndNetworkId = baseActors.reduce((acc, baseActor) => ({ ...acc, [baseActor.slug]: { ...acc[baseActor.slug], [baseActor.network.id]: baseActor, }, }), {}); const uniqueBaseActors = Object.values(baseActorsBySlugAndNetworkId).map(baseActorsByNetworkId => Object.values(baseActorsByNetworkId)).flat(); const actors = await getOrCreateActors(uniqueBaseActors, batchId); const actorIdsBySlugAndNetworkId = actors.reduce((acc, actor) => ({ ...acc, [actor.network_id]: { ...acc[actor.network_id], [actor.slug]: actor.alias_for || actor.id, }, }), {}); const releaseActorAssociations = Object.entries(baseActorsByReleaseId) .map(([releaseId, releaseActors]) => releaseActors .map(releaseActor => ({ release_id: releaseId, actor_id: actorIdsBySlugAndNetworkId[releaseActor.network.id]?.[releaseActor.slug] || actorIdsBySlugAndNetworkId.null[releaseActor.slug], }))) .flat(); await knex.raw(`${knex('releases_actors').insert(releaseActorAssociations).toString()} ON CONFLICT DO NOTHING;`); } module.exports = { associateActors, scrapeActors, };