Using temporary table instead of WHERE IN to avoid stack depth error when finding duplicate actors.

This commit is contained in:
DebaucheryLibrarian 2020-10-28 03:50:52 +01:00
parent 64a52fbb1e
commit 4469376dd2
1 changed file with 8 additions and 5 deletions

View File

@ -696,11 +696,14 @@ async function scrapeActors(argNames) {
} }
async function getOrCreateActors(baseActors, batchId) { async function getOrCreateActors(baseActors, batchId) {
const existingActors = await knex('actors') // WHERE IN causes stack depth error and performance issues with a large amount of values, no knex VALUES helper available
.select('id', 'alias_for', 'name', 'slug', 'entity_id') const actorValues = baseActors.map(actor => knex.raw('(:slug, :entityId)', { slug: actor.slug, entityId: actor.entity.id })).join(', ');
.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
.whereNull('entity_id') const existingActors = await knex
.orWhereIn(['slug', 'entity_id'], baseActors.map(baseActor => [baseActor.slug, baseActor.entity.id])); .select('actors.*')
.from(knex.raw(`actors, (VALUES ${actorValues}) AS base_actors (slug, entity_id)`))
.whereRaw('actors.slug = base_actors.slug AND actors.entity_id IS NULL')
.orWhereRaw('actors.slug = base_actors.slug AND actors.entity_id = base_actors.entity_id');
// const existingActorSlugs = new Set(existingActors.map(actor => actor.slug)); // const existingActorSlugs = new Set(existingActors.map(actor => actor.slug));
const existingActorSlugs = existingActors.reduce((acc, actor) => ({ const existingActorSlugs = existingActors.reduce((acc, actor) => ({