From 4469376dd27969fd18e9f2732c9c205af1797021 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 28 Oct 2020 03:50:52 +0100 Subject: [PATCH] Using temporary table instead of WHERE IN to avoid stack depth error when finding duplicate actors. --- src/actors.js | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/actors.js b/src/actors.js index 53d0b078..12479ffa 100644 --- a/src/actors.js +++ b/src/actors.js @@ -696,11 +696,14 @@ async function scrapeActors(argNames) { } async function getOrCreateActors(baseActors, batchId) { - const existingActors = await knex('actors') - .select('id', 'alias_for', 'name', 'slug', 'entity_id') - .whereIn('slug', baseActors.map(baseActor => baseActor.slug)) - .whereNull('entity_id') - .orWhereIn(['slug', 'entity_id'], baseActors.map(baseActor => [baseActor.slug, baseActor.entity.id])); + // WHERE IN causes stack depth error and performance issues with a large amount of values, no knex VALUES helper available + const actorValues = baseActors.map(actor => knex.raw('(:slug, :entityId)', { slug: actor.slug, entityId: actor.entity.id })).join(', '); + + const existingActors = await knex + .select('actors.*') + .from(knex.raw(`actors, (VALUES ${actorValues}) AS base_actors (slug, entity_id)`)) + .whereRaw('actors.slug = base_actors.slug AND actors.entity_id IS NULL') + .orWhereRaw('actors.slug = base_actors.slug AND actors.entity_id = base_actors.entity_id'); // const existingActorSlugs = new Set(existingActors.map(actor => actor.slug)); const existingActorSlugs = existingActors.reduce((acc, actor) => ({