Using temporary table instead of WHERE IN to avoid stack depth error when finding duplicate actors.
This commit is contained in:
parent
64a52fbb1e
commit
4469376dd2
|
@ -696,11 +696,14 @@ async function scrapeActors(argNames) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getOrCreateActors(baseActors, batchId) {
|
async function getOrCreateActors(baseActors, batchId) {
|
||||||
const existingActors = await knex('actors')
|
// WHERE IN causes stack depth error and performance issues with a large amount of values, no knex VALUES helper available
|
||||||
.select('id', 'alias_for', 'name', 'slug', 'entity_id')
|
const actorValues = baseActors.map(actor => knex.raw('(:slug, :entityId)', { slug: actor.slug, entityId: actor.entity.id })).join(', ');
|
||||||
.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
|
|
||||||
.whereNull('entity_id')
|
const existingActors = await knex
|
||||||
.orWhereIn(['slug', 'entity_id'], baseActors.map(baseActor => [baseActor.slug, baseActor.entity.id]));
|
.select('actors.*')
|
||||||
|
.from(knex.raw(`actors, (VALUES ${actorValues}) AS base_actors (slug, entity_id)`))
|
||||||
|
.whereRaw('actors.slug = base_actors.slug AND actors.entity_id IS NULL')
|
||||||
|
.orWhereRaw('actors.slug = base_actors.slug AND actors.entity_id = base_actors.entity_id');
|
||||||
|
|
||||||
// const existingActorSlugs = new Set(existingActors.map(actor => actor.slug));
|
// const existingActorSlugs = new Set(existingActors.map(actor => actor.slug));
|
||||||
const existingActorSlugs = existingActors.reduce((acc, actor) => ({
|
const existingActorSlugs = existingActors.reduce((acc, actor) => ({
|
||||||
|
|
Loading…
Reference in New Issue