From fc0661804ff0f0531c3e8609906693c23616a162 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Fri, 9 Feb 2024 22:30:38 +0100 Subject: [PATCH] Added filtered titles and secondary tags to manticore database. --- assets/components/entities/children.vue | 2 +- migrations/20240125011700_manticore.js | 3 +- seeds/00_tags.js | 4 +- src/tools/manticore.js | 66 ++++++++++++++----------- src/update-search.js | 61 +++++++++++++---------- 5 files changed, 77 insertions(+), 59 deletions(-) diff --git a/assets/components/entities/children.vue b/assets/components/entities/children.vue index 74e9dcd33..ccb33578f 100755 --- a/assets/components/entities/children.vue +++ b/assets/components/entities/children.vue @@ -20,13 +20,13 @@ export default { components: { EntityTile, }, - emits: ['load'], props: { entity: { type: Object, default: null, }, }, + emits: ['load'], }; diff --git a/migrations/20240125011700_manticore.js b/migrations/20240125011700_manticore.js index fb83b15a6..57dc85afa 100644 --- a/migrations/20240125011700_manticore.js +++ b/migrations/20240125011700_manticore.js @@ -11,7 +11,8 @@ exports.up = async () => { await utilsApi.sql(`create table scenes ( id int, title text, - entry_id text, + title_filtered text, + shoot_id text, channel_id int, channel_name text, channel_slug text, diff --git a/seeds/00_tags.js b/seeds/00_tags.js index 289f3fda5..29677f377 100755 --- a/seeds/00_tags.js +++ b/seeds/00_tags.js @@ -1302,8 +1302,8 @@ const aliases = [ for: 'atogm', }, { - name: 'ass to other mouth', - for: 'atom', + name: 'atom', + for: 'atogm', }, { name: 'atm', diff --git a/src/tools/manticore.js b/src/tools/manticore.js index 464fcdb6b..15b4154dd 100644 --- a/src/tools/manticore.js +++ b/src/tools/manticore.js @@ -3,6 +3,7 @@ const config = require('config'); const manticore = require('manticoresearch'); const args = require('yargs').argv; +const { format } = require('date-fns'); const knex = require('../knex'); @@ -24,7 +25,7 @@ async function fetchScenes() { scenes_meta.title, scenes_meta.created_at, scenes_meta.date, - scenes_meta.entry_id, + scenes_meta.shoot_id, scenes_meta.stashed, entities.id as channel_id, entities.slug as channel_slug, @@ -33,7 +34,7 @@ async function fetchScenes() { parents.slug as network_slug, parents.name as network_name, COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors, - COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags + COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags FROM scenes_meta LEFT JOIN entities ON scenes_meta.entity_id = entities.id LEFT JOIN entities AS parents ON parents.id = entities.parent_id @@ -42,14 +43,14 @@ async function fetchScenes() { LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = scenes_meta.id LEFT JOIN actors ON local_actors.actor_id = actors.id LEFT JOIN actors AS directors ON local_directors.director_id = directors.id - LEFT JOIN tags ON local_tags.tag_id = tags.id AND tags.priority >= 6 + LEFT JOIN tags ON local_tags.tag_id = tags.id LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true GROUP BY scenes_meta.id, scenes_meta.title, scenes_meta.created_at, scenes_meta.date, - scenes_meta.entry_id, + scenes_meta.shoot_id, scenes_meta.stashed, entities.id, entities.name, @@ -70,7 +71,8 @@ async function init() { await utilsApi.sql(`create table scenes ( id int, title text, - entry_id text, + title_filtered text, + shoot_id text, channel_id int, channel_name text, channel_slug text, @@ -90,30 +92,38 @@ async function init() { const scenes = await fetchScenes(); - const docs = scenes.map((scene) => ({ - replace: { - index: 'scenes', - id: scene.id, - doc: { - title: scene.title || undefined, - date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined, - created_at: Math.round(scene.created_at.getTime() / 1000), - effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000), - entry_id: scene.entry_id, - channel_id: scene.channel_id, - channel_slug: scene.channel_slug, - channel_name: scene.channel_name, - network_id: scene.network_id || undefined, - network_slug: scene.network_slug || undefined, - network_name: scene.network_name || undefined, - actor_ids: scene.actors.map((actor) => actor.f1), - actors: scene.actors.map((actor) => actor.f2).join(), - tag_ids: scene.tags.map((tag) => tag.f1), - tags: scene.tags.map((tag) => tag.f2).join(), - stashed: scene.stashed || 0, + const docs = scenes.map((scene) => { + const flatActors = scene.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc. + const flatTags = scene.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => (tag.f4 ? `${tag.f2} ${tag.f4}` : tag.f2).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results + const filteredTitle = scene.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag, 'i'), ''), scene.title).trim().replace(/\s{2,}/, ' '); + + return { + replace: { + index: 'scenes', + id: scene.id, + doc: { + title: scene.title || undefined, + title_filtered: filteredTitle || undefined, + date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined, + created_at: Math.round(scene.created_at.getTime() / 1000), + effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000), + shoot_id: scene.shoot_id || undefined, + channel_id: scene.channel_id, + channel_slug: scene.channel_slug, + channel_name: scene.channel_name, + network_id: scene.network_id || undefined, + network_slug: scene.network_slug || undefined, + network_name: scene.network_name || undefined, + actor_ids: scene.actors.map((actor) => actor.f1), + actors: scene.actors.map((actor) => actor.f2).join(), + tag_ids: scene.tags.map((tag) => tag.f1), + tags: flatTags.join(' '), + meta: scene.date ? format(scene.date, 'y yy M MMM MMMM d') : undefined, + stashed: scene.stashed || 0, + }, }, - }, - })); + }; + }); const data = await indexApi.bulk(docs.map((doc) => JSON.stringify(doc)).join('\n')); diff --git a/src/update-search.js b/src/update-search.js index f306320a2..3a7c9f70e 100644 --- a/src/update-search.js +++ b/src/update-search.js @@ -17,7 +17,7 @@ async function updateManticoreSearch(releaseIds) { scenes_meta.title, scenes_meta.created_at, scenes_meta.date, - scenes_meta.entry_id, + scenes_meta.shoot_id, scenes_meta.stashed, entities.id as channel_id, entities.slug as channel_slug, @@ -26,7 +26,7 @@ async function updateManticoreSearch(releaseIds) { parents.slug as network_slug, parents.name as network_name, COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors, - COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags + COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags FROM scenes_meta LEFT JOIN entities ON scenes_meta.entity_id = entities.id LEFT JOIN entities AS parents ON parents.id = entities.parent_id @@ -43,7 +43,7 @@ async function updateManticoreSearch(releaseIds) { scenes_meta.title, scenes_meta.created_at, scenes_meta.date, - scenes_meta.entry_id, + scenes_meta.shoot_id, scenes_meta.stashed, entities.id, entities.name, @@ -55,31 +55,38 @@ async function updateManticoreSearch(releaseIds) { parents.alias; `, releaseIds && [releaseIds]); - const docs = scenes.rows.map((scene) => ({ - replace: { - index: 'scenes', - id: scene.id, - doc: { - title: scene.title || undefined, - date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined, - created_at: Math.round(scene.created_at.getTime() / 1000), - effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000), - entry_id: scene.entry_id, - channel_id: scene.channel_id, - channel_slug: scene.channel_slug, - channel_name: scene.channel_name, - network_id: scene.network_id || undefined, - network_slug: scene.network_slug || undefined, - network_name: scene.network_name || undefined, - actor_ids: scene.actors.map((actor) => actor.f1), - actors: scene.actors.map((actor) => actor.f2).join(), - tag_ids: scene.tags.map((tag) => tag.f1), - tags: scene.tags.map((tag) => tag.f2).join(), - meta: scene.date ? format(scene.date, 'y M d') : undefined, - stashed: scene.stashed || 0, + const docs = scenes.rows.map((scene) => { + const flatActors = scene.actors.flatMap((actor) => actor.f2.split(' ')); + const flatTags = scene.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => tag.f2.split(' ')); // only make top tags searchable to minimize cluttered results + const filteredTitle = scene.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag, 'i'), ''), scene.title).trim().replace(/\s{2,}/, ' '); + + return { + replace: { + index: 'scenes', + id: scene.id, + doc: { + title: scene.title || undefined, + title_filtered: filteredTitle || undefined, + date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined, + created_at: Math.round(scene.created_at.getTime() / 1000), + effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000), + shoot_id: scene.shoot_id || undefined, + channel_id: scene.channel_id, + channel_slug: scene.channel_slug, + channel_name: scene.channel_name, + network_id: scene.network_id || undefined, + network_slug: scene.network_slug || undefined, + network_name: scene.network_name || undefined, + actor_ids: scene.actors.map((actor) => actor.f1), + actors: scene.actors.map((actor) => actor.f2).join(), + tag_ids: scene.tags.map((tag) => tag.f1), + tags: scene.tags.filter((tag) => tag.f3 > 6).map((tag) => tag.f2).join(), // only make top tags searchable to minimize cluttered results + meta: scene.date ? format(scene.date, 'y yy M MMM MMMM d') : undefined, + stashed: scene.stashed || 0, + }, }, - }, - })); + }; + }); if (docs.length === 0) { return;