Added filtered titles and secondary tags to manticore database.

2024-02-09 22:30:38 +01:00 · 2024-02-09 22:30:38 +01:00 · fc0661804f
parent 31e884359e
commit fc0661804f
5 changed files with 77 additions and 59 deletions
--- a/assets/components/entities/children.vue
+++ b/assets/components/entities/children.vue
@@ -20,13 +20,13 @@ export default {
 	components: {
 		EntityTile,
 	},
-	emits: ['load'],
 	props: {
 		entity: {
 			type: Object,
 			default: null,
 		},
 	},
+	emits: ['load'],
 };
 </script>

--- a/migrations/20240125011700_manticore.js
+++ b/migrations/20240125011700_manticore.js
@@ -11,7 +11,8 @@ exports.up = async () => {
 	await utilsApi.sql(`create table scenes (
 		 id int,
 		 title text,
-		 entry_id text,
+		 title_filtered text,
+		 shoot_id text,
 		 channel_id int,
 		 channel_name text,
 		 channel_slug text,
--- a/seeds/00_tags.js
+++ b/seeds/00_tags.js
@@ -1302,8 +1302,8 @@ const aliases = [
 		for: 'atogm',
 	},
 	{
-		name: 'ass to other mouth',
-		for: 'atom',
+		name: 'atom',
+		for: 'atogm',
 	},
 	{
 		name: 'atm',
--- a/src/tools/manticore.js
+++ b/src/tools/manticore.js
@@ -3,6 +3,7 @@
 const config = require('config');
 const manticore = require('manticoresearch');
 const args = require('yargs').argv;
+const { format } = require('date-fns');

 const knex = require('../knex');

@@ -24,7 +25,7 @@ async function fetchScenes() {
 			scenes_meta.title,
 			scenes_meta.created_at,
 			scenes_meta.date,
-			scenes_meta.entry_id,
+			scenes_meta.shoot_id,
 			scenes_meta.stashed,
 			entities.id as channel_id,
 			entities.slug as channel_slug,
@@ -33,7 +34,7 @@ async function fetchScenes() {
 			parents.slug as network_slug,
 			parents.name as network_name,
 			COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
-			COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags
+			COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags
        FROM scenes_meta
        LEFT JOIN entities ON scenes_meta.entity_id = entities.id
        LEFT JOIN entities AS parents ON parents.id = entities.parent_id
@@ -42,14 +43,14 @@ async function fetchScenes() {
        LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = scenes_meta.id
        LEFT JOIN actors ON local_actors.actor_id = actors.id
        LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
-        LEFT JOIN tags ON local_tags.tag_id = tags.id AND tags.priority >= 6
+        LEFT JOIN tags ON local_tags.tag_id = tags.id
        LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
        GROUP BY
            scenes_meta.id,
 			scenes_meta.title,
 			scenes_meta.created_at,
 			scenes_meta.date,
-			scenes_meta.entry_id,
+			scenes_meta.shoot_id,
 			scenes_meta.stashed,
 			entities.id,
 			entities.name,
@@ -70,7 +71,8 @@ async function init() {
 		await utilsApi.sql(`create table scenes (
 			 id int,
 			 title text,
-			 entry_id text,
+			 title_filtered text,
+			 shoot_id text,
 			 channel_id int,
 			 channel_name text,
 			 channel_slug text,
@@ -90,30 +92,38 @@ async function init() {

 		const scenes = await fetchScenes();

-		const docs = scenes.map((scene) => ({
-			replace: {
-				index: 'scenes',
-				id: scene.id,
-				doc: {
-					title: scene.title || undefined,
-					date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined,
-					created_at: Math.round(scene.created_at.getTime() / 1000),
-					effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000),
-					entry_id: scene.entry_id,
-					channel_id: scene.channel_id,
-					channel_slug: scene.channel_slug,
-					channel_name: scene.channel_name,
-					network_id: scene.network_id || undefined,
-					network_slug: scene.network_slug || undefined,
-					network_name: scene.network_name || undefined,
-					actor_ids: scene.actors.map((actor) => actor.f1),
-					actors: scene.actors.map((actor) => actor.f2).join(),
-					tag_ids: scene.tags.map((tag) => tag.f1),
-					tags: scene.tags.map((tag) => tag.f2).join(),
-					stashed: scene.stashed || 0,
+		const docs = scenes.map((scene) => {
+			const flatActors = scene.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc.
+			const flatTags = scene.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => (tag.f4 ? `${tag.f2} ${tag.f4}` : tag.f2).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results
+			const filteredTitle = scene.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag, 'i'), ''), scene.title).trim().replace(/\s{2,}/, ' ');
+
+			return {
+				replace: {
+					index: 'scenes',
+					id: scene.id,
+					doc: {
+						title: scene.title || undefined,
+						title_filtered: filteredTitle || undefined,
+						date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined,
+						created_at: Math.round(scene.created_at.getTime() / 1000),
+						effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000),
+						shoot_id: scene.shoot_id || undefined,
+						channel_id: scene.channel_id,
+						channel_slug: scene.channel_slug,
+						channel_name: scene.channel_name,
+						network_id: scene.network_id || undefined,
+						network_slug: scene.network_slug || undefined,
+						network_name: scene.network_name || undefined,
+						actor_ids: scene.actors.map((actor) => actor.f1),
+						actors: scene.actors.map((actor) => actor.f2).join(),
+						tag_ids: scene.tags.map((tag) => tag.f1),
+						tags: flatTags.join(' '),
+						meta: scene.date ? format(scene.date, 'y yy M MMM MMMM d') : undefined,
+						stashed: scene.stashed || 0,
+					},
 				},
-			},
-		}));
+			};
+		});

 		const data = await indexApi.bulk(docs.map((doc) => JSON.stringify(doc)).join('\n'));

--- a/src/update-search.js
+++ b/src/update-search.js
@@ -17,7 +17,7 @@ async function updateManticoreSearch(releaseIds) {
 			scenes_meta.title,
 			scenes_meta.created_at,
 			scenes_meta.date,
-			scenes_meta.entry_id,
+			scenes_meta.shoot_id,
 			scenes_meta.stashed,
 			entities.id as channel_id,
 			entities.slug as channel_slug,
@@ -26,7 +26,7 @@ async function updateManticoreSearch(releaseIds) {
 			parents.slug as network_slug,
 			parents.name as network_name,
 			COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
-			COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags
+			COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags
        FROM scenes_meta
        LEFT JOIN entities ON scenes_meta.entity_id = entities.id
        LEFT JOIN entities AS parents ON parents.id = entities.parent_id
@@ -43,7 +43,7 @@ async function updateManticoreSearch(releaseIds) {
 			scenes_meta.title,
 			scenes_meta.created_at,
 			scenes_meta.date,
-			scenes_meta.entry_id,
+			scenes_meta.shoot_id,
 			scenes_meta.stashed,
 			entities.id,
 			entities.name,
@@ -55,31 +55,38 @@ async function updateManticoreSearch(releaseIds) {
 			parents.alias;
    `, releaseIds && [releaseIds]);

-	const docs = scenes.rows.map((scene) => ({
-		replace: {
-			index: 'scenes',
-			id: scene.id,
-			doc: {
-				title: scene.title || undefined,
-				date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined,
-				created_at: Math.round(scene.created_at.getTime() / 1000),
-				effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000),
-				entry_id: scene.entry_id,
-				channel_id: scene.channel_id,
-				channel_slug: scene.channel_slug,
-				channel_name: scene.channel_name,
-				network_id: scene.network_id || undefined,
-				network_slug: scene.network_slug || undefined,
-				network_name: scene.network_name || undefined,
-				actor_ids: scene.actors.map((actor) => actor.f1),
-				actors: scene.actors.map((actor) => actor.f2).join(),
-				tag_ids: scene.tags.map((tag) => tag.f1),
-				tags: scene.tags.map((tag) => tag.f2).join(),
-				meta: scene.date ? format(scene.date, 'y M d') : undefined,
-				stashed: scene.stashed || 0,
+	const docs = scenes.rows.map((scene) => {
+		const flatActors = scene.actors.flatMap((actor) => actor.f2.split(' '));
+		const flatTags = scene.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => tag.f2.split(' ')); // only make top tags searchable to minimize cluttered results
+		const filteredTitle = scene.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag, 'i'), ''), scene.title).trim().replace(/\s{2,}/, ' ');
+
+		return {
+			replace: {
+				index: 'scenes',
+				id: scene.id,
+				doc: {
+					title: scene.title || undefined,
+					title_filtered: filteredTitle || undefined,
+					date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined,
+					created_at: Math.round(scene.created_at.getTime() / 1000),
+					effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000),
+					shoot_id: scene.shoot_id || undefined,
+					channel_id: scene.channel_id,
+					channel_slug: scene.channel_slug,
+					channel_name: scene.channel_name,
+					network_id: scene.network_id || undefined,
+					network_slug: scene.network_slug || undefined,
+					network_name: scene.network_name || undefined,
+					actor_ids: scene.actors.map((actor) => actor.f1),
+					actors: scene.actors.map((actor) => actor.f2).join(),
+					tag_ids: scene.tags.map((tag) => tag.f1),
+					tags: scene.tags.filter((tag) => tag.f3 > 6).map((tag) => tag.f2).join(), // only make top tags searchable to minimize cluttered results
+					meta: scene.date ? format(scene.date, 'y yy M MMM MMMM d') : undefined,
+					stashed: scene.stashed || 0,
+				},
 			},
-		},
-	}));
+		};
+	});

 	if (docs.length === 0) {
 		return;