Added filtered titles and secondary tags to manticore database.

This commit is contained in:
DebaucheryLibrarian 2024-02-09 22:30:38 +01:00
parent 31e884359e
commit fc0661804f
5 changed files with 77 additions and 59 deletions

View File

@ -20,13 +20,13 @@ export default {
components: { components: {
EntityTile, EntityTile,
}, },
emits: ['load'],
props: { props: {
entity: { entity: {
type: Object, type: Object,
default: null, default: null,
}, },
}, },
emits: ['load'],
}; };
</script> </script>

View File

@ -11,7 +11,8 @@ exports.up = async () => {
await utilsApi.sql(`create table scenes ( await utilsApi.sql(`create table scenes (
id int, id int,
title text, title text,
entry_id text, title_filtered text,
shoot_id text,
channel_id int, channel_id int,
channel_name text, channel_name text,
channel_slug text, channel_slug text,

View File

@ -1302,8 +1302,8 @@ const aliases = [
for: 'atogm', for: 'atogm',
}, },
{ {
name: 'ass to other mouth', name: 'atom',
for: 'atom', for: 'atogm',
}, },
{ {
name: 'atm', name: 'atm',

View File

@ -3,6 +3,7 @@
const config = require('config'); const config = require('config');
const manticore = require('manticoresearch'); const manticore = require('manticoresearch');
const args = require('yargs').argv; const args = require('yargs').argv;
const { format } = require('date-fns');
const knex = require('../knex'); const knex = require('../knex');
@ -24,7 +25,7 @@ async function fetchScenes() {
scenes_meta.title, scenes_meta.title,
scenes_meta.created_at, scenes_meta.created_at,
scenes_meta.date, scenes_meta.date,
scenes_meta.entry_id, scenes_meta.shoot_id,
scenes_meta.stashed, scenes_meta.stashed,
entities.id as channel_id, entities.id as channel_id,
entities.slug as channel_slug, entities.slug as channel_slug,
@ -33,7 +34,7 @@ async function fetchScenes() {
parents.slug as network_slug, parents.slug as network_slug,
parents.name as network_name, parents.name as network_name,
COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors, COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags
FROM scenes_meta FROM scenes_meta
LEFT JOIN entities ON scenes_meta.entity_id = entities.id LEFT JOIN entities ON scenes_meta.entity_id = entities.id
LEFT JOIN entities AS parents ON parents.id = entities.parent_id LEFT JOIN entities AS parents ON parents.id = entities.parent_id
@ -42,14 +43,14 @@ async function fetchScenes() {
LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = scenes_meta.id LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = scenes_meta.id
LEFT JOIN actors ON local_actors.actor_id = actors.id LEFT JOIN actors ON local_actors.actor_id = actors.id
LEFT JOIN actors AS directors ON local_directors.director_id = directors.id LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
LEFT JOIN tags ON local_tags.tag_id = tags.id AND tags.priority >= 6 LEFT JOIN tags ON local_tags.tag_id = tags.id
LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
GROUP BY GROUP BY
scenes_meta.id, scenes_meta.id,
scenes_meta.title, scenes_meta.title,
scenes_meta.created_at, scenes_meta.created_at,
scenes_meta.date, scenes_meta.date,
scenes_meta.entry_id, scenes_meta.shoot_id,
scenes_meta.stashed, scenes_meta.stashed,
entities.id, entities.id,
entities.name, entities.name,
@ -70,7 +71,8 @@ async function init() {
await utilsApi.sql(`create table scenes ( await utilsApi.sql(`create table scenes (
id int, id int,
title text, title text,
entry_id text, title_filtered text,
shoot_id text,
channel_id int, channel_id int,
channel_name text, channel_name text,
channel_slug text, channel_slug text,
@ -90,30 +92,38 @@ async function init() {
const scenes = await fetchScenes(); const scenes = await fetchScenes();
const docs = scenes.map((scene) => ({ const docs = scenes.map((scene) => {
replace: { const flatActors = scene.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc.
index: 'scenes', const flatTags = scene.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => (tag.f4 ? `${tag.f2} ${tag.f4}` : tag.f2).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results
id: scene.id, const filteredTitle = scene.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag, 'i'), ''), scene.title).trim().replace(/\s{2,}/, ' ');
doc: {
title: scene.title || undefined, return {
date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined, replace: {
created_at: Math.round(scene.created_at.getTime() / 1000), index: 'scenes',
effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000), id: scene.id,
entry_id: scene.entry_id, doc: {
channel_id: scene.channel_id, title: scene.title || undefined,
channel_slug: scene.channel_slug, title_filtered: filteredTitle || undefined,
channel_name: scene.channel_name, date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined,
network_id: scene.network_id || undefined, created_at: Math.round(scene.created_at.getTime() / 1000),
network_slug: scene.network_slug || undefined, effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000),
network_name: scene.network_name || undefined, shoot_id: scene.shoot_id || undefined,
actor_ids: scene.actors.map((actor) => actor.f1), channel_id: scene.channel_id,
actors: scene.actors.map((actor) => actor.f2).join(), channel_slug: scene.channel_slug,
tag_ids: scene.tags.map((tag) => tag.f1), channel_name: scene.channel_name,
tags: scene.tags.map((tag) => tag.f2).join(), network_id: scene.network_id || undefined,
stashed: scene.stashed || 0, network_slug: scene.network_slug || undefined,
network_name: scene.network_name || undefined,
actor_ids: scene.actors.map((actor) => actor.f1),
actors: scene.actors.map((actor) => actor.f2).join(),
tag_ids: scene.tags.map((tag) => tag.f1),
tags: flatTags.join(' '),
meta: scene.date ? format(scene.date, 'y yy M MMM MMMM d') : undefined,
stashed: scene.stashed || 0,
},
}, },
}, };
})); });
const data = await indexApi.bulk(docs.map((doc) => JSON.stringify(doc)).join('\n')); const data = await indexApi.bulk(docs.map((doc) => JSON.stringify(doc)).join('\n'));

View File

@ -17,7 +17,7 @@ async function updateManticoreSearch(releaseIds) {
scenes_meta.title, scenes_meta.title,
scenes_meta.created_at, scenes_meta.created_at,
scenes_meta.date, scenes_meta.date,
scenes_meta.entry_id, scenes_meta.shoot_id,
scenes_meta.stashed, scenes_meta.stashed,
entities.id as channel_id, entities.id as channel_id,
entities.slug as channel_slug, entities.slug as channel_slug,
@ -26,7 +26,7 @@ async function updateManticoreSearch(releaseIds) {
parents.slug as network_slug, parents.slug as network_slug,
parents.name as network_name, parents.name as network_name,
COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors, COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags
FROM scenes_meta FROM scenes_meta
LEFT JOIN entities ON scenes_meta.entity_id = entities.id LEFT JOIN entities ON scenes_meta.entity_id = entities.id
LEFT JOIN entities AS parents ON parents.id = entities.parent_id LEFT JOIN entities AS parents ON parents.id = entities.parent_id
@ -43,7 +43,7 @@ async function updateManticoreSearch(releaseIds) {
scenes_meta.title, scenes_meta.title,
scenes_meta.created_at, scenes_meta.created_at,
scenes_meta.date, scenes_meta.date,
scenes_meta.entry_id, scenes_meta.shoot_id,
scenes_meta.stashed, scenes_meta.stashed,
entities.id, entities.id,
entities.name, entities.name,
@ -55,31 +55,38 @@ async function updateManticoreSearch(releaseIds) {
parents.alias; parents.alias;
`, releaseIds && [releaseIds]); `, releaseIds && [releaseIds]);
const docs = scenes.rows.map((scene) => ({ const docs = scenes.rows.map((scene) => {
replace: { const flatActors = scene.actors.flatMap((actor) => actor.f2.split(' '));
index: 'scenes', const flatTags = scene.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => tag.f2.split(' ')); // only make top tags searchable to minimize cluttered results
id: scene.id, const filteredTitle = scene.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag, 'i'), ''), scene.title).trim().replace(/\s{2,}/, ' ');
doc: {
title: scene.title || undefined, return {
date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined, replace: {
created_at: Math.round(scene.created_at.getTime() / 1000), index: 'scenes',
effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000), id: scene.id,
entry_id: scene.entry_id, doc: {
channel_id: scene.channel_id, title: scene.title || undefined,
channel_slug: scene.channel_slug, title_filtered: filteredTitle || undefined,
channel_name: scene.channel_name, date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined,
network_id: scene.network_id || undefined, created_at: Math.round(scene.created_at.getTime() / 1000),
network_slug: scene.network_slug || undefined, effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000),
network_name: scene.network_name || undefined, shoot_id: scene.shoot_id || undefined,
actor_ids: scene.actors.map((actor) => actor.f1), channel_id: scene.channel_id,
actors: scene.actors.map((actor) => actor.f2).join(), channel_slug: scene.channel_slug,
tag_ids: scene.tags.map((tag) => tag.f1), channel_name: scene.channel_name,
tags: scene.tags.map((tag) => tag.f2).join(), network_id: scene.network_id || undefined,
meta: scene.date ? format(scene.date, 'y M d') : undefined, network_slug: scene.network_slug || undefined,
stashed: scene.stashed || 0, network_name: scene.network_name || undefined,
actor_ids: scene.actors.map((actor) => actor.f1),
actors: scene.actors.map((actor) => actor.f2).join(),
tag_ids: scene.tags.map((tag) => tag.f1),
tags: scene.tags.filter((tag) => tag.f3 > 6).map((tag) => tag.f2).join(), // only make top tags searchable to minimize cluttered results
meta: scene.date ? format(scene.date, 'y yy M MMM MMMM d') : undefined,
stashed: scene.stashed || 0,
},
}, },
}, };
})); });
if (docs.length === 0) { if (docs.length === 0) {
return; return;