traxxx/src/update-search.js

355 lines
14 KiB
JavaScript

'use strict';
const manticore = require('manticoresearch');
const { format } = require('date-fns');
const knex = require('./knex');
const logger = require('./logger')(__filename);
const bulkInsert = require('./utils/bulk-insert');
const chunk = require('./utils/chunk');
const mantiClient = new manticore.ApiClient();
const indexApi = new manticore.IndexApi(mantiClient);
async function updateManticoreStashedScenes(docs) {
await chunk(docs, 1000).reduce(async (chain, docsChunk) => {
await chain;
const sceneIds = docsChunk.map((doc) => doc.replace.id);
const stashes = await knex('stashes_scenes')
.select('stashes_scenes.id as stashed_id', 'stashes_scenes.scene_id', 'stashes_scenes.created_at', 'stashes.id as stash_id', 'stashes.user_id as user_id')
.leftJoin('stashes', 'stashes.id', 'stashes_scenes.stash_id')
.whereIn('scene_id', sceneIds);
const stashDocs = docsChunk.flatMap((doc) => {
const sceneStashes = stashes.filter((stash) => stash.scene_id === doc.replace.id);
if (sceneStashes.length === 0) {
return [];
}
const stashDoc = sceneStashes.map((stash) => ({
replace: {
index: 'scenes_stashed',
id: stash.stashed_id,
doc: {
// ...doc.replace.doc,
scene_id: doc.replace.id,
user_id: stash.user_id,
stash_id: stash.stash_id,
created_at: Math.round(stash.created_at.getTime() / 1000),
},
},
}));
return stashDoc;
});
if (stashDocs.length > 0) {
await indexApi.bulk(stashDocs.map((doc) => JSON.stringify(doc)).join('\n'));
}
}, Promise.resolve());
}
async function updateManticoreSceneSearch(releaseIds) {
logger.info(`Updating Manticore search documents for ${releaseIds ? releaseIds.length : 'all' } releases`);
const scenes = await knex.raw(`
SELECT
releases.id AS id,
releases.title,
releases.created_at,
releases.date,
releases.shoot_id,
scenes_meta.stashed,
entities.id as channel_id,
entities.slug as channel_slug,
entities.name as channel_name,
parents.id as network_id,
parents.slug as network_slug,
parents.name as network_name,
COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
COALESCE(JSON_AGG(DISTINCT (movies.id)) FILTER (WHERE movies.id IS NOT NULL), '[]') as movies,
studios.showcased IS NOT false
AND (entities.showcased IS NOT false OR COALESCE(studios.showcased, false) = true)
AND (parents.showcased IS NOT false OR COALESCE(entities.showcased, false) = true OR COALESCE(studios.showcased, false) = true)
AS showcased
FROM releases
LEFT JOIN scenes_meta ON scenes_meta.scene_id = releases.id
LEFT JOIN entities ON releases.entity_id = entities.id
LEFT JOIN entities AS parents ON parents.id = entities.parent_id
LEFT JOIN entities AS studios ON studios.id = releases.studio_id
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
LEFT JOIN releases_directors AS local_directors ON local_directors.release_id = releases.id
LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
LEFT JOIN actors ON local_actors.actor_id = actors.id
LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
LEFT JOIN tags ON local_tags.tag_id = tags.id
LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
LEFT JOIN movies_scenes ON movies_scenes.scene_id = releases.id
LEFT JOIN movies ON movies.id = movies_scenes.movie_id
${releaseIds ? 'WHERE releases.id = ANY(?)' : ''}
GROUP BY
releases.id,
releases.title,
releases.created_at,
releases.date,
releases.shoot_id,
scenes_meta.stashed,
entities.id,
entities.name,
entities.slug,
entities.alias,
entities.showcased,
parents.id,
parents.name,
parents.slug,
parents.alias,
parents.showcased,
studios.showcased
`, releaseIds && [releaseIds]);
// console.log(scenes.rows);
const docs = scenes.rows.map((scene) => {
const flatActors = scene.actors.flatMap((actor) => actor.f2.split(' '));
const flatTags = scene.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => [tag.f2].concat(tag.f4)).filter(Boolean); // only make top tags searchable to minimize cluttered results
const filteredTitle = scene.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(`\\b${tag.replace(/[^\w\s]+/g, '')}\\b`, 'gi'), ''), scene.title).trim().replace(/\s{2,}/, ' ');
return {
replace: {
index: 'scenes',
id: scene.id,
doc: {
title: scene.title || undefined,
title_filtered: filteredTitle || undefined,
date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined,
created_at: Math.round(scene.created_at.getTime() / 1000),
effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000),
is_showcased: scene.showcased,
shoot_id: scene.shoot_id || undefined,
channel_id: scene.channel_id,
channel_slug: scene.channel_slug,
channel_name: scene.channel_name,
network_id: scene.network_id || undefined,
network_slug: scene.network_slug || undefined,
network_name: scene.network_name || undefined,
entity_ids: [scene.channel_id, scene.network_id].filter(Boolean), // manticore does not support OR, this allows IN
actor_ids: scene.actors.map((actor) => actor.f1),
actors: scene.actors.map((actor) => actor.f2).join(),
tag_ids: scene.tags.map((tag) => tag.f1),
tags: flatTags.join(' '), // only make top tags searchable to minimize cluttered results
movie_ids: scene.movies,
meta: scene.date ? format(scene.date, 'y yy M MMM MMMM d') : undefined,
stashed: scene.stashed || 0,
},
},
};
});
if (docs.length === 0) {
return;
}
await Promise.all([
indexApi.bulk(docs.map((doc) => JSON.stringify(doc)).join('\n')),
updateManticoreStashedScenes(docs),
]);
}
async function updateSqlSceneSearch(releaseIds) {
logger.info(`Updating SQL search documents for ${releaseIds ? releaseIds.length : 'all' } releases`);
const documents = await knex.raw(`
SELECT
releases.id AS release_id,
TO_TSVECTOR(
'english',
COALESCE(releases.title, '') || ' ' ||
releases.entry_id || ' ' ||
entities.name || ' ' ||
entities.slug || ' ' ||
COALESCE(array_to_string(entities.alias, ' '), '') || ' ' ||
COALESCE(parents.name, '') || ' ' ||
COALESCE(parents.slug, '') || ' ' ||
COALESCE(array_to_string(parents.alias, ' '), '') || ' ' ||
COALESCE(releases.shoot_id, '') || ' ' ||
COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMMonth mon DD FMDD'), '') || ' ' ||
STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' ||
STRING_AGG(COALESCE(directors.name, ''), ' ') || ' ' ||
STRING_AGG(COALESCE(tags.name, ''), ' ') || ' ' ||
STRING_AGG(COALESCE(tags_aliases.name, ''), ' ')
) as document
FROM releases
LEFT JOIN entities ON releases.entity_id = entities.id
LEFT JOIN entities AS parents ON parents.id = entities.parent_id
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
LEFT JOIN releases_directors AS local_directors ON local_directors.release_id = releases.id
LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
LEFT JOIN actors ON local_actors.actor_id = actors.id
LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
LEFT JOIN tags ON local_tags.tag_id = tags.id AND tags.priority >= 6
LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
${releaseIds ? 'WHERE releases.id = ANY(?)' : ''}
GROUP BY releases.id, entities.name, entities.slug, entities.alias, parents.name, parents.slug, parents.alias;
`, releaseIds && [releaseIds]);
if (documents.rows?.length > 0) {
await bulkInsert('releases_search', documents.rows, ['release_id']);
}
await knex.raw('REFRESH MATERIALIZED VIEW releases_summaries;');
}
async function updateSceneSearch(releaseIds) {
await knex.raw('REFRESH MATERIALIZED VIEW scenes_meta;');
await updateSqlSceneSearch(releaseIds);
await updateManticoreSceneSearch(releaseIds);
}
async function updateManticoreMovieSearch(movieIds) {
const movies = await knex.raw(`
SELECT
movies.id AS id,
movies.title,
movies.created_at,
movies.date,
movies_meta.stashed,
entities.id as channel_id,
entities.slug as channel_slug,
entities.name as channel_name,
parents.id as network_id,
parents.slug as network_slug,
parents.name as network_name,
movies_covers IS NOT NULL as has_cover,
COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags
FROM movies
LEFT JOIN movies_meta ON movies_meta.movie_id = movies.id
LEFT JOIN movies_scenes ON movies_scenes.movie_id = movies.id
LEFT JOIN entities ON movies.entity_id = entities.id
LEFT JOIN entities AS parents ON parents.id = entities.parent_id
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = movies_scenes.scene_id
LEFT JOIN releases_directors AS local_directors ON local_directors.release_id = movies_scenes.scene_id
LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = movies_scenes.scene_id
LEFT JOIN actors ON local_actors.actor_id = actors.id
LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
LEFT JOIN tags ON local_tags.tag_id = tags.id
LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
LEFT JOIN movies_covers ON movies_covers.movie_id = movies.id
${movieIds ? 'WHERE movies.id = ANY(?)' : ''}
GROUP BY
movies.id,
movies.title,
movies.created_at,
movies.date,
movies_meta.stashed,
movies_meta.stashed_scenes,
movies_meta.stashed_total,
entities.id,
entities.name,
entities.slug,
entities.alias,
parents.id,
parents.name,
parents.slug,
parents.alias,
movies_covers.*
`, movieIds && [movieIds]);
const docs = movies.rows.map((movie) => {
const flatActors = movie.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc.
const flatTags = movie.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => (tag.f4 ? `${tag.f2} ${tag.f4}` : tag.f2).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results
const filteredTitle = movie.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag.replace(/[^\w\s]+/g, ''), 'gi'), ''), movie.title).trim().replace(/\s{2,}/g, ' ');
return {
replace: {
index: 'movies',
id: movie.id,
doc: {
title: movie.title || undefined,
title_filtered: filteredTitle || undefined,
date: movie.date ? Math.round(movie.date.getTime() / 1000) : undefined,
created_at: Math.round(movie.created_at.getTime() / 1000),
effective_date: Math.round((movie.date || movie.created_at).getTime() / 1000),
channel_id: movie.channel_id,
channel_slug: movie.channel_slug,
channel_name: movie.channel_name,
network_id: movie.network_id || undefined,
network_slug: movie.network_slug || undefined,
network_name: movie.network_name || undefined,
entity_ids: [movie.channel_id, movie.network_id].filter(Boolean), // manticore does not support OR, this allows IN
actor_ids: movie.actors.map((actor) => actor.f1),
actors: movie.actors.map((actor) => actor.f2).join(),
tag_ids: movie.tags.map((tag) => tag.f1),
tags: flatTags.join(' '),
has_cover: movie.has_cover,
meta: movie.date ? format(movie.date, 'y yy M MMM MMMM d') : undefined,
stashed: movie.stashed || 0,
stashed_scenes: movie.stashed_scenes || 0,
stashed_total: movie.stashed_total || 0,
},
},
};
});
if (docs.length === 0) {
return;
}
await indexApi.bulk(docs.map((doc) => JSON.stringify(doc)).join('\n'));
}
async function updateSqlMovieSearch(movieIds, target = 'movie') {
logger.info(`Updating search documents for ${movieIds ? movieIds.length : 'all' } ${target}s`);
const documents = await knex.raw(`
SELECT
${target}s.id AS ${target}_id,
TO_TSVECTOR(
'english',
COALESCE(${target}s.title, '') || ' ' ||
entities.name || ' ' ||
entities.slug || ' ' ||
COALESCE(array_to_string(entities.alias, ' '), '') || ' ' ||
COALESCE(parents.name, '') || ' ' ||
COALESCE(parents.slug, '') || ' ' ||
COALESCE(array_to_string(parents.alias, ' '), '') || ' ' ||
COALESCE(TO_CHAR(${target}s.date, 'YYYY YY MM FMMM FMMonth mon DD FMDD'), '') || ' ' ||
STRING_AGG(COALESCE(releases.title, ''), ' ') || ' ' ||
STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' ||
STRING_AGG(COALESCE(tags.name, ''), ' ')
) as document
FROM ${target}s
LEFT JOIN entities ON ${target}s.entity_id = entities.id
LEFT JOIN entities AS parents ON parents.id = entities.parent_id
LEFT JOIN ${target}s_scenes ON ${target}s_scenes.${target}_id = ${target}s.id
LEFT JOIN releases ON releases.id = ${target}s_scenes.scene_id
LEFT JOIN releases_actors ON releases_actors.release_id = ${target}s_scenes.scene_id
LEFT JOIN releases_tags ON releases_tags.release_id = releases.id
LEFT JOIN actors ON actors.id = releases_actors.actor_id
LEFT JOIN tags ON tags.id = releases_tags.tag_id
${movieIds ? `WHERE ${target}s.id = ANY(?)` : ''}
GROUP BY ${target}s.id, entities.name, entities.slug, entities.alias, parents.name, parents.slug, parents.alias;
`, movieIds && [movieIds]);
if (documents.rows?.length > 0) {
await bulkInsert(`${target}s_search`, documents.rows, [`${target}_id`]);
}
}
async function updateMovieSearch(releaseIds) {
await knex.raw('REFRESH MATERIALIZED VIEW movies_meta;');
await updateSqlMovieSearch(releaseIds);
await updateManticoreMovieSearch(releaseIds);
}
module.exports = {
updateSceneSearch,
updateMovieSearch,
};