'use strict'; const manticore = require('manticoresearch'); const { format } = require('date-fns'); const knex = require('./knex'); const logger = require('./logger')(__filename); const bulkInsert = require('./utils/bulk-insert'); const chunk = require('./utils/chunk'); const mantiClient = new manticore.ApiClient(); const indexApi = new manticore.IndexApi(mantiClient); async function updateManticoreStashedScenes(docs) { await chunk(docs, 1000).reduce(async (chain, docsChunk) => { await chain; const sceneIds = docsChunk.map((doc) => doc.replace.id); const stashes = await knex('stashes_scenes') .select('stashes_scenes.id as stashed_id', 'stashes_scenes.scene_id', 'stashes_scenes.created_at', 'stashes.id as stash_id', 'stashes.user_id as user_id') .leftJoin('stashes', 'stashes.id', 'stashes_scenes.stash_id') .whereIn('scene_id', sceneIds); const stashDocs = docsChunk.flatMap((doc) => { const sceneStashes = stashes.filter((stash) => stash.scene_id === doc.replace.id); if (sceneStashes.length === 0) { return []; } const stashDoc = sceneStashes.map((stash) => ({ replace: { index: 'scenes_stashed', id: stash.stashed_id, doc: { // ...doc.replace.doc, scene_id: doc.replace.id, user_id: stash.user_id, stash_id: stash.stash_id, created_at: Math.round(stash.created_at.getTime() / 1000), }, }, })); return stashDoc; }); if (stashDocs.length > 0) { await indexApi.bulk(stashDocs.map((doc) => JSON.stringify(doc)).join('\n')); } }, Promise.resolve()); } async function updateManticoreSceneSearch(releaseIds) { logger.info(`Updating Manticore search documents for ${releaseIds ? releaseIds.length : 'all' } releases`); const scenes = await knex.raw(` SELECT releases.id AS id, releases.title, releases.created_at, releases.date, releases.shoot_id, scenes_meta.stashed, entities.id as channel_id, entities.slug as channel_slug, entities.name as channel_name, parents.id as network_id, parents.slug as network_slug, parents.name as network_name, COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors, COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags, COALESCE(JSON_AGG(DISTINCT (movies.id)) FILTER (WHERE movies.id IS NOT NULL), '[]') as movies, studios.showcased IS NOT false AND (entities.showcased IS NOT false OR COALESCE(studios.showcased, false) = true) AND (parents.showcased IS NOT false OR COALESCE(entities.showcased, false) = true OR COALESCE(studios.showcased, false) = true) AS showcased FROM releases LEFT JOIN scenes_meta ON scenes_meta.scene_id = releases.id LEFT JOIN entities ON releases.entity_id = entities.id LEFT JOIN entities AS parents ON parents.id = entities.parent_id LEFT JOIN entities AS studios ON studios.id = releases.studio_id LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id LEFT JOIN releases_directors AS local_directors ON local_directors.release_id = releases.id LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id LEFT JOIN actors ON local_actors.actor_id = actors.id LEFT JOIN actors AS directors ON local_directors.director_id = directors.id LEFT JOIN tags ON local_tags.tag_id = tags.id LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true LEFT JOIN movies_scenes ON movies_scenes.scene_id = releases.id LEFT JOIN movies ON movies.id = movies_scenes.movie_id ${releaseIds ? 'WHERE releases.id = ANY(?)' : ''} GROUP BY releases.id, releases.title, releases.created_at, releases.date, releases.shoot_id, scenes_meta.stashed, entities.id, entities.name, entities.slug, entities.alias, entities.showcased, parents.id, parents.name, parents.slug, parents.alias, parents.showcased, studios.showcased `, releaseIds && [releaseIds]); // console.log(scenes.rows); const docs = scenes.rows.map((scene) => { const flatActors = scene.actors.flatMap((actor) => actor.f2.split(' ')); const flatTags = scene.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => [tag.f2].concat(tag.f4)).filter(Boolean); // only make top tags searchable to minimize cluttered results const filteredTitle = scene.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(`\\b${tag.replace(/[^\w\s]+/g, '')}\\b`, 'gi'), ''), scene.title).trim().replace(/\s{2,}/, ' '); return { replace: { index: 'scenes', id: scene.id, doc: { title: scene.title || undefined, title_filtered: filteredTitle || undefined, date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined, created_at: Math.round(scene.created_at.getTime() / 1000), effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000), is_showcased: scene.showcased, shoot_id: scene.shoot_id || undefined, channel_id: scene.channel_id, channel_slug: scene.channel_slug, channel_name: scene.channel_name, network_id: scene.network_id || undefined, network_slug: scene.network_slug || undefined, network_name: scene.network_name || undefined, entity_ids: [scene.channel_id, scene.network_id].filter(Boolean), // manticore does not support OR, this allows IN actor_ids: scene.actors.map((actor) => actor.f1), actors: scene.actors.map((actor) => actor.f2).join(), tag_ids: scene.tags.map((tag) => tag.f1), tags: flatTags.join(' '), // only make top tags searchable to minimize cluttered results movie_ids: scene.movies, meta: scene.date ? format(scene.date, 'y yy M MMM MMMM d') : undefined, stashed: scene.stashed || 0, }, }, }; }); if (docs.length === 0) { return; } await Promise.all([ indexApi.bulk(docs.map((doc) => JSON.stringify(doc)).join('\n')), updateManticoreStashedScenes(docs), ]); } async function updateSqlSceneSearch(releaseIds) { logger.info(`Updating SQL search documents for ${releaseIds ? releaseIds.length : 'all' } releases`); const documents = await knex.raw(` SELECT releases.id AS release_id, TO_TSVECTOR( 'english', COALESCE(releases.title, '') || ' ' || releases.entry_id || ' ' || entities.name || ' ' || entities.slug || ' ' || COALESCE(array_to_string(entities.alias, ' '), '') || ' ' || COALESCE(parents.name, '') || ' ' || COALESCE(parents.slug, '') || ' ' || COALESCE(array_to_string(parents.alias, ' '), '') || ' ' || COALESCE(releases.shoot_id, '') || ' ' || COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMMonth mon DD FMDD'), '') || ' ' || STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' || STRING_AGG(COALESCE(directors.name, ''), ' ') || ' ' || STRING_AGG(COALESCE(tags.name, ''), ' ') || ' ' || STRING_AGG(COALESCE(tags_aliases.name, ''), ' ') ) as document FROM releases LEFT JOIN entities ON releases.entity_id = entities.id LEFT JOIN entities AS parents ON parents.id = entities.parent_id LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id LEFT JOIN releases_directors AS local_directors ON local_directors.release_id = releases.id LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id LEFT JOIN actors ON local_actors.actor_id = actors.id LEFT JOIN actors AS directors ON local_directors.director_id = directors.id LEFT JOIN tags ON local_tags.tag_id = tags.id AND tags.priority >= 6 LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true ${releaseIds ? 'WHERE releases.id = ANY(?)' : ''} GROUP BY releases.id, entities.name, entities.slug, entities.alias, parents.name, parents.slug, parents.alias; `, releaseIds && [releaseIds]); if (documents.rows?.length > 0) { await bulkInsert('releases_search', documents.rows, ['release_id']); } await knex.raw('REFRESH MATERIALIZED VIEW releases_summaries;'); } async function updateSceneSearch(releaseIds) { await knex.raw('REFRESH MATERIALIZED VIEW scenes_meta;'); await updateSqlSceneSearch(releaseIds); await updateManticoreSceneSearch(releaseIds); } async function updateManticoreMovieSearch(movieIds) { const movies = await knex.raw(` SELECT movies.id AS id, movies.title, movies.created_at, movies.date, movies_meta.stashed, entities.id as channel_id, entities.slug as channel_slug, entities.name as channel_name, parents.id as network_id, parents.slug as network_slug, parents.name as network_name, movies_covers IS NOT NULL as has_cover, COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors, COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags FROM movies LEFT JOIN movies_meta ON movies_meta.movie_id = movies.id LEFT JOIN movies_scenes ON movies_scenes.movie_id = movies.id LEFT JOIN entities ON movies.entity_id = entities.id LEFT JOIN entities AS parents ON parents.id = entities.parent_id LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = movies_scenes.scene_id LEFT JOIN releases_directors AS local_directors ON local_directors.release_id = movies_scenes.scene_id LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = movies_scenes.scene_id LEFT JOIN actors ON local_actors.actor_id = actors.id LEFT JOIN actors AS directors ON local_directors.director_id = directors.id LEFT JOIN tags ON local_tags.tag_id = tags.id LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true LEFT JOIN movies_covers ON movies_covers.movie_id = movies.id ${movieIds ? 'WHERE movies.id = ANY(?)' : ''} GROUP BY movies.id, movies.title, movies.created_at, movies.date, movies_meta.stashed, movies_meta.stashed_scenes, movies_meta.stashed_total, entities.id, entities.name, entities.slug, entities.alias, parents.id, parents.name, parents.slug, parents.alias, movies_covers.* `, movieIds && [movieIds]); const docs = movies.rows.map((movie) => { const flatActors = movie.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc. const flatTags = movie.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => (tag.f4 ? `${tag.f2} ${tag.f4}` : tag.f2).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results const filteredTitle = movie.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag.replace(/[^\w\s]+/g, ''), 'gi'), ''), movie.title).trim().replace(/\s{2,}/g, ' '); return { replace: { index: 'movies', id: movie.id, doc: { title: movie.title || undefined, title_filtered: filteredTitle || undefined, date: movie.date ? Math.round(movie.date.getTime() / 1000) : undefined, created_at: Math.round(movie.created_at.getTime() / 1000), effective_date: Math.round((movie.date || movie.created_at).getTime() / 1000), channel_id: movie.channel_id, channel_slug: movie.channel_slug, channel_name: movie.channel_name, network_id: movie.network_id || undefined, network_slug: movie.network_slug || undefined, network_name: movie.network_name || undefined, entity_ids: [movie.channel_id, movie.network_id].filter(Boolean), // manticore does not support OR, this allows IN actor_ids: movie.actors.map((actor) => actor.f1), actors: movie.actors.map((actor) => actor.f2).join(), tag_ids: movie.tags.map((tag) => tag.f1), tags: flatTags.join(' '), has_cover: movie.has_cover, meta: movie.date ? format(movie.date, 'y yy M MMM MMMM d') : undefined, stashed: movie.stashed || 0, stashed_scenes: movie.stashed_scenes || 0, stashed_total: movie.stashed_total || 0, }, }, }; }); if (docs.length === 0) { return; } await indexApi.bulk(docs.map((doc) => JSON.stringify(doc)).join('\n')); } async function updateSqlMovieSearch(movieIds, target = 'movie') { logger.info(`Updating search documents for ${movieIds ? movieIds.length : 'all' } ${target}s`); const documents = await knex.raw(` SELECT ${target}s.id AS ${target}_id, TO_TSVECTOR( 'english', COALESCE(${target}s.title, '') || ' ' || entities.name || ' ' || entities.slug || ' ' || COALESCE(array_to_string(entities.alias, ' '), '') || ' ' || COALESCE(parents.name, '') || ' ' || COALESCE(parents.slug, '') || ' ' || COALESCE(array_to_string(parents.alias, ' '), '') || ' ' || COALESCE(TO_CHAR(${target}s.date, 'YYYY YY MM FMMM FMMonth mon DD FMDD'), '') || ' ' || STRING_AGG(COALESCE(releases.title, ''), ' ') || ' ' || STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' || STRING_AGG(COALESCE(tags.name, ''), ' ') ) as document FROM ${target}s LEFT JOIN entities ON ${target}s.entity_id = entities.id LEFT JOIN entities AS parents ON parents.id = entities.parent_id LEFT JOIN ${target}s_scenes ON ${target}s_scenes.${target}_id = ${target}s.id LEFT JOIN releases ON releases.id = ${target}s_scenes.scene_id LEFT JOIN releases_actors ON releases_actors.release_id = ${target}s_scenes.scene_id LEFT JOIN releases_tags ON releases_tags.release_id = releases.id LEFT JOIN actors ON actors.id = releases_actors.actor_id LEFT JOIN tags ON tags.id = releases_tags.tag_id ${movieIds ? `WHERE ${target}s.id = ANY(?)` : ''} GROUP BY ${target}s.id, entities.name, entities.slug, entities.alias, parents.name, parents.slug, parents.alias; `, movieIds && [movieIds]); if (documents.rows?.length > 0) { await bulkInsert(`${target}s_search`, documents.rows, [`${target}_id`]); } } async function updateMovieSearch(releaseIds) { await knex.raw('REFRESH MATERIALIZED VIEW movies_meta;'); await updateSqlMovieSearch(releaseIds); await updateManticoreMovieSearch(releaseIds); } module.exports = { updateSceneSearch, updateMovieSearch, };