'use strict';

const config = require('config');
const manticore = require('manticoresearch');
const args = require('yargs').argv;
const { format } = require('date-fns');

const knex = require('../knex');

const mantiClient = new manticore.ApiClient();

mantiClient.basePath = `http://${config.database.manticore.host}:${config.database.manticore.httpPort}`;

// const searchApi = new manticore.SearchApi(mantiClient);
const utilsApi = new manticore.UtilsApi(mantiClient);
const indexApi = new manticore.IndexApi(mantiClient);

const update = args.update;

/**
 * Load every movie together with its channel, network, actors and tags.
 *
 * Actors and tags are aggregated with JSON_AGG over row constructors, so the
 * consumers below read each entry through the positional keys `f1`, `f2`, ...
 *
 * @returns {Promise<Array<object>>} One row per movie.
 */
async function fetchMovies() {
  // stashed_scenes and stashed_total are selected explicitly: they are part of
  // the GROUP BY and are read when the documents are built.
  const movies = await knex.raw(`
    SELECT
      movies.id AS id,
      movies.title,
      movies.created_at,
      movies.date,
      movies_meta.stashed,
      movies_meta.stashed_scenes,
      movies_meta.stashed_total,
      entities.id as channel_id,
      entities.slug as channel_slug,
      entities.name as channel_name,
      parents.id as network_id,
      parents.slug as network_slug,
      parents.name as network_name,
      movies_covers IS NOT NULL as has_cover,
      COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
      COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
      COALESCE(JSON_AGG(DISTINCT (movie_tags.id, movie_tags.name, movie_tags.priority, movie_tags_aliases.name)) FILTER (WHERE movie_tags.id IS NOT NULL), '[]') as movie_tags,
      row_number() OVER (PARTITION BY movies.entry_id, parents.id ORDER BY movies.effective_date DESC) as dupe_index
    FROM movies
    LEFT JOIN movies_meta ON movies_meta.movie_id = movies.id
    LEFT JOIN movies_scenes ON movies_scenes.movie_id = movies.id
    LEFT JOIN movies_tags ON movies_tags.movie_id = movies.id
    LEFT JOIN entities ON movies.entity_id = entities.id
    LEFT JOIN entities AS parents ON parents.id = entities.parent_id
    LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = movies_scenes.scene_id
    LEFT JOIN releases_directors AS local_directors ON local_directors.release_id = movies_scenes.scene_id
    LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = movies_scenes.scene_id
    LEFT JOIN actors ON local_actors.actor_id = actors.id
    LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
    LEFT JOIN tags ON local_tags.tag_id = tags.id
    LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
    LEFT JOIN tags as movie_tags ON movies_tags.tag_id = movie_tags.id
    LEFT JOIN tags as movie_tags_aliases ON movies_tags.tag_id = movie_tags_aliases.alias_for AND movie_tags_aliases.secondary = true
    LEFT JOIN movies_covers ON movies_covers.movie_id = movies.id
    GROUP BY
      movies.id,
      movies.title,
      movies.created_at,
      movies.date,
      movies_meta.stashed,
      movies_meta.stashed_scenes,
      movies_meta.stashed_total,
      entities.id,
      entities.name,
      entities.slug,
      entities.alias,
      parents.id,
      parents.name,
      parents.slug,
      parents.alias,
      movies_covers.*
  `);

  return movies.rows;
}

/**
 * Split text into word tokens (word characters and apostrophes), dropping
 * brackets and other punctuation.
 *
 * @param {?string} text - Text to tokenize; may be null or undefined.
 * @returns {string[]} The tokens, or an empty array when there are none.
 */
function toWords(text) {
  // match() yields null rather than [] when nothing matches (e.g. a name with
  // no word characters); a null entry would crash the title filtering below.
  return text?.match(/[\w']+/g) ?? [];
}

/**
 * Convert a Date to whole seconds since the Unix epoch.
 *
 * @param {Date} date
 * @returns {number}
 */
function toUnixSeconds(date) {
  return Math.round(date.getTime() / 1000);
}

/**
 * Build the bulk `replace` operation that indexes a single movie row.
 *
 * @param {object} movie - A row as returned by fetchMovies().
 * @returns {{ replace: { index: string, id: *, doc: object } }}
 */
function buildMovieDoc(movie) {
  // Scene tags and movie tags are merged and de-duplicated by tag ID; when an
  // ID occurs more than once the last entry wins.
  const combinedTags = Object.values(Object.fromEntries(movie.tags.concat(movie.movie_tags).map((tag) => [tag.f1, {
    id: tag.f1,
    name: tag.f2,
    priority: tag.f3,
    alias: tag.f4,
  }])));

  const flatActors = movie.actors.flatMap((actor) => toWords(actor.f2)); // match word characters to filter out brackets etc.

  const flatTags = combinedTags
    .filter((tag) => tag.priority > 6) // only make top tags searchable to minimize cluttered results
    .flatMap((tag) => toWords(tag.alias ? `${tag.name} ${tag.alias}` : tag.name));

  // Strip every actor and tag word from the title, then tidy the whitespace.
  const filteredTitle = movie.title && [...flatActors, ...flatTags]
    .reduce((accTitle, tag) => accTitle.replace(new RegExp(tag.replace(/[^\w\s]+/g, ''), 'gi'), ''), movie.title)
    .trim()
    .replace(/\s{2,}/g, ' ');

  return {
    replace: {
      index: 'movies',
      id: movie.id,
      doc: {
        title: movie.title || undefined,
        title_filtered: filteredTitle || undefined,
        date: movie.date ? toUnixSeconds(movie.date) : undefined,
        created_at: toUnixSeconds(movie.created_at),
        effective_date: toUnixSeconds(movie.date || movie.created_at),
        channel_id: movie.channel_id,
        channel_slug: movie.channel_slug,
        channel_name: movie.channel_name,
        network_id: movie.network_id || undefined,
        network_slug: movie.network_slug || undefined,
        network_name: movie.network_name || undefined,
        entity_ids: [movie.channel_id, movie.network_id].filter(Boolean), // manticore does not support OR, this allows IN
        actor_ids: movie.actors.map((actor) => actor.f1),
        actors: movie.actors.map((actor) => actor.f2).join(),
        tag_ids: combinedTags.map((tag) => tag.id),
        tags: flatTags.join(' '),
        has_cover: movie.has_cover,
        meta: movie.date ? format(movie.date, 'y yy M MM MMM MMMM d dd') : undefined,
        stashed: movie.stashed || 0,
        stashed_scenes: movie.stashed_scenes || 0,
        stashed_total: movie.stashed_total || 0,
        // NOTE(review): row_number() is a bigint, which the pg driver presumably
        // hands back as a string unless ../knex installs a type parser; coerce
        // so the int attribute always receives a number.
        dupe_index: Number(movie.dupe_index) || 0,
      },
    },
  };
}

/**
 * When run with --update: drop and recreate the `movies` table in Manticore
 * and bulk-index every movie fetched from the database. The knex connection
 * is always released afterwards, whether or not indexing succeeded.
 *
 * @returns {Promise<void>}
 */
async function init() {
  try {
    if (!update) {
      return;
    }

    await utilsApi.sql('drop table if exists movies');
    await utilsApi.sql(`create table movies (
      id int,
      title text,
      title_filtered text,
      channel_id int,
      channel_name text,
      channel_slug text,
      network_id int,
      network_name text,
      network_slug text,
      entity_ids multi,
      actor_ids multi,
      actors text,
      tag_ids multi,
      tags text,
      meta text,
      date timestamp,
      has_cover bool,
      created_at timestamp,
      effective_date timestamp,
      stashed int,
      stashed_scenes int,
      stashed_total int,
      dupe_index int
    )`);

    const movies = await fetchMovies();

    console.log(movies.toSorted((movieA, movieB) => movieA.dupe_index - movieB.dupe_index));

    const docs = movies.map((movie) => buildMovieDoc(movie));

    console.log(docs.map((doc) => doc.replace));

    // The bulk endpoint takes newline-delimited JSON, one operation per line.
    const data = await indexApi.bulk(docs.map((doc) => JSON.stringify(doc)).join('\n'));

    console.log('data', data);
  } finally {
    // Release the pool even when indexing fails, so the process can exit.
    await knex.destroy();
  }
}

init().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});