traxxx/src/tools/manticore-scenes.js

144 lines
5.0 KiB
JavaScript

'use strict';
const config = require('config');
const manticore = require('manticoresearch');
const args = require('yargs').argv;
const { format } = require('date-fns');
const knex = require('../knex');
const mantiClient = new manticore.ApiClient();
mantiClient.basePath = `http://${config.database.manticore.host}:${config.database.manticore.httpPort}`;
const utilsApi = new manticore.UtilsApi(mantiClient);
const indexApi = new manticore.IndexApi(mantiClient);
const update = args.update;
async function fetchScenes() {
const scenes = await knex.raw(`
SELECT
releases.id AS id,
releases.title,
releases.created_at,
releases.date,
releases.shoot_id,
scenes_meta.stashed,
entities.id as channel_id,
entities.slug as channel_slug,
entities.name as channel_name,
parents.id as network_id,
parents.slug as network_slug,
parents.name as network_name,
COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
COALESCE(JSON_AGG(DISTINCT (movies.id)) FILTER (WHERE movies.id IS NOT NULL), '[]') as movies
FROM releases
LEFT JOIN scenes_meta ON scenes_meta.scene_id = releases.id
LEFT JOIN entities ON releases.entity_id = entities.id
LEFT JOIN entities AS parents ON parents.id = entities.parent_id
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
LEFT JOIN releases_directors AS local_directors ON local_directors.release_id = releases.id
LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
LEFT JOIN actors ON local_actors.actor_id = actors.id
LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
LEFT JOIN tags ON local_tags.tag_id = tags.id
LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
LEFT JOIN movies_scenes ON movies_scenes.scene_id = releases.id
LEFT JOIN movies ON movies.id = movies_scenes.movie_id
GROUP BY
releases.id,
releases.title,
releases.created_at,
releases.date,
releases.shoot_id,
scenes_meta.stashed,
entities.id,
entities.name,
entities.slug,
entities.alias,
parents.id,
parents.name,
parents.slug,
parents.alias;
`);
return scenes.rows;
}
async function init() {
if (update) {
await utilsApi.sql('drop table if exists scenes');
await utilsApi.sql(`create table scenes (
id int,
title text,
title_filtered text,
shoot_id text,
channel_id int,
channel_name text,
channel_slug text,
network_id int,
network_name text,
network_slug text,
entity_ids multi,
actor_ids multi,
actors text,
tag_ids multi,
tags text,
movie_ids multi,
meta text,
date timestamp,
is_showcased bool,
created_at timestamp,
effective_date timestamp,
stashed int
)`);
const scenes = await fetchScenes();
const docs = scenes.map((scene) => {
const flatActors = scene.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc.
const flatTags = scene.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => (tag.f4 ? `${tag.f2} ${tag.f4}` : tag.f2).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results
const filteredTitle = scene.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag.replace(/[^\w\s]+/g, ''), 'i'), ''), scene.title).trim().replace(/\s{2,}/, ' ');
return {
replace: {
index: 'scenes',
id: scene.id,
doc: {
title: scene.title || undefined,
title_filtered: filteredTitle || undefined,
date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined,
created_at: Math.round(scene.created_at.getTime() / 1000),
effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000),
shoot_id: scene.shoot_id || undefined,
channel_id: scene.channel_id,
channel_slug: scene.channel_slug,
channel_name: scene.channel_name,
network_id: scene.network_id || undefined,
network_slug: scene.network_slug || undefined,
network_name: scene.network_name || undefined,
entity_ids: [scene.channel_id, scene.network_id].filter(Boolean), // manticore does not support OR, this allows IN
actor_ids: scene.actors.map((actor) => actor.f1),
actors: scene.actors.map((actor) => actor.f2).join(),
tag_ids: scene.tags.map((tag) => tag.f1),
tags: flatTags.join(' '),
movie_ids: scene.movies,
meta: scene.date ? format(scene.date, 'y yy M MMM MMMM d') : undefined,
stashed: scene.stashed || 0,
},
},
};
});
const data = await indexApi.bulk(docs.map((doc) => JSON.stringify(doc)).join('\n'));
console.log('data', data);
}
knex.destroy();
}
init();