2023-07-25 01:03:41 +00:00
'use strict' ;
2024-01-25 00:15:42 +00:00
const manticore = require ( 'manticoresearch' ) ;
2024-01-25 00:58:55 +00:00
const { format } = require ( 'date-fns' ) ;
2024-01-25 00:15:42 +00:00
2023-07-25 01:03:41 +00:00
const knex = require ( './knex' ) ;
const logger = require ( './logger' ) ( _ _filename ) ;
const bulkInsert = require ( './utils/bulk-insert' ) ;
2024-03-14 23:57:28 +00:00
const chunk = require ( './utils/chunk' ) ;
2023-07-25 01:03:41 +00:00
2024-01-25 00:15:42 +00:00
// Manticore API client, constructed with no arguments.
const mantiClient = new manticore . ApiClient ( ) ;
// Index API bound to the client above; every bulk replace in this module goes through it.
const indexApi = new manticore . IndexApi ( mantiClient ) ;
2024-03-14 23:57:28 +00:00
/**
 * Writes one `scenes_stashed` Manticore document per stash entry of the given scenes.
 *
 * @param {Array<{ replace: { id: * } }>} docs - scene bulk operations; only `replace.id` is read here.
 * @returns {Promise<void>} resolves once every chunk has been processed.
 */
async function updateManticoreStashedScenes(docs) {
	// The reduce chain makes each chunk wait for the previous one, so chunks are processed strictly in sequence.
	await chunk(docs, 1000).reduce(async (chain, docsChunk) => {
		await chain;

		const sceneIds = docsChunk.map((doc) => doc.replace.id);

		const stashes = await knex('stashes_scenes')
			.select('stashes_scenes.id as stashed_id', 'stashes_scenes.scene_id', 'stashes_scenes.created_at', 'stashes.id as stash_id', 'stashes.user_id as user_id')
			.leftJoin('stashes', 'stashes.id', 'stashes_scenes.stash_id')
			.whereIn('scene_id', sceneIds);

		// Group the rows by scene once, instead of re-scanning every row for every document.
		const stashesBySceneId = stashes.reduce((acc, stash) => {
			if (!acc.has(stash.scene_id)) {
				acc.set(stash.scene_id, []);
			}

			acc.get(stash.scene_id).push(stash);

			return acc;
		}, new Map());

		// Scenes without stash rows contribute no documents.
		const stashDocs = docsChunk.flatMap((doc) => (stashesBySceneId.get(doc.replace.id) || []).map((stash) => ({
			replace: {
				index: 'scenes_stashed',
				id: stash.stashed_id,
				doc: {
					scene_id: doc.replace.id,
					user_id: stash.user_id,
					stash_id: stash.stash_id,
					created_at: Math.round(stash.created_at.getTime() / 1000), // unix timestamp in seconds
				},
			},
		})));

		if (stashDocs.length > 0) {
			// The bulk payload is newline-delimited JSON, one operation per line.
			await indexApi.bulk(stashDocs.map((doc) => JSON.stringify(doc)).join('\n'));
		}
	}, Promise.resolve());
}
/**
 * Rebuilds the Manticore `scenes` documents, and the matching `scenes_stashed` documents,
 * for the given releases.
 *
 * @param {Array} [releaseIds] - release IDs to update; when omitted, every release is processed.
 * @returns {Promise<void>} resolves when both bulk updates have finished; returns early when no rows match.
 */
async function updateManticoreSceneSearch(releaseIds) {
	logger.info(`Updating Manticore search documents for ${releaseIds ? releaseIds.length : 'all'} releases`);

	// The JSON_AGG columns aggregate row constructors; the mapping below reads their fields as f1, f2, ...
	const scenes = await knex.raw(`
		SELECT
			releases.id AS id,
			releases.title,
			releases.created_at,
			releases.date,
			releases.shoot_id,
			scenes_meta.stashed,
			entities.id as channel_id,
			entities.slug as channel_slug,
			entities.name as channel_name,
			parents.id as network_id,
			parents.slug as network_slug,
			parents.name as network_name,
			grandparents.id as parent_network_id,
			COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
			COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
			COALESCE(JSON_AGG(DISTINCT (movies_scenes.movie_id)) FILTER (WHERE movies_scenes.movie_id IS NOT NULL), '[]') as movies,
			COALESCE(JSON_AGG(DISTINCT (series_scenes.serie_id)) FILTER (WHERE series_scenes.serie_id IS NOT NULL), '[]') as series,
			studios.showcased IS NOT false
				AND (entities.showcased IS NOT false OR COALESCE(studios.showcased, false) = true)
				AND (parents.showcased IS NOT false OR COALESCE(entities.showcased, false) = true OR COALESCE(studios.showcased, false) = true)
				AND (releases_summaries.batch_showcased IS NOT false)
				AS showcased
		FROM releases
		LEFT JOIN releases_summaries ON releases_summaries.release_id = releases.id
		LEFT JOIN scenes_meta ON scenes_meta.scene_id = releases.id
		LEFT JOIN entities ON releases.entity_id = entities.id
		LEFT JOIN entities AS parents ON parents.id = entities.parent_id
		LEFT JOIN entities AS grandparents ON grandparents.id = parents.parent_id
		LEFT JOIN entities AS studios ON studios.id = releases.studio_id
		LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
		LEFT JOIN releases_directors AS local_directors ON local_directors.release_id = releases.id
		LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
		LEFT JOIN actors ON local_actors.actor_id = actors.id
		LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
		LEFT JOIN tags ON local_tags.tag_id = tags.id
		LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
		LEFT JOIN movies_scenes ON movies_scenes.scene_id = releases.id
		LEFT JOIN series_scenes ON series_scenes.scene_id = releases.id
		${releaseIds ? 'WHERE releases.id = ANY(?)' : ''}
		GROUP BY
			releases.id,
			releases.title,
			releases.created_at,
			releases.date,
			releases.shoot_id,
			scenes_meta.stashed,
			releases_summaries.batch_showcased,
			entities.id,
			entities.name,
			entities.slug,
			entities.alias,
			entities.showcased,
			parents.id,
			parents.name,
			parents.slug,
			parents.alias,
			grandparents.id,
			parents.showcased,
			studios.showcased
	`, releaseIds && [releaseIds]);

	const docs = scenes.rows.map((scene) => {
		// Individual words of every actor name (f2).
		const flatActors = scene.actors.flatMap((actor) => actor.f2.split(' '));
		// only make top tags searchable to minimize cluttered results; f3 is the priority, f2 the name, f4 the alias name
		const flatTags = scene.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => [tag.f2].concat(tag.f4)).filter(Boolean);

		// Strip actor and tag words from the title as whole words, then collapse every run of
		// whitespace left behind. The global flag matters: without it only the first run is collapsed.
		const filteredTitle = scene.title && [...flatActors, ...flatTags]
			.reduce((accTitle, tag) => accTitle.replace(new RegExp(`\\b${tag.replace(/[^\w\s]+/g, '')}\\b`, 'gi'), ''), scene.title)
			.trim()
			.replace(/\s{2,}/g, ' ');

		// Fields set to undefined are dropped by JSON.stringify and therefore omitted from the document.
		return {
			replace: {
				index: 'scenes',
				id: scene.id,
				doc: {
					title: scene.title || undefined,
					title_filtered: filteredTitle || undefined,
					date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined,
					created_at: Math.round(scene.created_at.getTime() / 1000),
					effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000),
					is_showcased: scene.showcased,
					shoot_id: scene.shoot_id || undefined,
					channel_id: scene.channel_id,
					channel_slug: scene.channel_slug,
					channel_name: scene.channel_name,
					network_id: scene.network_id || undefined,
					network_slug: scene.network_slug || undefined,
					network_name: scene.network_name || undefined,
					entity_ids: [scene.channel_id, scene.network_id, scene.parent_network_id].filter(Boolean), // manticore does not support OR, this allows IN
					actor_ids: scene.actors.map((actor) => actor.f1),
					actors: scene.actors.map((actor) => actor.f2).join(),
					tag_ids: scene.tags.map((tag) => tag.f1),
					tags: flatTags.join(' '), // only make top tags searchable to minimize cluttered results
					movie_ids: scene.movies,
					serie_ids: scene.series,
					meta: scene.date ? format(scene.date, 'y yy M MMM MMMM d') : undefined,
					stashed: scene.stashed || 0,
				},
			},
		};
	});

	if (docs.length === 0) {
		return;
	}

	// The scene documents and their stash documents are written concurrently.
	await Promise.all([
		indexApi.bulk(docs.map((doc) => JSON.stringify(doc)).join('\n')),
		updateManticoreStashedScenes(docs),
	]);
}
2024-03-14 23:57:28 +00:00
/**
 * Regenerates the `releases_search` text-search documents for the given releases,
 * then refreshes the `releases_summaries` materialized view.
 *
 * @param {Array} [releaseIds] - release IDs to update; when omitted, every release is processed.
 * @returns {Promise<void>}
 */
async function updateSqlSceneSearch(releaseIds) {
	const releaseCount = releaseIds ? releaseIds.length : 'all';

	logger.info(`Updating SQL search documents for ${releaseCount} releases`);

	// The filter and its binding are only present when specific releases were requested.
	const whereClause = releaseIds ? 'WHERE releases.id = ANY(?)' : '';
	const bindings = releaseIds && [releaseIds];

	const { rows } = await knex.raw(`
		SELECT
			releases.id AS release_id,
			TO_TSVECTOR(
				'english',
				COALESCE(releases.title, '') || ' ' ||
				releases.entry_id || ' ' ||
				entities.name || ' ' ||
				entities.slug || ' ' ||
				COALESCE(array_to_string(entities.alias, ' '), '') || ' ' ||
				COALESCE(parents.name, '') || ' ' ||
				COALESCE(parents.slug, '') || ' ' ||
				COALESCE(array_to_string(parents.alias, ' '), '') || ' ' ||
				COALESCE(releases.shoot_id, '') || ' ' ||
				COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMMonth mon DD FMDD'), '') || ' ' ||
				STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' ||
				STRING_AGG(COALESCE(directors.name, ''), ' ') || ' ' ||
				STRING_AGG(COALESCE(tags.name, ''), ' ') || ' ' ||
				STRING_AGG(COALESCE(tags_aliases.name, ''), ' ')
			) as document
		FROM releases
		LEFT JOIN entities ON releases.entity_id = entities.id
		LEFT JOIN entities AS parents ON parents.id = entities.parent_id
		LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
		LEFT JOIN releases_directors AS local_directors ON local_directors.release_id = releases.id
		LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
		LEFT JOIN actors ON local_actors.actor_id = actors.id
		LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
		LEFT JOIN tags ON local_tags.tag_id = tags.id AND tags.priority >= 6
		LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
		${whereClause}
		GROUP BY releases.id, entities.name, entities.slug, entities.alias, parents.name, parents.slug, parents.alias;
	`, bindings);

	if (rows?.length > 0) {
		await bulkInsert('releases_search', rows, ['release_id']);
	}

	await knex.raw('REFRESH MATERIALIZED VIEW releases_summaries;');
}
2024-01-25 00:15:42 +00:00
/**
 * Updates every scene search backend for the given releases.
 *
 * @param {Array} [releaseIds] - release IDs to update; passed through unchanged to both updaters.
 * @returns {Promise<void>}
 */
async function updateSceneSearch(releaseIds) {
	// Ordered steps: refresh scenes_meta, then the SQL documents, then the Manticore documents.
	const steps = [
		() => knex.raw('REFRESH MATERIALIZED VIEW scenes_meta;'),
		() => updateSqlSceneSearch(releaseIds),
		() => updateManticoreSceneSearch(releaseIds),
	];

	// Each step starts only after the previous one has settled successfully.
	await steps.reduce(async (chain, step) => {
		await chain;
		await step();
	}, Promise.resolve());
}
/**
 * Rebuilds the Manticore `movies` documents for the given movies.
 *
 * @param {Array} [movieIds] - movie IDs to update; when omitted, every movie is processed.
 * @returns {Promise<void>} resolves after the bulk update; returns early when no rows match.
 */
async function updateManticoreMovieSearch(movieIds) {
	// stashed_scenes and stashed_total must be selected explicitly: the document below reads them,
	// and when they are only listed in GROUP BY both fields silently fall back to 0.
	// The JSON_AGG columns aggregate row constructors; the mapping below reads their fields as f1, f2, ...
	const movies = await knex.raw(`
		SELECT
			movies.id AS id,
			movies.title,
			movies.created_at,
			movies.date,
			movies_meta.stashed,
			movies_meta.stashed_scenes,
			movies_meta.stashed_total,
			entities.id as channel_id,
			entities.slug as channel_slug,
			entities.name as channel_name,
			parents.id as network_id,
			parents.slug as network_slug,
			parents.name as network_name,
			movies_covers IS NOT NULL as has_cover,
			COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
			COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags
		FROM movies
		LEFT JOIN movies_meta ON movies_meta.movie_id = movies.id
		LEFT JOIN movies_scenes ON movies_scenes.movie_id = movies.id
		LEFT JOIN entities ON movies.entity_id = entities.id
		LEFT JOIN entities AS parents ON parents.id = entities.parent_id
		LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = movies_scenes.scene_id
		LEFT JOIN releases_directors AS local_directors ON local_directors.release_id = movies_scenes.scene_id
		LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = movies_scenes.scene_id
		LEFT JOIN actors ON local_actors.actor_id = actors.id
		LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
		LEFT JOIN tags ON local_tags.tag_id = tags.id
		LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
		LEFT JOIN movies_covers ON movies_covers.movie_id = movies.id
		${movieIds ? 'WHERE movies.id = ANY(?)' : ''}
		GROUP BY
			movies.id,
			movies.title,
			movies.created_at,
			movies.date,
			movies_meta.stashed,
			movies_meta.stashed_scenes,
			movies_meta.stashed_total,
			entities.id,
			entities.name,
			entities.slug,
			entities.alias,
			parents.id,
			parents.name,
			parents.slug,
			parents.alias,
			movies_covers.*
	`, movieIds && [movieIds]);

	const docs = movies.rows.map((movie) => {
		// match word characters to filter out brackets etc.
		// match() returns null when nothing matches; fall back to an empty list so no null reaches the title filter.
		const flatActors = movie.actors.flatMap((actor) => actor.f2.match(/[\w']+/g) ?? []);
		// only make top tags searchable to minimize cluttered results; f3 is the priority, f2 the name, f4 the alias name
		const flatTags = movie.tags
			.filter((tag) => tag.f3 > 6)
			.flatMap((tag) => (tag.f4 ? `${tag.f2} ${tag.f4}` : tag.f2).match(/[\w']+/g) ?? []);

		// NOTE(review): unlike the scene title filter, this pattern has no word boundaries, so it also
		// removes matches inside longer words. Confirm whether that is intended before aligning the two.
		const filteredTitle = movie.title && [...flatActors, ...flatTags]
			.reduce((accTitle, tag) => accTitle.replace(new RegExp(tag.replace(/[^\w\s]+/g, ''), 'gi'), ''), movie.title)
			.trim()
			.replace(/\s{2,}/g, ' ');

		// Fields set to undefined are dropped by JSON.stringify and therefore omitted from the document.
		return {
			replace: {
				index: 'movies',
				id: movie.id,
				doc: {
					title: movie.title || undefined,
					title_filtered: filteredTitle || undefined,
					date: movie.date ? Math.round(movie.date.getTime() / 1000) : undefined,
					created_at: Math.round(movie.created_at.getTime() / 1000),
					effective_date: Math.round((movie.date || movie.created_at).getTime() / 1000),
					channel_id: movie.channel_id,
					channel_slug: movie.channel_slug,
					channel_name: movie.channel_name,
					network_id: movie.network_id || undefined,
					network_slug: movie.network_slug || undefined,
					network_name: movie.network_name || undefined,
					entity_ids: [movie.channel_id, movie.network_id].filter(Boolean), // manticore does not support OR, this allows IN
					actor_ids: movie.actors.map((actor) => actor.f1),
					actors: movie.actors.map((actor) => actor.f2).join(),
					tag_ids: movie.tags.map((tag) => tag.f1),
					tags: flatTags.join(' '),
					has_cover: movie.has_cover,
					meta: movie.date ? format(movie.date, 'y yy M MMM MMMM d') : undefined,
					stashed: movie.stashed || 0,
					stashed_scenes: movie.stashed_scenes || 0,
					stashed_total: movie.stashed_total || 0,
				},
			},
		};
	});

	if (docs.length === 0) {
		return;
	}

	// The bulk payload is newline-delimited JSON, one operation per line.
	await indexApi.bulk(docs.map((doc) => JSON.stringify(doc)).join('\n'));
}
2024-03-14 23:57:28 +00:00
/**
 * Regenerates the text-search documents in `<target>s_search` for the given movies.
 *
 * @param {Array} [movieIds] - IDs to update; when omitted, every row of the target table is processed.
 * @param {string} [target='movie'] - singular table name; interpolated into the SQL as `<target>s`, `<target>s_scenes` and `<target>_id`.
 * @returns {Promise<void>}
 */
async function updateSqlMovieSearch(movieIds, target = 'movie') {
	// Identifiers derived from the target, named once and reused throughout the query.
	const table = `${target}s`;
	const idColumn = `${target}_id`;
	const count = movieIds ? movieIds.length : 'all';

	logger.info(`Updating search documents for ${count} ${table}`);

	// The filter and its binding are only present when specific IDs were requested.
	const whereClause = movieIds ? `WHERE ${table}.id = ANY(?)` : '';
	const bindings = movieIds && [movieIds];

	const { rows } = await knex.raw(`
		SELECT
			${table}.id AS ${idColumn},
			TO_TSVECTOR(
				'english',
				COALESCE(${table}.title, '') || ' ' ||
				entities.name || ' ' ||
				entities.slug || ' ' ||
				COALESCE(array_to_string(entities.alias, ' '), '') || ' ' ||
				COALESCE(parents.name, '') || ' ' ||
				COALESCE(parents.slug, '') || ' ' ||
				COALESCE(array_to_string(parents.alias, ' '), '') || ' ' ||
				COALESCE(TO_CHAR(${table}.date, 'YYYY YY MM FMMM FMMonth mon DD FMDD'), '') || ' ' ||
				STRING_AGG(COALESCE(releases.title, ''), ' ') || ' ' ||
				STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' ||
				STRING_AGG(COALESCE(tags.name, ''), ' ')
			) as document
		FROM ${table}
		LEFT JOIN entities ON ${table}.entity_id = entities.id
		LEFT JOIN entities AS parents ON parents.id = entities.parent_id
		LEFT JOIN ${table}_scenes ON ${table}_scenes.${idColumn} = ${table}.id
		LEFT JOIN releases ON releases.id = ${table}_scenes.scene_id
		LEFT JOIN releases_actors ON releases_actors.release_id = ${table}_scenes.scene_id
		LEFT JOIN releases_tags ON releases_tags.release_id = releases.id
		LEFT JOIN actors ON actors.id = releases_actors.actor_id
		LEFT JOIN tags ON tags.id = releases_tags.tag_id
		${whereClause}
		GROUP BY ${table}.id, entities.name, entities.slug, entities.alias, parents.name, parents.slug, parents.alias;
	`, bindings);

	if (rows?.length > 0) {
		await bulkInsert(`${table}_search`, rows, [idColumn]);
	}
}
2024-03-14 23:57:28 +00:00
/**
 * Updates every movie search backend for the given movies.
 *
 * @param {Array} [releaseIds] - movie IDs to update; passed through unchanged to both updaters.
 * @returns {Promise<void>}
 */
async function updateMovieSearch(releaseIds) {
	// Ordered steps: refresh movies_meta, then the SQL documents, then the Manticore documents.
	const steps = [
		() => knex.raw('REFRESH MATERIALIZED VIEW movies_meta;'),
		() => updateSqlMovieSearch(releaseIds),
		() => updateManticoreMovieSearch(releaseIds),
	];

	// Each step starts only after the previous one has settled successfully.
	await steps.reduce(async (chain, step) => {
		await chain;
		await step();
	}, Promise.resolve());
}
2023-07-25 01:03:41 +00:00
// Public API: only the two combined entry points are exported; the SQL- and
// Manticore-specific updaters stay private to this module.
module . exports = {
updateSceneSearch ,
updateMovieSearch ,
} ;