Now using unprint as default; marked to-be-updated scrapers as deprecated. Merged movie tags and movie scene tags for the manticore movies table. Removed the poster being set to null in deep merge; annotate it if it has a purpose. Refactored the Brad Montana scraper.
This commit is contained in:
@@ -34,10 +34,12 @@ async function fetchMovies() {
|
||||
parents.name as network_name,
|
||||
movies_covers IS NOT NULL as has_cover,
|
||||
COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
|
||||
COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags
|
||||
COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
|
||||
COALESCE(JSON_AGG(DISTINCT (movie_tags.id, movie_tags.name, movie_tags.priority, movie_tags_aliases.name)) FILTER (WHERE movie_tags.id IS NOT NULL), '[]') as movie_tags
|
||||
FROM movies
|
||||
LEFT JOIN movies_meta ON movies_meta.movie_id = movies.id
|
||||
LEFT JOIN movies_scenes ON movies_scenes.movie_id = movies.id
|
||||
LEFT JOIN movies_tags ON movies_tags.movie_id = movies.id
|
||||
LEFT JOIN entities ON movies.entity_id = entities.id
|
||||
LEFT JOIN entities AS parents ON parents.id = entities.parent_id
|
||||
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = movies_scenes.scene_id
|
||||
@@ -47,6 +49,8 @@ async function fetchMovies() {
|
||||
LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
|
||||
LEFT JOIN tags ON local_tags.tag_id = tags.id
|
||||
LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
|
||||
LEFT JOIN tags as movie_tags ON movies_tags.tag_id = movie_tags.id
|
||||
LEFT JOIN tags as movie_tags_aliases ON movies_tags.tag_id = movie_tags_aliases.alias_for AND movie_tags_aliases.secondary = true
|
||||
LEFT JOIN movies_covers ON movies_covers.movie_id = movies.id
|
||||
GROUP BY
|
||||
movies.id,
|
||||
@@ -101,8 +105,15 @@ async function init() {
|
||||
const movies = await fetchMovies();
|
||||
|
||||
const docs = movies.map((movie) => {
|
||||
const combinedTags = Object.values(Object.fromEntries(movie.tags.concat(movie.movie_tags).map((tag) => [tag.f1, {
|
||||
id: tag.f1,
|
||||
name: tag.f2,
|
||||
priority: tag.f3,
|
||||
alias: tag.f4,
|
||||
}])));
|
||||
|
||||
const flatActors = movie.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc.
|
||||
const flatTags = movie.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => (tag.f4 ? `${tag.f2} ${tag.f4}` : tag.f2).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results
|
||||
const flatTags = combinedTags.filter((tag) => tag.priority > 6).flatMap((tag) => (tag.alias ? `${tag.name} ${tag.alias}` : tag.name).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results
|
||||
const filteredTitle = movie.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag.replace(/[^\w\s]+/g, ''), 'gi'), ''), movie.title).trim().replace(/\s{2,}/g, ' ');
|
||||
|
||||
return {
|
||||
@@ -124,7 +135,7 @@ async function init() {
|
||||
entity_ids: [movie.channel_id, movie.network_id].filter(Boolean), // manticore does not support OR, this allows IN
|
||||
actor_ids: movie.actors.map((actor) => actor.f1),
|
||||
actors: movie.actors.map((actor) => actor.f2).join(),
|
||||
tag_ids: movie.tags.map((tag) => tag.f1),
|
||||
tag_ids: combinedTags.map((tag) => tag.id),
|
||||
tags: flatTags.join(' '),
|
||||
has_cover: movie.has_cover,
|
||||
meta: movie.date ? format(movie.date, 'y yy M MMM MMMM d') : undefined,
|
||||
|
||||
Reference in New Issue
Block a user