Made unprint the default and marked scrapers that still need to be updated as deprecated. Merged movie tags and movie scene tags for the manticore movies table. Removed the assignment that set the poster to null ahead of the deep merge; annotate it if it has a purpose. Refactored the Brad Montana scraper.

This commit is contained in:
DebaucheryLibrarian 2024-08-20 02:33:42 +02:00
parent f81ec6f393
commit d3a978c501
29 changed files with 116 additions and 113 deletions

View File

@ -84,9 +84,15 @@ async function fetchScene(scraper, url, entity, baseRelease, options, type = 'sc
} }
if ((type === 'scene' && scraper.scrapeScene) || (type === 'movie' && scraper.scrapeMovie)) { if ((type === 'scene' && scraper.scrapeScene) || (type === 'movie' && scraper.scrapeMovie)) {
/*
if (scraper.useUnprint || (type === 'scene' && scraper.scrapeScene?.unprint) || (type === 'movie' && scraper.scrapeMovie?.unprint)) { if (scraper.useUnprint || (type === 'scene' && scraper.scrapeScene?.unprint) || (type === 'movie' && scraper.scrapeMovie?.unprint)) {
return fetchUnprintScene(scraper, url, entity, baseRelease, options, type); return fetchUnprintScene(scraper, url, entity, baseRelease, options, type);
} }
*/
if (!scraper.deprecated) {
return fetchUnprintScene(scraper, url, entity, baseRelease, options, type);
}
const session = qu.session(); const session = qu.session();
@ -191,7 +197,7 @@ async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') {
}), }),
}), {}); }), {});
curatedScrapedRelease.poster = null; // curatedScrapedRelease.poster = null; // wat
const mergedRelease = { const mergedRelease = {
...merge(baseRelease, curatedScrapedRelease, { ...merge(baseRelease, curatedScrapedRelease, {
@ -199,6 +205,9 @@ async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') {
hardMergeKeys: ['actors', 'covers', 'poster', 'trailer', 'teaser'], hardMergeKeys: ['actors', 'covers', 'poster', 'trailer', 'teaser'],
ignoreKeys: ['poster'], ignoreKeys: ['poster'],
}), }),
datePrecision: curatedScrapedRelease.date // don't inherit date precision from base release
? curatedScrapedRelease.datePrecision
: baseRelease.datePrecision,
poster: Array.from(new Set([ poster: Array.from(new Set([
...[].concat(curatedScrapedRelease.poster), ...[].concat(curatedScrapedRelease.poster),
...[].concat(baseRelease.poster), ...[].concat(baseRelease.poster),

View File

@ -205,12 +205,6 @@ module.exports = {
fetchLatest, fetchLatest,
// fetchMovies, // fetchMovies,
fetchProfile, fetchProfile,
scrapeScene: { scrapeScene: scrapeRelease,
scraper: scrapeRelease, scrapeMovie: scrapeRelease,
unprint: true,
},
scrapeMovie: {
scraper: scrapeRelease,
unprint: true,
},
}; };

View File

@ -74,8 +74,5 @@ async function fetchLatest(channel, page = 1) {
module.exports = { module.exports = {
fetchLatest, fetchLatest,
scrapeScene: { scrapeScene,
scraper: scrapeScene,
unprint: true,
},
}; };

View File

@ -138,9 +138,6 @@ async function fetchProfile(actor, { channel }) {
module.exports = { module.exports = {
fetchLatest, fetchLatest,
scrapeScene: { scrapeScene,
scraper: scrapeScene,
unprint: true,
},
fetchProfile, fetchProfile,
}; };

View File

@ -208,12 +208,6 @@ async function fetchProfile({ name: actorName, url: actorUrl }, { entity, includ
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchProfile, fetchProfile,
scrapeScene: { scrapeScene,
scraper: scrapeScene, scrapeMovie,
unprint: true,
},
scrapeMovie: {
scraper: scrapeMovie,
unprint: true,
},
}; };

View File

@ -225,5 +225,4 @@ module.exports = {
fetchUpcoming, fetchUpcoming,
fetchProfile, fetchProfile,
scrapeScene, scrapeScene,
useUnprint: true,
}; };

View File

@ -234,7 +234,6 @@ module.exports = {
fetchProfile, fetchProfile,
scrapeScene: { scrapeScene: {
scraper: scrapeScene, scraper: scrapeScene,
unprint: true,
parser: { parser: {
runScripts: 'dangerously', runScripts: 'dangerously',
}, },

View File

@ -1,6 +1,7 @@
'use strict'; 'use strict';
const qu = require('../utils/q'); const unprint = require('unprint');
const slugify = require('../utils/slugify'); const slugify = require('../utils/slugify');
function genderFromUrl(url) { function genderFromUrl(url) {
@ -20,18 +21,21 @@ function genderFromUrl(url) {
function scrapeAll(scenes) { function scrapeAll(scenes) {
return scenes.map(({ query }) => { return scenes.map(({ query }) => {
const release = {}; const release = {};
const subtitle = query.cnt('.subtitle');
release.url = query.url('a'); release.url = query.url(null);
release.entryId = new URL(release.url).pathname.match(/\/videos\/([\w-]+)/)[1]; release.entryId = new URL(release.url).pathname.match(/\/videos\/([\w-]+)/)[1];
release.title = query.cnt('.title') || query.q('img', 'title'); release.title = query.attribute('img', 'title') || query.content('.font-semibold');
release.actors = subtitle.slice(subtitle.indexOf(':') + 1).split(',').map((actor) => actor.trim()).filter(Boolean);
release.poster = query.img('.thumb img'); const poster = query.img('img[src*="/uploads"]');
if (release.poster) { if (poster) {
const match = release.poster.match(/\/uploads\/(\d{4})\/(\d{2})/); release.poster = [
poster.replace(/-\d+x\d+/, ''),
poster,
];
const match = poster.match(/\/uploads\/(\d{4})\/(\d{2})/);
if (match) { if (match) {
release.date = new Date(match[1], match[2] - 1, 1); release.date = new Date(match[1], match[2] - 1, 1);
@ -43,30 +47,34 @@ function scrapeAll(scenes) {
}); });
} }
function scrapeScene({ query, html }, url, channel) { function scrapeScene({ query, html }, { url, entity }) {
const release = {}; const release = {};
const dataString = query.html('.yoast-schema-graph'); const data = query.json('.yoast-schema-graph')?.['@graph'];
const data = dataString && JSON.parse(dataString)['@graph'];
const pageData = data.find((item) => item['@type'] === 'WebPage'); const pageData = data?.find((item) => item['@type'] === 'WebPage');
const imageData = data.find((item) => item['@type'] === 'ImageObject'); const imageData = data?.find((item) => item['@type'] === 'ImageObject');
release.entryId = new URL(url).pathname.match(/\/videos\/([\w-]+)/)[1]; release.entryId = new URL(url).pathname.match(/\/videos\/([\w-]+)/)[1];
release.title = query.cnt('.video .title h1') release.title = query.content('.w-screen + div .font-semibold')
|| data.find((item) => item['@type'] === 'BreadcrumbList')?.itemListElement.slice(-1)[0].item.name || data?.find((item) => item['@type'] === 'BreadcrumbList')?.itemListElement.slice(-1)[0].item?.name
|| pageData?.name.slice(0, pageData.name.lastIndexOf('-')).trim(); || pageData?.name.slice(0, pageData?.name.lastIndexOf('-')).trim();
release.description = query.cnt('.video .descript'); release.description = query.content('.leading-relaxed');
release.date = pageData?.datePublished && new Date(pageData.datePublished);
release.date = pageData.datePublished && new Date(pageData.datePublished); release.actors = query.elements('.models-slider-single a').map((el) => {
const actorUrl = unprint.query.url(el, null);
release.actors = query.all('.video .elenco a').map((el) => { const avatarUrl = unprint.query.img(el);
const actorUrl = query.url(el, null);
return { return {
name: query.cnt(el), name: unprint.query.content(el),
url: actorUrl, url: actorUrl,
avatar: [
avatarUrl?.replace(/-\d+x\d+/, ''),
avatarUrl,
],
gender: genderFromUrl(actorUrl), gender: genderFromUrl(actorUrl),
}; };
}); });
@ -75,11 +83,8 @@ function scrapeScene({ query, html }, url, channel) {
|| query.meta('property="og:image"') || query.meta('property="og:image"')
|| html.match(/poster: '(http.*\.jpg)'/)?.[1]; || html.match(/poster: '(http.*\.jpg)'/)?.[1];
release.photos = query.imgs('.listPostSm a', 'href'); release.photos = query.imgs('.gallery img');
release.trailer = query.video('source', 'src', { origin: channel.url }); release.trailer = query.video('source', 'src', { origin: entity.url });
release.likes = query.number('.vortex-p-like-counter');
release.dislikes = query.number('.vortex-p-dislike-counter');
if (!release.date && release.poster) { if (!release.date && release.poster) {
const match = release.poster.match(/\/uploads\/(\d{4})\/(\d{2})/); const match = release.poster.match(/\/uploads\/(\d{4})\/(\d{2})/);
@ -93,38 +98,42 @@ function scrapeScene({ query, html }, url, channel) {
return release; return release;
} }
function scrapeProfile({ query, el }, entity, url) { function scrapeProfile({ query }, entity, url) {
const profile = { url }; const profile = { url };
const data = query.json('.yoast-schema-graph');
profile.gender = genderFromUrl(url); profile.gender = genderFromUrl(url);
profile.description = query.cnt('.about')?.replace(/sobre a atriz:/i, '').trim(); if (data) {
profile.avatar = query.img('.left .thumb img'); profile.avatar = data['@graph']?.find((item) => item['@type'] === 'ImageObject')?.url;
}
profile.scenes = scrapeAll(qu.initAll(el, '.listPostLg .post'));
return profile; return profile;
} }
async function fetchLatest(channel, page = 1) { async function fetchLatest(channel, page = 1) {
const url = `${channel.url}/videos/page/${page}`; const url = `${channel.url}/videos/page/${page}`;
const res = await qu.getAll(url, '.listPostLg .post'); const res = await unprint.get(url, { selectAll: '.grid > a[href*="/videos"]' });
if (res.ok) { if (res.ok) {
return scrapeAll(res.items, channel); return scrapeAll(res.context, channel);
} }
return res.status; return res.status;
} }
async function fetchProfilePage({ name, gender }, entity, secondAttempt) { async function fetchProfilePage({ name, gender, url: actorUrl }, entity, secondAttempt) {
const url = `${entity.url}/${gender === 'male' || secondAttempt ? 'atores' : 'atrizes'}/${slugify(name, '-')}`; const url = actorUrl || `${entity.url}/${gender === 'male' || secondAttempt ? 'atores' : 'atrizes'}/${slugify(name, '-')}`;
const res = await qu.get(url); const res = await unprint.get(url);
if (res.ok) { if (res.ok) {
return { res, url }; return { res, url };
} }
if (actorUrl) {
return fetchProfilePage({ name, gender }, entity, false); // don't count as second attempt, retry without actor URL
}
if (secondAttempt) { if (secondAttempt) {
return res.status; return res.status;
} }
@ -136,7 +145,7 @@ async function fetchProfile(baseActor, entity, options) {
const { res, url } = await fetchProfilePage(baseActor, entity, false); const { res, url } = await fetchProfilePage(baseActor, entity, false);
if (res.ok) { if (res.ok) {
return scrapeProfile(res.item, entity, url, options); return scrapeProfile(res.context, entity, url, options);
} }
return res.status; return res.status;

View File

@ -75,5 +75,4 @@ async function fetchLatest(channel, page = 1) {
module.exports = { module.exports = {
fetchLatest, fetchLatest,
scrapeScene, scrapeScene,
useUnprint: true,
}; };

View File

@ -981,4 +981,5 @@ module.exports = {
scrapeAll, scrapeAll,
scrapeMovie, scrapeMovie,
scrapeScene, scrapeScene,
deprecated: true,
}; };

View File

@ -93,8 +93,5 @@ async function fetchProfile({ name: actorName }, entity, include) {
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchProfile, fetchProfile,
scrapeScene: { scrapeScene,
scraper: scrapeScene,
unprint: true,
},
}; };

View File

@ -351,8 +351,5 @@ module.exports = {
fetchMovie, fetchMovie,
fetchProfile, fetchProfile,
fetchUpcoming, fetchUpcoming,
scrapeScene: { scrapeScene,
scraper: scrapeScene,
unprint: true,
},
}; };

View File

@ -95,4 +95,5 @@ module.exports = {
fetchLatest, fetchLatest,
fetchProfile, fetchProfile,
scrapeScene, scrapeScene,
deprecated: true,
}; };

View File

@ -151,4 +151,5 @@ module.exports = {
fetchLatest, fetchLatest,
fetchProfile, fetchProfile,
scrapeScene, scrapeScene,
deprecated: true,
}; };

View File

@ -88,9 +88,6 @@ async function fetchProfile(actor) {
module.exports = { module.exports = {
fetchLatest, fetchLatest,
scrapeScene: { scrapeScene,
scraper: scrapeScene,
unprint: true,
},
fetchProfile, fetchProfile,
}; };

View File

@ -87,8 +87,5 @@ async function fetchProfile({ name }, entity) {
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchProfile, fetchProfile,
scrapeScene: { scrapeScene,
scraper: scrapeScene,
unprint: true,
},
}; };

View File

@ -130,8 +130,5 @@ async function fetchProfile({ slug }, { channel }) {
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchProfile, fetchProfile,
scrapeScene: { scrapeScene,
scraper: scrapeScene,
unprint: true,
},
}; };

View File

@ -158,10 +158,8 @@ async function fetchLatestBlock(site, page) {
module.exports = { module.exports = {
fetchLatest: fetchLatestClassic, fetchLatest: fetchLatestClassic,
scrapeScene: scrapeSceneClassic, scrapeScene: scrapeSceneClassic,
useUnprint: true,
block: { block: {
scrapeScene: scrapeSceneBlock, scrapeScene: scrapeSceneBlock,
fetchLatest: fetchLatestBlock, fetchLatest: fetchLatestBlock,
useUnprint: true,
}, },
}; };

View File

@ -160,4 +160,5 @@ module.exports = {
fetchUpcoming, fetchUpcoming,
fetchProfile, fetchProfile,
scrapeScene, scrapeScene,
deprecated: true,
}; };

View File

@ -150,4 +150,5 @@ module.exports = {
fetchLatest, fetchLatest,
scrapeScene, scrapeScene,
fetchProfile, fetchProfile,
deprecated: true,
}; };

View File

@ -169,4 +169,5 @@ module.exports = {
fetchUpcoming, fetchUpcoming,
scrapeAll, scrapeAll,
scrapeScene, scrapeScene,
deprecated: true,
}; };

View File

@ -168,4 +168,5 @@ module.exports = {
fetchLatest, fetchLatest,
fetchProfile, fetchProfile,
scrapeScene, scrapeScene,
deprecated: true,
}; };

View File

@ -118,8 +118,5 @@ async function fetchProfile(actor, entity, include) {
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchProfile, fetchProfile,
scrapeScene: { scrapeScene,
scraper: scrapeScene,
unprint: true,
},
}; };

View File

@ -104,8 +104,5 @@ async function fetchProfile(actor, { entity }) {
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchProfile, fetchProfile,
scrapeScene: { scrapeScene,
scraper: scrapeScene,
unprint: true,
},
}; };

View File

@ -174,4 +174,5 @@ module.exports = {
fetchLatest, fetchLatest,
scrapeScene, scrapeScene,
fetchProfile, fetchProfile,
deprecated: true,
}; };

View File

@ -86,8 +86,5 @@ async function fetchLatest(channel, page = 1) {
module.exports = { module.exports = {
fetchLatest, fetchLatest,
scrapeScene: { scrapeScene,
scraper: scrapeScene,
unprint: true,
},
}; };

View File

@ -199,11 +199,11 @@ function filterInternalDuplicateReleases(releases) {
.flat(); .flat();
} }
async function filterDuplicateReleases(releases) { async function filterDuplicateReleases(releases, domain = 'releases') {
const internalUniqueReleases = filterInternalDuplicateReleases(releases); const internalUniqueReleases = filterInternalDuplicateReleases(releases);
const internalUniqueReleaseChunks = chunk(internalUniqueReleases); const internalUniqueReleaseChunks = chunk(internalUniqueReleases);
const duplicateReleaseEntryChunks = await Promise.map(internalUniqueReleaseChunks, async (internalUniqueReleasesChunk) => knex('releases') const duplicateReleaseEntryChunks = await Promise.map(internalUniqueReleaseChunks, async (internalUniqueReleasesChunk) => knex(domain)
.whereIn(['entry_id', 'entity_id'], internalUniqueReleasesChunk.map((release) => [release.entryId, release.entity.id])) .whereIn(['entry_id', 'entity_id'], internalUniqueReleasesChunk.map((release) => [release.entryId, release.entity.id]))
.orWhereIn(['entry_id', 'entity_id'], internalUniqueReleasesChunk .orWhereIn(['entry_id', 'entity_id'], internalUniqueReleasesChunk
// scene IDs shared across network, mark as duplicate so scene can be updated with channel if only available on release day (i.e. Perv City) // scene IDs shared across network, mark as duplicate so scene can be updated with channel if only available on release day (i.e. Perv City)
@ -349,7 +349,7 @@ async function storeMovies(movies, useBatchId) {
return []; return [];
} }
const { uniqueReleases } = await filterDuplicateReleases(movies); const { uniqueReleases } = await filterDuplicateReleases(movies, 'movies');
const [{ id: batchId }] = useBatchId ? [{ id: useBatchId }] : await knex('batches').insert({ showcased: argv.showcased, comment: null }).returning('id'); const [{ id: batchId }] = useBatchId ? [{ id: useBatchId }] : await knex('batches').insert({ showcased: argv.showcased, comment: null }).returning('id');
const curatedMovieEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId, null, 'movie'))); const curatedMovieEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId, null, 'movie')));
@ -357,9 +357,10 @@ async function storeMovies(movies, useBatchId) {
const storedMovies = await bulkInsert('movies', curatedMovieEntries, ['entity_id', 'entry_id'], true); const storedMovies = await bulkInsert('movies', curatedMovieEntries, ['entity_id', 'entry_id'], true);
const moviesWithId = attachReleaseIds(movies, storedMovies); const moviesWithId = attachReleaseIds(movies, storedMovies);
await updateMovieSearch(moviesWithId.map((movie) => movie.id));
await associateReleaseMedia(moviesWithId, 'movie');
await associateReleaseTags(moviesWithId, 'movie'); await associateReleaseTags(moviesWithId, 'movie');
await updateMovieSearch(moviesWithId.map((movie) => movie.id));
await associateReleaseMedia(moviesWithId, 'movie');
return moviesWithId; return moviesWithId;
} }
@ -369,7 +370,7 @@ async function storeSeries(series, useBatchId) {
return []; return [];
} }
const { uniqueReleases } = await filterDuplicateReleases(series); const { uniqueReleases } = await filterDuplicateReleases(series, 'series');
const [{ id: batchId }] = useBatchId ? [{ id: useBatchId }] : await knex('batches').insert({ showcased: argv.showcased, comment: null }).returning('id'); const [{ id: batchId }] = useBatchId ? [{ id: useBatchId }] : await knex('batches').insert({ showcased: argv.showcased, comment: null }).returning('id');
const curatedSerieEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId, null, 'serie'))); const curatedSerieEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId, null, 'serie')));
@ -395,7 +396,7 @@ async function storeScenes(releases, useBatchId) {
const releasesWithStudios = await attachStudios(releasesWithBaseActors); const releasesWithStudios = await attachStudios(releasesWithBaseActors);
// uniqueness is entity ID + entry ID, filter uniques after adding entities // uniqueness is entity ID + entry ID, filter uniques after adding entities
const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios); const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios, 'releases');
const curatedNewReleaseEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId))); const curatedNewReleaseEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId)));
const storedReleases = await bulkInsert('releases', curatedNewReleaseEntries); const storedReleases = await bulkInsert('releases', curatedNewReleaseEntries);
@ -433,6 +434,7 @@ async function storeScenes(releases, useBatchId) {
await associateSerieScenes(storedSeries, releasesWithId); await associateSerieScenes(storedSeries, releasesWithId);
await associateDirectors(releasesWithId, batchId); // some directors may also be actors, don't associate at the same time await associateDirectors(releasesWithId, batchId); // some directors may also be actors, don't associate at the same time
await updateSceneSearch(releasesWithId.map((release) => release.id)); await updateSceneSearch(releasesWithId.map((release) => release.id));
// media is more error-prone, associate separately // media is more error-prone, associate separately

View File

@ -34,10 +34,12 @@ async function fetchMovies() {
parents.name as network_name, parents.name as network_name,
movies_covers IS NOT NULL as has_cover, movies_covers IS NOT NULL as has_cover,
COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors, COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
COALESCE(JSON_AGG(DISTINCT (movie_tags.id, movie_tags.name, movie_tags.priority, movie_tags_aliases.name)) FILTER (WHERE movie_tags.id IS NOT NULL), '[]') as movie_tags
FROM movies FROM movies
LEFT JOIN movies_meta ON movies_meta.movie_id = movies.id LEFT JOIN movies_meta ON movies_meta.movie_id = movies.id
LEFT JOIN movies_scenes ON movies_scenes.movie_id = movies.id LEFT JOIN movies_scenes ON movies_scenes.movie_id = movies.id
LEFT JOIN movies_tags ON movies_tags.movie_id = movies.id
LEFT JOIN entities ON movies.entity_id = entities.id LEFT JOIN entities ON movies.entity_id = entities.id
LEFT JOIN entities AS parents ON parents.id = entities.parent_id LEFT JOIN entities AS parents ON parents.id = entities.parent_id
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = movies_scenes.scene_id LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = movies_scenes.scene_id
@ -47,6 +49,8 @@ async function fetchMovies() {
LEFT JOIN actors AS directors ON local_directors.director_id = directors.id LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
LEFT JOIN tags ON local_tags.tag_id = tags.id LEFT JOIN tags ON local_tags.tag_id = tags.id
LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
LEFT JOIN tags as movie_tags ON movies_tags.tag_id = movie_tags.id
LEFT JOIN tags as movie_tags_aliases ON movies_tags.tag_id = movie_tags_aliases.alias_for AND movie_tags_aliases.secondary = true
LEFT JOIN movies_covers ON movies_covers.movie_id = movies.id LEFT JOIN movies_covers ON movies_covers.movie_id = movies.id
GROUP BY GROUP BY
movies.id, movies.id,
@ -101,8 +105,15 @@ async function init() {
const movies = await fetchMovies(); const movies = await fetchMovies();
const docs = movies.map((movie) => { const docs = movies.map((movie) => {
const combinedTags = Object.values(Object.fromEntries(movie.tags.concat(movie.movie_tags).map((tag) => [tag.f1, {
id: tag.f1,
name: tag.f2,
priority: tag.f3,
alias: tag.f4,
}])));
const flatActors = movie.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc. const flatActors = movie.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc.
const flatTags = movie.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => (tag.f4 ? `${tag.f2} ${tag.f4}` : tag.f2).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results const flatTags = combinedTags.filter((tag) => tag.priority > 6).flatMap((tag) => (tag.alias ? `${tag.name} ${tag.alias}` : tag.name).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results
const filteredTitle = movie.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag.replace(/[^\w\s]+/g, ''), 'gi'), ''), movie.title).trim().replace(/\s{2,}/g, ' '); const filteredTitle = movie.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag.replace(/[^\w\s]+/g, ''), 'gi'), ''), movie.title).trim().replace(/\s{2,}/g, ' ');
return { return {
@ -124,7 +135,7 @@ async function init() {
entity_ids: [movie.channel_id, movie.network_id].filter(Boolean), // manticore does not support OR, this allows IN entity_ids: [movie.channel_id, movie.network_id].filter(Boolean), // manticore does not support OR, this allows IN
actor_ids: movie.actors.map((actor) => actor.f1), actor_ids: movie.actors.map((actor) => actor.f1),
actors: movie.actors.map((actor) => actor.f2).join(), actors: movie.actors.map((actor) => actor.f2).join(),
tag_ids: movie.tags.map((tag) => tag.f1), tag_ids: combinedTags.map((tag) => tag.id),
tags: flatTags.join(' '), tags: flatTags.join(' '),
has_cover: movie.has_cover, has_cover: movie.has_cover,
meta: movie.date ? format(movie.date, 'y yy M MMM MMMM d') : undefined, meta: movie.date ? format(movie.date, 'y yy M MMM MMMM d') : undefined,

View File

@ -235,10 +235,12 @@ async function updateManticoreMovieSearch(movieIds) {
parents.name as network_name, parents.name as network_name,
movies_covers IS NOT NULL as has_cover, movies_covers IS NOT NULL as has_cover,
COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors, COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
COALESCE(JSON_AGG(DISTINCT (movie_tags.id, movie_tags.name, movie_tags.priority, movie_tags_aliases.name)) FILTER (WHERE movie_tags.id IS NOT NULL), '[]') as movie_tags
FROM movies FROM movies
LEFT JOIN movies_meta ON movies_meta.movie_id = movies.id LEFT JOIN movies_meta ON movies_meta.movie_id = movies.id
LEFT JOIN movies_scenes ON movies_scenes.movie_id = movies.id LEFT JOIN movies_scenes ON movies_scenes.movie_id = movies.id
LEFT JOIN movies_tags ON movies_tags.movie_id = movies.id
LEFT JOIN entities ON movies.entity_id = entities.id LEFT JOIN entities ON movies.entity_id = entities.id
LEFT JOIN entities AS parents ON parents.id = entities.parent_id LEFT JOIN entities AS parents ON parents.id = entities.parent_id
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = movies_scenes.scene_id LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = movies_scenes.scene_id
@ -248,6 +250,8 @@ async function updateManticoreMovieSearch(movieIds) {
LEFT JOIN actors AS directors ON local_directors.director_id = directors.id LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
LEFT JOIN tags ON local_tags.tag_id = tags.id LEFT JOIN tags ON local_tags.tag_id = tags.id
LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
LEFT JOIN tags as movie_tags ON movies_tags.tag_id = movie_tags.id
LEFT JOIN tags as movie_tags_aliases ON movies_tags.tag_id = movie_tags_aliases.alias_for AND movie_tags_aliases.secondary = true
LEFT JOIN movies_covers ON movies_covers.movie_id = movies.id LEFT JOIN movies_covers ON movies_covers.movie_id = movies.id
${movieIds ? 'WHERE movies.id = ANY(?)' : ''} ${movieIds ? 'WHERE movies.id = ANY(?)' : ''}
GROUP BY GROUP BY
@ -270,8 +274,15 @@ async function updateManticoreMovieSearch(movieIds) {
`, movieIds && [movieIds]); `, movieIds && [movieIds]);
const docs = movies.rows.map((movie) => { const docs = movies.rows.map((movie) => {
const combinedTags = Object.values(Object.fromEntries(movie.tags.concat(movie.movie_tags).map((tag) => [tag.f1, {
id: tag.f1,
name: tag.f2,
priority: tag.f3,
alias: tag.f4,
}])));
const flatActors = movie.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc. const flatActors = movie.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc.
const flatTags = movie.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => (tag.f4 ? `${tag.f2} ${tag.f4}` : tag.f2).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results const flatTags = combinedTags.filter((tag) => tag.priority > 6).flatMap((tag) => (tag.alias ? `${tag.name} ${tag.alias}` : tag.name).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results
const filteredTitle = movie.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag.replace(/[^\w\s]+/g, ''), 'gi'), ''), movie.title).trim().replace(/\s{2,}/g, ' '); const filteredTitle = movie.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag.replace(/[^\w\s]+/g, ''), 'gi'), ''), movie.title).trim().replace(/\s{2,}/g, ' ');
return { return {
@ -293,7 +304,7 @@ async function updateManticoreMovieSearch(movieIds) {
entity_ids: [movie.channel_id, movie.network_id].filter(Boolean), // manticore does not support OR, this allows IN entity_ids: [movie.channel_id, movie.network_id].filter(Boolean), // manticore does not support OR, this allows IN
actor_ids: movie.actors.map((actor) => actor.f1), actor_ids: movie.actors.map((actor) => actor.f1),
actors: movie.actors.map((actor) => actor.f2).join(), actors: movie.actors.map((actor) => actor.f2).join(),
tag_ids: movie.tags.map((tag) => tag.f1), tag_ids: combinedTags.map((tag) => tag.id),
tags: flatTags.join(' '), tags: flatTags.join(' '),
has_cover: movie.has_cover, has_cover: movie.has_cover,
meta: movie.date ? format(movie.date, 'y yy M MMM MMMM d') : undefined, meta: movie.date ? format(movie.date, 'y yy M MMM MMMM d') : undefined,