diff --git a/src/deep.js b/src/deep.js index 047d6aae..8acaf59d 100755 --- a/src/deep.js +++ b/src/deep.js @@ -84,9 +84,15 @@ async function fetchScene(scraper, url, entity, baseRelease, options, type = 'sc } if ((type === 'scene' && scraper.scrapeScene) || (type === 'movie' && scraper.scrapeMovie)) { + /* if (scraper.useUnprint || (type === 'scene' && scraper.scrapeScene?.unprint) || (type === 'movie' && scraper.scrapeMovie?.unprint)) { return fetchUnprintScene(scraper, url, entity, baseRelease, options, type); } + */ + + if (!scraper.deprecated) { + return fetchUnprintScene(scraper, url, entity, baseRelease, options, type); + } const session = qu.session(); @@ -191,7 +197,7 @@ async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') { }), }), {}); - curatedScrapedRelease.poster = null; + // curatedScrapedRelease.poster = null; // wat const mergedRelease = { ...merge(baseRelease, curatedScrapedRelease, { @@ -199,6 +205,9 @@ async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') { hardMergeKeys: ['actors', 'covers', 'poster', 'trailer', 'teaser'], ignoreKeys: ['poster'], }), + datePrecision: curatedScrapedRelease.date // don't inherit date precision from base release + ? curatedScrapedRelease.datePrecision + : baseRelease.datePrecision, poster: Array.from(new Set([ ...[].concat(curatedScrapedRelease.poster), ...[].concat(baseRelease.poster), diff --git a/src/scrapers/adultempire.js b/src/scrapers/adultempire.js index 0cdb4ec3..3bfc76bc 100755 --- a/src/scrapers/adultempire.js +++ b/src/scrapers/adultempire.js @@ -205,12 +205,6 @@ module.exports = { fetchLatest, // fetchMovies, fetchProfile, - scrapeScene: { - scraper: scrapeRelease, - unprint: true, - }, - scrapeMovie: { - scraper: scrapeRelease, - unprint: true, - }, + scrapeScene: scrapeRelease, + scrapeMovie: scrapeRelease, }; diff --git a/src/scrapers/amnesiac.js b/src/scrapers/amnesiac.js index ff3ec4c5..793cc4f8 100644 --- a/src/scrapers/amnesiac.js +++ b/src/scrapers/amnesiac.js @@ -74,8 +74,5 @@ async function fetchLatest(channel, page = 1) { module.exports = { fetchLatest, - scrapeScene: { - scraper: scrapeScene, - unprint: true, - }, + scrapeScene, }; diff --git a/src/scrapers/analvids.js b/src/scrapers/analvids.js index 1976045d..2b24ea30 100644 --- a/src/scrapers/analvids.js +++ b/src/scrapers/analvids.js @@ -138,9 +138,6 @@ async function fetchProfile(actor, { channel }) { module.exports = { fetchLatest, - scrapeScene: { - scraper: scrapeScene, - unprint: true, - }, + scrapeScene, fetchProfile, }; diff --git a/src/scrapers/archangel.js b/src/scrapers/archangel.js index 1dc23435..90fafc90 100755 --- a/src/scrapers/archangel.js +++ b/src/scrapers/archangel.js @@ -208,12 +208,6 @@ async function fetchProfile({ name: actorName, url: actorUrl }, { entity, includ module.exports = { fetchLatest, fetchProfile, - scrapeScene: { - scraper: scrapeScene, - unprint: true, - }, - scrapeMovie: { - scraper: scrapeMovie, - unprint: true, - }, + scrapeScene, + scrapeMovie, }; diff --git a/src/scrapers/bang.js b/src/scrapers/bang.js index e8d43708..f87f0a83 100755 --- a/src/scrapers/bang.js +++ b/src/scrapers/bang.js @@ -225,5 +225,4 @@ module.exports = { fetchUpcoming, fetchProfile, scrapeScene, - useUnprint: true, }; diff --git a/src/scrapers/bluedonkeymedia.js b/src/scrapers/bluedonkeymedia.js index 14e44e82..3b938591 100644 --- a/src/scrapers/bluedonkeymedia.js +++ b/src/scrapers/bluedonkeymedia.js @@ -234,7 +234,6 @@ module.exports = { fetchProfile, scrapeScene: { scraper: scrapeScene, - unprint: true, parser: { runScripts: 'dangerously', }, diff --git a/src/scrapers/bradmontana.js b/src/scrapers/bradmontana.js index 6bf7eafa..233b924d 100755 --- a/src/scrapers/bradmontana.js +++ b/src/scrapers/bradmontana.js @@ -1,6 +1,7 @@ 'use strict'; -const qu = require('../utils/q'); +const unprint = require('unprint'); + const slugify = require('../utils/slugify'); function genderFromUrl(url) { @@ -20,18 +21,21 @@ function genderFromUrl(url) { function scrapeAll(scenes) { return scenes.map(({ query }) => { const release = {}; - const subtitle = query.cnt('.subtitle'); - release.url = query.url('a'); + release.url = query.url(null); release.entryId = new URL(release.url).pathname.match(/\/videos\/([\w-]+)/)[1]; - release.title = query.cnt('.title') || query.q('img', 'title'); - release.actors = subtitle.slice(subtitle.indexOf(':') + 1).split(',').map((actor) => actor.trim()).filter(Boolean); + release.title = query.attribute('img', 'title') || query.content('.font-semibold'); - release.poster = query.img('.thumb img'); + const poster = query.img('img[src*="/uploads"]'); - if (release.poster) { - const match = release.poster.match(/\/uploads\/(\d{4})\/(\d{2})/); + if (poster) { + release.poster = [ + poster.replace(/-\d+x\d+/, ''), + poster, + ]; + + const match = poster.match(/\/uploads\/(\d{4})\/(\d{2})/); if (match) { release.date = new Date(match[1], match[2] - 1, 1); @@ -43,30 +47,34 @@ function scrapeAll(scenes) { }); } -function scrapeScene({ query, html }, url, channel) { +function scrapeScene({ query, html }, { url, entity }) { const release = {}; - const dataString = query.html('.yoast-schema-graph'); - const data = dataString && JSON.parse(dataString)['@graph']; - const pageData = data.find((item) => item['@type'] === 'WebPage'); - const imageData = data.find((item) => item['@type'] === 'ImageObject'); + const data = query.json('.yoast-schema-graph')?.['@graph']; + + const pageData = data?.find((item) => item['@type'] === 'WebPage'); + const imageData = data?.find((item) => item['@type'] === 'ImageObject'); release.entryId = new URL(url).pathname.match(/\/videos\/([\w-]+)/)[1]; - release.title = query.cnt('.video .title h1') - || data.find((item) => item['@type'] === 'BreadcrumbList')?.itemListElement.slice(-1)[0].item.name - || pageData?.name.slice(0, pageData.name.lastIndexOf('-')).trim(); + release.title = query.content('.w-screen + div .font-semibold') + || data?.find((item) => item['@type'] === 'BreadcrumbList')?.itemListElement.slice(-1)[0].item?.name + || pageData?.name.slice(0, pageData?.name.lastIndexOf('-')).trim(); - release.description = query.cnt('.video .descript'); + release.description = query.content('.leading-relaxed'); + release.date = pageData?.datePublished && new Date(pageData.datePublished); - release.date = pageData.datePublished && new Date(pageData.datePublished); - - release.actors = query.all('.video .elenco a').map((el) => { - const actorUrl = query.url(el, null); + release.actors = query.elements('.models-slider-single a').map((el) => { + const actorUrl = unprint.query.url(el, null); + const avatarUrl = unprint.query.img(el); return { - name: query.cnt(el), + name: unprint.query.content(el), url: actorUrl, + avatar: [ + avatarUrl?.replace(/-\d+x\d+/, ''), + avatarUrl, + ], gender: genderFromUrl(actorUrl), }; }); @@ -75,11 +83,8 @@ function scrapeScene({ query, html }, url, channel) { || query.meta('property="og:image"') || html.match(/poster: '(http.*\.jpg)'/)?.[1]; - release.photos = query.imgs('.listPostSm a', 'href'); - release.trailer = query.video('source', 'src', { origin: channel.url }); - - release.likes = query.number('.vortex-p-like-counter'); - release.dislikes = query.number('.vortex-p-dislike-counter'); + release.photos = query.imgs('.gallery img'); + release.trailer = query.video('source', 'src', { origin: entity.url }); if (!release.date && release.poster) { const match = release.poster.match(/\/uploads\/(\d{4})\/(\d{2})/); @@ -93,38 +98,42 @@ function scrapeScene({ query, html }, url, channel) { return release; } -function scrapeProfile({ query, el }, entity, url) { +function scrapeProfile({ query }, entity, url) { const profile = { url }; + const data = query.json('.yoast-schema-graph'); profile.gender = genderFromUrl(url); - profile.description = query.cnt('.about')?.replace(/sobre a atriz:/i, '').trim(); - profile.avatar = query.img('.left .thumb img'); - - profile.scenes = scrapeAll(qu.initAll(el, '.listPostLg .post')); + if (data) { + profile.avatar = data['@graph']?.find((item) => item['@type'] === 'ImageObject')?.url; + } return profile; } async function fetchLatest(channel, page = 1) { const url = `${channel.url}/videos/page/${page}`; - const res = await qu.getAll(url, '.listPostLg .post'); + const res = await unprint.get(url, { selectAll: '.grid > a[href*="/videos"]' }); if (res.ok) { - return scrapeAll(res.items, channel); + return scrapeAll(res.context, channel); } return res.status; } -async function fetchProfilePage({ name, gender }, entity, secondAttempt) { - const url = `${entity.url}/${gender === 'male' || secondAttempt ? 'atores' : 'atrizes'}/${slugify(name, '-')}`; - const res = await qu.get(url); +async function fetchProfilePage({ name, gender, url: actorUrl }, entity, secondAttempt) { + const url = actorUrl || `${entity.url}/${gender === 'male' || secondAttempt ? 'atores' : 'atrizes'}/${slugify(name, '-')}`; + const res = await unprint.get(url); if (res.ok) { return { res, url }; } + if (actorUrl) { + return fetchProfilePage({ name, gender }, entity, false); // don't count as second attempt, retry without actor URL + } + if (secondAttempt) { return res.status; } @@ -136,7 +145,7 @@ async function fetchProfile(baseActor, entity, options) { const { res, url } = await fetchProfilePage(baseActor, entity, false); if (res.ok) { - return scrapeProfile(res.item, entity, url, options); + return scrapeProfile(res.context, entity, url, options); } return res.status; diff --git a/src/scrapers/fabulouscash.js b/src/scrapers/fabulouscash.js index 7ad9a62e..44129906 100755 --- a/src/scrapers/fabulouscash.js +++ b/src/scrapers/fabulouscash.js @@ -75,5 +75,4 @@ async function fetchLatest(channel, page = 1) { module.exports = { fetchLatest, scrapeScene, - useUnprint: true, }; diff --git a/src/scrapers/gamma.js b/src/scrapers/gamma.js index 2b975104..89f78254 100755 --- a/src/scrapers/gamma.js +++ b/src/scrapers/gamma.js @@ -981,4 +981,5 @@ module.exports = { scrapeAll, scrapeMovie, scrapeScene, + deprecated: true, }; diff --git a/src/scrapers/innofsin.js b/src/scrapers/innofsin.js index b0b5e666..8dbd437b 100755 --- a/src/scrapers/innofsin.js +++ b/src/scrapers/innofsin.js @@ -93,8 +93,5 @@ async function fetchProfile({ name: actorName }, entity, include) { module.exports = { fetchLatest, fetchProfile, - scrapeScene: { - scraper: scrapeScene, - unprint: true, - }, + scrapeScene, }; diff --git a/src/scrapers/julesjordan.js b/src/scrapers/julesjordan.js index 2470b499..ba376a3f 100755 --- a/src/scrapers/julesjordan.js +++ b/src/scrapers/julesjordan.js @@ -351,8 +351,5 @@ module.exports = { fetchMovie, fetchProfile, fetchUpcoming, - scrapeScene: { - scraper: scrapeScene, - unprint: true, - }, + scrapeScene, }; diff --git a/src/scrapers/karups.js b/src/scrapers/karups.js index 6ca684f0..ee49591f 100755 --- a/src/scrapers/karups.js +++ b/src/scrapers/karups.js @@ -95,4 +95,5 @@ module.exports = { fetchLatest, fetchProfile, scrapeScene, + deprecated: true, }; diff --git a/src/scrapers/loveherfilms.js b/src/scrapers/loveherfilms.js index 8c743d61..9a8d2984 100755 --- a/src/scrapers/loveherfilms.js +++ b/src/scrapers/loveherfilms.js @@ -151,4 +151,5 @@ module.exports = { fetchLatest, fetchProfile, scrapeScene, + deprecated: true, }; diff --git a/src/scrapers/mariskax.js b/src/scrapers/mariskax.js index c5cd2d36..d7447713 100644 --- a/src/scrapers/mariskax.js +++ b/src/scrapers/mariskax.js @@ -88,9 +88,6 @@ async function fetchProfile(actor) { module.exports = { fetchLatest, - scrapeScene: { - scraper: scrapeScene, - unprint: true, - }, + scrapeScene, fetchProfile, }; diff --git a/src/scrapers/missax.js b/src/scrapers/missax.js index eb2ff8a3..c98d0694 100644 --- a/src/scrapers/missax.js +++ b/src/scrapers/missax.js @@ -87,8 +87,5 @@ async function fetchProfile({ name }, entity) { module.exports = { fetchLatest, fetchProfile, - scrapeScene: { - scraper: scrapeScene, - unprint: true, - }, + scrapeScene, }; diff --git a/src/scrapers/naughtyamerica.js b/src/scrapers/naughtyamerica.js index a0c1e876..e4a31cf3 100755 --- a/src/scrapers/naughtyamerica.js +++ b/src/scrapers/naughtyamerica.js @@ -130,8 +130,5 @@ async function fetchProfile({ slug }, { channel }) { module.exports = { fetchLatest, fetchProfile, - scrapeScene: { - scraper: scrapeScene, - unprint: true, - }, + scrapeScene, }; diff --git a/src/scrapers/newsensations.js b/src/scrapers/newsensations.js index 9cf60f1c..ace00ac3 100755 --- a/src/scrapers/newsensations.js +++ b/src/scrapers/newsensations.js @@ -158,10 +158,8 @@ async function fetchLatestBlock(site, page) { module.exports = { fetchLatest: fetchLatestClassic, scrapeScene: scrapeSceneClassic, - useUnprint: true, block: { scrapeScene: scrapeSceneBlock, fetchLatest: fetchLatestBlock, - useUnprint: true, }, }; diff --git a/src/scrapers/nubiles.js b/src/scrapers/nubiles.js index e5fe6de5..9231e6a3 100755 --- a/src/scrapers/nubiles.js +++ b/src/scrapers/nubiles.js @@ -160,4 +160,5 @@ module.exports = { fetchUpcoming, fetchProfile, scrapeScene, + deprecated: true, }; diff --git a/src/scrapers/pierrewoodman.js b/src/scrapers/pierrewoodman.js index 94c37261..fe7a9b49 100755 --- a/src/scrapers/pierrewoodman.js +++ b/src/scrapers/pierrewoodman.js @@ -150,4 +150,5 @@ module.exports = { fetchLatest, scrapeScene, fetchProfile, + deprecated: true, }; diff --git a/src/scrapers/purgatoryx.js b/src/scrapers/purgatoryx.js index 34d9656c..46271288 100755 --- a/src/scrapers/purgatoryx.js +++ b/src/scrapers/purgatoryx.js @@ -169,4 +169,5 @@ module.exports = { fetchUpcoming, scrapeAll, scrapeScene, + deprecated: true, }; diff --git a/src/scrapers/spizoo.js b/src/scrapers/spizoo.js index c995182d..59c43272 100755 --- a/src/scrapers/spizoo.js +++ b/src/scrapers/spizoo.js @@ -168,4 +168,5 @@ module.exports = { fetchLatest, fetchProfile, scrapeScene, + deprecated: true, }; diff --git a/src/scrapers/teenmegaworld.js b/src/scrapers/teenmegaworld.js index 4f198100..7a666d8e 100755 --- a/src/scrapers/teenmegaworld.js +++ b/src/scrapers/teenmegaworld.js @@ -118,8 +118,5 @@ async function fetchProfile(actor, entity, include) { module.exports = { fetchLatest, fetchProfile, - scrapeScene: { - scraper: scrapeScene, - unprint: true, - }, + scrapeScene, }; diff --git a/src/scrapers/testedefudelidade.js b/src/scrapers/testedefudelidade.js index 5f126180..989fe365 100644 --- a/src/scrapers/testedefudelidade.js +++ b/src/scrapers/testedefudelidade.js @@ -104,8 +104,5 @@ async function fetchProfile(actor, { entity }) { module.exports = { fetchLatest, fetchProfile, - scrapeScene: { - scraper: scrapeScene, - unprint: true, - }, + scrapeScene, }; diff --git a/src/scrapers/wankzvr.js b/src/scrapers/wankzvr.js index b84ef755..31461ba8 100755 --- a/src/scrapers/wankzvr.js +++ b/src/scrapers/wankzvr.js @@ -174,4 +174,5 @@ module.exports = { fetchLatest, scrapeScene, fetchProfile, + deprecated: true, }; diff --git a/src/scrapers/whalemember.js b/src/scrapers/whalemember.js index b105ea51..33186a02 100755 --- a/src/scrapers/whalemember.js +++ b/src/scrapers/whalemember.js @@ -86,8 +86,5 @@ async function fetchLatest(channel, page = 1) { module.exports = { fetchLatest, - scrapeScene: { - scraper: scrapeScene, - unprint: true, - }, + scrapeScene, }; diff --git a/src/store-releases.js b/src/store-releases.js index b363b9fe..3684c7f8 100755 --- a/src/store-releases.js +++ b/src/store-releases.js @@ -199,11 +199,11 @@ function filterInternalDuplicateReleases(releases) { .flat(); } -async function filterDuplicateReleases(releases) { +async function filterDuplicateReleases(releases, domain = 'releases') { const internalUniqueReleases = filterInternalDuplicateReleases(releases); const internalUniqueReleaseChunks = chunk(internalUniqueReleases); - const duplicateReleaseEntryChunks = await Promise.map(internalUniqueReleaseChunks, async (internalUniqueReleasesChunk) => knex('releases') + const duplicateReleaseEntryChunks = await Promise.map(internalUniqueReleaseChunks, async (internalUniqueReleasesChunk) => knex(domain) .whereIn(['entry_id', 'entity_id'], internalUniqueReleasesChunk.map((release) => [release.entryId, release.entity.id])) .orWhereIn(['entry_id', 'entity_id'], internalUniqueReleasesChunk // scene IDs shared across network, mark as duplicate so scene can be updated with channel if only available on release day (i.e. Perv City) @@ -349,7 +349,7 @@ async function storeMovies(movies, useBatchId) { return []; } - const { uniqueReleases } = await filterDuplicateReleases(movies); + const { uniqueReleases } = await filterDuplicateReleases(movies, 'movies'); const [{ id: batchId }] = useBatchId ? [{ id: useBatchId }] : await knex('batches').insert({ showcased: argv.showcased, comment: null }).returning('id'); const curatedMovieEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId, null, 'movie'))); @@ -357,9 +357,10 @@ async function storeMovies(movies, useBatchId) { const storedMovies = await bulkInsert('movies', curatedMovieEntries, ['entity_id', 'entry_id'], true); const moviesWithId = attachReleaseIds(movies, storedMovies); - await updateMovieSearch(moviesWithId.map((movie) => movie.id)); - await associateReleaseMedia(moviesWithId, 'movie'); await associateReleaseTags(moviesWithId, 'movie'); + await updateMovieSearch(moviesWithId.map((movie) => movie.id)); + + await associateReleaseMedia(moviesWithId, 'movie'); return moviesWithId; } @@ -369,7 +370,7 @@ async function storeSeries(series, useBatchId) { return []; } - const { uniqueReleases } = await filterDuplicateReleases(series); + const { uniqueReleases } = await filterDuplicateReleases(series, 'series'); const [{ id: batchId }] = useBatchId ? [{ id: useBatchId }] : await knex('batches').insert({ showcased: argv.showcased, comment: null }).returning('id'); const curatedSerieEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId, null, 'serie'))); @@ -395,7 +396,7 @@ async function storeScenes(releases, useBatchId) { const releasesWithStudios = await attachStudios(releasesWithBaseActors); // uniqueness is entity ID + entry ID, filter uniques after adding entities - const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios); + const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios, 'releases'); const curatedNewReleaseEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId))); const storedReleases = await bulkInsert('releases', curatedNewReleaseEntries); @@ -433,6 +434,7 @@ async function storeScenes(releases, useBatchId) { await associateSerieScenes(storedSeries, releasesWithId); await associateDirectors(releasesWithId, batchId); // some directors may also be actors, don't associate at the same time + await updateSceneSearch(releasesWithId.map((release) => release.id)); // media is more error-prone, associate separately diff --git a/src/tools/manticore-movies.js b/src/tools/manticore-movies.js index 18d8529a..1b12e8c2 100644 --- a/src/tools/manticore-movies.js +++ b/src/tools/manticore-movies.js @@ -34,10 +34,12 @@ async function fetchMovies() { parents.name as network_name, movies_covers IS NOT NULL as has_cover, COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors, - COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags + COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags, + COALESCE(JSON_AGG(DISTINCT (movie_tags.id, movie_tags.name, movie_tags.priority, movie_tags_aliases.name)) FILTER (WHERE movie_tags.id IS NOT NULL), '[]') as movie_tags FROM movies LEFT JOIN movies_meta ON movies_meta.movie_id = movies.id LEFT JOIN movies_scenes ON movies_scenes.movie_id = movies.id + LEFT JOIN movies_tags ON movies_tags.movie_id = movies.id LEFT JOIN entities ON movies.entity_id = entities.id LEFT JOIN entities AS parents ON parents.id = entities.parent_id LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = movies_scenes.scene_id @@ -47,6 +49,8 @@ async function fetchMovies() { LEFT JOIN actors AS directors ON local_directors.director_id = directors.id LEFT JOIN tags ON local_tags.tag_id = tags.id LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true + LEFT JOIN tags as movie_tags ON movies_tags.tag_id = movie_tags.id + LEFT JOIN tags as movie_tags_aliases ON movies_tags.tag_id = movie_tags_aliases.alias_for AND movie_tags_aliases.secondary = true LEFT JOIN movies_covers ON movies_covers.movie_id = movies.id GROUP BY movies.id, @@ -101,8 +105,15 @@ async function init() { const movies = await fetchMovies(); const docs = movies.map((movie) => { + const combinedTags = Object.values(Object.fromEntries(movie.tags.concat(movie.movie_tags).map((tag) => [tag.f1, { + id: tag.f1, + name: tag.f2, + priority: tag.f3, + alias: tag.f4, + }]))); + const flatActors = movie.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc. - const flatTags = movie.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => (tag.f4 ? `${tag.f2} ${tag.f4}` : tag.f2).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results + const flatTags = combinedTags.filter((tag) => tag.priority > 6).flatMap((tag) => (tag.alias ? `${tag.name} ${tag.alias}` : tag.name).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results const filteredTitle = movie.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag.replace(/[^\w\s]+/g, ''), 'gi'), ''), movie.title).trim().replace(/\s{2,}/g, ' '); return { @@ -124,7 +135,7 @@ async function init() { entity_ids: [movie.channel_id, movie.network_id].filter(Boolean), // manticore does not support OR, this allows IN actor_ids: movie.actors.map((actor) => actor.f1), actors: movie.actors.map((actor) => actor.f2).join(), - tag_ids: movie.tags.map((tag) => tag.f1), + tag_ids: combinedTags.map((tag) => tag.id), tags: flatTags.join(' '), has_cover: movie.has_cover, meta: movie.date ? format(movie.date, 'y yy M MMM MMMM d') : undefined, diff --git a/src/update-search.js b/src/update-search.js index f11413ee..e26e94f2 100644 --- a/src/update-search.js +++ b/src/update-search.js @@ -235,10 +235,12 @@ async function updateManticoreMovieSearch(movieIds) { parents.name as network_name, movies_covers IS NOT NULL as has_cover, COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors, - COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags + COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags, + COALESCE(JSON_AGG(DISTINCT (movie_tags.id, movie_tags.name, movie_tags.priority, movie_tags_aliases.name)) FILTER (WHERE movie_tags.id IS NOT NULL), '[]') as movie_tags FROM movies LEFT JOIN movies_meta ON movies_meta.movie_id = movies.id LEFT JOIN movies_scenes ON movies_scenes.movie_id = movies.id + LEFT JOIN movies_tags ON movies_tags.movie_id = movies.id LEFT JOIN entities ON movies.entity_id = entities.id LEFT JOIN entities AS parents ON parents.id = entities.parent_id LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = movies_scenes.scene_id @@ -248,6 +250,8 @@ async function updateManticoreMovieSearch(movieIds) { LEFT JOIN actors AS directors ON local_directors.director_id = directors.id LEFT JOIN tags ON local_tags.tag_id = tags.id LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true + LEFT JOIN tags as movie_tags ON movies_tags.tag_id = movie_tags.id + LEFT JOIN tags as movie_tags_aliases ON movies_tags.tag_id = movie_tags_aliases.alias_for AND movie_tags_aliases.secondary = true LEFT JOIN movies_covers ON movies_covers.movie_id = movies.id ${movieIds ? 'WHERE movies.id = ANY(?)' : ''} GROUP BY @@ -270,8 +274,15 @@ async function updateManticoreMovieSearch(movieIds) { `, movieIds && [movieIds]); const docs = movies.rows.map((movie) => { + const combinedTags = Object.values(Object.fromEntries(movie.tags.concat(movie.movie_tags).map((tag) => [tag.f1, { + id: tag.f1, + name: tag.f2, + priority: tag.f3, + alias: tag.f4, + }]))); + const flatActors = movie.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc. - const flatTags = movie.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => (tag.f4 ? `${tag.f2} ${tag.f4}` : tag.f2).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results + const flatTags = combinedTags.filter((tag) => tag.priority > 6).flatMap((tag) => (tag.alias ? `${tag.name} ${tag.alias}` : tag.name).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results const filteredTitle = movie.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag.replace(/[^\w\s]+/g, ''), 'gi'), ''), movie.title).trim().replace(/\s{2,}/g, ' '); return { @@ -293,7 +304,7 @@ async function updateManticoreMovieSearch(movieIds) { entity_ids: [movie.channel_id, movie.network_id].filter(Boolean), // manticore does not support OR, this allows IN actor_ids: movie.actors.map((actor) => actor.f1), actors: movie.actors.map((actor) => actor.f2).join(), - tag_ids: movie.tags.map((tag) => tag.f1), + tag_ids: combinedTags.map((tag) => tag.id), tags: flatTags.join(' '), has_cover: movie.has_cover, meta: movie.date ? format(movie.date, 'y yy M MMM MMMM d') : undefined,