Using dedicated releases search table for ts vector documents.

This commit is contained in:
ThePendulum 2020-02-26 22:33:15 +01:00
parent 4910f8650f
commit 3c30e9107a
5 changed files with 69 additions and 46 deletions

View File

@ -35,7 +35,7 @@ function initReleasesActions(store, _router) {
query: $query query: $query
first: $limit first: $limit
) { ) {
releaseById { release {
id id
title title
date date
@ -81,7 +81,7 @@ function initReleasesActions(store, _router) {
if (!res) return []; if (!res) return [];
return res.releases.map(release => curateRelease(release.releaseById)); return res.releases.map(release => curateRelease(release.release));
} }
async function fetchReleaseById({ _commit }, releaseId) { async function fetchReleaseById({ _commit }, releaseId) {

View File

@ -466,16 +466,33 @@ exports.up = knex => Promise.resolve()
table.unique(['tag_id', 'release_id']); table.unique(['tag_id', 'release_id']);
})) }))
.then(() => knex.schema.createTable('releases_search', (table) => {
table.integer('release_id', 16)
.references('id')
.inTable('releases');
}))
.then(() => knex.raw(` .then(() => knex.raw(`
ALTER TABLE releases_search
ADD COLUMN document tsvector;
CREATE UNIQUE INDEX releases_search_unique ON releases_search (release_id);
CREATE INDEX releases_search_index ON releases_search USING GIN (document);
COMMENT ON COLUMN actors.height IS E'@omit read,update,create,delete,all,many'; COMMENT ON COLUMN actors.height IS E'@omit read,update,create,delete,all,many';
COMMENT ON COLUMN actors.weight IS E'@omit read,update,create,delete,all,many'; COMMENT ON COLUMN actors.weight IS E'@omit read,update,create,delete,all,many';
CREATE FUNCTION search_sites(search text) RETURNS SETOF sites AS $$ CREATE FUNCTION search_sites(search text) RETURNS SETOF sites AS $$
SELECT * FROM sites SELECT * FROM sites
WHERE WHERE
name ILIKE ('%' || search || '%') OR name ILIKE ('%' || search || '%') OR
slug ILIKE ('%' || search || '%') OR slug ILIKE ('%' || search || '%') OR
url ILIKE ('%' || search || '%') url ILIKE ('%' || search || '%')
$$ LANGUAGE SQL STABLE;
CREATE FUNCTION search_releases(query text) RETURNS SETOF releases_search AS $$
SELECT * FROM releases_search AS search
WHERE search.document @@ plainto_tsquery(replace(query, '.', ' '))
ORDER BY ts_rank(search.document, plainto_tsquery(replace(query, '.', ' '))) DESC;
$$ LANGUAGE SQL STABLE; $$ LANGUAGE SQL STABLE;
/* /*
@ -519,6 +536,7 @@ exports.down = knex => knex.raw(`
DROP TABLE IF EXISTS releases_trailers CASCADE; DROP TABLE IF EXISTS releases_trailers CASCADE;
DROP TABLE IF EXISTS releases_teasers CASCADE; DROP TABLE IF EXISTS releases_teasers CASCADE;
DROP TABLE IF EXISTS releases_tags CASCADE; DROP TABLE IF EXISTS releases_tags CASCADE;
DROP TABLE IF EXISTS releases_search CASCADE;
DROP TABLE IF EXISTS actors_avatars CASCADE; DROP TABLE IF EXISTS actors_avatars CASCADE;
DROP TABLE IF EXISTS actors_photos CASCADE; DROP TABLE IF EXISTS actors_photos CASCADE;
DROP TABLE IF EXISTS actors_social CASCADE; DROP TABLE IF EXISTS actors_social CASCADE;

View File

@ -1,34 +0,0 @@
exports.up = knex => Promise.resolve()
.then(() => knex.raw(`
CREATE VIEW releases_document AS
SELECT
releases.id,
releases.site_id,
to_tsvector(releases.title) ||
to_tsvector(sites.name) ||
to_tsvector(sites.slug) ||
to_tsvector(replace(CAST(releases.date AS VARCHAR), '-', ' ')) ||
to_tsvector(string_agg(actors.name, ' ')) ||
to_tsvector(string_agg(tags.name, ' ')) as document
FROM releases
JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
JOIN sites ON releases.site_id = sites.id
JOIN actors ON local_actors.actor_id = actors.id
JOIN tags ON local_tags.tag_id = tags.id
GROUP BY releases.id, sites.name, sites.slug;
COMMENT ON VIEW releases_document IS E'@foreignKey (id) references releases (id)\n@foreignKey (site_id) references sites (id)';
CREATE FUNCTION search_releases(query text) RETURNS SETOF releases_document AS $$
SELECT * FROM releases_document AS search
WHERE search.document @@ plainto_tsquery(replace(query, '.', ' '))
ORDER BY ts_rank(search.document, plainto_tsquery(replace(query, '.', ' '))) DESC;
$$ LANGUAGE SQL STABLE;
`));
exports.down = knex => Promise.resolve()
.then(() => knex.raw(`
DROP FUNCTION IF EXISTS search_releases;
DROP VIEW IF EXISTS releases_document;
`));

View File

@ -106,6 +106,17 @@ const { argv } = yargs
type: 'boolean', type: 'boolean',
default: true, default: true,
}) })
.option('images', {
describe: 'Include any photos, posters or covers',
type: 'boolean',
default: true,
alias: 'pics',
})
.option('videos', {
describe: 'Include any trailers or teasers',
type: 'boolean',
default: true,
})
.option('posters', { .option('posters', {
describe: 'Include release posters', describe: 'Include release posters',
type: 'boolean', type: 'boolean',

View File

@ -343,32 +343,58 @@ async function storeReleaseAssets(releases) {
[release.id]: pluckItems(release.photos), [release.id]: pluckItems(release.photos),
}), {}); }), {});
if (argv.posters) { if (argv.images && argv.posters) {
const posters = await storeMedia(Object.values(releasePostersById).flat(), 'release', 'poster'); const posters = await storeMedia(Object.values(releasePostersById).flat(), 'release', 'poster');
if (posters) await associateMedia(releasePostersById, posters, 'release', 'poster'); if (posters) await associateMedia(releasePostersById, posters, 'release', 'poster');
} }
if (argv.covers) { if (argv.images && argv.covers) {
const covers = await storeMedia(Object.values(releaseCoversById).flat(), 'release', 'cover'); const covers = await storeMedia(Object.values(releaseCoversById).flat(), 'release', 'cover');
if (covers) await associateMedia(releaseCoversById, covers, 'release', 'cover'); if (covers) await associateMedia(releaseCoversById, covers, 'release', 'cover');
} }
if (argv.photos) { if (argv.images && argv.photos) {
const photos = await storeMedia(Object.values(releasePhotosById).flat(), 'release', 'photo'); const photos = await storeMedia(Object.values(releasePhotosById).flat(), 'release', 'photo');
if (photos) await associateMedia(releasePhotosById, photos, 'release', 'photo'); if (photos) await associateMedia(releasePhotosById, photos, 'release', 'photo');
} }
if (argv.trailers) { if (argv.videos && argv.trailers) {
const trailers = await storeMedia(Object.values(releaseTrailersById).flat(), 'release', 'trailer'); const trailers = await storeMedia(Object.values(releaseTrailersById).flat(), 'release', 'trailer');
if (trailers) await associateMedia(releaseTrailersById, trailers, 'release', 'trailer'); if (trailers) await associateMedia(releaseTrailersById, trailers, 'release', 'trailer');
} }
if (argv.teasers) { if (argv.videos && argv.teasers) {
const teasers = await storeMedia(Object.values(releaseTeasersById).flat(), 'release', 'teaser'); const teasers = await storeMedia(Object.values(releaseTeasersById).flat(), 'release', 'teaser');
if (teasers) await associateMedia(releaseTeasersById, teasers, 'release', 'teaser'); if (teasers) await associateMedia(releaseTeasersById, teasers, 'release', 'teaser');
} }
} }
async function updateReleasesSearch(releaseIds) {
const documents = await knex.raw(`
SELECT
releases.id as release_id,
to_tsvector(
releases.title || ' ' ||
sites.name || ' ' ||
sites.slug || ' ' ||
replace(CAST(releases.date AS VARCHAR), '-', ' ') || ' ' ||
string_agg(actors.name, ' ') || ' ' ||
string_agg(tags.name, ' ')
) as document
FROM releases
JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
JOIN sites ON releases.site_id = sites.id
JOIN actors ON local_actors.actor_id = actors.id
JOIN tags ON local_tags.tag_id = tags.id
WHERE releases.id = ANY(?)
GROUP BY releases.id, sites.name, sites.slug;
`, [releaseIds]);
const query = knex('releases_search').insert(documents.rows).toString();
return knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
}
async function storeRelease(release) { async function storeRelease(release) {
const existingRelease = await knex('releases') const existingRelease = await knex('releases')
.where({ .where({
@ -444,6 +470,8 @@ async function storeReleases(releases) {
storeReleaseAssets(storedReleases), storeReleaseAssets(storedReleases),
]); ]);
await updateReleasesSearch(storedReleases.map(release => release.id));
if (argv.withProfiles && Object.keys(actors).length > 0) { if (argv.withProfiles && Object.keys(actors).length > 0) {
await scrapeBasicActors(); await scrapeBasicActors();
} }