From 8dd5925af6d0f64f3de85fdf2def0fbc141031af Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Sat, 29 Feb 2020 22:47:48 +0100 Subject: [PATCH] Improved search engine query and added stop words. Added 'secondary' property to tag aliases, for tag aliases to be included in searches and alias lists. --- README.md | 2 + assets/css/style.scss | 2 +- migrations/20190325001339_releases.js | 50 +++------ seeds/00_tags.js | 50 +++++++-- src/releases.js | 4 +- traxxx.stop | 156 ++++++++++++++++++++++++++ 6 files changed, 215 insertions(+), 49 deletions(-) create mode 100644 traxxx.stop diff --git a/README.md b/README.md index 1dd19b521..4ccf0857c 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,8 @@ Use [nvm](https://github.com/creationix/nvm) to install a recent version of Node `npm start` +For optimal search engine performance, copy `traxxx.stop` to your PostgreSQL text search directory, usually `/usr/share/postgresql/tsearch_data/` or `/usr/local/share/postgresql/tsearch_data/`. + ### Configuration Do not modify `config/default.js`, but instead create a copy at `config/local.js` containing the properties you wish to change. 
diff --git a/assets/css/style.scss b/assets/css/style.scss index 61ae040ac..46fa73b85 100644 --- a/assets/css/style.scss +++ b/assets/css/style.scss @@ -11,7 +11,7 @@ body { body { color: $text; margin: 0; - font-family: Verdana, sans-serif; + font-family: Arial, Helvetica, sans-serif; } .nolist { diff --git a/migrations/20190325001339_releases.js b/migrations/20190325001339_releases.js index e7c313df8..2ddbf8c86 100644 --- a/migrations/20190325001339_releases.js +++ b/migrations/20190325001339_releases.js @@ -65,6 +65,9 @@ exports.up = knex => Promise.resolve() table.integer('priority', 2) .defaultTo(0); + table.boolean('secondary') + .defaultTo(false); + table.integer('group_id', 12) .references('id') .inTable('tags_groups'); @@ -476,7 +479,7 @@ exports.up = knex => Promise.resolve() ALTER TABLE releases_search ADD COLUMN document tsvector; - CREATE TEXT SEARCH DICTIONARY traxxx ( + CREATE TEXT SEARCH DICTIONARY traxxx_dict ( TEMPLATE = pg_catalog.simple, stopwords = traxxx ); @@ -485,11 +488,17 @@ exports.up = knex => Promise.resolve() COPY = english ); + ALTER TEXT SEARCH CONFIGURATION traxxx + ALTER MAPPING FOR word, numword, hword, numhword, hword_part, hword_numpart, asciiword, asciihword, hword_asciipart WITH traxxx_dict, simple_dict, english_stem; + CREATE UNIQUE INDEX releases_search_unique ON releases_search (release_id); CREATE INDEX releases_search_index ON releases_search USING GIN (document); - COMMENT ON COLUMN actors.height IS E'@omit read,update,create,delete,all,many'; - COMMENT ON COLUMN actors.weight IS E'@omit read,update,create,delete,all,many'; + CREATE FUNCTION search_releases(query text) RETURNS SETOF releases_search AS $$ + SELECT * FROM releases_search AS search + WHERE search.document @@ plainto_tsquery('traxxx', regexp_replace(query, '\\.|-', ' ', 'g')) + ORDER BY ts_rank(search.document, plainto_tsquery('traxxx', regexp_replace(query, '\\.|-', ' ', 'g'))) DESC; + $$ LANGUAGE SQL STABLE; CREATE FUNCTION search_sites(search text) 
RETURNS SETOF sites AS $$ SELECT * FROM sites @@ -499,37 +508,8 @@ exports.up = knex => Promise.resolve() url ILIKE ('%' || search || '%') $$ LANGUAGE SQL STABLE; - CREATE FUNCTION search_releases(query text) RETURNS SETOF releases_search AS $$ - SELECT * FROM releases_search AS search - WHERE search.document @@ plainto_tsquery('traxxx', replace(query, '.', ' ')) - ORDER BY ts_rank(search.document, plainto_tsquery('traxxx', replace(query, '.', ' '))) DESC; - $$ LANGUAGE SQL STABLE; - - /* - CREATE VIEW releases_actors_sortable AS - SELECT releases_actors.*, actors.gender, actors.name, actors.birthdate FROM releases_actors - JOIN actors ON releases_actors.actor_id = actors.id; - - CREATE VIEW releases_tags_sortable AS - SELECT releases_tags.*, tags.name, tags.priority FROM releases_tags - JOIN tags ON releases_tags.tag_id = tags.id; - - CREATE VIEW actors_releases_sortable AS - SELECT releases_actors.*, releases.date FROM releases_actors - JOIN releases ON releases_actors.release_id = releases.id; - - COMMENT ON VIEW releases_actors_sortable IS E'@foreignKey (release_id) references releases (id)\n@foreignKey (actor_id) references actors (id)'; - COMMENT ON VIEW releases_tags_sortable IS E'@foreignKey (release_id) references releases (id)\n@foreignKey (tag_id) references tags (id)'; - COMMENT ON VIEW actors_releases_sortable IS E'@foreignKey (release_id) references releases (id)\n@foreignKey (actor_id) references actors (id)'; - - /* allow conversion resolver to be added for height and weight */ - CREATE FUNCTION releases_by_tag_slugs(slugs text[]) RETURNS setof releases AS $$ - SELECT DISTINCT ON (releases.id) releases.* FROM releases - JOIN releases_tags ON (releases_tags.release_id = releases.id) - JOIN tags ON (releases_tags.tag_id = tags.id) - WHERE tags.slug = ANY($1); - $$ LANGUAGE sql STABLE; - */ + COMMENT ON COLUMN actors.height IS E'@omit read,update,create,delete,all,many'; + COMMENT ON COLUMN actors.weight IS E'@omit read,update,create,delete,all,many'; 
`)); exports.down = knex => knex.raw(` @@ -568,5 +548,5 @@ exports.down = knex => knex.raw(` DROP TABLE IF EXISTS networks CASCADE; DROP TEXT SEARCH CONFIGURATION IF EXISTS traxxx; - DROP TEXT SEARCH DICTIONARY IF EXISTS traxxx; + DROP TEXT SEARCH DICTIONARY IF EXISTS traxxx_dict; `); diff --git a/seeds/00_tags.js b/seeds/00_tags.js index 16f62a81b..e197495c1 100644 --- a/seeds/00_tags.js +++ b/seeds/00_tags.js @@ -345,8 +345,8 @@ const tags = [ slug: 'electric-shock', }, { - name: 'enhanced boobs', - slug: 'enhanced-boobs', + name: 'fake boobs', + slug: 'fake-boobs', }, { name: 'facefucking', @@ -809,6 +809,10 @@ const tags = [ ]; const aliases = [ + { + name: '2on1', + for: 'threesome', + }, { name: '2-on-1', for: 'threesome', @@ -852,6 +856,7 @@ const aliases = [ { name: 'atm', for: 'ass-to-mouth', + secondary: true, }, { name: 'bald pussy', @@ -867,7 +872,7 @@ const aliases = [ }, { name: 'mfm', - for: 'mfm', + for: 'mmf', }, { name: 'fmf', @@ -959,11 +964,11 @@ const aliases = [ }, { name: 'boob job', - for: 'enhanced-boobs', + for: 'fake-boobs', }, { name: 'boobjob', - for: 'enhanced-boobs', + for: 'fake-boobs', }, { name: 'brown hair', @@ -976,6 +981,7 @@ const aliases = [ { name: 'buttplug', for: 'anal-toys', + secondary: true, }, { name: 'butt plug', @@ -1010,7 +1016,7 @@ const aliases = [ for: 'anal-creampie', }, { - name: 'crop', // a type of whip, not [sic short for corporal + name: 'crop', // a type of whip, not [sic] short for corporal for: 'corporal-punishment', }, { @@ -1024,6 +1030,7 @@ const aliases = [ { name: 'cum in mouth', for: 'oral-creampie', + secondary: true, }, { name: 'cum on ass', @@ -1052,6 +1059,7 @@ const aliases = [ { name: 'deep throat', for: 'deepthroat', + secondary: true, }, { name: 'deepthroating', @@ -1064,6 +1072,7 @@ const aliases = [ { name: 'doggystyle', for: 'doggy-style', + secondary: true, }, { name: 'doggy style - pov', @@ -1096,10 +1105,12 @@ const aliases = [ { name: 'dominatrix', for: 'femdom', + secondary: true, }, 
{ name: 'dp', for: 'double-penetration', + secondary: true, }, { name: 'first dp', @@ -1116,6 +1127,7 @@ const aliases = [ { name: 'dap', for: 'double-anal', + secondary: true, }, { name: 'double anal (dap)', @@ -1128,14 +1140,17 @@ const aliases = [ { name: 'tap', for: 'triple-anal', + secondary: true, }, { name: 'dpp', for: 'double-vaginal', + secondary: true, }, { name: 'dvp', for: 'double-vaginal', + secondary: true, }, { name: 'double vaginal (dvp)', @@ -1163,11 +1178,11 @@ const aliases = [ }, { name: 'enhanced', - for: 'enhanced-boobs', + for: 'fake-boobs', }, { name: 'enhanced tits', - for: 'enhanced-boobs', + for: 'fake-boobs', }, { name: 'facefuck', @@ -1198,12 +1213,14 @@ const aliases = [ for: 'facial', }, { - name: 'fake boobs', - for: 'enhanced-boobs', + name: 'fake tits', + for: 'fake-boobs', + secondary: true, }, { - name: 'fake tits', - for: 'enhanced-boobs', + name: 'enhanced boobs', + for: 'fake-boobs', + secondary: true, }, { name: 'flogging', @@ -1220,10 +1237,12 @@ const aliases = [ { name: 'gape', for: 'gaping', + secondary: true, }, { name: 'gapes', for: 'gaping', + secondary: true, }, { name: 'gapes (gaping asshole)', @@ -1248,6 +1267,7 @@ const aliases = [ { name: 'incest', for: 'family', + secondary: true, }, { name: 'incest fantasy', @@ -1328,10 +1348,12 @@ const aliases = [ { name: 'rimming', for: 'ass-eating', + secondary: true, }, { name: 'rimjob', for: 'ass-eating', + secondary: true, }, { name: 'role play', @@ -1392,6 +1414,7 @@ const aliases = [ { name: 'spitroast', for: 'mfm', + secondary: true, }, { name: 'standing doggystyle', @@ -1444,6 +1467,7 @@ const aliases = [ { name: 'tittyfuck', for: 'titty-fuck', + secondary: true, }, { name: 'tp', @@ -1456,6 +1480,7 @@ const aliases = [ { name: 'transgender', for: 'transsexual', + secondary: true, }, { name: 'trimmed pussy', @@ -1515,6 +1540,7 @@ exports.seed = knex => Promise.resolve() const tagAliases = aliases.map(alias => ({ name: alias.name, alias_for: tagsMap[alias.for], + 
secondary: !!alias.secondary, })); return upsert('tags', tagAliases, 'name'); diff --git a/src/releases.js b/src/releases.js index 3b3ed71de..93f72a345 100644 --- a/src/releases.js +++ b/src/releases.js @@ -387,7 +387,8 @@ async function updateReleasesSearch(releaseIds) { LPAD(CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR), 2, '0') || ' ' || LPAD(CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR), 2, '0') || ' ' || string_agg(coalesce(actors.name, ''), ' ') || ' ' || - string_agg(coalesce(tags.name, ''), ' ') + string_agg(coalesce(tags.name, ''), ' ') || ' ' || + string_agg(coalesce(tags_aliases.name, ''), ' ') ) as document FROM releases LEFT JOIN sites ON releases.site_id = sites.id @@ -396,6 +397,7 @@ async function updateReleasesSearch(releaseIds) { LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id LEFT JOIN actors ON local_actors.actor_id = actors.id LEFT JOIN tags ON local_tags.tag_id = tags.id + LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for WHERE releases.id = ANY(?) 
GROUP BY releases.id, sites.name, sites.slug, networks.name, networks.slug; `, [releaseIds]); diff --git a/traxxx.stop b/traxxx.stop new file mode 100644 index 000000000..e8fbc1c0e --- /dev/null +++ b/traxxx.stop @@ -0,0 +1,156 @@ +i +me +my +myself +we +our +ours +ourselves +you +your +yours +yourself +yourselves +he +him +his +himself +she +her +hers +herself +it +its +itself +they +them +their +theirs +themselves +what +which +who +whom +this +that +these +those +am +is +are +was +were +be +been +being +have +has +had +having +do +does +did +doing +a +an +the +and +but +if +or +because +as +until +while +of +at +by +for +with +about +against +between +into +through +during +before +after +above +below +to +from +up +down +in +out +on +off +over +under +again +further +then +once +here +there +when +where +why +how +all +any +both +each +few +more +most +other +some +such +no +nor +not +only +own +same +so +than +too +very +s +t +can +will +just +don +should +now +1080p +2160p +240p +360p +480p +540p +720p +avi +gagvid +h264 +hd +kleenex +ktr +mkv +mov +mp4 +rartv +robots +scenes +sd +split +tbs +trashbin +web +webrip +wmv +x264 +xlf +xxx