Using generic slugify for MindGeek channel.

This commit is contained in:
ThePendulum 2020-02-29 05:00:50 +01:00
parent 870d74a1de
commit b03775fa07
3 changed files with 32 additions and 9 deletions

View File

@@ -476,6 +476,15 @@ exports.up = knex => Promise.resolve()
ALTER TABLE releases_search ALTER TABLE releases_search
ADD COLUMN document tsvector; ADD COLUMN document tsvector;
CREATE TEXT SEARCH DICTIONARY traxxx (
TEMPLATE = pg_catalog.simple,
stopwords = traxxx
);
CREATE TEXT SEARCH CONFIGURATION traxxx (
COPY = english
);
CREATE UNIQUE INDEX releases_search_unique ON releases_search (release_id); CREATE UNIQUE INDEX releases_search_unique ON releases_search (release_id);
CREATE INDEX releases_search_index ON releases_search USING GIN (document); CREATE INDEX releases_search_index ON releases_search USING GIN (document);
@@ -492,8 +501,8 @@ exports.up = knex => Promise.resolve()
CREATE FUNCTION search_releases(query text) RETURNS SETOF releases_search AS $$ CREATE FUNCTION search_releases(query text) RETURNS SETOF releases_search AS $$
SELECT * FROM releases_search AS search SELECT * FROM releases_search AS search
WHERE search.document @@ plainto_tsquery(replace(query, '.', ' ')) WHERE search.document @@ plainto_tsquery('traxxx', replace(query, '.', ' '))
ORDER BY ts_rank(search.document, plainto_tsquery(replace(query, '.', ' '))) DESC; ORDER BY ts_rank(search.document, plainto_tsquery('traxxx', replace(query, '.', ' '))) DESC;
$$ LANGUAGE SQL STABLE; $$ LANGUAGE SQL STABLE;
/* /*
@@ -557,4 +566,7 @@ exports.down = knex => knex.raw(`
DROP TABLE IF EXISTS media CASCADE; DROP TABLE IF EXISTS media CASCADE;
DROP TABLE IF EXISTS countries CASCADE; DROP TABLE IF EXISTS countries CASCADE;
DROP TABLE IF EXISTS networks CASCADE; DROP TABLE IF EXISTS networks CASCADE;
DROP TEXT SEARCH CONFIGURATION IF EXISTS traxxx;
DROP TEXT SEARCH DICTIONARY IF EXISTS traxxx;
`); `);

View File

@@ -374,21 +374,30 @@ async function updateReleasesSearch(releaseIds) {
SELECT SELECT
releases.id as release_id, releases.id as release_id,
to_tsvector( to_tsvector(
'traxxx',
releases.title || ' ' || releases.title || ' ' ||
sites.name || ' ' || sites.name || ' ' ||
sites.slug || ' ' || sites.slug || ' ' ||
replace(CAST(releases.date AS VARCHAR), '-', ' ') || ' ' || networks.name || ' ' ||
networks.slug || ' ' ||
EXTRACT(YEAR FROM releases.date) || ' ' ||
CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR) || ' ' ||
CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR) || ' ' ||
SUBSTRING(CAST(EXTRACT(YEAR FROM releases.date) AS VARCHAR) FROM 3 for 2) || ' ' ||
LPAD(CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR), 2, '0') || ' ' ||
LPAD(CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR), 2, '0') || ' ' ||
string_agg(coalesce(actors.name, ''), ' ') || ' ' || string_agg(coalesce(actors.name, ''), ' ') || ' ' ||
string_agg(coalesce(tags.name, ''), ' ') string_agg(coalesce(tags.name, ''), ' ')
) as document ) as document
FROM releases FROM releases
JOIN sites ON releases.site_id = sites.id LEFT JOIN sites ON releases.site_id = sites.id
LEFT JOIN networks ON sites.network_id = networks.id
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
LEFT JOIN actors ON local_actors.actor_id = actors.id LEFT JOIN actors ON local_actors.actor_id = actors.id
LEFT JOIN tags ON local_tags.tag_id = tags.id LEFT JOIN tags ON local_tags.tag_id = tags.id
WHERE releases.id = ANY(?) WHERE releases.id = ANY(?)
GROUP BY releases.id, sites.name, sites.slug; GROUP BY releases.id, sites.name, sites.slug, networks.name, networks.slug;
`, [releaseIds]); `, [releaseIds]);
if (documents.rows?.length > 0) { if (documents.rows?.length > 0) {
@@ -467,13 +476,14 @@ async function storeReleases(releases) {
const actors = accumulateActors(storedReleases); const actors = accumulateActors(storedReleases);
const movies = accumulateMovies(storedReleases); const movies = accumulateMovies(storedReleases);
await associateActors(actors, storedReleases);
await Promise.all([ await Promise.all([
associateActors(actors, storedReleases), // actors need to be stored before generating search
updateReleasesSearch(storedReleases.map(release => release.id)),
storeReleaseAssets(storedReleases), storeReleaseAssets(storedReleases),
]); ]);
await updateReleasesSearch(storedReleases.map(release => release.id));
if (argv.withProfiles && Object.keys(actors).length > 0) { if (argv.withProfiles && Object.keys(actors).length > 0) {
await scrapeBasicActors(); await scrapeBasicActors();
} }

View File

@@ -8,6 +8,7 @@ const { CookieJar } = Promise.promisifyAll(require('tough-cookie'));
const moment = require('moment'); const moment = require('moment');
const { ex } = require('../utils/q'); const { ex } = require('../utils/q');
const slugify = require('../utils/slugify');
const { inchesToCm, lbsToKg } = require('../utils/convert'); const { inchesToCm, lbsToKg } = require('../utils/convert');
const { cookieToData } = require('../utils/cookies'); const { cookieToData } = require('../utils/cookies');
@@ -97,7 +98,7 @@ function scrapeScene(data, url, _site, networkName) {
} }
const siteName = data.collections[0]?.name || data.brand; const siteName = data.collections[0]?.name || data.brand;
release.channel = siteName.replace(/\s+/g, '').toLowerCase(); release.channel = slugify(siteName, { delimiter: '' });
release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`; release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;