Using generic slugify for MindGeek channel.

commit b03775fa07
parent 870d74a1de
@@ -476,6 +476,15 @@ exports.up = knex => Promise.resolve()
         ALTER TABLE releases_search
             ADD COLUMN document tsvector;
 
+        CREATE TEXT SEARCH DICTIONARY traxxx (
+            TEMPLATE = pg_catalog.simple,
+            stopwords = traxxx
+        );
+
+        CREATE TEXT SEARCH CONFIGURATION traxxx (
+            COPY = english
+        );
+
         CREATE UNIQUE INDEX releases_search_unique ON releases_search (release_id);
         CREATE INDEX releases_search_index ON releases_search USING GIN (document);
 
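The dictionary uses the pg_catalog.simple template, so stopwords = traxxx points PostgreSQL at a traxxx.stop word list in its tsearch_data directory, and the traxxx configuration starts out as a plain copy of english. The hunk does not show the dictionary being mapped into the configuration; if a later step wires it up, it could look roughly like the sketch below (the token types and dictionary chain are assumptions, not part of this commit).

    // Hypothetical follow-up, not part of this commit: attach the stopword
    // dictionary to the copied configuration so it is consulted before stemming.
    // The token types and dictionary chain here are assumptions.
    const attachTraxxxDictionary = knex => knex.raw(`
        ALTER TEXT SEARCH CONFIGURATION traxxx
            ALTER MAPPING FOR asciiword, asciihword, hword_asciipart
            WITH traxxx, english_stem;
    `);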
@@ -492,8 +501,8 @@ exports.up = knex => Promise.resolve()
 
         CREATE FUNCTION search_releases(query text) RETURNS SETOF releases_search AS $$
             SELECT * FROM releases_search AS search
-            WHERE search.document @@ plainto_tsquery(replace(query, '.', ' '))
-            ORDER BY ts_rank(search.document, plainto_tsquery(replace(query, '.', ' '))) DESC;
+            WHERE search.document @@ plainto_tsquery('traxxx', replace(query, '.', ' '))
+            ORDER BY ts_rank(search.document, plainto_tsquery('traxxx', replace(query, '.', ' '))) DESC;
         $$ LANGUAGE SQL STABLE;
 
         /*
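Passing the configuration name to plainto_tsquery means the query text is parsed with the same traxxx configuration used to build the documents, so stopwords and normalisation stay in sync between indexing and querying. A minimal usage sketch from the Node side, assuming a knex instance; the wrapper name and example query value are illustrative only.

    // Illustrative wrapper; search_releases and the '.' -> ' ' replacement come
    // from the migration above, the query value is just an example.
    async function searchReleases(knex, query) {
        const { rows } = await knex.raw('SELECT * FROM search_releases(?);', [query]);
        return rows;
    }

    // e.g. searchReleases(knex, 'blowpass 2019.10.04') should reach releases dated
    // 2019-10-04 via the padded date tokens in the document.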
@@ -557,4 +566,7 @@ exports.down = knex => knex.raw(`
     DROP TABLE IF EXISTS media CASCADE;
     DROP TABLE IF EXISTS countries CASCADE;
     DROP TABLE IF EXISTS networks CASCADE;
+
+    DROP TEXT SEARCH CONFIGURATION IF EXISTS traxxx;
+    DROP TEXT SEARCH DICTIONARY IF EXISTS traxxx;
 `);
@@ -374,21 +374,30 @@ async function updateReleasesSearch(releaseIds) {
         SELECT
             releases.id as release_id,
             to_tsvector(
+                'traxxx',
                 releases.title || ' ' ||
                 sites.name || ' ' ||
                 sites.slug || ' ' ||
-                replace(CAST(releases.date AS VARCHAR), '-', ' ') || ' ' ||
+                networks.name || ' ' ||
+                networks.slug || ' ' ||
+                EXTRACT(YEAR FROM releases.date) || ' ' ||
+                CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR) || ' ' ||
+                CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR) || ' ' ||
+                SUBSTRING(CAST(EXTRACT(YEAR FROM releases.date) AS VARCHAR) FROM 3 for 2) || ' ' ||
+                LPAD(CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR), 2, '0') || ' ' ||
+                LPAD(CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR), 2, '0') || ' ' ||
                 string_agg(coalesce(actors.name, ''), ' ') || ' ' ||
                 string_agg(coalesce(tags.name, ''), ' ')
             ) as document
         FROM releases
-        JOIN sites ON releases.site_id = sites.id
+        LEFT JOIN sites ON releases.site_id = sites.id
+        LEFT JOIN networks ON sites.network_id = networks.id
         LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
         LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
        LEFT JOIN actors ON local_actors.actor_id = actors.id
         LEFT JOIN tags ON local_tags.tag_id = tags.id
         WHERE releases.id = ANY(?)
-        GROUP BY releases.id, sites.name, sites.slug;
+        GROUP BY releases.id, sites.name, sites.slug, networks.name, networks.slug;
     `, [releaseIds]);
 
     if (documents.rows?.length > 0) {
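The document now carries the network name and slug plus the release date in several spellings: unpadded year, month and day, a two-digit year, and zero-padded month and day. Queries like '2019 10 4', '19 10 04' or a dotted date routed through search_releases should therefore all resolve to the same release. A quick way to eyeball what the traxxx configuration does with such a string; the helper name and sample text are hypothetical, not part of the commit.

    // Hypothetical inspection helper: shows the lexemes the traxxx configuration
    // produces for an example document fragment; the sample string is made up.
    async function previewDocument(knex) {
        const { rows } = await knex.raw(
            "SELECT to_tsvector('traxxx', ?) AS document;",
            ['Example Title examplesite 2019 10 4 19 10 04'],
        );
        return rows[0].document;
    }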
@@ -467,13 +476,14 @@ async function storeReleases(releases) {
     const actors = accumulateActors(storedReleases);
     const movies = accumulateMovies(storedReleases);
 
+    await associateActors(actors, storedReleases);
+
     await Promise.all([
-        associateActors(actors, storedReleases),
+        // actors need to be stored before generating search
+        updateReleasesSearch(storedReleases.map(release => release.id)),
         storeReleaseAssets(storedReleases),
     ]);
 
-    await updateReleasesSearch(storedReleases.map(release => release.id));
-
     if (argv.withProfiles && Object.keys(actors).length > 0) {
         await scrapeBasicActors();
     }
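Ordering matters here: the search document aggregates actor names, so associateActors is awaited on its own first, and only the two tasks without a mutual dependency, the search update and asset storage, run concurrently. Reduced to its essentials in the sketch below; argument shapes are simplified, so this is not the project's actual call pattern.

    // Sketch of the dependency only; the real functions take accumulated actor
    // maps and full release arrays, so the argument shapes here are simplified.
    async function persistRelease(release) {
        await associateActors(release.actors, [release]); // must complete first
        await Promise.all([
            updateReleasesSearch([release.id]), // reads the stored actor associations
            storeReleaseAssets([release]),      // independent of actors
        ]);
    }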
|  |  | |||
@@ -8,6 +8,7 @@ const { CookieJar } = Promise.promisifyAll(require('tough-cookie'));
 const moment = require('moment');
 
 const { ex } = require('../utils/q');
+const slugify = require('../utils/slugify');
 const { inchesToCm, lbsToKg } = require('../utils/convert');
 const { cookieToData } = require('../utils/cookies');
 
@@ -97,7 +98,7 @@ function scrapeScene(data, url, _site, networkName) {
     }
 
     const siteName = data.collections[0]?.name || data.brand;
-    release.channel = siteName.replace(/\s+/g, '').toLowerCase();
+    release.channel = slugify(siteName, { delimiter: '' });
 
     release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;
 
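With an empty delimiter, slugify should collapse a display name such as 'Reality Kings' into 'realitykings', matching how channel slugs are stored, while also normalising characters the old replace(/\s+/g, '').toLowerCase() left untouched. The real helper lives in ../utils/slugify; the version below is only a stand-in with assumed behaviour.

    // Stand-in with assumed behaviour; the project's ../utils/slugify may differ
    // in character handling and options.
    function slugify(string, { delimiter = '-' } = {}) {
        return string
            .trim()
            .toLowerCase()
            .replace(/[^a-z0-9]+/g, ' ') // strip punctuation, collapse whitespace
            .trim()
            .split(/\s+/)
            .join(delimiter); // '' glues the words straight together
    }

    // e.g. slugify('Reality Kings', { delimiter: '' }) -> 'realitykings'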
|  |  | |||
		Loading…
	
		Reference in New Issue