forked from DebaucheryLibrarian/traxxx
Using generic slugify for MindGeek channel.
This commit is contained in:
parent
870d74a1de
commit
b03775fa07
|
@ -476,6 +476,15 @@ exports.up = knex => Promise.resolve()
|
|||
ALTER TABLE releases_search
|
||||
ADD COLUMN document tsvector;
|
||||
|
||||
CREATE TEXT SEARCH DICTIONARY traxxx (
|
||||
TEMPLATE = pg_catalog.simple,
|
||||
stopwords = traxxx
|
||||
);
|
||||
|
||||
CREATE TEXT SEARCH CONFIGURATION traxxx (
|
||||
COPY = english
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX releases_search_unique ON releases_search (release_id);
|
||||
CREATE INDEX releases_search_index ON releases_search USING GIN (document);
|
||||
|
||||
|
@ -492,8 +501,8 @@ exports.up = knex => Promise.resolve()
|
|||
|
||||
CREATE FUNCTION search_releases(query text) RETURNS SETOF releases_search AS $$
|
||||
SELECT * FROM releases_search AS search
|
||||
WHERE search.document @@ plainto_tsquery(replace(query, '.', ' '))
|
||||
ORDER BY ts_rank(search.document, plainto_tsquery(replace(query, '.', ' '))) DESC;
|
||||
WHERE search.document @@ plainto_tsquery('traxxx', replace(query, '.', ' '))
|
||||
ORDER BY ts_rank(search.document, plainto_tsquery('traxxx', replace(query, '.', ' '))) DESC;
|
||||
$$ LANGUAGE SQL STABLE;
|
||||
|
||||
/*
|
||||
|
@ -557,4 +566,7 @@ exports.down = knex => knex.raw(`
|
|||
DROP TABLE IF EXISTS media CASCADE;
|
||||
DROP TABLE IF EXISTS countries CASCADE;
|
||||
DROP TABLE IF EXISTS networks CASCADE;
|
||||
|
||||
DROP TEXT SEARCH CONFIGURATION IF EXISTS traxxx;
|
||||
DROP TEXT SEARCH DICTIONARY IF EXISTS traxxx;
|
||||
`);
|
||||
|
|
|
@ -374,21 +374,30 @@ async function updateReleasesSearch(releaseIds) {
|
|||
SELECT
|
||||
releases.id as release_id,
|
||||
to_tsvector(
|
||||
'traxxx',
|
||||
releases.title || ' ' ||
|
||||
sites.name || ' ' ||
|
||||
sites.slug || ' ' ||
|
||||
replace(CAST(releases.date AS VARCHAR), '-', ' ') || ' ' ||
|
||||
networks.name || ' ' ||
|
||||
networks.slug || ' ' ||
|
||||
EXTRACT(YEAR FROM releases.date) || ' ' ||
|
||||
CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR) || ' ' ||
|
||||
CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR) || ' ' ||
|
||||
SUBSTRING(CAST(EXTRACT(YEAR FROM releases.date) AS VARCHAR) FROM 3 for 2) || ' ' ||
|
||||
LPAD(CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR), 2, '0') || ' ' ||
|
||||
LPAD(CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR), 2, '0') || ' ' ||
|
||||
string_agg(coalesce(actors.name, ''), ' ') || ' ' ||
|
||||
string_agg(coalesce(tags.name, ''), ' ')
|
||||
) as document
|
||||
FROM releases
|
||||
JOIN sites ON releases.site_id = sites.id
|
||||
LEFT JOIN sites ON releases.site_id = sites.id
|
||||
LEFT JOIN networks ON sites.network_id = networks.id
|
||||
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
|
||||
LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
|
||||
LEFT JOIN actors ON local_actors.actor_id = actors.id
|
||||
LEFT JOIN tags ON local_tags.tag_id = tags.id
|
||||
WHERE releases.id = ANY(?)
|
||||
GROUP BY releases.id, sites.name, sites.slug;
|
||||
GROUP BY releases.id, sites.name, sites.slug, networks.name, networks.slug;
|
||||
`, [releaseIds]);
|
||||
|
||||
if (documents.rows?.length > 0) {
|
||||
|
@ -467,13 +476,14 @@ async function storeReleases(releases) {
|
|||
const actors = accumulateActors(storedReleases);
|
||||
const movies = accumulateMovies(storedReleases);
|
||||
|
||||
await associateActors(actors, storedReleases);
|
||||
|
||||
await Promise.all([
|
||||
associateActors(actors, storedReleases),
|
||||
// actors need to be stored before generating search
|
||||
updateReleasesSearch(storedReleases.map(release => release.id)),
|
||||
storeReleaseAssets(storedReleases),
|
||||
]);
|
||||
|
||||
await updateReleasesSearch(storedReleases.map(release => release.id));
|
||||
|
||||
if (argv.withProfiles && Object.keys(actors).length > 0) {
|
||||
await scrapeBasicActors();
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ const { CookieJar } = Promise.promisifyAll(require('tough-cookie'));
|
|||
const moment = require('moment');
|
||||
|
||||
const { ex } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { inchesToCm, lbsToKg } = require('../utils/convert');
|
||||
const { cookieToData } = require('../utils/cookies');
|
||||
|
||||
|
@ -97,7 +98,7 @@ function scrapeScene(data, url, _site, networkName) {
|
|||
}
|
||||
|
||||
const siteName = data.collections[0]?.name || data.brand;
|
||||
release.channel = siteName.replace(/\s+/g, '').toLowerCase();
|
||||
release.channel = slugify(siteName, { delimiter: '' });
|
||||
|
||||
release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;
|
||||
|
||||
|
|
Loading…
Reference in New Issue