Using generic slugify for MindGeek channel.
This commit is contained in:
parent
870d74a1de
commit
b03775fa07
|
@ -476,6 +476,15 @@ exports.up = knex => Promise.resolve()
|
||||||
ALTER TABLE releases_search
|
ALTER TABLE releases_search
|
||||||
ADD COLUMN document tsvector;
|
ADD COLUMN document tsvector;
|
||||||
|
|
||||||
|
CREATE TEXT SEARCH DICTIONARY traxxx (
|
||||||
|
TEMPLATE = pg_catalog.simple,
|
||||||
|
stopwords = traxxx
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TEXT SEARCH CONFIGURATION traxxx (
|
||||||
|
COPY = english
|
||||||
|
);
|
||||||
|
|
||||||
CREATE UNIQUE INDEX releases_search_unique ON releases_search (release_id);
|
CREATE UNIQUE INDEX releases_search_unique ON releases_search (release_id);
|
||||||
CREATE INDEX releases_search_index ON releases_search USING GIN (document);
|
CREATE INDEX releases_search_index ON releases_search USING GIN (document);
|
||||||
|
|
||||||
|
@ -492,8 +501,8 @@ exports.up = knex => Promise.resolve()
|
||||||
|
|
||||||
CREATE FUNCTION search_releases(query text) RETURNS SETOF releases_search AS $$
|
CREATE FUNCTION search_releases(query text) RETURNS SETOF releases_search AS $$
|
||||||
SELECT * FROM releases_search AS search
|
SELECT * FROM releases_search AS search
|
||||||
WHERE search.document @@ plainto_tsquery(replace(query, '.', ' '))
|
WHERE search.document @@ plainto_tsquery('traxxx', replace(query, '.', ' '))
|
||||||
ORDER BY ts_rank(search.document, plainto_tsquery(replace(query, '.', ' '))) DESC;
|
ORDER BY ts_rank(search.document, plainto_tsquery('traxxx', replace(query, '.', ' '))) DESC;
|
||||||
$$ LANGUAGE SQL STABLE;
|
$$ LANGUAGE SQL STABLE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -557,4 +566,7 @@ exports.down = knex => knex.raw(`
|
||||||
DROP TABLE IF EXISTS media CASCADE;
|
DROP TABLE IF EXISTS media CASCADE;
|
||||||
DROP TABLE IF EXISTS countries CASCADE;
|
DROP TABLE IF EXISTS countries CASCADE;
|
||||||
DROP TABLE IF EXISTS networks CASCADE;
|
DROP TABLE IF EXISTS networks CASCADE;
|
||||||
|
|
||||||
|
DROP TEXT SEARCH CONFIGURATION IF EXISTS traxxx;
|
||||||
|
DROP TEXT SEARCH DICTIONARY IF EXISTS traxxx;
|
||||||
`);
|
`);
|
||||||
|
|
|
@ -374,21 +374,30 @@ async function updateReleasesSearch(releaseIds) {
|
||||||
SELECT
|
SELECT
|
||||||
releases.id as release_id,
|
releases.id as release_id,
|
||||||
to_tsvector(
|
to_tsvector(
|
||||||
|
'traxxx',
|
||||||
releases.title || ' ' ||
|
releases.title || ' ' ||
|
||||||
sites.name || ' ' ||
|
sites.name || ' ' ||
|
||||||
sites.slug || ' ' ||
|
sites.slug || ' ' ||
|
||||||
replace(CAST(releases.date AS VARCHAR), '-', ' ') || ' ' ||
|
networks.name || ' ' ||
|
||||||
|
networks.slug || ' ' ||
|
||||||
|
EXTRACT(YEAR FROM releases.date) || ' ' ||
|
||||||
|
CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR) || ' ' ||
|
||||||
|
CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR) || ' ' ||
|
||||||
|
SUBSTRING(CAST(EXTRACT(YEAR FROM releases.date) AS VARCHAR) FROM 3 for 2) || ' ' ||
|
||||||
|
LPAD(CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR), 2, '0') || ' ' ||
|
||||||
|
LPAD(CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR), 2, '0') || ' ' ||
|
||||||
string_agg(coalesce(actors.name, ''), ' ') || ' ' ||
|
string_agg(coalesce(actors.name, ''), ' ') || ' ' ||
|
||||||
string_agg(coalesce(tags.name, ''), ' ')
|
string_agg(coalesce(tags.name, ''), ' ')
|
||||||
) as document
|
) as document
|
||||||
FROM releases
|
FROM releases
|
||||||
JOIN sites ON releases.site_id = sites.id
|
LEFT JOIN sites ON releases.site_id = sites.id
|
||||||
|
LEFT JOIN networks ON sites.network_id = networks.id
|
||||||
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
|
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
|
||||||
LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
|
LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
|
||||||
LEFT JOIN actors ON local_actors.actor_id = actors.id
|
LEFT JOIN actors ON local_actors.actor_id = actors.id
|
||||||
LEFT JOIN tags ON local_tags.tag_id = tags.id
|
LEFT JOIN tags ON local_tags.tag_id = tags.id
|
||||||
WHERE releases.id = ANY(?)
|
WHERE releases.id = ANY(?)
|
||||||
GROUP BY releases.id, sites.name, sites.slug;
|
GROUP BY releases.id, sites.name, sites.slug, networks.name, networks.slug;
|
||||||
`, [releaseIds]);
|
`, [releaseIds]);
|
||||||
|
|
||||||
if (documents.rows?.length > 0) {
|
if (documents.rows?.length > 0) {
|
||||||
|
@ -467,13 +476,14 @@ async function storeReleases(releases) {
|
||||||
const actors = accumulateActors(storedReleases);
|
const actors = accumulateActors(storedReleases);
|
||||||
const movies = accumulateMovies(storedReleases);
|
const movies = accumulateMovies(storedReleases);
|
||||||
|
|
||||||
|
await associateActors(actors, storedReleases);
|
||||||
|
|
||||||
await Promise.all([
|
await Promise.all([
|
||||||
associateActors(actors, storedReleases),
|
// actors need to be stored before generating search
|
||||||
|
updateReleasesSearch(storedReleases.map(release => release.id)),
|
||||||
storeReleaseAssets(storedReleases),
|
storeReleaseAssets(storedReleases),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
await updateReleasesSearch(storedReleases.map(release => release.id));
|
|
||||||
|
|
||||||
if (argv.withProfiles && Object.keys(actors).length > 0) {
|
if (argv.withProfiles && Object.keys(actors).length > 0) {
|
||||||
await scrapeBasicActors();
|
await scrapeBasicActors();
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@ const { CookieJar } = Promise.promisifyAll(require('tough-cookie'));
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
const { ex } = require('../utils/q');
|
const { ex } = require('../utils/q');
|
||||||
|
const slugify = require('../utils/slugify');
|
||||||
const { inchesToCm, lbsToKg } = require('../utils/convert');
|
const { inchesToCm, lbsToKg } = require('../utils/convert');
|
||||||
const { cookieToData } = require('../utils/cookies');
|
const { cookieToData } = require('../utils/cookies');
|
||||||
|
|
||||||
|
@ -97,7 +98,7 @@ function scrapeScene(data, url, _site, networkName) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const siteName = data.collections[0]?.name || data.brand;
|
const siteName = data.collections[0]?.name || data.brand;
|
||||||
release.channel = siteName.replace(/\s+/g, '').toLowerCase();
|
release.channel = slugify(siteName, { delimiter: '' });
|
||||||
|
|
||||||
release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;
|
release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue