traxxx-web/src/tools/manticore-scenes.js

175 lines
5.8 KiB
JavaScript
Raw Normal View History

2024-03-14 23:08:24 +00:00
// import config from 'config';
import { format } from 'date-fns';
import { faker } from '@faker-js/faker';
2024-03-21 01:54:05 +00:00
import { indexApi } from '../manticore.js';
2024-03-14 23:08:24 +00:00
import { knexOwner as knex } from '../knex.js';
import slugify from '../utils/slugify.js';
import chunk from '../utils/chunk.js';
async function fetchScenes() {
const scenes = await knex.raw(`
SELECT
releases.id AS id,
releases.title,
releases.created_at,
releases.date,
releases.shoot_id,
scenes_meta.stashed,
entities.id as channel_id,
entities.slug as channel_slug,
entities.name as channel_name,
parents.id as network_id,
parents.slug as network_slug,
parents.name as network_name,
COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags
FROM releases
LEFT JOIN scenes_meta ON scenes_meta.scene_id = releases.id
LEFT JOIN entities ON releases.entity_id = entities.id
LEFT JOIN entities AS parents ON parents.id = entities.parent_id
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
LEFT JOIN releases_directors AS local_directors ON local_directors.release_id = releases.id
LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
LEFT JOIN actors ON local_actors.actor_id = actors.id
LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
LEFT JOIN tags ON local_tags.tag_id = tags.id
LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
GROUP BY
releases.id,
releases.title,
releases.created_at,
releases.date,
releases.shoot_id,
scenes_meta.stashed,
entities.id,
entities.name,
entities.slug,
entities.alias,
parents.id,
parents.name,
parents.slug,
parents.alias;
`);
const actors = Object.fromEntries(scenes.rows.flatMap((row) => row.actors.map((actor) => [actor.f1, faker.person.fullName()])));
const tags = Object.fromEntries(scenes.rows.flatMap((row) => row.tags.map((tag) => [tag.f1, faker.word.adjective()])));
return scenes.rows.map((row) => {
const title = faker.lorem.lines(1);
const channelName = faker.company.name();
const channelSlug = slugify(channelName, '');
const networkName = faker.company.name();
const networkSlug = slugify(networkName, '');
const rowActors = row.actors.map((actor) => ({ f1: actor.f1, f2: actors[actor.f1] }));
const rowTags = row.tags.map((tag) => ({ f1: tag.f1, f2: tags[tag.f1], f3: tag.f3 }));
return {
...row,
title,
actors: rowActors,
tags: rowTags,
channel_name: channelName,
channel_slug: channelSlug,
network_name: networkName,
network_slug: networkSlug,
};
});
}
async function updateStashed(docs) {
await chunk(docs, 1000).reduce(async (chain, docsChunk) => {
await chain;
const sceneIds = docsChunk.map((doc) => doc.replace.id);
const stashes = await knex('stashes_scenes')
.select('stashes_scenes.id as stashed_id', 'stashes_scenes.scene_id', 'stashes.id as stash_id', 'stashes.user_id as user_id')
.leftJoin('stashes', 'stashes.id', 'stashes_scenes.stash_id')
.whereIn('scene_id', sceneIds);
if (stashes.length > 0) {
console.log(stashes);
}
const stashDocs = docsChunk.flatMap((doc) => {
const sceneStashes = stashes.filter((stash) => stash.scene_id === doc.replace.id);
if (sceneStashes.length === 0) {
return [];
}
const stashDoc = sceneStashes.map((stash) => ({
replace: {
2024-03-21 01:54:05 +00:00
index: 'scenes_stashed',
2024-03-14 23:08:24 +00:00
id: stash.stashed_id,
doc: {
// ...doc.replace.doc,
2024-03-21 01:54:05 +00:00
scene_id: doc.replace.id,
2024-03-14 23:08:24 +00:00
user_id: stash.user_id,
},
},
}));
return stashDoc;
});
console.log(stashDocs);
if (stashDocs.length > 0) {
await indexApi.bulk(stashDocs.map((doc) => JSON.stringify(doc)).join('\n'));
}
}, Promise.resolve());
}
async function init() {
const scenes = await fetchScenes();
const docs = scenes.map((scene) => {
const flatActors = scene.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc.
const flatTags = scene.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => (tag.f4 ? `${tag.f2} ${tag.f4}` : tag.f2).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results
const filteredTitle = scene.title && [...flatActors, ...flatTags].reduce((accTitle, tag) => accTitle.replace(new RegExp(tag.replace(/[^\w\s]+/g, ''), 'i'), ''), scene.title).trim().replace(/\s{2,}/, ' ');
return {
replace: {
2024-03-21 01:54:05 +00:00
index: 'scenes',
2024-03-14 23:08:24 +00:00
id: scene.id,
doc: {
title: scene.title || undefined,
title_filtered: filteredTitle || undefined,
date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined,
created_at: Math.round(scene.created_at.getTime() / 1000),
effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000),
// shoot_id: scene.shoot_id || undefined,
channel_id: scene.channel_id,
channel_slug: scene.channel_slug,
channel_name: scene.channel_name,
network_id: scene.network_id || undefined,
network_slug: scene.network_slug || undefined,
network_name: scene.network_name || undefined,
actor_ids: scene.actors.map((actor) => actor.f1),
actors: scene.actors.map((actor) => actor.f2).join(),
tag_ids: scene.tags.map((tag) => tag.f1),
tags: flatTags.join(' '),
meta: scene.date ? format(scene.date, 'y yy M MMM MMMM d') : undefined,
liked: scene.stashed || 0,
},
},
};
});
const data = await indexApi.bulk(docs.map((doc) => JSON.stringify(doc)).join('\n'));
await updateStashed(docs);
console.log('data', data);
knex.destroy();
}
init();