Added tag reassociation and dedupe migration.

This commit is contained in:
DebaucheryLibrarian
2026-02-22 06:24:26 +01:00
parent e3b922da6c
commit b95e2fadf7
6 changed files with 156 additions and 7 deletions

View File

@@ -0,0 +1,31 @@
exports.up = async (knex) => {
// dedupe
await knex.raw(`
DELETE
FROM releases_tags
WHERE ctid IN
(
SELECT ctid
FROM(
SELECT
*,
ctid,
row_number() OVER (PARTITION BY release_id, original_tag ORDER BY ctid)
FROM releases_tags
)s
WHERE row_number >= 2
)
`);
await knex.schema.alterTable('releases_tags', (table) => {
table.increments('id');
table.unique(['release_id', 'original_tag']);
});
};
exports.down = async (knex) => {
await knex.schema.alterTable('releases_tags', (table) => {
table.dropColumn('id');
table.dropUnique(['release_id', 'original_tag']);
});
};

View File

@@ -838,6 +838,11 @@ const tags = [
slug: 'natural-boobs',
group: 'body',
},
{
name: 'natural butt',
slug: 'natural-butt',
group: 'body',
},
{
name: 'nipple clamps',
slug: 'nipple-clamps',
@@ -1616,6 +1621,10 @@ const aliases = [
name: 'big tits d-dd cup',
for: 'big-boobs',
},
{
name: 'busty',
for: 'big-boobs',
},
{
name: 'busty - big boobs',
for: 'big-boobs',
@@ -2159,6 +2168,18 @@ const aliases = [
name: 'natural tits',
for: 'natural-boobs',
},
{
name: 'natural butt',
for: 'natural-butt',
},
{
name: 'natural ass',
for: 'natural-butt',
},
{
name: 'real ass',
for: 'natural-butt',
},
{
name: 'oiled',
for: 'oil',

View File

@@ -25,6 +25,7 @@ const { scrapeActors, deleteActors, flushActors, flushProfiles, interpolateProfi
const { flushEntities } = require('./entities');
const { deleteScenes, deleteMovies, flushScenes, flushMovies, flushBatches } = require('./releases');
const { flushOrphanedMedia } = require('./media');
const { reassociateEntityReleaseTags, reassociateReleaseTags, reassociateOriginalTags } = require('./tags');
const getFileEntries = require('./utils/file-entries');
const inspector = new Inspector();
@@ -186,6 +187,18 @@ async function init() {
await deleteMovies(argv.deleteMovies);
}
if (argv.originalTags) {
await reassociateOriginalTags(argv.originalTags, argv.rematchTags);
}
if (argv.releaseTags) {
await reassociateReleaseTags(argv.releaseTags, argv.rematchTags);
}
if (argv.networkReleaseTags || argv.channelReleaseTags) {
await reassociateEntityReleaseTags(argv.networkReleaseTags, argv.channelReleaseTags, argv.rematchTags);
}
if (argv.flushOrphanedMedia) {
await flushOrphanedMedia();
}

View File

@@ -407,6 +407,32 @@ const { argv } = yargs
type: 'array',
alias: ['delete-movie', 'remove-movies', 'remove-movies'],
})
.option('original-tags', {
describe: 'Reassociate original tag names',
type: 'array',
alias: ['tags'],
})
.option('release-tags', {
describe: 'Reassociate tags for scene IDs',
type: 'array',
alias: ['scene-tags'],
})
.option('channel-release-tags', {
describe: 'Reassociate tags for all channel releases',
type: 'array',
alias: ['channel-scene-tags', 'channel-tags'],
})
.option('network-release-tags', {
describe: 'Reassociate tags for all network releases',
type: 'array',
alias: ['network-scene-tags', 'network-tags'],
})
.option('rematch-tags', {
describe: 'Reassociate tags that are already associated',
type: 'boolean',
alias: 'rematch',
default: false,
})
.option('request', {
describe: 'Make an arbitrary HTTP request',
type: 'string',

View File

@@ -368,9 +368,7 @@ async function searchEntities(query, type, limit) {
return curateEntities(entities);
}
async function flushEntities(networkSlugs = [], channelSlugs = []) {
const entitySlugs = networkSlugs.concat(channelSlugs).join(', ');
async function fetchEntityReleaseIds(networkSlugs = [], channelSlugs = []) {
const entityQuery = knex
.withRecursive('selected_entities', knex.raw(`
SELECT entities.*
@@ -443,6 +441,17 @@ async function flushEntities(networkSlugs = [], channelSlugs = []) {
})
.pluck('series.id');
return {
sceneIds,
movieIds,
serieIds,
};
}
async function flushEntities(networkSlugs = [], channelSlugs = []) {
const { sceneIds, movieIds, serieIds } = await fetchEntityReleaseIds(networkSlugs, channelSlugs);
const entitySlugs = networkSlugs.concat(channelSlugs).join(', ');
if (sceneIds.length === 0 && movieIds.length === 0 && serieIds.length === 0) {
logger.info(`No scenes, movies or series found to remove for ${entitySlugs}`);
return;
@@ -479,6 +488,7 @@ module.exports = {
fetchIncludedEntities,
fetchReleaseEntities,
fetchEntitiesBySlug,
fetchEntityReleaseIds,
fetchEntity,
fetchEntities,
getRecursiveParent,

View File

@@ -1,6 +1,10 @@
'use strict';
const logger = require('./logger')(__filename);
const knex = require('./knex');
const { fetchEntityReleaseIds } = require('./entities');
const slugify = require('./utils/slugify');
const bulkInsert = require('./utils/bulk-insert');
@@ -73,9 +77,8 @@ function withRelations(queryBuilder, withMedia) {
}
}
async function matchReleaseTags(releases) {
const tags = releases
.map((release) => release.tags).flat()
async function matchTags(rawTags) {
const tags = rawTags
.map((tag) => tag?.trim().match(/[a-z0-9()]+/ig)?.join(' ').toLowerCase())
.filter(Boolean);
@@ -153,7 +156,7 @@ async function associateReleaseTags(releases, type = 'release') {
return;
}
const tagIdsBySlug = await matchReleaseTags(releases);
const tagIdsBySlug = await matchTags(releases.flatMap((release) => release.tags));
const entityTagIdsByEntityId = await getEntityTags(releases);
const tagAssociations = buildReleaseTagAssociations(releases, tagIdsBySlug, entityTagIdsByEntityId, type);
@@ -187,8 +190,53 @@ async function fetchTags(limit = 100) {
return tags.map((tag) => curateTag(tag));
}
/**
 * Re-resolves the tag of existing releases_tags rows from their original tag
 * name and writes the result back to the database.
 *
 * Only rows that have an original_tag are considered; rows that already have a
 * tag_id are skipped unless `rematch` is set. Rows for which no tag can be
 * matched are left untouched.
 *
 * @param {object[]} tagEntries - Rows read from releases_tags.
 * @param {boolean} rematch - Also re-resolve rows that already have a tag_id.
 * @returns {Promise<void>}
 */
async function reassociateTagEntries(tagEntries, rematch) {
	const updatableTagEntries = tagEntries.filter((tagEntry) => (!tagEntry.tag_id || rematch) && tagEntry.original_tag);

	// Look every distinct original tag up once, however many rows share it.
	const uniqueOriginalTags = Array.from(new Set(updatableTagEntries.map((tagEntry) => tagEntry.original_tag)));
	const matchedTags = await matchTags(uniqueOriginalTags);

	const updatedTagEntries = updatableTagEntries
		.map((tagEntry) => ({
			...tagEntry,
			tag_id: matchedTags[slugify(tagEntry.original_tag)],
		}))
		.filter((tagEntry) => tagEntry.tag_id);

	if (updatedTagEntries.length > 0) {
		const staleEntryIds = updatedTagEntries.map((tagEntry) => tagEntry.id);

		// The id is blanked so the re-inserted rows do not reuse the deleted ids.
		const replacementEntries = updatedTagEntries.map((tagEntry) => ({
			...tagEntry,
			id: undefined,
		}));

		// Delete and re-insert atomically: without a transaction, a failing
		// insert would leave the deleted associations permanently lost.
		await knex.transaction(async (trx) => {
			await trx('releases_tags')
				.whereIn('id', staleEntryIds)
				.delete();

			await trx('releases_tags').insert(replacementEntries);
		});
	}

	logger.info(`Updated ${updatedTagEntries.length} tags in ${new Set(updatedTagEntries.map((tagEntry) => tagEntry.release_id)).size} scenes`);
}
/**
 * Reassociates the tags of the given scenes.
 *
 * @param {Array<string|number>} rawSceneIds - Scene IDs; values that do not
 *   convert to a truthy number (NaN, 0) are ignored.
 * @param {boolean} rematch - Forwarded to reassociateTagEntries.
 * @returns {Promise<void>}
 */
async function reassociateReleaseTags(rawSceneIds, rematch) {
	const sceneIds = rawSceneIds.flatMap((rawSceneId) => {
		const numericId = Number(rawSceneId);

		return numericId ? [numericId] : [];
	});

	const sceneTagEntries = await knex('releases_tags').whereIn('release_id', sceneIds);

	await reassociateTagEntries(sceneTagEntries, rematch);
}
/**
 * Reassociates the tags of every scene returned by fetchEntityReleaseIds for
 * the given network and channel slugs.
 *
 * @param {string[]} [networkSlugs=[]] - Network slugs.
 * @param {string[]} [channelSlugs=[]] - Channel slugs.
 * @param {boolean} [rematch=false] - Forwarded to reassociateReleaseTags.
 * @returns {Promise<void>}
 */
async function reassociateEntityReleaseTags(networkSlugs = [], channelSlugs = [], rematch = false) {
	const entityReleaseIds = await fetchEntityReleaseIds(networkSlugs, channelSlugs);

	// Only scenes are handled here; any other IDs in the result are not used.
	await reassociateReleaseTags(entityReleaseIds.sceneIds, rematch);
}
/**
 * Reassociates every releases_tags row whose original tag name matches one of
 * the given names, compared case-insensitively.
 *
 * @param {Array<string|number>} originalTags - Original tag names. Values are
 *   coerced to strings because the CLI option is declared as a plain array, so
 *   numeric-looking names may arrive as numbers.
 * @param {boolean} rematch - Forwarded to reassociateTagEntries.
 * @returns {Promise<void>}
 */
async function reassociateOriginalTags(originalTags, rematch) {
	// String() guards against non-string input, on which .toLowerCase() would throw.
	const lowercaseTags = originalTags.map((originalTag) => String(originalTag).toLowerCase());

	const tagEntries = await knex('releases_tags').whereIn(knex.raw('lower(releases_tags.original_tag)'), lowercaseTags);

	await reassociateTagEntries(tagEntries, rematch);
}
// Public API of this module. The three reassociate* functions re-resolve the
// tag_id of existing releases_tags rows by scene ID, by network/channel slug,
// or by original tag name respectively.
module.exports = {
associateReleaseTags,
fetchTag,
fetchTags,
reassociateEntityReleaseTags,
reassociateReleaseTags,
reassociateOriginalTags,
};