Added tag reassociation and dedupe migration.
This commit is contained in:
31
migrations/20260222055254_unique_origin_tags.js
Normal file
31
migrations/20260222055254_unique_origin_tags.js
Normal file
@@ -0,0 +1,31 @@
|
||||
exports.up = async (knex) => {
|
||||
// dedupe
|
||||
await knex.raw(`
|
||||
DELETE
|
||||
FROM releases_tags
|
||||
WHERE ctid IN
|
||||
(
|
||||
SELECT ctid
|
||||
FROM(
|
||||
SELECT
|
||||
*,
|
||||
ctid,
|
||||
row_number() OVER (PARTITION BY release_id, original_tag ORDER BY ctid)
|
||||
FROM releases_tags
|
||||
)s
|
||||
WHERE row_number >= 2
|
||||
)
|
||||
`);
|
||||
|
||||
await knex.schema.alterTable('releases_tags', (table) => {
|
||||
table.increments('id');
|
||||
table.unique(['release_id', 'original_tag']);
|
||||
});
|
||||
};
|
||||
|
||||
/**
 * Reverts the migration: removes the `id` column and the unique constraint
 * on (release_id, original_tag) from releases_tags. Rows deleted by the
 * dedupe step of `up` are not restored.
 *
 * @param {object} knex - Knex instance handed in by the migration runner.
 * @returns {Promise<void>}
 */
async function down(knex) {
	const revertColumns = (releasesTags) => {
		releasesTags.dropColumn('id');
		releasesTags.dropUnique(['release_id', 'original_tag']);
	};

	await knex.schema.alterTable('releases_tags', revertColumns);
}

exports.down = down;
|
||||
@@ -838,6 +838,11 @@ const tags = [
|
||||
slug: 'natural-boobs',
|
||||
group: 'body',
|
||||
},
|
||||
{
|
||||
name: 'natural butt',
|
||||
slug: 'natural-butt',
|
||||
group: 'body',
|
||||
},
|
||||
{
|
||||
name: 'nipple clamps',
|
||||
slug: 'nipple-clamps',
|
||||
@@ -1616,6 +1621,10 @@ const aliases = [
|
||||
name: 'big tits d-dd cup',
|
||||
for: 'big-boobs',
|
||||
},
|
||||
{
|
||||
name: 'busty',
|
||||
for: 'big-boobs',
|
||||
},
|
||||
{
|
||||
name: 'busty - big boobs',
|
||||
for: 'big-boobs',
|
||||
@@ -2159,6 +2168,18 @@ const aliases = [
|
||||
name: 'natural tits',
|
||||
for: 'natural-boobs',
|
||||
},
|
||||
{
|
||||
name: 'natural butt',
|
||||
for: 'natural-butt',
|
||||
},
|
||||
{
|
||||
name: 'natural ass',
|
||||
for: 'natural-butt',
|
||||
},
|
||||
{
|
||||
name: 'real ass',
|
||||
for: 'natural-butt',
|
||||
},
|
||||
{
|
||||
name: 'oiled',
|
||||
for: 'oil',
|
||||
|
||||
13
src/app.js
13
src/app.js
@@ -25,6 +25,7 @@ const { scrapeActors, deleteActors, flushActors, flushProfiles, interpolateProfi
|
||||
const { flushEntities } = require('./entities');
|
||||
const { deleteScenes, deleteMovies, flushScenes, flushMovies, flushBatches } = require('./releases');
|
||||
const { flushOrphanedMedia } = require('./media');
|
||||
const { reassociateEntityReleaseTags, reassociateReleaseTags, reassociateOriginalTags } = require('./tags');
|
||||
const getFileEntries = require('./utils/file-entries');
|
||||
|
||||
const inspector = new Inspector();
|
||||
@@ -186,6 +187,18 @@ async function init() {
|
||||
await deleteMovies(argv.deleteMovies);
|
||||
}
|
||||
|
||||
if (argv.originalTags) {
|
||||
await reassociateOriginalTags(argv.originalTags, argv.rematchTags);
|
||||
}
|
||||
|
||||
if (argv.releaseTags) {
|
||||
await reassociateReleaseTags(argv.releaseTags, argv.rematchTags);
|
||||
}
|
||||
|
||||
if (argv.networkReleaseTags || argv.channelReleaseTags) {
|
||||
await reassociateEntityReleaseTags(argv.networkReleaseTags, argv.channelReleaseTags, argv.rematchTags);
|
||||
}
|
||||
|
||||
if (argv.flushOrphanedMedia) {
|
||||
await flushOrphanedMedia();
|
||||
}
|
||||
|
||||
26
src/argv.js
26
src/argv.js
@@ -407,6 +407,32 @@ const { argv } = yargs
|
||||
type: 'array',
|
||||
alias: ['delete-movie', 'remove-movies', 'remove-movies'],
|
||||
})
|
||||
.option('original-tags', {
|
||||
describe: 'Reassociate original tag names',
|
||||
type: 'array',
|
||||
alias: ['tags'],
|
||||
})
|
||||
.option('release-tags', {
|
||||
describe: 'Reassociate tags for scene IDs',
|
||||
type: 'array',
|
||||
alias: ['scene-tags'],
|
||||
})
|
||||
.option('channel-release-tags', {
|
||||
describe: 'Reassociate tags for all channel releases',
|
||||
type: 'array',
|
||||
alias: ['channel-scene-tags', 'channel-tags'],
|
||||
})
|
||||
.option('network-release-tags', {
|
||||
describe: 'Reassociate tags for all network releases',
|
||||
type: 'array',
|
||||
alias: ['network-scene-tags', 'network-tags'],
|
||||
})
|
||||
.option('rematch-tags', {
|
||||
describe: 'Reassociate tags that are already associated',
|
||||
type: 'boolean',
|
||||
alias: 'rematch',
|
||||
default: false,
|
||||
})
|
||||
.option('request', {
|
||||
describe: 'Make an arbitrary HTTP request',
|
||||
type: 'string',
|
||||
|
||||
@@ -368,9 +368,7 @@ async function searchEntities(query, type, limit) {
|
||||
return curateEntities(entities);
|
||||
}
|
||||
|
||||
async function flushEntities(networkSlugs = [], channelSlugs = []) {
|
||||
const entitySlugs = networkSlugs.concat(channelSlugs).join(', ');
|
||||
|
||||
async function fetchEntityReleaseIds(networkSlugs = [], channelSlugs = []) {
|
||||
const entityQuery = knex
|
||||
.withRecursive('selected_entities', knex.raw(`
|
||||
SELECT entities.*
|
||||
@@ -443,6 +441,17 @@ async function flushEntities(networkSlugs = [], channelSlugs = []) {
|
||||
})
|
||||
.pluck('series.id');
|
||||
|
||||
return {
|
||||
sceneIds,
|
||||
movieIds,
|
||||
serieIds,
|
||||
};
|
||||
}
|
||||
|
||||
async function flushEntities(networkSlugs = [], channelSlugs = []) {
|
||||
const { sceneIds, movieIds, serieIds } = await fetchEntityReleaseIds(networkSlugs, channelSlugs);
|
||||
const entitySlugs = networkSlugs.concat(channelSlugs).join(', ');
|
||||
|
||||
if (sceneIds.length === 0 && movieIds.length === 0 && serieIds.length === 0) {
|
||||
logger.info(`No scenes, movies or series found to remove for ${entitySlugs}`);
|
||||
return;
|
||||
@@ -479,6 +488,7 @@ module.exports = {
|
||||
fetchIncludedEntities,
|
||||
fetchReleaseEntities,
|
||||
fetchEntitiesBySlug,
|
||||
fetchEntityReleaseIds,
|
||||
fetchEntity,
|
||||
fetchEntities,
|
||||
getRecursiveParent,
|
||||
|
||||
56
src/tags.js
56
src/tags.js
@@ -1,6 +1,10 @@
|
||||
'use strict';
|
||||
|
||||
const logger = require('./logger')(__filename);
|
||||
const knex = require('./knex');
|
||||
|
||||
const { fetchEntityReleaseIds } = require('./entities');
|
||||
|
||||
const slugify = require('./utils/slugify');
|
||||
const bulkInsert = require('./utils/bulk-insert');
|
||||
|
||||
@@ -73,9 +77,8 @@ function withRelations(queryBuilder, withMedia) {
|
||||
}
|
||||
}
|
||||
|
||||
async function matchReleaseTags(releases) {
|
||||
const tags = releases
|
||||
.map((release) => release.tags).flat()
|
||||
async function matchTags(rawTags) {
|
||||
const tags = rawTags
|
||||
.map((tag) => tag?.trim().match(/[a-z0-9()]+/ig)?.join(' ').toLowerCase())
|
||||
.filter(Boolean);
|
||||
|
||||
@@ -153,7 +156,7 @@ async function associateReleaseTags(releases, type = 'release') {
|
||||
return;
|
||||
}
|
||||
|
||||
const tagIdsBySlug = await matchReleaseTags(releases);
|
||||
const tagIdsBySlug = await matchTags(releases.flatMap((release) => release.tags));
|
||||
const entityTagIdsByEntityId = await getEntityTags(releases);
|
||||
|
||||
const tagAssociations = buildReleaseTagAssociations(releases, tagIdsBySlug, entityTagIdsByEntityId, type);
|
||||
@@ -187,8 +190,53 @@ async function fetchTags(limit = 100) {
|
||||
return tags.map((tag) => curateTag(tag));
|
||||
}
|
||||
|
||||
/**
 * Re-resolves the tag ID of existing releases_tags rows from their original tag name.
 *
 * Only entries that carry an original tag are considered; entries that already
 * have a tag ID are skipped unless `rematch` is set. Entries whose original tag
 * still cannot be matched are left as they are.
 *
 * @param {object[]} tagEntries - Rows from releases_tags (id, release_id, tag_id, original_tag, ...).
 * @param {boolean} rematch - Also re-resolve entries that already have a tag ID.
 * @returns {Promise<void>}
 */
async function reassociateTagEntries(tagEntries, rematch) {
	const updatableTagEntries = tagEntries.filter((tagEntry) => (!tagEntry.tag_id || rematch) && tagEntry.original_tag);
	const uniqueOriginalTags = Array.from(new Set(updatableTagEntries.map((tagEntry) => tagEntry.original_tag)));
	const matchedTags = await matchTags(uniqueOriginalTags);

	const updatedTagEntries = updatableTagEntries
		.map((tagEntry) => ({
			...tagEntry,
			tag_id: matchedTags[slugify(tagEntry.original_tag)],
		}))
		.filter((tagEntry) => tagEntry.tag_id);

	if (updatedTagEntries.length > 0) {
		// Delete and re-insert atomically: if the insert fails (e.g. on a unique
		// constraint) the deleted rows are rolled back instead of being lost.
		await knex.transaction(async (trx) => {
			await trx('releases_tags')
				.whereIn('id', updatedTagEntries.map((tagEntry) => tagEntry.id))
				.delete();

			// drop the old ID so the database assigns a fresh one
			await trx('releases_tags').insert(updatedTagEntries.map(({ id, ...tagEntry }) => tagEntry));
		});
	}

	logger.info(`Updated ${updatedTagEntries.length} tags in ${new Set(updatedTagEntries.map((tagEntry) => tagEntry.release_id)).size} scenes`);
}
|
||||
|
||||
/**
 * Reassociates the tags of the given scenes.
 *
 * @param {Array<string|number>} rawSceneIds - Scene IDs; values that coerce to 0 or NaN are ignored.
 * @param {boolean} rematch - Also re-resolve entries that already have a tag ID.
 * @returns {Promise<void>}
 */
async function reassociateReleaseTags(rawSceneIds, rematch) {
	const numericIds = rawSceneIds
		.map((rawId) => Number(rawId))
		.filter((numericId) => Boolean(numericId));

	const releaseTagQuery = knex('releases_tags').whereIn('release_id', numericIds);
	const entries = await releaseTagQuery;

	await reassociateTagEntries(entries, rematch);
}
|
||||
|
||||
/**
 * Reassociates the tags of every scene returned by fetchEntityReleaseIds
 * for the given network and channel slugs.
 *
 * @param {string[]} [networkSlugs=[]] - Network slugs passed on to fetchEntityReleaseIds.
 * @param {string[]} [channelSlugs=[]] - Channel slugs passed on to fetchEntityReleaseIds.
 * @param {boolean} [rematch=false] - Also re-resolve entries that already have a tag ID.
 * @returns {Promise<void>}
 */
async function reassociateEntityReleaseTags(networkSlugs = [], channelSlugs = [], rematch = false) {
	const entityReleaseIds = await fetchEntityReleaseIds(networkSlugs, channelSlugs);

	// only scenes are handled here; movie and series IDs are not used
	await reassociateReleaseTags(entityReleaseIds.sceneIds, rematch);
}
|
||||
|
||||
/**
 * Reassociates every releases_tags row whose original tag matches one of the
 * given names, compared case-insensitively.
 *
 * @param {Array<string|number>} originalTags - Original tag names as supplied on the command line.
 * @param {boolean} rematch - Also re-resolve entries that already have a tag ID.
 * @returns {Promise<void>}
 */
async function reassociateOriginalTags(originalTags, rematch) {
	// Array options can arrive with numeric-looking values already parsed as
	// numbers, so coerce to string before lowercasing instead of throwing.
	const normalizedTags = originalTags.map((originalTag) => String(originalTag).toLowerCase());

	const tagEntries = await knex('releases_tags').whereIn(knex.raw('lower(releases_tags.original_tag)'), normalizedTags);

	await reassociateTagEntries(tagEntries, rematch);
}
|
||||
|
||||
module.exports = {
|
||||
associateReleaseTags,
|
||||
fetchTag,
|
||||
fetchTags,
|
||||
reassociateEntityReleaseTags,
|
||||
reassociateReleaseTags,
|
||||
reassociateOriginalTags,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user