Added tag reassociation and dedupe migration.
This commit is contained in:
31
migrations/20260222055254_unique_origin_tags.js
Normal file
31
migrations/20260222055254_unique_origin_tags.js
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
exports.up = async (knex) => {
|
||||||
|
// dedupe
|
||||||
|
await knex.raw(`
|
||||||
|
DELETE
|
||||||
|
FROM releases_tags
|
||||||
|
WHERE ctid IN
|
||||||
|
(
|
||||||
|
SELECT ctid
|
||||||
|
FROM(
|
||||||
|
SELECT
|
||||||
|
*,
|
||||||
|
ctid,
|
||||||
|
row_number() OVER (PARTITION BY release_id, original_tag ORDER BY ctid)
|
||||||
|
FROM releases_tags
|
||||||
|
)s
|
||||||
|
WHERE row_number >= 2
|
||||||
|
)
|
||||||
|
`);
|
||||||
|
|
||||||
|
await knex.schema.alterTable('releases_tags', (table) => {
|
||||||
|
table.increments('id');
|
||||||
|
table.unique(['release_id', 'original_tag']);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
exports.down = async (knex) => {
|
||||||
|
await knex.schema.alterTable('releases_tags', (table) => {
|
||||||
|
table.dropColumn('id');
|
||||||
|
table.dropUnique(['release_id', 'original_tag']);
|
||||||
|
});
|
||||||
|
};
|
||||||
@@ -838,6 +838,11 @@ const tags = [
|
|||||||
slug: 'natural-boobs',
|
slug: 'natural-boobs',
|
||||||
group: 'body',
|
group: 'body',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: 'natural butt',
|
||||||
|
slug: 'natural-butt',
|
||||||
|
group: 'body',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: 'nipple clamps',
|
name: 'nipple clamps',
|
||||||
slug: 'nipple-clamps',
|
slug: 'nipple-clamps',
|
||||||
@@ -1616,6 +1621,10 @@ const aliases = [
|
|||||||
name: 'big tits d-dd cup',
|
name: 'big tits d-dd cup',
|
||||||
for: 'big-boobs',
|
for: 'big-boobs',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: 'busty',
|
||||||
|
for: 'big-boobs',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: 'busty - big boobs',
|
name: 'busty - big boobs',
|
||||||
for: 'big-boobs',
|
for: 'big-boobs',
|
||||||
@@ -2159,6 +2168,18 @@ const aliases = [
|
|||||||
name: 'natural tits',
|
name: 'natural tits',
|
||||||
for: 'natural-boobs',
|
for: 'natural-boobs',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: 'natural butt',
|
||||||
|
for: 'natural-butt',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'natural ass',
|
||||||
|
for: 'natural-butt',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'real ass',
|
||||||
|
for: 'natural-butt',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: 'oiled',
|
name: 'oiled',
|
||||||
for: 'oil',
|
for: 'oil',
|
||||||
|
|||||||
13
src/app.js
13
src/app.js
@@ -25,6 +25,7 @@ const { scrapeActors, deleteActors, flushActors, flushProfiles, interpolateProfi
|
|||||||
const { flushEntities } = require('./entities');
|
const { flushEntities } = require('./entities');
|
||||||
const { deleteScenes, deleteMovies, flushScenes, flushMovies, flushBatches } = require('./releases');
|
const { deleteScenes, deleteMovies, flushScenes, flushMovies, flushBatches } = require('./releases');
|
||||||
const { flushOrphanedMedia } = require('./media');
|
const { flushOrphanedMedia } = require('./media');
|
||||||
|
const { reassociateEntityReleaseTags, reassociateReleaseTags, reassociateOriginalTags } = require('./tags');
|
||||||
const getFileEntries = require('./utils/file-entries');
|
const getFileEntries = require('./utils/file-entries');
|
||||||
|
|
||||||
const inspector = new Inspector();
|
const inspector = new Inspector();
|
||||||
@@ -186,6 +187,18 @@ async function init() {
|
|||||||
await deleteMovies(argv.deleteMovies);
|
await deleteMovies(argv.deleteMovies);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (argv.originalTags) {
|
||||||
|
await reassociateOriginalTags(argv.originalTags, argv.rematchTags);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (argv.releaseTags) {
|
||||||
|
await reassociateReleaseTags(argv.releaseTags, argv.rematchTags);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (argv.networkReleaseTags || argv.channelReleaseTags) {
|
||||||
|
await reassociateEntityReleaseTags(argv.networkReleaseTags, argv.channelReleaseTags, argv.rematchTags);
|
||||||
|
}
|
||||||
|
|
||||||
if (argv.flushOrphanedMedia) {
|
if (argv.flushOrphanedMedia) {
|
||||||
await flushOrphanedMedia();
|
await flushOrphanedMedia();
|
||||||
}
|
}
|
||||||
|
|||||||
26
src/argv.js
26
src/argv.js
@@ -407,6 +407,32 @@ const { argv } = yargs
|
|||||||
type: 'array',
|
type: 'array',
|
||||||
alias: ['delete-movie', 'remove-movies', 'remove-movies'],
|
alias: ['delete-movie', 'remove-movies', 'remove-movies'],
|
||||||
})
|
})
|
||||||
|
.option('original-tags', {
|
||||||
|
describe: 'Reassociate original tag names',
|
||||||
|
type: 'array',
|
||||||
|
alias: ['tags'],
|
||||||
|
})
|
||||||
|
.option('release-tags', {
|
||||||
|
describe: 'Reassociate tags for scene IDs',
|
||||||
|
type: 'array',
|
||||||
|
alias: ['scene-tags'],
|
||||||
|
})
|
||||||
|
.option('channel-release-tags', {
|
||||||
|
describe: 'Reassociate tags for all channel releases',
|
||||||
|
type: 'array',
|
||||||
|
alias: ['channel-scene-tags', 'channel-tags'],
|
||||||
|
})
|
||||||
|
.option('network-release-tags', {
|
||||||
|
describe: 'Reassociate tags for all network releases',
|
||||||
|
type: 'array',
|
||||||
|
alias: ['network-scene-tags', 'network-tags'],
|
||||||
|
})
|
||||||
|
.option('rematch-tags', {
|
||||||
|
describe: 'Reassociate tags that are already associated',
|
||||||
|
type: 'boolean',
|
||||||
|
alias: 'rematch',
|
||||||
|
default: false,
|
||||||
|
})
|
||||||
.option('request', {
|
.option('request', {
|
||||||
describe: 'Make an arbitrary HTTP request',
|
describe: 'Make an arbitrary HTTP request',
|
||||||
type: 'string',
|
type: 'string',
|
||||||
|
|||||||
@@ -368,9 +368,7 @@ async function searchEntities(query, type, limit) {
|
|||||||
return curateEntities(entities);
|
return curateEntities(entities);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function flushEntities(networkSlugs = [], channelSlugs = []) {
|
async function fetchEntityReleaseIds(networkSlugs = [], channelSlugs = []) {
|
||||||
const entitySlugs = networkSlugs.concat(channelSlugs).join(', ');
|
|
||||||
|
|
||||||
const entityQuery = knex
|
const entityQuery = knex
|
||||||
.withRecursive('selected_entities', knex.raw(`
|
.withRecursive('selected_entities', knex.raw(`
|
||||||
SELECT entities.*
|
SELECT entities.*
|
||||||
@@ -443,6 +441,17 @@ async function flushEntities(networkSlugs = [], channelSlugs = []) {
|
|||||||
})
|
})
|
||||||
.pluck('series.id');
|
.pluck('series.id');
|
||||||
|
|
||||||
|
return {
|
||||||
|
sceneIds,
|
||||||
|
movieIds,
|
||||||
|
serieIds,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async function flushEntities(networkSlugs = [], channelSlugs = []) {
|
||||||
|
const { sceneIds, movieIds, serieIds } = await fetchEntityReleaseIds(networkSlugs, channelSlugs);
|
||||||
|
const entitySlugs = networkSlugs.concat(channelSlugs).join(', ');
|
||||||
|
|
||||||
if (sceneIds.length === 0 && movieIds.length === 0 && serieIds.length === 0) {
|
if (sceneIds.length === 0 && movieIds.length === 0 && serieIds.length === 0) {
|
||||||
logger.info(`No scenes, movies or series found to remove for ${entitySlugs}`);
|
logger.info(`No scenes, movies or series found to remove for ${entitySlugs}`);
|
||||||
return;
|
return;
|
||||||
@@ -479,6 +488,7 @@ module.exports = {
|
|||||||
fetchIncludedEntities,
|
fetchIncludedEntities,
|
||||||
fetchReleaseEntities,
|
fetchReleaseEntities,
|
||||||
fetchEntitiesBySlug,
|
fetchEntitiesBySlug,
|
||||||
|
fetchEntityReleaseIds,
|
||||||
fetchEntity,
|
fetchEntity,
|
||||||
fetchEntities,
|
fetchEntities,
|
||||||
getRecursiveParent,
|
getRecursiveParent,
|
||||||
|
|||||||
56
src/tags.js
56
src/tags.js
@@ -1,6 +1,10 @@
|
|||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
|
const logger = require('./logger')(__filename);
|
||||||
const knex = require('./knex');
|
const knex = require('./knex');
|
||||||
|
|
||||||
|
const { fetchEntityReleaseIds } = require('./entities');
|
||||||
|
|
||||||
const slugify = require('./utils/slugify');
|
const slugify = require('./utils/slugify');
|
||||||
const bulkInsert = require('./utils/bulk-insert');
|
const bulkInsert = require('./utils/bulk-insert');
|
||||||
|
|
||||||
@@ -73,9 +77,8 @@ function withRelations(queryBuilder, withMedia) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function matchReleaseTags(releases) {
|
async function matchTags(rawTags) {
|
||||||
const tags = releases
|
const tags = rawTags
|
||||||
.map((release) => release.tags).flat()
|
|
||||||
.map((tag) => tag?.trim().match(/[a-z0-9()]+/ig)?.join(' ').toLowerCase())
|
.map((tag) => tag?.trim().match(/[a-z0-9()]+/ig)?.join(' ').toLowerCase())
|
||||||
.filter(Boolean);
|
.filter(Boolean);
|
||||||
|
|
||||||
@@ -153,7 +156,7 @@ async function associateReleaseTags(releases, type = 'release') {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const tagIdsBySlug = await matchReleaseTags(releases);
|
const tagIdsBySlug = await matchTags(releases.flatMap((release) => release.tags));
|
||||||
const entityTagIdsByEntityId = await getEntityTags(releases);
|
const entityTagIdsByEntityId = await getEntityTags(releases);
|
||||||
|
|
||||||
const tagAssociations = buildReleaseTagAssociations(releases, tagIdsBySlug, entityTagIdsByEntityId, type);
|
const tagAssociations = buildReleaseTagAssociations(releases, tagIdsBySlug, entityTagIdsByEntityId, type);
|
||||||
@@ -187,8 +190,53 @@ async function fetchTags(limit = 100) {
|
|||||||
return tags.map((tag) => curateTag(tag));
|
return tags.map((tag) => curateTag(tag));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Re-resolves the `tag_id` of `releases_tags` rows from their `original_tag`
 * and replaces the rows for which a tag could be matched.
 *
 * @param {Array<object>} tagEntries - Rows read from `releases_tags`.
 * @param {boolean} rematch - When truthy, rows that already have a `tag_id` are re-resolved as well.
 * @returns {Promise<void>}
 */
async function reassociateTagEntries(tagEntries, rematch) {
	// Only rows carrying an original tag can be resolved; rows that are already matched are skipped unless rematching.
	const updatableTagEntries = tagEntries.filter((tagEntry) => (!tagEntry.tag_id || rematch) && tagEntry.original_tag);
	const matchedTags = await matchTags(Array.from(new Set(updatableTagEntries.map((tagEntry) => tagEntry.original_tag))));

	const updatedTagEntries = updatableTagEntries
		.map((tagEntry) => ({
			...tagEntry,
			tag_id: matchedTags[slugify(tagEntry.original_tag)],
		}))
		.filter((tagEntry) => tagEntry.tag_id);

	if (updatedTagEntries.length > 0) {
		// Keeps each statement well below the database's bind parameter limit.
		const chunkSize = 1000;

		// Delete and re-insert inside one transaction, so a failing insert
		// rolls the delete back instead of losing the rows.
		await knex.transaction(async (trx) => {
			// Chunks run sequentially on purpose: they share the transaction's single connection.
			for (let offset = 0; offset < updatedTagEntries.length; offset += chunkSize) {
				const chunk = updatedTagEntries.slice(offset, offset + chunkSize);

				await trx('releases_tags')
					.whereIn('id', chunk.map((tagEntry) => tagEntry.id))
					.delete();

				await trx('releases_tags').insert(chunk.map((tagEntry) => ({
					...tagEntry,
					id: undefined, // let the database assign a fresh id
				})));
			}
		});
	}

	logger.info(`Updated ${updatedTagEntries.length} tags in ${new Set(updatedTagEntries.map((tagEntry) => tagEntry.release_id)).size} scenes`);
}
|
||||||
|
|
||||||
|
/**
 * Reassociates the tags of the given scenes.
 *
 * @param {Array<string|number>} rawSceneIds - Scene IDs; values that do not convert to a non-zero number are dropped.
 * @param {boolean} rematch - Passed through to reassociateTagEntries.
 * @returns {Promise<void>}
 */
async function reassociateReleaseTags(rawSceneIds, rematch) {
	const numericSceneIds = rawSceneIds
		.map((rawSceneId) => Number(rawSceneId))
		.filter(Boolean);

	const sceneTagEntries = await knex('releases_tags').whereIn('release_id', numericSceneIds);

	await reassociateTagEntries(sceneTagEntries, rematch);
}
|
||||||
|
|
||||||
|
/**
 * Reassociates the tags of every scene returned by fetchEntityReleaseIds
 * for the given networks and channels.
 *
 * @param {Array<string>} [networkSlugs=[]] - Network slugs.
 * @param {Array<string>} [channelSlugs=[]] - Channel slugs.
 * @param {boolean} [rematch=false] - Passed through to reassociateReleaseTags.
 * @returns {Promise<void>}
 */
async function reassociateEntityReleaseTags(networkSlugs = [], channelSlugs = [], rematch = false) {
	const entityReleaseIds = await fetchEntityReleaseIds(networkSlugs, channelSlugs);

	await reassociateReleaseTags(entityReleaseIds.sceneIds, rematch);
}
|
||||||
|
|
||||||
|
/**
 * Reassociates every `releases_tags` row whose original tag matches one of
 * the given names, compared case-insensitively.
 *
 * @param {Array<string|number>} originalTags - Original tag names to look up.
 * @param {boolean} rematch - Passed through to reassociateTagEntries.
 * @returns {Promise<void>}
 */
async function reassociateOriginalTags(originalTags, rematch) {
	// Values coming from the command line are not guaranteed to be strings
	// (a numeric-looking tag may arrive as a number), so coerce before lowercasing.
	const normalizedTags = originalTags.map((originalTag) => String(originalTag).toLowerCase());

	const tagEntries = await knex('releases_tags').whereIn(knex.raw('lower(releases_tags.original_tag)'), normalizedTags);

	await reassociateTagEntries(tagEntries, rematch);
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
associateReleaseTags,
|
associateReleaseTags,
|
||||||
fetchTag,
|
fetchTag,
|
||||||
fetchTags,
|
fetchTags,
|
||||||
|
reassociateEntityReleaseTags,
|
||||||
|
reassociateReleaseTags,
|
||||||
|
reassociateOriginalTags,
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user