Added fingerprint table and WIP StashDB import tool.

This commit is contained in:
DebaucheryLibrarian 2026-01-26 02:03:08 +01:00
parent 63f0410c5c
commit c34905247c
3 changed files with 220 additions and 0 deletions

View File

@ -0,0 +1,66 @@
const config = require('config');
exports.up = async (knex) => {
await knex.schema.createTable('fingerprints_types', (table) => {
table.string('type')
.primary();
});
await knex('fingerprints_types').insert([
'oshash',
'phash',
'md5',
'blake2',
].map((type) => ({ type })));
await knex.schema.createTable('releases_fingerprints', (table) => {
table.increments('id');
table.integer('scene_id')
.notNullable()
.references('id')
.inTable('releases');
table.string('hash')
.notNullable()
.index();
table.string('type')
.notNullable()
.references('type')
.inTable('fingerprints_types');
table.integer('duration');
table.integer('width');
table.integer('height');
table.integer('user_id')
.references('id')
.inTable('users');
table.string('source');
table.integer('source_submissions');
table.json('source_meta');
table.datetime('source_created_at');
table.datetime('created_at')
.notNullable()
.defaultTo(knex.fn.now());
});
await knex.raw(`
create unique index scenes_fingerprints_unique
on releases_fingerprints (scene_id, hash, source, user_id)
nulls not distinct
`);
await knex.raw('GRANT ALL ON ALL TABLES IN SCHEMA public TO :visitor;', {
visitor: knex.raw(config.database.query.user),
});
};
exports.down = async function(knex) {
await knex.schema.dropTable('releases_fingerprints');
await knex.schema.dropTable('fingerprints_types');
};

View File

@ -44,6 +44,7 @@ async function fetchScenes() {
COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
COALESCE(JSON_AGG(DISTINCT (movies.id, movies.title)) FILTER (WHERE movies.id IS NOT NULL), '[]') as movies,
COALESCE(JSON_AGG(DISTINCT (series.id, series.title)) FILTER (WHERE series.id IS NOT NULL), '[]') as series,
COALESCE(JSON_AGG(DISTINCT (releases_fingerprints.hash)) FILTER (WHERE releases_fingerprints.hash IS NOT NULL), '[]') as fingerprints,
studios.showcased IS NOT false
AND (entities.showcased IS NOT false OR COALESCE(studios.showcased, false) = true)
AND (parents.showcased IS NOT false OR COALESCE(entities.showcased, false) = true OR COALESCE(studios.showcased, false) = true)
@ -60,6 +61,7 @@ async function fetchScenes() {
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
LEFT JOIN releases_directors AS local_directors ON local_directors.release_id = releases.id
LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
LEFT JOIN releases_fingerprints ON releases_fingerprints.scene_id = releases.id
LEFT JOIN actors ON local_actors.actor_id = actors.id
LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
LEFT JOIN tags ON local_tags.tag_id = tags.id
@ -126,6 +128,7 @@ async function init() {
series text,
meta text,
date timestamp,
fingerprints text,
is_showcased bool,
created_at timestamp,
effective_date timestamp,
@ -176,6 +179,7 @@ async function init() {
movies: scene.movies.map((movie) => movie.f2).join(' '),
serie_ids: scene.series.map((serie) => serie.f1),
series: scene.series.map((serie) => serie.f2).join(' '),
fingerprints: scene.fingerprints.join(' '),
meta: scene.date ? format(scene.date, 'y yy M MM MMM MMMM d dd') : undefined,
stashed: scene.stashed || 0,
dupe_index: scene.dupe_index || 0,

150
src/tools/stashdb-hashes.js Normal file
View File

@ -0,0 +1,150 @@
'use strict';
const util = require('util');
const unprint = require('unprint');
const args = require('yargs').argv;
const knex = require('../knex');
const bulkInsert = require('../utils/bulk-insert');
// GraphQL query against StashDB's queryScenes endpoint: pages through a
// parent studio's scenes (newest first) and returns each scene's
// fingerprints plus the identifying metadata (studio `code`, site URLs)
// that curateData uses to match scenes against local releases.
const query = `query Scenes(
$page: Int! = 1
$limit: Int! = 10
$studioId: String!
) {
queryScenes(
input: {
page: $page
per_page: $limit
sort: DATE
direction: DESC
parentStudio: $studioId
}
) {
count
scenes {
id
code
release_date
title
duration
fingerprints {
hash
algorithm
created
updated
duration
submissions
reports
}
urls {
url
site {
name
}
}
studio {
id
name
}
}
}
}`;
/**
 * Matches StashDB scenes to local releases and builds fingerprint rows.
 *
 * Local scenes are looked up by entry ID (falling back to the scene URL),
 * and each matched StashDB scene's fingerprints are flattened into
 * `releases_fingerprints` rows. Rows are only inserted when the script is
 * run with --update; otherwise they are just printed for inspection.
 *
 * @param {Object} data - the `data` payload of a StashDB queryScenes response.
 */
async function curateData(data) {
	const stashScenes = data.queryScenes.scenes.map((stashScene) => {
		const release = {};

		release.id = stashScene.id;
		release.title = stashScene.title;
		release.url = stashScene.urls.find((url) => url.site.name?.toLowerCase() === 'studio')?.url;
		release.date = stashScene.release_date;
		release.duration = stashScene.duration;
		release.entryId = stashScene.code;

		// FOR XEMPIRE: the site identifies scenes by a numeric ID at the end
		// of the URL path. Guard: not every scene has a studio URL, and
		// new URL(undefined) throws a TypeError.
		release.urlId = release.url
			? new URL(release.url).pathname.match(/\/(\d+)$/)?.[1] || null
			: null;

		release.fingerprints = stashScene.fingerprints.map((fingerprint) => ({
			hash: fingerprint.hash,
			algorithm: fingerprint.algorithm,
			duration: fingerprint.duration,
			submissions: fingerprint.submissions,
			createdAt: fingerprint.created,
			updatedAt: fingerprint.updated,
			reports: fingerprint.reports,
		}));

		return release;
	});

	const stashScenesByIdentifiers = Object.fromEntries(stashScenes.flatMap((scene) => [[scene.entryId || scene.urlId, scene], [scene.url, scene]]));

	const sceneEntries = await knex('releases')
		.select('releases.*')
		.leftJoin('entities', 'entities.id', 'releases.entity_id')
		.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
		.where('entities.slug', 'hardx')
		.whereIn('entry_id', stashScenes.map((scene) => scene.entryId || scene.urlId));

	const fpEntries = sceneEntries.flatMap((scene) => {
		const stashScene = stashScenesByIdentifiers[scene.entry_id]
			|| stashScenesByIdentifiers[scene.url];

		// Guard: a local scene matched by whereIn but absent from the
		// identifier map would otherwise crash the whole run.
		if (!stashScene) {
			return [];
		}

		return stashScene.fingerprints.map((fingerprint) => ({
			scene_id: scene.id,
			hash: fingerprint.hash,
			type: fingerprint.algorithm.toLowerCase(), // matches fingerprints_types keys
			duration: fingerprint.duration,
			source: 'stashdb',
			source_created_at: fingerprint.createdAt,
			source_submissions: fingerprint.submissions,
			source_meta: {
				reports: fingerprint.reports,
				sceneId: stashScene.id,
				entryId: stashScene.entryId,
				title: stashScene.title,
				url: stashScene.url,
				date: stashScene.date,
			},
		}));
	});

	console.log(util.inspect(fpEntries, { colors: true, depth: null, maxArrayLength: null }));

	if (args.update) {
		await bulkInsert('releases_fingerprints', fpEntries, false);

		console.log(`Inserted ${fpEntries.length} hash entries`);
	}
}
/**
 * Fetches up to 1000 scenes for a hard-coded StashDB parent studio and
 * passes them to curateData. Requires STASHDB_API_KEY in the environment.
 *
 * The knex pool is destroyed in a finally block so the process exits on
 * both the success and failure paths (the failure path previously leaked
 * the pool and hung the process).
 */
async function init() {
	try {
		const res = await unprint.post('https://stashdb.org/graphql', {
			operationName: 'Scenes',
			query,
			variables: {
				limit: 1000,
				studioId: '8c7ad24a-4756-4163-b328-28e8391602cd',
				page: 1,
			},
		}, {
			headers: {
				'Content-Type': 'application/json',
				ApiKey: process.env.STASHDB_API_KEY,
			},
		});

		if (!res.ok) {
			console.error('FAILED', res.status, res.data);
			process.exitCode = 1;
			return;
		}

		await curateData(res.data.data);
	} finally {
		// Always release the connection pool, or the process never exits.
		await knex.destroy();
	}
}

// Handle the top-level promise explicitly instead of leaving it floating.
init().catch((error) => {
	console.error(error);
	process.exitCode = 1;
});