diff --git a/migrations/20260125052234_fingerprints.js b/migrations/20260125052234_fingerprints.js
new file mode 100644
index 00000000..24285094
--- /dev/null
+++ b/migrations/20260125052234_fingerprints.js
@@ -0,0 +1,66 @@
+const config = require('config');
+
+exports.up = async (knex) => {
+  await knex.schema.createTable('fingerprints_types', (table) => {
+    table.string('type')
+      .primary();
+  });
+
+  await knex('fingerprints_types').insert([
+    'oshash',
+    'phash',
+    'md5',
+    'blake2',
+  ].map((type) => ({ type })));
+
+  await knex.schema.createTable('releases_fingerprints', (table) => {
+    table.increments('id');
+
+    table.integer('scene_id')
+      .notNullable()
+      .references('id')
+      .inTable('releases');
+
+    table.string('hash')
+      .notNullable()
+      .index();
+
+    table.string('type')
+      .notNullable()
+      .references('type')
+      .inTable('fingerprints_types');
+
+    table.integer('duration');
+    table.integer('width');
+    table.integer('height');
+
+    table.integer('user_id')
+      .references('id')
+      .inTable('users');
+
+    table.string('source');
+    table.integer('source_submissions');
+    table.json('source_meta');
+
+    table.datetime('source_created_at');
+
+    table.datetime('created_at')
+      .notNullable()
+      .defaultTo(knex.fn.now());
+  });
+
+  await knex.raw(`
+    create unique index scenes_fingerprints_unique
+    on releases_fingerprints (scene_id, hash, source, user_id)
+    nulls not distinct
+  `);
+
+  await knex.raw('GRANT ALL ON ALL TABLES IN SCHEMA public TO :visitor;', {
+    visitor: knex.raw(config.database.query.user),
+  });
+};
+
+exports.down = async function(knex) {
+  await knex.schema.dropTable('releases_fingerprints');
+  await knex.schema.dropTable('fingerprints_types');
+};
diff --git a/src/tools/manticore-scenes.js b/src/tools/manticore-scenes.js
index 2ce6261d..0fa2b5ff 100644
--- a/src/tools/manticore-scenes.js
+++ b/src/tools/manticore-scenes.js
@@ -44,6 +44,7 @@ async function fetchScenes() {
 					COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
 					COALESCE(JSON_AGG(DISTINCT (movies.id, movies.title)) FILTER (WHERE movies.id IS NOT NULL), '[]') as movies,
 					COALESCE(JSON_AGG(DISTINCT (series.id, series.title)) FILTER (WHERE series.id IS NOT NULL), '[]') as series,
+					COALESCE(JSON_AGG(DISTINCT (releases_fingerprints.hash)) FILTER (WHERE releases_fingerprints.hash IS NOT NULL), '[]') as fingerprints,
 					studios.showcased IS NOT false AND
 					(entities.showcased IS NOT false OR COALESCE(studios.showcased, false) = true) AND
 					(parents.showcased IS NOT false OR COALESCE(entities.showcased, false) = true OR COALESCE(studios.showcased, false) = true)
@@ -60,6 +61,7 @@ async function fetchScenes() {
 				LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
 				LEFT JOIN releases_directors AS local_directors ON local_directors.release_id = releases.id
 				LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
+				LEFT JOIN releases_fingerprints ON releases_fingerprints.scene_id = releases.id
 				LEFT JOIN actors ON local_actors.actor_id = actors.id
 				LEFT JOIN actors AS directors ON local_directors.director_id = directors.id
 				LEFT JOIN tags ON local_tags.tag_id = tags.id
@@ -126,6 +128,7 @@ async function init() {
 			series text,
 			meta text,
 			date timestamp,
+			fingerprints text,
 			is_showcased bool,
 			created_at timestamp,
 			effective_date timestamp,
@@ -176,6 +179,7 @@ async function init() {
 		movies: scene.movies.map((movie) => movie.f2).join(' '),
 		serie_ids: scene.series.map((serie) => serie.f1),
 		series: scene.series.map((serie) => serie.f2).join(' '),
+		fingerprints: scene.fingerprints.join(' '),
 		meta: scene.date ? format(scene.date, 'y yy M MM MMM MMMM d dd') : undefined,
 		stashed: scene.stashed || 0,
 		dupe_index: scene.dupe_index || 0,
diff --git a/src/tools/stashdb-hashes.js b/src/tools/stashdb-hashes.js
new file mode 100644
index 00000000..35489ffa
--- /dev/null
+++ b/src/tools/stashdb-hashes.js
@@ -0,0 +1,150 @@
+'use strict';
+
+const util = require('util');
+const unprint = require('unprint');
+const args = require('yargs').argv;
+
+const knex = require('../knex');
+const bulkInsert = require('../utils/bulk-insert');
+
+const query = `query Scenes(
+	$page: Int! = 1
+	$limit: Int! = 10
+	$studioId: String!
+) {
+	queryScenes(
+		input: {
+			page: $page
+			per_page: $limit
+			sort: DATE
+			direction: DESC
+			parentStudio: $studioId
+		}
+	) {
+		count
+		scenes {
+			id
+			code
+			release_date
+			title
+			duration
+			fingerprints {
+				hash
+				algorithm
+				created
+				updated
+				duration
+				submissions
+				reports
+			}
+			urls {
+				url
+				site {
+					name
+				}
+			}
+			studio {
+				id
+				name
+			}
+		}
+	}
+}`;
+
+async function curateData(data) {
+	const stashScenes = data.queryScenes.scenes.map((stashScene) => {
+		const release = {};
+
+		release.id = stashScene.id;
+		release.title = stashScene.title;
+		release.url = stashScene.urls.find((url) => url.site.name?.toLowerCase() === 'studio')?.url;
+		release.date = stashScene.release_date;
+		release.duration = stashScene.duration;
+
+		release.entryId = stashScene.code;
+
+		// FOR XEMPIRE; release.url may be undefined when no 'studio' URL exists — guard so new URL() cannot throw
+		release.urlId = release.url ? (new URL(release.url).pathname.match(/\/(\d+)$/)?.[1] || null) : null;
+
+		release.fingerprints = stashScene.fingerprints.map((fingerprint) => ({
+			hash: fingerprint.hash,
+			algorithm: fingerprint.algorithm,
+			duration: fingerprint.duration,
+			submissions: fingerprint.submissions,
+			createdAt: fingerprint.created,
+			updatedAt: fingerprint.updated,
+			reports: fingerprint.reports,
+		}));
+
+		return release;
+	});
+
+	const stashScenesByIdentifiers = Object.fromEntries(stashScenes.flatMap((scene) => [[scene.entryId || scene.urlId, scene], [scene.url, scene]]));
+
+	const sceneEntries = await knex('releases')
+		.select('releases.*')
+		.leftJoin('entities', 'entities.id', 'releases.entity_id')
+		.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
+		.where('entities.slug', 'hardx')
+		.whereIn('entry_id', stashScenes.map((scene) => scene.entryId || scene.urlId));
+
+	const fpEntries = sceneEntries.flatMap((scene) => {
+		const stashScene = stashScenesByIdentifiers[scene.entry_id]
+			|| stashScenesByIdentifiers[scene.url];
+
+		return (stashScene?.fingerprints || []).map((fingerprint) => ({ // skip local scenes with no matching stash scene
+			scene_id: scene.id,
+			hash: fingerprint.hash,
+			type: fingerprint.algorithm.toLowerCase(),
+			duration: fingerprint.duration,
+			source: 'stashdb',
+			source_created_at: fingerprint.createdAt,
+			source_submissions: fingerprint.submissions,
+			source_meta: {
+				reports: fingerprint.reports,
+				sceneId: stashScene.id,
+				entryId: stashScene.entryId,
+				title: stashScene.title,
+				url: stashScene.url,
+				date: stashScene.date,
+			},
+		}));
+	});
+
+	console.log(util.inspect(fpEntries, { colors: true, depth: null, maxArrayLength: null }));
+
+	if (args.update) {
+		await bulkInsert('releases_fingerprints', fpEntries, false);
+
+		console.log(`Inserted ${fpEntries.length} hash entries`);
+	}
+}
+
+async function init() {
+	const res = await unprint.post('https://stashdb.org/graphql', {
+		operationName: 'Scenes',
+		query,
+		variables: {
+			limit: 1000,
+			studioId: '8c7ad24a-4756-4163-b328-28e8391602cd',
+			page: 1,
+		},
+	}, {
+		headers: {
+			'Content-Type': 'application/json',
+			ApiKey: process.env.STASHDB_API_KEY,
+		},
+	});
+
+	if (res.ok) {
+		await curateData(res.data.data);
+		await knex.destroy();
+
+		return;
+	}
+
+	console.error('FAILED', res.status, res.data);
+	await knex.destroy(); // release the pool on failure too, so the process can exit
+}
+
+init();