traxxx/src/tools/stashdb-hashes.js

163 lines
3.8 KiB
JavaScript

'use strict';
// const util = require('util');
const unprint = require('unprint');
const args = require('yargs').argv;
const knex = require('../knex');
const bulkInsert = require('../utils/bulk-insert');
// GraphQL document for the `Scenes` operation.
// Variables: $page (defaults to 1), $limit (defaults to 10) and a required $studioId,
// which is passed to the `parentStudio` filter of `queryScenes`.
// Results are requested sorted by DATE in DESC direction; for each scene the query
// selects its basic metadata, its fingerprints, its URLs (with the site name) and its studio.
// NOTE: the template literal is sent verbatim, so its contents must not be reformatted casually.
const query = `query Scenes(
$page: Int! = 1
$limit: Int! = 10
$studioId: String!
) {
queryScenes(
input: {
page: $page
per_page: $limit
sort: DATE
direction: DESC
parentStudio: $studioId
}
) {
count
scenes {
id
code
release_date
title
duration
fingerprints {
hash
algorithm
created
updated
duration
submissions
reports
}
urls {
url
site {
name
}
}
studio {
id
name
}
}
}
}`;
/**
 * Extract the trailing numeric ID from the path of a scene URL.
 *
 * @param {string|undefined|null} url - Scene URL, possibly missing.
 * @returns {string|null} The digits after the last slash of the path, or null when
 *   the URL is missing, cannot be parsed, or does not end in a numeric segment.
 */
function extractUrlId(url) {
	if (!url) {
		return null;
	}

	try {
		return new URL(url).pathname.match(/\/(\d+)$/)?.[1] || null;
	} catch {
		// `new URL` throws on malformed input; one bad URL should not abort the whole run.
		return null;
	}
}

/**
 * Flatten one raw StashDB scene into the release shape used for matching.
 *
 * @param {object} stashScene - A scene object as returned by the `queryScenes` GraphQL field.
 * @returns {object} Release with id, title, url, date, duration, entryId, urlId and fingerprints.
 */
function curateStashScene(stashScene) {
	// Only the URL whose site is named "studio" (case-insensitive) is considered.
	const url = stashScene.urls.find((sceneUrl) => sceneUrl.site.name?.toLowerCase() === 'studio')?.url;

	return {
		id: stashScene.id,
		title: stashScene.title,
		url,
		date: stashScene.release_date,
		duration: stashScene.duration,
		entryId: stashScene.code,
		// Numeric URL suffix, used as a fallback identifier (original note: "FOR XEMPIRE").
		urlId: extractUrlId(url),
		fingerprints: stashScene.fingerprints.map((fingerprint) => ({
			hash: fingerprint.hash,
			algorithm: fingerprint.algorithm,
			duration: fingerprint.duration,
			submissions: fingerprint.submissions,
			createdAt: fingerprint.created,
			updatedAt: fingerprint.updated,
			reports: fingerprint.reports,
		})),
	};
}

/**
 * Match StashDB scenes against local releases and, when `args.update` is set,
 * store their fingerprints in `releases_fingerprints`.
 *
 * Reads `args.entity` (a leading underscore means "match on the parent entity slug")
 * and `args.update` from the command line arguments.
 *
 * NOTE: a row is inserted into `batches` even when `args.update` is not set.
 *
 * @param {object} data - The `data` member of the GraphQL response; must contain `queryScenes.scenes`.
 * @returns {Promise<void>}
 * @throws {Error} When `args.entity` is missing.
 */
async function curateData(data) {
	if (args.entity == null) {
		throw new Error('Missing required --entity argument');
	}

	const entitySlug = String(args.entity);
	const stashScenes = data.queryScenes.scenes.map((stashScene) => curateStashScene(stashScene));

	console.log(`Found ${stashScenes.length} scenes in StashDB`);

	// Scenes are addressable by entry ID (or URL ID fallback) and by URL.
	// Empty identifiers are dropped so a release with a null URL cannot match a "null"/"undefined" key.
	const stashScenesByIdentifiers = Object.fromEntries(stashScenes
		.flatMap((scene) => [[scene.entryId || scene.urlId, scene], [scene.url, scene]])
		.filter(([identifier]) => Boolean(identifier)));

	const entryIds = stashScenes
		.map((scene) => scene.entryId || scene.urlId)
		.filter(Boolean);

	const [{ id: batchId }] = await knex('batches').insert({ showcased: false, comment: 'StashDB fingerprints' }).returning('id');

	const sceneEntries = await knex('releases')
		.select('releases.*')
		.leftJoin('entities', 'entities.id', 'releases.entity_id')
		.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
		.modify((builder) => {
			// The prefix check and the queried slug must come from the same argument.
			if (entitySlug.charAt(0) === '_') {
				builder.where('parents.slug', entitySlug.slice(1));
			} else {
				builder.where('entities.slug', entitySlug);
			}
		})
		.whereIn('entry_id', entryIds);

	console.log(`Matched ${sceneEntries.length} scenes`);

	const fpEntries = sceneEntries.flatMap((scene) => {
		const stashScene = stashScenesByIdentifiers[scene.entry_id]
			|| stashScenesByIdentifiers[scene.url];

		if (!stashScene) {
			// No StashDB counterpart for this row; nothing to store.
			return [];
		}

		return stashScene.fingerprints.map((fingerprint) => ({
			scene_id: scene.id,
			hash: fingerprint.hash,
			type: fingerprint.algorithm.toLowerCase(),
			duration: fingerprint.duration,
			source: 'stashdb',
			source_created_at: fingerprint.createdAt,
			source_submissions: fingerprint.submissions,
			source_meta: {
				reports: fingerprint.reports,
				sceneId: stashScene.id,
				entryId: stashScene.entryId,
				title: stashScene.title,
				url: stashScene.url,
				date: stashScene.date,
			},
			batch_id: batchId,
		}));
	});

	if (args.update) {
		await bulkInsert('releases_fingerprints', fpEntries, false);

		console.log(`Inserted ${fpEntries.length} hash entries in batch ${batchId}`);
	}
}
/**
 * Request up to 1000 scenes for the studio given by `args.studioId` from the StashDB
 * GraphQL endpoint and pass the result to `curateData`.
 *
 * The API key is read from the `STASHDB_API_KEY` environment variable.
 * The knex connection is always destroyed afterwards, whether the request succeeded,
 * failed, or processing threw.
 *
 * @returns {Promise<void>} Rejects with whatever the request or `curateData` throws.
 */
async function init() {
	try {
		const res = await unprint.post('https://stashdb.org/graphql', {
			operationName: 'Scenes',
			query,
			variables: {
				limit: 1000,
				studioId: args.studioId,
				page: 1,
			},
		}, {
			headers: {
				'Content-Type': 'application/json',
				ApiKey: process.env.STASHDB_API_KEY,
			},
		});

		if (!res.ok) {
			console.error('FAILED', res.status, res.data);
			return;
		}

		await curateData(res.data.data);
	} finally {
		// Release the pool on every path so it is not left open after a failure.
		await knex.destroy();
	}
}
// Script entry point: report any failure and exit with a non-zero status
// instead of leaving the promise unhandled.
init().catch((error) => {
	console.error(error);
	process.exitCode = 1;
});