Added self hash filtering to media module. Moved Girl Girl back to Jules Jordan.
This commit is contained in:
src/media.js — 61 lines changed
@@ -1,7 +1,7 @@
|
||||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
const util = require('util');
|
||||
// const util = require('util');
|
||||
const Promise = require('bluebird');
|
||||
const fs = require('fs').promises;
|
||||
const path = require('path');
|
||||
@@ -177,10 +177,7 @@ async function findSourceDuplicates(baseMedias) {
|
||||
const existingSourceMediaByUrl = itemsByKey(existingSourceMedia, 'source');
|
||||
const existingExtractMediaByUrl = itemsByKey(existingExtractMedia, 'source_page');
|
||||
|
||||
return {
|
||||
existingSourceMediaByUrl,
|
||||
existingExtractMediaByUrl,
|
||||
};
|
||||
return [existingSourceMediaByUrl, existingExtractMediaByUrl];
|
||||
}
|
||||
|
||||
async function findHashDuplicates(medias) {
|
||||
@@ -189,16 +186,37 @@ async function findHashDuplicates(medias) {
|
||||
const existingHashMediaEntries = await knex('media').whereIn('hash', hashes);
|
||||
const existingHashMediaEntriesByHash = itemsByKey(existingHashMediaEntries, 'hash');
|
||||
|
||||
const uniqueHashMedia = medias.filter(media => !media.entry && !existingHashMediaEntriesByHash[media.meta?.hash]);
|
||||
const uniqueHashMedias = medias.filter(media => !media.entry && !existingHashMediaEntriesByHash[media.meta?.hash]);
|
||||
|
||||
const existingHashMedia = medias
|
||||
const { selfDuplicateMedias, selfUniqueMediasByHash } = uniqueHashMedias.reduce((acc, media) => {
|
||||
if (acc.selfUniqueMediasByHash[media.meta.hash]) {
|
||||
acc.selfDuplicateMedias.push({
|
||||
...media,
|
||||
use: acc.selfUniqueMediasByHash[media.meta.hash].id,
|
||||
});
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
acc.selfUniqueMediasByHash[media.meta.hash] = media;
|
||||
|
||||
return acc;
|
||||
}, {
|
||||
selfDuplicateMedias: [],
|
||||
selfUniqueMediasByHash: {},
|
||||
});
|
||||
|
||||
const selfUniqueHashMedias = Object.values(selfUniqueMediasByHash);
|
||||
|
||||
const existingHashMedias = medias
|
||||
.filter(media => existingHashMediaEntriesByHash[media.entry?.hash || media.meta?.hash])
|
||||
.map(media => ({
|
||||
...media,
|
||||
entry: existingHashMediaEntriesByHash[media.entry?.hash || media.meta?.hash],
|
||||
}));
|
||||
}))
|
||||
.concat(selfDuplicateMedias);
|
||||
|
||||
return { uniqueHashMedia, existingHashMedia };
|
||||
return [selfUniqueHashMedias, existingHashMedias];
|
||||
}
|
||||
|
||||
async function extractSource(baseSource, { existingExtractMediaByUrl }) {
|
||||
@@ -337,7 +355,7 @@ async function fetchSource(source) {
|
||||
return attempt(1);
|
||||
}
|
||||
|
||||
async function trySource(baseSource, existingMedias, baseMedia, baseSourceIndex) {
|
||||
async function trySource(baseSource, existingMedias) {
|
||||
// catch error and try the next source
|
||||
const extractedSource = await extractSource(baseSource, existingMedias);
|
||||
const existingSourceMedia = existingMedias.existingSourceMediaByUrl[extractedSource.src];
|
||||
@@ -350,7 +368,7 @@ async function trySource(baseSource, existingMedias, baseMedia, baseSourceIndex)
|
||||
}
|
||||
|
||||
if (existingSourceMedia) {
|
||||
logger.silly(`Media source URL already in database, skipping ${baseSource.url}`);
|
||||
logger.silly(`Media source URL already in database, skipping ${baseSource.src}`);
|
||||
|
||||
// media entry found by source URL, don't fetch
|
||||
return {
|
||||
@@ -359,7 +377,7 @@ async function trySource(baseSource, existingMedias, baseMedia, baseSourceIndex)
|
||||
};
|
||||
}
|
||||
|
||||
return fetchSource(extractedSource, baseMedia, baseSourceIndex, 1);
|
||||
return fetchSource(extractedSource);
|
||||
}
|
||||
|
||||
async function fetchMedia(baseMedia, existingMedias) {
|
||||
@@ -420,21 +438,21 @@ function curateMediaEntry(media, index) {
|
||||
}
|
||||
|
||||
async function storeMedias(baseMedias) {
|
||||
const { existingSourceMediaByUrl, existingExtractMediaByUrl } = await findSourceDuplicates(baseMedias);
|
||||
const [existingSourceMediaByUrl, existingExtractMediaByUrl] = await findSourceDuplicates(baseMedias);
|
||||
|
||||
const savedMedias = await Promise.map(
|
||||
baseMedias,
|
||||
async baseMedia => fetchMedia(baseMedia, { existingSourceMediaByUrl, existingExtractMediaByUrl }),
|
||||
);
|
||||
|
||||
const { uniqueHashMedia, existingHashMedia } = await findHashDuplicates(savedMedias);
|
||||
const [uniqueHashMedias, existingHashMedias] = await findHashDuplicates(savedMedias);
|
||||
|
||||
const newMediaWithEntries = uniqueHashMedia.map((media, index) => curateMediaEntry(media, index));
|
||||
const newMediaEntries = newMediaWithEntries.filter(media => !media.newEntry).map(media => media.entry);
|
||||
const newMediaWithEntries = uniqueHashMedias.map((media, index) => curateMediaEntry(media, index));
|
||||
const newMediaEntries = newMediaWithEntries.filter(media => media.newEntry).map(media => media.entry);
|
||||
|
||||
await knex('media').insert(newMediaEntries);
|
||||
|
||||
return [...newMediaWithEntries, ...existingHashMedia];
|
||||
return [...newMediaWithEntries, ...existingHashMedias];
|
||||
}
|
||||
|
||||
async function associateReleaseMedia(releases) {
|
||||
@@ -445,12 +463,13 @@ async function associateReleaseMedia(releases) {
|
||||
// TODO: internal duplicate filtering
|
||||
// TODO: media count limits
|
||||
// TODO: catch errors
|
||||
// TODO: stage by role
|
||||
|
||||
const baseMediasByReleaseId = releases.reduce((acc, release) => ({
|
||||
...acc,
|
||||
[release.id]: [
|
||||
...(argv.images && argv.poster ? toBaseMedias([release.poster], 'posters') : []),
|
||||
...(argv.images && argv.photos ? toBaseMedias(release.photos, 'photos').slice(0, 5) : []),
|
||||
...(argv.images && argv.photos ? toBaseMedias(release.photos, 'photos') : []),
|
||||
...(argv.videos && argv.trailer ? toBaseMedias([release.trailer], 'trailers') : []),
|
||||
...(argv.videos && argv.teaser ? toBaseMedias([release.teaser], 'teasers') : []),
|
||||
],
|
||||
@@ -473,17 +492,15 @@ async function associateReleaseMedia(releases) {
|
||||
|
||||
acc[media.role].push({
|
||||
release_id: releaseId,
|
||||
media_id: media.entry.id,
|
||||
media_id: media.use || media.entry.id,
|
||||
});
|
||||
});
|
||||
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
console.log(util.inspect(associationsByRole, null, null));
|
||||
|
||||
await Promise.all(Object.entries(associationsByRole)
|
||||
.map(async ([role, associations]) => knex(`releases_${role}`).insert(associations)));
|
||||
.map(async ([role, associations]) => knex.raw(`${knex(`releases_${role}`).insert(associations)} ON CONFLICT DO NOTHING`)));
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
Reference in New Issue
Block a user