// traxxx/src/media_legacy.js
'use strict';

const config = require('config');
const Promise = require('bluebird');
const path = require('path');
const fs = require('fs-extra');
const bhttp = require('bhttp');
const mime = require('mime');
const sharp = require('sharp');
const blake2 = require('blake2');

const logger = require('./logger')(__filename);
const knex = require('./knex');
const upsert = require('./utils/upsert');
const { ex } = require('./utils/q');
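
// Computes a BLAKE2b hash (24-byte digest, hex-encoded) of a file buffer, used to deduplicate media by content.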
function getHash(buffer) {
  const hash = blake2.createHash('blake2b', { digestLength: 24 });
  hash.update(buffer);

  return hash.digest('hex');
}
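
// Reduces a photo set to the configured limit by sampling photos evenly across the set, always keeping the first.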
function pluckPhotos(photos, specifiedLimit) {
  const limit = specifiedLimit || config.media.limit;

  if (photos.length <= limit) {
    return photos;
  }

  const plucked = [1]
    .concat(
      Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))),
    );

  return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, which may happen when the photo total and the photo limit are close
}
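
// Measures image entropy so low-information placeholders can be filtered out; falls back to 7.5 (well above the default filter threshold) when stats fail.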
async function getEntropy(buffer) {
  try {
    const { entropy } = await sharp(buffer).stats();
    return entropy;
  } catch (error) {
    logger.warn(`Failed to retrieve image entropy, using 7.5: ${error.message}`);
    return 7.5;
  }
}
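
// Renders a JPEG thumbnail at the configured height, never enlarging the source image.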
async function createThumbnail(buffer) {
  try {
    // await here, so resize failures are caught by this try/catch instead of propagating to the caller
    const thumbnail = await sharp(buffer)
      .resize({
        height: config.media.thumbnailSize,
        withoutEnlargement: true,
      })
      .jpeg({
        quality: config.media.thumbnailQuality,
      })
      .toBuffer();

    return thumbnail;
  } catch (error) {
    logger.error(`Failed to create thumbnail: ${error.message}`);
  }

  return null;
}
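
// Ensures the media directory for a domain/subpath exists and returns its full path.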
async function createMediaDirectory(domain, subpath) {
  const filepath = path.join(config.media.path, domain, subpath);

  await fs.mkdir(filepath, { recursive: true });

  return filepath;
}
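
// Maps saved photo files to rows for the 'media' table, preserving their order via the index column.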
function curatePhotoEntries(files) {
  return files.map((file, index) => ({
    path: file.filepath,
    thumbnail: file.thumbpath,
    mime: file.mimetype,
    hash: file.hash,
    source: file.source,
    index,
  }));
}
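
// Splits photo sources into already-stored duplicates and new originals by matching the given column in the
// 'media' table; `prop` names the property holding that value on the source objects, falling back to `src` or
// the source value itself.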
async function findDuplicates(photos, identifier, prop = null) {
  const getIdentifier = (photo) => {
    if (prop) return photo[prop];
    if (photo.src) return photo.src;
    return photo;
  };

  const duplicates = await knex('media')
    .whereIn(identifier, photos.flat().map(getIdentifier));

  // database rows always store the value in the identifier column, regardless of the property name used on the source objects
  const duplicateLookup = new Set(duplicates.map(photo => photo[identifier]));

  const originals = photos.filter((source) => {
    if (Array.isArray(source)) {
      // a set of fallback sources counts as a duplicate when any of its sources is already present
      return !source.some(sourceX => duplicateLookup.has(getIdentifier(sourceX)));
    }

    return !duplicateLookup.has(getIdentifier(source));
  });

  return [duplicates, originals];
}
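
// Fetches the page a source links to and runs the scraper-supplied extractor to obtain the actual photo URL.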
async function extractPhoto(source) {
  const res = await bhttp.get(source.src);

  if (res.statusCode === 200) {
    const { q } = ex(res.body.toString());

    return source.extract(q);
  }

  return null;
}
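
// Downloads a photo with up to three attempts; accepts a plain URL, an extractable source, or an array of fallback URLs tried in order.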
async function fetchPhoto(photoUrl, index, label, attempt = 1) {
  if (photoUrl.src && photoUrl.extract) {
    // source links to a page containing a (presumably) tokenized photo
    const photo = await extractPhoto(photoUrl);

    return fetchPhoto(photo, index, label);
  }

  if (Array.isArray(photoUrl)) {
    return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
      const photo = await fetchPhoto(url, index, label);

      if (photo) {
        return photo;
      }

      throw new Error('Photo not available');
    }), Promise.reject(new Error()));
  }

  try {
    const { pathname } = new URL(photoUrl);
    const res = await bhttp.get(photoUrl);

    if (res.statusCode === 200) {
      const mimetype = mime.getType(pathname);
      const extension = mime.getExtension(mimetype);
      const hash = getHash(res.body);
      const entropy = await getEntropy(res.body);

      return {
        photo: res.body,
        mimetype,
        extension,
        hash,
        entropy,
        source: photoUrl,
      };
    }

    throw new Error(`Response ${res.statusCode} not OK`);
  } catch (error) {
    logger.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${label} (${photoUrl}): ${error}`);

    if (attempt < 3) {
      await Promise.delay(5000);

      return fetchPhoto(photoUrl, index, label, attempt + 1);
    }

    return null;
  }
}
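
// Writes each photo and its thumbnail to disk under the domain's media directory, named by index or timestamp.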
async function savePhotos(files, {
  domain = 'release',
  subpath,
  role = 'photo',
  naming = 'index',
}) {
  return Promise.map(files, async (file, index) => {
    try {
      const timestamp = new Date().getTime();
      const thumbnail = await createThumbnail(file.photo);

      const filename = naming === 'index'
        ? `${file.role || role}${index + 1}`
        : `${timestamp + index}`;

      const filepath = path.join(`${domain}s`, subpath, `${filename}.${file.extension}`);
      const thumbpath = path.join(`${domain}s`, subpath, `${filename}_thumb.${file.extension}`);

      await Promise.all([
        fs.writeFile(path.join(config.media.path, filepath), file.photo),
        fs.writeFile(path.join(config.media.path, thumbpath), thumbnail),
      ]);

      return {
        ...file,
        thumbnail,
        filepath,
        thumbpath,
      };
    } catch (error) {
      logger.error(`Failed to store ${domain} ${role} to ${subpath}: ${error.message}`);
      return null;
    }
  });
}
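
// Fetches, deduplicates (by source URL and by content hash), saves, and associates photos with a target entity.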
async function storePhotos(photos, {
  domain = 'release',
  role = 'photo',
  naming = 'index',
  targetId,
  subpath,
  primaryRole, // role to assign to the first photo if not already in the database, used mainly for avatars
  entropyFilter = 2.5, // filter out fallback avatars and other generic clipart
}, label) {
  if (!photos || photos.length === 0) {
    logger.info(`No ${role}s available for ${label}`);
    return;
  }

  const pluckedPhotos = pluckPhotos(Array.from(new Set(photos))); // pre-filter link duplicates, limit total per configuration
  const [sourceDuplicates, sourceOriginals] = await findDuplicates(pluckedPhotos, 'source');

  logger.info(`Fetching ${sourceOriginals.length} new ${role}s, ${sourceDuplicates.length} already present by source for ${label}`);

  const metaFiles = await Promise.map(sourceOriginals, async (photoUrl, index) => fetchPhoto(photoUrl, index, label), {
    concurrency: 10,
  }).filter(photo => photo && photo.entropy > entropyFilter);

  const metaFilesByHash = metaFiles.reduce((acc, photo) => ({ ...acc, [photo.hash]: photo }), {}); // pre-filter hash duplicates within the set; these may occur through fallbacks
  const [hashDuplicates, hashOriginals] = await findDuplicates(Object.values(metaFilesByHash), 'hash', 'hash');

  logger.info(`Saving ${hashOriginals.length} new ${role}s, ${hashDuplicates.length} already present by hash for ${label}`);

  const savedPhotos = await savePhotos(hashOriginals, {
    domain,
    role,
    targetId,
    subpath,
    naming,
  });

  const curatedPhotoEntries = curatePhotoEntries(savedPhotos.filter(Boolean));
  const newPhotos = await knex('media').insert(curatedPhotoEntries).returning('*');

  const photoEntries = Array.isArray(newPhotos)
    ? [...sourceDuplicates, ...hashDuplicates, ...newPhotos]
    : [...sourceDuplicates, ...hashDuplicates];

  const photoAssociations = photoEntries
    .map(photoEntry => ({
      [`${domain}_id`]: targetId,
      media_id: photoEntry.id,
    }));

  if (primaryRole) {
    // store one photo as a 'primary' photo, such as an avatar or cover
    const primaryPhoto = await knex(`${domain}s_${primaryRole}s`)
      .where(`${domain}_id`, targetId)
      .first();

    if (primaryPhoto) {
      const remainingAssociations = photoAssociations.filter(association => association.media_id !== primaryPhoto.media_id);

      await upsert(`${domain}s_${role}s`, remainingAssociations, [`${domain}_id`, 'media_id']);
      return;
    }

    await Promise.all([
      upsert(`${domain}s_${primaryRole}s`, photoAssociations.slice(0, 1), [`${domain}_id`, 'media_id']),
      upsert(`${domain}s_${role}s`, photoAssociations.slice(1), [`${domain}_id`, 'media_id']),
    ]);

    return;
  }

  await upsert(`${domain}s_${role}s`, photoAssociations, [`${domain}_id`, 'media_id']);
}
/*
async function storeReleasePhotos(releases, label) {
  const sources = releases.map(release => pluckPhotos(release.photos)).flat();
  const uniqueSources = Array.from(new Set(sources));

  const [sourceDuplicates, sourceOriginals] = await findDuplicates(uniqueSources, 'source');

  const metaFiles = await Promise.map(
    sourceOriginals,
    async (photoUrl, index) => fetchPhoto(photoUrl, index, label),
    { concurrency: 10 },
  )
    .filter(photo => photo);

  const hashUniques = Object.values(metaFiles.reduce((acc, file) => {
    if (!acc[file.hash]) acc[file.hash] = file;
    return acc;
  }, {}));

  const [hashDuplicates, hashOriginals] = await findDuplicates(hashUniques, 'hash', 'hash');

  const sourceHashes = metaFiles.concat(sourceDuplicates).reduce((acc, file) => {
    acc[file.source] = file.hash;
    return acc;
  }, {});

  const associations = releases.map(release => release.photos.map(source => [release.id, sourceHashes[source]])).flat();

  console.log(associations);
}
*/
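
// Downloads a release trailer (preferring the configured quality), deduplicates it, and associates it with the release.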
async function storeTrailer(trailers, {
  domain = 'releases',
  role = 'trailer',
  targetId,
  subpath,
}, label) {
  // support scrapers supplying multiple qualities
  const trailer = Array.isArray(trailers)
    ? trailers.find(trailerX => config.media.trailerQuality.includes(trailerX.quality)) || trailers[0]
    : trailers;

  if (!trailer || !trailer.src) {
    logger.info(`No ${role} available for ${label}`);
    return;
  }

  const [sourceDuplicates, sourceOriginals] = await findDuplicates([trailer], 'source', 'src');

  const metaFiles = await Promise.map(sourceOriginals, async (trailerX) => {
    const { pathname } = new URL(trailerX.src);
    const mimetype = trailerX.type || mime.getType(pathname);

    const res = await bhttp.get(trailerX.src);
    const hash = getHash(res.body);
    const filepath = path.join(domain, subpath, `${role}${trailerX.quality ? `_${trailerX.quality}` : ''}.${mime.getExtension(mimetype)}`);

    return {
      trailer: res.body,
      path: filepath,
      mime: mimetype,
      source: trailerX.src,
      quality: trailerX.quality || null,
      hash,
    };
  });

  const [hashDuplicates, hashOriginals] = await findDuplicates(metaFiles, 'hash', 'hash');

  const newTrailers = await knex('media')
    .insert(hashOriginals.map(trailerX => ({
      path: trailerX.path,
      mime: trailerX.mime,
      source: trailerX.source,
      quality: trailerX.quality,
      hash: trailerX.hash,
      type: role,
    })))
    .returning('*');

  await Promise.all(hashOriginals.map(trailerX => fs.writeFile(path.join(config.media.path, trailerX.path), trailerX.trailer)));

  const trailerEntries = Array.isArray(newTrailers)
    ? [...sourceDuplicates, ...hashDuplicates, ...newTrailers]
    : [...sourceDuplicates, ...hashDuplicates];

  await upsert(`releases_${role}s`, trailerEntries.map(trailerEntry => ({
    release_id: targetId,
    media_id: trailerEntry.id,
  })), ['release_id', 'media_id']);
}
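
/*
Example usage (illustrative sketch; the release ID, subpath, photo and trailer sources, and labels are hypothetical):

  await createMediaDirectory('releases', '12345');

  await storePhotos(['https://example.com/photo1.jpg'], {
    domain: 'release',
    targetId: 12345,
    subpath: '12345',
  }, 'photos of release 12345');

  await storeTrailer({ src: 'https://example.com/trailer_720p.mp4', quality: 720 }, {
    targetId: 12345,
    subpath: '12345',
  }, 'trailer of release 12345');
*/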
module.exports = {
  createMediaDirectory,
  storePhotos,
  // storeReleasePhotos,
  storeTrailer,
};