// traxxx/src/media.js
'use strict';
const config = require('config');
const Promise = require('bluebird');
const path = require('path');
const fs = require('fs-extra');
const bhttp = require('bhttp');
const mime = require('mime');
const sharp = require('sharp');
const blake2 = require('blake2');
const knex = require('./knex');
/**
 * Compute a 24-byte BLAKE2b digest of a buffer, hex-encoded.
 * Used as a content fingerprint to de-duplicate fetched media.
 *
 * @param {Buffer} buffer - Raw media data.
 * @returns {string} Hex-encoded digest.
 */
function getHash(buffer) {
  const hasher = blake2.createHash('blake2b', { digestLength: 24 });

  hasher.update(buffer);

  return hasher.digest('hex');
}
/**
 * Select an evenly spaced subset of photos, capped at the configured limit.
 * The first photo is always kept; the remaining picks are spread across the
 * full set. Duplicate indices (possible when the photo total and the limit
 * are close) are collapsed via a Set.
 *
 * @param {Array} photos - Candidate photo sources.
 * @param {number} [specifiedLimit] - Override for config.media.limit.
 * @returns {Array} At most `limit` evenly spaced photos.
 */
function pluckPhotos(photos, specifiedLimit) {
  const limit = specifiedLimit || config.media.limit;

  if (photos.length <= limit) {
    return photos;
  }

  const step = photos.length / (limit - 1);
  const indices = [1]; // 1-based: always include the first photo

  for (let pick = 1; pick < limit; pick += 1) {
    indices.push(Math.round(pick * step));
  }

  // Set drops duplicate indices; shift down by one for 0-based array access
  return [...new Set(indices)].map((photoIndex) => photos[photoIndex - 1]);
}
/**
 * Render a JPEG thumbnail of an image buffer at the configured height.
 * Smaller source images are not enlarged.
 *
 * @param {Buffer} buffer - Source image data.
 * @returns {Promise<Buffer>} JPEG-encoded thumbnail (quality 50).
 */
async function createThumbnail(buffer) {
  const pipeline = sharp(buffer);

  pipeline.resize({
    height: config.media.thumbnailSize,
    withoutEnlargement: true,
  });

  pipeline.jpeg({ quality: 50 });

  return pipeline.toBuffer();
}
/**
 * Ensure a media directory exists under the configured media root.
 *
 * @param {string} domain - Media domain, e.g. 'releases' or 'actors'.
 * @param {string} subpath - Directory path below the domain.
 * @returns {Promise<string>} Path of the (created) directory.
 */
async function createMediaDirectory(domain, subpath) {
  const directory = path.join(config.media.path, domain, subpath);

  await fs.mkdir(directory, { recursive: true });

  return directory;
}
/**
 * Map saved photo files onto rows for the `media` table.
 * Each file's own role, if set, wins over the default role argument.
 *
 * @param {Array} files - Saved files with filepath, thumbpath, mimetype, hash, source.
 * @param {string} [domain='releases'] - Media domain the photos belong to.
 * @param {string} [role='photo'] - Fallback role for files without one.
 * @param {number} targetId - ID of the entity the photos belong to.
 * @returns {Array<Object>} Row objects ready for insertion.
 */
function curatePhotoEntries(files, domain = 'releases', role = 'photo', targetId) {
  return files.map((file, index) => {
    const entry = {
      path: file.filepath,
      thumbnail: file.thumbpath,
      mime: file.mimetype,
      hash: file.hash,
      source: file.source,
      index,
      domain,
      target_id: targetId,
      role: file.role || role,
    };

    return entry;
  });
}
// before fetching
/**
 * Drop photo sources already present in the media table for the given
 * domains and roles, so they are not fetched again.
 *
 * @param {Array} photos - Source URLs; an entry may be an array of fallback URLs.
 * @param {Array|string} [domains=['releases']] - Domain(s) to match against.
 * @param {Array|string} [roles=['photo']] - Role(s) to match against.
 * @param {string} identifier - Human-readable label for logging.
 * @returns {Promise<Array>} Sources (or fallback groups) not yet in the database.
 */
async function filterSourceDuplicates(photos, domains = ['releases'], roles = ['photo'], identifier) {
  const photoSourceEntries = await knex('media')
    .whereIn('source', photos.flat())
    // [].concat() accepts a plain string argument, matching filterHashDuplicates
    .whereIn('domain', [].concat(domains))
    .whereIn('role', [].concat(roles));

  const photoSources = new Set(photoSourceEntries.map((photo) => photo.source));

  // when fallbacks are provided, keep the group only if none of its sources are known
  const newPhotos = photos.filter((source) => (Array.isArray(source)
    ? !source.some((sourceX) => photoSources.has(sourceX))
    : !photoSources.has(source)));

  if (photoSourceEntries.length > 0) {
    console.log(`Ignoring ${photoSourceEntries.length} ${roles} items already present by source for ${identifier}`);
  }

  return newPhotos;
}
// after fetching
/**
 * Drop fetched files whose content hash is already stored for the given
 * domains and roles.
 *
 * @param {Array} files - Fetched files carrying a `hash` property.
 * @param {Array|string} [domains=['releases']] - Domain(s) to match against.
 * @param {Array|string} [roles=['photo']] - Role(s) to match against.
 * @param {string} identifier - Human-readable label for logging.
 * @returns {Promise<Array>} Files whose hash is not yet in the database.
 */
async function filterHashDuplicates(files, domains = ['releases'], roles = ['photo'], identifier) {
  const hashes = files.map((file) => file.hash);

  const photoHashEntries = await knex('media')
    .whereIn('hash', hashes)
    .whereIn('domain', [].concat(domains))
    .whereIn('role', [].concat(roles)); // accept string argument

  const knownHashes = new Set(photoHashEntries.map((entry) => entry.hash));

  if (photoHashEntries.length > 0) {
    console.log(`Ignoring ${photoHashEntries.length} ${roles} items already present by hash for ${identifier}`);
  }

  return files.filter((file) => file && !knownHashes.has(file.hash));
}
/**
 * Fetch a photo from a URL, retrying up to three times, with support for
 * fallback URL lists. (A stray git-blame timestamp that had been pasted into
 * the try block — breaking the syntax — has been removed.)
 *
 * @param {string|string[]} photoUrl - Photo URL, or an array of fallback URLs tried in order.
 * @param {number} index - Zero-based photo index, used for logging.
 * @param {string} identifier - Human-readable label for logging.
 * @param {number} [attempt=1] - Current attempt number (max 3).
 * @returns {Promise<Object|null>} Photo buffer plus metadata, or null when all attempts fail.
 */
async function fetchPhoto(photoUrl, index, identifier, attempt = 1) {
  if (Array.isArray(photoUrl)) {
    // try each fallback URL in order until one resolves
    return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
      const photo = await fetchPhoto(url, index, identifier);

      if (photo) {
        return photo;
      }

      throw new Error('Photo not available');
    }), Promise.reject(new Error()));
  }

  try {
    // derive the MIME type from the URL path, not the (possibly absent) response headers
    const { pathname } = new URL(photoUrl);
    const mimetype = mime.getType(pathname);

    const res = await bhttp.get(photoUrl);

    if (res.statusCode === 200) {
      const extension = mime.getExtension(mimetype);
      const hash = getHash(res.body);

      return {
        photo: res.body,
        mimetype,
        extension,
        hash,
        source: photoUrl,
      };
    }

    throw new Error(`Response ${res.statusCode} not OK`);
  } catch (error) {
    console.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${identifier} (${photoUrl}): ${error}`);

    if (attempt < 3) {
      await Promise.delay(1000); // back off before retrying
      return fetchPhoto(photoUrl, index, identifier, attempt + 1);
    }

    return null;
  }
}
/**
 * Write photos and generated thumbnails to disk under the media root.
 * Filenames are either index-based (`role-1`, `role-2`, …) or timestamp-based.
 * (Restores the `${filename}` interpolations that had been garbled to
 * `$(unknown)` — the computed filename was never used.)
 *
 * @param {Array} files - Fetched files with `photo` buffer and `extension`.
 * @param {Object} options
 * @param {string} [options.domain='releases'] - Media domain directory.
 * @param {string} options.subpath - Directory below the domain.
 * @param {string} [options.role='photo'] - Fallback role used in index naming.
 * @param {string} [options.naming='index'] - 'index' or timestamp-based naming.
 * @returns {Promise<Array>} Input files augmented with thumbnail, filepath and thumbpath.
 */
async function savePhotos(files, {
  domain = 'releases',
  subpath,
  role = 'photo',
  naming = 'index',
}) {
  return Promise.map(files, async (file, index) => {
    const timestamp = new Date().getTime();
    const thumbnail = await createThumbnail(file.photo);

    const filename = naming === 'index'
      ? `${file.role || role}-${index + 1}`
      : `${timestamp + index}`; // add index to keep concurrent names unique

    const filepath = path.join(domain, subpath, `${filename}.${file.extension}`);
    const thumbpath = path.join(domain, subpath, `${filename}_thumb.${file.extension}`);

    await Promise.all([
      fs.writeFile(path.join(config.media.path, filepath), file.photo),
      fs.writeFile(path.join(config.media.path, thumbpath), thumbnail),
    ]);

    return {
      ...file,
      thumbnail,
      filepath,
      thumbpath,
    };
  });
}
/**
 * Fetch, de-duplicate, save and register photos for a target entity.
 * Fixes: guard against an empty unique set (previously `uniquePhotos[0].role`
 * could throw and an empty array could be inserted), log the number of photos
 * actually saved, and drop a redundant `await` inside `Promise.all`.
 *
 * @param {Array} photos - Photo source URLs (entries may be fallback arrays).
 * @param {Object} options
 * @param {string} [options.domain='releases'] - Media domain.
 * @param {string} [options.role='photo'] - Role stored for each photo.
 * @param {string} [options.naming='index'] - Filename scheme, see savePhotos.
 * @param {number} options.targetId - ID of the entity the photos belong to.
 * @param {string} options.subpath - Directory below the domain.
 * @param {string} [options.primaryRole] - Role for the first photo when none exists yet (e.g. avatar).
 * @param {string} identifier - Human-readable label for logging.
 */
async function storePhotos(photos, {
  domain = 'releases',
  role = 'photo',
  naming = 'index',
  targetId,
  subpath,
  primaryRole, // role to assign to first photo if not already in database, used mainly for avatars
}, identifier) {
  if (!photos || photos.length === 0) {
    console.warn(`No ${role}s available for ${identifier}`);
    return;
  }

  const pluckedPhotos = pluckPhotos(photos);
  const roles = primaryRole ? [role, primaryRole] : [role];

  const newPhotos = await filterSourceDuplicates(pluckedPhotos, [domain], roles, identifier);

  if (newPhotos.length === 0) return;

  console.log(`Fetching ${newPhotos.length} ${role}s for ${identifier}`);

  const metaFiles = await Promise.map(newPhotos, async (photoUrl, index) => fetchPhoto(photoUrl, index, identifier), {
    concurrency: 10,
  }).filter((photo) => photo);

  const [uniquePhotos, primaryPhoto] = await Promise.all([
    filterHashDuplicates(metaFiles, [domain], roles, identifier),
    primaryRole
      ? knex('media') // Promise.all awaits this; no inner await needed
        .where('domain', domain)
        .where('target_id', targetId)
        .where('role', primaryRole)
        .first()
      : null,
  ]);

  // every fetch failed or everything was a hash duplicate — nothing to save
  if (uniquePhotos.length === 0) return;

  if (primaryRole && !primaryPhoto) {
    console.log(`Setting first photo as ${primaryRole} for ${identifier}`);
    uniquePhotos[0].role = primaryRole;
  }

  const savedPhotos = await savePhotos(uniquePhotos, {
    domain,
    role,
    targetId,
    subpath,
    naming,
  });

  const curatedPhotoEntries = curatePhotoEntries(savedPhotos, domain, role, targetId);

  await knex('media').insert(curatedPhotoEntries);

  console.log(`Stored ${savedPhotos.length} ${role}s for ${identifier}`);
}
/**
 * Fetch and store a trailer video, preferring 1080p or 720p when multiple
 * qualities are supplied. Fixes: the file path now uses the `domain`
 * parameter instead of a hard-coded 'releases' directory, and a non-200
 * response is no longer written to disk as a trailer file.
 *
 * @param {Object|Object[]} trailers - Trailer descriptor(s) with `src` and optional `quality`/`type`.
 * @param {Object} options
 * @param {string} [options.domain='releases'] - Media domain directory.
 * @param {string} [options.role='trailer'] - Role stored in the media table.
 * @param {number} options.targetId - ID of the entity the trailer belongs to.
 * @param {string} options.subpath - Directory below the domain.
 * @param {string} identifier - Human-readable label for logging.
 */
async function storeTrailer(trailers, {
  domain = 'releases',
  role = 'trailer',
  targetId,
  subpath,
}, identifier) {
  // support scrapers supplying multiple qualities
  const trailer = Array.isArray(trailers)
    ? trailers.find((trailerX) => [1080, 720].includes(trailerX.quality)) || trailers[0]
    : trailers;

  if (!trailer || !trailer.src) {
    console.warn(`No trailer available for ${identifier}`);
    return;
  }

  console.log(`Storing trailer for ${identifier}`);

  const { pathname } = new URL(trailer.src);
  const mimetype = trailer.type || mime.getType(pathname);

  const res = await bhttp.get(trailer.src);

  // don't persist error pages as trailer files
  if (res.statusCode !== 200) {
    console.warn(`Failed to fetch trailer for ${identifier} (${trailer.src}): response ${res.statusCode} not OK`);
    return;
  }

  const filepath = path.join(domain, subpath, `trailer${trailer.quality ? `_${trailer.quality}` : ''}.${mime.getExtension(mimetype)}`);

  await Promise.all([
    fs.writeFile(path.join(config.media.path, filepath), res.body),
    knex('media').insert({
      path: filepath,
      mime: mimetype,
      source: trailer.src,
      domain,
      target_id: targetId,
      role,
      quality: trailer.quality || null,
    }),
  ]);
}
/**
 * Look up stored avatar media rows for an actor.
 * Note: resolves with all matching rows, not a single entry.
 *
 * @param {number} actorId - Actor target ID.
 * @param {string} [domain='actors'] - Media domain to search.
 * @returns {Promise<Array>} Matching media rows with role 'avatar'.
 */
async function findAvatar(actorId, domain = 'actors') {
  return knex('media').where({
    domain,
    target_id: actorId,
    role: 'avatar',
  });
}
// Public API: directory setup, avatar lookup, and photo/trailer ingestion.
// Fetch/filter helpers above are internal to this module.
module.exports = {
createMediaDirectory,
findAvatar,
storePhotos,
storeTrailer,
};