'use strict'; const config = require('config'); const Promise = require('bluebird'); const path = require('path'); const fs = require('fs-extra'); const bhttp = require('bhttp'); const mime = require('mime'); const sharp = require('sharp'); const blake2 = require('blake2'); const knex = require('./knex'); function getHash(buffer) { const hash = blake2.createHash('blake2b', { digestLength: 24 }); hash.update(buffer); return hash.digest('hex'); } function pluckPhotos(photos, specifiedLimit) { const limit = specifiedLimit || config.media.limit; if (photos.length <= limit) { return photos; } const plucked = [1] .concat( Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))), ); return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close } async function createThumbnail(buffer) { return sharp(buffer) .resize({ height: config.media.thumbnailSize, withoutEnlargement: true, }) .toBuffer(); } async function createMediaDirectory(domain, subpath) { const filepath = path.join(config.media.path, domain, subpath); await fs.mkdir(filepath, { recursive: true }); return filepath; } function curatePhotoEntries(files, domain = 'releases', role = 'photo', targetId) { return files.map((file, index) => ({ path: file.filepath, thumbnail: file.thumbpath, mime: file.mimetype, hash: file.hash, source: file.source, index, domain, target_id: targetId, role: file.role || role, })); } // before fetching async function filterSourceDuplicates(photos, domains = ['releases'], roles = ['photo'], identifier) { const photoSourceEntries = await knex('media') .whereIn('source', photos.flat()) .whereIn('domain', domains) .whereIn('role', roles); // accept string argument const photoSources = new Set(photoSourceEntries.map(photo => photo.source)); const newPhotos = photos.filter(source => (Array.isArray(source) // fallbacks provided? ? !source.some(sourceX => photoSources.has(sourceX)) // ensure none of the sources match : !photoSources.has(source))); if (photoSourceEntries.length > 0) { console.log(`Ignoring ${photoSourceEntries.length} ${roles} items already present by source for ${identifier}`); } return newPhotos; } // after fetching async function filterHashDuplicates(files, domains = ['releases'], roles = ['photo'], identifier) { const photoHashEntries = await knex('media') .whereIn('hash', files.map(file => file.hash)) .whereIn('domain', [].concat(domains)) .whereIn('role', [].concat(roles)); // accept string argument const photoHashes = new Set(photoHashEntries.map(entry => entry.hash)); if (photoHashEntries.length > 0) { console.log(`Ignoring ${photoHashEntries.length} ${roles} items already present by hash for ${identifier}`); } return files.filter(file => file && !photoHashes.has(file.hash)); } async function fetchPhoto(photoUrl, index, identifier, attempt = 1) { if (Array.isArray(photoUrl)) { return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => { const photo = await fetchPhoto(url, index, identifier); if (photo) { return photo; } throw new Error('Photo not available'); }), Promise.reject(new Error())); } try { const { pathname } = new URL(photoUrl); const mimetype = mime.getType(pathname); const res = await bhttp.get(photoUrl); if (res.statusCode === 200) { const extension = mime.getExtension(mimetype); const hash = getHash(res.body); return { photo: res.body, mimetype, extension, hash, source: photoUrl, }; } throw new Error(`Response ${res.statusCode} not OK`); } catch (error) { console.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${identifier} (${photoUrl}): ${error}`); if (attempt < 3) { await Promise.delay(1000); return fetchPhoto(photoUrl, index, identifier, attempt + 1); } return null; } } async function savePhotos(files, { domain = 'releases', subpath, role = 'photo', naming = 'index', }) { return Promise.map(files, async (file, index) => { const timestamp = new Date().getTime(); const thumbnail = await createThumbnail(file.photo); const filename = naming === 'index' ? `${file.role || role}-${index + 1}` : `${timestamp + index}`; const filepath = path.join(domain, subpath, `${filename}.${file.extension}`); const thumbpath = path.join(domain, subpath, `${filename}_thumb.${file.extension}`); await Promise.all([ fs.writeFile(path.join(config.media.path, filepath), file.photo), fs.writeFile(path.join(config.media.path, thumbpath), thumbnail), ]); return { ...file, thumbnail, filepath, thumbpath, }; }); } async function storePhotos(photos, { domain = 'releases', role = 'photo', naming = 'index', targetId, subpath, primaryRole, // role to assign to first photo if not already in database, used mainly for avatars }, identifier) { if (!photos || photos.length === 0) { console.warn(`No ${role}s available for ${identifier}`); return; } const pluckedPhotos = pluckPhotos(photos); const roles = primaryRole ? [role, primaryRole] : [role]; const newPhotos = await filterSourceDuplicates(pluckedPhotos, [domain], roles, identifier); if (newPhotos.length === 0) return; console.log(`Fetching ${newPhotos.length} ${role}s for ${identifier}`); const metaFiles = await Promise.map(newPhotos, async (photoUrl, index) => fetchPhoto(photoUrl, index, identifier), { concurrency: 10, }).filter(photo => photo); const [uniquePhotos, primaryPhoto] = await Promise.all([ filterHashDuplicates(metaFiles, [domain], roles, identifier), primaryRole ? await knex('media') .where('domain', domain) .where('target_id', targetId) .where('role', primaryRole) .first() : null, ]); if (primaryRole && !primaryPhoto) { console.log(`Setting first photo as ${primaryRole} for ${identifier}`); uniquePhotos[0].role = primaryRole; } const savedPhotos = await savePhotos(uniquePhotos, { domain, role, targetId, subpath, naming, }); const curatedPhotoEntries = curatePhotoEntries(savedPhotos, domain, role, targetId); await knex('media').insert(curatedPhotoEntries); console.log(`Stored ${newPhotos.length} ${role}s for ${identifier}`); } async function storeTrailer(trailers, { domain = 'releases', role = 'trailer', targetId, subpath, }, identifier) { // support scrapers supplying multiple qualities const trailer = Array.isArray(trailers) ? trailers.find(trailerX => [1080, 720].includes(trailerX.quality)) || trailers[0] : trailers; if (!trailer || !trailer.src) { console.warn(`No trailer available for ${identifier}`); return; } console.log(`Storing trailer for ${identifier}`); const { pathname } = new URL(trailer.src); const mimetype = trailer.type || mime.getType(pathname); const res = await bhttp.get(trailer.src); const filepath = path.join('releases', subpath, `trailer${trailer.quality ? `_${trailer.quality}` : ''}.${mime.getExtension(mimetype)}`); await Promise.all([ fs.writeFile(path.join(config.media.path, filepath), res.body), knex('media').insert({ path: filepath, mime: mimetype, source: trailer.src, domain, target_id: targetId, role, quality: trailer.quality || null, }), ]); } async function findAvatar(actorId, domain = 'actors') { return knex('media') .where('domain', domain) .where('target_id', actorId) .where('role', 'avatar'); } module.exports = { createMediaDirectory, findAvatar, storePhotos, storeTrailer, };