'use strict';

const config = require('config');
const Promise = require('bluebird');
const path = require('path');
const fs = require('fs-extra');
const bhttp = require('bhttp');
const mime = require('mime');
const sharp = require('sharp');
const blake2 = require('blake2');

const logger = require('./logger');
const knex = require('./knex');
const upsert = require('./utils/upsert');

function getHash(buffer) {
  const hash = blake2.createHash('blake2b', { digestLength: 24 });

  hash.update(buffer);

  return hash.digest('hex');
}

function pluckPhotos(photos, specifiedLimit) {
  const limit = specifiedLimit || config.media.limit;

  if (photos.length <= limit) {
    return photos;
  }

  // keep the first photo, then sample the remainder at an even interval
  const plucked = [1]
    .concat(
      Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))),
    );

  return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
}
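// Worked example: with a limit of 5 and 20 candidate photos, `plucked` becomes
// [1, 5, 10, 15, 20] (1-based), so the selection spans the whole set rather
// than just its head. The figures are illustrative, not taken from the config.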
async function getEntropy(buffer) {
  const { entropy } = await sharp(buffer).stats();

  return entropy;
}

async function createThumbnail(buffer) {
  try {
    // await the buffer here so resize/encode failures are caught by this
    // try/catch instead of surfacing as an unhandled rejection in the caller
    const thumbnail = await sharp(buffer)
      .resize({
        height: config.media.thumbnailSize,
        withoutEnlargement: true,
      })
      .jpeg({
        quality: config.media.thumbnailQuality,
      })
      .toBuffer();

    return thumbnail;
  } catch (error) {
    logger.error(`Failed to create thumbnail: ${error.message}`);
    throw error;
  }
}

async function createMediaDirectory(domain, subpath) {
  const filepath = path.join(config.media.path, domain, subpath);

  await fs.mkdir(filepath, { recursive: true });

  return filepath;
}

function curatePhotoEntries(files) {
  return files.map((file, index) => ({
    path: file.filepath,
    thumbnail: file.thumbpath,
    mime: file.mimetype,
    hash: file.hash,
    source: file.source,
    index,
  }));
}

async function findDuplicates(photos, identifier, prop = null, label) {
  const duplicates = await knex('media')
    .whereIn(identifier, photos.flat().map(photo => (prop ? photo[prop] : photo)));

  const duplicateLookup = new Set(duplicates.map(photo => photo[prop || identifier]));
  const originals = photos.filter(source => (Array.isArray(source) // fallbacks provided?
    ? !source.some(sourceX => duplicateLookup.has(prop ? sourceX[prop] : sourceX)) // ensure none of the sources match
    : !duplicateLookup.has(prop ? source[prop] : source)));

  if (duplicates.length > 0) {
    logger.info(`${duplicates.length} media items already present by ${identifier} for ${label}`);
  }

  if (originals.length > 0) {
    logger.info(`Fetching ${originals.length} new media items for ${label}`);
  }

  return [duplicates, originals];
}

async function fetchPhoto(photoUrl, index, label, attempt = 1) {
  if (Array.isArray(photoUrl)) {
    // try each fallback URL in sequence until one resolves
    return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
      const photo = await fetchPhoto(url, index, label);

      if (photo) {
        return photo;
      }

      throw new Error('Photo not available');
    }), Promise.reject(new Error()));
  }

  try {
    const { pathname } = new URL(photoUrl);
    const res = await bhttp.get(photoUrl);

    if (res.statusCode === 200) {
      const mimetype = mime.getType(pathname);
      const extension = mime.getExtension(mimetype);
      const hash = getHash(res.body);
      const entropy = await getEntropy(res.body);

      return {
        photo: res.body,
        mimetype,
        extension,
        hash,
        entropy,
        source: photoUrl,
      };
    }

    throw new Error(`Response ${res.statusCode} not OK`);
  } catch (error) {
    logger.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${label} (${photoUrl}): ${error}`);

    if (attempt < 3) {
      await Promise.delay(5000);
      return fetchPhoto(photoUrl, index, label, attempt + 1);
    }

    return null;
  }
}

async function savePhotos(files, {
  domain = 'release',
  subpath,
  role = 'photo',
  naming = 'index',
}) {
  return Promise.map(files, async (file, index) => {
    try {
      const timestamp = new Date().getTime();
      const thumbnail = await createThumbnail(file.photo);

      const filename = naming === 'index'
        ? `${file.role || role}${index + 1}`
        : `${timestamp + index}`;

      const filepath = path.join(`${domain}s`, subpath, `${filename}.${file.extension}`);
      const thumbpath = path.join(`${domain}s`, subpath, `${filename}_thumb.${file.extension}`);

      await Promise.all([
        fs.writeFile(path.join(config.media.path, filepath), file.photo),
        fs.writeFile(path.join(config.media.path, thumbpath), thumbnail),
      ]);

      return {
        ...file,
        thumbnail,
        filepath,
        thumbpath,
      };
    } catch (error) {
      logger.error(`Failed to store ${domain} ${role} to ${subpath}: ${error.message}`);
      return null;
    }
  });
}
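// storePhotos pipeline: limit and de-duplicate source URLs against existing
// `media` rows, fetch the remainder with retries, drop low-entropy images
// (generic clipart and fallback avatars), de-duplicate again by content hash
// (fallback URLs may resolve to identical files), then write the files to
// disk, insert `media` rows, and upsert the domain associations.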
async function storePhotos(photos, {
  domain = 'release',
  role = 'photo',
  naming = 'index',
  targetId,
  subpath,
  primaryRole, // role to assign to first photo if not already in database, used mainly for avatars
  entropyFilter = 2.5, // filter out fallback avatars and other generic clipart
}, label) {
  if (!photos || photos.length === 0) {
    logger.info(`No ${role}s available for ${label}`);
    return;
  }

  const pluckedPhotos = pluckPhotos(Array.from(new Set(photos))); // pre-filter link duplicates, limit total per configuration
  const [sourceDuplicates, sourceOriginals] = await findDuplicates(pluckedPhotos, 'source', null, label);

  const metaFiles = await Promise.map(sourceOriginals, async (photoUrl, index) => fetchPhoto(photoUrl, index, label), {
    concurrency: 10,
  }).filter(photo => photo && photo.entropy > entropyFilter);

  const metaFilesByHash = metaFiles.reduce((acc, photo) => ({ ...acc, [photo.hash]: photo }), {}); // pre-filter hash duplicates within set; may occur through fallbacks
  const [hashDuplicates, hashOriginals] = await findDuplicates(Object.values(metaFilesByHash), 'hash', 'hash', label);

  const savedPhotos = await savePhotos(hashOriginals, {
    domain,
    role,
    subpath,
    naming,
  });

  const curatedPhotoEntries = curatePhotoEntries(savedPhotos.filter(Boolean));

  const newPhotos = await knex('media').insert(curatedPhotoEntries).returning('*');
  const photoEntries = Array.isArray(newPhotos)
    ? [...sourceDuplicates, ...hashDuplicates, ...newPhotos]
    : [...sourceDuplicates, ...hashDuplicates];

  const photoAssociations = photoEntries
    .map(photoEntry => ({
      [`${domain}_id`]: targetId,
      media_id: photoEntry.id,
    }));

  if (primaryRole) {
    // store one photo as a 'primary' photo, such as an avatar or cover
    const primaryPhoto = await knex(`${domain}s_${primaryRole}s`)
      .where(`${domain}_id`, targetId)
      .first();

    if (primaryPhoto) {
      const remainingAssociations = photoAssociations.filter(association => association.media_id !== primaryPhoto.media_id);

      await upsert(`${domain}s_${role}s`, remainingAssociations, [`${domain}_id`, 'media_id']);
      return;
    }

    await Promise.all([
      upsert(`${domain}s_${primaryRole}s`, photoAssociations.slice(0, 1), [`${domain}_id`, 'media_id']),
      upsert(`${domain}s_${role}s`, photoAssociations.slice(1), [`${domain}_id`, 'media_id']),
    ]);

    return;
  }

  await upsert(`${domain}s_${role}s`, photoAssociations, [`${domain}_id`, 'media_id']);
}
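// Association table names are derived from the options: domain 'actor' with
// role 'photo' and primaryRole 'avatar', for example, targets `actors_photos`
// and `actors_avatars`, both keyed on (`actor_id`, `media_id`).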
/*
async function storeReleasePhotos(releases, label) {
  const sources = releases.map(release => pluckPhotos(release.photos)).flat();
  const uniqueSources = Array.from(new Set(sources));

  const [sourceDuplicates, sourceOriginals] = await findDuplicates(uniqueSources, 'source', null, label);

  const metaFiles = await Promise.map(
    sourceOriginals,
    async (photoUrl, index) => fetchPhoto(photoUrl, index, label),
    { concurrency: 10 },
  )
    .filter(photo => photo);

  const hashUniques = Object.values(metaFiles.reduce((acc, file) => {
    if (!acc[file.hash]) acc[file.hash] = file;
    return acc;
  }, {}));

  const [hashDuplicates, hashOriginals] = await findDuplicates(hashUniques, 'hash', 'hash', label);

  const sourceHashes = metaFiles.concat(sourceDuplicates).reduce((acc, file) => {
    acc[file.source] = file.hash;
    return acc;
  }, {});

  const associations = releases.map(release => release.photos.map(source => [release.id, sourceHashes[source]])).flat();

  console.log(associations);
}
*/

async function storeTrailer(trailers, {
  domain = 'releases',
  role = 'trailer',
  targetId,
  subpath,
}, label) {
  // support scrapers supplying multiple qualities
  const trailer = Array.isArray(trailers)
    ? trailers.find(trailerX => config.media.trailerQuality.includes(trailerX.quality)) || trailers[0]
    : trailers;

  if (!trailer || !trailer.src) {
    logger.info(`No ${role} available for ${label}`);
    return;
  }

  const [sourceDuplicates, sourceOriginals] = await findDuplicates([trailer], 'source', 'src', label);

  const metaFiles = await Promise.map(sourceOriginals, async (trailerX) => {
    const { pathname } = new URL(trailerX.src);
    const mimetype = trailerX.type || mime.getType(pathname);

    const res = await bhttp.get(trailerX.src);
    const hash = getHash(res.body);
    const filepath = path.join(domain, subpath, `${role}${trailerX.quality ? `_${trailerX.quality}` : ''}.${mime.getExtension(mimetype)}`);

    return {
      trailer: res.body,
      path: filepath,
      mime: mimetype,
      source: trailerX.src,
      quality: trailerX.quality || null,
      hash,
    };
  });

  const [hashDuplicates, hashOriginals] = await findDuplicates(metaFiles, 'hash', 'hash', label);

  const newTrailers = await knex('media')
    .insert(hashOriginals.map(trailerX => ({
      path: trailerX.path,
      mime: trailerX.mime,
      source: trailerX.source,
      quality: trailerX.quality,
      hash: trailerX.hash,
      type: role,
    })))
    .returning('*');

  await Promise.all(hashOriginals.map(trailerX => fs.writeFile(path.join(config.media.path, trailerX.path), trailerX.trailer)));

  const trailerEntries = Array.isArray(newTrailers)
    ? [...sourceDuplicates, ...hashDuplicates, ...newTrailers]
    : [...sourceDuplicates, ...hashDuplicates];

  await upsert(`releases_${role}s`, trailerEntries.map(trailerEntry => ({
    release_id: targetId,
    media_id: trailerEntry.id,
  })), ['release_id', 'media_id']);
}

module.exports = {
  createMediaDirectory,
  storePhotos,
  // storeReleasePhotos,
  storeTrailer,
};
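/* Usage sketch (hypothetical caller; `release`, its fields, and the subpath
   layout are assumptions for illustration, not part of this module):

const subpath = `${release.site.slug}/${release.id}`;

await createMediaDirectory('releases', subpath);
await storePhotos(release.photos, { targetId: release.id, subpath }, release.title);
await storeTrailer(release.trailer, { targetId: release.id, subpath }, release.title);
*/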