'use strict'; const config = require('config'); const Promise = require('bluebird'); const fs = require('fs'); const fsPromises = require('fs').promises; const path = require('path'); const nanoid = require('nanoid/non-secure'); const mime = require('mime'); const sharp = require('sharp'); const blake2 = require('blake2'); const logger = require('./logger')(__filename); const argv = require('./argv'); const knex = require('./knex'); const http = require('./utils/http'); const { get } = require('./utils/qu'); function getHash(buffer) { const hash = blake2.createHash('blake2b', { digestLength: 24 }); hash.update(buffer); return hash.digest('hex'); } async function getEntropy(buffer) { try { const { entropy } = await sharp(buffer).stats(); return entropy; } catch (error) { logger.warn(`Failed to retrieve image entropy, using 7.5: ${error.message}`); return 7.5; } } async function getMeta(buffer) { try { const { width, height, size } = await sharp(buffer).metadata(); return { width, height, size, }; } catch (error) { logger.warn(`Failed to retrieve image metadata: ${error.message}`); return {}; } } async function getThumbnail(buffer, height = config.media.thumbnailSize) { try { const thumbnail = sharp(buffer) .resize({ height, withoutEnlargement: true, }) .jpeg({ quality: config.media.thumbnailQuality, }) .toBuffer(); return thumbnail; } catch (error) { logger.error(`Failed to create thumbnail: ${error.message}`); } return null; } function itemsByKey(items, key) { return items.reduce((acc, item) => ({ ...acc, [item[key]]: item }), {}); } function toBaseSource(rawSource) { if (rawSource.src || (rawSource.extract && rawSource.url)) { const baseSource = {}; if (rawSource.src) baseSource.src = rawSource.src; if (rawSource.quality) baseSource.quality = rawSource.quality; if (rawSource.type) baseSource.type = rawSource.type; if (rawSource.url) baseSource.url = rawSource.url; if (rawSource.extract) baseSource.extract = rawSource.extract; if (rawSource.referer) baseSource.referer = rawSource.referer; if (rawSource.host) baseSource.host = rawSource.host; return baseSource; } if (typeof rawSource === 'string') { return { src: rawSource, }; } return null; } function baseSourceToBaseMedia(baseSource, role) { if (Array.isArray(baseSource)) { if (baseSource.length > 0) { return { id: nanoid(), role, sources: baseSource, }; } return null; } if (baseSource) { return { id: nanoid(), role, sources: [baseSource], }; } return null; } function fallbackMediaToBaseMedia(rawMedia, role) { const baseSources = rawMedia .map(source => toBaseSource(source)) .filter(Boolean); return baseSourceToBaseMedia(baseSources, role); } function toBaseMedias(rawMedias, role) { if (!rawMedias || rawMedias.length === 0) { return []; } return rawMedias.map((rawMedia) => { if (!rawMedia) { return null; } if (Array.isArray(rawMedia)) { // fallback sources provided return fallbackMediaToBaseMedia(rawMedia, role); } const baseSource = toBaseSource(rawMedia); return baseSourceToBaseMedia(baseSource, role); }).filter(Boolean); } async function findSourceDuplicates(baseMedias) { const sourceUrls = baseMedias .map(baseMedia => baseMedia.sources.map(source => source.src)) .flat() .filter(Boolean); const extractUrls = baseMedias .map(baseMedia => baseMedia.sources.map(source => source.url)) .flat() .filter(Boolean); const [existingSourceMedia, existingExtractMedia] = await Promise.all([ knex('media').whereIn('source', sourceUrls), knex('media').whereIn('source_page', extractUrls), ]); const existingSourceMediaByUrl = itemsByKey(existingSourceMedia, 'source'); const existingExtractMediaByUrl = itemsByKey(existingExtractMedia, 'source_page'); return { existingSourceMediaByUrl, existingExtractMediaByUrl, }; } async function findHashDuplicates(medias) { const mediaHashes = medias.map(media => media.file?.hash).filter(Boolean); const existingHashMedia = await knex('media').whereIn('hash', mediaHashes); return itemsByKey(existingHashMedia, 'hash'); } async function extractSource(baseSource, { existingExtractMediaByUrl }) { if (typeof baseSource.extract !== 'function' || !baseSource.url) { return baseSource; } const existingExtractMedia = existingExtractMediaByUrl[baseSource.url]; if (existingExtractMedia) { // media entry found by extract URL return { ...baseSource, entry: existingExtractMedia, src: existingExtractMedia.source, }; } const res = await get(baseSource.url); if (res.ok) { const src = await baseSource.extract(res.item); return { ...baseSource, src, }; } throw new Error(`Could not extract source from ${baseSource.url}: ${res.status}`); } async function fetchSource(source, baseMedia, baseSourceIndex) { logger.silly(`Fetching media from ${source.src}`); // attempts async function attempt(attempts = 1) { try { const { pathname } = new URL(source.src); const mimetype = mime.getType(pathname); const extension = mime.getExtension(mimetype); const isImage = /image/.test(mimetype); const tempPath = path.join(config.media.path, 'temp', `${baseMedia.id}-${baseSourceIndex}.${extension}`); const res = await http.get(source.src, { ...(source.referer && { referer: source.referer }), ...(source.host && { host: source.host }), }, { stream: true, }); if (!res.ok) { throw new Error(`Response ${res.status} not OK`); } res.res.pipe(fs.createWriteStream(tempPath)); const buffer = res.body; console.log(res.body); const hash = getHash(buffer); const entropy = isImage ? await getEntropy(buffer) : null; const { size, width, height } = isImage ? await getMeta(buffer) : {}; logger.silly(`Fetched media from ${source.src}`); return { ...source, file: { temp: tempPath, mimetype, extension, hash, entropy, size, width, height, }, }; } catch (error) { logger.warn(`Failed attempt ${attempts}/3 to fetch ${source.src}: ${error.message}`); if (attempts < 3) { await Promise.delay(1000); return attempt(attempts + 1); } throw new Error(`Failed to fetch ${source.src}: ${error.message}`); } } return attempt(1); } async function trySource(baseSource, existingMedias, baseMedia, baseSourceIndex) { // catch error and try the next source const extractedSource = await extractSource(baseSource, existingMedias); const existingSourceMedia = existingMedias.existingSourceMediaByUrl[extractedSource.src]; if (extractedSource.entry) { // media entry found during extraction, don't fetch return extractedSource; } if (existingSourceMedia) { // media entry found by source URL, don't fetch return { ...baseSource, entry: existingSourceMedia, src: existingSourceMedia.source, }; } return fetchSource(extractedSource, baseMedia, baseSourceIndex, 1); } async function fetchMedia(baseMedia, existingMedias) { try { const source = await baseMedia.sources.reduce( // try each source until success (result, baseSource, baseSourceIndex) => result.catch(async () => trySource(baseSource, existingMedias, baseMedia, baseSourceIndex)), Promise.reject(new Error()), ); return { ...baseMedia, ...source, }; } catch (error) { logger.warn(error.message); return baseMedia; } } function saveMedia(media, existingHashMediaByHash) { const existingHashMedia = existingHashMediaByHash[media.file.hash]; if (existingHashMedia) { return { ...media, entry: existingHashMedia, }; } const hashDir = media.file.hash.slice(0, 2); const hashSubDir = media.file.hash.slice(2, 4); const hashFilename = media.file.hash.slice(4); const filename = media.quality ? `${hashFilename}_${media.quality}.${media.file.extension}` : `${hashFilename}.${media.file.extension}`; const filedir = path.join(media.role, hashDir, hashSubDir); const filepath = path.join(filedir, filename); console.log(filedir, filepath); return media; } async function storeMedias(baseMedias) { await fsPromises.mkdir(path.join(config.media.path, 'temp'), { recursive: true }); const { existingSourceMediaByUrl, existingExtractMediaByUrl } = await findSourceDuplicates(baseMedias); const fetchedMedias = await Promise.map(baseMedias, async baseMedia => fetchMedia(baseMedia, { existingSourceMediaByUrl, existingExtractMediaByUrl })); const existingHashMediaByHash = await findHashDuplicates(fetchedMedias); const savedMedias = await Promise.map(fetchedMedias, async fetchedMedia => saveMedia(fetchedMedia, existingHashMediaByHash)); } async function associateReleaseMedia(releases) { if (!argv.media) { return; } const baseMediasByReleaseId = releases.reduce((acc, release) => ({ ...acc, [release.id]: { poster: argv.images && argv.poster && toBaseMedias([release.poster], 'posters'), photos: argv.images && argv.photos && toBaseMedias(release.photos, 'photos').slice(0, 5), trailer: argv.videos && argv.trailer && toBaseMedias([release.trailer], 'trailers'), teaser: argv.videos && argv.teaser && toBaseMedias([release.teaser], 'teasers'), }, }), {}); const baseMedias = Object.values(baseMediasByReleaseId) .map(releaseMedia => Object.values(releaseMedia)) .flat(2) .filter(Boolean); await storeMedias(baseMedias); } module.exports = { associateReleaseMedia, };