Expanded new media module. Added network to channel site to fix actor glitch.
This commit is contained in:
268
src/media.js
268
src/media.js
@@ -1,13 +1,76 @@
|
||||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
const Promise = require('bluebird');
|
||||
const fs = require('fs');
|
||||
const fsPromises = require('fs').promises;
|
||||
const path = require('path');
|
||||
const nanoid = require('nanoid/non-secure');
|
||||
const mime = require('mime');
|
||||
const sharp = require('sharp');
|
||||
const blake2 = require('blake2');
|
||||
|
||||
const logger = require('./logger')(__filename);
|
||||
const argv = require('./argv');
|
||||
const knex = require('./knex');
|
||||
const http = require('./utils/http');
|
||||
const { get } = require('./utils/qu');
|
||||
|
||||
/**
 * Compute the content hash used to identify a media file.
 *
 * @param {Buffer} buffer - Raw file contents.
 * @returns {string} Hex-encoded BLAKE2b digest, requested with a digestLength of 24.
 */
function getHash(buffer) {
  const hashOptions = { digestLength: 24 };
  const hasher = blake2.createHash('blake2b', hashOptions);

  hasher.update(buffer);

  const hexDigest = hasher.digest('hex');

  return hexDigest;
}
|
||||
|
||||
/**
 * Read the entropy value that sharp reports for an image.
 *
 * Best-effort: any failure is logged as a warning and a fallback of 7.5
 * is returned instead of throwing.
 *
 * @param {Buffer} buffer - Image data handed to sharp.
 * @returns {Promise<number>} The `entropy` field of sharp's stats, or 7.5 on failure.
 */
async function getEntropy(buffer) {
  try {
    const stats = await sharp(buffer).stats();

    return stats.entropy;
  } catch (error) {
    logger.warn(`Failed to retrieve image entropy, using 7.5: ${error.message}`);

    return 7.5;
  }
}
|
||||
|
||||
/**
 * Read basic image metadata through sharp.
 *
 * Best-effort: a failure is logged as a warning and an empty object is
 * returned, so destructuring the result yields `undefined` fields.
 *
 * @param {Buffer} buffer - Image data handed to sharp.
 * @returns {Promise<{width?: number, height?: number, size?: number}>}
 *   The `width`, `height` and `size` fields of sharp's metadata, or `{}` on failure.
 */
async function getMeta(buffer) {
  try {
    const metadata = await sharp(buffer).metadata();

    return {
      width: metadata.width,
      height: metadata.height,
      size: metadata.size,
    };
  } catch (error) {
    logger.warn(`Failed to retrieve image metadata: ${error.message}`);

    return {};
  }
}
|
||||
|
||||
/**
 * Render a JPEG thumbnail for an image buffer.
 *
 * The image is resized to the given height without enlarging smaller
 * images, then encoded as JPEG at `config.media.thumbnailQuality`.
 *
 * @param {Buffer} buffer - Source image data handed to sharp.
 * @param {number} [height=config.media.thumbnailSize] - Target thumbnail height.
 * @returns {Promise<Buffer|null>} The thumbnail buffer, or `null` when
 *   thumbnail creation fails (the failure is logged as an error).
 */
async function getThumbnail(buffer, height = config.media.thumbnailSize) {
  try {
    // The pipeline must be awaited inside the try block: returning the
    // pending promise would let a rejection escape the catch below and
    // reject the caller instead of falling back to null.
    const thumbnail = await sharp(buffer)
      .resize({
        height,
        withoutEnlargement: true,
      })
      .jpeg({
        quality: config.media.thumbnailQuality,
      })
      .toBuffer();

    return thumbnail;
  } catch (error) {
    logger.error(`Failed to create thumbnail: ${error.message}`);
  }

  return null;
}
|
||||
|
||||
/**
 * Index a list of objects by the value of one of their properties.
 *
 * When several items share the same key value, the last one wins.
 *
 * @param {Object[]} items - Objects to index.
 * @param {string} key - Property whose value becomes the lookup key.
 * @returns {Object} Plain object mapping each `item[key]` to its item.
 */
function itemsByKey(items, key) {
  // Built in a single pass; spreading the accumulator on every iteration
  // copies all previous entries each time and is quadratic in items.length.
  // Object.fromEntries defines own data properties, like the computed key
  // in an object literal, so a key such as '__proto__' stays a plain entry.
  return Object.fromEntries(items.map(item => [item[key], item]));
}
|
||||
@@ -23,6 +86,9 @@ function toBaseSource(rawSource) {
|
||||
if (rawSource.url) baseSource.url = rawSource.url;
|
||||
if (rawSource.extract) baseSource.extract = rawSource.extract;
|
||||
|
||||
if (rawSource.referer) baseSource.referer = rawSource.referer;
|
||||
if (rawSource.host) baseSource.host = rawSource.host;
|
||||
|
||||
return baseSource;
|
||||
}
|
||||
|
||||
@@ -35,11 +101,12 @@ function toBaseSource(rawSource) {
|
||||
return null;
|
||||
}
|
||||
|
||||
function baseSourceToBaseMedia(baseSource) {
|
||||
function baseSourceToBaseMedia(baseSource, role) {
|
||||
if (Array.isArray(baseSource)) {
|
||||
if (baseSource.length > 0) {
|
||||
return {
|
||||
id: nanoid(),
|
||||
role,
|
||||
sources: baseSource,
|
||||
};
|
||||
}
|
||||
@@ -50,6 +117,7 @@ function baseSourceToBaseMedia(baseSource) {
|
||||
if (baseSource) {
|
||||
return {
|
||||
id: nanoid(),
|
||||
role,
|
||||
sources: [baseSource],
|
||||
};
|
||||
}
|
||||
@@ -57,15 +125,15 @@ function baseSourceToBaseMedia(baseSource) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Convert a list of fallback sources for one media item into a base media.
 *
 * Each raw source is normalised with `toBaseSource`; sources that do not
 * normalise to a truthy value are dropped before the remaining list is
 * handed to `baseSourceToBaseMedia`.
 *
 * @param {Array} rawMedia - Raw fallback sources for a single media item.
 * @param {string} role - Role forwarded to `baseSourceToBaseMedia`.
 * @returns {*} Whatever `baseSourceToBaseMedia` returns for the usable sources.
 */
function fallbackMediaToBaseMedia(rawMedia, role) {
  const baseSources = rawMedia
    // wrapped in an arrow so map's index/array arguments are not forwarded
    .map(source => toBaseSource(source))
    .filter(Boolean);

  return baseSourceToBaseMedia(baseSources, role);
}
|
||||
|
||||
function toBaseMedias(rawMedias) {
|
||||
function toBaseMedias(rawMedias, role) {
|
||||
if (!rawMedias || rawMedias.length === 0) {
|
||||
return [];
|
||||
}
|
||||
@@ -77,12 +145,12 @@ function toBaseMedias(rawMedias) {
|
||||
|
||||
if (Array.isArray(rawMedia)) {
|
||||
// fallback sources provided
|
||||
return fallbackMediaToBaseMedia(rawMedia);
|
||||
return fallbackMediaToBaseMedia(rawMedia, role);
|
||||
}
|
||||
|
||||
const baseSource = toBaseSource(rawMedia);
|
||||
|
||||
return baseSourceToBaseMedia(baseSource);
|
||||
return baseSourceToBaseMedia(baseSource, role);
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
@@ -97,7 +165,6 @@ async function findSourceDuplicates(baseMedias) {
|
||||
.flat()
|
||||
.filter(Boolean);
|
||||
|
||||
|
||||
const [existingSourceMedia, existingExtractMedia] = await Promise.all([
|
||||
knex('media').whereIn('source', sourceUrls),
|
||||
knex('media').whereIn('source_page', extractUrls),
|
||||
@@ -112,37 +179,183 @@ async function findSourceDuplicates(baseMedias) {
|
||||
};
|
||||
}
|
||||
|
||||
async function extractSource(baseSource) {
|
||||
if (!baseSource.extract || !baseSource.url) {
|
||||
/**
 * Look up stored media entries whose hash matches a fetched file.
 *
 * Media without a `file.hash` are ignored when building the lookup list.
 *
 * @param {Object[]} medias - Media objects, optionally carrying `file.hash`.
 * @returns {Promise<Object>} Rows from the `media` table indexed by their `hash`.
 */
async function findHashDuplicates(medias) {
  const hashes = [];

  for (const media of medias) {
    const hash = media.file?.hash;

    if (hash) {
      hashes.push(hash);
    }
  }

  const matchingRows = await knex('media').whereIn('hash', hashes);

  return itemsByKey(matchingRows, 'hash');
}
|
||||
|
||||
/**
 * Resolve the actual media URL for a source that has to be extracted
 * from a page.
 *
 * Sources without an `extract` function or without a `url` are returned
 * untouched. If a media entry is already known for the page URL, it is
 * attached as `entry` and its `source` is used as `src`, and the page is
 * not requested. Otherwise the page is requested and `baseSource.extract`
 * is called with the response's `item` to produce `src`.
 *
 * @param {Object} baseSource - Source with optional `url` and `extract`.
 * @param {Object} [existingMedias] - Known media lookups.
 * @param {Object} [existingMedias.existingExtractMediaByUrl] - Media entries
 *   indexed by page URL.
 * @returns {Promise<Object>} The source, extended with `src` (and `entry`
 *   when an existing media entry was found).
 * @throws {Error} When the page request does not succeed.
 */
async function extractSource(baseSource, { existingExtractMediaByUrl = {} } = {}) {
  if (typeof baseSource.extract !== 'function' || !baseSource.url) {
    return baseSource;
  }

  const existingExtractMedia = existingExtractMediaByUrl[baseSource.url];

  if (existingExtractMedia) {
    // media entry found by extract URL
    return {
      ...baseSource,
      entry: existingExtractMedia,
      src: existingExtractMedia.source,
    };
  }

  const res = await get(baseSource.url);

  // A leftover debug log and unconditional `return baseSource` used to sit
  // here, which made the extraction and the error below unreachable.
  if (res.ok) {
    const src = await baseSource.extract(res.item);

    return {
      ...baseSource,
      src,
    };
  }

  throw new Error(`Could not extract source from ${baseSource.url}: ${res.status}`);
}
|
||||
|
||||
/**
 * Download a media source and collect information about the file.
 *
 * The request is attempted up to three times, waiting one second between
 * attempts. On success the source is returned with a `file` object holding
 * the temp path, mimetype, extension, hash and, for images, entropy and
 * dimensions.
 *
 * @param {Object} source - Source with `src` and optional `referer` / `host`.
 * @param {Object} baseMedia - Media the source belongs to; its `id` is used
 *   in the temp filename.
 * @param {number} baseSourceIndex - Index of the source within the media,
 *   used in the temp filename.
 * @returns {Promise<Object>} The source extended with a `file` description.
 * @throws {Error} When all three attempts fail.
 */
async function fetchSource(source, baseMedia, baseSourceIndex) {
  logger.silly(`Fetching media from ${source.src}`);

  async function attempt(attempts = 1) {
    try {
      // mimetype and extension are derived from the URL path, not from the response
      const { pathname } = new URL(source.src);
      const mimetype = mime.getType(pathname);
      const extension = mime.getExtension(mimetype);
      const isImage = /image/.test(mimetype);

      const tempPath = path.join(config.media.path, 'temp', `${baseMedia.id}-${baseSourceIndex}.${extension}`);

      const res = await http.get(source.src, {
        ...(source.referer && { referer: source.referer }),
        ...(source.host && { host: source.host }),
      }, {
        stream: true,
      });

      if (!res.ok) {
        throw new Error(`Response ${res.status} not OK`);
      }

      // NOTE(review): the pipe is not awaited, so this function can return
      // before the temp file is fully written — confirm whether callers
      // rely on the file being complete.
      res.res.pipe(fs.createWriteStream(tempPath));

      // NOTE(review): assumes the http utility still populates `body` when
      // `stream: true` is requested — TODO confirm.
      const buffer = res.body;

      const hash = getHash(buffer);
      const entropy = isImage ? await getEntropy(buffer) : null;
      const { size, width, height } = isImage ? await getMeta(buffer) : {};

      logger.silly(`Fetched media from ${source.src}`);

      return {
        ...source,
        file: {
          temp: tempPath,
          mimetype,
          extension,
          hash,
          entropy,
          size,
          width,
          height,
        },
      };
    } catch (error) {
      logger.warn(`Failed attempt ${attempts}/3 to fetch ${source.src}: ${error.message}`);

      if (attempts < 3) {
        // `Promise` is the module-level Promise implementation, which provides delay()
        await Promise.delay(1000);

        return attempt(attempts + 1);
      }

      throw new Error(`Failed to fetch ${source.src}: ${error.message}`);
    }
  }

  return attempt(1);
}
|
||||
|
||||
/**
 * Resolve a single source of a media item, fetching it only when needed.
 *
 * The source is first run through `extractSource`. If extraction already
 * attached a media `entry`, that result is returned. If a media entry is
 * known for the resolved `src`, the base source is returned with that entry
 * attached. Otherwise the file is fetched with `fetchSource`.
 *
 * Errors are not handled here; they propagate to the caller.
 *
 * @param {Object} baseSource - Source to resolve.
 * @param {Object} existingMedias - Lookups; `existingSourceMediaByUrl` is read here.
 * @param {Object} baseMedia - Media the source belongs to.
 * @param {number} baseSourceIndex - Index of the source within the media.
 * @returns {Promise<Object>} The resolved source.
 */
async function trySource(baseSource, existingMedias, baseMedia, baseSourceIndex) {
  const extracted = await extractSource(baseSource, existingMedias);
  const knownBySourceUrl = existingMedias.existingSourceMediaByUrl[extracted.src];

  // media entry found during extraction, don't fetch
  if (extracted.entry) {
    return extracted;
  }

  if (!knownBySourceUrl) {
    return fetchSource(extracted, baseMedia, baseSourceIndex, 1);
  }

  // media entry found by source URL, don't fetch
  return {
    ...baseSource,
    entry: knownBySourceUrl,
    src: knownBySourceUrl.source,
  };
}
|
||||
|
||||
/**
 * Resolve a media item by trying its sources in order until one succeeds.
 *
 * Each source is attempted with `trySource` only if every earlier source
 * failed. When a source succeeds, its fields are merged over the base media.
 * When all sources fail (or there are none), the last error message is
 * logged as a warning and the base media is returned unchanged.
 *
 * @param {Object} baseMedia - Media with a `sources` array.
 * @param {Object} existingMedias - Known media lookups, forwarded to `trySource`.
 * @returns {Promise<Object>} The base media, merged with the first source
 *   that resolved, or the untouched base media on failure.
 */
async function fetchMedia(baseMedia, existingMedias) {
  try {
    // Seeded with a rejection so the first source is attempted from the
    // first catch handler; a fulfilled result skips all remaining handlers.
    const source = await baseMedia.sources.reduce(
      // try each source until success
      (result, baseSource, baseSourceIndex) => result.catch(() => trySource(baseSource, existingMedias, baseMedia, baseSourceIndex)),
      Promise.reject(new Error()),
    );

    return {
      ...baseMedia,
      ...source,
    };
  } catch (error) {
    logger.warn(error.message);

    return baseMedia;
  }
}
|
||||
|
||||
/**
 * Decide where a fetched media file belongs, reusing an existing entry when
 * a file with the same hash is already known.
 *
 * Media that carries no `file` (nothing was fetched for it) is returned
 * unchanged. Media whose hash matches a known entry is returned with that
 * entry attached. Otherwise the target directory and filename are derived
 * from the role, the hash and the optional quality.
 *
 * @param {Object} media - Media, optionally with `file.hash` and `file.extension`.
 * @param {Object} existingHashMediaByHash - Known media entries indexed by hash.
 * @returns {Object} The media, with `entry` attached when a duplicate was found.
 */
function saveMedia(media, existingHashMediaByHash) {
  // Media can arrive without a file; there is no hash to look up or
  // derive a path from, so leave it untouched instead of throwing.
  if (!media.file) {
    return media;
  }

  const existingHashMedia = existingHashMediaByHash[media.file.hash];

  if (existingHashMedia) {
    return {
      ...media,
      entry: existingHashMedia,
    };
  }

  // spread files over two directory levels named after the leading hash characters
  const hashDir = media.file.hash.slice(0, 2);
  const hashSubDir = media.file.hash.slice(2, 4);
  const hashFilename = media.file.hash.slice(4);

  const filename = media.quality
    ? `${hashFilename}_${media.quality}.${media.file.extension}`
    : `${hashFilename}.${media.file.extension}`;

  const filedir = path.join(media.role, hashDir, hashSubDir);
  const filepath = path.join(filedir, filename);

  // NOTE(review): nothing is written to filepath yet; the path is only
  // printed. Looks like a placeholder for the actual move — confirm.
  console.log(filedir, filepath);

  return media;
}
|
||||
|
||||
/**
 * Fetch and save a batch of base media.
 *
 * Ensures the temp directory under `config.media.path` exists, looks up
 * media already known by source or page URL, fetches every base media
 * once, then looks up hash duplicates among the fetched files and passes
 * each fetched media through `saveMedia`.
 *
 * @param {Object[]} baseMedias - Media to fetch and save.
 * @returns {Promise<Object[]>} The results of `saveMedia` for each media.
 */
async function storeMedias(baseMedias) {
  await fsPromises.mkdir(path.join(config.media.path, 'temp'), { recursive: true });

  const { existingSourceMediaByUrl, existingExtractMediaByUrl } = await findSourceDuplicates(baseMedias);

  // Each media is fetched exactly once; `Promise.map` comes from the
  // module-level Promise implementation.
  const fetchedMedias = await Promise.map(
    baseMedias,
    baseMedia => fetchMedia(baseMedia, { existingSourceMediaByUrl, existingExtractMediaByUrl }),
  );

  const existingHashMediaByHash = await findHashDuplicates(fetchedMedias);

  // wrapped in an arrow so only the media and the hash lookup reach saveMedia
  const savedMedias = await Promise.map(
    fetchedMedias,
    fetchedMedia => saveMedia(fetchedMedia, existingHashMediaByHash),
  );

  return savedMedias;
}
|
||||
|
||||
async function associateReleaseMedia(releases) {
|
||||
@@ -153,14 +366,17 @@ async function associateReleaseMedia(releases) {
|
||||
const baseMediasByReleaseId = releases.reduce((acc, release) => ({
|
||||
...acc,
|
||||
[release.id]: {
|
||||
poster: argv.images && argv.poster && toBaseMedias([release.poster]),
|
||||
photos: argv.images && argv.photos && toBaseMedias(release.photos),
|
||||
trailer: argv.videos && argv.trailer && toBaseMedias([release.trailer]),
|
||||
teaser: argv.videos && argv.teaser && toBaseMedias([release.teaser]),
|
||||
poster: argv.images && argv.poster && toBaseMedias([release.poster], 'posters'),
|
||||
photos: argv.images && argv.photos && toBaseMedias(release.photos, 'photos').slice(0, 5),
|
||||
trailer: argv.videos && argv.trailer && toBaseMedias([release.trailer], 'trailers'),
|
||||
teaser: argv.videos && argv.teaser && toBaseMedias([release.teaser], 'teasers'),
|
||||
},
|
||||
}), {});
|
||||
|
||||
const baseMedias = Object.values(baseMediasByReleaseId).map(releaseMedia => Object.values(releaseMedia)).flat(2);
|
||||
const baseMedias = Object.values(baseMediasByReleaseId)
|
||||
.map(releaseMedia => Object.values(releaseMedia))
|
||||
.flat(2)
|
||||
.filter(Boolean);
|
||||
|
||||
await storeMedias(baseMedias);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user