Refactored media module. Returning 320p and 720p videos from MindGeek as teasers instead of trailers.
This commit is contained in:
274
src/media.js
274
src/media.js
@@ -4,11 +4,14 @@ const config = require('config');
|
||||
const Promise = require('bluebird');
|
||||
const bhttp = require('bhttp');
|
||||
const mime = require('mime');
|
||||
const fs = require('fs-extra');
|
||||
const sharp = require('sharp');
|
||||
const path = require('path');
|
||||
const blake2 = require('blake2');
|
||||
|
||||
const logger = require('./logger');
|
||||
const logger = require('./logger')(__filename);
|
||||
const knex = require('./knex');
|
||||
const { ex } = require('./utils/q');
|
||||
|
||||
function getHash(buffer) {
|
||||
const hash = blake2.createHash('blake2b', { digestLength: 24 });
|
||||
@@ -17,6 +20,26 @@ function getHash(buffer) {
|
||||
return hash.digest('hex');
|
||||
}
|
||||
|
||||
/**
 * Render a JPEG thumbnail for an image buffer.
 *
 * The image is resized to a height of `config.media.thumbnailSize` without
 * enlarging it, and encoded as JPEG at `config.media.thumbnailQuality`.
 *
 * @param {Buffer} buffer - Image data passed to sharp.
 * @returns {Promise<Buffer|null>} The thumbnail data, or null when it could not be created.
 */
async function createThumbnail(buffer) {
	try {
		// toBuffer() returns a promise; it has to be awaited inside the try block,
		// otherwise a rejection skips the catch below and propagates to the caller
		const thumbnail = await sharp(buffer)
			.resize({
				height: config.media.thumbnailSize,
				withoutEnlargement: true,
			})
			.jpeg({
				quality: config.media.thumbnailQuality,
			})
			.toBuffer();

		return thumbnail;
	} catch (error) {
		logger.error(`Failed to create thumbnail: ${error.message}`);
	}

	return null;
}
|
||||
|
||||
function pluckItems(items, specifiedLimit) {
|
||||
const limit = specifiedLimit || config.media.limit;
|
||||
|
||||
@@ -30,6 +53,13 @@ function pluckItems(items, specifiedLimit) {
|
||||
return Array.from(new Set(plucked)).map(itemIndex => items[itemIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
|
||||
}
|
||||
|
||||
/**
 * Pick one item out of a list of alternatives that differ in quality.
 *
 * The qualities listed in `config.media.videoQuality` are tried in order and
 * the first one for which an item is available wins. When several items share
 * a quality, the last of them is used. If none of the configured qualities is
 * available, the first item of the list is returned.
 *
 * @param {Array<Object>} items - Alternatives, each with a `quality` property.
 * @returns {Object|undefined} The selected item, undefined for an empty list.
 */
function pickQuality(items) {
	const itemsByQuality = Object.fromEntries(items.map(candidate => [candidate.quality, candidate]));

	for (const quality of config.media.videoQuality) {
		const preferredItem = itemsByQuality[quality];

		if (preferredItem) {
			return preferredItem;
		}
	}

	// none of the configured qualities is on offer
	return items[0];
}
|
||||
|
||||
async function getEntropy(buffer) {
|
||||
try {
|
||||
const { entropy } = await sharp(buffer).stats();
|
||||
@@ -42,33 +72,58 @@ async function getEntropy(buffer) {
|
||||
}
|
||||
}
|
||||
|
||||
async function fetchItem(source, index, existingItemsBySource, attempt = 1) {
|
||||
/**
 * Resolve a media source that points at a page rather than at the file itself.
 *
 * The page at `source.src` is requested and, on a 200 response, its body is
 * wrapped by `ex()` and the resulting `q` is handed to `source.extract`.
 *
 * @param {Object} source - Source with a `src` URL and an `extract` callback.
 * @returns {Promise<*>} Whatever `source.extract` returns, or null when the
 *   page did not respond with status 200.
 */
async function extractItem(source) {
	const res = await bhttp.get(source.src);

	if (res.statusCode !== 200) {
		return null;
	}

	const { q } = ex(res.body.toString());

	return source.extract(q);
}
|
||||
|
||||
async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null) {
|
||||
try {
|
||||
if (Array.isArray(source)) {
|
||||
if (source.every(sourceX => !!sourceX.quality)) {
|
||||
// various video qualities provided
|
||||
const selectedSource = pickQuality(source);
|
||||
return fetchItem(selectedSource, index, existingItemsBySource, domain, role, attempt, originalSource);
|
||||
}
|
||||
|
||||
// fallbacks provided
|
||||
return source.reduce((outcome, sourceX) => outcome.catch(async () => {
|
||||
const item = await fetchItem(sourceX, index, existingItemsBySource);
|
||||
|
||||
if (item) {
|
||||
return item;
|
||||
}
|
||||
|
||||
throw new Error(`Item not available: ${source}`);
|
||||
}), Promise.reject(new Error()));
|
||||
return source.reduce(
|
||||
(outcome, sourceX) => outcome.catch(async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, originalSource)),
|
||||
Promise.reject(new Error()),
|
||||
);
|
||||
}
|
||||
|
||||
if (source.src && source.extract) {
|
||||
// source links to page containing a (presumably) tokenized photo
|
||||
const itemSource = await extractItem(source);
|
||||
|
||||
return fetchItem(itemSource, index, existingItemsBySource, domain, role, attempt, source);
|
||||
}
|
||||
|
||||
|
||||
if (existingItemsBySource[source]) {
|
||||
return existingItemsBySource[source];
|
||||
return null;
|
||||
}
|
||||
|
||||
const res = await bhttp.get(source);
|
||||
logger.verbose(`Fetching media item from ${source.src || source}`);
|
||||
|
||||
const res = await bhttp.get(source.src || source);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
const { pathname } = new URL(source);
|
||||
const { pathname } = new URL(source.src || source);
|
||||
const mimetype = mime.getType(pathname);
|
||||
const extension = mime.getExtension(mimetype);
|
||||
const hash = getHash(res.body);
|
||||
const entropy = await getEntropy(res.body);
|
||||
const entropy = /image/.test(mimetype) ? await getEntropy(res.body) : null;
|
||||
|
||||
logger.verbose(`Fetched media item from ${source.src || source}`);
|
||||
|
||||
return {
|
||||
file: res.body,
|
||||
@@ -76,40 +131,193 @@ async function fetchItem(source, index, existingItemsBySource, attempt = 1) {
|
||||
extension,
|
||||
hash,
|
||||
entropy,
|
||||
source,
|
||||
quality: source.quality || null,
|
||||
source: originalSource?.src || originalSource || source.src || source,
|
||||
};
|
||||
}
|
||||
|
||||
throw new Error(`Response ${res.statusCode} not OK`);
|
||||
} catch (error) {
|
||||
if (attempt <= 3) {
|
||||
return fetchItem(source, index, existingItemsBySource, attempt + 1);
|
||||
logger.warn(`Failed attempt ${attempt}/3 to fetch ${domain} ${role} ${index + 1} (${source.src || source}): ${error}`);
|
||||
|
||||
if (attempt < 3) {
|
||||
await Promise.delay(5000);
|
||||
return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1);
|
||||
}
|
||||
|
||||
throw new Error(`Failed to fetch media from ${source}: ${error}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async function fetchItems(itemSources, existingItemsBySource) {
|
||||
return Promise.map(itemSources, async (source, index) => fetchItem(source, index, existingItemsBySource));
|
||||
/**
 * Fetch a list of media sources, at most ten at a time.
 *
 * Every source is passed to `fetchItem` together with its index in the list;
 * falsy outcomes are left out of the result.
 *
 * @param {Array} itemSources - Sources to fetch.
 * @param {Object} existingItemsBySource - Passed through to `fetchItem`.
 * @param {string} domain - Passed through to `fetchItem`.
 * @param {string} role - Passed through to `fetchItem`.
 * @returns {Promise<Array>} The truthy results of `fetchItem`.
 */
async function fetchItems(itemSources, existingItemsBySource, domain, role) {
	const fetchSource = async (source, index) => fetchItem(source, index, existingItemsBySource, domain, role);

	const fetchedItems = await Promise.map(itemSources, fetchSource, {
		concurrency: 10,
	});

	return fetchedItems.filter(Boolean);
}
|
||||
|
||||
async function storeReleaseMedia(releases, {
|
||||
type = 'poster',
|
||||
} = {}) {
|
||||
const pluckedSources = releases.map(release => pluckItems(release[type]));
|
||||
const existingSourceItems = await knex('media').whereIn('source', pluckedSources.flat());
|
||||
const existingItemsBySource = existingSourceItems.reduce((acc, item) => ({ ...acc, [item.source]: item }), {});
|
||||
async function saveItems(items, domain, role) {
|
||||
return Promise.map(items, async (item) => {
|
||||
try {
|
||||
const dir = item.hash.slice(0, 2);
|
||||
const subdir = item.hash.slice(2, 4);
|
||||
const filename = item.quality
|
||||
? `${item.hash.slice(4)}_${item.quality}.${item.extension}`
|
||||
: `${item.hash.slice(4)}.${item.extension}`;
|
||||
|
||||
const fetchedItems = await fetchItems(pluckedSources, existingItemsBySource);
|
||||
const existingHashItems = await knex('media').whereIn('hash', fetchedItems.map(item => item.hash));
|
||||
const existingItemsByHash = existingHashItems.reduce((acc, item) => ({ ...acc, [item.hash]: item }), {});
|
||||
const filedir = path.join(`${domain}s`, `${role}s`, dir, subdir);
|
||||
const filepath = path.join(filedir, filename);
|
||||
|
||||
const newItems = fetchedItems.filter(item => !existingItemsByHash[item.hash]);
|
||||
await fs.mkdir(path.join(config.media.path, filedir), { recursive: true });
|
||||
await fs.writeFile(path.join(config.media.path, filepath), item.file);
|
||||
|
||||
console.log(fetchedItems, existingHashItems, existingItemsByHash, newItems);
|
||||
if (/image/.test(item.mimetype)) {
|
||||
const thumbnail = await createThumbnail(item.file);
|
||||
|
||||
const thumbdir = path.join(`${domain}s`, `${role}s`, 'thumbs', dir, subdir);
|
||||
const thumbpath = path.join(thumbdir, filename);
|
||||
|
||||
await fs.mkdir(path.join(config.media.path, thumbdir), { recursive: true });
|
||||
await fs.writeFile(path.join(config.media.path, thumbpath), thumbnail);
|
||||
|
||||
logger.verbose(`Saved ${domain} ${role} with thumbnail to ${filepath}`);
|
||||
|
||||
return {
|
||||
...item,
|
||||
thumbnail,
|
||||
filepath,
|
||||
thumbpath,
|
||||
};
|
||||
}
|
||||
|
||||
logger.verbose(`Saved ${domain} ${role} to ${filepath}`);
|
||||
|
||||
return {
|
||||
...item,
|
||||
filepath,
|
||||
};
|
||||
} catch (error) {
|
||||
logger.error(`Failed to store ${domain} ${role} from ${item.source}: ${error.message}`);
|
||||
return null;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Turn saved media items into the entries inserted into the media table.
 *
 * Falsy items are dropped first; `index` is the position of an item among
 * the remaining ones.
 *
 * @param {Array<Object|null>} items - Saved items, possibly containing gaps.
 * @returns {Array<Object>} Entries with path, thumbnail, mime, hash, source,
 *   entropy and index.
 */
function curateItemEntries(items) {
	const presentItems = items.filter(Boolean);

	return presentItems.map(({
		filepath,
		thumbpath,
		mimetype,
		hash,
		source,
		entropy,
	}, index) => ({
		path: filepath,
		thumbnail: thumbpath,
		mime: mimetype,
		hash,
		source,
		entropy,
		index,
	}));
}
|
||||
|
||||
/**
 * Index media items by their source and by their hash.
 *
 * When several items share a source or a hash, the last one in the list is
 * the one kept under that key.
 *
 * @param {Array<Object>} items - Items with `source` and `hash` properties.
 * @returns {{ source: Object, hash: Object }} Two lookup objects, keyed by
 *   source and by hash respectively.
 */
function groupItems(items) {
	// build each lookup in a single pass; spreading the accumulator inside a
	// reduce copies both lookups again for every item
	return {
		source: Object.fromEntries(items.map(item => [item.source, item])),
		hash: Object.fromEntries(items.map(item => [item.hash, item])),
	};
}
|
||||
|
||||
/**
 * Download, save and register the media behind a list of sources.
 *
 * Sources already present in the media table are not fetched again, and
 * fetched items whose hash is already present are not saved again. The
 * remaining items are written to disk through `saveItems` and inserted into
 * the media table.
 *
 * @param {Array} sources - Media sources; falsy entries are ignored. An entry
 *   may be a plain value, an object with a `src` property, or an array of those.
 * @param {string} domain - Passed to `fetchItems` and `saveItems`, used in log output.
 * @param {string} role - Passed to `fetchItems` and `saveItems`, used in log output.
 * @returns {Promise<Object>} Media table rows keyed by source; an empty
 *   object when no sources were given.
 */
async function storeMedia(sources, domain, role) {
	const presentSources = sources.filter(Boolean);

	if (presentSources.length === 0) {
		return {};
	}

	// find source duplicates that don't need to be re-downloaded or re-saved
	const existingSourceItems = await knex('media').whereIn('source', presentSources.flat().map(source => source.src || source));
	const { source: existingSourceItemsBySource, hash: existingSourceItemsByHash } = groupItems(existingSourceItems);

	// download media items from new sources
	const fetchedItems = await fetchItems(presentSources, existingSourceItemsBySource, domain, role);
	const { hash: fetchedItemsByHash } = groupItems(fetchedItems);

	// find hash duplicates that don't need to be re-saved
	const uniqueFetchedItems = Object.values(fetchedItemsByHash);
	const existingHashItems = await knex('media').whereIn('hash', uniqueFetchedItems.map(item => item.hash));
	const { hash: existingHashItemsByHash } = groupItems(existingHashItems);

	// save new items to disk
	const newItems = uniqueFetchedItems.filter(item => !existingHashItemsByHash[item.hash]);
	const savedItems = await saveItems(newItems, domain, role);

	// store new items in database, there is nothing to insert when every item was a duplicate or failed to save
	const curatedItemEntries = curateItemEntries(savedItems);
	const insertedItems = curatedItemEntries.length > 0
		? await knex('media').insert(curatedItemEntries).returning('*')
		: [];

	const storedItems = Array.isArray(insertedItems) ? insertedItems : [];
	const { hash: storedItemsByHash } = groupItems(storedItems);

	// accumulate existing and new items by source to be mapped onto releases
	const itemsByHash = { ...existingSourceItemsByHash, ...existingHashItemsByHash, ...storedItemsByHash };

	const itemsBySource = {
		...existingSourceItemsBySource,
		...fetchedItems.reduce((acc, item) => ({ ...acc, [item.source]: itemsByHash[item.hash] }), {}),
	};

	// report what was actually added to the media table; fetched items also include hash duplicates and failed saves
	logger.info(`Stored ${storedItems.length} new ${domain} ${role}s`);

	return itemsBySource;
}
|
||||
|
||||
/**
 * Split the associations of one target into regular ones and a primary one.
 *
 * - Without a primary role, all associations are returned under `role`.
 * - When the target already has a primary item, the association with the same
 *   `media_id` is removed from the list and no new primary item is returned.
 * - Otherwise the first association becomes the primary item and the rest
 *   are returned under `role`.
 *
 * @param {Array<Object>} associations - Associations with a `media_id`.
 * @param {string|number} targetId - Key into `primaryItemsByTargetId`.
 * @param {string} role - Key for the regular associations in the result.
 * @param {string} [primaryRole] - Key for the primary association in the result.
 * @param {Object} primaryItemsByTargetId - Existing primary items by target ID.
 * @returns {Object} Object with a `role` key and a `primaryRole` key.
 */
function extractPrimaryItem(associations, targetId, role, primaryRole, primaryItemsByTargetId) {
	if (!primaryRole) {
		return { [role]: associations, [primaryRole]: null };
	}

	const existingPrimaryItem = primaryItemsByTargetId[targetId];

	if (!existingPrimaryItem) {
		const [primaryAssociation, ...otherAssociations] = associations;

		return {
			[role]: otherAssociations,
			[primaryRole]: primaryAssociation,
		};
	}

	return {
		[role]: associations.filter(association => association.media_id !== existingPrimaryItem.media_id),
		[primaryRole]: null,
	};
}
|
||||
|
||||
/**
 * Build the media associations for a single target.
 *
 * Each source is looked up in `mediaBySource` by its `src` property, or by
 * the source itself when it has none. For an array of sources, the first one
 * with a match is used. Sources without a match are left out. The resulting
 * list is divided into regular and primary associations by `extractPrimaryItem`.
 *
 * @param {string|number} targetId - Stored under `${domain}_id` in every association.
 * @param {Array} [sources] - Sources of the target; falsy entries are ignored.
 * @param {Object} mediaBySource - Media items keyed by source.
 * @param {string} domain - Prefix of the target ID column.
 * @param {string} role - Key for the regular associations in the result.
 * @param {string} [primaryRole] - Key for the primary association in the result.
 * @param {Object} primaryItemsByTargetId - Existing primary items by target ID.
 * @returns {Object} Object with a `role` key and a `primaryRole` key; both
 *   are null when no sources were given.
 */
function associateTargetMedia(targetId, sources, mediaBySource, domain, role, primaryRole, primaryItemsByTargetId) {
	if (!sources) {
		return { [role]: null, [primaryRole]: null };
	}

	const findMediaItem = (source) => {
		if (!Array.isArray(source)) {
			return mediaBySource[source.src || source];
		}

		// stop at the first alternative that has a media item
		for (const sourceX of source) {
			const match = mediaBySource[sourceX.src || sourceX];

			if (match) {
				return match;
			}
		}

		return null;
	};

	const associations = [];

	for (const source of sources) {
		const mediaItem = source ? findMediaItem(source) : null;

		if (mediaItem) {
			associations.push({ [`${domain}_id`]: targetId, media_id: mediaItem.id });
		}
	}

	logger.info(`Associating ${associations.length} ${role}s to ${domain} ${targetId}`);

	return extractPrimaryItem(associations, targetId, role, primaryRole, primaryItemsByTargetId);
}
|
||||
|
||||
/**
 * Link stored media to their targets in the association tables.
 *
 * Regular associations are inserted into `${domain}s_${role}s`, primary ones
 * into `${domain}s_${primaryRole}s`. Both inserts are issued with
 * ON CONFLICT DO NOTHING, and an insert is skipped when it has no rows.
 *
 * @param {Object} sourcesByTargetId - Media sources keyed by target ID.
 * @param {Object} mediaBySource - Media items keyed by source.
 * @param {string} domain - Prefix of the table names and the target ID column.
 * @param {string} role - Role of the regular associations.
 * @param {string} [primaryRole] - Role of the primary association, if any.
 * @returns {Promise<Array>} Outcome of both inserts, false for a skipped insert.
 */
async function associateMedia(sourcesByTargetId, mediaBySource, domain, role, primaryRole) {
	const targetColumn = `${domain}_id`;

	const primaryItems = primaryRole
		? await knex(`${domain}s_${primaryRole}s`).whereIn(targetColumn, Object.keys(sourcesByTargetId))
		: [];

	const primaryItemsByTargetId = Object.fromEntries(primaryItems.map(item => [item[targetColumn], item]));

	const associationsPerTarget = await Promise.map(
		Object.entries(sourcesByTargetId),
		([targetId, sources]) => associateTargetMedia(targetId, sources, mediaBySource, domain, role, primaryRole, primaryItemsByTargetId),
	);

	const associations = associationsPerTarget.flatMap(association => association[role]).filter(Boolean);
	const primaryAssociations = associationsPerTarget.map(association => association[primaryRole]).filter(Boolean);

	// render the insert to SQL so the conflict clause can be appended
	const insertUnlessPresent = (table, rows) => rows.length > 0
		&& knex.raw(`${knex(table).insert(rows).toString()} ON CONFLICT DO NOTHING`);

	return Promise.all([
		insertUnlessPresent(`${domain}s_${role}s`, associations),
		insertUnlessPresent(`${domain}s_${primaryRole}s`, primaryAssociations),
	]);
}
|
||||
|
||||
module.exports = {
|
||||
storeReleaseMedia,
|
||||
pluckItems,
|
||||
storeMedia,
|
||||
associateMedia,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user