ripunzel/src/fetch/content.js

194 lines
5.6 KiB
JavaScript

'use strict';
const config = require('config');
const Promise = require('bluebird');
const yaml = require('js-yaml');
const logger = require('../logger')(__filename);
const saveProfileDetails = require('../save/profileDetails');
const fetchItem = require('./item');
const interpolate = require('../interpolate');
const save = require('../save/save');
// const textToStream = require('../save/textToStream');
const saveMeta = require('../save/meta');
const mux = require('../save/mux');
const writeToIndex = require('../save/writeToIndex');
/**
 * Reduce a tree of raw comments to the fields that are kept in the saved output.
 *
 * @param {Object[]} [comments] - Raw comments. Each one is read for `body`,
 *   `author`, `score`, `datetime` and an optional `replies` array of the same shape.
 *   A missing (null/undefined) list is treated as empty.
 * @returns {Object[]} One `{ text, author, score, date }` object per comment, with
 *   a `replies` key (curated recursively) only for comments that have replies.
 */
function curateComments(comments) {
    return (comments ?? []).map((comment) => {
        const curatedComment = {
            text: comment.body,
            author: comment.author,
            score: comment.score,
            date: comment.datetime,
        };

        // Leaf comments may come without a `replies` property at all; treat that
        // the same as an empty list instead of throwing on `.length`.
        if (!comment.replies?.length) {
            return curatedComment;
        }

        return {
            ...curatedComment,
            replies: curateComments(comment.replies),
        };
    });
}
/**
 * Render a self (text) post, together with its curated comments, as YAML.
 *
 * @param {Object} item - Content item; only `item.comments` is read here.
 * @param {Object} post - Post supplying `title`, `text`, `user.name`,
 *   `subreddit`, `score`, `datetime` and `url`.
 * @returns {string} The output of `yaml.safeDump` for the assembled document.
 */
function selfPostToText(item, post) {
    const {
        title,
        text,
        subreddit,
        score,
        datetime,
        url,
    } = post;

    // Key order below is the order in which the fields are dumped.
    return yaml.safeDump({
        title,
        text,
        author: post.user.name,
        subreddit,
        score,
        datetime,
        url,
        comments: curateComments(item.comments),
    });
}
/**
 * Collect the data to save for one content item.
 *
 * Self posts are not fetched: the post is rendered to text with
 * `selfPostToText` and returned as a single UTF-8 buffer. Any other item is
 * fetched from `item.url`, followed by every additional source in `item.mux`.
 *
 * @param {Object} item - Content item; reads `self`, `url` and `mux`.
 * @param {Object} context - Extra fetch context. `context.post` is required for
 *   self posts; the whole object is forwarded to `fetchItem` alongside `item`.
 * @returns {Promise<Object[]|null>} For self posts a one-element array of
 *   `{ buffer, hash }`. Otherwise the per-source results of `fetchItem`, or
 *   `null` when none of the sources produced a result.
 */
async function getBuffers(item, context) {
    if (item.self) {
        // Callers read the payload from the `buffer` key of each entry. Spreading
        // a Buffer into the object would only copy its bytes as numeric keys and
        // leave `buffer` undefined, so the Buffer has to be stored explicitly.
        return [{
            buffer: Buffer.from(selfPostToText(item, context.post), 'utf8'),
            hash: context.post.hash,
        }];
    }

    // `concat` accepts both a single extra source and an array of them.
    const sources = item.mux ? [item.url].concat(item.mux) : [item.url];

    // The second argument is presumably the attempt counter — TODO confirm against fetchItem.
    const buffers = await Promise.map(sources, (source) => fetchItem(source, 0, {
        item,
        ...context,
    }));

    // NOTE(review): when only some sources fail, the falsy entries stay in the
    // returned array; callers that index `buffers[0]` or destructure every entry
    // will throw on them. Confirm what fetchItem yields on failure.
    return buffers.some(Boolean) ? buffers : null;
}
/**
 * Write configured metadata into a saved JPEG.
 *
 * Every template in `config.library.meta` is interpolated for the item; keys
 * whose interpolated value is falsy are left out.
 *
 * @param {string} filepath - Path of the saved file, passed to `saveMeta`.
 * @param {Object} item - Content item; `item.type` must be `'image/jpeg'`.
 * @param {Object|null} post - Post passed to `interpolate`.
 * @param {Object|null} user - User passed to `interpolate`.
 * @param {*} ep - Passed through to `saveMeta` unchanged.
 * @returns {Promise<*>} The result of `saveMeta`, or `false` when the item is
 *   not a JPEG or no metadata value was produced.
 */
async function addMeta(filepath, item, post, user, ep) {
    if (item.type !== 'image/jpeg') {
        return false;
    }

    const filledEntries = Object.entries(config.library.meta)
        .map(([key, template]) => [key, interpolate(template, item, null, null, post, user)])
        .filter(([, interpolatedValue]) => interpolatedValue);

    if (filledEntries.length === 0) {
        return false;
    }

    return saveMeta(filepath, Object.fromEntries(filledEntries), ep);
}
/**
 * Build the destination path for an item from the configured path patterns.
 *
 * The pattern is looked up by the part of `item.type` before the slash, under
 * `config.library.posts` when a post is given and `config.library.direct`
 * otherwise; album content uses the nested `album` patterns of that section.
 *
 * @param {Object} item - Content item; reads `item.type`.
 * @param {Object} content - Content container; a truthy `content.album` selects the album patterns.
 * @param {Object} host - Host, passed to `interpolate`.
 * @param {Object} [post] - Post; when falsy the direct patterns are used.
 * @param {Object} [user] - User; only forwarded when a post is given.
 * @returns {*} Whatever `interpolate` returns for the selected pattern.
 */
function getFilepath(item, content, host, post, user) {
    const [mediaType] = item.type.split('/');

    if (post) {
        const postPatterns = content.album ? config.library.posts.album : config.library.posts;

        return interpolate(postPatterns[mediaType], item, content, host, post, user);
    }

    const directPatterns = content.album ? config.library.direct.album : config.library.direct;

    // Direct content has no post or user, so neither is passed along.
    return interpolate(directPatterns[mediaType], item, content, host);
}
/**
 * Fetch and save the content of all of a user's posts, then write the outcome
 * to the index.
 *
 * Items whose hash was already indexed, or was already saved earlier in this
 * run, are marked `ignored` instead of being saved again when
 * `config.fetch.avoidDuplicates` is set (album items are never skipped).
 *
 * @param {Object} user - User; reads `user.indexed.original` (previously
 *   indexed items with `hash`/`phash`) and `user.posts`.
 * @param {*} ep - Passed through to `addMeta` unchanged.
 * @param {Object} args - Passed to `saveProfileDetails` and `writeToIndex`.
 * @returns {Promise<*>} The result of `writeToIndex`.
 */
async function fetchSaveUserContent(user, ep, args) {
    const profilePaths = await saveProfileDetails(user, args);

    // Seed the duplicate filter with everything indexed before; missing hashes are dropped.
    // NOTE(review): phashes are loaded here but only `hash` is ever looked up below — confirm intent.
    const hashes = new Set(user.indexed.original.flatMap((item) => [item.hash, item.phash]).filter(Boolean));

    const posts = await Promise.map(user.posts, async (post) => {
        if (!post.content) {
            return null;
        }

        const items = await Promise.map(post.content.items, async (originalItem, index) => {
            const item = { ...originalItem, index };
            const buffers = await getBuffers(item, { post, host: post.host, headers: post.content.headers || item.headers });

            // no buffers, ignore item
            if (!buffers || buffers.length === 0) {
                return null;
            }

            item.hash = buffers[0].hash;
            item.phash = buffers[0].phash;

            // prevent duplicates
            if (config.fetch.avoidDuplicates && hashes.has(item.hash) && !item.album) {
                logger.verbose(`Ignoring duplicate file '${post.url}' (${post.permalink})`);

                return {
                    ...item,
                    ignored: true,
                };
            }

            const filepath = getFilepath(item, post.content, post.host, post, user);
            const sourcePaths = await save(filepath, buffers.map(({ buffer }) => buffer), item, post);

            // Only remember real hashes. Recording `undefined` for a hash-less item
            // would make every later hash-less item look like a duplicate.
            if (item.hash) {
                hashes.add(item.hash);
            }

            if (item.mux) {
                await mux(filepath, sourcePaths, item);
            }

            await addMeta(filepath, item, post, user, ep);

            return item;
        });

        return {
            ...post,
            // The post takes the hashes of its first item; that slot is null when the item had no buffers.
            hash: items[0]?.hash,
            phash: items[0]?.phash,
            content: items.filter(Boolean),
        };
    }, {
        concurrency: config.fetch.concurrency,
    });

    return writeToIndex(posts.filter(Boolean), profilePaths, user, args);
}
/**
 * Fetch and save every item of directly linked content (no post or user).
 *
 * @param {Object} content - Content container; reads `content.items` and `content.headers`.
 * @param {Object} host - Host the content came from; `host.url` is logged for every item.
 * @param {*} ep - Passed through to `addMeta` unchanged.
 * @returns {Promise<undefined[]>} Resolves once all items are handled; the
 *   per-item results carry no value.
 */
async function fetchSaveDirectContent(content, host, ep) {
    const fetchAndSave = async (originalItem, index) => {
        logger.info(`Fetching and saving '${host.url}'`);

        const item = { ...originalItem, index };
        const headers = content.headers || item.headers;
        const buffers = await getBuffers(item, { host, headers });

        // nothing was fetched for this item, skip it
        if (!buffers || buffers.length === 0) {
            return;
        }

        const filepath = getFilepath(item, content, host, null, null);
        const payloads = buffers.map((entry) => entry.buffer);
        const sourcePaths = await save(filepath, payloads, item, null);

        if (item.mux) {
            await mux(filepath, sourcePaths, item);
        }

        await addMeta(filepath, item, null, null, ep);
    };

    return Promise.map(content.items, fetchAndSave, { concurrency: 5 });
}
// Public API of this module: one entry point for saving the content of a
// user's posts and one for saving content that is not tied to a post.
module.exports = {
fetchSaveUserContent,
fetchSaveDirectContent,
};