ripunzel/src/fetch/content.js

185 lines
5.2 KiB
JavaScript

'use strict';
const config = require('config');
const Promise = require('bluebird');
const yaml = require('js-yaml');
const logger = require('../logger')(__filename);
const saveProfileDetails = require('../save/profileDetails');
const fetchItem = require('./item');
const interpolate = require('../interpolate');
const save = require('../save/save');
// const textToStream = require('../save/textToStream');
const saveMeta = require('../save/meta');
const mux = require('../save/mux');
const writeToIndex = require('../save/writeToIndex');
/**
 * Reduce a (possibly nested) list of comments to the fields kept in the saved text.
 *
 * Each comment is mapped to `{ text, author, score, date }`, taken from its `body`,
 * `author`, `score` and `datetime` properties. A `replies` key is only added when the
 * comment has at least one reply, and those replies are curated recursively.
 *
 * @param {Array<Object>} [comments] - comments to curate; `null`/`undefined` is treated as no comments
 * @returns {Array<Object>} the curated comments, in the same order
 */
function curateComments(comments) {
    // A missing list curates to an empty list instead of throwing on `.map`.
    if (comments == null) {
        return [];
    }

    return comments.map((comment) => {
        const curatedComment = {
            text: comment.body,
            author: comment.author,
            score: comment.score,
            date: comment.datetime,
        };

        // A comment without a `replies` property is handled like one with an empty list.
        const replies = comment.replies || [];

        if (replies.length) {
            return {
                ...curatedComment,
                replies: curateComments(replies),
            };
        }

        return curatedComment;
    });
}
/**
 * Serialise a self post and its comments to YAML text.
 *
 * The document holds the post's title, text, author name, subreddit, score, datetime
 * and url, followed by the curated comments of the item.
 *
 * @param {Object} item - content item; its `comments` are passed through `curateComments`
 * @param {Object} post - post supplying the remaining fields (`post.user.name` becomes `author`)
 * @returns {string} the result of `yaml.safeDump` for the curated post
 */
function selfPostToText(item, post) {
    const {
        title,
        text,
        user,
        subreddit,
        score,
        datetime,
        url,
    } = post;

    // Key order is kept as-is: it determines the order of the fields in the dumped document.
    return yaml.safeDump({
        title,
        text,
        author: user.name,
        subreddit,
        score,
        datetime,
        url,
        comments: curateComments(item.comments),
    });
}
/**
 * Collect the buffers that make up a content item.
 *
 * Self items are rendered to text locally; every other item is fetched from `item.url`
 * plus any additional `item.mux` sources.
 *
 * @param {Object} item - content item (`self`, `url`, optional `mux` sources)
 * @param {Object|null} post - post the item belongs to; read for `hash` when the item is a self post
 * @param {Object} host - host information handed through to `fetchItem`
 * @returns {Promise<Array|null>} for self items a one-element array with the UTF-8 text buffer carrying
 *     the post's `hash`; otherwise the `fetchItem` results in source order, or `null` when none is truthy
 */
async function getBuffers(item, post, host) {
    if (item.self) {
        const textBuffer = Buffer.from(selfPostToText(item, post), 'utf8');

        // Attach the hash to the Buffer itself. Spreading a Buffer into an object literal copies only
        // its indexed bytes into a plain object, so the result would no longer be a Buffer.
        textBuffer.hash = post.hash;

        return [textBuffer];
    }

    const sources = item.mux ? [item.url].concat(item.mux) : [item.url];
    const buffers = await Promise.map(sources, source => fetchItem(source, 0, post, host));

    // The list is returned unfiltered: falsy entries keep their position next to the truthy ones.
    return buffers.some(Boolean) ? buffers : null;
}
/**
 * Write configured metadata to a saved file, for items of type 'image/jpeg' only.
 *
 * Every entry of `config.library.meta` is run through `interpolate`; entries whose
 * interpolated value is falsy are left out. Nothing is written when no entry remains.
 *
 * @param {string} filepath - file the metadata is written to
 * @param {Object} item - content item; its `type` decides whether metadata applies
 * @param {Object|null} post - post passed to `interpolate`
 * @param {Object|null} user - user passed to `interpolate`
 * @param {*} ep - passed through to `saveMeta` unchanged
 * @returns {Promise<*>} the result of `saveMeta`, or `false` when nothing was written
 */
async function addMeta(filepath, item, post, user, ep) {
    if (item.type === 'image/jpeg') {
        let meta = {};

        for (const [key, template] of Object.entries(config.library.meta)) {
            const resolved = interpolate(template, item, null, null, post, user);

            if (resolved) {
                meta = { ...meta, [key]: resolved };
            }
        }

        if (Object.keys(meta).length > 0) {
            return saveMeta(filepath, meta, ep);
        }
    }

    return false;
}
/**
 * Resolve the destination path for an item from the configured library templates.
 *
 * The template is looked up by the item's top-level media type (the part of `item.type`
 * before the slash). Items that belong to a post use `config.library.posts`, all others
 * `config.library.direct`; album content uses the `album` sub-section of either.
 *
 * @param {Object} item - content item with a `type` such as 'image/jpeg'
 * @param {Object} content - content the item belongs to; `content.album` selects the album templates
 * @param {Object} host - host information passed to `interpolate`
 * @param {Object|null} post - owning post, if any
 * @param {Object|null} user - owning user; only passed on when a post is given
 * @returns {*} the result of `interpolate` for the selected template
 */
function getFilepath(item, content, host, post, user) {
    const [type] = item.type.split('/');
    const templates = post ? config.library.posts : config.library.direct;
    const template = content.album ? templates.album[type] : templates[type];

    // Direct content is interpolated without post and user arguments.
    if (!post) {
        return interpolate(template, item, content, host);
    }

    return interpolate(template, item, content, host, post, user);
}
/**
 * Fetch and save the content of all of a user's posts, then write the result to the index.
 *
 * Profile details are saved first. Posts are processed through `Promise.map` with the
 * configured concurrency; the items of a single post are processed one after another
 * through `Promise.reduce`. Posts without content are dropped before indexing.
 *
 * Each indexed post gets a `hash`: the hash of the first buffer of the last item that
 * yielded buffers, or the initial `[]` when no item of the post yielded any.
 *
 * @param {Object} user - user with `posts` and previously indexed entries in `indexed.original`
 * @param {*} ep - passed through to `addMeta`
 * @param {Object} args - passed to `saveProfileDetails` and `writeToIndex`
 * @returns {Promise<*>} the result of `writeToIndex`
 */
async function fetchSaveUserContent(user, ep, args) {
    const profilePaths = await saveProfileDetails(user, args);
    const knownHashes = new Set(user.indexed.original.map(({ hash }) => hash));

    // Fetch and store one item of a post; resolves to the value carried on as the post's hash.
    const storeItem = async (post, carriedHash, originalItem, index) => {
        const item = { ...originalItem, index };
        const buffers = await getBuffers(item, post, post.host);

        // Nothing fetched: keep whatever earlier items produced.
        if (!buffers || buffers.length === 0) {
            return carriedHash;
        }

        // Content already indexed or saved earlier in this run is not stored again.
        if (config.fetch.avoidDuplicates && knownHashes.has(buffers[0].hash)) {
            logger.verbose(`Ignoring duplicate file '${post.url}' (${post.permalink})`);

            return buffers[0].hash;
        }

        const filepath = getFilepath(item, post.content, post.host, post, user);
        const sourcePaths = await save(filepath, buffers, item, post);

        knownHashes.add(buffers[0].hash);

        if (item.mux) {
            await mux(filepath, sourcePaths, item);
        }

        await addMeta(filepath, item, post, user, ep);

        return buffers[0].hash;
    };

    // Process all items of one post; resolves to the post extended with its hash, or null without content.
    const storePost = async (post) => {
        if (!post.content) {
            return null;
        }

        const hash = await Promise.reduce(
            post.content.items,
            (carriedHash, originalItem, index) => storeItem(post, carriedHash, originalItem, index),
            []
        );

        return { ...post, hash };
    };

    const posts = await Promise.map(user.posts, storePost, {
        concurrency: config.fetch.concurrency,
    });

    return writeToIndex(posts.filter(Boolean), profilePaths, user, args);
}
/**
 * Fetch and save content that was requested directly, without an owning post or user.
 *
 * Items are processed one after another through `Promise.reduce`. Each item is fetched,
 * saved, muxed when it has `mux` sources, and passed to `addMeta`.
 *
 * NOTE(review): the paths are not accumulated across items — the resolved value is the
 * `save` result of the last item that yielded buffers (or the initial `[]`). Confirm
 * whether callers expect the paths of every item.
 *
 * @param {Object} content - content whose `items` are fetched; also used to pick the file path template
 * @param {Object} host - host information; `host.url` is logged for every item
 * @param {*} ep - passed through to `addMeta`
 * @returns {Promise<*>} the source paths of the last saved item, or `[]` when nothing was saved
 */
async function fetchSaveDirectContent(content, host, ep) {
    // Fetch and store one item; resolves to the value carried on to the next item.
    const storeItem = async (carriedPaths, originalItem, index) => {
        logger.info(`Fetching and saving '${host.url}'`);

        const item = { ...originalItem, index };
        const buffers = await getBuffers(item, null, host);
        const hasBuffers = Boolean(buffers) && buffers.length !== 0;

        // Nothing fetched: keep whatever earlier items produced.
        if (!hasBuffers) {
            return carriedPaths;
        }

        const filepath = getFilepath(item, content, host, null, null);
        const sourcePaths = await save(filepath, buffers, item, null);

        if (item.mux) {
            await mux(filepath, sourcePaths, item);
        }

        await addMeta(filepath, item, null, null, ep);

        return sourcePaths;
    };

    return Promise.reduce(content.items, storeItem, []);
}
// Public API of this module: the two fetch-and-save entry points defined above,
// one for a user's posts and one for directly requested content.
module.exports = {
fetchSaveUserContent,
fetchSaveDirectContent,
};