ripunzel/src/app.js

177 lines
4.8 KiB
JavaScript

'use strict';
const config = require('config');
const Snoowrap = require('snoowrap');
const fs = require('fs-extra');
const Promise = require('bluebird');
const exiftool = require('node-exiftool');
const exiftoolBin = require('dist-exiftool');
const cron = require('node-cron');
require('array.prototype.flatten').shim();
const reddit = new Snoowrap(config.reddit.api);
const args = require('./cli')();
const logger = require('./logger')(__filename);
const dissectLink = require('./dissectLink');
const curatePosts = require('./curate/posts');
const methods = require('./methods/methods');
const { attachContentInfo, getInfo } = require('./fetch/info');
const { fetchSaveUserContent, fetchSaveDirectContent } = require('./fetch/content');
const getPosts = require('./sources/getPosts')(reddit, args);
const getUserPosts = require('./sources/getUserPosts')(reddit, args);
/**
 * Read a line-separated list file and return its meaningful entries.
 *
 * Lines are split on both LF and CRLF and trimmed, so files saved with
 * Windows line endings or stray indentation do not produce entries carrying
 * invisible whitespace. Empty lines and lines starting with '#' are dropped.
 *
 * @param {string} location - Path of the file to read.
 * @param {string} label - Description of the file's purpose, only used in the error log.
 * @returns {Promise<string[]>} The entries, or an empty array when the file could not be read.
 */
async function getFileContents(location, label) {
  try {
    const fileContents = await fs.readFile(location, 'utf8');

    return fileContents
      .split(/\r?\n/)
      .map((line) => line.trim())
      .filter((entry) => entry !== '' && !entry.startsWith('#'));
  } catch (error) {
    // an unreadable list is not fatal: report it and carry on as if the list were empty
    logger.error(`Could not read ${label} file '${location}': ${error}.`);

    return [];
  }
}
/**
 * Collect the distinct hosts referenced by the posts of all users.
 *
 * Hosts are keyed by their `method`, so a single host object is kept per
 * method; when several posts share a method, the last one encountered wins.
 * Posts without a host are ignored.
 *
 * @param {Object} posts - Object whose values each carry a `posts` array.
 * @returns {Object[]} One host object per distinct method.
 */
function getPostHosts(posts) {
  const methodHostPairs = [];

  for (const user of Object.values(posts)) {
    for (const post of user.posts) {
      if (post.host) {
        methodHostPairs.push([post.host.method, post.host]);
      }
    }
  }

  // turning the pairs into an object collapses duplicate methods into one entry
  return Object.values(Object.fromEntries(methodHostPairs));
}
/**
 * Fetch the predata of every host method that offers a `fetchPredata` hook.
 *
 * Hosts are handled one after another. Each method is fetched at most once,
 * even when several hosts share it, so callers may pass a host list that has
 * not been deduplicated without causing repeated fetches. Entries without a
 * method, or whose method has no `fetchPredata` hook, are skipped.
 *
 * @param {Array<Object|null|undefined>} hosts - Host objects carrying a `method` name.
 * @returns {Promise<Object>} The fetched predata, keyed by method name.
 */
async function fetchPredata(hosts) {
  return hosts.reduce(async (chain, host) => {
    const acc = await chain;
    const method = host?.method;
    const isFetched = Object.prototype.hasOwnProperty.call(acc, method);

    if (isFetched || !methods[method]?.fetchPredata) {
      return acc;
    }

    const data = await methods[method].fetchPredata();

    logger.info(`Fetched predata for ${method}`);

    return {
      ...acc,
      [method]: data,
    };
  }, Promise.resolve({}));
}
/**
 * Gather, curate and enrich the posts requested through the CLI arguments.
 *
 * Usernames and post IDs are taken from the arguments and, when the
 * corresponding file arguments are set, extended with the entries of those
 * files. The resulting posts are curated against the optional ignore list,
 * after which predata is fetched for their hosts and content info attached.
 *
 * @returns {Promise<Object|null>} The result of attachContentInfo, or null
 * when neither usernames nor post IDs were supplied.
 */
async function getCompletePosts() {
  const fileUsernames = args.fileUsers ? await getFileContents(args.fileUsers, 'username') : [];
  const filePostIds = args.filePosts ? await getFileContents(args.filePosts, 'post ID') : [];

  const usernames = (args.users || []).concat(fileUsernames);
  const postIds = (args.posts || []).concat(filePostIds);

  // nothing was requested, so there is nothing to look up
  if (!(usernames.length || postIds.length)) {
    return null;
  }

  const postsByUser = usernames.length ? await getUserPosts(usernames) : {};
  // individually requested posts are merged into the posts already found per user
  const requestedPosts = postIds.length ? await getPosts(postIds, postsByUser) : postsByUser;

  const ignoreIds = args.fileIgnore ? await getFileContents(args.fileIgnore, 'ignore') : [];
  const curatedUserPosts = curatePosts(requestedPosts, ignoreIds, args);

  const hosts = getPostHosts(curatedUserPosts);
  const predata = await fetchPredata(hosts);

  return attachContentInfo(curatedUserPosts, { reddit, predata });
}
/**
 * Fetch and save the content behind a list of direct links.
 *
 * Every link is dissected into a host first, predata is fetched for those
 * hosts, and the links are then processed with at most five running at once.
 *
 * @param {string[]} links - The links to fetch.
 * @param {Object} ep - Exiftool process handed on to fetchSaveDirectContent.
 * @returns {Promise<Array>} Per link the result of fetchSaveDirectContent,
 * or null when no info could be obtained for it.
 */
async function getDirectContent(links, ep) {
  const targets = links.map((link) => ({ link, host: dissectLink(link) }));
  const predata = await fetchPredata(targets.map((target) => target.host));

  const fetchSaveTarget = async ({ link, host }) => {
    const info = await getInfo(host, { reddit, link, predata });

    // without info there is nothing to download for this link
    return info ? fetchSaveDirectContent(info, host, ep) : null;
  };

  return Promise.map(targets, fetchSaveTarget, { concurrency: 5 });
}
/**
 * Fetch and save directly linked content requested through the CLI arguments.
 *
 * Links given with the `fetch` argument take precedence; the `fileDirect`
 * file is only read when no such links were given.
 *
 * @param {Object} ep - Exiftool process handed on to getDirectContent.
 * @returns {Promise<Array|null>} The result of getDirectContent, or null when
 * neither argument is set.
 */
async function getCompleteContents(ep) {
  const { fetch: directLinks, fileDirect } = args;

  if (!directLinks && !fileDirect) {
    return null;
  }

  const links = directLinks || await getFileContents(fileDirect, 'direct');

  return getDirectContent(links, ep);
}
/**
 * Fetch and save the content of the posts of every user.
 *
 * @param {Object} userPosts - Object whose values are the users to process.
 * @param {Object} ep - Exiftool process handed on to fetchSaveUserContent.
 * @returns {Promise} The promise produced by Promise.reduce over all users.
 */
function fetchSavePosts(userPosts, ep) {
  const users = Object.values(userPosts);

  // reduce instead of map: this limits concurrency and thereby reduces network stress
  const fetchSaveNextUser = (previousResult, user) => fetchSaveUserContent(user, ep, args);

  return Promise.reduce(users, fetchSaveNextUser, null);
}
/**
 * Run one complete fetch cycle.
 *
 * Opens an exiftool process, fetches directly linked content when the
 * `fetch` or `fileDirect` argument is set, then fetches and saves the
 * requested user posts, and closes the exiftool process again.
 *
 * Errors never propagate to the caller: they are logged, with the stack
 * trace when the `debug` argument is set and with the message otherwise.
 *
 * @returns {Promise<void>} Settles once the cycle has finished or failed.
 */
async function initApp() {
  try {
    const ep = new exiftool.ExiftoolProcess(exiftoolBin);

    await ep.open();

    try {
      if (args.fetch || args.fileDirect) {
        await getCompleteContents(ep);
      }

      const userPosts = await getCompletePosts();

      if (userPosts) {
        await fetchSavePosts(userPosts, ep);
      }
    } finally {
      // close the exiftool process even when fetching failed; otherwise every failed run
      // would leave one open, which adds up when runs are repeated in watch-mode
      await ep.close();
    }

    if (args.watch) {
      logger.info(`Watch-mode enabled, checking again for new posts according to crontab '${config.fetch.watch.schedule}'.`);
    }
  } catch (error) {
    if (args.debug) {
      logger.error(error.stack);
    } else {
      logger.error(error.message);
    }
  }
}
// start the first run right away; its completion is not awaited here
initApp();

// in watch-mode, run again whenever the configured crontab schedule fires
if (args.watch) {
  const { schedule } = config.fetch.watch;

  cron.schedule(schedule, initApp);
}