Reading index file and ignoring already indexed content.

This commit is contained in:
2018-06-17 03:39:12 +02:00
parent 7cf1a99915
commit 33ef3d2c19
5 changed files with 44 additions and 18 deletions

View File

@@ -1,8 +1,13 @@
'use strict';
const config = require('config');
const Promise = require('bluebird');
const fs = require('fs-extra');
const csvParse = Promise.promisify(require('csv').parse);
const getArchivePostIds = require('../archives/getArchivePostIds.js');
const curateUser = require('../curate/user.js');
const interpolate = require('../interpolate.js');
async function getUser(username, reddit) {
try {
@@ -40,24 +45,42 @@ async function getArchivedPosts(username, posts, reddit) {
return Promise.all(postIds.map(postId => reddit.getSubmission(postId).fetch()));
}
/**
 * Loads the rows of a user's index file.
 *
 * The file location is produced by `interpolate` from the
 * `config.library.index.file` setting and the given user. The contents are
 * parsed as tab-delimited text whose first row supplies the column names,
 * with value casting enabled.
 *
 * @param {Object} user - User record; `user.name` is used in the warning message.
 * @returns {Promise<Array>} Parsed index rows, or an empty array when the
 *   file could not be read or parsed (a warning is logged in that case).
 */
async function getIndexedPosts(user) {
  const indexFilePath = interpolate(config.library.index.file, user, null, null, false);
  const parseOptions = { delimiter: '\t', columns: true, cast: true };

  let entries;

  try {
    const contents = await fs.readFile(indexFilePath, 'utf8');

    entries = await csvParse(contents, parseOptions);
  } catch (error) {
    // Best effort: an unreadable or unparsable index is reported (in yellow)
    // and treated as "nothing indexed yet".
    console.log('\x1b[33m%s\x1b[0m', `Could not load index file for '${user.name}' at '${indexFilePath}': ${error}`);

    entries = [];
  }

  return entries;
}
/**
 * Creates the post collector bound to a reddit client and the run arguments.
 *
 * @param {Object} reddit - Client object forwarded to `getUser`, `getPosts`
 *   and `getArchivedPosts`.
 * @param {Object} args - Run options, forwarded to `getPosts`. When
 *   `args.archives` is truthy, archived posts are fetched and appended to
 *   each user's posts.
 * @returns {Function} `getUserPosts(usernames)`: an async function that
 *   resolves to an object keyed by `user.name`. Each value holds the user's
 *   own fields plus `posts` (fetched, and optionally archived, posts) and
 *   `indexed` (rows from the user's index file). Users that end up without
 *   any posts are left out.
 */
function getUserPostsWrap(reddit, args) {
  return async function getUserPosts(usernames) {
    const users = await Promise.all(usernames.map(async (username) => {
      // Profile and posts do not depend on each other, so fetch them together.
      const [user, posts] = await Promise.all([
        getUser(username, reddit),
        getPosts(username, reddit, args),
      ]);

      if (args.archives) {
        posts.push(...await getArchivedPosts(username, posts, reddit));
      }

      if (posts.length === 0) {
        return null;
      }

      // Only read the index for users that are actually returned; this avoids
      // a pointless file read (and warning) for users without posts.
      const indexed = await getIndexedPosts(user);

      return { ...user, posts, indexed };
    }));

    // Key the remaining users by name, filling one object in a single pass
    // instead of re-spreading the accumulator for every user.
    const userPosts = {};

    for (const user of users) {
      if (user) {
        userPosts[user.name] = user;
      }
    }

    return userPosts;
  };
}