Added date and indexed oldest/latest limit arguments.
This commit is contained in:
parent
86002ef00b
commit
24a165cf48
21
src/cli.js
21
src/cli.js
|
@ -4,7 +4,7 @@ const config = require('config');
|
|||
const yargs = require('yargs');
|
||||
|
||||
function getArgs() {
|
||||
return yargs
|
||||
const args = yargs
|
||||
.command('npm start -- --user <username>')
|
||||
.option('users', {
|
||||
alias: 'user',
|
||||
|
@ -40,7 +40,18 @@ function getArgs() {
|
|||
type: 'array',
|
||||
})
|
||||
.option('after', {
|
||||
describe: 'Do not include posts from before this date (DD-MM-YYYY). When set to \'index\', it will assume the date of the latest indexed post.',
|
||||
describe: 'Only include posts after this date (YYYY-MM-DD, optionally HH:mm)',
|
||||
})
|
||||
.option('before', {
|
||||
describe: 'Only include posts before this date (YYYY-MM-DD, optionally HH:mm)',
|
||||
})
|
||||
.option('after-indexed', {
|
||||
describe: 'Only include posts after the oldest or the latest entry in the index',
|
||||
options: ['oldest', 'latest'],
|
||||
})
|
||||
.option('before-indexed', {
|
||||
describe: 'Only include posts before the oldest or the latest entry in the index',
|
||||
options: ['oldest', 'latest'],
|
||||
})
|
||||
.option('archives', {
|
||||
describe: 'Search archives for deleted posts',
|
||||
|
@ -48,6 +59,12 @@ function getArgs() {
|
|||
default: config.fetch.archives.search,
|
||||
})
|
||||
.argv;
|
||||
|
||||
return {
|
||||
...args,
|
||||
after: args.after ? new Date(args.after) : null,
|
||||
before: args.before ? new Date(args.before) : null,
|
||||
};
|
||||
}
|
||||
|
||||
module.exports = getArgs;
|
||||
|
|
|
@ -6,7 +6,39 @@ const omit = require('object.omit');
|
|||
const dissectLink = require('../dissectLink.js');
|
||||
const hashPost = require('./hashPost.js');
|
||||
|
||||
function curatePost(acc, post, user, index, processed, args) {
|
||||
const { isAfter, isBefore, isEqual } = require('date-fns');
|
||||
|
||||
/**
 * Log a yellow console notice for each category of posts that was skipped
 * while curating one user's posts. Categories with nothing skipped are silent.
 *
 * @param {Object} curatedPosts - accumulator produced by reducing over the user's posts
 * @param {Array} curatedPosts.indexedUpdated - posts that matched an indexed entry
 * @param {number} curatedPosts.tooOldCount - posts rejected by the `after` date limit
 * @param {number} curatedPosts.tooRecentCount - posts rejected by the `before` date limit
 * @param {number} curatedPosts.beforeIndexedCount - posts rejected by the `afterIndexed` limit
 * @param {number} curatedPosts.afterIndexedCount - posts rejected by the `beforeIndexed` limit
 * @param {Object} indexed - looked up by `args.afterIndexed` / `args.beforeIndexed`; the selected
 *   entry's `id` and `date` are printed
 * @param {Object} user - only `user.name` is read
 * @param {Object} args - reads `after`, `before`, `afterIndexed` and `beforeIndexed`
 * @returns {void}
 */
function report(curatedPosts, indexed, user, args) {
    const { indexedUpdated, tooOldCount, tooRecentCount, beforeIndexedCount, afterIndexedCount } = curatedPosts;

    // '\x1b[33m' switches the terminal to yellow, '\x1b[0m' resets it again
    const warn = message => console.log('\x1b[33m%s\x1b[0m', message);

    // the indexed entry is only dereferenced when the matching counter is positive
    const describeEntry = key => `${key} indexed post (${indexed[key].id}, ${indexed[key].date})`;

    if (indexedUpdated.length > 0) {
        warn(`Ignoring ${indexedUpdated.length} indexed posts for '${user.name}'`);
    }

    if (tooOldCount > 0) {
        warn(`Ignoring ${tooOldCount} older posts for '${user.name}' for specified date limit '${args.after}'`);
    }

    if (tooRecentCount > 0) {
        warn(`Ignoring ${tooRecentCount} newer posts for '${user.name}' for specified date limit '${args.before}'`);
    }

    if (beforeIndexedCount > 0) {
        warn(`Ignoring ${beforeIndexedCount} posts older than the ${describeEntry(args.afterIndexed)} for '${user.name}'`);
    }

    if (afterIndexedCount > 0) {
        warn(`Ignoring ${afterIndexedCount} posts newer than the ${describeEntry(args.beforeIndexed)} for '${user.name}'`);
    }
}
|
||||
|
||||
function curatePost(acc, post, user, index, indexed, processed, args) {
|
||||
const host = dissectLink(post.url);
|
||||
const permalink = `https://reddit.com${post.permalink}`;
|
||||
|
||||
|
@ -25,8 +57,26 @@ function curatePost(acc, post, user, index, processed, args) {
|
|||
hash: hashPost(post),
|
||||
};
|
||||
|
||||
if (user.indexed.original.find(entry => entry.id === post.id)) {
|
||||
return { ...acc, indexedUpdated: [...acc.indexedUpdated, curatedPost] };
|
||||
if (indexed.entries.length) {
|
||||
if (indexed.entries.find(entry => entry.id === post.id)) {
|
||||
return { ...acc, indexedUpdated: [...acc.indexedUpdated, curatedPost] };
|
||||
}
|
||||
|
||||
if (args.afterIndexed && (isBefore(curatedPost.datetime, indexed[args.afterIndexed].date) || isEqual(curatedPost.datetime, indexed[args.afterIndexed].date))) {
|
||||
return { ...acc, beforeIndexedCount: acc.beforeIndexedCount + 1 };
|
||||
}
|
||||
|
||||
if (args.beforeIndexed && (isAfter(curatedPost.datetime, indexed[args.beforeIndexed].date) || isEqual(curatedPost.datetime, indexed[args.beforeIndexed].date))) {
|
||||
return { ...acc, afterIndexedCount: acc.afterIndexedCount + 1 };
|
||||
}
|
||||
}
|
||||
|
||||
if (args.after && (isBefore(curatedPost.datetime, args.after) || isEqual(curatedPost.datetime, args.after))) {
|
||||
return { ...acc, tooOldCount: acc.tooOldCount + 1 };
|
||||
}
|
||||
|
||||
if (args.before && (isAfter(curatedPost.datetime, args.before) || isEqual(curatedPost.datetime, args.before))) {
|
||||
return { ...acc, tooRecentCount: acc.tooRecentCount + 1 };
|
||||
}
|
||||
|
||||
// cut-off at limit, but don't count posts requested directly by ID
|
||||
|
@ -72,17 +122,40 @@ function curatePost(acc, post, user, index, processed, args) {
|
|||
|
||||
/**
 * Curate the posts of every user in `userPosts` and return a new object keyed by user name.
 *
 * For each user the indexed entries are ordered by date, every post is folded through
 * `curatePost`, the skipped categories are reported, and the user's `indexed` field is rebuilt.
 *
 * @param {Object} userPosts - users keyed arbitrarily; each value provides `name`, `posts`
 *   and `indexed.original`
 * @param {Object} args - forwarded unchanged to `curatePost` and `report`
 * @returns {Object} per user name: the user's own fields with `posts` replaced by the curated
 *   posts and `indexed` replaced by `{ original, updated, oldest, latest }`
 */
const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPosts, user) => {
    const processed = new Set();

    // sort a copy: Array.prototype.sort works in place and would otherwise reorder the caller's array
    const indexedByDate = [...user.indexed.original].sort((entryA, entryB) => new Date(entryA.date) - new Date(entryB.date));

    // oldest/latest are undefined when the user has no indexed entries
    const indexed = {
        entries: indexedByDate,
        oldest: indexedByDate[0],
        latest: indexedByDate[indexedByDate.length - 1],
    };

    const curatedPosts = user.posts.reduce((accUserPosts, post, index) => curatePost(accUserPosts, post, user, index, indexed, processed, args), {
        posts: [],
        indexedUpdated: [],
        tooOldCount: 0,
        tooRecentCount: 0,
        beforeIndexedCount: 0,
        afterIndexedCount: 0,
    });

    report(curatedPosts, indexed, user, args);

    // entries that were seen again move to `updated`; the rest stay in `original`, in date order
    const updatedIds = new Set(curatedPosts.indexedUpdated.map(post => post.id));
    const indexedOriginal = indexedByDate.filter(entry => !updatedIds.has(entry.id));

    return {
        ...accPosts,
        [user.name]: {
            ...user,
            posts: curatedPosts.posts,
            indexed: {
                original: indexedOriginal,
                updated: curatedPosts.indexedUpdated,
                oldest: indexed.oldest,
                latest: indexed.latest,
            },
        },
    };
}, {});
|
||||
|
||||
module.exports = curatePosts;
|
||||
|
|
Loading…
Reference in New Issue