Refactored post retrieval so that the limit is applied per user and does not count directly requested posts, and started using async/await.

This commit is contained in:
2024-09-11 05:16:55 +02:00
parent 2627ec6207
commit 84836bc8c0
8 changed files with 124 additions and 69 deletions

View File

@@ -25,23 +25,16 @@ if(!(args.users && args.users.length) && !(args.posts && args.posts.length)) {
return console.log('\x1b[31m%s\x1b[0m', 'Please supply at least one user with --user <user> or one post with --post <post-id>. See --help for more options.');
}
Promise.resolve().then(() => {
if(args.users) {
return getUserPosts(args.users);
}
Promise.resolve().then(async () => {
let userPosts = args.users ? await getUserPosts(args.users) : {};
return [];
}).then(userPosts => {
if(args.posts) {
return getPosts(args.posts).then(posts => posts.concat(userPosts));
userPosts = await getPosts(args.posts, userPosts);
}
return userPosts;
}).then(posts => {
return curatePosts(posts, args).slice(0, args.limit);
}).then(posts => {
return attachContentInfo(posts);
}).then(posts => {
const curatedPosts = curatePosts(userPosts, args);
const posts = await attachContentInfo(curatedPosts);
return fetchContent(posts);
}).catch(error => {
return console.error(error);

View File

@@ -12,7 +12,7 @@ module.exports = yargs.command('npm start -- --user <username>').option('users',
describe: 'Reddit post IDs to fetch',
type: 'array'
}).option('limit', {
describe: 'Maximum amount of posts to fetch after filtering out ignored, cross- and reposts',
describe: 'Maximum amount of posts to fetch per supplied user (!), after filtering out ignored, cross- and reposts',
type: 'number',
default: config.fetch.limit
}).option('sort', {

View File

@@ -2,54 +2,60 @@
const config = require('config');
const dissectLink = require('../dissectLink.js');
const omit = require('object.omit');
function curatePosts(posts, args) {
const curatePosts = (userPosts, args) => {
const processed = new Set();
return posts.reduce((acc, post, index) => {
return Object.values(userPosts).reduce((accPosts, user) => accPosts.concat(user.posts.reduce((accUserPosts, post, index) => {
// cut-off at limit, but don't count posts requested directly by ID
if(accUserPosts.length >= args.limit && !post.direct) {
return accUserPosts;
}
const host = dissectLink(post.url);
post.permalink = 'https://reddit.com' + post.permalink;
const permalink = 'https://reddit.com' + post.permalink;
const ignoring = args.ignore ? args.ignore.find(prop => {
return post[prop];
}) : null;
if(ignoring) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${post.permalink})`);
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`);
return acc;
return accUserPosts;
}
if(host) {
if(config.fetch.avoidDuplicates && processed.has(host.id)) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring cross-post or repost '${post.url}' (${post.permalink})`);
console.log('\x1b[33m%s\x1b[0m', `Ignoring duplicate content '${post.url}' (cross-post, repost, or --post ID was already included) (${permalink})`);
return acc;
return accUserPosts;
}
if((args.include && !args.include.includes(host.label)) || (args.exclude && args.exclude.includes(host.label))) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring source '${host.label}' from post '${post.url}' (${post.permalink})`);
console.log('\x1b[33m%s\x1b[0m', `Ignoring source '${host.label}' from post '${post.url}' (${permalink})`);
return acc;
return accUserPosts;
}
processed.add(host.id);
}
return acc.concat({
return accUserPosts.concat({
id: post.id,
index: index,
title: post.title,
text: post.selftext,
user: post.user,
permalink: post.permalink,
user: omit(user, ['posts']),
permalink,
url: post.url,
datetime: new Date(post.created_utc * 1000),
subreddit: post.subreddit.display_name,
preview: post.preview ? post.preview.images.map(image => image.source) : null,
host
});
}, []);
}, [])), []);
};
module.exports = curatePosts;

View File

@@ -1,40 +1,48 @@
'use strict';
const Promise = require('bluebird');
const config = require('config');
const curateUser = require('../curate/user.js');
const saveProfileDetails = require('../save/profileDetails.js');
function getPostsWrap(reddit, args) {
return function getPosts(postIds) {
return postIds.reduce((chain, postId) => {
return chain.then(acc => {
return reddit.getSubmission(postId).fetch().then(post => ({post, acc}));
}).then(({post, acc}) => {
if(acc.users[post.author.name]) {
return {post, acc, user: acc.users[post.author.name]}
}
const getUser = async (username, reddit) => {
try {
const user = await reddit.getUser(username).fetch();
if(post.author.name === '[deleted]') {
return {post, acc, user: {name: '[deleted]'}};
}
return curateUser(user);
} catch(error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
return reddit.getUser(post.author.name).fetch().then(curateUser).then(saveProfileDetails).then(user => ({post, acc, user}));
}).then(({post, acc, user}) => {
post.user = user;
acc.posts.push(post);
return {
name: username,
fallback: true
};
}
};
// keep track of users to prevent fetching one user multiple times
acc.users[user.name] = user;
const getPostsWrap = (reddit, args) => {
return function getPosts(postIds, userPosts = {}) {
return Promise.reduce(postIds, (accUserPosts, postId) => Promise.resolve().then(async () => {
const post = await reddit.getSubmission(postId).fetch();
return acc;
});
}, Promise.resolve({
posts: [],
users: {}
})).then(({posts, users}) => {
return posts;
});
post.direct = true;
if(accUserPosts[post.author.name]) {
accUserPosts[post.author.name].posts = accUserPosts[post.author.name].posts.concat(post);
return accUserPosts;
}
// don't attempt to fetch deleted user
if(post.author.name === '[deleted]') {
return {...accUserPosts, '[deleted]': {name: '[deleted]', deleted: true, posts: [post]}};
}
const user = await getUser(post.author.name);
return {...accUserPosts, [post.author.name]: {...user, posts: [post]}}
}), userPosts);
};
};

View File

@@ -1,35 +1,41 @@
'use strict';
const Promise = require('bluebird');
const config = require('config');
const getArchivePostIds = require('../archives/getArchivePostIds.js');
const curateUser = require('../curate/user.js');
const saveProfileDetails = require('../save/profileDetails.js');
const getUser = (username, reddit) => {
return reddit.getUser(username).fetch().then(user => curateUser(user)).catch(error => {
const getUser = async (username, reddit) => {
try {
const user = await reddit.getUser(username).fetch();
return curateUser(user);
} catch(error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
return {
name: username,
fallback: true
};
});
}
};
const getPosts = (username, reddit, args) => {
return reddit.getUser(username).getSubmissions({
sort: args.sort,
limit: Infinity
}).catch(error => {
const getPosts = async (username, reddit, args) => {
try {
return await reddit.getUser(username).getSubmissions({
sort: args.sort,
limit: Infinity
});
} catch(error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch posts from reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
return [];
});
}
};
const getUserPostsWrap = (reddit, args) => usernames => Promise.all(
usernames.map(async username => {
const getUserPostsWrap = (reddit, args) => usernames => Promise.props(usernames.reduce((userPosts, username) => {
userPosts[username] = (async () => {
const [user, posts] = await Promise.all([
getUser(username, reddit),
getPosts(username, reddit, args)
@@ -46,8 +52,10 @@ const getUserPostsWrap = (reddit, args) => usernames => Promise.all(
posts.push(...archivedPosts);
}
return posts.map(post => Object.assign(post, {user}));
})
).then(posts => posts.flatten());
return {...user, posts};
})();
return userPosts;
}, {}));
module.exports = getUserPostsWrap;