From de64cc6d13b26813ef13358e816c8157097505f2 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 11 Sep 2024 05:16:54 +0200 Subject: [PATCH] Added support for fetching individual posts. Improved use of yargs, --help now available. Refactored main app flow. --- README.md | 13 ++--- src/app.js | 102 ++++++++++++++++++++++++-------------- src/cli.js | 26 ++++++++++ src/curate/posts.js | 46 +++++++++++++++++ src/curate/submissions.js | 48 ------------------ src/fetch/content.js | 6 +-- src/fetch/info.js | 4 +- 7 files changed, 150 insertions(+), 95 deletions(-) create mode 100644 src/cli.js create mode 100644 src/curate/posts.js delete mode 100644 src/curate/submissions.js diff --git a/README.md b/README.md index 08bea93..8df19e2 100644 --- a/README.md +++ b/README.md @@ -33,13 +33,14 @@ reddit-post-dump requires a arbitrarily recent version of Node.js. Before use, d `npm install` ## Usage -`node app.js --user={username}` +`node app.js (--user | --post )` -### Optional parameters -* `--users={user1,user2}`: You may fetch posts from multiple users by either supplying a comma-separated list of usernames (no spaces) with `--users`, or by using multiple individual `--user` arguments -* `--limit={number}`: Maximum amount posts per user to fetch content from. Limit is applied after fltering out ignored, cross- and reposts. -* `--sort={method}`: How posts should be sorted while fetched. This affects the `$postIndex` variable, and in combination with a `--limit` decides what posts will be included. -* `--ignore={prop1,prop2}`: Ignore submissions that have any of these comma-separated properties. Supported properties include `pinned`, `stickied`, `hidden`, `over_18`, `spoiler` (protip: any property in the API response can be ignored, if desired). +### Optional arguments +* `--users [...]`: You may fetch posts from multiple users by supplying a space-separated list of usernames to `--users`. 
+* `--posts [...]`: Fetch multiple posts by supplying a space-separated list of post IDs to `--posts`. +* `--limit <number>`: Maximum number of posts per user to fetch content from. Limit is applied after filtering out ignored, cross- and reposts. +* `--sort <method>`: How posts should be sorted while fetched. This affects the `$postIndex` variable, and in combination with a `--limit` decides what posts will be included. +* `--ignore [...]`: Ignore posts with any of the following properties: `pinned`, `stickied`, `hidden`, `over_18`, `spoiler`. ### Examples * `node app.js --user=ThePendulum` diff --git a/src/app.js b/src/app.js index 9e641e2..be27039 100644 --- a/src/app.js +++ b/src/app.js @@ -3,18 +3,17 @@ const config = require('config'); const util = require('util'); const fs = require('fs-extra'); -const yargs = require('yargs').argv; const snoowrap = require('snoowrap'); const promiseFinally = require('promise.prototype.finally'); const reddit = new snoowrap(config.reddit.api); -const curateSubmissions = require('./curate/submissions.js'); +const curatePosts = require('./curate/posts.js'); const curateUser = require('./curate/user.js'); const interpolate = require('./interpolate.js'); -const fetchInfo = require('./fetch/info.js'); +const attachContentInfo = require('./fetch/info.js'); const fetchContent = require('./fetch/content.js'); const save = require('./save/save.js'); @@ -22,41 +21,72 @@ const saveProfileDetails = require('./save/profileDetails.js'); promiseFinally.shim(); -const limit = yargs.limit || config.fetch.limit; +const args = require('./cli.js'); -// allow for any combination of --ignore val1 --ignore val2, --ignore=val1,val2 -const ignore = yargs.ignore ? 
[].concat(yargs.ignore).reduce((acc, prop) => acc.concat(prop.split(',')), []) : []; - -if(!yargs.user && typeof yargs.users !== 'string') { - return console.log('\x1b[31m%s\x1b[0m', 'Please supply at least one user with --user=[user], or multiple users with --users=[user1,user2] or --user=[user1] --user=[user2]'); +if(!(args.users && args.users.length) && !(args.posts && args.posts.length)) { + return console.log('\x1b[31m%s\x1b[0m', 'Please supply at least one user with --user or one post with --post . See --help for more options.'); } -const users = yargs.users ? yargs.users.split(',') : [].concat(yargs.user); +Promise.resolve().then(() => { + if(args.users) { + return getUserPosts(args.users); + } +}).then((userPosts = []) => { + if(args.posts) { + return getPosts(args.posts).then(posts => posts.concat(userPosts)); + } -users.forEach(username => { - return Promise.resolve().then(() => { - return reddit.getUser(username).fetch().then(curateUser); - }).then(user => { - return saveProfileDetails(user); - }).then(user => { - return reddit.getUser(username).getSubmissions({ - sort: yargs.sort || config.fetch.sort, - limit: Infinity - }).then(submissions => ({ - user, - submissions - })); - }).then(({user, submissions}) => { - const posts = curateSubmissions(submissions, ignore); - const limitedPosts = posts.slice(0, limit); - - return fetchInfo(limitedPosts).then(info => ({ - user, - posts - })); - }).then(({user, posts}) => { - return fetchContent(posts, user); - }).catch(error => { - return console.log('\x1b[31m%s\x1b[0m', error); - }); + return userPosts; +}).then(posts => { + return curatePosts(posts, args.ignore).slice(0, args.limit); +}).then(posts => { + return attachContentInfo(posts).then(info => fetchContent(posts)); +}).catch(error => { + return console.error(error); }); + +function getUserPosts(users) { + return users.reduce((chain, username) => { + return chain.then(accPosts => { + return 
reddit.getUser(username).fetch().then(curateUser).then(saveProfileDetails).then(user => ({user, accPosts})); + }).then(({user, accPosts}) => { + return reddit.getUser(username).getSubmissions({ + sort: args.sort, + limit: Infinity + }).then(posts => { + return accPosts.concat(posts.map(post => { + post.user = user; + + return post; + })); + }); + }); + }, Promise.resolve([])); +}; + +function getPosts(postIds) { + return postIds.reduce((chain, postId) => { + return chain.then(acc => { + return reddit.getSubmission(postId).fetch().then(post => ({post, acc})); + }).then(({post, acc}) => { + if(acc.users[post.author.name]) { + return {post, acc, user: acc.users[post.author.name]} + } + + return reddit.getUser(post.author.name).fetch().then(curateUser).then(saveProfileDetails).then(user => ({post, acc, user})); + }).then(({post, acc, user}) => { + post.user = user; + acc.posts.push(post); + + // keep track of users to prevent fetching one user multiple times + acc.users[user.name] = user; + + return acc; + }); + }, Promise.resolve({ + posts: [], + users: {} + })).then(({posts, users}) => { + return posts; + }); +}; diff --git a/src/cli.js b/src/cli.js new file mode 100644 index 0000000..d790593 --- /dev/null +++ b/src/cli.js @@ -0,0 +1,26 @@ +'use strict'; + +const config = require('config'); +const yargs = require('yargs'); + +module.exports = yargs.option('users', { + alias: 'user', + describe: 'Reddit usernames to fetch posts from', + type: 'array' +}).option('posts', { + alias: 'post', + describe: 'Reddit post IDs to fetch', + type: 'array' +}).option('limit', { + describe: 'Maximum amount of posts to fetch after filtering out ignored, cross- and reposts', + type: 'number', + default: config.fetch.limit +}).option('sort', { + describe: 'Property to sort posts by', + choices: ['new', 'top', 'hot', 'controversial'], + default: config.fetch.sort +}).option('ignore', { + describe: 'Ignore posts with any of these properties', + type: 'array', + choices: ['pinned', 
'stickied', 'hidden', 'spoiler', 'over_18'] +}).argv; diff --git a/src/curate/posts.js b/src/curate/posts.js new file mode 100644 index 0000000..a3c7eea --- /dev/null +++ b/src/curate/posts.js @@ -0,0 +1,46 @@ +'use strict'; + +const config = require('config'); +const dissectLink = require('../dissectLink.js'); + +function curatePosts(posts, ignore) { + const processed = new Set(); + + return posts.reduce((acc, post, index) => { + const host = dissectLink(post.url); + const ignoring = ignore ? ignore.find(prop => { + return post[prop]; + }) : null; + + if(ignoring) { + console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' - ${post.url}`); + + return acc; + } + + if(host) { + if(config.fetch.avoidDuplicates && processed.has(host.id)) { + console.log('\x1b[33m%s\x1b[0m', `Ignoring cross-post or repost '${post.title}' - ${post.url}`); + + return acc; + } + + processed.add(host.id); + } + + return acc.concat({ + id: post.id, + index: index, + title: post.title, + text: post.selftext, + user: post.user, + permalink: 'https://reddit.com' + post.permalink, + url: post.url, + datetime: new Date(post.created_utc * 1000), + subreddit: post.subreddit.display_name, + host + }); + }, []); +}; + +module.exports = curatePosts; diff --git a/src/curate/submissions.js b/src/curate/submissions.js deleted file mode 100644 index 2a23146..0000000 --- a/src/curate/submissions.js +++ /dev/null @@ -1,48 +0,0 @@ -'use strict'; - -const config = require('config'); -const dissectLink = require('../dissectLink.js'); - -function curateSubmissions(submissions, ignore) { - const processed = new Set(); - - return submissions.reduce((acc, submission, index) => { - const host = dissectLink(submission.url); - const ignoring = ignore.find(prop => { - return submission[prop]; - }); - - if(ignoring) { - console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${submission.title}' - ${submission.url}`); - - return acc; - } - - if(host) { - if(config.fetch.avoidDuplicates && 
processed.has(host.id)) { - console.log('\x1b[33m%s\x1b[0m', `Ignoring cross-post or repost '${submission.title}' - ${submission.url}`); - - return acc; - } - - processed.add(host.id); - } - - const curatedSubmission = { - id: submission.id, - index: index, - title: submission.title, - text: submission.selftext, - user: submission.author.name, - permalink: 'https://reddit.com' + submission.permalink, - url: submission.url, - datetime: new Date(submission.created_utc * 1000), - subreddit: submission.subreddit.display_name, - host - }; - - return acc.concat(curatedSubmission); - }, []); -}; - -module.exports = curateSubmissions; diff --git a/src/fetch/content.js b/src/fetch/content.js index 7f090c4..ccfbfb5 100644 --- a/src/fetch/content.js +++ b/src/fetch/content.js @@ -16,7 +16,7 @@ const exiftoolBin = require('dist-exiftool'); const ep = new exiftool.ExiftoolProcess(exiftoolBin); -module.exports = function(posts, user) { +module.exports = function(posts) { return Promise.resolve().then(() => { return ep.open(); }).then(() => { @@ -37,7 +37,7 @@ module.exports = function(posts, user) { })).then(items => { return Promise.all(items.map(item => { const type = item.type.split('/')[0]; - const filepath = post.content.album ? interpolate(config.library.album[type], user, post, item) : interpolate(config.library[type], user, post, item); + const filepath = post.content.album ? 
interpolate(config.library.album[type], post.user, post, item) : interpolate(config.library[type], post.user, post, item); return Promise.resolve().then(() => { return fs.ensureDir(path.dirname(filepath)); @@ -49,7 +49,7 @@ module.exports = function(posts, user) { } }).then(() => { const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => { - const interpolatedValue = interpolate(value, user, post, item); + const interpolatedValue = interpolate(value, post.user, post, item); if(interpolatedValue) { acc[key] = interpolatedValue; diff --git a/src/fetch/info.js b/src/fetch/info.js index 85ce654..ddd873d 100644 --- a/src/fetch/info.js +++ b/src/fetch/info.js @@ -2,7 +2,7 @@ const methods = require('../methods/methods.js'); -function fetchInfo(posts) { +function attachContentInfo(posts) { return Promise.all(posts.reduce((acc, post) => { if(post.host && methods[post.host.method]) { acc = acc.concat(methods[post.host.method](post).then(content => { @@ -18,4 +18,4 @@ function fetchInfo(posts) { }, [])); }; -module.exports = fetchInfo; +module.exports = attachContentInfo;