Added support for fetching individual posts. Improved use of yargs, --help now available. Refactored main app flow.

This commit is contained in:
DebaucheryLibrarian 2024-09-11 05:16:54 +02:00
parent 1581e780b8
commit de64cc6d13
7 changed files with 150 additions and 95 deletions

View File

@ -33,13 +33,14 @@ reddit-post-dump requires a arbitrarily recent version of Node.js. Before use, d
`npm install` `npm install`
## Usage ## Usage
`node app.js --user={username}` `node app.js (--user <username> | --post <post-id>)`
### Optional parameters ### Optional arguments
* `--users={user1,user2}`: You may fetch posts from multiple users by either supplying a comma-separated list of usernames (no spaces) with `--users`, or by using multiple individual `--user` arguments * `--users <username> [<username>...]`: You may fetch posts from multiple users by supplying a space-separated list of usernames to `--users`.
* `--limit={number}`: Maximum number of posts per user to fetch content from. Limit is applied after filtering out ignored, cross- and reposts. * `--posts <post-id> [<post-id>...]`: Fetch multiple posts by supplying a space-separated list of post IDs to `--posts`.
* `--sort={method}`: How posts should be sorted while fetched. This affects the `$postIndex` variable, and in combination with a `--limit` decides what posts will be included. * `--limit <number>`: Maximum number of posts per user to fetch content from. Limit is applied after filtering out ignored, cross- and reposts.
* `--ignore={prop1,prop2}`: Ignore submissions that have any of these comma-separated properties. Supported properties include `pinned`, `stickied`, `hidden`, `over_18`, `spoiler` (protip: any property in the API response can be ignored, if desired). * `--sort <method>`: How posts should be sorted while fetched. This affects the `$postIndex` variable, and in combination with a `--limit` decides what posts will be included.
* `--ignore <prop> [<prop>...]`: Ignore posts with any of the following properties: `pinned`, `stickied`, `hidden`, `over_18`, `spoiler`.
### Examples ### Examples
* `node app.js --user=ThePendulum` * `node app.js --user=ThePendulum`

View File

@ -3,18 +3,17 @@
const config = require('config'); const config = require('config');
const util = require('util'); const util = require('util');
const fs = require('fs-extra'); const fs = require('fs-extra');
const yargs = require('yargs').argv;
const snoowrap = require('snoowrap'); const snoowrap = require('snoowrap');
const promiseFinally = require('promise.prototype.finally'); const promiseFinally = require('promise.prototype.finally');
const reddit = new snoowrap(config.reddit.api); const reddit = new snoowrap(config.reddit.api);
const curateSubmissions = require('./curate/submissions.js'); const curatePosts = require('./curate/posts.js');
const curateUser = require('./curate/user.js'); const curateUser = require('./curate/user.js');
const interpolate = require('./interpolate.js'); const interpolate = require('./interpolate.js');
const fetchInfo = require('./fetch/info.js'); const attachContentInfo = require('./fetch/info.js');
const fetchContent = require('./fetch/content.js'); const fetchContent = require('./fetch/content.js');
const save = require('./save/save.js'); const save = require('./save/save.js');
@ -22,41 +21,72 @@ const saveProfileDetails = require('./save/profileDetails.js');
promiseFinally.shim(); promiseFinally.shim();
const limit = yargs.limit || config.fetch.limit; const args = require('./cli.js');
// allow for any combination of --ignore val1 --ignore val2, --ignore=val1,val2 if(!(args.users && args.users.length) && !(args.posts && args.posts.length)) {
const ignore = yargs.ignore ? [].concat(yargs.ignore).reduce((acc, prop) => acc.concat(prop.split(',')), []) : []; return console.log('\x1b[31m%s\x1b[0m', 'Please supply at least one user with --user <user> or one post with --post <post-id>. See --help for more options.');
if(!yargs.user && typeof yargs.users !== 'string') {
return console.log('\x1b[31m%s\x1b[0m', 'Please supply at least one user with --user=[user], or multiple users with --users=[user1,user2] or --user=[user1] --user=[user2]');
} }
const users = yargs.users ? yargs.users.split(',') : [].concat(yargs.user); Promise.resolve().then(() => {
if(args.users) {
return getUserPosts(args.users);
}
}).then((userPosts = []) => {
if(args.posts) {
return getPosts(args.posts).then(posts => posts.concat(userPosts));
}
users.forEach(username => { return userPosts;
return Promise.resolve().then(() => { }).then(posts => {
return reddit.getUser(username).fetch().then(curateUser); return curatePosts(posts).slice(0, args.limit);
}).then(user => { }).then(posts => {
return saveProfileDetails(user); return attachContentInfo(posts).then(info => fetchContent(posts));
}).then(user => { }).catch(error => {
return reddit.getUser(username).getSubmissions({ return console.error(error);
sort: yargs.sort || config.fetch.sort,
limit: Infinity
}).then(submissions => ({
user,
submissions
}));
}).then(({user, submissions}) => {
const posts = curateSubmissions(submissions, ignore);
const limitedPosts = posts.slice(0, limit);
return fetchInfo(limitedPosts).then(info => ({
user,
posts
}));
}).then(({user, posts}) => {
return fetchContent(posts, user);
}).catch(error => {
return console.log('\x1b[31m%s\x1b[0m', error);
});
}); });
/**
 * Collect the submissions of every given user, one user at a time.
 *
 * For each username the profile is fetched, passed through curateUser and
 * saveProfileDetails, and the value saveProfileDetails resolves with is
 * attached to each of that user's submissions as `post.user`.
 *
 * @param {string[]} users - Reddit usernames to fetch submissions for
 * @returns {Promise<Array>} Submissions of all users, in the order the usernames were given
 */
function getUserPosts(users) {
    // resolves with a single user's submissions, each tagged with that user's saved profile
    const fetchTaggedPosts = username => {
        return reddit.getUser(username).fetch().then(curateUser).then(saveProfileDetails).then(profile => {
            return reddit.getUser(username).getSubmissions({
                sort: args.sort,
                limit: Infinity
            }).then(submissions => submissions.map(submission => {
                submission.user = profile;

                return submission;
            }));
        });
    };

    // chain the users onto one promise so they are processed sequentially rather than in parallel
    return users.reduce((queue, username) => {
        return queue.then(collected => fetchTaggedPosts(username).then(tagged => collected.concat(tagged)));
    }, Promise.resolve([]));
}
/**
 * Fetch individual posts by ID and attach the saved profile of each post's
 * author as `post.user`.
 *
 * Posts are fetched one after another. An author's profile is fetched, curated
 * and saved only the first time that author is encountered; later posts by the
 * same author reuse the cached profile.
 *
 * @param {string[]} postIds - Reddit post IDs to fetch
 * @returns {Promise<Array>} The fetched posts, in the order the IDs were given
 */
function getPosts(postIds) {
    // Profiles fetched so far, keyed by author name. A Map is used because a plain object
    // reports inherited properties (e.g. 'constructor', 'toString') as if they were cached users.
    const usersByName = new Map();

    const getAuthor = authorName => {
        if(usersByName.has(authorName)) {
            return Promise.resolve(usersByName.get(authorName));
        }

        return reddit.getUser(authorName).fetch().then(curateUser).then(saveProfileDetails).then(user => {
            // store under the same name the lookup uses, so the next post by this author is a cache hit
            usersByName.set(authorName, user);

            return user;
        });
    };

    // chain the posts onto one promise so they are fetched sequentially rather than in parallel
    return postIds.reduce((chain, postId) => {
        return chain.then(posts => {
            return reddit.getSubmission(postId).fetch().then(post => {
                return getAuthor(post.author.name).then(user => {
                    post.user = user;
                    posts.push(post);

                    return posts;
                });
            });
        });
    }, Promise.resolve([]));
}

26
src/cli.js Normal file
View File

@ -0,0 +1,26 @@
'use strict';
const config = require('config');
const yargs = require('yargs');
// Command line options, keyed by option name.
const options = {
    users: {
        alias: 'user',
        describe: 'Reddit usernames to fetch posts from',
        type: 'array'
    },
    posts: {
        alias: 'post',
        describe: 'Reddit post IDs to fetch',
        type: 'array'
    },
    limit: {
        describe: 'Maximum amount of posts to fetch after filtering out ignored, cross- and reposts',
        type: 'number',
        // falls back to the configured limit when --limit is not supplied
        default: config.fetch.limit
    },
    sort: {
        describe: 'Property to sort posts by',
        choices: ['new', 'top', 'hot', 'controversial'],
        // falls back to the configured sort method when --sort is not supplied
        default: config.fetch.sort
    },
    ignore: {
        describe: 'Ignore posts with any of these properties',
        type: 'array',
        choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18']
    }
};

// The module exports the parsed arguments object rather than the yargs instance.
module.exports = yargs.options(options).argv;

46
src/curate/posts.js Normal file
View File

@ -0,0 +1,46 @@
'use strict';
const config = require('config');
const dissectLink = require('../dissectLink.js');
/**
 * Reduce raw posts to the fields used by the rest of the application,
 * leaving out ignored posts and, when enabled in the config, duplicates.
 *
 * @param {Array} posts - Raw posts; each is expected to carry the `user` it belongs to
 * @param {string[]} [ignore] - Property names; a post with a truthy value for any of them is skipped
 * @returns {Array<Object>} Curated posts; `index` is the post's position in the unfiltered input
 */
function curatePosts(posts, ignore) {
    // host IDs already encountered, used to recognise cross-posts and reposts
    const seenHostIds = new Set();
    const curated = [];

    posts.forEach((post, index) => {
        const host = dissectLink(post.url);
        const ignoredProp = ignore ? ignore.find(prop => post[prop]) : null;

        if(ignoredProp) {
            console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoredProp} post '${post.title}' - ${post.url}`);

            return;
        }

        if(host && config.fetch.avoidDuplicates && seenHostIds.has(host.id)) {
            console.log('\x1b[33m%s\x1b[0m', `Ignoring cross-post or repost '${post.title}' - ${post.url}`);

            return;
        }

        // posts without a recognised host are never treated as duplicates
        if(host) {
            seenHostIds.add(host.id);
        }

        curated.push({
            id: post.id,
            index,
            title: post.title,
            text: post.selftext,
            user: post.user,
            permalink: `https://reddit.com${post.permalink}`,
            url: post.url,
            // created_utc is multiplied up to the milliseconds Date expects
            datetime: new Date(post.created_utc * 1000),
            subreddit: post.subreddit.display_name,
            host
        });
    });

    return curated;
}
module.exports = curatePosts;

View File

@ -1,48 +0,0 @@
'use strict';
const config = require('config');
const dissectLink = require('../dissectLink.js');
/**
 * Reduce raw submissions to the fields used by the rest of the application,
 * leaving out ignored submissions and, when enabled in the config, duplicates.
 *
 * @param {Array} submissions - Raw submissions
 * @param {string[]} [ignore] - Property names; a submission with a truthy value for any of them is skipped.
 *   May be omitted, in which case nothing is ignored.
 * @returns {Array<Object>} Curated submissions; `index` is the submission's position in the unfiltered input
 */
function curateSubmissions(submissions, ignore) {
    // tolerate a missing ignore list instead of throwing on ignore.find
    const ignoredProps = ignore || [];
    // host IDs already encountered, used to recognise cross-posts and reposts
    const processed = new Set();
    const curated = [];

    submissions.forEach((submission, index) => {
        const host = dissectLink(submission.url);
        const ignoring = ignoredProps.find(prop => submission[prop]);

        if(ignoring) {
            console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${submission.title}' - ${submission.url}`);

            return;
        }

        // submissions without a recognised host are never treated as duplicates
        if(host) {
            if(config.fetch.avoidDuplicates && processed.has(host.id)) {
                console.log('\x1b[33m%s\x1b[0m', `Ignoring cross-post or repost '${submission.title}' - ${submission.url}`);

                return;
            }

            processed.add(host.id);
        }

        curated.push({
            id: submission.id,
            index,
            title: submission.title,
            text: submission.selftext,
            user: submission.author.name,
            permalink: `https://reddit.com${submission.permalink}`,
            url: submission.url,
            // created_utc is multiplied up to the milliseconds Date expects
            datetime: new Date(submission.created_utc * 1000),
            subreddit: submission.subreddit.display_name,
            host
        });
    });

    return curated;
}
module.exports = curateSubmissions;

View File

@ -16,7 +16,7 @@ const exiftoolBin = require('dist-exiftool');
const ep = new exiftool.ExiftoolProcess(exiftoolBin); const ep = new exiftool.ExiftoolProcess(exiftoolBin);
module.exports = function(posts, user) { module.exports = function(posts) {
return Promise.resolve().then(() => { return Promise.resolve().then(() => {
return ep.open(); return ep.open();
}).then(() => { }).then(() => {
@ -37,7 +37,7 @@ module.exports = function(posts, user) {
})).then(items => { })).then(items => {
return Promise.all(items.map(item => { return Promise.all(items.map(item => {
const type = item.type.split('/')[0]; const type = item.type.split('/')[0];
const filepath = post.content.album ? interpolate(config.library.album[type], user, post, item) : interpolate(config.library[type], user, post, item); const filepath = post.content.album ? interpolate(config.library.album[type], post.user, post, item) : interpolate(config.library[type], post.user, post, item);
return Promise.resolve().then(() => { return Promise.resolve().then(() => {
return fs.ensureDir(path.dirname(filepath)); return fs.ensureDir(path.dirname(filepath));
@ -49,7 +49,7 @@ module.exports = function(posts, user) {
} }
}).then(() => { }).then(() => {
const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => { const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => {
const interpolatedValue = interpolate(value, user, post, item); const interpolatedValue = interpolate(value, post.user, post, item);
if(interpolatedValue) { if(interpolatedValue) {
acc[key] = interpolatedValue; acc[key] = interpolatedValue;

View File

@ -2,7 +2,7 @@
const methods = require('../methods/methods.js'); const methods = require('../methods/methods.js');
function fetchInfo(posts) { function attachContentInfo(posts) {
return Promise.all(posts.reduce((acc, post) => { return Promise.all(posts.reduce((acc, post) => {
if(post.host && methods[post.host.method]) { if(post.host && methods[post.host.method]) {
acc = acc.concat(methods[post.host.method](post).then(content => { acc = acc.concat(methods[post.host.method](post).then(content => {
@ -18,4 +18,4 @@ function fetchInfo(posts) {
}, [])); }, []));
}; };
module.exports = fetchInfo; module.exports = attachContentInfo;