Added support for fetching individual posts. Improved use of yargs, --help now available. Refactored main app flow.
This commit is contained in:
parent
1581e780b8
commit
de64cc6d13
13
README.md
13
README.md
|
@ -33,13 +33,14 @@ reddit-post-dump requires an arbitrarily recent version of Node.js. Before use, d
|
||||||
`npm install`
|
`npm install`
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
`node app.js --user={username}`
|
`node app.js (--user <username> | --post <post-id>)`
|
||||||
|
|
||||||
### Optional parameters
|
### Optional arguments
|
||||||
* `--users={user1,user2}`: You may fetch posts from multiple users by either supplying a comma-separated list of usernames (no spaces) with `--users`, or by using multiple individual `--user` arguments
|
* `--users <username> [<username>...]`: You may fetch posts from multiple users by supplying a space-separated list of usernames to `--users`.
|
||||||
* `--limit={number}`: Maximum number of posts per user to fetch content from. The limit is applied after filtering out ignored posts, cross-posts and reposts.
|
* `--posts <post-id> [<post-id>...]`: Fetch multiple posts by supplying a space-separated list of post IDs to `--posts`.
|
||||||
* `--sort={method}`: How posts should be sorted while fetched. This affects the `$postIndex` variable, and in combination with a `--limit` decides what posts will be included.
|
* `--limit <number>`: Maximum number of posts per user to fetch content from. The limit is applied after filtering out ignored posts, cross-posts and reposts.
|
||||||
* `--ignore={prop1,prop2}`: Ignore submissions that have any of these comma-separated properties. Supported properties include `pinned`, `stickied`, `hidden`, `over_18`, `spoiler` (protip: any property in the API response can be ignored, if desired).
|
* `--sort <method>`: How posts should be sorted while fetched. This affects the `$postIndex` variable, and in combination with a `--limit` decides what posts will be included.
|
||||||
|
* `--ignore <prop> [<prop>...]`: Ignore posts with any of the following properties: `pinned`, `stickied`, `hidden`, `over_18`, `spoiler`.
|
||||||
|
|
||||||
### Examples
|
### Examples
|
||||||
* `node app.js --user=ThePendulum`
|
* `node app.js --user=ThePendulum`
|
||||||
|
|
102
src/app.js
102
src/app.js
|
@ -3,18 +3,17 @@
|
||||||
const config = require('config');
|
const config = require('config');
|
||||||
const util = require('util');
|
const util = require('util');
|
||||||
const fs = require('fs-extra');
|
const fs = require('fs-extra');
|
||||||
const yargs = require('yargs').argv;
|
|
||||||
const snoowrap = require('snoowrap');
|
const snoowrap = require('snoowrap');
|
||||||
const promiseFinally = require('promise.prototype.finally');
|
const promiseFinally = require('promise.prototype.finally');
|
||||||
|
|
||||||
const reddit = new snoowrap(config.reddit.api);
|
const reddit = new snoowrap(config.reddit.api);
|
||||||
|
|
||||||
const curateSubmissions = require('./curate/submissions.js');
|
const curatePosts = require('./curate/posts.js');
|
||||||
const curateUser = require('./curate/user.js');
|
const curateUser = require('./curate/user.js');
|
||||||
|
|
||||||
const interpolate = require('./interpolate.js');
|
const interpolate = require('./interpolate.js');
|
||||||
|
|
||||||
const fetchInfo = require('./fetch/info.js');
|
const attachContentInfo = require('./fetch/info.js');
|
||||||
const fetchContent = require('./fetch/content.js');
|
const fetchContent = require('./fetch/content.js');
|
||||||
|
|
||||||
const save = require('./save/save.js');
|
const save = require('./save/save.js');
|
||||||
|
@ -22,41 +21,72 @@ const saveProfileDetails = require('./save/profileDetails.js');
|
||||||
|
|
||||||
promiseFinally.shim();
|
promiseFinally.shim();
|
||||||
|
|
||||||
const limit = yargs.limit || config.fetch.limit;
|
const args = require('./cli.js');
|
||||||
|
|
||||||
// allow for any combination of --ignore val1 --ignore val2, --ignore=val1,val2
|
if(!(args.users && args.users.length) && !(args.posts && args.posts.length)) {
|
||||||
const ignore = yargs.ignore ? [].concat(yargs.ignore).reduce((acc, prop) => acc.concat(prop.split(',')), []) : [];
|
return console.log('\x1b[31m%s\x1b[0m', 'Please supply at least one user with --user <user> or one post with --post <post-id>. See --help for more options.');
|
||||||
|
|
||||||
if(!yargs.user && typeof yargs.users !== 'string') {
|
|
||||||
return console.log('\x1b[31m%s\x1b[0m', 'Please supply at least one user with --user=[user], or multiple users with --users=[user1,user2] or --user=[user1] --user=[user2]');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const users = yargs.users ? yargs.users.split(',') : [].concat(yargs.user);
|
Promise.resolve().then(() => {
|
||||||
|
if(args.users) {
|
||||||
|
return getUserPosts(args.users);
|
||||||
|
}
|
||||||
|
}).then((userPosts = []) => {
|
||||||
|
if(args.posts) {
|
||||||
|
return getPosts(args.posts).then(posts => posts.concat(userPosts));
|
||||||
|
}
|
||||||
|
|
||||||
users.forEach(username => {
|
return userPosts;
|
||||||
return Promise.resolve().then(() => {
|
}).then(posts => {
|
||||||
return reddit.getUser(username).fetch().then(curateUser);
|
return curatePosts(posts).slice(0, args.limit);
|
||||||
}).then(user => {
|
}).then(posts => {
|
||||||
return saveProfileDetails(user);
|
return attachContentInfo(posts).then(info => fetchContent(posts));
|
||||||
}).then(user => {
|
}).catch(error => {
|
||||||
return reddit.getUser(username).getSubmissions({
|
return console.error(error);
|
||||||
sort: yargs.sort || config.fetch.sort,
|
|
||||||
limit: Infinity
|
|
||||||
}).then(submissions => ({
|
|
||||||
user,
|
|
||||||
submissions
|
|
||||||
}));
|
|
||||||
}).then(({user, submissions}) => {
|
|
||||||
const posts = curateSubmissions(submissions, ignore);
|
|
||||||
const limitedPosts = posts.slice(0, limit);
|
|
||||||
|
|
||||||
return fetchInfo(limitedPosts).then(info => ({
|
|
||||||
user,
|
|
||||||
posts
|
|
||||||
}));
|
|
||||||
}).then(({user, posts}) => {
|
|
||||||
return fetchContent(posts, user);
|
|
||||||
}).catch(error => {
|
|
||||||
return console.log('\x1b[31m%s\x1b[0m', error);
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
 * Collects the submissions of every given user, handling one user at a time.
 *
 * For each username the profile is fetched and passed through curateUser and
 * saveProfileDetails; whatever that resolves with is attached to each of the
 * user's submissions as `post.user`.
 *
 * @param {string[]} users - Reddit usernames to fetch submissions from.
 * @returns {Promise<Array>} Resolves with all submissions, in the order the users were given.
 */
function getUserPosts(users) {
    // returns a mapper that stamps the given user onto a post (the post itself is modified)
    const attachUser = user => post => {
        post.user = user;

        return post;
    };

    const fetchPostsOf = username => reddit.getUser(username).fetch()
        .then(curateUser)
        .then(saveProfileDetails)
        .then(user => {
            const listingOptions = {
                sort: args.sort,
                limit: Infinity
            };

            return reddit.getUser(username).getSubmissions(listingOptions).then(posts => posts.map(attachUser(user)));
        });

    // chain the users sequentially, growing the list of posts as each one completes
    return users.reduce(
        (chain, username) => chain.then(accPosts => fetchPostsOf(username).then(posts => accPosts.concat(posts))),
        Promise.resolve([])
    );
}
|
||||||
|
|
||||||
|
/**
 * Fetches individual posts by ID, one at a time, and attaches each author's
 * profile to the post as `post.user`.
 *
 * An author's profile is fetched, passed through curateUser and
 * saveProfileDetails once, and then reused for further posts by the same author.
 *
 * @param {string[]} postIds - Reddit post IDs to fetch.
 * @returns {Promise<Array>} Resolves with the fetched posts, in the order of `postIds`.
 */
function getPosts(postIds) {
    // A Map instead of a plain object: author names are external input, and a name such as
    // 'constructor' would otherwise match a property inherited from Object.prototype and be
    // mistaken for an already fetched user.
    const usersByName = new Map();
    const posts = [];

    return postIds.reduce((chain, postId) => {
        return chain.then(() => {
            return reddit.getSubmission(postId).fetch();
        }).then(post => {
            const authorName = post.author.name;

            if(usersByName.has(authorName)) {
                return {post, user: usersByName.get(authorName)};
            }

            return reddit.getUser(authorName).fetch().then(curateUser).then(saveProfileDetails).then(user => {
                // store under the same name the lookup above uses, so repeat authors always hit the cache
                usersByName.set(authorName, user);

                return {post, user};
            });
        }).then(({post, user}) => {
            post.user = user;
            posts.push(post);
        });
    }, Promise.resolve()).then(() => posts);
}
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const config = require('config');
|
||||||
|
const yargs = require('yargs');
|
||||||
|
|
||||||
|
// Command line options, keyed by their long name. Defaults for `limit` and
// `sort` come from the application config.
const options = {
    users: {
        alias: 'user',
        describe: 'Reddit usernames to fetch posts from',
        type: 'array'
    },
    posts: {
        alias: 'post',
        describe: 'Reddit post IDs to fetch',
        type: 'array'
    },
    limit: {
        describe: 'Maximum amount of posts to fetch after filtering out ignored, cross- and reposts',
        type: 'number',
        default: config.fetch.limit
    },
    sort: {
        describe: 'Property to sort posts by',
        choices: ['new', 'top', 'hot', 'controversial'],
        default: config.fetch.sort
    },
    ignore: {
        describe: 'Ignore posts with any of these properties',
        type: 'array',
        choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18']
    }
};

// The module exports the parsed arguments themselves, not the yargs instance.
module.exports = yargs.options(options).argv;
|
|
@ -0,0 +1,46 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const config = require('config');
|
||||||
|
const dissectLink = require('../dissectLink.js');
|
||||||
|
|
||||||
|
/**
 * Converts raw posts into the plain objects used by the rest of the application,
 * dropping ignored posts and, when `config.fetch.avoidDuplicates` is set, posts
 * whose dissected link ID was already seen earlier in the list.
 *
 * @param {Array} posts - Raw posts; each is expected to carry the `user` attached by the caller.
 * @param {string[]} [ignore] - Post properties; a post with a truthy value for any of them is dropped.
 * @returns {Array} Curated posts. `index` is the position of the post in the original `posts` list,
 *     so it is not renumbered when posts are dropped.
 */
function curatePosts(posts, ignore) {
    const processed = new Set();
    // collected in a single array; concatenating a fresh copy per post made this quadratic
    const curated = [];

    posts.forEach((post, index) => {
        const host = dissectLink(post.url);
        const ignoring = ignore ? ignore.find(prop => post[prop]) : null;

        if(ignoring) {
            console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' - ${post.url}`);

            return;
        }

        // posts without a recognized host are kept, but cannot be checked for duplicates
        if(host) {
            if(config.fetch.avoidDuplicates && processed.has(host.id)) {
                console.log('\x1b[33m%s\x1b[0m', `Ignoring cross-post or repost '${post.title}' - ${post.url}`);

                return;
            }

            processed.add(host.id);
        }

        curated.push({
            id: post.id,
            index: index,
            title: post.title,
            text: post.selftext,
            user: post.user,
            permalink: 'https://reddit.com' + post.permalink,
            url: post.url,
            // created_utc is multiplied by 1000 before being handed to Date
            datetime: new Date(post.created_utc * 1000),
            subreddit: post.subreddit.display_name,
            host
        });
    });

    return curated;
}
|
||||||
|
|
||||||
|
module.exports = curatePosts;
|
|
@ -1,48 +0,0 @@
|
||||||
'use strict';
|
|
||||||
|
|
||||||
const config = require('config');
|
|
||||||
const dissectLink = require('../dissectLink.js');
|
|
||||||
|
|
||||||
/**
 * Converts raw submissions into plain curated objects, leaving out submissions
 * that have any of the ignored properties and, when `config.fetch.avoidDuplicates`
 * is set, submissions whose dissected link ID was seen earlier in the list.
 *
 * @param {Array} submissions - Raw submissions.
 * @param {string[]} ignore - Submission properties; a truthy value for any of them drops the submission.
 * @returns {Array} Curated submissions; `index` is the position in the original list.
 */
function curateSubmissions(submissions, ignore) {
    const seenHostIds = new Set();

    return submissions.reduce((curated, submission, index) => {
        const host = dissectLink(submission.url);
        const ignoredProp = ignore.find(prop => submission[prop]);

        if(ignoredProp) {
            console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoredProp} post '${submission.title}' - ${submission.url}`);

            return curated;
        }

        // only submissions with a recognized host can be compared against earlier ones
        const isRepost = Boolean(host) && config.fetch.avoidDuplicates && seenHostIds.has(host.id);

        if(isRepost) {
            console.log('\x1b[33m%s\x1b[0m', `Ignoring cross-post or repost '${submission.title}' - ${submission.url}`);

            return curated;
        }

        if(host) {
            seenHostIds.add(host.id);
        }

        return curated.concat({
            id: submission.id,
            index: index,
            title: submission.title,
            text: submission.selftext,
            user: submission.author.name,
            permalink: 'https://reddit.com' + submission.permalink,
            url: submission.url,
            datetime: new Date(submission.created_utc * 1000),
            subreddit: submission.subreddit.display_name,
            host
        });
    }, []);
}
|
|
||||||
|
|
||||||
module.exports = curateSubmissions;
|
|
|
@ -16,7 +16,7 @@ const exiftoolBin = require('dist-exiftool');
|
||||||
|
|
||||||
const ep = new exiftool.ExiftoolProcess(exiftoolBin);
|
const ep = new exiftool.ExiftoolProcess(exiftoolBin);
|
||||||
|
|
||||||
module.exports = function(posts, user) {
|
module.exports = function(posts) {
|
||||||
return Promise.resolve().then(() => {
|
return Promise.resolve().then(() => {
|
||||||
return ep.open();
|
return ep.open();
|
||||||
}).then(() => {
|
}).then(() => {
|
||||||
|
@ -37,7 +37,7 @@ module.exports = function(posts, user) {
|
||||||
})).then(items => {
|
})).then(items => {
|
||||||
return Promise.all(items.map(item => {
|
return Promise.all(items.map(item => {
|
||||||
const type = item.type.split('/')[0];
|
const type = item.type.split('/')[0];
|
||||||
const filepath = post.content.album ? interpolate(config.library.album[type], user, post, item) : interpolate(config.library[type], user, post, item);
|
const filepath = post.content.album ? interpolate(config.library.album[type], post.user, post, item) : interpolate(config.library[type], post.user, post, item);
|
||||||
|
|
||||||
return Promise.resolve().then(() => {
|
return Promise.resolve().then(() => {
|
||||||
return fs.ensureDir(path.dirname(filepath));
|
return fs.ensureDir(path.dirname(filepath));
|
||||||
|
@ -49,7 +49,7 @@ module.exports = function(posts, user) {
|
||||||
}
|
}
|
||||||
}).then(() => {
|
}).then(() => {
|
||||||
const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => {
|
const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => {
|
||||||
const interpolatedValue = interpolate(value, user, post, item);
|
const interpolatedValue = interpolate(value, post.user, post, item);
|
||||||
|
|
||||||
if(interpolatedValue) {
|
if(interpolatedValue) {
|
||||||
acc[key] = interpolatedValue;
|
acc[key] = interpolatedValue;
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
const methods = require('../methods/methods.js');
|
const methods = require('../methods/methods.js');
|
||||||
|
|
||||||
function fetchInfo(posts) {
|
function attachContentInfo(posts) {
|
||||||
return Promise.all(posts.reduce((acc, post) => {
|
return Promise.all(posts.reduce((acc, post) => {
|
||||||
if(post.host && methods[post.host.method]) {
|
if(post.host && methods[post.host.method]) {
|
||||||
acc = acc.concat(methods[post.host.method](post).then(content => {
|
acc = acc.concat(methods[post.host.method](post).then(content => {
|
||||||
|
@ -18,4 +18,4 @@ function fetchInfo(posts) {
|
||||||
}, []));
|
}, []));
|
||||||
};
|
};
|
||||||
|
|
||||||
module.exports = fetchInfo;
|
module.exports = attachContentInfo;
|
||||||
|
|
Loading…
Reference in New Issue