Added support for fetching individual posts. Improved use of yargs, --help now available. Refactored main app flow.
This commit is contained in:
parent
1581e780b8
commit
de64cc6d13
13
README.md
13
README.md
|
@ -33,13 +33,14 @@ reddit-post-dump requires an arbitrarily recent version of Node.js. Before use, d
|
|||
`npm install`
|
||||
|
||||
## Usage
|
||||
`node app.js --user={username}`
|
||||
`node app.js (--user <username> | --post <post-id>)`
|
||||
|
||||
### Optional parameters
|
||||
* `--users={user1,user2}`: You may fetch posts from multiple users by either supplying a comma-separated list of usernames (no spaces) with `--users`, or by using multiple individual `--user` arguments
|
||||
* `--limit={number}`: Maximum amount of posts per user to fetch content from. Limit is applied after filtering out ignored, cross- and reposts.
|
||||
* `--sort={method}`: How posts should be sorted while fetched. This affects the `$postIndex` variable, and in combination with a `--limit` decides what posts will be included.
|
||||
* `--ignore={prop1,prop2}`: Ignore submissions that have any of these comma-separated properties. Supported properties include `pinned`, `stickied`, `hidden`, `over_18`, `spoiler` (protip: any property in the API response can be ignored, if desired).
|
||||
### Optional arguments
|
||||
* `--users <username> [<username>...]`: You may fetch posts from multiple users by supplying a space-separated list of usernames to `--users`.
|
||||
* `--posts <post-id> [<post-id>...]`: Fetch multiple posts by supplying a space-separated list of post IDs to `--posts`.
|
||||
* `--limit <number>`: Maximum amount of posts per user to fetch content from. Limit is applied after filtering out ignored, cross- and reposts.
|
||||
* `--sort <method>`: How posts should be sorted while fetched. This affects the `$postIndex` variable, and in combination with a `--limit` decides what posts will be included.
|
||||
* `--ignore <prop> [<prop>...]`: Ignore posts with any of the following properties: `pinned`, `stickied`, `hidden`, `over_18`, `spoiler`.
|
||||
|
||||
### Examples
|
||||
* `node app.js --user=ThePendulum`
|
||||
|
|
98
src/app.js
98
src/app.js
|
@ -3,18 +3,17 @@
|
|||
const config = require('config');
|
||||
const util = require('util');
|
||||
const fs = require('fs-extra');
|
||||
const yargs = require('yargs').argv;
|
||||
const snoowrap = require('snoowrap');
|
||||
const promiseFinally = require('promise.prototype.finally');
|
||||
|
||||
const reddit = new snoowrap(config.reddit.api);
|
||||
|
||||
const curateSubmissions = require('./curate/submissions.js');
|
||||
const curatePosts = require('./curate/posts.js');
|
||||
const curateUser = require('./curate/user.js');
|
||||
|
||||
const interpolate = require('./interpolate.js');
|
||||
|
||||
const fetchInfo = require('./fetch/info.js');
|
||||
const attachContentInfo = require('./fetch/info.js');
|
||||
const fetchContent = require('./fetch/content.js');
|
||||
|
||||
const save = require('./save/save.js');
|
||||
|
@ -22,41 +21,72 @@ const saveProfileDetails = require('./save/profileDetails.js');
|
|||
|
||||
promiseFinally.shim();
|
||||
|
||||
const limit = yargs.limit || config.fetch.limit;
|
||||
const args = require('./cli.js');
|
||||
|
||||
// allow for any combination of --ignore val1 --ignore val2, --ignore=val1,val2
|
||||
const ignore = yargs.ignore ? [].concat(yargs.ignore).reduce((acc, prop) => acc.concat(prop.split(',')), []) : [];
|
||||
|
||||
if(!yargs.user && typeof yargs.users !== 'string') {
|
||||
return console.log('\x1b[31m%s\x1b[0m', 'Please supply at least one user with --user=[user], or multiple users with --users=[user1,user2] or --user=[user1] --user=[user2]');
|
||||
if(!(args.users && args.users.length) && !(args.posts && args.posts.length)) {
|
||||
return console.log('\x1b[31m%s\x1b[0m', 'Please supply at least one user with --user <user> or one post with --post <post-id>. See --help for more options.');
|
||||
}
|
||||
|
||||
const users = yargs.users ? yargs.users.split(',') : [].concat(yargs.user);
|
||||
Promise.resolve().then(() => {
|
||||
if(args.users) {
|
||||
return getUserPosts(args.users);
|
||||
}
|
||||
}).then((userPosts = []) => {
|
||||
if(args.posts) {
|
||||
return getPosts(args.posts).then(posts => posts.concat(userPosts));
|
||||
}
|
||||
|
||||
users.forEach(username => {
|
||||
return Promise.resolve().then(() => {
|
||||
return reddit.getUser(username).fetch().then(curateUser);
|
||||
}).then(user => {
|
||||
return saveProfileDetails(user);
|
||||
}).then(user => {
|
||||
return reddit.getUser(username).getSubmissions({
|
||||
sort: yargs.sort || config.fetch.sort,
|
||||
limit: Infinity
|
||||
}).then(submissions => ({
|
||||
user,
|
||||
submissions
|
||||
}));
|
||||
}).then(({user, submissions}) => {
|
||||
const posts = curateSubmissions(submissions, ignore);
|
||||
const limitedPosts = posts.slice(0, limit);
|
||||
|
||||
return fetchInfo(limitedPosts).then(info => ({
|
||||
user,
|
||||
posts
|
||||
}));
|
||||
}).then(({user, posts}) => {
|
||||
return fetchContent(posts, user);
|
||||
return userPosts;
|
||||
}).then(posts => {
|
||||
return curatePosts(posts).slice(0, args.limit);
|
||||
}).then(posts => {
|
||||
return attachContentInfo(posts).then(info => fetchContent(posts));
|
||||
}).catch(error => {
|
||||
return console.log('\x1b[31m%s\x1b[0m', error);
|
||||
return console.error(error);
|
||||
});
|
||||
|
||||
/**
 * Collect the submissions of every given user, one user at a time.
 *
 * For each username the profile is fetched, passed through curateUser and
 * saveProfileDetails, and the resulting user is attached to each of that
 * user's submissions as `post.user`.
 *
 * @param {string[]} users - Reddit usernames, processed in the order given.
 * @returns {Promise<Object[]>} Resolves to all submissions of all users, in user order.
 */
function getUserPosts(users) {
    // Fetch and store one profile, then tag that user's submissions with it.
    function fetchPostsOf(username) {
        return reddit.getUser(username).fetch()
            .then(curateUser)
            .then(saveProfileDetails)
            .then(user => {
                return reddit.getUser(username).getSubmissions({
                    sort: args.sort,
                    limit: Infinity
                }).then(posts => posts.map(post => {
                    post.user = user;

                    return post;
                }));
            });
    }

    // Users are chained sequentially; each link appends its posts to the running list.
    let pending = Promise.resolve([]);

    users.forEach(username => {
        pending = pending.then(collected => {
            return fetchPostsOf(username).then(userPosts => collected.concat(userPosts));
        });
    });

    return pending;
}
|
||||
|
||||
/**
 * Fetch individual submissions by ID and attach each one's author profile.
 *
 * Posts are fetched one at a time, in the order given. For every post the
 * author's profile is fetched, passed through curateUser and
 * saveProfileDetails, and attached to the post as `post.user`. Each author is
 * only fetched once per call, even when several posts share the same author.
 *
 * @param {string[]} postIds - Reddit submission IDs to fetch.
 * @returns {Promise<Object[]>} Resolves to the fetched posts, in input order.
 */
function getPosts(postIds) {
    // Keep track of users to prevent fetching one user multiple times. A Map is
    // used instead of a plain object because usernames like "constructor" or
    // "toString" would otherwise resolve to inherited Object.prototype members
    // and be mistaken for an already fetched user.
    const fetchedUsers = new Map();

    return postIds.reduce((chain, postId) => {
        return chain.then(posts => {
            return reddit.getSubmission(postId).fetch().then(post => {
                const authorName = post.author.name;

                if(fetchedUsers.has(authorName)) {
                    return {post, user: fetchedUsers.get(authorName)};
                }

                return reddit.getUser(authorName).fetch().then(curateUser).then(saveProfileDetails).then(user => ({post, user}));
            }).then(({post, user}) => {
                post.user = user;
                posts.push(post);

                // stored under the curated user's name, as the lookup above expects
                fetchedUsers.set(user.name, user);

                return posts;
            });
        });
    }, Promise.resolve([]));
}
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
const yargs = require('yargs');
|
||||
|
||||
module.exports = yargs.option('users', {
|
||||
alias: 'user',
|
||||
describe: 'Reddit usernames to fetch posts from',
|
||||
type: 'array'
|
||||
}).option('posts', {
|
||||
alias: 'post',
|
||||
describe: 'Reddit post IDs to fetch',
|
||||
type: 'array'
|
||||
}).option('limit', {
|
||||
describe: 'Maximum amount of posts to fetch after filtering out ignored, cross- and reposts',
|
||||
type: 'number',
|
||||
default: config.fetch.limit
|
||||
}).option('sort', {
|
||||
describe: 'Property to sort posts by',
|
||||
choices: ['new', 'top', 'hot', 'controversial'],
|
||||
default: config.fetch.sort
|
||||
}).option('ignore', {
|
||||
describe: 'Ignore posts with any of these properties',
|
||||
type: 'array',
|
||||
choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18']
|
||||
}).argv;
|
|
@ -0,0 +1,46 @@
|
|||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
const dissectLink = require('../dissectLink.js');
|
||||
|
||||
/**
 * Reduce raw posts to the fields used by the rest of the application,
 * dropping ignored posts and (optionally) duplicates.
 *
 * A post is dropped when any property named in `ignore` is truthy on it, or
 * when config.fetch.avoidDuplicates is set and an earlier post already
 * resolved to the same host ID. Both cases are logged to the console.
 *
 * @param {Object[]} posts - Posts carrying a `user` property.
 * @param {string[]} [ignore] - Post property names that cause a post to be skipped.
 * @returns {Object[]} Curated posts; `index` is the position in the input list.
 */
function curatePosts(posts, ignore) {
    const seenHostIds = new Set();
    const curated = [];

    posts.forEach((post, index) => {
        const host = dissectLink(post.url);
        const ignoredProp = ignore ? ignore.find(prop => post[prop]) : null;

        if(ignoredProp) {
            console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoredProp} post '${post.title}' - ${post.url}`);

            return;
        }

        if(host) {
            if(config.fetch.avoidDuplicates && seenHostIds.has(host.id)) {
                console.log('\x1b[33m%s\x1b[0m', `Ignoring cross-post or repost '${post.title}' - ${post.url}`);

                return;
            }

            seenHostIds.add(host.id);
        }

        curated.push({
            id: post.id,
            index,
            title: post.title,
            text: post.selftext,
            user: post.user,
            permalink: 'https://reddit.com' + post.permalink,
            url: post.url,
            datetime: new Date(post.created_utc * 1000),
            subreddit: post.subreddit.display_name,
            host
        });
    });

    return curated;
}
|
||||
|
||||
module.exports = curatePosts;
|
|
@ -1,48 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
const dissectLink = require('../dissectLink.js');
|
||||
|
||||
/**
 * Reduce raw submissions to the fields used by the rest of the application,
 * dropping ignored submissions and (optionally) duplicates.
 *
 * A submission is dropped when any property named in `ignore` is truthy on
 * it, or when config.fetch.avoidDuplicates is set and an earlier submission
 * already resolved to the same host ID. Both cases are logged to the console.
 *
 * @param {Object[]} submissions - Submissions as returned by the API wrapper.
 * @param {string[]} ignore - Submission property names that cause a skip.
 * @returns {Object[]} Curated submissions; `index` is the position in the input list.
 */
function curateSubmissions(submissions, ignore) {
    const seenHostIds = new Set();
    const curated = [];

    submissions.forEach((submission, index) => {
        const host = dissectLink(submission.url);
        const ignoredProp = ignore.find(prop => submission[prop]);

        if(ignoredProp) {
            console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoredProp} post '${submission.title}' - ${submission.url}`);

            return;
        }

        if(host) {
            if(config.fetch.avoidDuplicates && seenHostIds.has(host.id)) {
                console.log('\x1b[33m%s\x1b[0m', `Ignoring cross-post or repost '${submission.title}' - ${submission.url}`);

                return;
            }

            seenHostIds.add(host.id);
        }

        curated.push({
            id: submission.id,
            index,
            title: submission.title,
            text: submission.selftext,
            user: submission.author.name,
            permalink: 'https://reddit.com' + submission.permalink,
            url: submission.url,
            datetime: new Date(submission.created_utc * 1000),
            subreddit: submission.subreddit.display_name,
            host
        });
    });

    return curated;
}
|
||||
|
||||
module.exports = curateSubmissions;
|
|
@ -16,7 +16,7 @@ const exiftoolBin = require('dist-exiftool');
|
|||
|
||||
const ep = new exiftool.ExiftoolProcess(exiftoolBin);
|
||||
|
||||
module.exports = function(posts, user) {
|
||||
module.exports = function(posts) {
|
||||
return Promise.resolve().then(() => {
|
||||
return ep.open();
|
||||
}).then(() => {
|
||||
|
@ -37,7 +37,7 @@ module.exports = function(posts, user) {
|
|||
})).then(items => {
|
||||
return Promise.all(items.map(item => {
|
||||
const type = item.type.split('/')[0];
|
||||
const filepath = post.content.album ? interpolate(config.library.album[type], user, post, item) : interpolate(config.library[type], user, post, item);
|
||||
const filepath = post.content.album ? interpolate(config.library.album[type], post.user, post, item) : interpolate(config.library[type], post.user, post, item);
|
||||
|
||||
return Promise.resolve().then(() => {
|
||||
return fs.ensureDir(path.dirname(filepath));
|
||||
|
@ -49,7 +49,7 @@ module.exports = function(posts, user) {
|
|||
}
|
||||
}).then(() => {
|
||||
const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => {
|
||||
const interpolatedValue = interpolate(value, user, post, item);
|
||||
const interpolatedValue = interpolate(value, post.user, post, item);
|
||||
|
||||
if(interpolatedValue) {
|
||||
acc[key] = interpolatedValue;
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
const methods = require('../methods/methods.js');
|
||||
|
||||
function fetchInfo(posts) {
|
||||
function attachContentInfo(posts) {
|
||||
return Promise.all(posts.reduce((acc, post) => {
|
||||
if(post.host && methods[post.host.method]) {
|
||||
acc = acc.concat(methods[post.host.method](post).then(content => {
|
||||
|
@ -18,4 +18,4 @@ function fetchInfo(posts) {
|
|||
}, []));
|
||||
};
|
||||
|
||||
module.exports = fetchInfo;
|
||||
module.exports = attachContentInfo;
|
||||
|
|
Loading…
Reference in New Issue