Build the user posts object after fetching the user, so that posts fetched via the user's listing and posts fetched directly by ID are added under the same user key. Refactor to make better use of functions. Move the profile-detail saving call into the content fetch. No longer attempt (and fail) to save profile details for deleted users, whose directory would not exist.
This commit is contained in:
parent 3b5d886da3
commit 7cf1a99915
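A rough sketch of the keying problem this commit addresses (the username and post IDs below are placeholders, not from the repo): posts fetched via a user's listing and posts fetched directly with --post now funnel into the same userPosts[user.name] entry instead of creating separate keys.

    // Illustrative only; 'someuser', 'abc123' and 'def456' are made up.
    const userPosts = {};

    function addPosts(user, posts) {
        // Both fetch paths merge into one entry keyed by the user's name.
        const existing = userPosts[user.name] || { ...user, posts: [] };
        userPosts[user.name] = { ...existing, posts: [...existing.posts, ...posts] };
    }

    addPosts({ name: 'someuser' }, [{ id: 'abc123' }]); // from the user listing
    addPosts({ name: 'someuser' }, [{ id: 'def456' }]); // from --post def456
    // userPosts.someuser.posts now holds both posts under a single key.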
@@ -19,7 +19,7 @@ module.exports = {
     index: {
         file: '$base/index',
         format: 'tsv',
-        keys: ['postId', 'postTitle', 'subreddit', 'postDate', 'url'],
+        keys: ['postId', 'subreddit', 'postDate', 'postTitle', 'url'],
     },
     booleans: {
         extracted: 'extracted-',
@@ -1870,16 +1870,6 @@
         "harmony-reflect": "1.6.0"
       }
     },
-    "promise.prototype.finally": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/promise.prototype.finally/-/promise.prototype.finally-3.1.0.tgz",
-      "integrity": "sha512-7p/K2f6dI+dM8yjRQEGrTQs5hTQixUAdOGpMEA3+pVxpX5oHKRSKAXyLw9Q9HUWDTdwtoo39dSHGQtN90HcEwQ==",
-      "requires": {
-        "define-properties": "1.1.2",
-        "es-abstract": "1.11.0",
-        "function-bind": "1.1.1"
-      }
-    },
     "pseudomap": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/pseudomap/-/pseudomap-1.0.2.tgz",
@@ -42,7 +42,6 @@
     "node-fetch": "^2.1.2",
     "object.omit": "^3.0.0",
     "object.pick": "^1.3.0",
-    "promise.prototype.finally": "^3.1.0",
    "snoowrap": "^1.15.2",
    "url-pattern": "^1.0.3",
    "yargs": "^11.0.0"
src/app.js
@@ -1,49 +1,58 @@
 'use strict';
 
 const config = require('config');
 const util = require('util');
 const fs = require('fs-extra');
-const snoowrap = require('snoowrap');
+const omit = require('object.omit');
+const Snoowrap = require('snoowrap');
 const exiftool = require('node-exiftool');
 const exiftoolBin = require('dist-exiftool');
 
-require('promise.prototype.finally').shim();
 require('array.prototype.flatten').shim();
 
-const reddit = new snoowrap(config.reddit.api);
-const args = require('./cli.js');
+const reddit = new Snoowrap(config.reddit.api);
+const args = require('./cli.js')();
 
 const curatePosts = require('./curate/posts.js');
+const interpolate = require('./interpolate.js');
 
 const attachContentInfo = require('./fetch/info.js');
 const fetchSaveContent = require('./fetch/content.js');
 
 const getPosts = require('./sources/getPosts.js')(reddit, args);
 const getUserPosts = require('./sources/getUserPosts.js')(reddit, args);
 
-if(!(args.users && args.users.length) && !(args.posts && args.posts.length)) {
-    return console.log('\x1b[31m%s\x1b[0m', 'Please supply at least one user with --user <user> or one post with --post <post-id>. See --help for more options.');
-}
-
-const ep = new exiftool.ExiftoolProcess(exiftoolBin);
-
-Promise.resolve().then(async () => {
-    const initUsers = args.users ? args.users.reduce((acc, username) => ({...acc, [username]: {name: username, posts: []}}), {}) : {};
-    let userPosts = await getUserPosts(initUsers);
+async function getCompleteUserPosts() {
+    let userPosts = await getUserPosts(args.users);
 
-    if(args.posts) {
+    if (args.posts) {
         userPosts = await getPosts(args.posts, userPosts);
     }
 
     const curatedUserPosts = curatePosts(userPosts, args);
-    const infoUserPosts = await attachContentInfo(curatedUserPosts);
 
-    await ep.open();
-    await Promise.all(Object.values(infoUserPosts).map(user => fetchSaveContent(user, ep)));
-    await ep.close();
-}).catch(error => {
-    return console.error(error);
-});
+    return attachContentInfo(curatedUserPosts);
+}
+
+function fetchSavePosts(userPosts, ep) {
+    return Promise.all(Object.values(userPosts).map(user => fetchSaveContent(user, ep)));
+}
+
+async function initApp() {
+    const usersProvided = args.users && args.users.length;
+    const postIdsProvided = args.posts && args.posts.length;
+
+    if (!usersProvided && !postIdsProvided) {
+        return console.log('\x1b[31m%s\x1b[0m', 'Please supply at least one user or post ID. See --help for more details.');
+    }
+
+    try {
+        const userPosts = await getCompleteUserPosts();
+        const ep = new exiftool.ExiftoolProcess(exiftoolBin);
+
+        await ep.open();
+        await fetchSavePosts(userPosts, ep);
+
+        return ep.close();
+    } catch (error) {
+        console.error(error);
+    }
+}
+
+initApp();
@@ -5,12 +5,12 @@ const config = require('config');
 const archives = require('./archives.js');
 
 function getArchivePostIds(username, exclude) {
-    console.log('Searching archives for posts...');
+    console.log(`Finding archived posts for '${username}'...`);
 
     return Promise.all(config.fetch.archives.reddit.map(source => archives[source](username))).then(postIds => postIds.flatten()).then(postIds => {
         return exclude ? postIds.filter(postId => !exclude.includes(postId)) : postIds;
     }).then(postIds => {
-        console.log(`Found ${postIds.length} unique archived posts`);
+        console.log(`Found ${postIds.length} unique archived posts for user '${username}'`);
 
         return postIds;
     });
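The postIds.flatten() call above relies on the array.prototype.flatten shim loaded in src/app.js. A minimal sketch of what the shim provides (the IDs are made up):

    // array.prototype.flatten (an es-shim) adds flatten() to arrays,
    // collapsing the per-archive-source ID arrays into one list.
    require('array.prototype.flatten').shim();

    const perSource = [['abc12', 'def34'], ['def34', 'ghi56']];
    console.log(perSource.flatten()); // ['abc12', 'def34', 'def34', 'ghi56']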
src/cli.js
@@ -3,34 +3,38 @@
 const config = require('config');
 const yargs = require('yargs');
 
-module.exports = yargs.command('npm start -- --user <username>').option('users', {
-    alias: 'user',
-    describe: 'Reddit usernames to fetch posts from',
-    type: 'array'
-}).option('posts', {
-    alias: 'post',
-    describe: 'Reddit post IDs to fetch',
-    type: 'array'
-}).option('limit', {
-    describe: 'Maximum amount of posts to fetch per supplied user (!), after filtering out ignored, cross- and reposts',
-    type: 'number',
-    default: config.fetch.limit
-}).option('sort', {
-    describe: 'Property to sort posts by',
-    choices: ['new', 'top', 'hot', 'controversial'],
-    default: config.fetch.sort
-}).option('ignore', {
-    describe: 'Ignore posts with any of these properties',
-    type: 'array',
-    choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18']
-}).option('include', {
-    describe: 'Include only these sources',
-    type: 'array'
-}).option('exclude', {
-    describe: 'Do not include these sources',
-    type: 'array'
-}).option('archives', {
-    describe: 'Search archives for deleted posts',
-    type: 'boolean',
-    default: config.fetch.archives.search
-}).argv;
+function getArgs() {
+    return yargs.command('npm start -- --user <username>').option('users', {
+        alias: 'user',
+        describe: 'Reddit usernames to fetch posts from',
+        type: 'array',
+    }).option('posts', {
+        alias: 'post',
+        describe: 'Reddit post IDs to fetch',
+        type: 'array',
+    }).option('limit', {
+        describe: 'Maximum amount of posts to fetch per supplied user (!), after filtering out ignored, cross- and reposts',
+        type: 'number',
+        default: config.fetch.limit,
+    }).option('sort', {
+        describe: 'Property to sort posts by',
+        choices: ['new', 'top', 'hot', 'controversial'],
+        default: config.fetch.sort,
+    }).option('ignore', {
+        describe: 'Ignore posts with any of these properties',
+        type: 'array',
+        choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18'],
+    }).option('include', {
+        describe: 'Include only these sources',
+        type: 'array',
+    }).option('exclude', {
+        describe: 'Do not include these sources',
+        type: 'array',
+    }).option('archives', {
+        describe: 'Search archives for deleted posts',
+        type: 'boolean',
+        default: config.fetch.archives.search,
+    }).argv;
+}
+
+module.exports = getArgs;
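Since cli.js now exports a function, consumers call it to parse arguments, as app.js does with require('./cli.js')(). Example invocations (usernames, post IDs, and source labels are placeholders):

    npm start -- --user someuser anotheruser --limit 50 --sort top
    npm start -- --post 7abcde --archives
    npm start -- --user someuser --ignore pinned stickied --exclude somesource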
@@ -1,63 +1,75 @@
 'use strict';
 
 const config = require('config');
-const dissectLink = require('../dissectLink.js');
 const omit = require('object.omit');
+
+const dissectLink = require('../dissectLink.js');
 const hashPost = require('./hashPost.js');
 
+function curatePost(accUserPosts, post, user, index, processed, args) {
+    // cut-off at limit, but don't count posts requested directly by ID
+    if (accUserPosts.length >= args.limit && !post.direct) {
+        return accUserPosts;
+    }
+
+    const host = dissectLink(post.url);
+    const permalink = `https://reddit.com${post.permalink}`;
+
+    const ignoring = args.ignore ? args.ignore.find(prop => post[prop]) : null;
+
+    if (ignoring) {
+        console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`);
+
+        return accUserPosts;
+    }
+
+    if (host) {
+        const hostIncludes = args.include && !args.include.includes(host.label);
+        const hostExcluded = args.exclude && args.exclude.includes(host.label);
+
+        if (hostIncludes || hostExcluded) {
+            console.log(
+                '\x1b[33m%s\x1b[0m',
+                `Ignoring source '${host.label}' from post '${post.url}' (${permalink})`,
+            );
+
+            return accUserPosts;
+        }
+
+        if (config.fetch.avoidDuplicates && processed.has(host.id)) {
+            console.log(
+                '\x1b[33m%s\x1b[0m',
+                `Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`,
+            );
+
+            return accUserPosts;
+        }
+
+        processed.add(host.id);
+    }
+
+    return accUserPosts.concat({
+        id: post.id,
+        index,
+        title: post.title,
+        text: post.selftext,
+        user: omit(user, ['posts']),
+        permalink,
+        url: post.url,
+        datetime: new Date(post.created_utc * 1000),
+        subreddit: post.subreddit.display_name,
+        preview: post.preview ? post.preview.images.map(image => image.source) : null,
+        host,
+        hash: hashPost(post)
+    });
+}
+
 const curatePosts = (userPosts, args) => {
     const processed = new Set();
 
-    return Object.values(userPosts).reduce((accPosts, user) => ({...accPosts, [user.name]: {...user, posts: user.posts.reduce((accUserPosts, post, index) => {
-        // cut-off at limit, but don't count posts requested directly by ID
-        if(accUserPosts.length >= args.limit && !post.direct) {
-            return accUserPosts;
-        }
-
-        const host = dissectLink(post.url);
-        const permalink = 'https://reddit.com' + post.permalink;
-
-        const ignoring = args.ignore ? args.ignore.find(prop => {
-            return post[prop];
-        }) : null;
-
-        if(ignoring) {
-            console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`);
-
-            return accUserPosts;
-        }
-
-        if(host) {
-            if(config.fetch.avoidDuplicates && processed.has(host.id)) {
-                console.log('\x1b[33m%s\x1b[0m', `Ignoring duplicate content '${post.url}' (cross-post, repost, or --post ID was already included) (${permalink})`);
-
-                return accUserPosts;
-            }
-
-            if((args.include && !args.include.includes(host.label)) || (args.exclude && args.exclude.includes(host.label))) {
-                console.log('\x1b[33m%s\x1b[0m', `Ignoring source '${host.label}' from post '${post.url}' (${permalink})`);
-
-                return accUserPosts;
-            }
-
-            processed.add(host.id);
-        }
-
-        return accUserPosts.concat({
-            id: post.id,
-            index: index,
-            title: post.title,
-            text: post.selftext,
-            user: omit(user, ['posts']),
-            permalink,
-            url: post.url,
-            datetime: new Date(post.created_utc * 1000),
-            subreddit: post.subreddit.display_name,
-            preview: post.preview ? post.preview.images.map(image => image.source) : null,
-            host,
-            hash: hashPost(post)
-        });
-    }, [])}}), {});
+    return Object.values(userPosts).reduce((accPosts, user) => {
+        return { ...accPosts, [user.name]: { ...user, posts: user.posts.reduce((accUserPosts, post, index) => curatePost(accUserPosts, post, user, index, processed, args), []) } };
+    }, {});
 };
 
 module.exports = curatePosts;
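The curated post keeps a reference to its user via omit(user, ['posts']) so each post doesn't embed the user's full post list back into itself. A quick sketch of object.omit's behavior (the user shape is illustrative):

    const omit = require('object.omit');

    const user = { name: 'someuser', fallback: false, posts: [1, 2, 3] };
    // Returns a copy without the listed keys; the original is untouched.
    console.log(omit(user, ['posts'])); // { name: 'someuser', fallback: false }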
@@ -1,7 +1,9 @@
-const fs = require('fs-extra');
+'use strict';
+
 const config = require('config');
 const Promise = require('bluebird');
 
+const saveProfileDetails = require('../save/profileDetails.js');
 const fetchItem = require('./item.js');
 const interpolate = require('../interpolate.js');
 const save = require('../save/save.js');
@@ -25,7 +27,7 @@ async function getStreams(item, post) {
     return null;
 }
 
-async function addMeta(filepath, ep, item, post, user) {
+async function addMeta(filepath, item, post, user, ep) {
     const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => {
         const interpolatedValue = interpolate(value, user, post, item);
 
@@ -46,13 +48,16 @@ function getFilepath(item, post, user) {
 }
 
 async function fetchSaveContent(user, ep) {
+    // async, nothing depends on its success so don't await
+    saveProfileDetails(user);
+
     const posts = await Promise.map(user.posts, async (post) => {
         await Promise.reduce(post.content.items, async (accItems, originalItem, index) => {
             const item = { ...originalItem, index };
             const streams = await getStreams(item, post);
 
             // no streams, ignore item
-            if (streams.length <= 0) {
+            if (!streams || streams.length <= 0) {
                 return accItems;
             }
 
@@ -63,7 +68,7 @@ async function fetchSaveContent(user, ep) {
             await mux(filepath, sourcePaths, item);
         }
 
-        await addMeta(filepath, ep, item, post, user);
+        await addMeta(filepath, item, post, user, ep);
 
         return sourcePaths;
     }, []);
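fetchSaveContent leans on bluebird's collection helpers (the Promise required from 'bluebird' in this file). A minimal illustration of the two used above, with invented values:

    const Promise = require('bluebird');

    // Promise.map runs an async mapper over an array and resolves with the results.
    Promise.map([1, 2, 3], async n => n * 2).then(console.log); // [2, 4, 6]

    // Promise.reduce folds an array with an async reducer, awaiting each step.
    Promise.reduce([1, 2, 3], async (acc, n) => acc + n, 0).then(console.log); // 6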
@@ -9,7 +9,7 @@ const textToStream = require('./textToStream.js');
 const save = require('./save.js');
 
 function saveProfileDetails(user) {
-    if(config.library.profile.image && !user.fallback) {
+    if(config.library.profile.image && !user.fallback && !user.deleted) {
         const image = user.profile ? user.profile.image : user.image;
 
         if(config.library.profile.avoidAvatar && new urlPattern('http(s)\\://(www.)redditstatic.com/avatars/:id(.:ext)(?:query)').match(image)) {
@@ -26,7 +26,7 @@ function saveProfileDetails(user) {
         }
     }
 
-    if(config.library.profile.description && !user.fallback) {
+    if(config.library.profile.description && !user.fallback && !user.deleted) {
         if(user.profile && user.profile.description) {
             const filepath = interpolate(config.library.profile.description, user);
             const stream = textToStream(user.profile.description);
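A sketch of what the new !user.deleted guards prevent; the user shapes below are hypothetical, and the deleted flag is assumed to be set upstream (e.g. during curation or the fallback path) when the account no longer exists:

    // Without the guard, profile files would be written into a user directory
    // that is never created for a deleted account.
    const users = [
        { name: 'someuser', fallback: false, deleted: false },
        { name: 'goneuser', fallback: false, deleted: true },
    ];

    for (const user of users) {
        if (!user.fallback && !user.deleted) {
            console.log(`would save profile details for ${user.name}`);
        }
    }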
@@ -0,0 +1,20 @@
+'use strict';
+
+const config = require('config');
+const fs = require('fs-extra');
+const Promise = require('bluebird');
+
+const csvStringify = Promise.promisify(require('csv').stringify);
+
+const interpolate = require('../interpolate.js');
+
+async function writeToIndex(posts, user) {
+    const filename = interpolate(config.library.index.file, user, null, false);
+    const data = posts.map(post => config.library.index.keys.map(key => interpolate(`$${key}`, user, post, null, false, 'YYYY-MM-DDTHH:mm:ssZ')));
+
+    const tsvString = await csvStringify(data, { delimiter: '\t', header: true, columns: config.library.index.keys });
+
+    return fs.writeFile(filename, tsvString);
+}
+
+module.exports = writeToIndex;
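A rough sketch of the stringify call above, using the reordered keys from the config hunk at the top of this commit (the data row is invented for illustration):

    const Promise = require('bluebird');
    const csvStringify = Promise.promisify(require('csv').stringify);

    csvStringify(
        [['7abcde', 'somesubreddit', '2018-01-01T00:00:00+00:00', 'Some title', 'https://example.com/x.jpg']],
        { delimiter: '\t', header: true, columns: ['postId', 'subreddit', 'postDate', 'postTitle', 'url'] },
    ).then(tsv => process.stdout.write(tsv)); // tab-separated rows, header line first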
@@ -1,61 +1,64 @@
 'use strict';
 
 const Promise = require('bluebird');
 const config = require('config');
 const getArchivePostIds = require('../archives/getArchivePostIds.js');
 const curateUser = require('../curate/user.js');
-const saveProfileDetails = require('../save/profileDetails.js');
 
-const getUser = async (username, reddit) => {
+async function getUser(username, reddit) {
     try {
         const user = await reddit.getUser(username).fetch();
 
         return curateUser(user);
-    } catch(error) {
+    } catch (error) {
         console.log('\x1b[31m%s\x1b[0m', `Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
 
         return {
             name: username,
-            fallback: true
+            fallback: true,
         };
     }
-};
+}
 
-const getPosts = async (username, reddit, args) => {
+async function getPosts(username, reddit, args) {
     try {
-        return await reddit.getUser(username).getSubmissions({
+        const user = await reddit.getUser(username).getSubmissions({
             sort: args.sort,
-            limit: Infinity
+            limit: Infinity,
         });
-    } catch(error) {
+
+        return user;
+    } catch (error) {
         console.log('\x1b[31m%s\x1b[0m', `Failed to fetch posts from reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
 
         return [];
     }
-};
+}
 
-const getUserPostsWrap = (reddit, args) => users => Promise.props(Object.entries(users).reduce((userPosts, [username, user]) => {
-    userPosts[username] = (async () => {
-        const [user, posts] = await Promise.all([
-            getUser(username, reddit),
-            getPosts(username, reddit, args)
-        ]);
-
-        if(user) {
-            saveProfileDetails(user);
-        }
-
-        if(args.archives) {
-            const postIds = await getArchivePostIds(username, posts.map(post => post.id));
-            const archivedPosts = await Promise.all(postIds.map(postId => reddit.getSubmission(postId).fetch()));
-
-            posts.push(...archivedPosts);
-        }
-
-        return {...user, posts};
-    })();
-
-    return userPosts;
-}, {}));
+async function getArchivedPosts(username, posts, reddit) {
+    const postIds = await getArchivePostIds(username, posts.map(post => post.id));
+
+    return Promise.all(postIds.map(postId => reddit.getSubmission(postId).fetch()));
+}
+
+function getUserPostsWrap(reddit, args) {
+    return function getUserPosts(usernames) {
+        return Promise.props(usernames.reduce(async (userPosts, username) => {
+            const [user, posts] = await Promise.all([
+                getUser(username, reddit),
+                getPosts(username, reddit, args),
+            ]);
+
+            if (args.archives) {
+                posts.push(...await getArchivedPosts(username, posts, reddit));
+            }
+
+            if (posts.length) {
+                return { ...userPosts, [user.name]: { ...user, posts } };
+            }
+
+            return userPosts;
+        }, {}));
+    };
+}
 
 module.exports = getUserPostsWrap;
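getUserPosts builds an object whose values may still be pending user/posts work and hands it to bluebird's Promise.props, which awaits every value and resolves with a plain object. A minimal example (usernames are placeholders):

    const Promise = require('bluebird');

    Promise.props({
        someuser: Promise.resolve({ name: 'someuser', posts: [] }),
        anotheruser: Promise.resolve({ name: 'anotheruser', posts: [] }),
    }).then(console.log); // { someuser: { ... }, anotheruser: { ... } }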