Fetching and curating posts per user without merge.
This commit is contained in:
parent
3c64ffcd45
commit
bba0f8f1d7
|
@ -14,6 +14,11 @@ module.exports = {
|
|||
description: '$base$userCreated - profile ($userVerified$userVerifiedEmail$userGold$profileOver18)',
|
||||
avoidAvatar: true
|
||||
},
|
||||
index: {
|
||||
file: '$base/index',
|
||||
entry: '$postId (r/$subreddit) - $hostId ($url) - $postTitle',
|
||||
unique: true
|
||||
},
|
||||
booleans: {
|
||||
extracted: 'extracted-',
|
||||
preview: 'preview-',
|
||||
|
|
18
src/app.js
18
src/app.js
|
@ -4,6 +4,7 @@ const config = require('config');
|
|||
const util = require('util');
|
||||
const fs = require('fs-extra');
|
||||
const snoowrap = require('snoowrap');
|
||||
const omit = require('object.omit');
|
||||
|
||||
require('promise.prototype.finally').shim();
|
||||
require('array.prototype.flatten').shim();
|
||||
|
@ -26,16 +27,25 @@ if(!(args.users && args.users.length) && !(args.posts && args.posts.length)) {
|
|||
}
|
||||
|
||||
Promise.resolve().then(async () => {
|
||||
let userPosts = args.users ? await getUserPosts(args.users) : {};
|
||||
const initUsers = args.users ? args.users.reduce((acc, username) => ({...acc, [username]: {name: username, posts: []}}), {}) : {};
|
||||
let userPosts = await getUserPosts(initUsers);
|
||||
|
||||
if(args.posts) {
|
||||
userPosts = await getPosts(args.posts, userPosts);
|
||||
}
|
||||
|
||||
const curatedPosts = curatePosts(userPosts, args);
|
||||
const posts = await attachContentInfo(curatedPosts);
|
||||
const curatedUserPosts = curatePosts(userPosts, args);
|
||||
const infoUserPosts = await attachContentInfo(curatedUserPosts);
|
||||
|
||||
return fetchContent(posts);
|
||||
console.log(util.inspect(infoUserPosts, {depth: 10}));
|
||||
|
||||
/*
|
||||
for(const user of args.users) {
|
||||
console.log(user);
|
||||
}
|
||||
*/
|
||||
|
||||
// return fetchContent(posts, userPosts);
|
||||
}).catch(error => {
|
||||
return console.error(error);
|
||||
});
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
'use strict';
|
||||
|
||||
const crypto = require('crypto');
|
||||
|
||||
/**
 * Build a hex-encoded MD5 digest identifying a post.
 *
 * The digest input is the post's `id`, `subreddit_id`, `created_utc` and
 * `title`, joined with `+` in exactly that order and without separators.
 *
 * @param {Object} post - object exposing `id`, `subreddit_id`, `created_utc` and `title`
 * @returns {string} MD5 digest as a lowercase hex string
 */
const hashPost = post => {
	const {id, subreddit_id: subredditId, created_utc: createdUtc, title} = post;

	// keep the left-to-right `+` chain so type coercion matches the digest input byte for byte
	const fingerprint = id + subredditId + createdUtc + title;

	const md5 = crypto.createHash('md5');
	md5.update(fingerprint);

	return md5.digest('hex');
};
|
||||
|
||||
module.exports = hashPost;
|
|
@ -3,11 +3,12 @@
|
|||
const config = require('config');
|
||||
const dissectLink = require('../dissectLink.js');
|
||||
const omit = require('object.omit');
|
||||
const hashPost = require('./hashPost.js');
|
||||
|
||||
const curatePosts = (userPosts, args) => {
|
||||
const processed = new Set();
|
||||
|
||||
return Object.values(userPosts).reduce((accPosts, user) => accPosts.concat(user.posts.reduce((accUserPosts, post, index) => {
|
||||
return Object.values(userPosts).reduce((accPosts, user) => ({...accPosts, [user.name]: {...user, posts: user.posts.reduce((accUserPosts, post, index) => {
|
||||
// cut-off at limit, but don't count posts requested directly by ID
|
||||
if(accUserPosts.length >= args.limit && !post.direct) {
|
||||
return accUserPosts;
|
||||
|
@ -53,9 +54,10 @@ const curatePosts = (userPosts, args) => {
|
|||
datetime: new Date(post.created_utc * 1000),
|
||||
subreddit: post.subreddit.display_name,
|
||||
preview: post.preview ? post.preview.images.map(image => image.source) : null,
|
||||
host
|
||||
host,
|
||||
hash: hashPost(post)
|
||||
});
|
||||
}, [])), []);
|
||||
}, [])}}), {});
|
||||
};
|
||||
|
||||
module.exports = curatePosts;
|
||||
|
|
|
@ -16,9 +16,11 @@ const exiftoolBin = require('dist-exiftool');
|
|||
|
||||
const ep = new exiftool.ExiftoolProcess(exiftoolBin);
|
||||
|
||||
module.exports = function(posts) {
|
||||
module.exports = function(posts, users) {
|
||||
return Promise.resolve().then(() => {
|
||||
return ep.open();
|
||||
}).then(() => {
|
||||
// console.log(users);
|
||||
}).then(() => {
|
||||
return Promise.all(posts.map(post => {
|
||||
return Promise.all(post.content.items.map((item, index) => {
|
||||
|
@ -66,7 +68,12 @@ module.exports = function(posts) {
|
|||
return saveMeta(filepath, meta, ep);
|
||||
}
|
||||
});
|
||||
}));
|
||||
})).then(result => {
|
||||
const filename = interpolate(config.library.index.file, post.user, post);
|
||||
const entry = interpolate(config.library.index.entry, post.user, post, null, false) + '\n';
|
||||
|
||||
return fs.appendFile(filename, config.library.index.unique ? `${post.hash} ${entry}` : entry);
|
||||
});
|
||||
});
|
||||
}));
|
||||
}).finally(() => {
|
||||
|
|
|
@ -2,28 +2,31 @@
|
|||
|
||||
const util = require('util');
|
||||
const config = require('config');
|
||||
const Promise = require('bluebird');
|
||||
const methods = require('../methods/methods.js');
|
||||
|
||||
/**
 * Resolve the content of every post, keeping posts grouped per user.
 *
 * Users and their posts are processed one at a time, in order. For each post
 * the handler `methods[post.host.method]` is awaited and its result is stored
 * on a copy of the post as `content`. Posts without a host or without a
 * matching handler are logged and dropped. When a handler rejects, the error
 * is logged; if `config.fetch.archives.preview` is enabled and the post has a
 * `preview`, `methods.redditPreview` supplies the content instead, otherwise
 * the post is dropped.
 *
 * @param {Object} users - map of users; each value exposes `name` and a `posts` array
 * @returns {Promise<Object>} map keyed by `user.name`; each value is a copy of
 *   the user whose `posts` holds only the posts that received `content`
 */
const attachContentInfo = async users => {
	const usersWithContent = {};

	for(const user of Object.values(users)) {
		const posts = [];

		for(const post of user.posts) {
			if(!post.host || !methods[post.host.method]) {
				console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${post.url}' (${post.permalink})`);

				continue;
			}

			try {
				// attach the fetched content to the post itself; pushing the bare
				// content would lose the post and break consumers reading post.content
				posts.push({...post, content: await methods[post.host.method](post)});
			} catch(error) {
				console.log('\x1b[31m%s\x1b[0m', `${error} (${post.permalink})`);

				if(config.fetch.archives.preview && post.preview) {
					console.log(`Found preview images for unavailable source '${post.url}' (${post.permalink})`);

					// a failure of the preview fallback is not caught here and rejects the whole call
					posts.push({...post, content: await methods.redditPreview(post)});
				}
			}
		}

		usersWithContent[user.name] = {...user, posts};
	}

	return usersWithContent;
};
|
||||
|
||||
module.exports = attachContentInfo;
|
||||
|
|
|
@ -6,7 +6,7 @@ const url = require('url');
|
|||
const dateFns = require('date-fns');
|
||||
const mime = require('mime-types');
|
||||
|
||||
function interpolate(pattern, user, post, item) {
|
||||
function interpolate(pattern, user, post, item, strip = true) {
|
||||
const dateFormat = config.library.dateFormat || 'YYYYMMDD';
|
||||
|
||||
const vars = {
|
||||
|
@ -41,8 +41,10 @@ function interpolate(pattern, user, post, item) {
|
|||
$postUser: post.user,
|
||||
$postDate: dateFns.format(post.datetime, dateFormat),
|
||||
$postIndex: post.index + config.library.indexOffset,
|
||||
$url: post.url,
|
||||
$subreddit: post.subreddit,
|
||||
$host: post.host.label
|
||||
$hostLabel: post.host.label,
|
||||
$hostId: post.host.id
|
||||
});
|
||||
|
||||
if(post.content.album) {
|
||||
|
@ -70,7 +72,7 @@ function interpolate(pattern, user, post, item) {
|
|||
|
||||
return Object.entries(vars).reduce((acc, [key, value], index) => {
|
||||
// substitute slashes for filesystem compatibility
|
||||
if(key !== '$base') {
|
||||
if(key !== '$base' && strip) {
|
||||
value = (value || '').toString().replace(/\//g, config.library.slashSubstitute);
|
||||
}
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ const getPosts = async (username, reddit, args) => {
|
|||
}
|
||||
};
|
||||
|
||||
const getUserPostsWrap = (reddit, args) => usernames => Promise.props(usernames.reduce((userPosts, username) => {
|
||||
const getUserPostsWrap = (reddit, args) => users => Promise.props(Object.entries(users).reduce((userPosts, [username, user]) => {
|
||||
userPosts[username] = (async () => {
|
||||
const [user, posts] = await Promise.all([
|
||||
getUser(username, reddit),
|
||||
|
|
Loading…
Reference in New Issue