Fetching and curating posts per user without merge.

This commit is contained in:
DebaucheryLibrarian 2024-09-11 05:16:56 +02:00
parent 3c64ffcd45
commit bba0f8f1d7
8 changed files with 68 additions and 30 deletions

View File

@ -14,6 +14,11 @@ module.exports = {
description: '$base$userCreated - profile ($userVerified$userVerifiedEmail$userGold$profileOver18)', description: '$base$userCreated - profile ($userVerified$userVerifiedEmail$userGold$profileOver18)',
avoidAvatar: true avoidAvatar: true
}, },
index: {
file: '$base/index',
entry: '$postId (r/$subreddit) - $hostId ($url) - $postTitle',
unique: true
},
booleans: { booleans: {
extracted: 'extracted-', extracted: 'extracted-',
preview: 'preview-', preview: 'preview-',

View File

@ -4,6 +4,7 @@ const config = require('config');
const util = require('util'); const util = require('util');
const fs = require('fs-extra'); const fs = require('fs-extra');
const snoowrap = require('snoowrap'); const snoowrap = require('snoowrap');
const omit = require('object.omit');
require('promise.prototype.finally').shim(); require('promise.prototype.finally').shim();
require('array.prototype.flatten').shim(); require('array.prototype.flatten').shim();
@ -26,16 +27,25 @@ if(!(args.users && args.users.length) && !(args.posts && args.posts.length)) {
} }
Promise.resolve().then(async () => { Promise.resolve().then(async () => {
let userPosts = args.users ? await getUserPosts(args.users) : {}; const initUsers = args.users ? args.users.reduce((acc, username) => ({...acc, [username]: {name: username, posts: []}}), {}) : {};
let userPosts = await getUserPosts(initUsers);
if(args.posts) { if(args.posts) {
userPosts = await getPosts(args.posts, userPosts); userPosts = await getPosts(args.posts, userPosts);
} }
const curatedPosts = curatePosts(userPosts, args); const curatedUserPosts = curatePosts(userPosts, args);
const posts = await attachContentInfo(curatedPosts); const infoUserPosts = await attachContentInfo(curatedUserPosts);
return fetchContent(posts); console.log(util.inspect(infoUserPosts, {depth: 10}));
/*
for(const user of args.users) {
console.log(user);
}
*/
// return fetchContent(posts, userPosts);
}).catch(error => { }).catch(error => {
return console.error(error); return console.error(error);
}); });

9
src/curate/hashPost.js Normal file
View File

@ -0,0 +1,9 @@
'use strict';
const crypto = require('crypto');
/**
 * Compute a hex-encoded MD5 fingerprint for a post.
 *
 * The digest input is built by joining, with no separator and in this
 * order, the post's `id`, `subreddit_id`, `created_utc` and `title`.
 *
 * @param {Object} post - object exposing `id`, `subreddit_id`, `created_utc` and `title`
 * @returns {string} MD5 digest of the joined fields, as a hex string
 */
const hashPost = post => {
    const fingerprint = post.id + post.subreddit_id + post.created_utc + post.title;
    const digest = crypto.createHash('md5');

    digest.update(fingerprint);

    return digest.digest('hex');
};
module.exports = hashPost;

View File

@ -3,11 +3,12 @@
const config = require('config'); const config = require('config');
const dissectLink = require('../dissectLink.js'); const dissectLink = require('../dissectLink.js');
const omit = require('object.omit'); const omit = require('object.omit');
const hashPost = require('./hashPost.js');
const curatePosts = (userPosts, args) => { const curatePosts = (userPosts, args) => {
const processed = new Set(); const processed = new Set();
return Object.values(userPosts).reduce((accPosts, user) => accPosts.concat(user.posts.reduce((accUserPosts, post, index) => { return Object.values(userPosts).reduce((accPosts, user) => ({...accPosts, [user.name]: {...user, posts: user.posts.reduce((accUserPosts, post, index) => {
// cut-off at limit, but don't count posts requested directly by ID // cut-off at limit, but don't count posts requested directly by ID
if(accUserPosts.length >= args.limit && !post.direct) { if(accUserPosts.length >= args.limit && !post.direct) {
return accUserPosts; return accUserPosts;
@ -53,9 +54,10 @@ const curatePosts = (userPosts, args) => {
datetime: new Date(post.created_utc * 1000), datetime: new Date(post.created_utc * 1000),
subreddit: post.subreddit.display_name, subreddit: post.subreddit.display_name,
preview: post.preview ? post.preview.images.map(image => image.source) : null, preview: post.preview ? post.preview.images.map(image => image.source) : null,
host host,
hash: hashPost(post)
}); });
}, [])), []); }, [])}}), {});
}; };
module.exports = curatePosts; module.exports = curatePosts;

View File

@ -16,9 +16,11 @@ const exiftoolBin = require('dist-exiftool');
const ep = new exiftool.ExiftoolProcess(exiftoolBin); const ep = new exiftool.ExiftoolProcess(exiftoolBin);
module.exports = function(posts) { module.exports = function(posts, users) {
return Promise.resolve().then(() => { return Promise.resolve().then(() => {
return ep.open(); return ep.open();
}).then(() => {
// console.log(users);
}).then(() => { }).then(() => {
return Promise.all(posts.map(post => { return Promise.all(posts.map(post => {
return Promise.all(post.content.items.map((item, index) => { return Promise.all(post.content.items.map((item, index) => {
@ -66,7 +68,12 @@ module.exports = function(posts) {
return saveMeta(filepath, meta, ep); return saveMeta(filepath, meta, ep);
} }
}); });
})); })).then(result => {
const filename = interpolate(config.library.index.file, post.user, post);
const entry = interpolate(config.library.index.entry, post.user, post, null, false) + '\n';
return fs.appendFile(filename, config.library.index.unique ? `${post.hash} ${entry}` : entry);
});
}); });
})); }));
}).finally(() => { }).finally(() => {

View File

@ -2,28 +2,31 @@
const util = require('util'); const util = require('util');
const config = require('config'); const config = require('config');
const Promise = require('bluebird');
const methods = require('../methods/methods.js'); const methods = require('../methods/methods.js');
function attachContentInfo(posts) { const attachContentInfo = users => {
return Promise.all(posts.reduce((acc, post) => { return Promise.reduce(Object.values(users), async (accUsers, user) => ({...accUsers, [user.name]: {...user, posts: await Promise.reduce(user.posts, async (accPosts, post) => {
if(post.host && methods[post.host.method]) { if(!post.host || !methods[post.host.method]) {
acc = acc.concat(methods[post.host.method](post).then(content => Object.assign(post, {content})).catch(error => { console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${post.url}' (${post.permalink})`);
return accPosts;
}
try {
return [...accPosts, await methods[post.host.method](post)];
} catch(error) {
console.log('\x1b[31m%s\x1b[0m', `${error} (${post.permalink})`); console.log('\x1b[31m%s\x1b[0m', `${error} (${post.permalink})`);
if(config.fetch.archives.preview && post.preview) { if(config.fetch.archives.preview && post.preview) {
console.log(`Found preview images for unavailable source '${post.url}' (${post.permalink})`); console.log(`Found preview images for unavailable source '${post.url}' (${post.permalink})`);
return methods.redditPreview(post).then(content => Object.assign(post, {content})); return [...accPosts, {...post, content: await methods.redditPreview(post)}];
} }
return null; return accPosts;
}));
} else {
console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${post.url}' (${post.permalink})`);
} }
}, [])}}), {});
return acc;
}, [])).then(posts => posts.filter(post => post));
}; };
module.exports = attachContentInfo; module.exports = attachContentInfo;

View File

@ -6,7 +6,7 @@ const url = require('url');
const dateFns = require('date-fns'); const dateFns = require('date-fns');
const mime = require('mime-types'); const mime = require('mime-types');
function interpolate(pattern, user, post, item) { function interpolate(pattern, user, post, item, strip = true) {
const dateFormat = config.library.dateFormat || 'YYYYMMDD'; const dateFormat = config.library.dateFormat || 'YYYYMMDD';
const vars = { const vars = {
@ -41,8 +41,10 @@ function interpolate(pattern, user, post, item) {
$postUser: post.user, $postUser: post.user,
$postDate: dateFns.format(post.datetime, dateFormat), $postDate: dateFns.format(post.datetime, dateFormat),
$postIndex: post.index + config.library.indexOffset, $postIndex: post.index + config.library.indexOffset,
$url: post.url,
$subreddit: post.subreddit, $subreddit: post.subreddit,
$host: post.host.label $hostLabel: post.host.label,
$hostId: post.host.id
}); });
if(post.content.album) { if(post.content.album) {
@ -70,7 +72,7 @@ function interpolate(pattern, user, post, item) {
return Object.entries(vars).reduce((acc, [key, value], index) => { return Object.entries(vars).reduce((acc, [key, value], index) => {
// substitute slashes for filesystem compatability // substitute slashes for filesystem compatability
if(key !== '$base') { if(key !== '$base' && strip) {
value = (value || '').toString().replace(/\//g, config.library.slashSubstitute); value = (value || '').toString().replace(/\//g, config.library.slashSubstitute);
} }

View File

@ -34,7 +34,7 @@ const getPosts = async (username, reddit, args) => {
} }
}; };
const getUserPostsWrap = (reddit, args) => usernames => Promise.props(usernames.reduce((userPosts, username) => { const getUserPostsWrap = (reddit, args) => users => Promise.props(Object.entries(users).reduce((userPosts, [username, user]) => {
userPosts[username] = (async () => { userPosts[username] = (async () => {
const [user, posts] = await Promise.all([ const [user, posts] = await Promise.all([
getUser(username, reddit), getUser(username, reddit),