From bba0f8f1d759b200fed6edfac8e2f0d2a864df97 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 11 Sep 2024 05:16:56 +0200 Subject: [PATCH] Fetching and curating posts per user without merge. --- config/default.js | 5 +++++ src/app.js | 18 ++++++++++++++---- src/curate/hashPost.js | 9 +++++++++ src/curate/posts.js | 8 +++++--- src/fetch/content.js | 11 +++++++++-- src/fetch/info.js | 37 ++++++++++++++++++++----------------- src/interpolate.js | 8 +++++--- src/sources/getUserPosts.js | 2 +- 8 files changed, 68 insertions(+), 30 deletions(-) create mode 100644 src/curate/hashPost.js diff --git a/config/default.js b/config/default.js index cf92c35..181834c 100644 --- a/config/default.js +++ b/config/default.js @@ -14,6 +14,11 @@ module.exports = { description: '$base$userCreated - profile ($userVerified$userVerifiedEmail$userGold$profileOver18)', avoidAvatar: true }, + index: { + file: '$base/index', + entry: '$postId (r/$subreddit) - $hostId ($url) - $postTitle', + unique: true + }, booleans: { extracted: 'extracted-', preview: 'preview-', diff --git a/src/app.js b/src/app.js index 34bd729..99dceed 100644 --- a/src/app.js +++ b/src/app.js @@ -4,6 +4,7 @@ const config = require('config'); const util = require('util'); const fs = require('fs-extra'); const snoowrap = require('snoowrap'); +const omit = require('object.omit'); require('promise.prototype.finally').shim(); require('array.prototype.flatten').shim(); @@ -26,16 +27,25 @@ if(!(args.users && args.users.length) && !(args.posts && args.posts.length)) { } Promise.resolve().then(async () => { - let userPosts = args.users ? await getUserPosts(args.users) : {}; + const initUsers = args.users ? args.users.reduce((acc, username) => ({...acc, [username]: {name: username, posts: []}}), {}) : {}; + let userPosts = await getUserPosts(initUsers); if(args.posts) { userPosts = await getPosts(args.posts, userPosts); } - const curatedPosts = curatePosts(userPosts, args); - const posts = await attachContentInfo(curatedPosts); + const curatedUserPosts = curatePosts(userPosts, args); + const infoUserPosts = await attachContentInfo(curatedUserPosts); - return fetchContent(posts); + console.log(util.inspect(infoUserPosts, {depth: 10})); + + /* + for(const user of args.users) { + console.log(user); + } + */ + + // return fetchContent(posts, userPosts); }).catch(error => { return console.error(error); }); diff --git a/src/curate/hashPost.js b/src/curate/hashPost.js new file mode 100644 index 0000000..428962a --- /dev/null +++ b/src/curate/hashPost.js @@ -0,0 +1,9 @@ +'use strict'; + +const crypto = require('crypto'); + +const hashPost = post => { + return crypto.createHash('md5').update(post.id + post.subreddit_id + post.created_utc + post.title).digest('hex'); +}; + +module.exports = hashPost; diff --git a/src/curate/posts.js b/src/curate/posts.js index 7cd3dcd..97855ce 100644 --- a/src/curate/posts.js +++ b/src/curate/posts.js @@ -3,11 +3,12 @@ const config = require('config'); const dissectLink = require('../dissectLink.js'); const omit = require('object.omit'); +const hashPost = require('./hashPost.js'); const curatePosts = (userPosts, args) => { const processed = new Set(); - return Object.values(userPosts).reduce((accPosts, user) => accPosts.concat(user.posts.reduce((accUserPosts, post, index) => { + return Object.values(userPosts).reduce((accPosts, user) => ({...accPosts, [user.name]: {...user, posts: user.posts.reduce((accUserPosts, post, index) => { // cut-off at limit, but don't count posts requested directly by ID if(accUserPosts.length >= args.limit && !post.direct) { return accUserPosts; @@ -53,9 +54,10 @@ const curatePosts = (userPosts, args) => { datetime: new Date(post.created_utc * 1000), subreddit: post.subreddit.display_name, preview: post.preview ? post.preview.images.map(image => image.source) : null, - host + host, + hash: hashPost(post) }); - }, [])), []); + }, [])}}), {}); }; module.exports = curatePosts; diff --git a/src/fetch/content.js b/src/fetch/content.js index 543db96..8bbcd5b 100644 --- a/src/fetch/content.js +++ b/src/fetch/content.js @@ -16,9 +16,11 @@ const exiftoolBin = require('dist-exiftool'); const ep = new exiftool.ExiftoolProcess(exiftoolBin); -module.exports = function(posts) { +module.exports = function(posts, users) { return Promise.resolve().then(() => { return ep.open(); + }).then(() => { + // console.log(users); }).then(() => { return Promise.all(posts.map(post => { return Promise.all(post.content.items.map((item, index) => { @@ -66,7 +68,12 @@ module.exports = function(posts) { return saveMeta(filepath, meta, ep); } }); - })); + })).then(result => { + const filename = interpolate(config.library.index.file, post.user, post); + const entry = interpolate(config.library.index.entry, post.user, post, null, false) + '\n'; + + return fs.appendFile(filename, config.library.index.unique ? `${post.hash} ${entry}` : entry); + }); }); })); }).finally(() => { diff --git a/src/fetch/info.js b/src/fetch/info.js index ddfc5b3..59228c9 100644 --- a/src/fetch/info.js +++ b/src/fetch/info.js @@ -2,28 +2,31 @@ const util = require('util'); const config = require('config'); +const Promise = require('bluebird'); const methods = require('../methods/methods.js'); -function attachContentInfo(posts) { - return Promise.all(posts.reduce((acc, post) => { - if(post.host && methods[post.host.method]) { - acc = acc.concat(methods[post.host.method](post).then(content => Object.assign(post, {content})).catch(error => { - console.log('\x1b[31m%s\x1b[0m', `${error} (${post.permalink})`); - - if(config.fetch.archives.preview && post.preview) { - console.log(`Found preview images for unavailable source '${post.url}' (${post.permalink})`); - - return methods.redditPreview(post).then(content => Object.assign(post, {content})); - } - - return null; - })); - } else { +const attachContentInfo = users => { + return Promise.reduce(Object.values(users), async (accUsers, user) => ({...accUsers, [user.name]: {...user, posts: await Promise.reduce(user.posts, async (accPosts, post) => { + if(!post.host || !methods[post.host.method]) { console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${post.url}' (${post.permalink})`); + + return accPosts; } - return acc; - }, [])).then(posts => posts.filter(post => post)); + try { + return [...accPosts, await methods[post.host.method](post)]; + } catch(error) { + console.log('\x1b[31m%s\x1b[0m', `${error} (${post.permalink})`); + + if(config.fetch.archives.preview && post.preview) { + console.log(`Found preview images for unavailable source '${post.url}' (${post.permalink})`); + + return [...accPosts, {...post, content: await methods.redditPreview(post)}]; + } + + return accPosts; + } + }, [])}}), {}); }; module.exports = attachContentInfo; diff --git a/src/interpolate.js b/src/interpolate.js index dc7fe4d..e52c2a0 100644 --- a/src/interpolate.js +++ b/src/interpolate.js @@ -6,7 +6,7 @@ const url = require('url'); const dateFns = require('date-fns'); const mime = require('mime-types'); -function interpolate(pattern, user, post, item) { +function interpolate(pattern, user, post, item, strip = true) { const dateFormat = config.library.dateFormat || 'YYYYMMDD'; const vars = { @@ -41,8 +41,10 @@ function interpolate(pattern, user, post, item) { $postUser: post.user, $postDate: dateFns.format(post.datetime, dateFormat), $postIndex: post.index + config.library.indexOffset, + $url: post.url, $subreddit: post.subreddit, - $host: post.host.label + $hostLabel: post.host.label, + $hostId: post.host.id }); if(post.content.album) { @@ -70,7 +72,7 @@ function interpolate(pattern, user, post, item) { return Object.entries(vars).reduce((acc, [key, value], index) => { // substitute slashes for filesystem compatability - if(key !== '$base') { + if(key !== '$base' && strip) { value = (value || '').toString().replace(/\//g, config.library.slashSubstitute); } diff --git a/src/sources/getUserPosts.js b/src/sources/getUserPosts.js index d8fdfcd..d37d667 100644 --- a/src/sources/getUserPosts.js +++ b/src/sources/getUserPosts.js @@ -34,7 +34,7 @@ const getPosts = async (username, reddit, args) => { } }; -const getUserPostsWrap = (reddit, args) => usernames => Promise.props(usernames.reduce((userPosts, username) => { +const getUserPostsWrap = (reddit, args) => users => Promise.props(Object.entries(users).reduce((userPosts, [username, user]) => { userPosts[username] = (async () => { const [user, posts] = await Promise.all([ getUser(username, reddit),