From f41b78818326183ae66118c5380b8d3776171a03 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 11 Sep 2024 05:16:56 +0200 Subject: [PATCH] Extracting indexed posts from fetched posts to allow for index rewrite flexibility. --- src/curate/posts.js | 102 +++++++++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 45 deletions(-) diff --git a/src/curate/posts.js b/src/curate/posts.js index 1e175a5..bc596ff 100644 --- a/src/curate/posts.js +++ b/src/curate/posts.js @@ -6,49 +6,11 @@ const omit = require('object.omit'); const dissectLink = require('../dissectLink.js'); const hashPost = require('./hashPost.js'); -function curatePost(accUserPosts, post, user, index, processed, args) { - // cut-off at limit, but don't count posts requested directly by ID - if (accUserPosts.length >= args.limit && !post.direct) { - return accUserPosts; - } - +function curatePost(acc, post, user, index, indexedPostIds, processed, args) { const host = dissectLink(post.url); const permalink = `https://reddit.com${post.permalink}`; - const ignoring = args.ignore ? args.ignore.find(prop => post[prop]) : null; - - if (ignoring) { - console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`); - - return accUserPosts; - } - - if (host) { - const hostIncludes = args.include && !args.include.includes(host.label); - const hostExcluded = args.exclude && args.exclude.includes(host.label); - - if (hostIncludes || hostExcluded) { - console.log( - '\x1b[33m%s\x1b[0m', - `Ignoring source '${host.label}' from post '${post.url}' (${permalink})`, - ); - - return accUserPosts; - } - - if (config.fetch.avoidDuplicates && processed.has(host.id)) { - console.log( - '\x1b[33m%s\x1b[0m', - `Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`, - ); - - return accUserPosts; - } - - processed.add(host.id); - } - - return accUserPosts.concat({ + const curatedPost = { id: post.id, index, title: post.title, @@ -61,15 +23,65 @@ function curatePost(accUserPosts, post, user, index, processed, args) { preview: post.preview ? post.preview.images.map(image => image.source) : null, host, hash: hashPost(post), - }); + }; + + if (indexedPostIds.includes(post.id)) { + return { ...acc, indexed: { ...acc.indexed, [post.id]: curatedPost } }; + } + + // cut-off at limit, but don't count posts requested directly by ID + if (acc.posts.length >= args.limit && !post.direct) { + return acc; + } + + const ignoring = args.ignore ? args.ignore.find(prop => post[prop]) : null; + + if (ignoring) { + console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`); + + return acc; + } + + if (host) { + const hostIncludes = args.include && !args.include.includes(host.label); + const hostExcluded = args.exclude && args.exclude.includes(host.label); + + if (hostIncludes || hostExcluded) { + console.log( + '\x1b[33m%s\x1b[0m', + `Ignoring source '${host.label}' from post '${post.url}' (${permalink})`, + ); + + return acc; + } + + if (config.fetch.avoidDuplicates && processed.has(host.id)) { + console.log( + '\x1b[33m%s\x1b[0m', + `Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`, + ); + + return acc; + } + + processed.add(host.id); + } + + return { ...acc, posts: [...acc.posts, curatedPost] }; } const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPosts, user) => { - const indexedHostIds = user.indexed.map(entry => entry.hostId); // already downloaded - const processed = new Set(indexedHostIds); + const processed = new Set(); + const indexedPostIds = user.indexed.map(entry => entry.postId); - const posts = user.posts.reduce((accUserPosts, post, index) => - curatePost(accUserPosts, post, user, index, processed, args), []); + const { posts, indexed } = user.posts.reduce((accUserPosts, post, index) => + curatePost(accUserPosts, post, user, index, indexedPostIds, processed, args), { posts: [], indexed: {} }); + + const indexedLength = Object.keys(indexed).length; + + if (indexedLength > 0) { + console.log('\x1b[33m%s\x1b[0m', `Ignoring ${indexedLength} indexed posts for '${user.name}'`); + } return { ...accPosts, [user.name]: { ...user, posts } }; }, {});