Extracting indexed posts from fetched posts to allow for index rewrite flexibility.

2024-09-11 05:16:56 +02:00 · 2024-09-11 05:16:56 +02:00 · f41b788183
parent c49e1edca0
commit f41b788183
1 changed files with 57 additions and 45 deletions
--- a/src/curate/posts.js
+++ b/src/curate/posts.js
@@ -6,49 +6,11 @@ const omit = require('object.omit');
 const dissectLink = require('../dissectLink.js');
 const hashPost = require('./hashPost.js');

-function curatePost(accUserPosts, post, user, index, processed, args) {
-    // cut-off at limit, but don't count posts requested directly by ID
-    if (accUserPosts.length >= args.limit && !post.direct) {
-        return accUserPosts;
-    }
-
+function curatePost(acc, post, user, index, indexedPostIds, processed, args) {
    const host = dissectLink(post.url);
    const permalink = `https://reddit.com${post.permalink}`;

-    const ignoring = args.ignore ? args.ignore.find(prop => post[prop]) : null;
-
-    if (ignoring) {
-        console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`);
-
-        return accUserPosts;
-    }
-
-    if (host) {
-        const hostIncludes = args.include && !args.include.includes(host.label);
-        const hostExcluded = args.exclude && args.exclude.includes(host.label);
-
-        if (hostIncludes || hostExcluded) {
-            console.log(
-                '\x1b[33m%s\x1b[0m',
-                `Ignoring source '${host.label}' from post '${post.url}' (${permalink})`,
-            );
-
-            return accUserPosts;
-        }
-
-        if (config.fetch.avoidDuplicates && processed.has(host.id)) {
-            console.log(
-                '\x1b[33m%s\x1b[0m',
-                `Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`,
-            );
-
-            return accUserPosts;
-        }
-
-        processed.add(host.id);
-    }
-
-    return accUserPosts.concat({
+    const curatedPost = {
        id: post.id,
        index,
        title: post.title,
@@ -61,15 +23,65 @@ function curatePost(accUserPosts, post, user, index, processed, args) {
        preview: post.preview ? post.preview.images.map(image => image.source) : null,
        host,
        hash: hashPost(post),
-    });
+    };
+
+    if (indexedPostIds.includes(post.id)) {
+        return { ...acc, indexed: { ...acc.indexed, [post.id]: curatedPost } };
+    }
+
+    // cut-off at limit, but don't count posts requested directly by ID
+    if (acc.posts.length >= args.limit && !post.direct) {
+        return acc;
+    }
+
+    const ignoring = args.ignore ? args.ignore.find(prop => post[prop]) : null;
+
+    if (ignoring) {
+        console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`);
+
+        return acc;
+    }
+
+    if (host) {
+        const hostIncludes = args.include && !args.include.includes(host.label);
+        const hostExcluded = args.exclude && args.exclude.includes(host.label);
+
+        if (hostIncludes || hostExcluded) {
+            console.log(
+                '\x1b[33m%s\x1b[0m',
+                `Ignoring source '${host.label}' from post '${post.url}' (${permalink})`,
+            );
+
+            return acc;
+        }
+
+        if (config.fetch.avoidDuplicates && processed.has(host.id)) {
+            console.log(
+                '\x1b[33m%s\x1b[0m',
+                `Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`,
+            );
+
+            return acc;
+        }
+
+        processed.add(host.id);
+    }
+
+    return { ...acc, posts: [...acc.posts, curatedPost] };
 }

 const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPosts, user) => {
-    const indexedHostIds = user.indexed.map(entry => entry.hostId); // already downloaded
-    const processed = new Set(indexedHostIds);
+    const processed = new Set();
+    const indexedPostIds = user.indexed.map(entry => entry.postId);

-    const posts = user.posts.reduce((accUserPosts, post, index) =>
-        curatePost(accUserPosts, post, user, index, processed, args), []);
+    const { posts, indexed } = user.posts.reduce((accUserPosts, post, index) =>
+        curatePost(accUserPosts, post, user, index, indexedPostIds, processed, args), { posts: [], indexed: {} });
+
+    const indexedLength = Object.keys(indexed).length;
+
+    if (indexedLength > 0) {
+        console.log('\x1b[33m%s\x1b[0m', `Ignoring ${indexedLength} indexed posts for '${user.name}'`);
+    }

    return { ...accPosts, [user.name]: { ...user, posts } };
 }, {});