From 91577a00b13fa9b2f1c0a81def71a6acb9b68be0 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 11 Sep 2024 05:16:56 +0200 Subject: [PATCH] Added support for file with host IDs to ignore. --- .gitignore | 1 + src/app.js | 7 ++++++- src/cli.js | 4 ++++ src/curate/posts.js | 18 ++++++++++++++---- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 4bb4335..f65ca33 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ output/ dist/ users posts +ignore diff --git a/src/app.js b/src/app.js index f8e2d43..057bad1 100644 --- a/src/app.js +++ b/src/app.js @@ -35,6 +35,7 @@ async function getFileContents(location, label) { async function getCompleteUserPosts() { let userPosts = {}; + let ignoreIds = []; let usernames = args.users || []; let postIds = args.posts || []; @@ -58,7 +59,11 @@ async function getCompleteUserPosts() { userPosts = await getPosts(postIds, userPosts); } - const curatedUserPosts = curatePosts(userPosts, args); + if (args.fileIgnore) { + ignoreIds = await getFileContents(args.fileIgnore, 'ignore'); + } + + const curatedUserPosts = curatePosts(userPosts, ignoreIds, args); return attachContentInfo(curatedUserPosts); } diff --git a/src/cli.js b/src/cli.js index 02140b3..fcf66e1 100644 --- a/src/cli.js +++ b/src/cli.js @@ -39,6 +39,10 @@ function getArgs() { type: 'array', choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18'], }) + .option('file-ignore', { + describe: 'Ignore the host IDs in this file', + type: 'string', + }) .option('include', { describe: 'Include only these sources', type: 'array', diff --git a/src/curate/posts.js b/src/curate/posts.js index 91fd153..4518d37 100644 --- a/src/curate/posts.js +++ b/src/curate/posts.js @@ -38,7 +38,7 @@ function report(curatedPosts, indexed, user, args) { } }; -function curatePost(acc, post, user, index, indexed, processed, args) { +function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args) { const host = dissectLink(post.url); const permalink = `https://reddit.com${post.permalink}`; @@ -96,6 +96,15 @@ function curatePost(acc, post, user, index, indexed, processed, args) { const hostIncludes = args.include && !args.include.includes(host.label); const hostExcluded = args.exclude && args.exclude.includes(host.label); + if (ignoreIds.has(String(host.id).toLowerCase())) { + console.log( + '\x1b[33m%s\x1b[0m', + `Ignoring content '${post.url}' because its ID is specified to be ignored (${permalink})`, + ); + + return acc; + } + if (hostIncludes || hostExcluded) { console.log( '\x1b[33m%s\x1b[0m', @@ -108,7 +117,7 @@ function curatePost(acc, post, user, index, indexed, processed, args) { if (config.fetch.avoidDuplicates && processed.has(host.id)) { console.log( '\x1b[33m%s\x1b[0m', - `Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`, + `Ignoring duplicate content '${post.url}' (cross-post, repost or superfluous --post ID) (${permalink})`, ); return acc; @@ -120,8 +129,9 @@ function curatePost(acc, post, user, index, indexed, processed, args) { return { ...acc, posts: [...acc.posts, curatedPost] }; } -const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPosts, user) => { +const curatePosts = (userPosts, ignoreIdsArray, args) => Object.values(userPosts).reduce((accPosts, user) => { const processed = new Set(); + const ignoreIds = new Set(ignoreIdsArray.map(postId => String(postId).toLowerCase())); const indexedByDate = user.indexed.original.sort((entryA, entryB) => new Date(entryA.date) - new Date(entryB.date)); const indexed = { @@ -130,7 +140,7 @@ const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPos latest: indexedByDate.slice(-1)[0], }; - const curatedPosts = user.posts.reduce((accUserPosts, post, index) => curatePost(accUserPosts, post, user, index, indexed, processed, args), { + const curatedPosts = user.posts.reduce((accUserPosts, post, index) => curatePost(accUserPosts, post, user, index, indexed, ignoreIds, processed, args), { posts: [], indexedUpdated: [], tooOldCount: 0,