From e48c00b5293d58143f1564d28dff3626947d22dc Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 11 Sep 2024 05:16:57 +0200 Subject: [PATCH] Added option to index (specified) ignored posts. Saving index through save module, and it now notifies the user that an index has been written. --- src/cli.js | 4 ++++ src/curate/posts.js | 24 +++++++++++++++--------- src/fetch/content.js | 2 +- src/save/writeToIndex.js | 13 +++++++------ src/sources/getIndex.js | 2 +- src/sources/getUserPosts.js | 4 ++-- 6 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/cli.js b/src/cli.js index 3acbd05..5fd93f6 100644 --- a/src/cli.js +++ b/src/cli.js @@ -73,6 +73,10 @@ function getArgs() { describe: 'Ignore index file and force a redownload of the profile image and description', type: 'boolean', }) + .option('index-ignored', { + describe: 'Add posts specified to be ignored to the index file, but don\'t download them', + type: 'boolean', + }) .option('watch', { describe: 'Keep the process running and periodically check for new posts', type: 'boolean', diff --git a/src/curate/posts.js b/src/curate/posts.js index 6f16d75..9929208 100644 --- a/src/curate/posts.js +++ b/src/curate/posts.js @@ -9,12 +9,21 @@ const hashPost = require('./hashPost.js'); const { isAfter, isBefore, isEqual } = require('date-fns'); function report(curatedPosts, indexed, user, args) { - const { posts, indexedUpdated, tooOldCount, tooRecentCount, beforeIndexedCount, afterIndexedCount } = curatedPosts; + const { + posts, indexedUpdated, tooOldCount, tooRecentCount, beforeIndexedCount, afterIndexedCount, requestedIgnored, + } = curatedPosts; if (indexedUpdated.length > 0) { console.log('\x1b[33m%s\x1b[0m', `Ignoring ${indexedUpdated.length} indexed posts for '${user.name}'`); } + if (requestedIgnored.length > 0) { + console.log( + '\x1b[33m%s\x1b[0m', + `Ignoring ${requestedIgnored.length} posts because their IDs are specified to be ignored for '${user.name}'`, + ); + } + if (tooOldCount > 0) { console.log('\x1b[33m%s\x1b[0m', `Ignoring ${tooOldCount} older posts for '${user.name}' for specified date limit '${args.after}'`); } @@ -26,14 +35,14 @@ function report(curatedPosts, indexed, user, args) { if (beforeIndexedCount > 0) { console.log( '\x1b[33m%s\x1b[0m', - `Ignoring ${beforeIndexedCount} posts older than the ${args.afterIndexed} indexed post (${indexed[args.afterIndexed].id}, ${indexed[args.afterIndexed].date}) for '${user.name}'` + `Ignoring ${beforeIndexedCount} posts older than the ${args.afterIndexed} indexed post (${indexed[args.afterIndexed].id}, ${indexed[args.afterIndexed].date}) for '${user.name}'`, ); } if (afterIndexedCount > 0) { console.log( '\x1b[33m%s\x1b[0m', - `Ignoring ${afterIndexedCount} posts newer than the ${args.beforeIndexed} indexed post (${indexed[args.beforeIndexed].id}, ${indexed[args.beforeIndexed].date}) for '${user.name}'` + `Ignoring ${afterIndexedCount} posts newer than the ${args.beforeIndexed} indexed post (${indexed[args.beforeIndexed].id}, ${indexed[args.beforeIndexed].date}) for '${user.name}'`, ); } } @@ -97,12 +106,7 @@ function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args) const hostExcluded = args.exclude && args.exclude.includes(host.label); if (ignoreIds.has(String(host.id).toLowerCase())) { - console.log( - '\x1b[33m%s\x1b[0m', - `Ignoring content '${post.url}' because its ID is specified to be ignored (${permalink})`, - ); - - return acc; + return { ...acc, requestedIgnored: [...acc.requestedIgnored, curatedPost] }; } if (hostIncludes || hostExcluded) { @@ -143,6 +147,7 @@ const curatePosts = (userPosts, ignoreIdsArray, args) => Object.values(userPosts const curatedPosts = user.posts.reduce((accUserPosts, post, index) => curatePost(accUserPosts, post, user, index, indexed, ignoreIds, processed, args), { posts: [], indexedUpdated: [], + requestedIgnored: [], tooOldCount: 0, tooRecentCount: 0, beforeIndexedCount: 0, @@ -162,6 +167,7 @@ const curatePosts = (userPosts, ignoreIdsArray, args) => Object.values(userPosts profile: user.indexed.profile, original: indexedOriginal, updated: curatedPosts.indexedUpdated, + ignored: curatedPosts.requestedIgnored, oldest: indexed.oldest, latest: indexed.latest, }, diff --git a/src/fetch/content.js b/src/fetch/content.js index 7a809c4..8c58edf 100644 --- a/src/fetch/content.js +++ b/src/fetch/content.js @@ -83,7 +83,7 @@ async function fetchSaveContent(user, ep, args) { concurrency: config.fetch.concurrency, }); - return writeToIndex(posts, profilePaths, user); + return writeToIndex(posts, profilePaths, user, args); } module.exports = fetchSaveContent; diff --git a/src/save/writeToIndex.js b/src/save/writeToIndex.js index 19f75fc..734103e 100644 --- a/src/save/writeToIndex.js +++ b/src/save/writeToIndex.js @@ -1,16 +1,17 @@ 'use strict'; const config = require('config'); -const fs = require('fs-extra'); const yaml = require('js-yaml'); -const interpolate = require('../interpolate.js'); +const interpolate = require('../interpolate'); +const textToStream = require('./textToStream'); +const save = require('./save'); -async function writeToIndex(posts, profilePaths, user) { - const filename = interpolate(config.library.index.file, user, null, false); +async function writeToIndex(posts, profilePaths, user, args) { + const filepath = interpolate(config.library.index.file, user, null, false); const now = new Date(); - const newAndUpdatedEntries = posts.concat(user.indexed.updated).map((post) => { + const newAndUpdatedEntries = posts.concat(user.indexed.updated, args.indexIgnored ? user.indexed.ignored : []).map((post) => { const entryPost = { id: post.id, subreddit: post.subreddit, @@ -41,7 +42,7 @@ async function writeToIndex(posts, profilePaths, user) { return; } - return fs.writeFile(filename, yaml.safeDump(data)); + return save(filepath, textToStream(yaml.safeDump(data))); } module.exports = writeToIndex; diff --git a/src/sources/getIndex.js b/src/sources/getIndex.js index 193b875..c9351f8 100644 --- a/src/sources/getIndex.js +++ b/src/sources/getIndex.js @@ -14,7 +14,7 @@ async function getIndex(user) { return yaml.safeLoad(indexFile); } catch (error) { - console.log('\x1b[33m%s\x1b[0m', `Could not load index file for '${user.name}' at '${indexFilePath}': ${error}`); + console.log('\x1b[33m%s\x1b[0m', `No index file found for '${user.name}' at '${indexFilePath}'`); return { profile: { image: null, description: null }, posts: [] }; } diff --git a/src/sources/getUserPosts.js b/src/sources/getUserPosts.js index 9705b19..4c1c173 100644 --- a/src/sources/getUserPosts.js +++ b/src/sources/getUserPosts.js @@ -23,12 +23,12 @@ async function getUser(username, reddit) { async function getPosts(username, reddit, args) { try { - const user = await reddit.getUser(username).getSubmissions({ + const submissions = await reddit.getUser(username).getSubmissions({ sort: args.sort, limit: Infinity, }); - return user; + return submissions; } catch (error) { console.log('\x1b[31m%s\x1b[0m', `Failed to fetch posts from reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);