Added an option to index posts that are specified as ignored (without downloading them). The index is now saved through the save module, and the user is notified when an index has been written.

This commit is contained in:
DebaucheryLibrarian 2024-09-11 05:16:57 +02:00
parent c1b164a5f9
commit e48c00b529
6 changed files with 30 additions and 19 deletions

View File

@ -73,6 +73,10 @@ function getArgs() {
describe: 'Ignore index file and force a redownload of the profile image and description', describe: 'Ignore index file and force a redownload of the profile image and description',
type: 'boolean', type: 'boolean',
}) })
.option('index-ignored', {
describe: 'Add posts specified to be ignored to the index file, but don\'t download them',
type: 'boolean',
})
.option('watch', { .option('watch', {
describe: 'Keep the process running and periodically check for new posts', describe: 'Keep the process running and periodically check for new posts',
type: 'boolean', type: 'boolean',

View File

@ -9,12 +9,21 @@ const hashPost = require('./hashPost.js');
const { isAfter, isBefore, isEqual } = require('date-fns'); const { isAfter, isBefore, isEqual } = require('date-fns');
function report(curatedPosts, indexed, user, args) { function report(curatedPosts, indexed, user, args) {
const { posts, indexedUpdated, tooOldCount, tooRecentCount, beforeIndexedCount, afterIndexedCount } = curatedPosts; const {
posts, indexedUpdated, tooOldCount, tooRecentCount, beforeIndexedCount, afterIndexedCount, requestedIgnored,
} = curatedPosts;
if (indexedUpdated.length > 0) { if (indexedUpdated.length > 0) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${indexedUpdated.length} indexed posts for '${user.name}'`); console.log('\x1b[33m%s\x1b[0m', `Ignoring ${indexedUpdated.length} indexed posts for '${user.name}'`);
} }
if (requestedIgnored.length > 0) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring ${requestedIgnored.length} posts because their IDs are specified to be ignored for '${user.name}'`,
);
}
if (tooOldCount > 0) { if (tooOldCount > 0) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${tooOldCount} older posts for '${user.name}' for specified date limit '${args.after}'`); console.log('\x1b[33m%s\x1b[0m', `Ignoring ${tooOldCount} older posts for '${user.name}' for specified date limit '${args.after}'`);
} }
@ -26,14 +35,14 @@ function report(curatedPosts, indexed, user, args) {
if (beforeIndexedCount > 0) { if (beforeIndexedCount > 0) {
console.log( console.log(
'\x1b[33m%s\x1b[0m', '\x1b[33m%s\x1b[0m',
`Ignoring ${beforeIndexedCount} posts older than the ${args.afterIndexed} indexed post (${indexed[args.afterIndexed].id}, ${indexed[args.afterIndexed].date}) for '${user.name}'` `Ignoring ${beforeIndexedCount} posts older than the ${args.afterIndexed} indexed post (${indexed[args.afterIndexed].id}, ${indexed[args.afterIndexed].date}) for '${user.name}'`,
); );
} }
if (afterIndexedCount > 0) { if (afterIndexedCount > 0) {
console.log( console.log(
'\x1b[33m%s\x1b[0m', '\x1b[33m%s\x1b[0m',
`Ignoring ${afterIndexedCount} posts newer than the ${args.beforeIndexed} indexed post (${indexed[args.beforeIndexed].id}, ${indexed[args.beforeIndexed].date}) for '${user.name}'` `Ignoring ${afterIndexedCount} posts newer than the ${args.beforeIndexed} indexed post (${indexed[args.beforeIndexed].id}, ${indexed[args.beforeIndexed].date}) for '${user.name}'`,
); );
} }
} }
@ -97,12 +106,7 @@ function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args)
const hostExcluded = args.exclude && args.exclude.includes(host.label); const hostExcluded = args.exclude && args.exclude.includes(host.label);
if (ignoreIds.has(String(host.id).toLowerCase())) { if (ignoreIds.has(String(host.id).toLowerCase())) {
console.log( return { ...acc, requestedIgnored: [...acc.requestedIgnored, curatedPost] };
'\x1b[33m%s\x1b[0m',
`Ignoring content '${post.url}' because its ID is specified to be ignored (${permalink})`,
);
return acc;
} }
if (hostIncludes || hostExcluded) { if (hostIncludes || hostExcluded) {
@ -143,6 +147,7 @@ const curatePosts = (userPosts, ignoreIdsArray, args) => Object.values(userPosts
const curatedPosts = user.posts.reduce((accUserPosts, post, index) => curatePost(accUserPosts, post, user, index, indexed, ignoreIds, processed, args), { const curatedPosts = user.posts.reduce((accUserPosts, post, index) => curatePost(accUserPosts, post, user, index, indexed, ignoreIds, processed, args), {
posts: [], posts: [],
indexedUpdated: [], indexedUpdated: [],
requestedIgnored: [],
tooOldCount: 0, tooOldCount: 0,
tooRecentCount: 0, tooRecentCount: 0,
beforeIndexedCount: 0, beforeIndexedCount: 0,
@ -162,6 +167,7 @@ const curatePosts = (userPosts, ignoreIdsArray, args) => Object.values(userPosts
profile: user.indexed.profile, profile: user.indexed.profile,
original: indexedOriginal, original: indexedOriginal,
updated: curatedPosts.indexedUpdated, updated: curatedPosts.indexedUpdated,
ignored: curatedPosts.requestedIgnored,
oldest: indexed.oldest, oldest: indexed.oldest,
latest: indexed.latest, latest: indexed.latest,
}, },

View File

@ -83,7 +83,7 @@ async function fetchSaveContent(user, ep, args) {
concurrency: config.fetch.concurrency, concurrency: config.fetch.concurrency,
}); });
return writeToIndex(posts, profilePaths, user); return writeToIndex(posts, profilePaths, user, args);
} }
module.exports = fetchSaveContent; module.exports = fetchSaveContent;

View File

@ -1,16 +1,17 @@
'use strict'; 'use strict';
const config = require('config'); const config = require('config');
const fs = require('fs-extra');
const yaml = require('js-yaml'); const yaml = require('js-yaml');
const interpolate = require('../interpolate.js'); const interpolate = require('../interpolate');
const textToStream = require('./textToStream');
const save = require('./save');
async function writeToIndex(posts, profilePaths, user) { async function writeToIndex(posts, profilePaths, user, args) {
const filename = interpolate(config.library.index.file, user, null, false); const filepath = interpolate(config.library.index.file, user, null, false);
const now = new Date(); const now = new Date();
const newAndUpdatedEntries = posts.concat(user.indexed.updated).map((post) => { const newAndUpdatedEntries = posts.concat(user.indexed.updated, args.indexIgnored ? user.indexed.ignored : []).map((post) => {
const entryPost = { const entryPost = {
id: post.id, id: post.id,
subreddit: post.subreddit, subreddit: post.subreddit,
@ -41,7 +42,7 @@ async function writeToIndex(posts, profilePaths, user) {
return; return;
} }
return fs.writeFile(filename, yaml.safeDump(data)); return save(filepath, textToStream(yaml.safeDump(data)));
} }
module.exports = writeToIndex; module.exports = writeToIndex;

View File

@ -14,7 +14,7 @@ async function getIndex(user) {
return yaml.safeLoad(indexFile); return yaml.safeLoad(indexFile);
} catch (error) { } catch (error) {
console.log('\x1b[33m%s\x1b[0m', `Could not load index file for '${user.name}' at '${indexFilePath}': ${error}`); console.log('\x1b[33m%s\x1b[0m', `No index file found for '${user.name}' at '${indexFilePath}'`);
return { profile: { image: null, description: null }, posts: [] }; return { profile: { image: null, description: null }, posts: [] };
} }

View File

@ -23,12 +23,12 @@ async function getUser(username, reddit) {
async function getPosts(username, reddit, args) { async function getPosts(username, reddit, args) {
try { try {
const user = await reddit.getUser(username).getSubmissions({ const submissions = await reddit.getUser(username).getSubmissions({
sort: args.sort, sort: args.sort,
limit: Infinity, limit: Infinity,
}); });
return user; return submissions;
} catch (error) { } catch (error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch posts from reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`); console.log('\x1b[31m%s\x1b[0m', `Failed to fetch posts from reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);