Added support for a file listing host IDs to ignore.

This commit is contained in:
DebaucheryLibrarian 2024-09-11 05:16:56 +02:00
parent b5da928c98
commit 91577a00b1
4 changed files with 25 additions and 5 deletions

1
.gitignore vendored
View File

@ -5,3 +5,4 @@ output/
dist/
users
posts
ignore

View File

@ -35,6 +35,7 @@ async function getFileContents(location, label) {
async function getCompleteUserPosts() {
let userPosts = {};
let ignoreIds = [];
let usernames = args.users || [];
let postIds = args.posts || [];
@ -58,7 +59,11 @@ async function getCompleteUserPosts() {
userPosts = await getPosts(postIds, userPosts);
}
const curatedUserPosts = curatePosts(userPosts, args);
if (args.fileIgnore) {
ignoreIds = await getFileContents(args.fileIgnore, 'ignore');
}
const curatedUserPosts = curatePosts(userPosts, ignoreIds, args);
return attachContentInfo(curatedUserPosts);
}

View File

@ -39,6 +39,10 @@ function getArgs() {
type: 'array',
choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18'],
})
.option('file-ignore', {
describe: 'Ignore the host IDs in this file',
type: 'string',
})
.option('include', {
describe: 'Include only these sources',
type: 'array',

View File

@ -38,7 +38,7 @@ function report(curatedPosts, indexed, user, args) {
}
};
function curatePost(acc, post, user, index, indexed, processed, args) {
function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args) {
const host = dissectLink(post.url);
const permalink = `https://reddit.com${post.permalink}`;
@ -96,6 +96,15 @@ function curatePost(acc, post, user, index, indexed, processed, args) {
const hostIncludes = args.include && !args.include.includes(host.label);
const hostExcluded = args.exclude && args.exclude.includes(host.label);
if (ignoreIds.has(String(host.id).toLowerCase())) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring content '${post.url}' because its ID is specified to be ignored (${permalink})`,
);
return acc;
}
if (hostIncludes || hostExcluded) {
console.log(
'\x1b[33m%s\x1b[0m',
@ -108,7 +117,7 @@ function curatePost(acc, post, user, index, indexed, processed, args) {
if (config.fetch.avoidDuplicates && processed.has(host.id)) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`,
`Ignoring duplicate content '${post.url}' (cross-post, repost or superfluous --post ID) (${permalink})`,
);
return acc;
@ -120,8 +129,9 @@ function curatePost(acc, post, user, index, indexed, processed, args) {
return { ...acc, posts: [...acc.posts, curatedPost] };
}
const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPosts, user) => {
const curatePosts = (userPosts, ignoreIdsArray, args) => Object.values(userPosts).reduce((accPosts, user) => {
const processed = new Set();
const ignoreIds = new Set(ignoreIdsArray.map(postId => String(postId).toLowerCase()));
const indexedByDate = user.indexed.original.sort((entryA, entryB) => new Date(entryA.date) - new Date(entryB.date));
const indexed = {
@ -130,7 +140,7 @@ const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPos
latest: indexedByDate.slice(-1)[0],
};
const curatedPosts = user.posts.reduce((accUserPosts, post, index) => curatePost(accUserPosts, post, user, index, indexed, processed, args), {
const curatedPosts = user.posts.reduce((accUserPosts, post, index) => curatePost(accUserPosts, post, user, index, indexed, ignoreIds, processed, args), {
posts: [],
indexedUpdated: [],
tooOldCount: 0,