Added support for a file listing host IDs to ignore.

This commit is contained in:
DebaucheryLibrarian 2024-09-11 05:16:56 +02:00
parent b5da928c98
commit 91577a00b1
4 changed files with 25 additions and 5 deletions

1
.gitignore vendored
View File

@ -5,3 +5,4 @@ output/
dist/ dist/
users users
posts posts
ignore

View File

@ -35,6 +35,7 @@ async function getFileContents(location, label) {
async function getCompleteUserPosts() { async function getCompleteUserPosts() {
let userPosts = {}; let userPosts = {};
let ignoreIds = [];
let usernames = args.users || []; let usernames = args.users || [];
let postIds = args.posts || []; let postIds = args.posts || [];
@ -58,7 +59,11 @@ async function getCompleteUserPosts() {
userPosts = await getPosts(postIds, userPosts); userPosts = await getPosts(postIds, userPosts);
} }
const curatedUserPosts = curatePosts(userPosts, args); if (args.fileIgnore) {
ignoreIds = await getFileContents(args.fileIgnore, 'ignore');
}
const curatedUserPosts = curatePosts(userPosts, ignoreIds, args);
return attachContentInfo(curatedUserPosts); return attachContentInfo(curatedUserPosts);
} }

View File

@ -39,6 +39,10 @@ function getArgs() {
type: 'array', type: 'array',
choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18'], choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18'],
}) })
.option('file-ignore', {
describe: 'Ignore the host IDs in this file',
type: 'string',
})
.option('include', { .option('include', {
describe: 'Include only these sources', describe: 'Include only these sources',
type: 'array', type: 'array',

View File

@ -38,7 +38,7 @@ function report(curatedPosts, indexed, user, args) {
} }
}; };
function curatePost(acc, post, user, index, indexed, processed, args) { function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args) {
const host = dissectLink(post.url); const host = dissectLink(post.url);
const permalink = `https://reddit.com${post.permalink}`; const permalink = `https://reddit.com${post.permalink}`;
@ -96,6 +96,15 @@ function curatePost(acc, post, user, index, indexed, processed, args) {
const hostIncludes = args.include && !args.include.includes(host.label); const hostIncludes = args.include && !args.include.includes(host.label);
const hostExcluded = args.exclude && args.exclude.includes(host.label); const hostExcluded = args.exclude && args.exclude.includes(host.label);
if (ignoreIds.has(String(host.id).toLowerCase())) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring content '${post.url}' because its ID is specified to be ignored (${permalink})`,
);
return acc;
}
if (hostIncludes || hostExcluded) { if (hostIncludes || hostExcluded) {
console.log( console.log(
'\x1b[33m%s\x1b[0m', '\x1b[33m%s\x1b[0m',
@ -108,7 +117,7 @@ function curatePost(acc, post, user, index, indexed, processed, args) {
if (config.fetch.avoidDuplicates && processed.has(host.id)) { if (config.fetch.avoidDuplicates && processed.has(host.id)) {
console.log( console.log(
'\x1b[33m%s\x1b[0m', '\x1b[33m%s\x1b[0m',
`Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`, `Ignoring duplicate content '${post.url}' (cross-post, repost or superfluous --post ID) (${permalink})`,
); );
return acc; return acc;
@ -120,8 +129,9 @@ function curatePost(acc, post, user, index, indexed, processed, args) {
return { ...acc, posts: [...acc.posts, curatedPost] }; return { ...acc, posts: [...acc.posts, curatedPost] };
} }
const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPosts, user) => { const curatePosts = (userPosts, ignoreIdsArray, args) => Object.values(userPosts).reduce((accPosts, user) => {
const processed = new Set(); const processed = new Set();
const ignoreIds = new Set(ignoreIdsArray.map(postId => String(postId).toLowerCase()));
const indexedByDate = user.indexed.original.sort((entryA, entryB) => new Date(entryA.date) - new Date(entryB.date)); const indexedByDate = user.indexed.original.sort((entryA, entryB) => new Date(entryA.date) - new Date(entryB.date));
const indexed = { const indexed = {
@ -130,7 +140,7 @@ const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPos
latest: indexedByDate.slice(-1)[0], latest: indexedByDate.slice(-1)[0],
}; };
const curatedPosts = user.posts.reduce((accUserPosts, post, index) => curatePost(accUserPosts, post, user, index, indexed, processed, args), { const curatedPosts = user.posts.reduce((accUserPosts, post, index) => curatePost(accUserPosts, post, user, index, indexed, ignoreIds, processed, args), {
posts: [], posts: [],
indexedUpdated: [], indexedUpdated: [],
tooOldCount: 0, tooOldCount: 0,