Added support for a file containing host IDs to ignore.
This commit is contained in:
parent
b5da928c98
commit
91577a00b1
|
@ -5,3 +5,4 @@ output/
|
||||||
dist/
|
dist/
|
||||||
users
|
users
|
||||||
posts
|
posts
|
||||||
|
ignore
|
||||||
|
|
|
@ -35,6 +35,7 @@ async function getFileContents(location, label) {
|
||||||
|
|
||||||
async function getCompleteUserPosts() {
|
async function getCompleteUserPosts() {
|
||||||
let userPosts = {};
|
let userPosts = {};
|
||||||
|
let ignoreIds = [];
|
||||||
let usernames = args.users || [];
|
let usernames = args.users || [];
|
||||||
let postIds = args.posts || [];
|
let postIds = args.posts || [];
|
||||||
|
|
||||||
|
@ -58,7 +59,11 @@ async function getCompleteUserPosts() {
|
||||||
userPosts = await getPosts(postIds, userPosts);
|
userPosts = await getPosts(postIds, userPosts);
|
||||||
}
|
}
|
||||||
|
|
||||||
const curatedUserPosts = curatePosts(userPosts, args);
|
if (args.fileIgnore) {
|
||||||
|
ignoreIds = await getFileContents(args.fileIgnore, 'ignore');
|
||||||
|
}
|
||||||
|
|
||||||
|
const curatedUserPosts = curatePosts(userPosts, ignoreIds, args);
|
||||||
|
|
||||||
return attachContentInfo(curatedUserPosts);
|
return attachContentInfo(curatedUserPosts);
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,10 @@ function getArgs() {
|
||||||
type: 'array',
|
type: 'array',
|
||||||
choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18'],
|
choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18'],
|
||||||
})
|
})
|
||||||
|
.option('file-ignore', {
|
||||||
|
describe: 'Ignore the host IDs in this file',
|
||||||
|
type: 'string',
|
||||||
|
})
|
||||||
.option('include', {
|
.option('include', {
|
||||||
describe: 'Include only these sources',
|
describe: 'Include only these sources',
|
||||||
type: 'array',
|
type: 'array',
|
||||||
|
|
|
@ -38,7 +38,7 @@ function report(curatedPosts, indexed, user, args) {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
function curatePost(acc, post, user, index, indexed, processed, args) {
|
function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args) {
|
||||||
const host = dissectLink(post.url);
|
const host = dissectLink(post.url);
|
||||||
const permalink = `https://reddit.com${post.permalink}`;
|
const permalink = `https://reddit.com${post.permalink}`;
|
||||||
|
|
||||||
|
@ -96,6 +96,15 @@ function curatePost(acc, post, user, index, indexed, processed, args) {
|
||||||
const hostIncludes = args.include && !args.include.includes(host.label);
|
const hostIncludes = args.include && !args.include.includes(host.label);
|
||||||
const hostExcluded = args.exclude && args.exclude.includes(host.label);
|
const hostExcluded = args.exclude && args.exclude.includes(host.label);
|
||||||
|
|
||||||
|
if (ignoreIds.has(String(host.id).toLowerCase())) {
|
||||||
|
console.log(
|
||||||
|
'\x1b[33m%s\x1b[0m',
|
||||||
|
`Ignoring content '${post.url}' because its ID is specified to be ignored (${permalink})`,
|
||||||
|
);
|
||||||
|
|
||||||
|
return acc;
|
||||||
|
}
|
||||||
|
|
||||||
if (hostIncludes || hostExcluded) {
|
if (hostIncludes || hostExcluded) {
|
||||||
console.log(
|
console.log(
|
||||||
'\x1b[33m%s\x1b[0m',
|
'\x1b[33m%s\x1b[0m',
|
||||||
|
@ -108,7 +117,7 @@ function curatePost(acc, post, user, index, indexed, processed, args) {
|
||||||
if (config.fetch.avoidDuplicates && processed.has(host.id)) {
|
if (config.fetch.avoidDuplicates && processed.has(host.id)) {
|
||||||
console.log(
|
console.log(
|
||||||
'\x1b[33m%s\x1b[0m',
|
'\x1b[33m%s\x1b[0m',
|
||||||
`Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`,
|
`Ignoring duplicate content '${post.url}' (cross-post, repost or superfluous --post ID) (${permalink})`,
|
||||||
);
|
);
|
||||||
|
|
||||||
return acc;
|
return acc;
|
||||||
|
@ -120,8 +129,9 @@ function curatePost(acc, post, user, index, indexed, processed, args) {
|
||||||
return { ...acc, posts: [...acc.posts, curatedPost] };
|
return { ...acc, posts: [...acc.posts, curatedPost] };
|
||||||
}
|
}
|
||||||
|
|
||||||
const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPosts, user) => {
|
const curatePosts = (userPosts, ignoreIdsArray, args) => Object.values(userPosts).reduce((accPosts, user) => {
|
||||||
const processed = new Set();
|
const processed = new Set();
|
||||||
|
const ignoreIds = new Set(ignoreIdsArray.map(postId => String(postId).toLowerCase()));
|
||||||
const indexedByDate = user.indexed.original.sort((entryA, entryB) => new Date(entryA.date) - new Date(entryB.date));
|
const indexedByDate = user.indexed.original.sort((entryA, entryB) => new Date(entryA.date) - new Date(entryB.date));
|
||||||
|
|
||||||
const indexed = {
|
const indexed = {
|
||||||
|
@ -130,7 +140,7 @@ const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPos
|
||||||
latest: indexedByDate.slice(-1)[0],
|
latest: indexedByDate.slice(-1)[0],
|
||||||
};
|
};
|
||||||
|
|
||||||
const curatedPosts = user.posts.reduce((accUserPosts, post, index) => curatePost(accUserPosts, post, user, index, indexed, processed, args), {
|
const curatedPosts = user.posts.reduce((accUserPosts, post, index) => curatePost(accUserPosts, post, user, index, indexed, ignoreIds, processed, args), {
|
||||||
posts: [],
|
posts: [],
|
||||||
indexedUpdated: [],
|
indexedUpdated: [],
|
||||||
tooOldCount: 0,
|
tooOldCount: 0,
|
||||||
|
|
Loading…
Reference in New Issue