Extracting indexed posts from fetched posts to allow for index rewrite flexibility.
This commit is contained in:
parent
c49e1edca0
commit
f41b788183
|
@ -6,49 +6,11 @@ const omit = require('object.omit');
|
|||
const dissectLink = require('../dissectLink.js');
|
||||
const hashPost = require('./hashPost.js');
|
||||
|
||||
function curatePost(accUserPosts, post, user, index, processed, args) {
|
||||
// cut-off at limit, but don't count posts requested directly by ID
|
||||
if (accUserPosts.length >= args.limit && !post.direct) {
|
||||
return accUserPosts;
|
||||
}
|
||||
|
||||
function curatePost(acc, post, user, index, indexedPostIds, processed, args) {
|
||||
const host = dissectLink(post.url);
|
||||
const permalink = `https://reddit.com${post.permalink}`;
|
||||
|
||||
const ignoring = args.ignore ? args.ignore.find(prop => post[prop]) : null;
|
||||
|
||||
if (ignoring) {
|
||||
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`);
|
||||
|
||||
return accUserPosts;
|
||||
}
|
||||
|
||||
if (host) {
|
||||
const hostIncludes = args.include && !args.include.includes(host.label);
|
||||
const hostExcluded = args.exclude && args.exclude.includes(host.label);
|
||||
|
||||
if (hostIncludes || hostExcluded) {
|
||||
console.log(
|
||||
'\x1b[33m%s\x1b[0m',
|
||||
`Ignoring source '${host.label}' from post '${post.url}' (${permalink})`,
|
||||
);
|
||||
|
||||
return accUserPosts;
|
||||
}
|
||||
|
||||
if (config.fetch.avoidDuplicates && processed.has(host.id)) {
|
||||
console.log(
|
||||
'\x1b[33m%s\x1b[0m',
|
||||
`Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`,
|
||||
);
|
||||
|
||||
return accUserPosts;
|
||||
}
|
||||
|
||||
processed.add(host.id);
|
||||
}
|
||||
|
||||
return accUserPosts.concat({
|
||||
const curatedPost = {
|
||||
id: post.id,
|
||||
index,
|
||||
title: post.title,
|
||||
|
@ -61,15 +23,65 @@ function curatePost(accUserPosts, post, user, index, processed, args) {
|
|||
preview: post.preview ? post.preview.images.map(image => image.source) : null,
|
||||
host,
|
||||
hash: hashPost(post),
|
||||
});
|
||||
};
|
||||
|
||||
if (indexedPostIds.includes(post.id)) {
|
||||
return { ...acc, indexed: { ...acc.indexed, [post.id]: curatedPost } };
|
||||
}
|
||||
|
||||
// cut-off at limit, but don't count posts requested directly by ID
|
||||
if (acc.posts.length >= args.limit && !post.direct) {
|
||||
return acc;
|
||||
}
|
||||
|
||||
const ignoring = args.ignore ? args.ignore.find(prop => post[prop]) : null;
|
||||
|
||||
if (ignoring) {
|
||||
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`);
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
if (host) {
|
||||
const hostIncludes = args.include && !args.include.includes(host.label);
|
||||
const hostExcluded = args.exclude && args.exclude.includes(host.label);
|
||||
|
||||
if (hostIncludes || hostExcluded) {
|
||||
console.log(
|
||||
'\x1b[33m%s\x1b[0m',
|
||||
`Ignoring source '${host.label}' from post '${post.url}' (${permalink})`,
|
||||
);
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
if (config.fetch.avoidDuplicates && processed.has(host.id)) {
|
||||
console.log(
|
||||
'\x1b[33m%s\x1b[0m',
|
||||
`Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`,
|
||||
);
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
processed.add(host.id);
|
||||
}
|
||||
|
||||
return { ...acc, posts: [...acc.posts, curatedPost] };
|
||||
}
|
||||
|
||||
/**
 * Curates the fetched posts of every user.
 *
 * For each user, every fetched post is passed through `curatePost`, which
 * sorts it into either `posts` (newly accepted) or `indexed` (its ID is
 * already listed in `user.indexed`). Only the accepted posts are kept on
 * the returned user entry; the number of indexed posts is merely logged.
 *
 * @param {Object} userPosts - users keyed by name; each value is read for
 *   `name`, `posts` (array of fetched posts) and `indexed` (array of entries
 *   carrying a `postId`).
 * @param {Object} args - options forwarded untouched to `curatePost`.
 * @returns {Object} a new object keyed by `user.name`, each value being the
 *   original user with `posts` replaced by the curated posts.
 */
const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPosts, user) => {
  // Shared across all of this user's posts: curatePost records the host IDs
  // it accepts here so later posts with the same content can be skipped.
  const processed = new Set();
  const indexedPostIds = user.indexed.map(entry => entry.postId);

  const { posts, indexed } = user.posts.reduce(
    (accUserPosts, post, index) => curatePost(accUserPosts, post, user, index, indexedPostIds, processed, args),
    { posts: [], indexed: {} },
  );

  const indexedLength = Object.keys(indexed).length;

  if (indexedLength > 0) {
    console.log('\x1b[33m%s\x1b[0m', `Ignoring ${indexedLength} indexed posts for '${user.name}'`);
  }

  return { ...accPosts, [user.name]: { ...user, posts } };
}, {});
|
||||
|
|
Loading…
Reference in New Issue