Extracting indexed posts from fetched posts to allow for index rewrite flexibility.
This commit is contained in:
parent
c49e1edca0
commit
f41b788183
|
@ -6,49 +6,11 @@ const omit = require('object.omit');
|
||||||
const dissectLink = require('../dissectLink.js');
|
const dissectLink = require('../dissectLink.js');
|
||||||
const hashPost = require('./hashPost.js');
|
const hashPost = require('./hashPost.js');
|
||||||
|
|
||||||
function curatePost(accUserPosts, post, user, index, processed, args) {
|
function curatePost(acc, post, user, index, indexedPostIds, processed, args) {
|
||||||
// cut-off at limit, but don't count posts requested directly by ID
|
|
||||||
if (accUserPosts.length >= args.limit && !post.direct) {
|
|
||||||
return accUserPosts;
|
|
||||||
}
|
|
||||||
|
|
||||||
const host = dissectLink(post.url);
|
const host = dissectLink(post.url);
|
||||||
const permalink = `https://reddit.com${post.permalink}`;
|
const permalink = `https://reddit.com${post.permalink}`;
|
||||||
|
|
||||||
const ignoring = args.ignore ? args.ignore.find(prop => post[prop]) : null;
|
const curatedPost = {
|
||||||
|
|
||||||
if (ignoring) {
|
|
||||||
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`);
|
|
||||||
|
|
||||||
return accUserPosts;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (host) {
|
|
||||||
const hostIncludes = args.include && !args.include.includes(host.label);
|
|
||||||
const hostExcluded = args.exclude && args.exclude.includes(host.label);
|
|
||||||
|
|
||||||
if (hostIncludes || hostExcluded) {
|
|
||||||
console.log(
|
|
||||||
'\x1b[33m%s\x1b[0m',
|
|
||||||
`Ignoring source '${host.label}' from post '${post.url}' (${permalink})`,
|
|
||||||
);
|
|
||||||
|
|
||||||
return accUserPosts;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (config.fetch.avoidDuplicates && processed.has(host.id)) {
|
|
||||||
console.log(
|
|
||||||
'\x1b[33m%s\x1b[0m',
|
|
||||||
`Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`,
|
|
||||||
);
|
|
||||||
|
|
||||||
return accUserPosts;
|
|
||||||
}
|
|
||||||
|
|
||||||
processed.add(host.id);
|
|
||||||
}
|
|
||||||
|
|
||||||
return accUserPosts.concat({
|
|
||||||
id: post.id,
|
id: post.id,
|
||||||
index,
|
index,
|
||||||
title: post.title,
|
title: post.title,
|
||||||
|
@ -61,15 +23,65 @@ function curatePost(accUserPosts, post, user, index, processed, args) {
|
||||||
preview: post.preview ? post.preview.images.map(image => image.source) : null,
|
preview: post.preview ? post.preview.images.map(image => image.source) : null,
|
||||||
host,
|
host,
|
||||||
hash: hashPost(post),
|
hash: hashPost(post),
|
||||||
});
|
};
|
||||||
|
|
||||||
|
if (indexedPostIds.includes(post.id)) {
|
||||||
|
return { ...acc, indexed: { ...acc.indexed, [post.id]: curatedPost } };
|
||||||
|
}
|
||||||
|
|
||||||
|
// cut-off at limit, but don't count posts requested directly by ID
|
||||||
|
if (acc.posts.length >= args.limit && !post.direct) {
|
||||||
|
return acc;
|
||||||
|
}
|
||||||
|
|
||||||
|
const ignoring = args.ignore ? args.ignore.find(prop => post[prop]) : null;
|
||||||
|
|
||||||
|
if (ignoring) {
|
||||||
|
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`);
|
||||||
|
|
||||||
|
return acc;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (host) {
|
||||||
|
const hostIncludes = args.include && !args.include.includes(host.label);
|
||||||
|
const hostExcluded = args.exclude && args.exclude.includes(host.label);
|
||||||
|
|
||||||
|
if (hostIncludes || hostExcluded) {
|
||||||
|
console.log(
|
||||||
|
'\x1b[33m%s\x1b[0m',
|
||||||
|
`Ignoring source '${host.label}' from post '${post.url}' (${permalink})`,
|
||||||
|
);
|
||||||
|
|
||||||
|
return acc;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (config.fetch.avoidDuplicates && processed.has(host.id)) {
|
||||||
|
console.log(
|
||||||
|
'\x1b[33m%s\x1b[0m',
|
||||||
|
`Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`,
|
||||||
|
);
|
||||||
|
|
||||||
|
return acc;
|
||||||
|
}
|
||||||
|
|
||||||
|
processed.add(host.id);
|
||||||
|
}
|
||||||
|
|
||||||
|
return { ...acc, posts: [...acc.posts, curatedPost] };
|
||||||
}
|
}
|
||||||
|
|
||||||
const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPosts, user) => {
|
const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPosts, user) => {
|
||||||
const indexedHostIds = user.indexed.map(entry => entry.hostId); // already downloaded
|
const processed = new Set();
|
||||||
const processed = new Set(indexedHostIds);
|
const indexedPostIds = user.indexed.map(entry => entry.postId);
|
||||||
|
|
||||||
const posts = user.posts.reduce((accUserPosts, post, index) =>
|
const { posts, indexed } = user.posts.reduce((accUserPosts, post, index) =>
|
||||||
curatePost(accUserPosts, post, user, index, processed, args), []);
|
curatePost(accUserPosts, post, user, index, indexedPostIds, processed, args), { posts: [], indexed: {} });
|
||||||
|
|
||||||
|
const indexedLength = Object.keys(indexed).length;
|
||||||
|
|
||||||
|
if (indexedLength > 0) {
|
||||||
|
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${indexedLength} indexed posts for '${user.name}'`);
|
||||||
|
}
|
||||||
|
|
||||||
return { ...accPosts, [user.name]: { ...user, posts } };
|
return { ...accPosts, [user.name]: { ...user, posts } };
|
||||||
}, {});
|
}, {});
|
||||||
|
|
Loading…
Reference in New Issue