Extracting indexed posts from fetched posts to allow for index rewrite flexibility.

This commit is contained in:
DebaucheryLibrarian 2024-09-11 05:16:56 +02:00
parent c49e1edca0
commit f41b788183
1 changed file with 57 additions and 45 deletions

View File

@ -6,49 +6,11 @@ const omit = require('object.omit');
const dissectLink = require('../dissectLink.js');
const hashPost = require('./hashPost.js');
function curatePost(accUserPosts, post, user, index, processed, args) {
// cut-off at limit, but don't count posts requested directly by ID
if (accUserPosts.length >= args.limit && !post.direct) {
return accUserPosts;
}
function curatePost(acc, post, user, index, indexedPostIds, processed, args) {
const host = dissectLink(post.url);
const permalink = `https://reddit.com${post.permalink}`;
const ignoring = args.ignore ? args.ignore.find(prop => post[prop]) : null;
if (ignoring) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`);
return accUserPosts;
}
if (host) {
const hostIncludes = args.include && !args.include.includes(host.label);
const hostExcluded = args.exclude && args.exclude.includes(host.label);
if (hostIncludes || hostExcluded) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring source '${host.label}' from post '${post.url}' (${permalink})`,
);
return accUserPosts;
}
if (config.fetch.avoidDuplicates && processed.has(host.id)) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`,
);
return accUserPosts;
}
processed.add(host.id);
}
return accUserPosts.concat({
const curatedPost = {
id: post.id,
index,
title: post.title,
@ -61,15 +23,65 @@ function curatePost(accUserPosts, post, user, index, processed, args) {
preview: post.preview ? post.preview.images.map(image => image.source) : null,
host,
hash: hashPost(post),
});
};
if (indexedPostIds.includes(post.id)) {
return { ...acc, indexed: { ...acc.indexed, [post.id]: curatedPost } };
}
// cut-off at limit, but don't count posts requested directly by ID
if (acc.posts.length >= args.limit && !post.direct) {
return acc;
}
const ignoring = args.ignore ? args.ignore.find(prop => post[prop]) : null;
if (ignoring) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`);
return acc;
}
if (host) {
const hostIncludes = args.include && !args.include.includes(host.label);
const hostExcluded = args.exclude && args.exclude.includes(host.label);
if (hostIncludes || hostExcluded) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring source '${host.label}' from post '${post.url}' (${permalink})`,
);
return acc;
}
if (config.fetch.avoidDuplicates && processed.has(host.id)) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring duplicate content '${post.url}' (cross-post, repost, or superfluous --post ID) (${permalink})`,
);
return acc;
}
processed.add(host.id);
}
return { ...acc, posts: [...acc.posts, curatedPost] };
}
const curatePosts = (userPosts, args) => Object.values(userPosts).reduce((accPosts, user) => {
const indexedHostIds = user.indexed.map(entry => entry.hostId); // already downloaded
const processed = new Set(indexedHostIds);
const processed = new Set();
const indexedPostIds = user.indexed.map(entry => entry.postId);
const posts = user.posts.reduce((accUserPosts, post, index) =>
curatePost(accUserPosts, post, user, index, processed, args), []);
const { posts, indexed } = user.posts.reduce((accUserPosts, post, index) =>
curatePost(accUserPosts, post, user, index, indexedPostIds, processed, args), { posts: [], indexed: {} });
const indexedLength = Object.keys(indexed).length;
if (indexedLength > 0) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${indexedLength} indexed posts for '${user.name}'`);
}
return { ...accPosts, [user.name]: { ...user, posts } };
}, {});