Added fetch concurrency.

This commit is contained in:
DebaucheryLibrarian 2024-09-11 05:16:56 +02:00
parent e71b54e828
commit 18acb3a878
5 changed files with 7 additions and 20 deletions

View File

@@ -45,6 +45,7 @@ module.exports = {
limit: 1000, limit: 1000,
avoidDuplicates: true, avoidDuplicates: true,
retries: 3, retries: 3,
concurrency: 10,
watch: { watch: {
schedule: '*/30 * * * *', schedule: '*/30 * * * *',
}, },

View File

@@ -3,6 +3,7 @@
const config = require('config'); const config = require('config');
const Snoowrap = require('snoowrap'); const Snoowrap = require('snoowrap');
const fs = require('fs-extra'); const fs = require('fs-extra');
const Promise = require('bluebird');
const exiftool = require('node-exiftool'); const exiftool = require('node-exiftool');
const exiftoolBin = require('dist-exiftool'); const exiftoolBin = require('dist-exiftool');
const cron = require('node-cron'); const cron = require('node-cron');
@@ -69,7 +70,8 @@ async function getCompleteUserPosts() {
} }
function fetchSavePosts(userPosts, ep) { function fetchSavePosts(userPosts, ep) {
return Promise.all(Object.values(userPosts).map(user => fetchSaveContent(user, ep, args))); // don't map to apply concurrency limit and reduce network stress
return Promise.reduce(Object.values(userPosts), (acc, user) => fetchSaveContent(user, ep, args), null);
} }
async function initApp() { async function initApp() {

View File

@@ -36,7 +36,7 @@ function report(curatedPosts, indexed, user, args) {
`Ignoring ${afterIndexedCount} posts newer than the ${args.beforeIndexed} indexed post (${indexed[args.beforeIndexed].id}, ${indexed[args.beforeIndexed].date}) for '${user.name}'` `Ignoring ${afterIndexedCount} posts newer than the ${args.beforeIndexed} indexed post (${indexed[args.beforeIndexed].id}, ${indexed[args.beforeIndexed].date}) for '${user.name}'`
); );
} }
}; }
function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args) { function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args) {
const host = dissectLink(post.url); const host = dissectLink(post.url);

View File

@@ -73,6 +73,8 @@ async function fetchSaveContent(user, ep, args) {
}, []); }, []);
return post; return post;
}, {
concurrency: config.fetch.concurrency,
}); });
return writeToIndex(posts, profilePaths, user); return writeToIndex(posts, profilePaths, user);

View File

@@ -10,24 +10,6 @@ async function writeToIndex(posts, profilePaths, user) {
const filename = interpolate(config.library.index.file, user, null, false); const filename = interpolate(config.library.index.file, user, null, false);
const now = new Date(); const now = new Date();
/*
// Individual posts are wrapped in [] to get a YAML array value for each individual item, allowing them to be joined manually with a newline
// between each entry to improve human readability of the index while maintaining a valid YAML list
const originalEntries = user.indexed.original.map(entry => yaml.safeDump([entry]));
const newAndUpdatedEntries = posts.concat(user.indexed.updated).map(post => yaml.safeDump([{
id: post.id,
subreddit: post.subreddit,
permalink: post.permalink,
url: post.url,
hostId: post.host.id,
date: post.datetime,
indexed: now,
title: post.title,
}]));
const entries = newAndUpdatedEntries.concat(originalEntries).join('\n');
*/
const newAndUpdatedEntries = posts.concat(user.indexed.updated).map(post => ({ const newAndUpdatedEntries = posts.concat(user.indexed.updated).map(post => ({
id: post.id, id: post.id,
subreddit: post.subreddit, subreddit: post.subreddit,