From 18acb3a878833706e824f73920500e041f1579af Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 11 Sep 2024 05:16:56 +0200 Subject: [PATCH] Added fetch concurrency. --- config/default.js | 1 + src/app.js | 4 +++- src/curate/posts.js | 2 +- src/fetch/content.js | 2 ++ src/save/writeToIndex.js | 18 ------------------ 5 files changed, 7 insertions(+), 20 deletions(-) diff --git a/config/default.js b/config/default.js index 471e927..37355f5 100644 --- a/config/default.js +++ b/config/default.js @@ -45,6 +45,7 @@ module.exports = { limit: 1000, avoidDuplicates: true, retries: 3, + concurrency: 10, watch: { schedule: '*/30 * * * *', }, diff --git a/src/app.js b/src/app.js index 057bad1..49f8b69 100644 --- a/src/app.js +++ b/src/app.js @@ -3,6 +3,7 @@ const config = require('config'); const Snoowrap = require('snoowrap'); const fs = require('fs-extra'); +const Promise = require('bluebird'); const exiftool = require('node-exiftool'); const exiftoolBin = require('dist-exiftool'); const cron = require('node-cron'); @@ -69,7 +70,8 @@ async function getCompleteUserPosts() { } function fetchSavePosts(userPosts, ep) { - return Promise.all(Object.values(userPosts).map(user => fetchSaveContent(user, ep, args))); + // don't map to apply concurrency limit and reduce network stress + return Promise.reduce(Object.values(userPosts), (acc, user) => fetchSaveContent(user, ep, args), null); } async function initApp() { diff --git a/src/curate/posts.js b/src/curate/posts.js index 4518d37..6f16d75 100644 --- a/src/curate/posts.js +++ b/src/curate/posts.js @@ -36,7 +36,7 @@ function report(curatedPosts, indexed, user, args) { `Ignoring ${afterIndexedCount} posts newer than the ${args.beforeIndexed} indexed post (${indexed[args.beforeIndexed].id}, ${indexed[args.beforeIndexed].date}) for '${user.name}'` ); } -}; +} function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args) { const host = dissectLink(post.url); diff --git a/src/fetch/content.js b/src/fetch/content.js index 2a710e0..07adb00 100644 --- a/src/fetch/content.js +++ b/src/fetch/content.js @@ -73,6 +73,8 @@ async function fetchSaveContent(user, ep, args) { }, []); return post; + }, { + concurrency: config.fetch.concurrency, }); return writeToIndex(posts, profilePaths, user); diff --git a/src/save/writeToIndex.js b/src/save/writeToIndex.js index ce2f5a7..784c664 100644 --- a/src/save/writeToIndex.js +++ b/src/save/writeToIndex.js @@ -10,24 +10,6 @@ async function writeToIndex(posts, profilePaths, user) { const filename = interpolate(config.library.index.file, user, null, false); const now = new Date(); - /* - // Individual posts are wrapped in [] to get a YAML array value for each individual item, allowing them to be joined manually with a newline - // between each entry to improve human readability of the index while maintaining a valid YAML list - const originalEntries = user.indexed.original.map(entry => yaml.safeDump([entry])); - const newAndUpdatedEntries = posts.concat(user.indexed.updated).map(post => yaml.safeDump([{ - id: post.id, - subreddit: post.subreddit, - permalink: post.permalink, - url: post.url, - hostId: post.host.id, - date: post.datetime, - indexed: now, - title: post.title, - }])); - - const entries = newAndUpdatedEntries.concat(originalEntries).join('\n'); - */ - const newAndUpdatedEntries = posts.concat(user.indexed.updated).map(post => ({ id: post.id, subreddit: post.subreddit,