From e41f9fa937e297ba0d86d7c4510a51747a4f4a90 Mon Sep 17 00:00:00 2001
From: DebaucheryLibrarian
Date: Wed, 1 Dec 2021 23:30:10 +0100
Subject: [PATCH] Added waitImmediate to deep scrape, reduced concurrency.

---
Review notes (text between the '---' marker and the first 'diff --git'
line is not part of the commit):

* waitImmediate is util.promisify(setImmediate), i.e. a function that
  returns a promise. It has to be called: 'await waitImmediate' awaits the
  function object itself and resumes without ever scheduling a setImmediate
  callback. Both added await lines therefore call waitImmediate().
* This patch text was restored from a copy whose line breaks and
  indentation had been collapsed. Tab indentation of the context lines is
  assumed, not confirmed; if a hunk does not match, apply with
  --ignore-whitespace.
* The commit id and the blob ids on the 'index' lines are those of the
  original commit and predate the waitImmediate() correction.

 src/deep.js             | 7 ++++++-
 src/utils/jsdom-perf.js | 7 +++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/deep.js b/src/deep.js
index 953cee3d..e65b4b38 100644
--- a/src/deep.js
+++ b/src/deep.js
@@ -1,5 +1,6 @@
 'use strict';
 
+const util = require('util');
 const Promise = require('bluebird');
 const { mergeAdvanced: merge } = require('object-merge-advanced');
 
@@ -11,6 +12,8 @@ const qu = require('./utils/qu');
 const getRecursiveParameters = require('./utils/get-recursive-parameters');
 const windows = require('./utils/http-windows');
 
+const waitImmediate = util.promisify(setImmediate);
+
 function toBaseReleases(baseReleasesOrUrls, entity = null) {
 	if (!baseReleasesOrUrls) {
 		return [];
@@ -128,6 +131,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
 		windows.get(pathname)?.close();
 		windows.delete(pathname);
 
+		await waitImmediate();
+
 		logger.debug(`Memory usage after: ${process.memoryUsage.rss() / 1000000} MB (${baseRelease.url})`);
 
 		const scrapedRelease = rawScrapedRelease?.scene || rawScrapedRelease;
@@ -196,7 +201,7 @@ async function scrapeReleases(baseReleases, entitiesBySlug, type) {
 	return Promise.map(
 		baseReleases,
 		async (baseRelease) => scrapeRelease(baseRelease, entitiesWithBeforeDataBySlug, type),
-		{ concurrency: 5 },
+		{ concurrency: 3 },
 	);
 }
 
diff --git a/src/utils/jsdom-perf.js b/src/utils/jsdom-perf.js
index 74336ff3..ed8f70a0 100644
--- a/src/utils/jsdom-perf.js
+++ b/src/utils/jsdom-perf.js
@@ -1,9 +1,12 @@
 'use strict';
 
+const util = require('util');
 const fs = require('fs').promises;
 const Promise = require('bluebird');
 const { JSDOM } = require('jsdom');
 
+const waitImmediate = util.promisify(setImmediate);
+
 async function init() {
 	let peak = 0;
 	const files = await fs.readdir('./html');
@@ -19,9 +22,9 @@ async function init() {
 
 		console.log(`Memory usage: ${usage.toFixed(2)} MB, peak ${peak.toFixed(2)} MB`);
 
-		await Promise.delay(100);
+		await waitImmediate();
 	}, {
-		concurrency: 10,
+		concurrency: 100,
 	});
 
 	await Promise.delay(2000);