Added waitImmediate to deep scrape, reduced concurrency.

This commit is contained in:
DebaucheryLibrarian 2021-12-01 23:30:10 +01:00
parent 56a7fb0ad9
commit e41f9fa937
2 changed files with 11 additions and 3 deletions

View File

@ -1,5 +1,6 @@
'use strict'; 'use strict';
const util = require('util');
const Promise = require('bluebird'); const Promise = require('bluebird');
const { mergeAdvanced: merge } = require('object-merge-advanced'); const { mergeAdvanced: merge } = require('object-merge-advanced');
@ -11,6 +12,8 @@ const qu = require('./utils/qu');
const getRecursiveParameters = require('./utils/get-recursive-parameters'); const getRecursiveParameters = require('./utils/get-recursive-parameters');
const windows = require('./utils/http-windows'); const windows = require('./utils/http-windows');
const waitImmediate = util.promisify(setImmediate);
function toBaseReleases(baseReleasesOrUrls, entity = null) { function toBaseReleases(baseReleasesOrUrls, entity = null) {
if (!baseReleasesOrUrls) { if (!baseReleasesOrUrls) {
return []; return [];
@ -128,6 +131,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
windows.get(pathname)?.close(); windows.get(pathname)?.close();
windows.delete(pathname); windows.delete(pathname);
// Call the promisified setImmediate: awaiting the bare function reference
// settles on the microtask queue and never yields to the event loop.
await waitImmediate();
logger.debug(`Memory usage after: ${process.memoryUsage.rss() / 1000000} MB (${baseRelease.url})`); logger.debug(`Memory usage after: ${process.memoryUsage.rss() / 1000000} MB (${baseRelease.url})`);
const scrapedRelease = rawScrapedRelease?.scene || rawScrapedRelease; const scrapedRelease = rawScrapedRelease?.scene || rawScrapedRelease;
@ -196,7 +201,7 @@ async function scrapeReleases(baseReleases, entitiesBySlug, type) {
return Promise.map( return Promise.map(
baseReleases, baseReleases,
async (baseRelease) => scrapeRelease(baseRelease, entitiesWithBeforeDataBySlug, type), async (baseRelease) => scrapeRelease(baseRelease, entitiesWithBeforeDataBySlug, type),
{ concurrency: 5 }, { concurrency: 3 },
); );
} }

View File

@ -1,9 +1,12 @@
'use strict'; 'use strict';
const util = require('util');
const fs = require('fs').promises; const fs = require('fs').promises;
const Promise = require('bluebird'); const Promise = require('bluebird');
const { JSDOM } = require('jsdom'); const { JSDOM } = require('jsdom');
const waitImmediate = util.promisify(setImmediate);
async function init() { async function init() {
let peak = 0; let peak = 0;
const files = await fs.readdir('./html'); const files = await fs.readdir('./html');
@ -19,9 +22,9 @@ async function init() {
console.log(`Memory usage: ${usage.toFixed(2)} MB, peak ${peak.toFixed(2)} MB`); console.log(`Memory usage: ${usage.toFixed(2)} MB, peak ${peak.toFixed(2)} MB`);
// Call the promisified setImmediate: awaiting the bare function reference
// settles on the microtask queue and never yields to the event loop.
await Promise.delay(100); await waitImmediate();
}, { }, {
concurrency: 10, concurrency: 100,
}); });
await Promise.delay(2000); await Promise.delay(2000);