Closing JSDOM window after deep scrape in an attempt to save memory. Reduced deep scrape concurrency to 5.
This commit is contained in:
3
src/utils/http-windows.js
Normal file
3
src/utils/http-windows.js
Normal file
@@ -0,0 +1,3 @@
|
||||
'use strict';
|
||||
|
||||
// Module-level Map exported as-is, so every module that requires this file receives the same instance.
// The HTTP utility stores parsed JSDOM windows in it, keyed by the request URL's pathname with
// every '/' replaced by '_'.
// NOTE(review): nothing here removes entries; presumably the consumer closes and deletes each
// window once it is done scraping — confirm against the callers.
module.exports = new Map();
|
||||
@@ -3,12 +3,15 @@
|
||||
const config = require('config');
|
||||
const Promise = require('bluebird');
|
||||
const bhttp = require('bhttp');
|
||||
const fs = require('fs').promises;
|
||||
const util = require('util');
|
||||
const stream = require('stream');
|
||||
const tunnel = require('tunnel');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const { JSDOM, toughCookie } = require('jsdom');
|
||||
|
||||
const windows = require('./http-windows');
|
||||
|
||||
const logger = require('../logger')(__filename);
|
||||
const virtualConsole = require('./virtual-console')(__filename);
|
||||
const argv = require('../argv');
|
||||
@@ -114,6 +117,15 @@ async function finalizeResult(res, options) {
|
||||
if (Buffer.isBuffer(res.body)) {
|
||||
const html = res.body.toString();
|
||||
const window = options?.parse ? new JSDOM(html, { virtualConsole, ...options.extract }).window : null;
|
||||
const pathname = new URL(res.request.url).pathname.replace(/\//g, '_');
|
||||
|
||||
if (window) {
|
||||
windows.set(pathname, window);
|
||||
}
|
||||
|
||||
if (argv.saveHtml) {
|
||||
await fs.writeFile(`./html/${pathname}.html`, html);
|
||||
}
|
||||
|
||||
return {
|
||||
...res,
|
||||
|
||||
32
src/utils/jsdom-perf.js
Normal file
32
src/utils/jsdom-perf.js
Normal file
@@ -0,0 +1,32 @@
|
||||
'use strict';
|
||||
|
||||
const fs = require('fs').promises;
|
||||
const Promise = require('bluebird');
|
||||
const { JSDOM } = require('jsdom');
|
||||
|
||||
/**
 * Benchmark JSDOM memory behaviour against the saved pages in ./html.
 *
 * Every entry of the directory is parsed ten times over, with at most ten
 * parses in flight at once. Each window is closed right after construction,
 * and the process RSS (bytes / 1,000,000) is logged after every parse together
 * with the highest value observed so far. A final reading is logged after a
 * two second pause.
 *
 * @returns {Promise<void>} Settles once the final memory figure has been logged.
 */
async function init() {
  // Resident set size of the current process, expressed in (decimal) megabytes.
  const readRssMb = () => process.memoryUsage.rss() / 1000000;

  const filenames = await fs.readdir('./html');
  // Ten back-to-back copies of the directory listing, so each file is parsed ten times.
  const workload = Array.from({ length: 10 }, () => filenames).flat();

  let highest = 0;

  const measure = async (name) => {
    const markup = await fs.readFile(`./html/${name}`, 'utf8');
    const parsed = new JSDOM(markup);

    // Close immediately: the point of the benchmark is to see what memory remains afterwards.
    parsed.window.close();

    const current = readRssMb();

    highest = Math.max(current, highest);
    console.log(`Memory usage: ${current.toFixed(2)} MB, peak ${highest.toFixed(2)} MB`);

    await Promise.delay(100);
  };

  await Promise.map(workload, measure, { concurrency: 10 });
  await Promise.delay(2000);

  console.log(`Final memory usage: ${readRssMb().toFixed(2)} MB, max ${highest.toFixed(2)} MB`);
}
|
||||
|
||||
// Kick off the benchmark. The returned promise is handled explicitly so that a
// failure (e.g. a missing ./html directory) is reported and reflected in the
// exit code instead of surfacing as an unhandled rejection.
init().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user