Added waitImmediate to deep scrape, reduced concurrency.
This commit is contained in:
parent
56a7fb0ad9
commit
e41f9fa937
|
@ -1,5 +1,6 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
|
const util = require('util');
|
||||||
const Promise = require('bluebird');
|
const Promise = require('bluebird');
|
||||||
const { mergeAdvanced: merge } = require('object-merge-advanced');
|
const { mergeAdvanced: merge } = require('object-merge-advanced');
|
||||||
|
|
||||||
|
@ -11,6 +12,8 @@ const qu = require('./utils/qu');
|
||||||
const getRecursiveParameters = require('./utils/get-recursive-parameters');
|
const getRecursiveParameters = require('./utils/get-recursive-parameters');
|
||||||
const windows = require('./utils/http-windows');
|
const windows = require('./utils/http-windows');
|
||||||
|
|
||||||
|
const waitImmediate = util.promisify(setImmediate);
|
||||||
|
|
||||||
function toBaseReleases(baseReleasesOrUrls, entity = null) {
|
function toBaseReleases(baseReleasesOrUrls, entity = null) {
|
||||||
if (!baseReleasesOrUrls) {
|
if (!baseReleasesOrUrls) {
|
||||||
return [];
|
return [];
|
||||||
|
@ -128,6 +131,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
||||||
windows.get(pathname)?.close();
|
windows.get(pathname)?.close();
|
||||||
windows.delete(pathname);
|
windows.delete(pathname);
|
||||||
|
|
||||||
|
// Yield one setImmediate turn to the event loop before continuing.
// waitImmediate is a promisified setImmediate and must be invoked: awaiting the
// function object itself resolves on the microtask queue and never defers.
await waitImmediate();
|
||||||
|
|
||||||
logger.debug(`Memory usage after: ${process.memoryUsage.rss() / 1000000} MB (${baseRelease.url})`);
|
logger.debug(`Memory usage after: ${process.memoryUsage.rss() / 1000000} MB (${baseRelease.url})`);
|
||||||
|
|
||||||
const scrapedRelease = rawScrapedRelease?.scene || rawScrapedRelease;
|
const scrapedRelease = rawScrapedRelease?.scene || rawScrapedRelease;
|
||||||
|
@ -196,7 +201,7 @@ async function scrapeReleases(baseReleases, entitiesBySlug, type) {
|
||||||
return Promise.map(
|
return Promise.map(
|
||||||
baseReleases,
|
baseReleases,
|
||||||
async (baseRelease) => scrapeRelease(baseRelease, entitiesWithBeforeDataBySlug, type),
|
async (baseRelease) => scrapeRelease(baseRelease, entitiesWithBeforeDataBySlug, type),
|
||||||
{ concurrency: 5 },
|
{ concurrency: 3 },
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,12 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
|
const util = require('util');
|
||||||
const fs = require('fs').promises;
|
const fs = require('fs').promises;
|
||||||
const Promise = require('bluebird');
|
const Promise = require('bluebird');
|
||||||
const { JSDOM } = require('jsdom');
|
const { JSDOM } = require('jsdom');
|
||||||
|
|
||||||
|
const waitImmediate = util.promisify(setImmediate);
|
||||||
|
|
||||||
async function init() {
|
async function init() {
|
||||||
let peak = 0;
|
let peak = 0;
|
||||||
const files = await fs.readdir('./html');
|
const files = await fs.readdir('./html');
|
||||||
|
@ -19,9 +22,9 @@ async function init() {
|
||||||
|
|
||||||
console.log(`Memory usage: ${usage.toFixed(2)} MB, peak ${peak.toFixed(2)} MB`);
|
console.log(`Memory usage: ${usage.toFixed(2)} MB, peak ${peak.toFixed(2)} MB`);
|
||||||
|
|
||||||
await Promise.delay(100);
|
// Yield one setImmediate turn to the event loop between iterations.
// waitImmediate is a promisified setImmediate and must be invoked: awaiting the
// function object itself resolves on the microtask queue and never defers.
await waitImmediate();
|
||||||
}, {
|
}, {
|
||||||
concurrency: 10,
|
concurrency: 100,
|
||||||
});
|
});
|
||||||
|
|
||||||
await Promise.delay(2000);
|
await Promise.delay(2000);
|
||||||
|
|
Loading…
Reference in New Issue