Added waitImmediate to deep scrape, reduced concurrency.
This commit is contained in:
		
							parent
							
								
									56a7fb0ad9
								
							
						
					
					
						commit
						e41f9fa937
					
				|  | @ -1,5 +1,6 @@ | |||
| 'use strict'; | ||||
| 
 | ||||
| const util = require('util'); | ||||
| const Promise = require('bluebird'); | ||||
| const { mergeAdvanced: merge } = require('object-merge-advanced'); | ||||
| 
 | ||||
|  | @ -11,6 +12,8 @@ const qu = require('./utils/qu'); | |||
| const getRecursiveParameters = require('./utils/get-recursive-parameters'); | ||||
| const windows = require('./utils/http-windows'); | ||||
| 
 | ||||
| const waitImmediate = util.promisify(setImmediate); | ||||
| 
 | ||||
| function toBaseReleases(baseReleasesOrUrls, entity = null) { | ||||
| 	if (!baseReleasesOrUrls) { | ||||
| 		return []; | ||||
|  | @ -128,6 +131,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') { | |||
| 		windows.get(pathname)?.close(); | ||||
| 		windows.delete(pathname); | ||||
| 
 | ||||
| 		await waitImmediate; | ||||
| 
 | ||||
| 		logger.debug(`Memory usage after: ${process.memoryUsage.rss() / 1000000} MB (${baseRelease.url})`); | ||||
| 
 | ||||
| 		const scrapedRelease = rawScrapedRelease?.scene || rawScrapedRelease; | ||||
|  | @ -196,7 +201,7 @@ async function scrapeReleases(baseReleases, entitiesBySlug, type) { | |||
| 	return Promise.map( | ||||
| 		baseReleases, | ||||
| 		async (baseRelease) => scrapeRelease(baseRelease, entitiesWithBeforeDataBySlug, type), | ||||
| 		{ concurrency: 5 }, | ||||
| 		{ concurrency: 3 }, | ||||
| 	); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,9 +1,12 @@ | |||
| 'use strict'; | ||||
| 
 | ||||
| const util = require('util'); | ||||
| const fs = require('fs').promises; | ||||
| const Promise = require('bluebird'); | ||||
| const { JSDOM } = require('jsdom'); | ||||
| 
 | ||||
| const waitImmediate = util.promisify(setImmediate); | ||||
| 
 | ||||
| async function init() { | ||||
| 	let peak = 0; | ||||
| 	const files = await fs.readdir('./html'); | ||||
|  | @ -19,9 +22,9 @@ async function init() { | |||
| 
 | ||||
| 		console.log(`Memory usage: ${usage.toFixed(2)} MB, peak ${peak.toFixed(2)} MB`); | ||||
| 
 | ||||
| 		await Promise.delay(100); | ||||
| 		await waitImmediate; | ||||
| 	}, { | ||||
| 		concurrency: 10, | ||||
| 		concurrency: 100, | ||||
| 	}); | ||||
| 
 | ||||
| 	await Promise.delay(2000); | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue