Close the JSDOM window after each deep scrape in an attempt to save memory. Reduce deep scrape concurrency from 10 to 5.
This commit is contained in:
12
src/deep.js
12
src/deep.js
@@ -9,6 +9,7 @@ const { fetchReleaseEntities, urlToSiteSlug } = require('./entities');
|
||||
const logger = require('./logger')(__filename);
|
||||
const qu = require('./utils/qu');
|
||||
const getRecursiveParameters = require('./utils/get-recursive-parameters');
|
||||
const windows = require('./utils/http-windows');
|
||||
|
||||
function toBaseReleases(baseReleasesOrUrls, entity = null) {
|
||||
if (!baseReleasesOrUrls) {
|
||||
@@ -116,10 +117,19 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
||||
parameters: getRecursiveParameters(entity),
|
||||
};
|
||||
|
||||
logger.debug(`Memory usage before: ${process.memoryUsage.rss() / 1000000} MB (${baseRelease.url})`);
|
||||
|
||||
const rawScrapedRelease = type === 'scene'
|
||||
? await fetchScene(layoutScraper, baseRelease.url, entity, baseRelease, options, null)
|
||||
: await layoutScraper.fetchMovie(baseRelease.url, entity, baseRelease, options, null);
|
||||
|
||||
const pathname = new URL(baseRelease.url).pathname.replace(/\//g, '_');
|
||||
|
||||
windows.get(pathname)?.close();
|
||||
windows.delete(pathname);
|
||||
|
||||
logger.debug(`Memory usage after: ${process.memoryUsage.rss() / 1000000} MB (${baseRelease.url})`);
|
||||
|
||||
const scrapedRelease = rawScrapedRelease?.scene || rawScrapedRelease;
|
||||
|
||||
if (!scrapedRelease || typeof scrapedRelease !== 'object' || Array.isArray(scrapedRelease)) {
|
||||
@@ -186,7 +196,7 @@ async function scrapeReleases(baseReleases, entitiesBySlug, type) {
|
||||
return Promise.map(
|
||||
baseReleases,
|
||||
async (baseRelease) => scrapeRelease(baseRelease, entitiesWithBeforeDataBySlug, type),
|
||||
- { concurrency: 10 },
|
||||
+ { concurrency: 5 },
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user