From e88554666cfb592629b3bb1b52e995587e918fe4 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sun, 5 Dec 2021 02:54:55 +0100 Subject: [PATCH] Reduced media concurrency to assess effect on memory. Moved qu context removal to deep scrape runner. Updated movie graphql queries. --- assets/js/releases/actions.js | 4 ++-- src/deep.js | 2 ++ src/media.js | 4 +++- src/scrapers/xempire.js | 2 -- src/updates.js | 4 ---- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/assets/js/releases/actions.js b/assets/js/releases/actions.js index 83611498..4edf5dff 100644 --- a/assets/js/releases/actions.js +++ b/assets/js/releases/actions.js @@ -178,7 +178,7 @@ function initReleasesActions(store, router) { isS3 } } - poster: moviesPosterByMovieId { + poster: moviesPoster { media { id path @@ -204,7 +204,7 @@ function initReleasesActions(store, router) { isS3 } } - trailer: moviesTrailerByMovieId { + trailer: moviesTrailer { media { id path diff --git a/src/deep.js b/src/deep.js index a08646cc..43ea6016 100644 --- a/src/deep.js +++ b/src/deep.js @@ -128,6 +128,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') { const pathname = new URL(baseRelease.url).pathname.replace(/\//g, '_'); + delete rawScrapedRelease.query; // some scrapers pass the qu-wrapped window instance to parent scrapers, filling up memory + if (windows.has(pathname)) { logger.debug(`Closing window for ${pathname}`); diff --git a/src/media.js b/src/media.js index 13c0feda..8bee3c53 100644 --- a/src/media.js +++ b/src/media.js @@ -377,6 +377,7 @@ async function writeLazy(image, lazypath) { async function storeImageFile(media, hashDir, hashSubDir, filename, filedir, filepath, options) { logger.silly(`Storing permanent media files for ${media.id} from ${media.src} at ${filepath}`); + logger.debug(`Memory usage at image storage: ${process.memoryUsage.rss() / 1000000} MB (${media.src})`); try { const thumbdir = config.s3.enabled ? path.join(media.role, 'thumbs') : path.join(media.role, 'thumbs', hashDir, hashSubDir); @@ -746,7 +747,8 @@ async function storeMedias(baseMedias, options) { const fetchedMedias = await Promise.map( baseMedias, async (baseMedia) => fetchMedia(baseMedia, { existingSourceMediaByUrl, existingExtractMediaByUrl }), - { concurrency: 100 }, // don't overload disk (or network, although this has its own throttling) + // { concurrency: 100 }, // don't overload disk (or network, although this has its own throttling) + { concurrency: 10 }, // don't overload disk (or network, although this has its own throttling) ); const { uniqueHashMedias, existingHashMedias } = await findHashDuplicates(fetchedMedias); diff --git a/src/scrapers/xempire.js b/src/scrapers/xempire.js index ff2f66c5..e74cb576 100644 --- a/src/scrapers/xempire.js +++ b/src/scrapers/xempire.js @@ -14,8 +14,6 @@ async function fetchScene(url, site, baseRelease, options) { release.channel = siteSlug; release.director = 'Mason'; - delete release.query; - return release; } diff --git a/src/updates.js b/src/updates.js index e783d53a..e85d2157 100644 --- a/src/updates.js +++ b/src/updates.js @@ -41,8 +41,6 @@ async function filterUniqueReleases(releases) { const releaseIdentifiers = releases .map((release) => [release.entity.id, release.entryId.toString()]); - console.log(releaseIdentifiers.length); - const duplicateReleaseEntriesQuery = knex('releases') .select(knex.raw('releases.*, row_to_json(entities) as entity')) .leftJoin('entities', 'entities.id', 'releases.entity_id') @@ -57,8 +55,6 @@ async function filterUniqueReleases(releases) { .orWhere(knex.raw('updated_at - date > INTERVAL \'1 day\'')); // scene was updated after the release date, no updates expected }); - console.log(duplicateReleaseEntriesQuery.toString()); - const duplicateReleaseEntries = await duplicateReleaseEntriesQuery; const duplicateReleases = duplicateReleaseEntries.map((release) => curateRelease(release));