Reduced media concurrency to assess its effect on memory usage. Moved qu context removal to the deep scrape runner. Updated movie GraphQL queries.

This commit is contained in:
DebaucheryLibrarian 2021-12-05 02:54:55 +01:00
parent 9f37ec4cff
commit e88554666c
5 changed files with 7 additions and 9 deletions

View File

@ -178,7 +178,7 @@ function initReleasesActions(store, router) {
isS3 isS3
} }
} }
poster: moviesPosterByMovieId { poster: moviesPoster {
media { media {
id id
path path
@ -204,7 +204,7 @@ function initReleasesActions(store, router) {
isS3 isS3
} }
} }
trailer: moviesTrailerByMovieId { trailer: moviesTrailer {
media { media {
id id
path path

View File

@ -128,6 +128,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
const pathname = new URL(baseRelease.url).pathname.replace(/\//g, '_'); const pathname = new URL(baseRelease.url).pathname.replace(/\//g, '_');
delete rawScrapedRelease.query; // some scrapers pass the qu-wrapped window instance to parent scrapers, filling up memory
if (windows.has(pathname)) { if (windows.has(pathname)) {
logger.debug(`Closing window for ${pathname}`); logger.debug(`Closing window for ${pathname}`);

View File

@ -377,6 +377,7 @@ async function writeLazy(image, lazypath) {
async function storeImageFile(media, hashDir, hashSubDir, filename, filedir, filepath, options) { async function storeImageFile(media, hashDir, hashSubDir, filename, filedir, filepath, options) {
logger.silly(`Storing permanent media files for ${media.id} from ${media.src} at ${filepath}`); logger.silly(`Storing permanent media files for ${media.id} from ${media.src} at ${filepath}`);
logger.debug(`Memory usage at image storage: ${process.memoryUsage.rss() / 1000000} MB (${media.src})`);
try { try {
const thumbdir = config.s3.enabled ? path.join(media.role, 'thumbs') : path.join(media.role, 'thumbs', hashDir, hashSubDir); const thumbdir = config.s3.enabled ? path.join(media.role, 'thumbs') : path.join(media.role, 'thumbs', hashDir, hashSubDir);
@ -746,7 +747,8 @@ async function storeMedias(baseMedias, options) {
const fetchedMedias = await Promise.map( const fetchedMedias = await Promise.map(
baseMedias, baseMedias,
async (baseMedia) => fetchMedia(baseMedia, { existingSourceMediaByUrl, existingExtractMediaByUrl }), async (baseMedia) => fetchMedia(baseMedia, { existingSourceMediaByUrl, existingExtractMediaByUrl }),
{ concurrency: 100 }, // don't overload disk (or network, although this has its own throttling) // { concurrency: 100 }, // don't overload disk (or network, although this has its own throttling)
{ concurrency: 10 }, // don't overload disk (or network, although this has its own throttling)
); );
const { uniqueHashMedias, existingHashMedias } = await findHashDuplicates(fetchedMedias); const { uniqueHashMedias, existingHashMedias } = await findHashDuplicates(fetchedMedias);

View File

@ -14,8 +14,6 @@ async function fetchScene(url, site, baseRelease, options) {
release.channel = siteSlug; release.channel = siteSlug;
release.director = 'Mason'; release.director = 'Mason';
delete release.query;
return release; return release;
} }

View File

@ -41,8 +41,6 @@ async function filterUniqueReleases(releases) {
const releaseIdentifiers = releases const releaseIdentifiers = releases
.map((release) => [release.entity.id, release.entryId.toString()]); .map((release) => [release.entity.id, release.entryId.toString()]);
console.log(releaseIdentifiers.length);
const duplicateReleaseEntriesQuery = knex('releases') const duplicateReleaseEntriesQuery = knex('releases')
.select(knex.raw('releases.*, row_to_json(entities) as entity')) .select(knex.raw('releases.*, row_to_json(entities) as entity'))
.leftJoin('entities', 'entities.id', 'releases.entity_id') .leftJoin('entities', 'entities.id', 'releases.entity_id')
@ -57,8 +55,6 @@ async function filterUniqueReleases(releases) {
.orWhere(knex.raw('updated_at - date > INTERVAL \'1 day\'')); // scene was updated after the release date, no updates expected .orWhere(knex.raw('updated_at - date > INTERVAL \'1 day\'')); // scene was updated after the release date, no updates expected
}); });
console.log(duplicateReleaseEntriesQuery.toString());
const duplicateReleaseEntries = await duplicateReleaseEntriesQuery; const duplicateReleaseEntries = await duplicateReleaseEntriesQuery;
const duplicateReleases = duplicateReleaseEntries.map((release) => curateRelease(release)); const duplicateReleases = duplicateReleaseEntries.map((release) => curateRelease(release));