forked from DebaucheryLibrarian/traxxx

Reduced media concurrency to assess its effect on memory. Moved qu context removal to the deep scrape runner. Updated movie GraphQL queries.
parent 9f37ec4cff
commit e88554666c
@@ -178,7 +178,7 @@ function initReleasesActions(store, router) {
 							isS3
 						}
 					}
-					poster: moviesPosterByMovieId {
+					poster: moviesPoster {
 						media {
 							id
 							path
@@ -204,7 +204,7 @@ function initReleasesActions(store, router) {
 							isS3
 						}
 					}
-					trailer: moviesTrailerByMovieId {
+					trailer: moviesTrailer {
 						media {
 							id
 							path
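Both hunks make the same rename, dropping the ByMovieId suffix from the movie relation fields. As a sketch (the surrounding query shape is assumed, not shown in these hunks), the updated selections read:

	poster: moviesPoster {
		media { id path isS3 }
	}
	trailer: moviesTrailer {
		media { id path isS3 }
	}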
@@ -128,6 +128,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {

 		const pathname = new URL(baseRelease.url).pathname.replace(/\//g, '_');

+		delete rawScrapedRelease.query; // some scrapers pass the qu-wrapped window instance to parent scrapers, filling up memory
+
 		if (windows.has(pathname)) {
 			logger.debug(`Closing window for ${pathname}`);
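Why the centralized delete helps, as a sketch: scrapers that delegate to other scrapers can return the qu query context alongside the release data, so every retained release keeps an entire parsed window alive until scraping finishes. Removing the reference in the deep scrape runner lets each window be garbage-collected. The names below are illustrative, not the actual scraper API:

	// Illustrative only: a child scraper accidentally returning its qu context.
	async function fetchSceneFromChild(url) {
		const { query } = await fetchHtml(url); // hypothetical helper returning a qu-wrapped window

		return {
			title: query.content('.title'), // hypothetical accessor
			query, // heavy reference that escapes to the parent scraper
		};
	}

	// The runner now strips it once, centrally, instead of each scraper doing so:
	delete rawScrapedRelease.query;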
@@ -377,6 +377,7 @@ async function writeLazy(image, lazypath) {

 async function storeImageFile(media, hashDir, hashSubDir, filename, filedir, filepath, options) {
 	logger.silly(`Storing permanent media files for ${media.id} from ${media.src} at ${filepath}`);
+	logger.debug(`Memory usage at image storage: ${process.memoryUsage.rss() / 1000000} MB (${media.src})`);

 	try {
 		const thumbdir = config.s3.enabled ? path.join(media.role, 'thumbs') : path.join(media.role, 'thumbs', hashDir, hashSubDir);
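process.memoryUsage.rss() is Node's own API and returns the resident set size in bytes, hence the division by 1000000 to log MB. On older Node versions the same value is available via the object form:

	// Equivalent on Node versions that predate the standalone rss() function:
	logger.debug(`Memory usage at image storage: ${process.memoryUsage().rss / 1000000} MB (${media.src})`);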
@@ -746,7 +747,8 @@ async function storeMedias(baseMedias, options) {
 	const fetchedMedias = await Promise.map(
 		baseMedias,
 		async (baseMedia) => fetchMedia(baseMedia, { existingSourceMediaByUrl, existingExtractMediaByUrl }),
-		{ concurrency: 100 }, // don't overload disk (or network, although this has its own throttling)
+		// { concurrency: 100 }, // don't overload disk (or network, although this has its own throttling)
+		{ concurrency: 10 }, // don't overload disk (or network, although this has its own throttling)
 	);

 	const { uniqueHashMedias, existingHashMedias } = await findHashDuplicates(fetchedMedias);
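A Promise.map that accepts a concurrency option matches Bluebird's API: at most that many mapper calls run at once, bounding how many media fetches (and their buffers) are in flight simultaneously, though completed results still accumulate until the whole map resolves. A minimal standalone sketch, assuming Bluebird:

	// Sketch, assuming Bluebird's Promise.map; fetchItem is a hypothetical fetcher.
	const Promise = require('bluebird');

	const fetched = await Promise.map(
		items,
		async (item) => fetchItem(item),
		{ concurrency: 10 }, // caps concurrent fetches; a lower value trades throughput for lower peak memory
	);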
@@ -14,8 +14,6 @@ async function fetchScene(url, site, baseRelease, options) {
 	release.channel = siteSlug;
 	release.director = 'Mason';

-	delete release.query;
-
 	return release;
 }

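This per-scraper delete release.query is removed because it is superseded by the centralized cleanup in the deep scrape runner above.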
@@ -41,8 +41,6 @@ async function filterUniqueReleases(releases) {
 	const releaseIdentifiers = releases
 		.map((release) => [release.entity.id, release.entryId.toString()]);

-	console.log(releaseIdentifiers.length);
-
 	const duplicateReleaseEntriesQuery = knex('releases')
 		.select(knex.raw('releases.*, row_to_json(entities) as entity'))
 		.leftJoin('entities', 'entities.id', 'releases.entity_id')
@@ -57,8 +55,6 @@ async function filterUniqueReleases(releases) {
 				.orWhere(knex.raw('updated_at - date > INTERVAL \'1 day\'')); // scene was updated after the release date, no updates expected
 		});

-	console.log(duplicateReleaseEntriesQuery.toString());
-
 	const duplicateReleaseEntries = await duplicateReleaseEntriesQuery;

 	const duplicateReleases = duplicateReleaseEntries.map((release) => curateRelease(release));
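The removed console.log calls were leftover debug output. The clause that matches releaseIdentifiers against the duplicates query falls between these two hunks and is not shown; as a pattern sketch only (not the actual elided code), a composite-key match in knex looks like:

	// Pattern sketch; the real matching clause is elided from this hunk.
	const duplicates = await knex('releases')
		.whereIn(['entity_id', 'entry_id'], releaseIdentifiers); // row-value IN, supported on PostgreSQL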