Reduced media concurrency to assess effect on memory. Moved qu context removal to deep scrape runner. Updated movie GraphQL queries.
This commit is contained in:
parent
9f37ec4cff
commit
e88554666c
|
@ -178,7 +178,7 @@ function initReleasesActions(store, router) {
|
|||
isS3
|
||||
}
|
||||
}
|
||||
poster: moviesPosterByMovieId {
|
||||
poster: moviesPoster {
|
||||
media {
|
||||
id
|
||||
path
|
||||
|
@ -204,7 +204,7 @@ function initReleasesActions(store, router) {
|
|||
isS3
|
||||
}
|
||||
}
|
||||
trailer: moviesTrailerByMovieId {
|
||||
trailer: moviesTrailer {
|
||||
media {
|
||||
id
|
||||
path
|
||||
|
|
|
@ -128,6 +128,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
|||
|
||||
const pathname = new URL(baseRelease.url).pathname.replace(/\//g, '_');
|
||||
|
||||
delete rawScrapedRelease.query; // some scrapers pass the qu-wrapped window instance to parent scrapers, filling up memory
|
||||
|
||||
if (windows.has(pathname)) {
|
||||
logger.debug(`Closing window for ${pathname}`);
|
||||
|
||||
|
|
|
@ -377,6 +377,7 @@ async function writeLazy(image, lazypath) {
|
|||
|
||||
async function storeImageFile(media, hashDir, hashSubDir, filename, filedir, filepath, options) {
|
||||
logger.silly(`Storing permanent media files for ${media.id} from ${media.src} at ${filepath}`);
|
||||
logger.debug(`Memory usage at image storage: ${process.memoryUsage.rss() / 1000000} MB (${media.src})`);
|
||||
|
||||
try {
|
||||
const thumbdir = config.s3.enabled ? path.join(media.role, 'thumbs') : path.join(media.role, 'thumbs', hashDir, hashSubDir);
|
||||
|
@ -746,7 +747,8 @@ async function storeMedias(baseMedias, options) {
|
|||
const fetchedMedias = await Promise.map(
|
||||
baseMedias,
|
||||
async (baseMedia) => fetchMedia(baseMedia, { existingSourceMediaByUrl, existingExtractMediaByUrl }),
|
||||
{ concurrency: 100 }, // don't overload disk (or network, although this has its own throttling)
|
||||
// { concurrency: 100 }, // don't overload disk (or network, although this has its own throttling)
|
||||
{ concurrency: 10 }, // don't overload disk (or network, although this has its own throttling)
|
||||
);
|
||||
|
||||
const { uniqueHashMedias, existingHashMedias } = await findHashDuplicates(fetchedMedias);
|
||||
|
|
|
@ -14,8 +14,6 @@ async function fetchScene(url, site, baseRelease, options) {
|
|||
release.channel = siteSlug;
|
||||
release.director = 'Mason';
|
||||
|
||||
delete release.query;
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
|
|
|
@ -41,8 +41,6 @@ async function filterUniqueReleases(releases) {
|
|||
const releaseIdentifiers = releases
|
||||
.map((release) => [release.entity.id, release.entryId.toString()]);
|
||||
|
||||
console.log(releaseIdentifiers.length);
|
||||
|
||||
const duplicateReleaseEntriesQuery = knex('releases')
|
||||
.select(knex.raw('releases.*, row_to_json(entities) as entity'))
|
||||
.leftJoin('entities', 'entities.id', 'releases.entity_id')
|
||||
|
@ -57,8 +55,6 @@ async function filterUniqueReleases(releases) {
|
|||
.orWhere(knex.raw('updated_at - date > INTERVAL \'1 day\'')); // scene was updated after the release date, no updates expected
|
||||
});
|
||||
|
||||
console.log(duplicateReleaseEntriesQuery.toString());
|
||||
|
||||
const duplicateReleaseEntries = await duplicateReleaseEntriesQuery;
|
||||
|
||||
const duplicateReleases = duplicateReleaseEntries.map((release) => curateRelease(release));
|
||||
|
|
Loading…
Reference in New Issue