Reduced media concurrency to assess the effect on memory usage. Moved removal of the qu context to the deep scrape runner. Updated the movie GraphQL queries.
This commit is contained in:
parent
9f37ec4cff
commit
e88554666c
|
@ -178,7 +178,7 @@ function initReleasesActions(store, router) {
|
||||||
isS3
|
isS3
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
poster: moviesPosterByMovieId {
|
poster: moviesPoster {
|
||||||
media {
|
media {
|
||||||
id
|
id
|
||||||
path
|
path
|
||||||
|
@ -204,7 +204,7 @@ function initReleasesActions(store, router) {
|
||||||
isS3
|
isS3
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
trailer: moviesTrailerByMovieId {
|
trailer: moviesTrailer {
|
||||||
media {
|
media {
|
||||||
id
|
id
|
||||||
path
|
path
|
||||||
|
|
|
@ -128,6 +128,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
||||||
|
|
||||||
const pathname = new URL(baseRelease.url).pathname.replace(/\//g, '_');
|
const pathname = new URL(baseRelease.url).pathname.replace(/\//g, '_');
|
||||||
|
|
||||||
|
delete rawScrapedRelease.query; // some scrapers pass the qu-wrapped window instance to parent scrapers, filling up memory
|
||||||
|
|
||||||
if (windows.has(pathname)) {
|
if (windows.has(pathname)) {
|
||||||
logger.debug(`Closing window for ${pathname}`);
|
logger.debug(`Closing window for ${pathname}`);
|
||||||
|
|
||||||
|
|
|
@ -377,6 +377,7 @@ async function writeLazy(image, lazypath) {
|
||||||
|
|
||||||
async function storeImageFile(media, hashDir, hashSubDir, filename, filedir, filepath, options) {
|
async function storeImageFile(media, hashDir, hashSubDir, filename, filedir, filepath, options) {
|
||||||
logger.silly(`Storing permanent media files for ${media.id} from ${media.src} at ${filepath}`);
|
logger.silly(`Storing permanent media files for ${media.id} from ${media.src} at ${filepath}`);
|
||||||
|
logger.debug(`Memory usage at image storage: ${process.memoryUsage.rss() / 1000000} MB (${media.src})`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const thumbdir = config.s3.enabled ? path.join(media.role, 'thumbs') : path.join(media.role, 'thumbs', hashDir, hashSubDir);
|
const thumbdir = config.s3.enabled ? path.join(media.role, 'thumbs') : path.join(media.role, 'thumbs', hashDir, hashSubDir);
|
||||||
|
@ -746,7 +747,8 @@ async function storeMedias(baseMedias, options) {
|
||||||
const fetchedMedias = await Promise.map(
|
const fetchedMedias = await Promise.map(
|
||||||
baseMedias,
|
baseMedias,
|
||||||
async (baseMedia) => fetchMedia(baseMedia, { existingSourceMediaByUrl, existingExtractMediaByUrl }),
|
async (baseMedia) => fetchMedia(baseMedia, { existingSourceMediaByUrl, existingExtractMediaByUrl }),
|
||||||
{ concurrency: 100 }, // don't overload disk (or network, although this has its own throttling)
|
// { concurrency: 100 }, // don't overload disk (or network, although this has its own throttling)
|
||||||
|
{ concurrency: 10 }, // don't overload disk (or network, although this has its own throttling)
|
||||||
);
|
);
|
||||||
|
|
||||||
const { uniqueHashMedias, existingHashMedias } = await findHashDuplicates(fetchedMedias);
|
const { uniqueHashMedias, existingHashMedias } = await findHashDuplicates(fetchedMedias);
|
||||||
|
|
|
@ -14,8 +14,6 @@ async function fetchScene(url, site, baseRelease, options) {
|
||||||
release.channel = siteSlug;
|
release.channel = siteSlug;
|
||||||
release.director = 'Mason';
|
release.director = 'Mason';
|
||||||
|
|
||||||
delete release.query;
|
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -41,8 +41,6 @@ async function filterUniqueReleases(releases) {
|
||||||
const releaseIdentifiers = releases
|
const releaseIdentifiers = releases
|
||||||
.map((release) => [release.entity.id, release.entryId.toString()]);
|
.map((release) => [release.entity.id, release.entryId.toString()]);
|
||||||
|
|
||||||
console.log(releaseIdentifiers.length);
|
|
||||||
|
|
||||||
const duplicateReleaseEntriesQuery = knex('releases')
|
const duplicateReleaseEntriesQuery = knex('releases')
|
||||||
.select(knex.raw('releases.*, row_to_json(entities) as entity'))
|
.select(knex.raw('releases.*, row_to_json(entities) as entity'))
|
||||||
.leftJoin('entities', 'entities.id', 'releases.entity_id')
|
.leftJoin('entities', 'entities.id', 'releases.entity_id')
|
||||||
|
@ -57,8 +55,6 @@ async function filterUniqueReleases(releases) {
|
||||||
.orWhere(knex.raw('updated_at - date > INTERVAL \'1 day\'')); // scene was updated after the release date, no updates expected
|
.orWhere(knex.raw('updated_at - date > INTERVAL \'1 day\'')); // scene was updated after the release date, no updates expected
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log(duplicateReleaseEntriesQuery.toString());
|
|
||||||
|
|
||||||
const duplicateReleaseEntries = await duplicateReleaseEntriesQuery;
|
const duplicateReleaseEntries = await duplicateReleaseEntriesQuery;
|
||||||
|
|
||||||
const duplicateReleases = duplicateReleaseEntries.map((release) => curateRelease(release));
|
const duplicateReleases = duplicateReleaseEntries.map((release) => curateRelease(release));
|
||||||
|
|
Loading…
Reference in New Issue