diff --git a/src/app.js b/src/app.js index d8cb3c7c..923503f8 100644 --- a/src/app.js +++ b/src/app.js @@ -34,33 +34,40 @@ function logActive() { }, typeof argv.logActive === 'number' ? argv.logActive : 60000); } -/* -function monitorMemory() { - logger.debug(`Memory usage: ${process.memoryUsage.rss() / 1000000} MB`); +async function snapshotMemory() { + const profile = await inspector.heap.takeSnapshot(); + const filepath = `${dayjs().format('traxxx_snapshot_YYYY-MM-DD_HH-mm-ss')}.heapprofile`; - if (!done) { - setTimeout(() => monitorMemory(), 10000); - } + logger.info(`Start heap snapshots, memory usage: ${process.memoryUsage.rss() / 1000000} MB`); + + await inspector.heap.disable(); + await fs.writeFile(filepath, JSON.stringify(profile)); + + logger.info(`Saved heap dump to ${filepath}`); } -*/ async function stopMemorySample() { const profile = await inspector.heap.stopSampling(); - const filepath = `${dayjs().format('YYYY-MM-DD_HH-mm-ss')}.heapprofile`; + const filepath = `${dayjs().format('traxxx_sample_YYYY-MM-DD_HH-mm-ss')}.heapprofile`; + const usage = process.memoryUsage.rss() / 1000000; await inspector.heap.disable(); await fs.writeFile(filepath, JSON.stringify(profile)); logger.info(`Saved heap sample to ${filepath}`); + + if (usage > 1000) { + await snapshotMemory(); + } } async function startMemorySample() { await inspector.heap.enable(); await inspector.heap.startSampling(); - // monitorMemory(); + const usage = process.memoryUsage.rss() / 1000000; - logger.info(`Start heap sampling, memory usage: ${process.memoryUsage.rss() / 1000000} MB`); + logger.info(`Start heap sampling, memory usage: ${usage} MB`); setTimeout(async () => { await stopMemorySample(); @@ -73,7 +80,7 @@ async function startMemorySample() { async function init() { try { - if (argv.memory) { + if (argv.sampleMemory) { await startMemorySample(); } diff --git a/src/deep.js b/src/deep.js index 43ea6016..8f61e01a 100644 --- a/src/deep.js +++ b/src/deep.js @@ -54,12 +54,12 @@ function toBaseReleases(baseReleasesOrUrls, entity = null) { .filter(Boolean); } -async function fetchScene(scraper, url, entity, baseRelease, options) { - if (scraper.fetchScene) { - return scraper.fetchScene(baseRelease.url, entity, baseRelease, options, null); +async function fetchScene(scraper, url, entity, baseRelease, options, type = 'scene') { + if ((type === 'scene' && scraper.fetchScene) || (type === 'movie' && scraper.fetchMovie)) { + return scraper[type === 'movie' ? 'fetchMovie' : 'fetchScene'](baseRelease.url, entity, baseRelease, options, null); } - if (scraper.scrapeScene) { + if ((type === 'scene' && scraper.scrapeScene) || (type === 'movie' && scraper.scrapeMovie)) { const session = qu.session(); const res = await qu.get(url, null, null, { @@ -70,7 +70,7 @@ async function fetchScene(scraper, url, entity, baseRelease, options) { const cookie = await session._sessionOptions.cookieJar.get(url); if (res.ok) { - return scraper.scrapeScene(res.item, url, entity, baseRelease, options, { + return scraper[type === 'movie' ? 'scrapeMovie' : 'scrapeScene'](res.item, url, entity, baseRelease, options, { session, headers: res.headers, cookieJar: session._sessionOptions.cookieJar, @@ -84,6 +84,10 @@ async function fetchScene(scraper, url, entity, baseRelease, options) { return null; } +function fetchMovie(scraper, url, entity, baseRelease, options) { + return fetchScene(scraper, url, entity, baseRelease, options, 'movie'); +} + async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') { const entity = baseRelease.entity || entitiesBySlug[urlToSiteSlug(baseRelease.url)]; @@ -106,7 +110,7 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') { return baseRelease; } - if ((type === 'scene' && !layoutScraper.fetchScene && !layoutScraper.scrapeScene) || (type === 'movie' && !layoutScraper.fetchMovie)) { + if ((type === 'scene' && !layoutScraper.fetchScene && !layoutScraper.scrapeScene) || (type === 'movie' && !layoutScraper.fetchMovie && !layoutScraper.scrapeMovie)) { logger.warn(`The '${entity.name}'-scraper cannot scrape individual ${type}s`); return baseRelease; } @@ -123,12 +127,14 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') { logger.debug(`Memory usage before: ${process.memoryUsage.rss() / 1000000} MB (${baseRelease.url})`); const rawScrapedRelease = type === 'scene' - ? await fetchScene(layoutScraper, baseRelease.url, entity, baseRelease, options, null) - : await layoutScraper.fetchMovie(baseRelease.url, entity, baseRelease, options, null); + ? await fetchScene(layoutScraper, baseRelease.url, entity, baseRelease, options) + : await fetchMovie(layoutScraper, baseRelease.url, entity, baseRelease, options); const pathname = new URL(baseRelease.url).pathname.replace(/\//g, '_'); - delete rawScrapedRelease.query; // some scrapers pass the qu-wrapped window instance to parent scrapers, filling up memory + if (rawScrapedRelease) { + delete rawScrapedRelease.query; // some scrapers pass the qu-wrapped window instance to parent scrapers, filling up memory + } if (windows.has(pathname)) { logger.debug(`Closing window for ${pathname}`); diff --git a/src/scrapers/gamma.js b/src/scrapers/gamma.js index 9ddc1abb..78f58c22 100644 --- a/src/scrapers/gamma.js +++ b/src/scrapers/gamma.js @@ -461,7 +461,7 @@ async function fetchMovieTrailer(release) { return null; } -async function scrapeMovie({ query, el }, window, url, entity, options) { +async function scrapeMovie({ query, el }, url, entity, baseRelease, options) { const release = {}; const { dataLayer } = query.exec('//script[contains(text(), "dataLayer")]', ['dataLayer']); @@ -495,6 +495,8 @@ async function scrapeMovie({ query, el }, window, url, entity, options) { release.trailer = await fetchMovieTrailer(release); } + console.log(release); + return release; } @@ -751,16 +753,6 @@ async function fetchScene(url, site, baseRelease, options) { return null; } -async function fetchMovie(url, channel, baseRelease, options) { - const res = await qu.get(url, null, null); - - if (res.ok) { - return scrapeMovie(res.item, res.window, url, channel, options); - } - - return res.status; -} - async function fetchActorScenes(actorName, apiUrl, siteSlug) { const res = await http.post(apiUrl, { requests: [ @@ -855,7 +847,6 @@ module.exports = { fetchApiUpcoming: fetchUpcomingApi, fetchLatest, fetchLatestApi, - fetchMovie, fetchProfile, fetchScene, fetchSceneApi, @@ -867,12 +858,13 @@ module.exports = { fetchProfile: fetchApiProfile, // fetchScene, fetchScene: fetchSceneApi, - fetchMovie, + scrapeMovie, }, getPhotos, scrapeApiProfile, scrapeApiReleases, scrapeProfile, scrapeAll, + scrapeMovie, scrapeScene, };