Added centralized movie page fetcher. Added memory heap dump.

This commit is contained in:
DebaucheryLibrarian 2021-12-20 02:22:10 +01:00
parent d93670842b
commit 280032937f
3 changed files with 38 additions and 33 deletions

View File

@ -34,33 +34,40 @@ function logActive() {
}, typeof argv.logActive === 'number' ? argv.logActive : 60000);
}
/*
function monitorMemory() {
logger.debug(`Memory usage: ${process.memoryUsage.rss() / 1000000} MB`);
async function snapshotMemory() {
const profile = await inspector.heap.takeSnapshot();
const filepath = `${dayjs().format('traxxx_snapshot_YYYY-MM-DD_HH-mm-ss')}.heapprofile`;
if (!done) {
setTimeout(() => monitorMemory(), 10000);
}
logger.info(`Start heap snapshots, memory usage: ${process.memoryUsage.rss() / 1000000} MB`);
await inspector.heap.disable();
await fs.writeFile(filepath, JSON.stringify(profile));
logger.info(`Saved heap dump to ${filepath}`);
}
*/
/**
 * Stops the running heap sampling session and persists its profile.
 *
 * The profile returned by `inspector.heap.stopSampling()` is serialized as
 * JSON into a timestamped `traxxx_sample_<date>.heapprofile` file. When the
 * resident set size measured at the start of this call exceeds 1000 MB, a
 * full heap snapshot is taken afterwards via `snapshotMemory()`.
 *
 * @returns {Promise<void>} resolves once the sample (and optional snapshot) is written
 */
async function stopMemorySample() {
  const profile = await inspector.heap.stopSampling();

  // Keep the literal prefix out of the format string: Day.js treats single
  // letters such as 'a', 's' and 'm' as tokens and would mangle 'traxxx_sample_'.
  const timestamp = dayjs().format('YYYY-MM-DD_HH-mm-ss');
  const filepath = `traxxx_sample_${timestamp}.heapprofile`;

  // RSS in MB (bytes / 1,000,000), read before the profile is written out.
  const usage = process.memoryUsage.rss() / 1000000;

  await inspector.heap.disable();
  await fs.writeFile(filepath, JSON.stringify(profile));

  logger.info(`Saved heap sample to ${filepath}`);

  if (usage > 1000) {
    await snapshotMemory();
  }
}
async function startMemorySample() {
await inspector.heap.enable();
await inspector.heap.startSampling();
// monitorMemory();
const usage = process.memoryUsage.rss() / 1000000;
logger.info(`Start heap sampling, memory usage: ${process.memoryUsage.rss() / 1000000} MB`);
logger.info(`Start heap sampling, memory usage: ${usage} MB`);
setTimeout(async () => {
await stopMemorySample();
@ -73,7 +80,7 @@ async function startMemorySample() {
async function init() {
try {
if (argv.memory) {
if (argv.sampleMemory) {
await startMemorySample();
}

View File

@ -54,12 +54,12 @@ function toBaseReleases(baseReleasesOrUrls, entity = null) {
.filter(Boolean);
}
async function fetchScene(scraper, url, entity, baseRelease, options) {
if (scraper.fetchScene) {
return scraper.fetchScene(baseRelease.url, entity, baseRelease, options, null);
async function fetchScene(scraper, url, entity, baseRelease, options, type = 'scene') {
if ((type === 'scene' && scraper.fetchScene) || (type === 'movie' && scraper.fetchMovie)) {
return scraper[type === 'movie' ? 'fetchMovie' : 'fetchScene'](baseRelease.url, entity, baseRelease, options, null);
}
if (scraper.scrapeScene) {
if ((type === 'scene' && scraper.scrapeScene) || (type === 'movie' && scraper.scrapeMovie)) {
const session = qu.session();
const res = await qu.get(url, null, null, {
@ -70,7 +70,7 @@ async function fetchScene(scraper, url, entity, baseRelease, options) {
const cookie = await session._sessionOptions.cookieJar.get(url);
if (res.ok) {
return scraper.scrapeScene(res.item, url, entity, baseRelease, options, {
return scraper[type === 'movie' ? 'scrapeMovie' : 'scrapeScene'](res.item, url, entity, baseRelease, options, {
session,
headers: res.headers,
cookieJar: session._sessionOptions.cookieJar,
@ -84,6 +84,10 @@ async function fetchScene(scraper, url, entity, baseRelease, options) {
return null;
}
/**
 * Movie counterpart of `fetchScene`: forwards every argument unchanged and
 * selects the movie code path by passing 'movie' as the release type.
 *
 * @param {object} scraper - scraper module handed through to `fetchScene`
 * @param {string} url - page URL handed through to `fetchScene`
 * @param {object} entity - entity handed through to `fetchScene`
 * @param {object} baseRelease - base release handed through to `fetchScene`
 * @param {object} options - options handed through to `fetchScene`
 * @returns {*} whatever `fetchScene` returns for the 'movie' type
 */
function fetchMovie(scraper, url, entity, baseRelease, options) {
  const releaseType = 'movie';

  return fetchScene(scraper, url, entity, baseRelease, options, releaseType);
}
async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
const entity = baseRelease.entity || entitiesBySlug[urlToSiteSlug(baseRelease.url)];
@ -106,7 +110,7 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
return baseRelease;
}
if ((type === 'scene' && !layoutScraper.fetchScene && !layoutScraper.scrapeScene) || (type === 'movie' && !layoutScraper.fetchMovie)) {
if ((type === 'scene' && !layoutScraper.fetchScene && !layoutScraper.scrapeScene) || (type === 'movie' && !layoutScraper.fetchMovie && !layoutScraper.scrapeMovie)) {
logger.warn(`The '${entity.name}'-scraper cannot scrape individual ${type}s`);
return baseRelease;
}
@ -123,12 +127,14 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
logger.debug(`Memory usage before: ${process.memoryUsage.rss() / 1000000} MB (${baseRelease.url})`);
const rawScrapedRelease = type === 'scene'
? await fetchScene(layoutScraper, baseRelease.url, entity, baseRelease, options, null)
: await layoutScraper.fetchMovie(baseRelease.url, entity, baseRelease, options, null);
? await fetchScene(layoutScraper, baseRelease.url, entity, baseRelease, options)
: await fetchMovie(layoutScraper, baseRelease.url, entity, baseRelease, options);
const pathname = new URL(baseRelease.url).pathname.replace(/\//g, '_');
if (rawScrapedRelease) {
delete rawScrapedRelease.query; // some scrapers pass the qu-wrapped window instance to parent scrapers, filling up memory
}
if (windows.has(pathname)) {
logger.debug(`Closing window for ${pathname}`);

View File

@ -461,7 +461,7 @@ async function fetchMovieTrailer(release) {
return null;
}
async function scrapeMovie({ query, el }, window, url, entity, options) {
async function scrapeMovie({ query, el }, url, entity, baseRelease, options) {
const release = {};
const { dataLayer } = query.exec('//script[contains(text(), "dataLayer")]', ['dataLayer']);
@ -495,6 +495,8 @@ async function scrapeMovie({ query, el }, window, url, entity, options) {
release.trailer = await fetchMovieTrailer(release);
}
console.log(release);
return release;
}
@ -751,16 +753,6 @@ async function fetchScene(url, site, baseRelease, options) {
return null;
}
/**
 * Requests a movie page and hands the result to `scrapeMovie`.
 *
 * @param {string} url - movie page URL passed to `qu.get`
 * @param {object} channel - channel passed on to `scrapeMovie`
 * @param {object} baseRelease - accepted but not used by this function
 * @param {object} options - options passed on to `scrapeMovie`
 * @returns {Promise<*>} the `scrapeMovie` result when the response is ok,
 *   otherwise the response status
 */
async function fetchMovie(url, channel, baseRelease, options) {
  const response = await qu.get(url, null, null);

  // Failed request: report the status instead of a release.
  if (!response.ok) {
    return response.status;
  }

  return scrapeMovie(response.item, response.window, url, channel, options);
}
async function fetchActorScenes(actorName, apiUrl, siteSlug) {
const res = await http.post(apiUrl, {
requests: [
@ -855,7 +847,6 @@ module.exports = {
fetchApiUpcoming: fetchUpcomingApi,
fetchLatest,
fetchLatestApi,
fetchMovie,
fetchProfile,
fetchScene,
fetchSceneApi,
@ -867,12 +858,13 @@ module.exports = {
fetchProfile: fetchApiProfile,
// fetchScene,
fetchScene: fetchSceneApi,
fetchMovie,
scrapeMovie,
},
getPhotos,
scrapeApiProfile,
scrapeApiReleases,
scrapeProfile,
scrapeAll,
scrapeMovie,
scrapeScene,
};