Added centralized movie page fetcher. Added memory heap dump.
This commit is contained in:
parent
d93670842b
commit
280032937f
29
src/app.js
29
src/app.js
|
@ -34,33 +34,40 @@ function logActive() {
|
|||
}, typeof argv.logActive === 'number' ? argv.logActive : 60000);
|
||||
}
|
||||
|
||||
/**
 * Take a full heap snapshot through the inspector and write it, serialized
 * as JSON, to a timestamped `.heapprofile` file (relative path).
 *
 * @returns {Promise<void>} resolves once the snapshot file has been written
 */
async function snapshotMemory() {
	const profile = await inspector.heap.takeSnapshot();

	// The literal prefix must be wrapped in [] for dayjs; otherwise letters such
	// as 'a', 's' and 'h' are expanded as am/pm, second and hour tokens and the
	// file name comes out garbled.
	const filepath = `${dayjs().format('[traxxx_snapshot_]YYYY-MM-DD_HH-mm-ss')}.heapprofile`;

	logger.info(`Start heap snapshots, memory usage: ${process.memoryUsage.rss() / 1000000} MB`);

	await inspector.heap.disable();
	await fs.writeFile(filepath, JSON.stringify(profile));

	logger.info(`Saved heap dump to ${filepath}`);
}
|
||||
|
||||
/**
 * Stop heap sampling, write the collected sampling profile to a timestamped
 * `.heapprofile` file, and additionally take a full heap snapshot when the
 * resident set size exceeded 1000 MB at the time sampling was stopped.
 *
 * @returns {Promise<void>} resolves once the profile (and optional snapshot) is saved
 */
async function stopMemorySample() {
	const profile = await inspector.heap.stopSampling();

	// The literal prefix is escaped with [] so dayjs does not expand the letters
	// 's', 'a' and 'm' in it as second, am/pm and minute tokens.
	const filepath = `${dayjs().format('[traxxx_sample_]YYYY-MM-DD_HH-mm-ss')}.heapprofile`;

	// Resident set size in MB, measured before the profile is serialized.
	const usage = process.memoryUsage.rss() / 1000000;

	await inspector.heap.disable();
	await fs.writeFile(filepath, JSON.stringify(profile));

	logger.info(`Saved heap sample to ${filepath}`);

	if (usage > 1000) {
		await snapshotMemory();
	}
}
|
||||
|
||||
async function startMemorySample() {
|
||||
await inspector.heap.enable();
|
||||
await inspector.heap.startSampling();
|
||||
|
||||
// monitorMemory();
|
||||
const usage = process.memoryUsage.rss() / 1000000;
|
||||
|
||||
logger.info(`Start heap sampling, memory usage: ${process.memoryUsage.rss() / 1000000} MB`);
|
||||
logger.info(`Start heap sampling, memory usage: ${usage} MB`);
|
||||
|
||||
setTimeout(async () => {
|
||||
await stopMemorySample();
|
||||
|
@ -73,7 +80,7 @@ async function startMemorySample() {
|
|||
|
||||
async function init() {
|
||||
try {
|
||||
if (argv.memory) {
|
||||
if (argv.sampleMemory) {
|
||||
await startMemorySample();
|
||||
}
|
||||
|
||||
|
|
22
src/deep.js
22
src/deep.js
|
@ -54,12 +54,12 @@ function toBaseReleases(baseReleasesOrUrls, entity = null) {
|
|||
.filter(Boolean);
|
||||
}
|
||||
|
||||
async function fetchScene(scraper, url, entity, baseRelease, options) {
|
||||
if (scraper.fetchScene) {
|
||||
return scraper.fetchScene(baseRelease.url, entity, baseRelease, options, null);
|
||||
async function fetchScene(scraper, url, entity, baseRelease, options, type = 'scene') {
|
||||
if ((type === 'scene' && scraper.fetchScene) || (type === 'movie' && scraper.fetchMovie)) {
|
||||
return scraper[type === 'movie' ? 'fetchMovie' : 'fetchScene'](baseRelease.url, entity, baseRelease, options, null);
|
||||
}
|
||||
|
||||
if (scraper.scrapeScene) {
|
||||
if ((type === 'scene' && scraper.scrapeScene) || (type === 'movie' && scraper.scrapeMovie)) {
|
||||
const session = qu.session();
|
||||
|
||||
const res = await qu.get(url, null, null, {
|
||||
|
@ -70,7 +70,7 @@ async function fetchScene(scraper, url, entity, baseRelease, options) {
|
|||
const cookie = await session._sessionOptions.cookieJar.get(url);
|
||||
|
||||
if (res.ok) {
|
||||
return scraper.scrapeScene(res.item, url, entity, baseRelease, options, {
|
||||
return scraper[type === 'movie' ? 'scrapeMovie' : 'scrapeScene'](res.item, url, entity, baseRelease, options, {
|
||||
session,
|
||||
headers: res.headers,
|
||||
cookieJar: session._sessionOptions.cookieJar,
|
||||
|
@ -84,6 +84,10 @@ async function fetchScene(scraper, url, entity, baseRelease, options) {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
 * Fetch a movie page by delegating to fetchScene with the release type
 * set to 'movie'.
 *
 * @param {object} scraper - scraper module, passed through to fetchScene
 * @param {string} url - URL of the movie page
 * @param {object} entity - entity the release belongs to
 * @param {object} baseRelease - base release being deep-scraped
 * @param {object} options - scrape options, passed through unchanged
 * @returns {Promise<*>} whatever fetchScene resolves to for a movie
 */
function fetchMovie(scraper, url, entity, baseRelease, options) {
	const releaseType = 'movie';

	return fetchScene(scraper, url, entity, baseRelease, options, releaseType);
}
|
||||
|
||||
async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
||||
const entity = baseRelease.entity || entitiesBySlug[urlToSiteSlug(baseRelease.url)];
|
||||
|
||||
|
@ -106,7 +110,7 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
|||
return baseRelease;
|
||||
}
|
||||
|
||||
if ((type === 'scene' && !layoutScraper.fetchScene && !layoutScraper.scrapeScene) || (type === 'movie' && !layoutScraper.fetchMovie)) {
|
||||
if ((type === 'scene' && !layoutScraper.fetchScene && !layoutScraper.scrapeScene) || (type === 'movie' && !layoutScraper.fetchMovie && !layoutScraper.scrapeMovie)) {
|
||||
logger.warn(`The '${entity.name}'-scraper cannot scrape individual ${type}s`);
|
||||
return baseRelease;
|
||||
}
|
||||
|
@ -123,12 +127,14 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
|||
logger.debug(`Memory usage before: ${process.memoryUsage.rss() / 1000000} MB (${baseRelease.url})`);
|
||||
|
||||
const rawScrapedRelease = type === 'scene'
|
||||
? await fetchScene(layoutScraper, baseRelease.url, entity, baseRelease, options, null)
|
||||
: await layoutScraper.fetchMovie(baseRelease.url, entity, baseRelease, options, null);
|
||||
? await fetchScene(layoutScraper, baseRelease.url, entity, baseRelease, options)
|
||||
: await fetchMovie(layoutScraper, baseRelease.url, entity, baseRelease, options);
|
||||
|
||||
const pathname = new URL(baseRelease.url).pathname.replace(/\//g, '_');
|
||||
|
||||
if (rawScrapedRelease) {
|
||||
delete rawScrapedRelease.query; // some scrapers pass the qu-wrapped window instance to parent scrapers, filling up memory
|
||||
}
|
||||
|
||||
if (windows.has(pathname)) {
|
||||
logger.debug(`Closing window for ${pathname}`);
|
||||
|
|
|
@ -461,7 +461,7 @@ async function fetchMovieTrailer(release) {
|
|||
return null;
|
||||
}
|
||||
|
||||
async function scrapeMovie({ query, el }, window, url, entity, options) {
|
||||
async function scrapeMovie({ query, el }, url, entity, baseRelease, options) {
|
||||
const release = {};
|
||||
|
||||
const { dataLayer } = query.exec('//script[contains(text(), "dataLayer")]', ['dataLayer']);
|
||||
|
@ -495,6 +495,8 @@ async function scrapeMovie({ query, el }, window, url, entity, options) {
|
|||
release.trailer = await fetchMovieTrailer(release);
|
||||
}
|
||||
|
||||
console.log(release);
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
|
@ -751,16 +753,6 @@ async function fetchScene(url, site, baseRelease, options) {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
 * Request a movie page and hand the result to scrapeMovie.
 *
 * @param {string} url - URL of the movie page
 * @param {object} channel - channel passed on to scrapeMovie
 * @param {object} baseRelease - accepted for signature parity; not used here
 * @param {object} options - scrape options passed on to scrapeMovie
 * @returns {Promise<*>} the scrapeMovie result when the response is ok,
 *   otherwise the response status
 */
async function fetchMovie(url, channel, baseRelease, options) {
	const response = await qu.get(url, null, null);

	return response.ok
		? scrapeMovie(response.item, response.window, url, channel, options)
		: response.status;
}
|
||||
|
||||
async function fetchActorScenes(actorName, apiUrl, siteSlug) {
|
||||
const res = await http.post(apiUrl, {
|
||||
requests: [
|
||||
|
@ -855,7 +847,6 @@ module.exports = {
|
|||
fetchApiUpcoming: fetchUpcomingApi,
|
||||
fetchLatest,
|
||||
fetchLatestApi,
|
||||
fetchMovie,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchSceneApi,
|
||||
|
@ -867,12 +858,13 @@ module.exports = {
|
|||
fetchProfile: fetchApiProfile,
|
||||
// fetchScene,
|
||||
fetchScene: fetchSceneApi,
|
||||
fetchMovie,
|
||||
scrapeMovie,
|
||||
},
|
||||
getPhotos,
|
||||
scrapeApiProfile,
|
||||
scrapeApiReleases,
|
||||
scrapeProfile,
|
||||
scrapeAll,
|
||||
scrapeMovie,
|
||||
scrapeScene,
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue