Added centralized movie page fetcher. Added memory heap dump.
This commit is contained in:
parent
d93670842b
commit
280032937f
29
src/app.js
29
src/app.js
|
@ -34,33 +34,40 @@ function logActive() {
|
||||||
}, typeof argv.logActive === 'number' ? argv.logActive : 60000);
|
}, typeof argv.logActive === 'number' ? argv.logActive : 60000);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
async function snapshotMemory() {
|
||||||
function monitorMemory() {
|
const profile = await inspector.heap.takeSnapshot();
|
||||||
logger.debug(`Memory usage: ${process.memoryUsage.rss() / 1000000} MB`);
|
const filepath = `${dayjs().format('traxxx_snapshot_YYYY-MM-DD_HH-mm-ss')}.heapprofile`;
|
||||||
|
|
||||||
if (!done) {
|
logger.info(`Start heap snapshots, memory usage: ${process.memoryUsage.rss() / 1000000} MB`);
|
||||||
setTimeout(() => monitorMemory(), 10000);
|
|
||||||
}
|
await inspector.heap.disable();
|
||||||
|
await fs.writeFile(filepath, JSON.stringify(profile));
|
||||||
|
|
||||||
|
logger.info(`Saved heap dump to ${filepath}`);
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
async function stopMemorySample() {
|
async function stopMemorySample() {
|
||||||
const profile = await inspector.heap.stopSampling();
|
const profile = await inspector.heap.stopSampling();
|
||||||
const filepath = `${dayjs().format('YYYY-MM-DD_HH-mm-ss')}.heapprofile`;
|
const filepath = `${dayjs().format('traxxx_sample_YYYY-MM-DD_HH-mm-ss')}.heapprofile`;
|
||||||
|
const usage = process.memoryUsage.rss() / 1000000;
|
||||||
|
|
||||||
await inspector.heap.disable();
|
await inspector.heap.disable();
|
||||||
await fs.writeFile(filepath, JSON.stringify(profile));
|
await fs.writeFile(filepath, JSON.stringify(profile));
|
||||||
|
|
||||||
logger.info(`Saved heap sample to ${filepath}`);
|
logger.info(`Saved heap sample to ${filepath}`);
|
||||||
|
|
||||||
|
if (usage > 1000) {
|
||||||
|
await snapshotMemory();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function startMemorySample() {
|
async function startMemorySample() {
|
||||||
await inspector.heap.enable();
|
await inspector.heap.enable();
|
||||||
await inspector.heap.startSampling();
|
await inspector.heap.startSampling();
|
||||||
|
|
||||||
// monitorMemory();
|
const usage = process.memoryUsage.rss() / 1000000;
|
||||||
|
|
||||||
logger.info(`Start heap sampling, memory usage: ${process.memoryUsage.rss() / 1000000} MB`);
|
logger.info(`Start heap sampling, memory usage: ${usage} MB`);
|
||||||
|
|
||||||
setTimeout(async () => {
|
setTimeout(async () => {
|
||||||
await stopMemorySample();
|
await stopMemorySample();
|
||||||
|
@ -73,7 +80,7 @@ async function startMemorySample() {
|
||||||
|
|
||||||
async function init() {
|
async function init() {
|
||||||
try {
|
try {
|
||||||
if (argv.memory) {
|
if (argv.sampleMemory) {
|
||||||
await startMemorySample();
|
await startMemorySample();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
24
src/deep.js
24
src/deep.js
|
@ -54,12 +54,12 @@ function toBaseReleases(baseReleasesOrUrls, entity = null) {
|
||||||
.filter(Boolean);
|
.filter(Boolean);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(scraper, url, entity, baseRelease, options) {
|
async function fetchScene(scraper, url, entity, baseRelease, options, type = 'scene') {
|
||||||
if (scraper.fetchScene) {
|
if ((type === 'scene' && scraper.fetchScene) || (type === 'movie' && scraper.fetchMovie)) {
|
||||||
return scraper.fetchScene(baseRelease.url, entity, baseRelease, options, null);
|
return scraper[type === 'movie' ? 'fetchMovie' : 'fetchScene'](baseRelease.url, entity, baseRelease, options, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scraper.scrapeScene) {
|
if ((type === 'scene' && scraper.scrapeScene) || (type === 'movie' && scraper.scrapeMovie)) {
|
||||||
const session = qu.session();
|
const session = qu.session();
|
||||||
|
|
||||||
const res = await qu.get(url, null, null, {
|
const res = await qu.get(url, null, null, {
|
||||||
|
@ -70,7 +70,7 @@ async function fetchScene(scraper, url, entity, baseRelease, options) {
|
||||||
const cookie = await session._sessionOptions.cookieJar.get(url);
|
const cookie = await session._sessionOptions.cookieJar.get(url);
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
return scraper.scrapeScene(res.item, url, entity, baseRelease, options, {
|
return scraper[type === 'movie' ? 'scrapeMovie' : 'scrapeScene'](res.item, url, entity, baseRelease, options, {
|
||||||
session,
|
session,
|
||||||
headers: res.headers,
|
headers: res.headers,
|
||||||
cookieJar: session._sessionOptions.cookieJar,
|
cookieJar: session._sessionOptions.cookieJar,
|
||||||
|
@ -84,6 +84,10 @@ async function fetchScene(scraper, url, entity, baseRelease, options) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function fetchMovie(scraper, url, entity, baseRelease, options) {
|
||||||
|
return fetchScene(scraper, url, entity, baseRelease, options, 'movie');
|
||||||
|
}
|
||||||
|
|
||||||
async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
||||||
const entity = baseRelease.entity || entitiesBySlug[urlToSiteSlug(baseRelease.url)];
|
const entity = baseRelease.entity || entitiesBySlug[urlToSiteSlug(baseRelease.url)];
|
||||||
|
|
||||||
|
@ -106,7 +110,7 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
||||||
return baseRelease;
|
return baseRelease;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((type === 'scene' && !layoutScraper.fetchScene && !layoutScraper.scrapeScene) || (type === 'movie' && !layoutScraper.fetchMovie)) {
|
if ((type === 'scene' && !layoutScraper.fetchScene && !layoutScraper.scrapeScene) || (type === 'movie' && !layoutScraper.fetchMovie && !layoutScraper.scrapeMovie)) {
|
||||||
logger.warn(`The '${entity.name}'-scraper cannot scrape individual ${type}s`);
|
logger.warn(`The '${entity.name}'-scraper cannot scrape individual ${type}s`);
|
||||||
return baseRelease;
|
return baseRelease;
|
||||||
}
|
}
|
||||||
|
@ -123,12 +127,14 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
|
||||||
logger.debug(`Memory usage before: ${process.memoryUsage.rss() / 1000000} MB (${baseRelease.url})`);
|
logger.debug(`Memory usage before: ${process.memoryUsage.rss() / 1000000} MB (${baseRelease.url})`);
|
||||||
|
|
||||||
const rawScrapedRelease = type === 'scene'
|
const rawScrapedRelease = type === 'scene'
|
||||||
? await fetchScene(layoutScraper, baseRelease.url, entity, baseRelease, options, null)
|
? await fetchScene(layoutScraper, baseRelease.url, entity, baseRelease, options)
|
||||||
: await layoutScraper.fetchMovie(baseRelease.url, entity, baseRelease, options, null);
|
: await fetchMovie(layoutScraper, baseRelease.url, entity, baseRelease, options);
|
||||||
|
|
||||||
const pathname = new URL(baseRelease.url).pathname.replace(/\//g, '_');
|
const pathname = new URL(baseRelease.url).pathname.replace(/\//g, '_');
|
||||||
|
|
||||||
delete rawScrapedRelease.query; // some scrapers pass the qu-wrapped window instance to parent scrapers, filling up memory
|
if (rawScrapedRelease) {
|
||||||
|
delete rawScrapedRelease.query; // some scrapers pass the qu-wrapped window instance to parent scrapers, filling up memory
|
||||||
|
}
|
||||||
|
|
||||||
if (windows.has(pathname)) {
|
if (windows.has(pathname)) {
|
||||||
logger.debug(`Closing window for ${pathname}`);
|
logger.debug(`Closing window for ${pathname}`);
|
||||||
|
|
|
@ -461,7 +461,7 @@ async function fetchMovieTrailer(release) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeMovie({ query, el }, window, url, entity, options) {
|
async function scrapeMovie({ query, el }, url, entity, baseRelease, options) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
const { dataLayer } = query.exec('//script[contains(text(), "dataLayer")]', ['dataLayer']);
|
const { dataLayer } = query.exec('//script[contains(text(), "dataLayer")]', ['dataLayer']);
|
||||||
|
@ -495,6 +495,8 @@ async function scrapeMovie({ query, el }, window, url, entity, options) {
|
||||||
release.trailer = await fetchMovieTrailer(release);
|
release.trailer = await fetchMovieTrailer(release);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
console.log(release);
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -751,16 +753,6 @@ async function fetchScene(url, site, baseRelease, options) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchMovie(url, channel, baseRelease, options) {
|
|
||||||
const res = await qu.get(url, null, null);
|
|
||||||
|
|
||||||
if (res.ok) {
|
|
||||||
return scrapeMovie(res.item, res.window, url, channel, options);
|
|
||||||
}
|
|
||||||
|
|
||||||
return res.status;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function fetchActorScenes(actorName, apiUrl, siteSlug) {
|
async function fetchActorScenes(actorName, apiUrl, siteSlug) {
|
||||||
const res = await http.post(apiUrl, {
|
const res = await http.post(apiUrl, {
|
||||||
requests: [
|
requests: [
|
||||||
|
@ -855,7 +847,6 @@ module.exports = {
|
||||||
fetchApiUpcoming: fetchUpcomingApi,
|
fetchApiUpcoming: fetchUpcomingApi,
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
fetchLatestApi,
|
fetchLatestApi,
|
||||||
fetchMovie,
|
|
||||||
fetchProfile,
|
fetchProfile,
|
||||||
fetchScene,
|
fetchScene,
|
||||||
fetchSceneApi,
|
fetchSceneApi,
|
||||||
|
@ -867,12 +858,13 @@ module.exports = {
|
||||||
fetchProfile: fetchApiProfile,
|
fetchProfile: fetchApiProfile,
|
||||||
// fetchScene,
|
// fetchScene,
|
||||||
fetchScene: fetchSceneApi,
|
fetchScene: fetchSceneApi,
|
||||||
fetchMovie,
|
scrapeMovie,
|
||||||
},
|
},
|
||||||
getPhotos,
|
getPhotos,
|
||||||
scrapeApiProfile,
|
scrapeApiProfile,
|
||||||
scrapeApiReleases,
|
scrapeApiReleases,
|
||||||
scrapeProfile,
|
scrapeProfile,
|
||||||
scrapeAll,
|
scrapeAll,
|
||||||
|
scrapeMovie,
|
||||||
scrapeScene,
|
scrapeScene,
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue