From eca65f6b4dc33fecaa105c517026b529a539a9b2 Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Mon, 27 Jan 2020 00:41:04 +0000 Subject: [PATCH] Inspecting performance. --- src/actors.js | 10 ++++--- src/media.js | 66 ++++++++++++++++++++++++----------------- src/releases.js | 4 ++- src/scrapers/dogfart.js | 4 +++ 4 files changed, 52 insertions(+), 32 deletions(-) diff --git a/src/actors.js b/src/actors.js index fd62f11b..cddd6a40 100644 --- a/src/actors.js +++ b/src/actors.js @@ -442,10 +442,12 @@ async function associateActors(mappedActors, releases) { } }); - await Promise.all([ - knex('releases_actors').insert(associations.filter(association => association).flat()), - scrapeBasicActors(), - ]); + await knex('releases_actors').insert(associations.filter(association => association).flat()); + + // basic actor scraping is failure prone, don't together with actor association + // await scrapebasicactors(), + + return; } module.exports = { diff --git a/src/media.js b/src/media.js index b2c1f2eb..fb2041c4 100644 --- a/src/media.js +++ b/src/media.js @@ -37,15 +37,22 @@ function pluckPhotos(photos, specifiedLimit) { } async function createThumbnail(buffer) { - return sharp(buffer) - .resize({ - height: config.media.thumbnailSize, - withoutEnlargement: true, - }) - .jpeg({ - quality: config.media.thumbnailQuality, - }) - .toBuffer(); + try { + const thumbnail = sharp(buffer) + .resize({ + height: config.media.thumbnailSize, + withoutEnlargement: true, + }) + .jpeg({ + quality: config.media.thumbnailQuality, + }) + .toBuffer(); + + return thumbnail; + } catch (error) { + logger.error(`Failed to create thumbnail: ${error.message}`); + throw error; + } } async function createMediaDirectory(domain, subpath) { @@ -138,27 +145,32 @@ async function savePhotos(files, { naming = 'index', }) { return Promise.map(files, async (file, index) => { - const timestamp = new Date().getTime(); - const thumbnail = await createThumbnail(file.photo); + try { + const timestamp = new Date().getTime(); + const thumbnail = await createThumbnail(file.photo); - const filename = naming === 'index' - ? `${file.role || role}${index + 1}` - : `${timestamp + index}`; + const filename = naming === 'index' + ? `${file.role || role}${index + 1}` + : `${timestamp + index}`; - const filepath = path.join(`${domain}s`, subpath, `${filename}.${file.extension}`); - const thumbpath = path.join(`${domain}s`, subpath, `${filename}_thumb.${file.extension}`); + const filepath = path.join(`${domain}s`, subpath, `${filename}.${file.extension}`); + const thumbpath = path.join(`${domain}s`, subpath, `${filename}_thumb.${file.extension}`); - await Promise.all([ - fs.writeFile(path.join(config.media.path, filepath), file.photo), - fs.writeFile(path.join(config.media.path, thumbpath), thumbnail), - ]); + await Promise.all([ + fs.writeFile(path.join(config.media.path, filepath), file.photo), + fs.writeFile(path.join(config.media.path, thumbpath), thumbnail), + ]); - return { - ...file, - thumbnail, - filepath, - thumbpath, - }; + return { + ...file, + thumbnail, + filepath, + thumbpath, + }; + } catch (error) { + logger.error(`Failed to store ${domain} ${role} to ${subpath}: ${error.message}`); + return null; + } }); } @@ -193,7 +205,7 @@ async function storePhotos(photos, { naming, }); - const curatedPhotoEntries = curatePhotoEntries(savedPhotos, domain, role, targetId); + const curatedPhotoEntries = curatePhotoEntries(savedPhotos.filter(Boolean), domain, role, targetId); const newPhotos = await knex('media').insert(curatedPhotoEntries).returning('*'); const photoEntries = Array.isArray(newPhotos) diff --git a/src/releases.js b/src/releases.js index 04ef71c7..b9997612 100644 --- a/src/releases.js +++ b/src/releases.js @@ -8,7 +8,7 @@ const knex = require('./knex'); const argv = require('./argv'); const whereOr = require('./utils/where-or'); const { associateTags } = require('./tags'); -const { associateActors } = require('./actors'); +const { associateActors, scrapeBasicActors } = require('./actors'); const { createMediaDirectory, storePhotos, @@ -425,6 +425,8 @@ async function storeReleases(releases) { storeReleaseAssets(storedReleases), ]); + await scrapeBasicActors(), + return { releases: storedReleases, actors, diff --git a/src/scrapers/dogfart.js b/src/scrapers/dogfart.js index 65f200b0..8aadaefc 100644 --- a/src/scrapers/dogfart.js +++ b/src/scrapers/dogfart.js @@ -131,7 +131,11 @@ async function scrapeScene(html, url, site) { } async function fetchLatest(site, page = 1) { + console.time('dogfart'); + console.log('scraping...', site.name); const res = await bhttp.get(`https://dogfartnetwork.com/tour/scenes/?p=${page}`); + console.timeEnd('dogfart'); + console.log('done!', site.name); return scrapeLatest(res.body.toString(), site); }