From ffdf0690e753a33a9ef3f995bf45afa03dcf14a7 Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Fri, 31 Jan 2020 01:55:55 +0100 Subject: [PATCH] Added option to fetch all of an actor's releases (for supporter scrapers), and a utility to extract posters. --- src/actors.js | 12 +++++++++--- src/app.js | 8 +++++++- src/argv.js | 5 +++++ src/scrape-releases.js | 7 ++++++- src/utils/posters.js | 31 +++++++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 5 deletions(-) create mode 100644 src/utils/posters.js diff --git a/src/actors.js b/src/actors.js index 1fe68e69..9d37e212 100644 --- a/src/actors.js +++ b/src/actors.js @@ -303,10 +303,12 @@ async function mergeProfiles(profiles, actor) { tattoos: prevProfile.tattoos || profile.tattoos, social: prevProfile.social.concat(profile.social || []), avatars: prevProfile.avatars.concat(profile.avatar ? [profile.avatar] : []), // don't flatten fallbacks + releases: prevProfile.releases.concat(profile.releases ? profile.releases : []), // don't flatten fallbacks }; }, { social: [], avatars: [], + releases: [], }); const [birthPlace, residencePlace] = await Promise.all([ @@ -332,7 +334,7 @@ async function mergeProfiles(profiles, actor) { } async function scrapeActors(actorNames) { - await Promise.map(actorNames || argv.actors, async (actorName) => { + return Promise.map(actorNames || argv.actors, async (actorName) => { try { const actorSlug = slugify(actorName); const actorEntry = await knex('actors').where({ slug: actorSlug }).first(); @@ -362,7 +364,7 @@ async function scrapeActors(actorNames) { await updateActor(profile, true, false); } - return; + return null; } if (argv.save) { @@ -382,7 +384,7 @@ async function scrapeActors(actorNames) { }, actorEntry.name), ]); - return; + return profile; } const newActorEntry = await storeActor(profile, true, true); @@ -397,8 +399,12 @@ async function scrapeActors(actorNames) { naming: 'timestamp', }, newActorEntry.name); } + + return profile; } catch (error) { 
logger.warn(`${actorName}: ${error}`); + + return null; } }, { concurrency: 3, diff --git a/src/app.js b/src/app.js index 5f2f7ec3..6d7a0a1d 100644 --- a/src/app.js +++ b/src/app.js @@ -22,7 +22,13 @@ async function init() { } if (argv.actors && argv.actors.length > 0) { - await scrapeActors(); + const actors = await scrapeActors(); + + if (argv.withReleases) { + const releases = actors.filter(Boolean).flatMap(actor => actor.releases || []); // scrapeActors resolves null for actors it could not scrape + + await scrapeReleases(releases, null, 'scene'); + } } if (argv.actors && argv.actors.length === 0) { diff --git a/src/argv.js b/src/argv.js index 9e3fbc37..43eecf0c 100644 --- a/src/argv.js +++ b/src/argv.js @@ -29,6 +29,11 @@ const { argv } = yargs type: 'array', alias: 'actor', }) + .option('with-releases', { + describe: 'Fetch all releases for an actor', + type: 'boolean', + default: false, + }) .option('scene', { describe: 'Scrape scene info from URL', type: 'array', diff --git a/src/scrape-releases.js b/src/scrape-releases.js index 8f788661..fc3e06b4 100644 --- a/src/scrape-releases.js +++ b/src/scrape-releases.js @@ -3,11 +3,12 @@ const config = require('config'); const Promise = require('bluebird'); +const logger = require('./logger'); const argv = require('./argv'); const scrapers = require('./scrapers/scrapers'); -const { storeReleases } = require('./releases'); const { findSiteByUrl } = require('./sites'); const { findNetworkByUrl } = require('./networks'); +const { storeReleases } = require('./releases'); async function findSite(url, release) { const site = (release && release.site) || await findSiteByUrl(url); @@ -50,6 +51,10 @@ async function scrapeRelease(url, release, type = 'scene') { throw new Error(`The '${site.name}'-scraper cannot fetch individual movies`); } + if (!release) { + logger.info(`Scraping release from ${url}`); + } + const scrapedRelease = type === 'scene' ? 
await scraper.fetchScene(url, site, release) : await scraper.fetchMovie(url, site, release); diff --git a/src/utils/posters.js b/src/utils/posters.js new file mode 100644 index 00000000..9d1397dc --- /dev/null +++ b/src/utils/posters.js @@ -0,0 +1,31 @@ +'use strict'; + +const config = require('config'); +const path = require('path'); +const fs = require('fs-extra'); + +const argv = require('../argv'); + +const knex = require('../knex'); + +// Copy the poster of every release featuring the actors in argv.actors to <config.media.path>/posters. +async function init() { + const posters = await knex('actors') + .select('actors.name', 'releases.title', 'media.path') + .whereIn('actors.name', argv.actors || []) + .join('releases_actors', 'releases_actors.actor_id', 'actors.id') + .join('releases', 'releases_actors.release_id', 'releases.id') + .join('releases_posters', 'releases_posters.release_id', 'releases.id') + .join('media', 'releases_posters.media_id', 'media.id'); + + await fs.ensureDir(path.join(config.media.path, 'posters')); // the target directory may not exist yet + + await Promise.all(posters.map(async (poster) => { + const source = path.join(config.media.path, poster.path); + const target = path.join(config.media.path, 'posters', `${poster.title.replace(/\//g, '_')}.${poster.name}.jpeg`); // replace every slash, not only the first + + await fs.copy(source, target); + })); +} + +init().finally(() => knex.destroy()); // release the connection pool once done