Added option to fetch all of an actor's releases (for supported scrapers), and a utility to extract posters.

This commit is contained in:
ThePendulum 2020-01-31 01:55:55 +01:00
parent 4ecb386233
commit ffdf0690e7
5 changed files with 58 additions and 5 deletions

View File

@ -303,10 +303,12 @@ async function mergeProfiles(profiles, actor) {
tattoos: prevProfile.tattoos || profile.tattoos,
social: prevProfile.social.concat(profile.social || []),
avatars: prevProfile.avatars.concat(profile.avatar ? [profile.avatar] : []), // don't flatten fallbacks
releases: prevProfile.releases.concat(profile.releases ? profile.releases : []), // don't flatten fallbacks
};
}, {
social: [],
avatars: [],
releases: [],
});
const [birthPlace, residencePlace] = await Promise.all([
@ -332,7 +334,7 @@ async function mergeProfiles(profiles, actor) {
}
async function scrapeActors(actorNames) {
await Promise.map(actorNames || argv.actors, async (actorName) => {
return Promise.map(actorNames || argv.actors, async (actorName) => {
try {
const actorSlug = slugify(actorName);
const actorEntry = await knex('actors').where({ slug: actorSlug }).first();
@ -362,7 +364,7 @@ async function scrapeActors(actorNames) {
await updateActor(profile, true, false);
}
return;
return null;
}
if (argv.save) {
@ -382,7 +384,7 @@ async function scrapeActors(actorNames) {
}, actorEntry.name),
]);
return;
return profile;
}
const newActorEntry = await storeActor(profile, true, true);
@ -397,8 +399,12 @@ async function scrapeActors(actorNames) {
naming: 'timestamp',
}, newActorEntry.name);
}
return profile;
} catch (error) {
logger.warn(`${actorName}: ${error}`);
return null;
}
}, {
concurrency: 3,

View File

@ -22,7 +22,13 @@ async function init() {
}
if (argv.actors && argv.actors.length > 0) {
await scrapeActors();
const actors = await scrapeActors();
if (argv.withReleases) {
const releases = actors.map(actor => actor.releases).flat();
await scrapeReleases(releases, null, 'scene');
}
}
if (argv.actors && argv.actors.length === 0) {

View File

@ -29,6 +29,11 @@ const { argv } = yargs
type: 'array',
alias: 'actor',
})
.option('with-releases', {
describe: 'Fetch all releases for an actor',
type: 'boolean',
default: false,
})
.option('scene', {
describe: 'Scrape scene info from URL',
type: 'array',

View File

@ -3,11 +3,12 @@
const config = require('config');
const Promise = require('bluebird');
const logger = require('./logger');
const argv = require('./argv');
const scrapers = require('./scrapers/scrapers');
const { storeReleases } = require('./releases');
const { findSiteByUrl } = require('./sites');
const { findNetworkByUrl } = require('./networks');
const { storeReleases } = require('./releases');
async function findSite(url, release) {
const site = (release && release.site) || await findSiteByUrl(url);
@ -50,6 +51,10 @@ async function scrapeRelease(url, release, type = 'scene') {
throw new Error(`The '${site.name}'-scraper cannot fetch individual movies`);
}
if (!release) {
logger.info(`Scraping release from ${url}`);
}
const scrapedRelease = type === 'scene'
? await scraper.fetchScene(url, site, release)
: await scraper.fetchMovie(url, site, release);

31
src/utils/posters.js Normal file
View File

@ -0,0 +1,31 @@
'use strict';
const config = require('config');
const path = require('path');
const fs = require('fs-extra');
const argv = require('../argv');
const knex = require('../knex');
/**
 * Copy the poster of every release featuring the requested actors into a
 * flat `posters` directory under the configured media path.
 *
 * Actor names are read from `argv.actors`. Each copy is named
 * `<release title>.<actor name>.jpeg`; every `/` in the title is replaced by
 * `_` so the title cannot introduce extra path segments.
 *
 * @returns {Promise<void>} Resolves once every poster has been copied.
 */
async function init() {
  const actorNames = argv.actors || [];

  if (actorNames.length === 0) {
    // Nothing to look up; avoid querying with a missing or empty name list.
    return;
  }

  const posters = await knex('actors')
    .select('actors.name', 'releases.title', 'media.path')
    .whereIn('actors.name', actorNames) // qualified like the select list, so the join cannot make it ambiguous
    .join('releases_actors', 'releases_actors.actor_id', 'actors.id')
    .join('releases', 'releases_actors.release_id', 'releases.id')
    .join('releases_posters', 'releases_posters.release_id', 'releases.id')
    .join('media', 'releases_posters.media_id', 'media.id');

  const targetDirectory = path.join(config.media.path, 'posters');

  // Copying does not create missing parent directories, so make sure the
  // target directory exists before the first poster is written.
  await fs.ensureDir(targetDirectory);

  await Promise.all(posters.map(async (poster) => {
    const source = path.join(config.media.path, poster.path);
    // A string pattern only replaces the first match; the global regex
    // replaces every slash in the title.
    const safeTitle = poster.title.replace(/\//g, '_');
    const target = path.join(targetDirectory, `${safeTitle}.${poster.name}.jpeg`);

    // Copy directly instead of buffering every poster in memory.
    await fs.copyFile(source, target);
  }));
}
// Script entry point: report a failure and exit non-zero instead of leaving
// an unhandled promise rejection.
init().catch((error) => {
  console.error('Failed to extract posters:', error);
  process.exitCode = 1;
});