2019-03-18 03:46:53 +00:00
|
|
|
'use strict';
|
|
|
|
|
2019-04-05 01:45:40 +00:00
|
|
|
const config = require('config');
|
2019-03-18 03:46:53 +00:00
|
|
|
const yargs = require('yargs');
|
2020-08-12 18:51:08 +00:00
|
|
|
const moment = require('moment');
|
|
|
|
|
2024-06-05 22:20:54 +00:00
|
|
|
/**
 * Turn a user-supplied date argument into a Date.
 *
 * Accepts either a calendar date ("YYYY-MM-DD" or "DD-MM-YYYY", parsed as UTC)
 * or a time distance such as "1 month", which is subtracted from the current
 * UTC time.
 *
 * @param {string} [after] - Raw argument value.
 * @param {boolean} [ignoreIfEmpty=false] - Return null instead of a fallback
 *   date when no value was given.
 * @returns {Date|null} The interpreted date; for empty input either null
 *   (ignoreIfEmpty) or the fixed fallback `new Date(0, 0, 0)`.
 */
function interpretDate(after, ignoreIfEmpty = false) {
  if (!after) {
    // no value given: callers either opt out entirely or get a fixed fallback date
    return ignoreIfEmpty ? null : new Date(0, 0, 0);
  }

  // the pattern is not anchored, so a date anywhere in the string selects this branch
  const looksLikeDate = /\d{2,4}-\d{2}-\d{2,4}/.test(after);

  if (looksLikeDate) {
    // using date
    const parsed = moment.utc(after, ['YYYY-MM-DD', 'DD-MM-YYYY']);

    return parsed.toDate();
  }

  // using time distance (e.g. "1 month")
  const now = moment.utc();

  return now.subtract(...after.split(' ')).toDate();
}
|
2019-03-18 03:46:53 +00:00
|
|
|
|
|
|
|
// Command-line interface definition. Declares the commands and options that
// yargs parses into `argv`; several defaults are read from the `config` module,
// and the date-like options are passed through interpretDate via coerce().
const { argv } = yargs
  .command('npm start')
  .command('update', 'fetch latest updates', () => {
    console.log('UPDATING!');
  })
  .command('scene', 'fetch scene by URL', () => {
    console.log('SCENE!');
  })
  .option('server', {
    describe: 'Start web server',
    type: 'boolean',
    alias: 'web',
  })
  .option('include-networks', {
    describe: 'Network to scrape all channels from (overrides configuration)',
    type: 'array',
    alias: ['include-network', 'networks', 'network'],
  })
  .option('exclude-networks', {
    describe: 'Network not to scrape any channels from (overrides configuration)',
    type: 'array',
    alias: 'exclude-network',
  })
  .option('include-channels', {
    describe: 'Channel to scrape (overrides configuration)',
    type: 'array',
    alias: ['include-channel', 'channels', 'channel'],
  })
  .option('exclude-channels', {
    describe: 'Channel not to scrape (overrides configuration)',
    type: 'array',
    alias: 'exclude-channel',
  })
  .option('actors', {
    describe: 'Scrape actors by name or slug',
    type: 'array',
    alias: 'actor',
  })
  .option('actors-update', {
    describe: 'Rescrape actors last updated before this period',
    type: 'string',
  })
  .option('actors-file', {
    describe: 'Scrape actors names from file',
    type: 'string',
  })
  .option('actors-scenes', {
    describe: 'Fetch all scenes for an actor',
    type: 'boolean',
    default: false,
    alias: 'actor-scenes',
  })
  .option('actor-sources', {
    describe: 'Use these scrapers for actor data',
    type: 'array',
    alias: ['actor-source', 'profile-sources', 'profile-source', 'source', 'sources'],
  })
  .option('movie-scenes', {
    describe: 'Fetch all scenes for a movie',
    type: 'boolean',
    alias: 'with-scenes',
    default: false,
  })
  .option('scene-movies', {
    describe: 'Fetch movies for scenes',
    type: 'boolean',
    default: true,
  })
  .option('scene-actors', {
    describe: 'Scrape profiles for new actors after fetching scenes',
    type: 'boolean',
    default: false,
  })
  .option('scene', {
    describe: 'Scrape scene info from URL',
    type: 'array',
  })
  .option('scene-file', {
    describe: 'Scrape scene info from URLs in a file',
    type: 'string',
    alias: 'scenes-file',
  })
  .option('movie', {
    describe: 'Scrape movie info from URL',
    type: 'array',
  })
  .option('movie-file', {
    describe: 'Scrape movie info from URLs in a file',
    type: 'string',
    alias: 'movies-file',
  })
  .option('deep', {
    describe: 'Fetch details for all releases',
    type: 'boolean',
    default: true,
  })
  .option('latest', {
    describe: 'Scrape latest releases if available',
    type: 'boolean',
    default: false,
  })
  .option('upcoming', {
    describe: 'Scrape upcoming releases if available',
    type: 'boolean',
    default: false,
  })
  .option('paginate-upcoming', {
    describe: 'Attempt \'next\' upcoming page, usually unavailable.',
    type: 'boolean',
    default: false,
  })
  .option('movies', {
    describe: 'Scrape movies from channels',
    type: 'boolean',
    default: false,
  })
  .option('force', {
    describe: 'Don\'t ignore duplicates, update existing entries',
    type: 'boolean',
    alias: 'redownload',
  })
  .option('force-media', {
    describe: 'Force existing media to be redownloaded.',
    type: 'boolean',
    default: false,
  })
  .option('after', {
    describe: 'Don\'t fetch scenes older than',
    type: 'string',
    default: config.fetchAfter.join(' '),
  })
  .option('last', {
    describe: 'Get the latest x releases, no matter the date range',
    type: 'number',
  })
  .option('missing-date-limit', {
    describe: 'Limit amount of scenes when dates are missing.',
    type: 'number',
    default: config.missingDateLimit,
    alias: ['null-date-limit'],
  })
  .option('page', {
    describe: 'Page to start scraping at',
    type: 'number',
    default: 1,
  })
  .option('interval', {
    describe: 'Minimum wait time between HTTP requests',
    type: 'number',
    // don't set default, because argument has to override config, but config has to override default
  })
  .option('concurrency', {
    describe: 'Maximum amount of parallel HTTP requests',
    type: 'number',
    // don't set default, because argument has to override config, but config has to override default
  })
  .option('save', {
    describe: 'Save fetched releases to database',
    type: 'boolean',
    default: true,
  })
  .option('media', {
    describe: 'Include any release media',
    type: 'boolean',
    default: true,
  })
  .option('media-limit', {
    describe: 'Maximum amount of assets of each type per release',
    type: 'number',
    default: config.media.limit,
  })
  .option('media-attempts', {
    describe: 'Maximum amount of retries per URL',
    type: 'number',
    default: config.media.attempts,
  })
  .option('images', {
    describe: 'Include any photos, posters or covers',
    type: 'boolean',
    default: true,
    alias: 'pics',
  })
  .option('videos', {
    alias: 'video',
    describe: 'Include any trailers or teasers',
    type: 'boolean',
    default: true,
  })
  .option('posters', {
    describe: 'Include release posters',
    type: 'boolean',
    default: true,
    alias: 'poster',
  })
  .option('covers', {
    describe: 'Include release covers',
    type: 'boolean',
    default: true,
    alias: 'cover',
  })
  .option('photos', {
    describe: 'Include release photos',
    type: 'boolean',
    default: true,
  })
  .option('caps', {
    describe: 'Include release screen caps',
    type: 'boolean',
    default: true,
  })
  .option('trailers', {
    describe: 'Include release trailers',
    type: 'boolean',
    default: true,
    alias: 'trailer',
  })
  .option('teasers', {
    describe: 'Include release teasers',
    type: 'boolean',
    default: true,
    alias: 'teaser',
  })
  .option('avatars', {
    describe: 'Include actor avatars',
    type: 'boolean',
    default: true,
  })
  .option('report', {
    describe: 'Show data in console.',
    type: 'boolean',
    default: false,
  })
  .option('level', {
    alias: 'log-level',
    describe: 'Log level',
    type: 'string',
    default: process.env.NODE_ENV === 'development' ? 'silly' : 'info',
  })
  .option('prefer-entity', {
    alias: 'prefer',
    describe: 'Prefer network or channel when resolving entities with the same identifier.',
    choices: ['channel', 'network'],
    type: 'string',
  })
  .option('resolve-place', {
    describe: 'Call OSM Nominatim API for actor place of birth and residence. Raw value discarded if disabled.',
    type: 'boolean',
    default: true,
  })
  .option('debug', {
    describe: 'Show error stack traces',
    type: 'boolean',
    default: process.env.NODE_ENV === 'development',
  })
  .option('sampleMemory', {
    alias: 'memory',
    describe: 'Take memory allocation samples, and snapshots at configured intervals',
    type: 'boolean',
    default: config.memorySampling.enabled,
  })
  .option('update-search', {
    describe: 'Update search documents for all releases.',
    type: 'boolean',
    default: false,
  })
  .option('interpolate-profiles', {
    describe: 'Interpolate actor profiles',
    type: 'array',
    alias: 'interpolate',
  })
  .option('flush-orphaned-media', {
    describe: 'Remove all orphaned media items from database and disk.',
    type: 'boolean',
    alias: 'flush-media',
  })
  .option('flush-media-files', {
    describe: 'Remove files from storage when flushing media.',
    type: 'boolean',
    alias: 'flush-files',
    default: true,
  })
  .option('flush-channels', {
    describe: 'Delete all scenes and movies from channels.',
    type: 'array',
    alias: 'flush-channel',
  })
  .option('flush-networks', {
    describe: 'Delete all scenes and movies from network.',
    type: 'array',
    alias: 'flush-network',
  })
  .option('flush-actors', {
    describe: 'Flush all actors and their scenes.',
    type: 'array',
    alias: 'flush-actor',
  })
  .option('flush-profiles', {
    describe: 'Delete all profiles for an actor.',
    type: 'array',
    alias: 'flush-profile',
  })
  .option('flush-batches', {
    describe: 'Delete all scenes and movies from batch by ID.',
    type: 'array',
    alias: 'flush-batch',
  })
  .option('flush-scenes', {
    describe: 'Remove all scenes.',
    type: 'boolean',
  })
  .option('flush-movies', {
    describe: 'Remove all movies.',
    type: 'boolean',
  })
  .option('flush-after', {
    describe: 'Only delete scenes released after including',
    type: 'string',
  })
  .option('flush-before', {
    describe: 'Only delete scenes released before including',
    type: 'string',
  })
  .option('delete-actors', {
    describe: 'Remove actors by ID.',
    type: 'array',
    alias: ['delete-actor', 'remove-actors', 'remove-actor'],
  })
  .option('delete-scenes', {
    describe: 'Remove scenes by ID.',
    type: 'array',
    alias: ['delete-scene', 'delete', 'remove', 'remove-scenes', 'remove-scene'],
  })
  .option('delete-movies', {
    describe: 'Remove movies by ID.',
    type: 'array',
    // singular 'remove-movie' mirrors the alias sets of delete-actors and delete-scenes
    alias: ['delete-movie', 'remove-movies', 'remove-movie'],
  })
  .option('request', {
    describe: 'Make an arbitrary HTTP request',
    type: 'string',
  })
  .option('request-method', {
    alias: 'method',
    describe: 'HTTP method for arbitrary HTTP requests',
    type: 'string',
    default: 'get',
  })
  .option('request-timeout', {
    describe: 'Default timeout after which to cancel a HTTP request.',
    type: 'number',
    alias: ['timeout'],
    default: 60000,
  })
  .option('showcased', {
    describe: 'Whether the batch should be showcased as new.',
    type: 'boolean',
    alias: ['showcase', 'batch-showcased'],
    default: true,
  })
  // date-like options accept a calendar date or a time distance; see interpretDate
  .coerce('after', interpretDate)
  .coerce('flush-after', interpretDate)
  .coerce('flush-before', interpretDate)
  // an absent actors-update value must stay null rather than fall back to a date
  .coerce('actors-update', (after) => interpretDate(after, true));
|
2019-03-18 03:46:53 +00:00
|
|
|
|
|
|
|
module.exports = argv;
|