From b3f784686f4acf187533177f0f741eb9fd086f88 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Fri, 14 Aug 2020 00:32:59 +0200 Subject: [PATCH] Improved entity provision behavior. --- README.md | 9 ++++-- src/argv.js | 10 ++++++ src/entities.js | 81 ++++++------------------------------------------- src/updates.js | 8 ++--- 4 files changed, 27 insertions(+), 81 deletions(-) diff --git a/README.md b/README.md index dac131932..a246c9728 100644 --- a/README.md +++ b/README.md @@ -65,9 +65,12 @@ To generate the thumbnails for logos and tag photos, run: `./traxxx --option value` or `npm start -- --option value` * `--server`: Run the web server -* `--all`: Fetch updates from the channels and networks in the configuration file. -* `--channel [slug] [slug]`: Fetch updates from specific channels. The slug is the channel's name in lowercase and without cases or special characters. For example, Teens Like It Big is teenslikeitbig. -* `--network [slug] [slug]`: Fetch updates from all sites of a specific network. The network slug is composed similarly to the channel slug. + +#### Channels +* `--channels [slug] [slug]`: Fetch updates from specific channels. The slug is the channel's name in lowercase and without spaces or special characters. For example, Teens Like It Big is teenslikeitbig. Overrides configured included networks and channels. +* `--networks [slug] [slug]`: Fetch updates from all sites of a specific network. The network slug is composed similarly to the channel slug. Overrides configured included networks and channels. +* `--exclude-channels [slug] [slug]`: Scrape every configured, specified or available channel, except for those specified. Overrides configured excluded channels. +* `--exclude-networks [slug] [slug]`: Scrape every configured, specified or available network, except for those specified. Overrides configured excluded networks. * `--after "[time]"`: Do not fetch scenes older than this period or date. 
Example values are: `"1 month"`, `"3 years"`, `"2019-01-01"`. * `--scene [URL]`: Try to retrieve scene details from its official channel or network URL. * `--deep`: Follow each release link found running `--channel` or `--network` and scrape it for more details. Enabled by default ; use `--no-deep` to only save information found on the overview pages. diff --git a/src/argv.js b/src/argv.js index 2bb12b7d0..413e87e5a 100644 --- a/src/argv.js +++ b/src/argv.js @@ -40,11 +40,21 @@ const { argv } = yargs type: 'array', alias: 'network', }) + .option('exclude-networks', { + describe: 'Network not to scrape any channels from (overrides configuration)', + type: 'array', + alias: 'exclude-network', + }) .option('channels', { describe: 'Channel to scrape (overrides configuration)', type: 'array', alias: 'channel', }) + .option('exclude-channels', { + describe: 'Channel not to scrape (overrides configuration)', + type: 'array', + alias: 'exclude-channel', + }) .option('actors', { describe: 'Scrape actors by name or slug', type: 'array', diff --git a/src/entities.js b/src/entities.js index 0028ca458..819798a58 100644 --- a/src/entities.js +++ b/src/entities.js @@ -2,7 +2,6 @@ const config = require('config'); -const logger = require('./logger')(__filename); const argv = require('./argv'); const knex = require('./knex'); const whereOr = require('./utils/where-or'); @@ -37,61 +36,15 @@ async function curateEntities(entities, includeParameters) { return Promise.all(entities.map(async entity => curateEntity(entity, includeParameters))); } -async function fetchChannelsFromArgv() { - const rawNetworks = await knex.raw(` - /* networks from argument with channels as children */ - WITH RECURSIVE children AS ( - SELECT - entities.* - FROM - entities - WHERE - slug = ANY(?) 
AND entities.type = 'network' - UNION ALL - SELECT - entities.* - FROM - entities - INNER JOIN - children ON children.id = entities.parent_id - ) - SELECT - entities.*, row_to_json(parents) as parent, json_agg(children) as children - FROM - children - LEFT JOIN - entities ON entities.id = children.parent_id - LEFT JOIN - entities AS parents ON parents.id = entities.parent_id - WHERE - children.type = 'channel' - GROUP BY - children.parent_id, entities.id, entities.name, parents.id +async function fetchIncludedEntities() { + const include = { + includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels, + includedNetworks: argv.networks || (!argv.channels && config.include?.networks) || [], + includedChannels: argv.channels || (!argv.networks && config.include?.channels) || [], + excludedNetworks: argv.excludeNetworks || config.exclude?.networks || [], + excludedChannels: argv.excludeChannels || config.exclude?.channels || [], + }; - UNION ALL - - /* channels from argument as the child of network with parent */ - SELECT - entities.*, row_to_json(parents) as parent, json_agg(row_to_json(children)) - FROM - entities AS children - LEFT JOIN - entities ON entities.id = children.parent_id - LEFT JOIN - entities AS parents ON parents.id = entities.parent_id - WHERE - children.slug = ANY(?) 
AND children.type = 'channel' - GROUP BY - entities.id, parents.id; - `, [argv.networks || [], argv.channels || []]); - - const curatedNetworks = await curateEntities(rawNetworks.rows, true); - logger.info(`Found ${curatedNetworks.length} networks in database`); - - return curatedNetworks; -} - -async function fetchChannelsFromConfig() { const rawNetworks = await knex.raw(` WITH RECURSIVE channels AS ( /* select configured channels and networks */ @@ -142,27 +95,13 @@ async function fetchChannelsFromConfig() { channels.type = 'channel' GROUP BY entities.id - `, { - includeAll: !config.include?.networks && !config.include?.channels, - includedNetworks: config.include?.networks || [], - includedChannels: config.include?.channels || [], - excludedNetworks: config.exclude?.networks || [], - excludedChannels: config.exclude?.channels || [], - }); + `, include); const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true)); return curatedNetworks; } -async function fetchIncludedEntities() { - if (argv.networks || argv.channels) { - return fetchChannelsFromArgv(); - } - - return fetchChannelsFromConfig(); -} - async function fetchChannels(queryObject) { const sites = await knex('sites') .where(builder => whereOr(queryObject, 'sites', builder)) @@ -191,7 +130,5 @@ module.exports = { curateEntities, fetchIncludedEntities, fetchChannels, - fetchChannelsFromConfig, - fetchChannelsFromArgv, fetchChannelsFromReleases, }; diff --git a/src/updates.js b/src/updates.js index 0933c3f85..cc48fae99 100644 --- a/src/updates.js +++ b/src/updates.js @@ -8,7 +8,7 @@ const logger = require('./logger')(__filename); const knex = require('./knex'); const include = require('./utils/argv-include')(argv); const scrapers = require('./scrapers/scrapers'); -const { fetchChannelsFromArgv, fetchChannelsFromConfig } = require('./entities'); +const { fetchIncludedEntities } = require('./entities'); async function filterUniqueReleases(latestReleases, accReleases) { const 
latestReleaseIdentifiers = latestReleases @@ -174,8 +174,6 @@ async function scrapeChannelReleases(scraper, channelEntity, preData) { : [], ]); - console.log(movies); - logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${channelEntity.name}' (${channelEntity.parent?.name})`); return [...latestReleases, ...upcomingReleases]; @@ -229,9 +227,7 @@ async function scrapeNetworkParallel(networkEntity) { } async function fetchUpdates() { - const includedNetworks = argv.channels || argv.networks - ? await fetchChannelsFromArgv() - : await fetchChannelsFromConfig(); + const includedNetworks = await fetchIncludedEntities(); const scrapedNetworks = await Promise.map( includedNetworks,