Improved entity provision behavior.

This commit is contained in:
DebaucheryLibrarian
2020-08-14 00:32:59 +02:00
parent 77566eae0d
commit b3f784686f
4 changed files with 27 additions and 81 deletions

View File

@@ -40,11 +40,21 @@ const { argv } = yargs
type: 'array',
alias: 'network',
})
.option('exclude-networks', {
describe: 'Network not to scrape any channels from (overrides configuration)',
type: 'array',
alias: 'exclude-network',
})
.option('channels', {
describe: 'Channel to scrape (overrides configuration)',
type: 'array',
alias: 'channel',
})
.option('exclude-channels', {
describe: 'Channel not to scrape (overrides configuration)',
type: 'array',
alias: 'exclude-channel',
})
.option('actors', {
describe: 'Scrape actors by name or slug',
type: 'array',

View File

@@ -2,7 +2,6 @@
const config = require('config');
const logger = require('./logger')(__filename);
const argv = require('./argv');
const knex = require('./knex');
const whereOr = require('./utils/where-or');
@@ -37,61 +36,15 @@ async function curateEntities(entities, includeParameters) {
return Promise.all(entities.map(async entity => curateEntity(entity, includeParameters)));
}
async function fetchChannelsFromArgv() {
const rawNetworks = await knex.raw(`
/* networks from argument with channels as children */
WITH RECURSIVE children AS (
SELECT
entities.*
FROM
entities
WHERE
slug = ANY(?) AND entities.type = 'network'
UNION ALL
SELECT
entities.*
FROM
entities
INNER JOIN
children ON children.id = entities.parent_id
)
SELECT
entities.*, row_to_json(parents) as parent, json_agg(children) as children
FROM
children
LEFT JOIN
entities ON entities.id = children.parent_id
LEFT JOIN
entities AS parents ON parents.id = entities.parent_id
WHERE
children.type = 'channel'
GROUP BY
children.parent_id, entities.id, entities.name, parents.id
async function fetchIncludedEntities() {
const include = {
includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels,
includedNetworks: argv.networks || (!argv.channels && config.include?.networks) || [],
includedChannels: argv.channels || (!argv.networks && config.include?.channels) || [],
excludedNetworks: argv.excludeNetworks || config.exclude?.networks || [],
excludedChannels: argv.excludeChannels || config.exclude?.channels || [],
};
UNION ALL
/* channels from argument as the child of network with parent */
SELECT
entities.*, row_to_json(parents) as parent, json_agg(row_to_json(children))
FROM
entities AS children
LEFT JOIN
entities ON entities.id = children.parent_id
LEFT JOIN
entities AS parents ON parents.id = entities.parent_id
WHERE
children.slug = ANY(?) AND children.type = 'channel'
GROUP BY
entities.id, parents.id;
`, [argv.networks || [], argv.channels || []]);
const curatedNetworks = await curateEntities(rawNetworks.rows, true);
logger.info(`Found ${curatedNetworks.length} networks in database`);
return curatedNetworks;
}
async function fetchChannelsFromConfig() {
const rawNetworks = await knex.raw(`
WITH RECURSIVE channels AS (
/* select configured channels and networks */
@@ -142,27 +95,13 @@ async function fetchChannelsFromConfig() {
channels.type = 'channel'
GROUP BY
entities.id
`, {
includeAll: !config.include?.networks && !config.include?.channels,
includedNetworks: config.include?.networks || [],
includedChannels: config.include?.channels || [],
excludedNetworks: config.exclude?.networks || [],
excludedChannels: config.exclude?.channels || [],
});
`, include);
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
return curatedNetworks;
}
/**
 * Pick the entity source for this run.
 *
 * When either `argv.networks` or `argv.channels` is set, the entities are
 * fetched via `fetchChannelsFromArgv()`; otherwise they are fetched via
 * `fetchChannelsFromConfig()`.
 *
 * @returns {Promise<*>} Resolves to whatever the selected fetcher resolves to.
 */
async function fetchIncludedEntities() {
	// A truthy network or channel argument takes precedence over the config path.
	const selectedOnCommandLine = argv.networks || argv.channels;

	return selectedOnCommandLine
		? fetchChannelsFromArgv()
		: fetchChannelsFromConfig();
}
async function fetchChannels(queryObject) {
const sites = await knex('sites')
.where(builder => whereOr(queryObject, 'sites', builder))
@@ -191,7 +130,5 @@ module.exports = {
curateEntities,
fetchIncludedEntities,
fetchChannels,
fetchChannelsFromConfig,
fetchChannelsFromArgv,
fetchChannelsFromReleases,
};

View File

@@ -8,7 +8,7 @@ const logger = require('./logger')(__filename);
const knex = require('./knex');
const include = require('./utils/argv-include')(argv);
const scrapers = require('./scrapers/scrapers');
const { fetchChannelsFromArgv, fetchChannelsFromConfig } = require('./entities');
const { fetchIncludedEntities } = require('./entities');
async function filterUniqueReleases(latestReleases, accReleases) {
const latestReleaseIdentifiers = latestReleases
@@ -174,8 +174,6 @@ async function scrapeChannelReleases(scraper, channelEntity, preData) {
: [],
]);
console.log(movies);
logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${channelEntity.name}' (${channelEntity.parent?.name})`);
return [...latestReleases, ...upcomingReleases];
@@ -229,9 +227,7 @@ async function scrapeNetworkParallel(networkEntity) {
}
async function fetchUpdates() {
const includedNetworks = argv.channels || argv.networks
? await fetchChannelsFromArgv()
: await fetchChannelsFromConfig();
const includedNetworks = await fetchIncludedEntities();
const scrapedNetworks = await Promise.map(
includedNetworks,