Improved entity provision behavior.

This commit is contained in:
DebaucheryLibrarian 2020-08-14 00:32:59 +02:00
parent 77566eae0d
commit b3f784686f
4 changed files with 27 additions and 81 deletions

View File

@ -65,9 +65,12 @@ To generate the thumbnails for logos and tag photos, run:
`./traxxx --option value` or `npm start -- --option value`
* `--server`: Run the web server
* `--all`: Fetch updates from the channels and networks in the configuration file.
* `--channel [slug] [slug]`: Fetch updates from specific channels. The slug is the channel's name in lowercase and without spaces or special characters. For example, Teens Like It Big is teenslikeitbig.
* `--network [slug] [slug]`: Fetch updates from all sites of a specific network. The network slug is composed similarly to the channel slug.
#### Channels
* `--channels [slug] [slug]`: Fetch updates from specific channels. The slug is the channel's name in lowercase and without spaces or special characters. For example, Teens Like It Big is teenslikeitbig. Overrides configured included networks and channels.
* `--networks [slug] [slug]`: Fetch updates from all sites of a specific network. The network slug is composed similarly to the channel slug. Overrides configured included networks and channels.
* `--exclude-channels [slug] [slug]`: Scrape every configured, specified or available channel, except for those specified. Overrides configured excluded channels.
* `--exclude-networks [slug] [slug]`: Scrape every configured, specified or available network, except for those specified. Overrides configured excluded networks.
* `--after "[time]"`: Do not fetch scenes older than this period or date. Example values are: `"1 month"`, `"3 years"`, `"2019-01-01"`.
* `--scene [URL]`: Try to retrieve scene details from its official channel or network URL.
* `--deep`: Follow each release link found running `--channel` or `--network` and scrape it for more details. Enabled by default; use `--no-deep` to only save information found on the overview pages.

View File

@ -40,11 +40,21 @@ const { argv } = yargs
type: 'array',
alias: 'network',
})
.option('exclude-networks', {
describe: 'Network not to scrape any channels from (overrides configuration)',
type: 'array',
alias: 'exclude-network',
})
.option('channels', {
describe: 'Channel to scrape (overrides configuration)',
type: 'array',
alias: 'channel',
})
.option('exclude-channels', {
describe: 'Channel not to scrape (overrides configuration)',
type: 'array',
alias: 'exclude-channel',
})
.option('actors', {
describe: 'Scrape actors by name or slug',
type: 'array',

View File

@ -2,7 +2,6 @@
const config = require('config');
const logger = require('./logger')(__filename);
const argv = require('./argv');
const knex = require('./knex');
const whereOr = require('./utils/where-or');
@ -37,61 +36,15 @@ async function curateEntities(entities, includeParameters) {
return Promise.all(entities.map(async entity => curateEntity(entity, includeParameters)));
}
async function fetchChannelsFromArgv() {
const rawNetworks = await knex.raw(`
/* networks from argument with channels as children */
WITH RECURSIVE children AS (
SELECT
entities.*
FROM
entities
WHERE
slug = ANY(?) AND entities.type = 'network'
UNION ALL
SELECT
entities.*
FROM
entities
INNER JOIN
children ON children.id = entities.parent_id
)
SELECT
entities.*, row_to_json(parents) as parent, json_agg(children) as children
FROM
children
LEFT JOIN
entities ON entities.id = children.parent_id
LEFT JOIN
entities AS parents ON parents.id = entities.parent_id
WHERE
children.type = 'channel'
GROUP BY
children.parent_id, entities.id, entities.name, parents.id
async function fetchIncludedEntities() {
const include = {
includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels,
includedNetworks: argv.networks || (!argv.channels && config.include?.networks) || [],
includedChannels: argv.channels || (!argv.networks && config.include?.channels) || [],
excludedNetworks: argv.excludeNetworks || config.exclude?.networks || [],
excludedChannels: argv.excludeChannels || config.exclude?.channels || [],
};
UNION ALL
/* channels from argument as the child of network with parent */
SELECT
entities.*, row_to_json(parents) as parent, json_agg(row_to_json(children))
FROM
entities AS children
LEFT JOIN
entities ON entities.id = children.parent_id
LEFT JOIN
entities AS parents ON parents.id = entities.parent_id
WHERE
children.slug = ANY(?) AND children.type = 'channel'
GROUP BY
entities.id, parents.id;
`, [argv.networks || [], argv.channels || []]);
const curatedNetworks = await curateEntities(rawNetworks.rows, true);
logger.info(`Found ${curatedNetworks.length} networks in database`);
return curatedNetworks;
}
async function fetchChannelsFromConfig() {
const rawNetworks = await knex.raw(`
WITH RECURSIVE channels AS (
/* select configured channels and networks */
@ -142,27 +95,13 @@ async function fetchChannelsFromConfig() {
channels.type = 'channel'
GROUP BY
entities.id
`, {
includeAll: !config.include?.networks && !config.include?.channels,
includedNetworks: config.include?.networks || [],
includedChannels: config.include?.channels || [],
excludedNetworks: config.exclude?.networks || [],
excludedChannels: config.exclude?.channels || [],
});
`, include);
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
return curatedNetworks;
}
/**
 * Resolve the entities to scrape.
 *
 * When networks or channels were passed on the command line, the selection
 * is delegated to fetchChannelsFromArgv(); otherwise it is delegated to
 * fetchChannelsFromConfig().
 *
 * @returns {Promise<*>} whatever the chosen fetcher resolves to
 */
async function fetchIncludedEntities() {
	const hasArgvSelection = Boolean(argv.networks || argv.channels);

	return hasArgvSelection
		? fetchChannelsFromArgv()
		: fetchChannelsFromConfig();
}
async function fetchChannels(queryObject) {
const sites = await knex('sites')
.where(builder => whereOr(queryObject, 'sites', builder))
@ -191,7 +130,5 @@ module.exports = {
curateEntities,
fetchIncludedEntities,
fetchChannels,
fetchChannelsFromConfig,
fetchChannelsFromArgv,
fetchChannelsFromReleases,
};

View File

@ -8,7 +8,7 @@ const logger = require('./logger')(__filename);
const knex = require('./knex');
const include = require('./utils/argv-include')(argv);
const scrapers = require('./scrapers/scrapers');
const { fetchChannelsFromArgv, fetchChannelsFromConfig } = require('./entities');
const { fetchIncludedEntities } = require('./entities');
async function filterUniqueReleases(latestReleases, accReleases) {
const latestReleaseIdentifiers = latestReleases
@ -174,8 +174,6 @@ async function scrapeChannelReleases(scraper, channelEntity, preData) {
: [],
]);
console.log(movies);
logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${channelEntity.name}' (${channelEntity.parent?.name})`);
return [...latestReleases, ...upcomingReleases];
@ -229,9 +227,7 @@ async function scrapeNetworkParallel(networkEntity) {
}
async function fetchUpdates() {
const includedNetworks = argv.channels || argv.networks
? await fetchChannelsFromArgv()
: await fetchChannelsFromConfig();
const includedNetworks = await fetchIncludedEntities();
const scrapedNetworks = await Promise.map(
includedNetworks,