Improved entity provision behavior.

This commit is contained in:
DebaucheryLibrarian 2020-08-14 00:32:59 +02:00
parent 77566eae0d
commit b3f784686f
4 changed files with 27 additions and 81 deletions

View File

@ -65,9 +65,12 @@ To generate the thumbnails for logos and tag photos, run:
`./traxxx --option value` or `npm start -- --option value` `./traxxx --option value` or `npm start -- --option value`
* `--server`: Run the web server * `--server`: Run the web server
* `--all`: Fetch updates from the channels and networks in the configuration file.
* `--channel [slug] [slug]`: Fetch updates from specific channels. The slug is the channel's name in lowercase and without spaces or special characters. For example, Teens Like It Big is teenslikeitbig. #### Channels
* `--network [slug] [slug]`: Fetch updates from all sites of a specific network. The network slug is composed similarly to the channel slug. * `--channels [slug] [slug]`: Fetch updates from specific channels. The slug is the channel's name in lowercase and without spaces or special characters. For example, Teens Like It Big is teenslikeitbig. Overrides configured included networks and channels.
* `--networks [slug] [slug]`: Fetch updates from all sites of a specific network. The network slug is composed similarly to the channel slug. Overrides configured included networks and channels.
* `--exclude-channels [slug] [slug]`: Scrape every configured, specified or available channel, except for those specified. Overrides configured excluded channels.
* `--exclude-networks [slug] [slug]`: Scrape every configured, specified or available network, except for those specified. Overrides configured excluded networks.
* `--after "[time]"`: Do not fetch scenes older than this period or date. Example values are: `"1 month"`, `"3 years"`, `"2019-01-01"`. * `--after "[time]"`: Do not fetch scenes older than this period or date. Example values are: `"1 month"`, `"3 years"`, `"2019-01-01"`.
* `--scene [URL]`: Try to retrieve scene details from its official channel or network URL. * `--scene [URL]`: Try to retrieve scene details from its official channel or network URL.
* `--deep`: Follow each release link found running `--channel` or `--network` and scrape it for more details. Enabled by default; use `--no-deep` to only save information found on the overview pages. * `--deep`: Follow each release link found running `--channel` or `--network` and scrape it for more details. Enabled by default; use `--no-deep` to only save information found on the overview pages.

View File

@ -40,11 +40,21 @@ const { argv } = yargs
type: 'array', type: 'array',
alias: 'network', alias: 'network',
}) })
.option('exclude-networks', {
describe: 'Network not to scrape any channels from (overrides configuration)',
type: 'array',
alias: 'exclude-network',
})
.option('channels', { .option('channels', {
describe: 'Channel to scrape (overrides configuration)', describe: 'Channel to scrape (overrides configuration)',
type: 'array', type: 'array',
alias: 'channel', alias: 'channel',
}) })
.option('exclude-channels', {
describe: 'Channel not to scrape (overrides configuration)',
type: 'array',
alias: 'exclude-channel',
})
.option('actors', { .option('actors', {
describe: 'Scrape actors by name or slug', describe: 'Scrape actors by name or slug',
type: 'array', type: 'array',

View File

@ -2,7 +2,6 @@
const config = require('config'); const config = require('config');
const logger = require('./logger')(__filename);
const argv = require('./argv'); const argv = require('./argv');
const knex = require('./knex'); const knex = require('./knex');
const whereOr = require('./utils/where-or'); const whereOr = require('./utils/where-or');
@ -37,61 +36,15 @@ async function curateEntities(entities, includeParameters) {
return Promise.all(entities.map(async entity => curateEntity(entity, includeParameters))); return Promise.all(entities.map(async entity => curateEntity(entity, includeParameters)));
} }
async function fetchChannelsFromArgv() { async function fetchIncludedEntities() {
const rawNetworks = await knex.raw(` const include = {
/* networks from argument with channels as children */ includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels,
WITH RECURSIVE children AS ( includedNetworks: argv.networks || (!argv.channels && config.include?.networks) || [],
SELECT includedChannels: argv.channels || (!argv.networks && config.include?.channels) || [],
entities.* excludedNetworks: argv.excludeNetworks || config.exclude?.networks || [],
FROM excludedChannels: argv.excludeChannels || config.exclude?.channels || [],
entities };
WHERE
slug = ANY(?) AND entities.type = 'network'
UNION ALL
SELECT
entities.*
FROM
entities
INNER JOIN
children ON children.id = entities.parent_id
)
SELECT
entities.*, row_to_json(parents) as parent, json_agg(children) as children
FROM
children
LEFT JOIN
entities ON entities.id = children.parent_id
LEFT JOIN
entities AS parents ON parents.id = entities.parent_id
WHERE
children.type = 'channel'
GROUP BY
children.parent_id, entities.id, entities.name, parents.id
UNION ALL
/* channels from argument as the child of network with parent */
SELECT
entities.*, row_to_json(parents) as parent, json_agg(row_to_json(children))
FROM
entities AS children
LEFT JOIN
entities ON entities.id = children.parent_id
LEFT JOIN
entities AS parents ON parents.id = entities.parent_id
WHERE
children.slug = ANY(?) AND children.type = 'channel'
GROUP BY
entities.id, parents.id;
`, [argv.networks || [], argv.channels || []]);
const curatedNetworks = await curateEntities(rawNetworks.rows, true);
logger.info(`Found ${curatedNetworks.length} networks in database`);
return curatedNetworks;
}
async function fetchChannelsFromConfig() {
const rawNetworks = await knex.raw(` const rawNetworks = await knex.raw(`
WITH RECURSIVE channels AS ( WITH RECURSIVE channels AS (
/* select configured channels and networks */ /* select configured channels and networks */
@ -142,27 +95,13 @@ async function fetchChannelsFromConfig() {
channels.type = 'channel' channels.type = 'channel'
GROUP BY GROUP BY
entities.id entities.id
`, { `, include);
includeAll: !config.include?.networks && !config.include?.channels,
includedNetworks: config.include?.networks || [],
includedChannels: config.include?.channels || [],
excludedNetworks: config.exclude?.networks || [],
excludedChannels: config.exclude?.channels || [],
});
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true)); const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
return curatedNetworks; return curatedNetworks;
} }
async function fetchIncludedEntities() {
if (argv.networks || argv.channels) {
return fetchChannelsFromArgv();
}
return fetchChannelsFromConfig();
}
async function fetchChannels(queryObject) { async function fetchChannels(queryObject) {
const sites = await knex('sites') const sites = await knex('sites')
.where(builder => whereOr(queryObject, 'sites', builder)) .where(builder => whereOr(queryObject, 'sites', builder))
@ -191,7 +130,5 @@ module.exports = {
curateEntities, curateEntities,
fetchIncludedEntities, fetchIncludedEntities,
fetchChannels, fetchChannels,
fetchChannelsFromConfig,
fetchChannelsFromArgv,
fetchChannelsFromReleases, fetchChannelsFromReleases,
}; };

View File

@ -8,7 +8,7 @@ const logger = require('./logger')(__filename);
const knex = require('./knex'); const knex = require('./knex');
const include = require('./utils/argv-include')(argv); const include = require('./utils/argv-include')(argv);
const scrapers = require('./scrapers/scrapers'); const scrapers = require('./scrapers/scrapers');
const { fetchChannelsFromArgv, fetchChannelsFromConfig } = require('./entities'); const { fetchIncludedEntities } = require('./entities');
async function filterUniqueReleases(latestReleases, accReleases) { async function filterUniqueReleases(latestReleases, accReleases) {
const latestReleaseIdentifiers = latestReleases const latestReleaseIdentifiers = latestReleases
@ -174,8 +174,6 @@ async function scrapeChannelReleases(scraper, channelEntity, preData) {
: [], : [],
]); ]);
console.log(movies);
logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${channelEntity.name}' (${channelEntity.parent?.name})`); logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${channelEntity.name}' (${channelEntity.parent?.name})`);
return [...latestReleases, ...upcomingReleases]; return [...latestReleases, ...upcomingReleases];
@ -229,9 +227,7 @@ async function scrapeNetworkParallel(networkEntity) {
} }
async function fetchUpdates() { async function fetchUpdates() {
const includedNetworks = argv.channels || argv.networks const includedNetworks = await fetchIncludedEntities();
? await fetchChannelsFromArgv()
: await fetchChannelsFromConfig();
const scrapedNetworks = await Promise.map( const scrapedNetworks = await Promise.map(
includedNetworks, includedNetworks,