forked from DebaucheryLibrarian/traxxx
Improved entity provision behavior.
This commit is contained in:
parent
77566eae0d
commit
b3f784686f
|
@ -65,9 +65,12 @@ To generate the thumbnails for logos and tag photos, run:
|
|||
`./traxxx --option value` or `npm start -- --option value`
|
||||
|
||||
* `--server`: Run the web server
|
||||
* `--all`: Fetch updates from the channels and networks in the configuration file.
|
||||
* `--channel [slug] [slug]`: Fetch updates from specific channels. The slug is the channel's name in lowercase and without spaces or special characters. For example, Teens Like It Big is teenslikeitbig.
|
||||
* `--network [slug] [slug]`: Fetch updates from all sites of a specific network. The network slug is composed similarly to the channel slug.
|
||||
|
||||
#### Channels
|
||||
* `--channels [slug] [slug]`: Fetch updates from specific channels. The slug is the channel's name in lowercase and without spaces or special characters. For example, Teens Like It Big is teenslikeitbig. Overrides configured included networks and channels.
|
||||
* `--networks [slug] [slug]`: Fetch updates from all sites of a specific network. The network slug is composed similarly to the channel slug. Overrides configured included networks and channels.
|
||||
* `--exclude-channels [slug] [slug]`: Scrape every configured, specified or available channel, except for those specified. Overrides configured excluded channels.
|
||||
* `--exclude-networks [slug] [slug]`: Scrape every configured, specified or available network, except for those specified. Overrides configured excluded networks.
|
||||
* `--after "[time]"`: Do not fetch scenes older than this period or date. Example values are: `"1 month"`, `"3 years"`, `"2019-01-01"`.
|
||||
* `--scene [URL]`: Try to retrieve scene details from its official channel or network URL.
|
||||
* `--deep`: Follow each release link found when running `--channel` or `--network` and scrape it for more details. Enabled by default; use `--no-deep` to only save information found on the overview pages.
|
||||
|
|
10
src/argv.js
10
src/argv.js
|
@ -40,11 +40,21 @@ const { argv } = yargs
|
|||
type: 'array',
|
||||
alias: 'network',
|
||||
})
|
||||
.option('exclude-networks', {
|
||||
describe: 'Network not to scrape any channels from (overrides configuration)',
|
||||
type: 'array',
|
||||
alias: 'exclude-network',
|
||||
})
|
||||
.option('channels', {
|
||||
describe: 'Channel to scrape (overrides configuration)',
|
||||
type: 'array',
|
||||
alias: 'channel',
|
||||
})
|
||||
.option('exclude-channels', {
|
||||
describe: 'Channel not to scrape (overrides configuration)',
|
||||
type: 'array',
|
||||
alias: 'exclude-channel',
|
||||
})
|
||||
.option('actors', {
|
||||
describe: 'Scrape actors by name or slug',
|
||||
type: 'array',
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
const config = require('config');
|
||||
|
||||
const logger = require('./logger')(__filename);
|
||||
const argv = require('./argv');
|
||||
const knex = require('./knex');
|
||||
const whereOr = require('./utils/where-or');
|
||||
|
@ -37,61 +36,15 @@ async function curateEntities(entities, includeParameters) {
|
|||
return Promise.all(entities.map(async entity => curateEntity(entity, includeParameters)));
|
||||
}
|
||||
|
||||
async function fetchChannelsFromArgv() {
|
||||
const rawNetworks = await knex.raw(`
|
||||
/* networks from argument with channels as children */
|
||||
WITH RECURSIVE children AS (
|
||||
SELECT
|
||||
entities.*
|
||||
FROM
|
||||
entities
|
||||
WHERE
|
||||
slug = ANY(?) AND entities.type = 'network'
|
||||
UNION ALL
|
||||
SELECT
|
||||
entities.*
|
||||
FROM
|
||||
entities
|
||||
INNER JOIN
|
||||
children ON children.id = entities.parent_id
|
||||
)
|
||||
SELECT
|
||||
entities.*, row_to_json(parents) as parent, json_agg(children) as children
|
||||
FROM
|
||||
children
|
||||
LEFT JOIN
|
||||
entities ON entities.id = children.parent_id
|
||||
LEFT JOIN
|
||||
entities AS parents ON parents.id = entities.parent_id
|
||||
WHERE
|
||||
children.type = 'channel'
|
||||
GROUP BY
|
||||
children.parent_id, entities.id, entities.name, parents.id
|
||||
async function fetchIncludedEntities() {
|
||||
const include = {
|
||||
includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels,
|
||||
includedNetworks: argv.networks || (!argv.channels && config.include?.networks) || [],
|
||||
includedChannels: argv.channels || (!argv.networks && config.include?.channels) || [],
|
||||
excludedNetworks: argv.excludeNetworks || config.exclude?.networks || [],
|
||||
excludedChannels: argv.excludeChannels || config.exclude?.channels || [],
|
||||
};
|
||||
|
||||
UNION ALL
|
||||
|
||||
/* channels from argument as the child of network with parent */
|
||||
SELECT
|
||||
entities.*, row_to_json(parents) as parent, json_agg(row_to_json(children))
|
||||
FROM
|
||||
entities AS children
|
||||
LEFT JOIN
|
||||
entities ON entities.id = children.parent_id
|
||||
LEFT JOIN
|
||||
entities AS parents ON parents.id = entities.parent_id
|
||||
WHERE
|
||||
children.slug = ANY(?) AND children.type = 'channel'
|
||||
GROUP BY
|
||||
entities.id, parents.id;
|
||||
`, [argv.networks || [], argv.channels || []]);
|
||||
|
||||
const curatedNetworks = await curateEntities(rawNetworks.rows, true);
|
||||
logger.info(`Found ${curatedNetworks.length} networks in database`);
|
||||
|
||||
return curatedNetworks;
|
||||
}
|
||||
|
||||
async function fetchChannelsFromConfig() {
|
||||
const rawNetworks = await knex.raw(`
|
||||
WITH RECURSIVE channels AS (
|
||||
/* select configured channels and networks */
|
||||
|
@ -142,27 +95,13 @@ async function fetchChannelsFromConfig() {
|
|||
channels.type = 'channel'
|
||||
GROUP BY
|
||||
entities.id
|
||||
`, {
|
||||
includeAll: !config.include?.networks && !config.include?.channels,
|
||||
includedNetworks: config.include?.networks || [],
|
||||
includedChannels: config.include?.channels || [],
|
||||
excludedNetworks: config.exclude?.networks || [],
|
||||
excludedChannels: config.exclude?.channels || [],
|
||||
});
|
||||
`, include);
|
||||
|
||||
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
|
||||
|
||||
return curatedNetworks;
|
||||
}
|
||||
|
||||
async function fetchIncludedEntities() {
|
||||
if (argv.networks || argv.channels) {
|
||||
return fetchChannelsFromArgv();
|
||||
}
|
||||
|
||||
return fetchChannelsFromConfig();
|
||||
}
|
||||
|
||||
async function fetchChannels(queryObject) {
|
||||
const sites = await knex('sites')
|
||||
.where(builder => whereOr(queryObject, 'sites', builder))
|
||||
|
@ -191,7 +130,5 @@ module.exports = {
|
|||
curateEntities,
|
||||
fetchIncludedEntities,
|
||||
fetchChannels,
|
||||
fetchChannelsFromConfig,
|
||||
fetchChannelsFromArgv,
|
||||
fetchChannelsFromReleases,
|
||||
};
|
||||
|
|
|
@ -8,7 +8,7 @@ const logger = require('./logger')(__filename);
|
|||
const knex = require('./knex');
|
||||
const include = require('./utils/argv-include')(argv);
|
||||
const scrapers = require('./scrapers/scrapers');
|
||||
const { fetchChannelsFromArgv, fetchChannelsFromConfig } = require('./entities');
|
||||
const { fetchIncludedEntities } = require('./entities');
|
||||
|
||||
async function filterUniqueReleases(latestReleases, accReleases) {
|
||||
const latestReleaseIdentifiers = latestReleases
|
||||
|
@ -174,8 +174,6 @@ async function scrapeChannelReleases(scraper, channelEntity, preData) {
|
|||
: [],
|
||||
]);
|
||||
|
||||
console.log(movies);
|
||||
|
||||
logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${channelEntity.name}' (${channelEntity.parent?.name})`);
|
||||
|
||||
return [...latestReleases, ...upcomingReleases];
|
||||
|
@ -229,9 +227,7 @@ async function scrapeNetworkParallel(networkEntity) {
|
|||
}
|
||||
|
||||
async function fetchUpdates() {
|
||||
const includedNetworks = argv.channels || argv.networks
|
||||
? await fetchChannelsFromArgv()
|
||||
: await fetchChannelsFromConfig();
|
||||
const includedNetworks = await fetchIncludedEntities();
|
||||
|
||||
const scrapedNetworks = await Promise.map(
|
||||
includedNetworks,
|
||||
|
|
Loading…
Reference in New Issue