forked from DebaucheryLibrarian/traxxx
Improved entity provision behavior.
This commit is contained in:
parent
77566eae0d
commit
b3f784686f
|
@ -65,9 +65,12 @@ To generate the thumbnails for logos and tag photos, run:
|
||||||
`./traxxx --option value` or `npm start -- --option value`
|
`./traxxx --option value` or `npm start -- --option value`
|
||||||
|
|
||||||
* `--server`: Run the web server
|
* `--server`: Run the web server
|
||||||
* `--all`: Fetch updates from the channels and networks in the configuration file.
|
|
||||||
* `--channel [slug] [slug]`: Fetch updates from specific channels. The slug is the channel's name in lowercase and without spaces or special characters. For example, Teens Like It Big is teenslikeitbig.
|
#### Channels
|
||||||
* `--network [slug] [slug]`: Fetch updates from all sites of a specific network. The network slug is composed similarly to the channel slug.
|
* `--channels [slug] [slug]`: Fetch updates from specific channels. The slug is the channel's name in lowercase and without spaces or special characters. For example, Teens Like It Big is teenslikeitbig. Overrides configured included networks and channels.
|
||||||
|
* `--networks [slug] [slug]`: Fetch updates from all sites of a specific network. The network slug is composed similarly to the channel slug. Overrides configured included networks and channels.
|
||||||
|
* `--exclude-channels [slug] [slug]`: Scrape every configured, specified or available channel, except for those specified here. Overrides configured excluded channels.
|
||||||
|
* `--exclude-networks [slug] [slug]`: Scrape every configured, specified or available network, except for those specified here. Overrides configured excluded networks.
|
||||||
* `--after "[time]"`: Do not fetch scenes older than this period or date. Example values are: `"1 month"`, `"3 years"`, `"2019-01-01"`.
|
* `--after "[time]"`: Do not fetch scenes older than this period or date. Example values are: `"1 month"`, `"3 years"`, `"2019-01-01"`.
|
||||||
* `--scene [URL]`: Try to retrieve scene details from its official channel or network URL.
|
* `--scene [URL]`: Try to retrieve scene details from its official channel or network URL.
|
||||||
* `--deep`: Follow each release link found when running `--channel` or `--network` and scrape it for more details. Enabled by default; use `--no-deep` to only save information found on the overview pages.
|
* `--deep`: Follow each release link found when running `--channel` or `--network` and scrape it for more details. Enabled by default; use `--no-deep` to only save information found on the overview pages.
|
||||||
|
|
10
src/argv.js
10
src/argv.js
|
@ -40,11 +40,21 @@ const { argv } = yargs
|
||||||
type: 'array',
|
type: 'array',
|
||||||
alias: 'network',
|
alias: 'network',
|
||||||
})
|
})
|
||||||
|
.option('exclude-networks', {
|
||||||
|
describe: 'Network not to scrape any channels from (overrides configuration)',
|
||||||
|
type: 'array',
|
||||||
|
alias: 'exclude-network',
|
||||||
|
})
|
||||||
.option('channels', {
|
.option('channels', {
|
||||||
describe: 'Channel to scrape (overrides configuration)',
|
describe: 'Channel to scrape (overrides configuration)',
|
||||||
type: 'array',
|
type: 'array',
|
||||||
alias: 'channel',
|
alias: 'channel',
|
||||||
})
|
})
|
||||||
|
.option('exclude-channels', {
|
||||||
|
describe: 'Channel not to scrape (overrides configuration)',
|
||||||
|
type: 'array',
|
||||||
|
alias: 'exclude-channel',
|
||||||
|
})
|
||||||
.option('actors', {
|
.option('actors', {
|
||||||
describe: 'Scrape actors by name or slug',
|
describe: 'Scrape actors by name or slug',
|
||||||
type: 'array',
|
type: 'array',
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
|
|
||||||
const config = require('config');
|
const config = require('config');
|
||||||
|
|
||||||
const logger = require('./logger')(__filename);
|
|
||||||
const argv = require('./argv');
|
const argv = require('./argv');
|
||||||
const knex = require('./knex');
|
const knex = require('./knex');
|
||||||
const whereOr = require('./utils/where-or');
|
const whereOr = require('./utils/where-or');
|
||||||
|
@ -37,61 +36,15 @@ async function curateEntities(entities, includeParameters) {
|
||||||
return Promise.all(entities.map(async entity => curateEntity(entity, includeParameters)));
|
return Promise.all(entities.map(async entity => curateEntity(entity, includeParameters)));
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchChannelsFromArgv() {
|
async function fetchIncludedEntities() {
|
||||||
const rawNetworks = await knex.raw(`
|
const include = {
|
||||||
/* networks from argument with channels as children */
|
includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels,
|
||||||
WITH RECURSIVE children AS (
|
includedNetworks: argv.networks || (!argv.channels && config.include?.networks) || [],
|
||||||
SELECT
|
includedChannels: argv.channels || (!argv.networks && config.include?.channels) || [],
|
||||||
entities.*
|
excludedNetworks: argv.excludeNetworks || config.exclude?.networks || [],
|
||||||
FROM
|
excludedChannels: argv.excludeChannels || config.exclude?.channels || [],
|
||||||
entities
|
};
|
||||||
WHERE
|
|
||||||
slug = ANY(?) AND entities.type = 'network'
|
|
||||||
UNION ALL
|
|
||||||
SELECT
|
|
||||||
entities.*
|
|
||||||
FROM
|
|
||||||
entities
|
|
||||||
INNER JOIN
|
|
||||||
children ON children.id = entities.parent_id
|
|
||||||
)
|
|
||||||
SELECT
|
|
||||||
entities.*, row_to_json(parents) as parent, json_agg(children) as children
|
|
||||||
FROM
|
|
||||||
children
|
|
||||||
LEFT JOIN
|
|
||||||
entities ON entities.id = children.parent_id
|
|
||||||
LEFT JOIN
|
|
||||||
entities AS parents ON parents.id = entities.parent_id
|
|
||||||
WHERE
|
|
||||||
children.type = 'channel'
|
|
||||||
GROUP BY
|
|
||||||
children.parent_id, entities.id, entities.name, parents.id
|
|
||||||
|
|
||||||
UNION ALL
|
|
||||||
|
|
||||||
/* channels from argument as the child of network with parent */
|
|
||||||
SELECT
|
|
||||||
entities.*, row_to_json(parents) as parent, json_agg(row_to_json(children))
|
|
||||||
FROM
|
|
||||||
entities AS children
|
|
||||||
LEFT JOIN
|
|
||||||
entities ON entities.id = children.parent_id
|
|
||||||
LEFT JOIN
|
|
||||||
entities AS parents ON parents.id = entities.parent_id
|
|
||||||
WHERE
|
|
||||||
children.slug = ANY(?) AND children.type = 'channel'
|
|
||||||
GROUP BY
|
|
||||||
entities.id, parents.id;
|
|
||||||
`, [argv.networks || [], argv.channels || []]);
|
|
||||||
|
|
||||||
const curatedNetworks = await curateEntities(rawNetworks.rows, true);
|
|
||||||
logger.info(`Found ${curatedNetworks.length} networks in database`);
|
|
||||||
|
|
||||||
return curatedNetworks;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function fetchChannelsFromConfig() {
|
|
||||||
const rawNetworks = await knex.raw(`
|
const rawNetworks = await knex.raw(`
|
||||||
WITH RECURSIVE channels AS (
|
WITH RECURSIVE channels AS (
|
||||||
/* select configured channels and networks */
|
/* select configured channels and networks */
|
||||||
|
@ -142,27 +95,13 @@ async function fetchChannelsFromConfig() {
|
||||||
channels.type = 'channel'
|
channels.type = 'channel'
|
||||||
GROUP BY
|
GROUP BY
|
||||||
entities.id
|
entities.id
|
||||||
`, {
|
`, include);
|
||||||
includeAll: !config.include?.networks && !config.include?.channels,
|
|
||||||
includedNetworks: config.include?.networks || [],
|
|
||||||
includedChannels: config.include?.channels || [],
|
|
||||||
excludedNetworks: config.exclude?.networks || [],
|
|
||||||
excludedChannels: config.exclude?.channels || [],
|
|
||||||
});
|
|
||||||
|
|
||||||
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
|
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
|
||||||
|
|
||||||
return curatedNetworks;
|
return curatedNetworks;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchIncludedEntities() {
|
|
||||||
if (argv.networks || argv.channels) {
|
|
||||||
return fetchChannelsFromArgv();
|
|
||||||
}
|
|
||||||
|
|
||||||
return fetchChannelsFromConfig();
|
|
||||||
}
|
|
||||||
|
|
||||||
async function fetchChannels(queryObject) {
|
async function fetchChannels(queryObject) {
|
||||||
const sites = await knex('sites')
|
const sites = await knex('sites')
|
||||||
.where(builder => whereOr(queryObject, 'sites', builder))
|
.where(builder => whereOr(queryObject, 'sites', builder))
|
||||||
|
@ -191,7 +130,5 @@ module.exports = {
|
||||||
curateEntities,
|
curateEntities,
|
||||||
fetchIncludedEntities,
|
fetchIncludedEntities,
|
||||||
fetchChannels,
|
fetchChannels,
|
||||||
fetchChannelsFromConfig,
|
|
||||||
fetchChannelsFromArgv,
|
|
||||||
fetchChannelsFromReleases,
|
fetchChannelsFromReleases,
|
||||||
};
|
};
|
||||||
|
|
|
@ -8,7 +8,7 @@ const logger = require('./logger')(__filename);
|
||||||
const knex = require('./knex');
|
const knex = require('./knex');
|
||||||
const include = require('./utils/argv-include')(argv);
|
const include = require('./utils/argv-include')(argv);
|
||||||
const scrapers = require('./scrapers/scrapers');
|
const scrapers = require('./scrapers/scrapers');
|
||||||
const { fetchChannelsFromArgv, fetchChannelsFromConfig } = require('./entities');
|
const { fetchIncludedEntities } = require('./entities');
|
||||||
|
|
||||||
async function filterUniqueReleases(latestReleases, accReleases) {
|
async function filterUniqueReleases(latestReleases, accReleases) {
|
||||||
const latestReleaseIdentifiers = latestReleases
|
const latestReleaseIdentifiers = latestReleases
|
||||||
|
@ -174,8 +174,6 @@ async function scrapeChannelReleases(scraper, channelEntity, preData) {
|
||||||
: [],
|
: [],
|
||||||
]);
|
]);
|
||||||
|
|
||||||
console.log(movies);
|
|
||||||
|
|
||||||
logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${channelEntity.name}' (${channelEntity.parent?.name})`);
|
logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${channelEntity.name}' (${channelEntity.parent?.name})`);
|
||||||
|
|
||||||
return [...latestReleases, ...upcomingReleases];
|
return [...latestReleases, ...upcomingReleases];
|
||||||
|
@ -229,9 +227,7 @@ async function scrapeNetworkParallel(networkEntity) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchUpdates() {
|
async function fetchUpdates() {
|
||||||
const includedNetworks = argv.channels || argv.networks
|
const includedNetworks = await fetchIncludedEntities();
|
||||||
? await fetchChannelsFromArgv()
|
|
||||||
: await fetchChannelsFromConfig();
|
|
||||||
|
|
||||||
const scrapedNetworks = await Promise.map(
|
const scrapedNetworks = await Promise.map(
|
||||||
includedNetworks,
|
includedNetworks,
|
||||||
|
|
Loading…
Reference in New Issue