Refactored various modules for entities. Updated and refactored Kink scraper.

This commit is contained in:
2020-06-27 02:57:30 +02:00
parent 4959dfd14f
commit af56378ee2
107 changed files with 539 additions and 414 deletions

View File

@@ -20,7 +20,7 @@ function curateEntity(entity, includeParameters = false) {
children: (entity.children || []).map(child => curateEntity({
...child,
parent: entity,
})),
}, includeParameters)),
};
return curatedEntity;
@@ -30,39 +30,40 @@ async function curateEntities(entities, includeParameters) {
return Promise.all(entities.map(async entity => curateEntity(entity, includeParameters)));
}
async function fetchSitesFromArgv() {
async function fetchChannelsFromArgv() {
const rawNetworks = await knex.raw(`
/* networks from argument with sites as children */
WITH RECURSIVE temp AS (
/* networks from argument with channels as children */
WITH RECURSIVE children AS (
SELECT
id, parent_id, name, slug, type, url, description, parameters
FROM
entities
WHERE
slug = ANY(?) AND entities.type = 1
slug = ANY(?) AND entities.type = 'network'
UNION ALL
SELECT
entities.id, entities.parent_id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.parameters
FROM
entities
INNER JOIN
temp ON temp.id = entities.parent_id
children ON children.id = entities.parent_id
)
SELECT
entities.*, row_to_json(parents) as parent, json_agg(temp) as children
entities.*, row_to_json(parents) as parent, json_agg(children) as children
FROM
temp
children
LEFT JOIN
entities ON entities.id = temp.parent_id
entities ON entities.id = children.parent_id
LEFT JOIN
entities AS parents ON parents.id = entities.parent_id
WHERE
temp.type = 2
children.type = 'channel'
GROUP BY
temp.parent_id, entities.id, entities.name, parents.id
children.parent_id, entities.id, entities.name, parents.id
UNION ALL
/* sites from argument as the child of network with parent */
/* channels from argument as the child of network with parent */
SELECT
entities.*, row_to_json(parents) as parent, json_agg(row_to_json(children))
FROM
@@ -72,10 +73,10 @@ async function fetchSitesFromArgv() {
LEFT JOIN
entities AS parents ON parents.id = entities.parent_id
WHERE
children.slug = ANY(?) AND children.type = 2
children.slug = ANY(?) AND children.type = 'channel'
GROUP BY
entities.id, parents.id;
`, [argv.networks || [], argv.sites || []]);
`, [argv.networks || [], argv.channels || []]);
const curatedNetworks = await curateEntities(rawNetworks.rows, true);
logger.info(`Found ${curatedNetworks.length} networks in database`);
@@ -83,10 +84,10 @@ async function fetchSitesFromArgv() {
return curatedNetworks;
}
async function fetchSitesFromConfig() {
async function fetchChannelsFromConfig() {
const rawSites = await knex('entities')
.select('entities.*')
.leftJoin('entities as entities_parents', 'entities_parents.id', 'entities.id')
.select(knex.raw('entities.*, row_to_json(parents) as parent'))
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
.where((builder) => {
if (config.include) {
builder.whereIn('entities.slug', config.include);
@@ -103,14 +104,14 @@ async function fetchSitesFromConfig() {
}
async function fetchIncludedEntities() {
if (argv.networks || argv.sites) {
return fetchSitesFromArgv();
if (argv.networks || argv.channels) {
return fetchChannelsFromArgv();
}
return fetchSitesFromConfig();
return fetchChannelsFromConfig();
}
async function fetchSites(queryObject) {
async function fetchChannels(queryObject) {
const sites = await knex('sites')
.where(builder => whereOr(queryObject, 'sites', builder))
.select(
@@ -123,7 +124,7 @@ async function fetchSites(queryObject) {
return curateEntities(sites);
}
async function fetchSitesFromReleases() {
async function fetchChannelsFromReleases() {
const sites = await knex('releases')
.select('site_id', '')
.leftJoin('sites', 'sites.id', 'releases.site_id')
@@ -137,8 +138,8 @@ module.exports = {
curateEntity,
curateEntities,
fetchIncludedEntities,
fetchSites,
fetchSitesFromConfig,
fetchSitesFromArgv,
fetchSitesFromReleases,
fetchChannels,
fetchChannelsFromConfig,
fetchChannelsFromArgv,
fetchChannelsFromReleases,
};