Refactored various modules for entities. Updated and refactored Kink scraper.
This commit is contained in:
@@ -20,7 +20,7 @@ function curateEntity(entity, includeParameters = false) {
 children: (entity.children || []).map(child => curateEntity({
 ...child,
 parent: entity,
-})),
+}, includeParameters)),
 };
 
 return curatedEntity;
@@ -30,39 +30,40 @@ async function curateEntities(entities, includeParameters) {
|
||||
return Promise.all(entities.map(async entity => curateEntity(entity, includeParameters)));
|
||||
}
|
||||
|
||||
async function fetchSitesFromArgv() {
|
||||
async function fetchChannelsFromArgv() {
|
||||
const rawNetworks = await knex.raw(`
|
||||
/* networks from argument with sites as children */
|
||||
WITH RECURSIVE temp AS (
|
||||
/* networks from argument with channels as children */
|
||||
WITH RECURSIVE children AS (
|
||||
SELECT
|
||||
id, parent_id, name, slug, type, url, description, parameters
|
||||
FROM
|
||||
entities
|
||||
WHERE
|
||||
slug = ANY(?) AND entities.type = 1
|
||||
slug = ANY(?) AND entities.type = 'network'
|
||||
UNION ALL
|
||||
SELECT
|
||||
entities.id, entities.parent_id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.parameters
|
||||
FROM
|
||||
entities
|
||||
INNER JOIN
|
||||
temp ON temp.id = entities.parent_id
|
||||
children ON children.id = entities.parent_id
|
||||
)
|
||||
SELECT
|
||||
entities.*, row_to_json(parents) as parent, json_agg(temp) as children
|
||||
entities.*, row_to_json(parents) as parent, json_agg(children) as children
|
||||
FROM
|
||||
temp
|
||||
children
|
||||
LEFT JOIN
|
||||
entities ON entities.id = temp.parent_id
|
||||
entities ON entities.id = children.parent_id
|
||||
LEFT JOIN
|
||||
entities AS parents ON parents.id = entities.parent_id
|
||||
WHERE
|
||||
temp.type = 2
|
||||
children.type = 'channel'
|
||||
GROUP BY
|
||||
temp.parent_id, entities.id, entities.name, parents.id
|
||||
children.parent_id, entities.id, entities.name, parents.id
|
||||
|
||||
UNION ALL
|
||||
|
||||
/* sites from argument as the child of network with parent */
|
||||
/* channels from argument as the child of network with parent */
|
||||
SELECT
|
||||
entities.*, row_to_json(parents) as parent, json_agg(row_to_json(children))
|
||||
FROM
|
||||
@@ -72,10 +73,10 @@ async function fetchSitesFromArgv() {
 LEFT JOIN
 entities AS parents ON parents.id = entities.parent_id
 WHERE
-children.slug = ANY(?) AND children.type = 2
+children.slug = ANY(?) AND children.type = 'channel'
 GROUP BY
 entities.id, parents.id;
-`, [argv.networks || [], argv.sites || []]);
+`, [argv.networks || [], argv.channels || []]);
 
 const curatedNetworks = await curateEntities(rawNetworks.rows, true);
 logger.info(`Found ${curatedNetworks.length} networks in database`);
@@ -83,10 +84,10 @@ async function fetchSitesFromArgv() {
 return curatedNetworks;
 }
 
-async function fetchSitesFromConfig() {
+async function fetchChannelsFromConfig() {
 const rawSites = await knex('entities')
-.select('entities.*')
-.leftJoin('entities as entities_parents', 'entities_parents.id', 'entities.id')
+.select(knex.raw('entities.*, row_to_json(parents) as parent'))
+.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
 .where((builder) => {
 if (config.include) {
 builder.whereIn('entities.slug', config.include);
@@ -103,14 +104,14 @@ async function fetchSitesFromConfig() {
 }
 
 async function fetchIncludedEntities() {
-if (argv.networks || argv.sites) {
-return fetchSitesFromArgv();
+if (argv.networks || argv.channels) {
+return fetchChannelsFromArgv();
 }
 
-return fetchSitesFromConfig();
+return fetchChannelsFromConfig();
 }
 
-async function fetchSites(queryObject) {
+async function fetchChannels(queryObject) {
 const sites = await knex('sites')
 .where(builder => whereOr(queryObject, 'sites', builder))
 .select(
@@ -123,7 +124,7 @@ async function fetchSites(queryObject) {
 return curateEntities(sites);
 }
 
-async function fetchSitesFromReleases() {
+async function fetchChannelsFromReleases() {
 const sites = await knex('releases')
 .select('site_id', '')
 .leftJoin('sites', 'sites.id', 'releases.site_id')
@@ -137,8 +138,8 @@ module.exports = {
 curateEntity,
 curateEntities,
 fetchIncludedEntities,
-fetchSites,
-fetchSitesFromConfig,
-fetchSitesFromArgv,
-fetchSitesFromReleases,
+fetchChannels,
+fetchChannelsFromConfig,
+fetchChannelsFromArgv,
+fetchChannelsFromReleases,
 };
Reference in New Issue
Block a user