Select included networks together with their full chain of parent entities (unbounded depth) to facilitate scraper resolution.

This commit is contained in:
DebaucheryLibrarian
2021-02-02 01:31:12 +01:00
parent 46c0b269c3
commit d5cdfb36a9
4 changed files with 101 additions and 80 deletions

View File

@@ -94,62 +94,80 @@ async function fetchIncludedEntities() {
WITH RECURSIVE included_entities AS (
/* select configured channels and networks */
SELECT
entities.*
entities.*
FROM
entities
entities
WHERE
CASE WHEN :includeAll
THEN
/* select all top level networks and independent channels */
entities.parent_id IS NULL
ELSE
((entities.slug = ANY(:includedNetworks)
AND entities.type = 'network')
OR (entities.slug = ANY(:includedChannels)
AND entities.type = 'channel'))
END
AND NOT (
(entities.slug = ANY(:excludedNetworks)
AND entities.type = 'network')
OR (entities.slug = ANY(:excludedChannels)
AND entities.type = 'channel'))
CASE WHEN :includeAll
THEN
/* select all top level networks and independent channels */
entities.parent_id IS NULL
ELSE
((entities.slug = ANY(:includedNetworks)
AND entities.type = 'network')
OR (entities.slug = ANY(:includedChannels)
AND entities.type = 'channel'))
END
AND NOT (
(entities.slug = ANY(:excludedNetworks)
AND entities.type = 'network')
OR (entities.slug = ANY(:excludedChannels)
AND entities.type = 'channel'))
UNION ALL
/* select recursive children of configured networks */
SELECT
entities.*
entities.*
FROM
entities
entities
INNER JOIN
included_entities ON included_entities.id = entities.parent_id
included_entities ON included_entities.id = entities.parent_id
WHERE
NOT ((entities.slug = ANY(:excludedNetworks)
AND entities.type = 'network')
OR (entities.slug = ANY(:excludedChannels)
AND entities.type = 'channel'))
), included_per_network AS (
/* select recursive channels as children of networks */
SELECT
parents.*,
json_agg(included_entities ORDER BY included_entities.id) included_children,
row_to_json(grandparents) AS parent,
(SELECT json_agg(children)
FROM entities AS children
WHERE children.parent_id = parents.id) children
FROM
included_entities
LEFT JOIN
entities AS parents ON parents.id = included_entities.parent_id
LEFT JOIN
entities AS grandparents ON grandparents.id = parents.parent_id
WHERE
included_entities.type = 'channel'
GROUP BY
parents.id, grandparents.id
), entity_tree as (
/* get recursive parents of networks (necessary for scraper resolve) */
SELECT to_jsonb(included_per_network) as entity,
parent_id,
array['parent'] as parent_path,
0 as depth
FROM included_per_network
UNION ALL
SELECT jsonb_set(entity_tree.entity, entity_tree.parent_path, to_jsonb(entities)),
entities.parent_id,
entity_tree.parent_path || array['parent'],
depth + 1
FROM entity_tree
JOIN entities ON entity_tree.parent_id = entities.id
)
/* select recursive channels as children of networks */
SELECT
parents.*,
json_agg(included_entities ORDER BY included_entities.id) included_children,
row_to_json(grandparents) AS parent,
(SELECT json_agg(children)
FROM entities AS children
WHERE children.parent_id = parents.id) children
FROM
included_entities
LEFT JOIN
entities AS parents ON parents.id = included_entities.parent_id
LEFT JOIN
entities AS grandparents ON grandparents.id = parents.parent_id
WHERE
included_entities.type = 'channel'
GROUP BY
parents.id, grandparents.id;
SELECT entity FROM entity_tree WHERE parent_id is null;
`, include);
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
const curatedNetworks = rawNetworks.rows.map(({ entity }) => curateEntity(entity, true));
return curatedNetworks;
}
@@ -164,7 +182,7 @@ async function fetchReleaseEntities(baseReleases) {
));
const entities = await knex.raw(`
WITH RECURSIVE tree as (
WITH RECURSIVE entity_tree as (
SELECT to_jsonb(entities) as entity,
parent_id,
array['parent'] as parent_path,
@@ -174,14 +192,14 @@ async function fetchReleaseEntities(baseReleases) {
UNION ALL
SELECT jsonb_set(tree.entity, tree.parent_path, to_jsonb(entities)),
SELECT jsonb_set(entity_tree.entity, entity_tree.parent_path, to_jsonb(entities)),
entities.parent_id,
tree.parent_path || array['parent'],
entity_tree.parent_path || array['parent'],
depth + 1
FROM tree
JOIN entities ON tree.parent_id = entities.id
FROM entity_tree
JOIN entities ON entity_tree.parent_id = entities.id
)
SELECT entity FROM tree WHERE parent_id is null
SELECT entity FROM entity_tree WHERE parent_id is null
ORDER BY entity->'type' ASC;
`, { entitySlugs });