// traxxx/src/entities.js

'use strict';

// third-party dependencies
const config = require('config');
const inquirer = require('inquirer');

// project-local modules
const logger = require('./logger')(__filename);
const argv = require('./argv');
const knex = require('./knex');
const { deleteScenes, deleteMovies, deleteSeries } = require('./releases');
const { flushOrphanedMedia } = require('./media');
const { resolveScraper, resolveLayoutScraper } = require('./scrapers/resolve');

/**
 * Follows the `parent` chain of an entity up to its top-most ancestor.
 *
 * @param {object|null|undefined} entity - Entity that may carry a nested `parent`.
 * @returns {object|null} The first ancestor without a truthy `parent` (the entity
 *   itself when it has none), or null when no entity was given.
 */
function getRecursiveParent(entity) {
	if (!entity) {
		return null;
	}

	let ancestor = entity;

	// climb until there is no further parent to follow
	while (ancestor.parent) {
		ancestor = ancestor.parent;
	}

	return ancestor;
}

/**
 * Converts a raw entity row (optionally with nested parent, children and tags)
 * into the curated shape used by the rest of the application, and attaches the
 * scraper resolved for it.
 *
 * @param {object|null|undefined} entity - Raw entity, using database column names
 *   such as `has_logo`, `alias` and `included_children`.
 * @param {boolean} [includeParameters=false] - Whether to copy `entity.parameters`
 *   onto the curated entity (and onto curated parents and children).
 * @returns {object|null} The curated entity, or null when no entity was given.
 */
function curateEntity(entity, includeParameters = false) {
	if (!entity) {
		return null;
	}

	// networks and independent entities keep their logo in their own directory,
	// other entities keep theirs in the directory of their parent
	let logo = null;

	if (entity.has_logo) {
		if (entity.independent || entity.type === 'network') {
			logo = {
				logo: `${entity.slug}/network.png`,
				thumbnail: `${entity.slug}/thumbs/network.png`,
				favicon: `${entity.slug}/favicon.png`,
			};
		} else if (entity.parent) {
			logo = {
				logo: `${entity.parent.slug}/${entity.slug}.png`,
				thumbnail: `${entity.parent.slug}/thumbs/${entity.slug}.png`,
				favicon: `${entity.parent.slug}/favicon.png`,
			};
		}
	}

	// an entity without ID only acts as a container for its tags and children
	const curatedEntity = entity.id ? {
		id: entity.id,
		name: entity.name,
		url: entity.url,
		description: entity.description,
		slug: entity.slug,
		type: entity.type,
		independent: !!entity.independent,
		aliases: entity.alias,
		...logo,
		parent: curateEntity(entity.parent, includeParameters),
	} : {};

	if (entity.tags) {
		curatedEntity.tags = entity.tags.map((tag) => ({
			id: tag.id,
			name: tag.name,
			slug: tag.slug,
			priority: tag.priority,
		}));
	}

	if (includeParameters) {
		curatedEntity.parameters = entity.parameters;
	}

	// children receive the entity curated so far as their parent
	if (entity.children) {
		curatedEntity.children = entity.children.map((child) => curateEntity({
			...child,
			parent: curatedEntity.id ? curatedEntity : null,
		}, includeParameters));
	}

	if (entity.included_children) {
		curatedEntity.includedChildren = entity.included_children.map((child) => curateEntity({
			...child,
			parent: curatedEntity.id ? curatedEntity : null,
		}, includeParameters));
	}

	const scraper = resolveScraper(curatedEntity);

	curatedEntity.scraper = resolveLayoutScraper(entity, scraper);

	return curatedEntity;
}

/**
 * Curates a list of raw entities.
 *
 * @param {object[]} entities - Raw entities to pass through curateEntity.
 * @param {boolean} [includeParameters] - Forwarded to curateEntity for every entity.
 * @returns {Promise<Array<object|null>>} Curated entities, in input order.
 */
async function curateEntities(entities, includeParameters) {
	const pendingEntities = entities.map(async (rawEntity) => curateEntity(rawEntity, includeParameters));

	return Promise.all(pendingEntities);
}

/* obsolete in favor of urlToHostname
function urlToSiteSlug(url) {
try {
const slug = new URL(url)
.hostname
.match(/([\w-]+)\.\w+$/)?.[1]
.replace(/[-_]+/g, '');
return slug;
} catch (error) {
logger.warn(`Failed to derive entity slug from '${url}': ${error.message}`);
return null;
}
}
*/
/**
 * Derives the hostname from a URL, without a leading `www.` prefix.
 *
 * @param {string} url - Absolute URL to parse.
 * @returns {string|null} The hostname, or null when the URL could not be parsed
 *   (a warning is logged in that case).
 */
function urlToHostname(url) {
	try {
		// only a leading www. is removed, any other subdomain is kept
		return new URL(url).hostname.replace(/^www\./, '');
	} catch (error) {
		logger.warn(`Failed to derive entity hostname from '${url}': ${error.message}`);

		return null;
	}
}

/**
 * Fetches the entities selected through the CLI arguments and the configuration,
 * grouped per parent with their included channels, and curates them with parameters.
 *
 * CLI arguments take precedence over the configuration: configured networks are
 * ignored when channels are passed as argument and vice versa, and entities that
 * are explicitly included by argument are dropped from the configured exclusions.
 *
 * @returns {Promise<object[]>} Curated entities, each resolved up to its top-level parent.
 */
async function fetchIncludedEntities() {
	const include = {
		includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels,
		includedNetworks: argv.networks || (!argv.channels && config.include?.networks) || [],
		includedChannels: argv.channels || (!argv.networks && config.include?.channels) || [],
		// config.exclude may define only one of networks or channels, hence the second optional chain
		excludedNetworks: argv.excludeNetworks || config.exclude?.networks?.filter((network) => !argv.networks?.includes(network)) || [], // ignore explicitly included networks
		excludedChannels: argv.excludeChannels || config.exclude?.channels?.filter((channel) => !argv.channels?.includes(channel)) || [], // ignore explicitly included channels
	};

	const rawNetworks = await knex.raw(`
		WITH RECURSIVE included_entities AS (
			/* select configured channels and networks */
			SELECT
				entities.*
			FROM
				entities
			WHERE
				CASE WHEN :includeAll
				THEN
					/* select all top level networks and independent channels */
					entities.parent_id IS NULL
				ELSE
					((entities.slug = ANY(:includedNetworks)
					AND entities.type = 'network')
					OR (entities.slug = ANY(:includedChannels)
					AND entities.type = 'channel'))
				END
				AND NOT (
					(entities.slug = ANY(:excludedNetworks)
					AND entities.type = 'network')
					OR (entities.slug = ANY(:excludedChannels)
					AND entities.type = 'channel'))

			UNION ALL

			/* select recursive children of configured networks */
			SELECT
				entities.*
			FROM
				entities
			INNER JOIN
				included_entities ON included_entities.id = entities.parent_id
			WHERE
				NOT ((entities.slug = ANY(:excludedNetworks)
					AND entities.type = 'network')
					OR (entities.slug = ANY(:excludedChannels)
					AND entities.type = 'channel'))
		), included_per_network AS (
			/* select recursive channels as children of networks */
			SELECT
				parents.*,
				json_agg(included_entities ORDER BY included_entities.id) included_children,
				(SELECT json_agg(children)
				FROM entities AS children
				WHERE children.parent_id = parents.id) children
			FROM
				included_entities
			LEFT JOIN
				entities AS parents ON parents.id = included_entities.parent_id
			WHERE
				included_entities.type = 'channel'
			GROUP BY
				parents.id
		), entity_tree as (
			/* get recursive parents of networks (necessary for scraper resolve) */
			SELECT to_jsonb(included_per_network) as entity,
				parent_id,
				array['parent'] as parent_path
			FROM included_per_network

			UNION ALL

			SELECT jsonb_set(entity_tree.entity, entity_tree.parent_path, to_jsonb(entities)),
				entities.parent_id,
				entity_tree.parent_path || array['parent']
			FROM entity_tree
			JOIN entities ON entity_tree.parent_id = entities.id
		)
		SELECT entity FROM entity_tree WHERE parent_id is null;
	`, include);

	const curatedNetworks = rawNetworks.rows.map(({ entity }) => curateEntity(entity, true));

	return curatedNetworks;
}

/**
 * Fetches entities by slug or by hostname, including their parents, children and
 * tags, and returns them curated and keyed by both their slug and their hostname.
 *
 * @param {string[]} entitySlugs - Identifiers to look up; values containing a dot
 *   are treated as hostnames and matched against the entity URL, all other values
 *   are matched against the entity slug.
 * @param {string} [sort='asc'] - Sort direction for the entity type, inserted into
 *   the query as raw SQL. NOTE(review): callers in this file only pass 'asc' or
 *   'desc'; never pass untrusted input here.
 * @returns {Promise<object>} Curated entities keyed by slug and, when the entity has
 *   a parsable URL, by hostname.
 */
async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') {
	const entities = await knex.raw(`
		WITH RECURSIVE entity_tree as (
			SELECT to_jsonb(entities) as entity,
				parent_id,
				array['parent'] as parent_path
			FROM entities
			WHERE slug = ANY(:entitySlugs)
				OR url ILIKE ANY(:entityHosts)

			UNION ALL

			SELECT jsonb_set(entity_tree.entity, entity_tree.parent_path, to_jsonb(entities)),
				entities.parent_id,
				entity_tree.parent_path || array['parent']
			FROM entity_tree
			JOIN entities ON entity_tree.parent_id = entities.id
		)
		SELECT jsonb_set(
			jsonb_set(
				entity,
				'{children}',
				to_jsonb(COALESCE(json_agg(children) FILTER (WHERE children.id IS NOT NULL), '[]'))),
			'{tags}',
			to_jsonb(COALESCE(json_agg(tags) FILTER (WHERE tags.id IS NOT NULL), '[]'))
		) entity
		FROM entity_tree
		LEFT JOIN entities AS children ON children.parent_id = (entity->>'id')::int
		LEFT JOIN entities_tags ON entities_tags.entity_id = (entity->>'id')::int
		LEFT JOIN tags ON tags.id = entities_tags.tag_id
		WHERE entity_tree.parent_id IS NULL
		GROUP BY entity_tree.entity
		ORDER BY entity->'type' :sort;
	`, {
		entitySlugs: entitySlugs.filter((slug) => !slug.includes('.')),
		entityHosts: entitySlugs.filter((slug) => slug.includes('.')).map((hostname) => `%${hostname}%`),
		sort: knex.raw(sort),
	});

	// Rows arrive ordered by type in the requested direction. The first entity
	// registered under a slug or hostname is reused for later rows sharing that key.
	const entitiesBySlug = {};

	for (const { entity } of entities.rows) {
		// entities without a (parsable) URL must not share a bogus 'null' hostname key
		const host = entity.url ? urlToHostname(entity.url) : null;

		const curatedEntity = entitiesBySlug[entity.slug]
			|| (host && entitiesBySlug[host])
			|| curateEntity(entity, true);

		entitiesBySlug[entity.slug] = curatedEntity;

		if (host) {
			entitiesBySlug[host] = curatedEntity;
		}
	}

	return entitiesBySlug;
}

/**
 * Fetches the entities for releases that have a URL but no site or entity
 * assigned yet, looked up by the hostname of the release URL.
 *
 * @param {object[]} baseReleases - Releases, read for `url`, `site` and `entity`.
 * @returns {Promise<object>} Result of fetchEntitiesBySlug for the unique hostnames,
 *   sorted by type in descending direction.
 */
async function fetchReleaseEntities(baseReleases) {
	const baseReleasesWithoutEntity = baseReleases.filter((release) => release.url && !release.site && !release.entity);

	// deduplicate the hostnames, and drop URLs no hostname could be derived from
	const entitySlugs = Array.from(new Set(
		baseReleasesWithoutEntity
			.map((baseRelease) => urlToHostname(baseRelease.url))
			.filter(Boolean),
	));

	return fetchEntitiesBySlug(entitySlugs, 'desc');
}

/**
 * Fetches a single entity with its parent, children and tags, by numeric ID or
 * by slug or alias.
 *
 * @param {number|string} entityId - Numeric entity ID, or a slug or alias.
 * @param {string} [type] - Entity type; required when entityId is not numeric.
 * @returns {Promise<object|null>} The curated entity, or null when none was found.
 * @throws {Error} When entityId is not numeric and no type was given.
 */
async function fetchEntity(entityId, type) {
	const entity = await knex('entities')
		.select(knex.raw(`
			entities.*,
			COALESCE(json_agg(children) FILTER (WHERE children.id IS NOT NULL), '[]') as children,
			COALESCE(json_agg(tags) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
			row_to_json(parents) as parent
		`))
		.modify((queryBuilder) => {
			// a numeric identifier is matched on ID regardless of the type
			if (Number(entityId)) {
				queryBuilder.where('entities.id', entityId);
				return;
			}

			// a non-numeric identifier is a slug or alias, matched within the given type
			if (type) {
				queryBuilder
					.where('entities.type', type)
					.where((whereBuilder) => {
						whereBuilder
							.where('entities.slug', entityId)
							.orWhere(knex.raw(':entityId = ANY(entities.alias)', { entityId }));
					});

				return;
			}

			throw new Error('Invalid ID or unspecified entity type');
		})
		.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
		.leftJoin('entities as children', 'children.parent_id', 'entities.id')
		.leftJoin('entities_tags', 'entities_tags.entity_id', 'entities.id')
		.leftJoin('tags', 'tags.id', 'entities_tags.tag_id')
		.groupBy('entities.id', 'parents.id')
		.first();

	return curateEntity(entity);
}

/**
 * Fetches entities with their parent and tags, optionally restricted to one type.
 *
 * @param {string} [type] - Entity type to filter on; all types when omitted.
 * @param {number} [limit] - Maximum number of entities; 100 when omitted or falsy.
 * @returns {Promise<object[]>} Curated entities.
 */
async function fetchEntities(type, limit) {
	const entities = await knex('entities')
		.select(knex.raw(`
			entities.*,
			COALESCE(json_agg(tags) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
			row_to_json(parents) as parent
		`))
		.modify((queryBuilder) => {
			if (type) {
				queryBuilder.where('entities.type', type);
			}
		})
		.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
		.leftJoin('entities_tags', 'entities_tags.entity_id', 'entities.id')
		.leftJoin('tags', 'tags.id', 'entities_tags.tag_id')
		.groupBy('entities.id', 'parents.id')
		.limit(limit || 100);

	return curateEntities(entities);
}

/**
 * Searches entities through the `search_entities` database function, and returns
 * the matches with their parent and tags.
 *
 * @param {string} query - Search query passed to `search_entities`.
 * @param {string} [type] - Entity type to filter on; all types when omitted.
 * @param {number} [limit] - Maximum number of entities; 100 when omitted or falsy.
 * @returns {Promise<object[]>} Curated entities.
 */
async function searchEntities(query, type, limit) {
	const entities = await knex
		.select(knex.raw(`
			entities.id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.alias, entities.has_logo,
			COALESCE(json_agg(tags) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
			row_to_json(parents) as parent
		`))
		.from(knex.raw('search_entities(?) as entities', [query]))
		.modify((queryBuilder) => {
			if (type) {
				queryBuilder.where('entities.type', type);
			}
		})
		.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
		.leftJoin('entities_tags', 'entities_tags.entity_id', 'entities.id')
		.leftJoin('tags', 'tags.id', 'entities_tags.tag_id')
		// every selected entity column is listed explicitly, in line with the select above
		.groupBy('entities.id', 'entities.name', 'entities.slug', 'entities.type', 'entities.url', 'entities.description', 'entities.alias', 'entities.has_logo', 'parents.id')
		.limit(limit || 100);

	return curateEntities(entities);
}

/**
 * Collects the distinct IDs of the rows in a content table that belong to the
 * entities selected by the `selected_entities` recursive query.
 *
 * @param {object} entityQuery - Knex builder defining the `selected_entities` CTE;
 *   it is cloned, not modified.
 * @param {string} table - Content table with an `entity_id` column.
 * @returns {Promise<number[]>} IDs found in the table.
 */
async function fetchSelectedEntityContentIds(entityQuery, table) {
	return entityQuery
		.clone()
		.select(`${table}.id`)
		.distinct(`${table}.id`)
		.whereNotNull(`${table}.id`)
		.from('selected_entities')
		.leftJoin(table, `${table}.entity_id`, 'selected_entities.id')
		.pluck(`${table}.id`);
}

/**
 * Removes all scenes, movies and series of the given networks and channels and of
 * their recursive children, after interactive confirmation. Orphaned media is
 * flushed afterwards, unless disabled with the flushOrphanedMedia argument.
 *
 * @param {string[]} [networkSlugs=[]] - Slugs of the networks to flush.
 * @param {string[]} [channelSlugs=[]] - Slugs of the channels to flush.
 * @returns {Promise<void>}
 */
async function flushEntities(networkSlugs = [], channelSlugs = []) {
	const entitySlugs = networkSlugs.concat(channelSlugs).join(', ');

	const entityQuery = knex
		.withRecursive('selected_entities', knex.raw(`
			SELECT entities.*
			FROM entities
			WHERE
				entities.slug = ANY(:networkSlugs)
				AND entities.type = 'network'
				OR (entities.slug = ANY(:channelSlugs)
				AND entities.type = 'channel')

			UNION ALL

			SELECT entities.*
			FROM entities
			INNER JOIN selected_entities ON selected_entities.id = entities.parent_id
		`, {
			networkSlugs,
			channelSlugs,
		}));

	// the three lookups are independent of each other
	const [sceneIds, movieIds, serieIds] = await Promise.all([
		fetchSelectedEntityContentIds(entityQuery, 'releases'),
		fetchSelectedEntityContentIds(entityQuery, 'movies'),
		fetchSelectedEntityContentIds(entityQuery, 'series'),
	]);

	if (sceneIds.length === 0 && movieIds.length === 0 && serieIds.length === 0) {
		logger.info(`No scenes, movies or series found to remove for ${entitySlugs}`);
		return;
	}

	// destructive operation, rejected unless explicitly confirmed
	const confirmed = await inquirer.prompt([{
		type: 'confirm',
		name: 'flushEntities',
		message: `You are about to remove ${sceneIds.length} scenes, ${movieIds.length} movies and ${serieIds.length} series for ${entitySlugs}. Are you sure?`,
		default: false,
	}]);

	if (!confirmed.flushEntities) {
		logger.warn(`Confirmation rejected, not flushing scenes, movies or series for: ${entitySlugs}`);
		return;
	}

	const [deletedScenesCount, deletedMoviesCount, deletedSeriesCount] = await Promise.all([
		deleteScenes(sceneIds),
		deleteMovies(movieIds),
		deleteSeries(serieIds),
	]);

	logger.info(`Removed ${deletedScenesCount} scenes, ${deletedMoviesCount} movies and ${deletedSeriesCount} series for ${entitySlugs}`);

	if (argv.flushOrphanedMedia !== false) {
		await flushOrphanedMedia();
	}
}

module.exports = {
curateEntity,
curateEntities,
fetchIncludedEntities,
fetchReleaseEntities,
2021-02-02 23:46:59 +00:00
fetchEntitiesBySlug,
2020-10-16 21:00:03 +00:00
fetchEntity,
fetchEntities,
getRecursiveParent,
2020-10-16 21:00:03 +00:00
searchEntities,
flushEntities,
urlToHostname,
// urlToSiteSlug,
};