'use strict'; const config = require('config'); const inquirer = require('inquirer'); const logger = require('./logger')(__filename); const argv = require('./argv'); const knex = require('./knex'); const { deleteScenes, deleteMovies, deleteSeries } = require('./releases'); const { flushOrphanedMedia } = require('./media'); const { resolveScraper, resolveLayoutScraper } = require('./scrapers/resolve'); function getRecursiveParent(entity) { if (!entity) { return null; } if (entity.parent) { return getRecursiveParent(entity.parent); } return entity; } function curateEntity(entity, includeParameters = false) { if (!entity) { return null; } const logo = (entity.has_logo && (((entity.independent || entity.type === 'network') && { logo: `${entity.slug}/network.png`, thumbnail: `${entity.slug}/thumbs/network.png`, favicon: `${entity.slug}/favicon.png` }) || (entity.parent && { logo: `${entity.parent.slug}/${entity.slug}.png`, thumbnail: `${entity.parent.slug}/thumbs/${entity.slug}.png`, favicon: `${entity.parent.slug}/favicon.png` }))) || null; const curatedEntity = entity.id ? { id: entity.id, name: entity.name, url: entity.url, description: entity.description, slug: entity.slug, type: entity.type, independent: !!entity.independent, aliases: entity.alias, ...logo, parent: curateEntity(entity.parent, includeParameters), } : {}; if (entity.tags) { curatedEntity.tags = entity.tags.map((tag) => ({ id: tag.id, name: tag.name, slug: tag.slug, priority: tag.priority, })); } if (includeParameters) { curatedEntity.parameters = entity.parameters; } if (entity.children) { curatedEntity.children = entity.children.map((child) => curateEntity({ ...child, parent: curatedEntity.id ? curatedEntity : null, }, includeParameters)); } if (entity.included_children) { curatedEntity.includedChildren = entity.included_children.map((child) => curateEntity({ ...child, parent: curatedEntity.id ? curatedEntity : null, }, includeParameters)); } const scraper = resolveScraper(curatedEntity); curatedEntity.scraper = resolveLayoutScraper(entity, scraper); return curatedEntity; } async function curateEntities(entities, includeParameters) { return Promise.all(entities.map(async (entity) => curateEntity(entity, includeParameters))); } /* obsolete in favor of urlToHostname function urlToSiteSlug(url) { try { const slug = new URL(url) .hostname .match(/([\w-]+)\.\w+$/)?.[1] .replace(/[-_]+/g, ''); return slug; } catch (error) { logger.warn(`Failed to derive entity slug from '${url}': ${error.message}`); return null; } } */ function urlToHostname(url) { try { const hostname = new URL(url) .hostname .match(/(www\.)?(.*)/)?.at(-1); return hostname; } catch (error) { logger.warn(`Failed to derive entity hostname from '${url}': ${error.message}`); return null; } } async function fetchIncludedEntities() { const include = { includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels, includedNetworks: argv.networks || (!argv.channels && config.include?.networks) || [], includedChannels: argv.channels || (!argv.networks && config.include?.channels) || [], excludedNetworks: argv.excludeNetworks || config.exclude?.networks.filter((network) => !argv.networks?.includes(network)) || [], // ignore explicitly included networks excludedChannels: argv.excludeChannels || config.exclude?.channels.filter((channel) => !argv.channels?.includes(channel)) || [], // ignore explicitly included channels }; const rawNetworks = await knex.raw(` WITH RECURSIVE included_entities AS ( /* select configured channels and networks */ SELECT entities.* FROM entities WHERE CASE WHEN :includeAll THEN /* select all top level networks and independent channels */ entities.parent_id IS NULL ELSE ((entities.slug = ANY(:includedNetworks) AND entities.type = 'network') OR (entities.slug = ANY(:includedChannels) AND entities.type = 'channel')) END AND NOT ( (entities.slug = ANY(:excludedNetworks) AND entities.type = 'network') OR (entities.slug = ANY(:excludedChannels) AND entities.type = 'channel')) UNION ALL /* select recursive children of configured networks */ SELECT entities.* FROM entities INNER JOIN included_entities ON included_entities.id = entities.parent_id WHERE NOT ((entities.slug = ANY(:excludedNetworks) AND entities.type = 'network') OR (entities.slug = ANY(:excludedChannels) AND entities.type = 'channel')) ), included_per_network AS ( /* select recursive channels as children of networks */ SELECT parents.*, json_agg(included_entities ORDER BY included_entities.id) included_children, (SELECT json_agg(children) FROM entities AS children WHERE children.parent_id = parents.id) children FROM included_entities LEFT JOIN entities AS parents ON parents.id = included_entities.parent_id WHERE included_entities.type = 'channel' GROUP BY parents.id ), entity_tree as ( /* get recursive parents of networks (necessary for scraper resolve) */ SELECT to_jsonb(included_per_network) as entity, parent_id, array['parent'] as parent_path FROM included_per_network UNION ALL SELECT jsonb_set(entity_tree.entity, entity_tree.parent_path, to_jsonb(entities)), entities.parent_id, entity_tree.parent_path || array['parent'] FROM entity_tree JOIN entities ON entity_tree.parent_id = entities.id ) SELECT entity FROM entity_tree WHERE parent_id is null; `, include); const curatedNetworks = rawNetworks.rows.map(({ entity }) => curateEntity(entity, true)); return curatedNetworks; } async function fetchEntitiesBySlug(entitySlugs, prefer = 'channel') { const entities = await knex.raw(` WITH RECURSIVE entity_tree as ( SELECT to_jsonb(entities) as entity, parent_id, array['parent'] as parent_path FROM entities WHERE (slug = ANY(:entitySlugs) OR url ILIKE ANY(:entityHosts)) AND type IN ('channel', 'network') UNION ALL SELECT jsonb_set(entity_tree.entity, entity_tree.parent_path, to_jsonb(entities)), entities.parent_id, entity_tree.parent_path || array['parent'] FROM entity_tree JOIN entities ON entity_tree.parent_id = entities.id ) SELECT jsonb_set( jsonb_set( entity, '{children}', to_jsonb(COALESCE(json_agg(children) FILTER (WHERE children.id IS NOT NULL), '[]'))), '{tags}', to_jsonb(COALESCE(json_agg(tags) FILTER (WHERE tags.id IS NOT NULL), '[]')) ) entity FROM entity_tree LEFT JOIN entities AS children ON children.parent_id = (entity->>'id')::int LEFT JOIN entities_tags ON entities_tags.entity_id = (entity->>'id')::int LEFT JOIN tags ON tags.id = entities_tags.tag_id WHERE entity_tree.parent_id IS NULL GROUP BY entity_tree.entity ORDER BY entity->'type' :sort; `, { entitySlugs: entitySlugs.filter((slug) => !slug.includes('.')), entityHosts: entitySlugs.filter((slug) => slug.includes('.')).map((hostname) => `%${hostname}`), sort: knex.raw(prefer === 'channel' ? 'asc' : 'desc'), }); // channel entity will overwrite network entity const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => { const host = urlToHostname(entity.url); const curatedEntity = accEntities[entity.slug] || accEntities[host] || curateEntity(entity, true); return { ...accEntities, [entity.slug]: curatedEntity, [host]: curatedEntity, }; }, {}); return entitiesBySlug; } async function fetchReleaseEntities(baseReleases) { const baseReleasesWithoutEntity = baseReleases.filter((release) => release.url && !release.site && !release.entity); const entitySlugs = Array.from(new Set( baseReleasesWithoutEntity .map((baseRelease) => urlToHostname(baseRelease.url)) .filter(Boolean), )); return fetchEntitiesBySlug(entitySlugs, argv.prefer || 'network'); } async function fetchEntity(entityId, type) { const entity = await knex('entities') .select(knex.raw(` entities.*, COALESCE(json_agg(children) FILTER (WHERE children.id IS NOT NULL), '[]') as children, COALESCE(json_agg(tags) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags, row_to_json(parents) as parent `)) .modify((queryBuilder) => { if (Number(entityId)) { queryBuilder.where('entities.id', entityId); return; } if (type) { queryBuilder .where('entities.type', type) .where((whereBuilder) => { whereBuilder .where('entities.slug', entityId) .orWhere(knex.raw(':entityId = ANY(entities.alias)', { entityId })); }); return; } throw new Error('Invalid ID or unspecified entity type'); }) .leftJoin('entities as parents', 'parents.id', 'entities.parent_id') .leftJoin('entities as children', 'children.parent_id', 'entities.id') .leftJoin('entities_tags', 'entities_tags.entity_id', 'entities.id') .leftJoin('tags', 'tags.id', 'entities_tags.tag_id') .groupBy('entities.id', 'parents.id') .first(); return curateEntity(entity); } async function fetchEntities(type, limit) { const entities = await knex('entities') .select(knex.raw(` entities.*, COALESCE(json_agg(tags) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags, row_to_json(parents) as parent `)) .modify((queryBuilder) => { if (type) { queryBuilder.where('entities.type', type); } }) .leftJoin('entities as parents', 'parents.id', 'entities.parent_id') .leftJoin('entities_tags', 'entities_tags.entity_id', 'entities.id') .leftJoin('tags', 'tags.id', 'entities_tags.tag_id') .groupBy('entities.id', 'parents.id') .limit(limit || 100); return curateEntities(entities); } async function searchEntities(query, type, limit) { const entities = await knex .select(knex.raw(` entities.id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.alias, entities.has_logo, COALESCE(json_agg(tags) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags, row_to_json(parents) as parent `)) .from(knex.raw('search_entities(?) as entities', [query])) .modify((queryBuilder) => { if (type) { queryBuilder.where('entities.type', type); } }) .leftJoin('entities as parents', 'parents.id', 'entities.parent_id') .leftJoin('entities_tags', 'entities_tags.entity_id', 'entities.id') .leftJoin('tags', 'tags.id', 'entities_tags.tag_id') .groupBy('entities.id', 'entities.name', 'entities.slug', 'entities.type', 'entities.url', 'entities.description', 'entities.alias', 'entities.has_logo', 'parents.id') .limit(limit || 100); return curateEntities(entities); } async function flushEntities(networkSlugs = [], channelSlugs = []) { const entitySlugs = networkSlugs.concat(channelSlugs).join(', '); const entityQuery = knex .withRecursive('selected_entities', knex.raw(` SELECT entities.* FROM entities WHERE entities.slug = ANY(:networkSlugs) AND entities.type = 'network' OR (entities.slug = ANY(:channelSlugs) AND entities.type = 'channel') UNION ALL SELECT entities.* FROM entities INNER JOIN selected_entities ON selected_entities.id = entities.parent_id `, { networkSlugs, channelSlugs, })); const sceneIds = await entityQuery .clone() .select('releases.id') .distinct('releases.id') .whereNotNull('releases.id') .from('selected_entities') .leftJoin('releases', 'releases.entity_id', 'selected_entities.id') .pluck('releases.id'); const movieIds = await entityQuery .clone() .select('movies.id') .distinct('movies.id') .whereNotNull('movies.id') .from('selected_entities') .leftJoin('movies', 'movies.entity_id', 'selected_entities.id') .pluck('movies.id'); const serieIds = await entityQuery .clone() .select('series.id') .distinct('series.id') .whereNotNull('series.id') .from('selected_entities') .leftJoin('series', 'series.entity_id', 'selected_entities.id') .pluck('series.id'); if (sceneIds.length === 0 && movieIds.length === 0 && serieIds.length === 0) { logger.info(`No scenes, movies or series found to remove for ${entitySlugs}`); return; } const confirmed = await inquirer.prompt([{ type: 'confirm', name: 'flushEntities', message: `You are about to remove ${sceneIds.length} scenes, ${movieIds.length} movies and ${serieIds.length} series for ${entitySlugs}. Are you sure?`, default: false, }]); if (!confirmed.flushEntities) { logger.warn(`Confirmation rejected, not flushing scenes, movies or series for: ${entitySlugs}`); return; } const [deletedScenesCount, deletedMoviesCount, deletedSeriesCount] = await Promise.all([ deleteScenes(sceneIds), deleteMovies(movieIds), deleteSeries(serieIds), ]); logger.info(`Removed ${deletedScenesCount} scenes, ${deletedMoviesCount} movies and ${deletedSeriesCount} series for ${entitySlugs}`); if (argv.flushOrphanedMedia !== false) { await flushOrphanedMedia(); } } module.exports = { curateEntity, curateEntities, fetchIncludedEntities, fetchReleaseEntities, fetchEntitiesBySlug, fetchEntity, fetchEntities, getRecursiveParent, searchEntities, flushEntities, urlToHostname, // urlToSiteSlug, };