// Forked from DebaucheryLibrarian/traxxx (JavaScript, 402 lines, 12 KiB).
'use strict';
|
|
|
|
const config = require('config');
|
|
const inquirer = require('inquirer');
|
|
|
|
const logger = require('./logger')(__filename);
|
|
const argv = require('./argv');
|
|
const knex = require('./knex');
|
|
const { deleteScenes, deleteMovies } = require('./releases');
|
|
const { flushOrphanedMedia } = require('./media');
|
|
const { resolveScraper, resolveLayoutScraper } = require('./scrapers/resolve');
|
|
|
|
/**
 * Walk up the `parent` chain of an entity and return the top-most ancestor.
 *
 * @param {Object|null|undefined} entity - Entity that may carry a nested `parent` object.
 * @returns {Object|null} The entity without a truthy `parent`, or null when no entity is given.
 */
function getRecursiveParent(entity) {
  if (!entity) {
    return null;
  }

  let ancestor = entity;

  // keep climbing until we reach an entity that has no parent
  while (ancestor.parent) {
    ancestor = ancestor.parent;
  }

  return ancestor;
}
|
|
|
|
/**
 * Turn a raw entity row (optionally carrying nested parent, children, included
 * children and tags) into the curated shape used by the rest of the application.
 *
 * @param {Object|null|undefined} entity - Raw entity; falsy input yields null.
 * @param {boolean} [includeParameters=false] - Whether to copy `entity.parameters` onto the result.
 * @returns {Object|null} Curated entity with its resolved scraper attached, or null.
 */
function curateEntity(entity, includeParameters = false) {
  if (!entity) {
    return null;
  }

  // logo paths only apply to entities flagged as having a logo;
  // networks and independent entities use their own directory, others their parent's
  let logoPaths = null;

  if (entity.has_logo) {
    if (entity.independent || entity.type === 'network') {
      logoPaths = {
        logo: `${entity.slug}/network.png`,
        thumbnail: `${entity.slug}/thumbs/network.png`,
        favicon: `${entity.slug}/favicon.png`,
      };
    } else if (entity.parent) {
      logoPaths = {
        logo: `${entity.parent.slug}/${entity.slug}.png`,
        thumbnail: `${entity.parent.slug}/thumbs/${entity.slug}.png`,
        favicon: `${entity.parent.slug}/favicon.png`,
      };
    }
  }

  const curatedEntity = {};

  // entities without an ID start out as an empty object
  if (entity.id) {
    Object.assign(curatedEntity, {
      id: entity.id,
      name: entity.name,
      url: entity.url,
      description: entity.description,
      slug: entity.slug,
      type: entity.type,
      independent: !!entity.independent,
      aliases: entity.alias,
    }, logoPaths, {
      parent: curateEntity(entity.parent, includeParameters),
    });
  }

  if (entity.tags) {
    curatedEntity.tags = entity.tags.map(({ id, name, slug, priority }) => ({
      id,
      name,
      slug,
      priority,
    }));
  }

  if (includeParameters) {
    curatedEntity.parameters = entity.parameters;
  }

  // children are curated with the (partially built) curated entity as their parent
  const curateChild = (child) => curateEntity({
    ...child,
    parent: curatedEntity.id ? curatedEntity : null,
  }, includeParameters);

  if (entity.children) {
    curatedEntity.children = entity.children.map(curateChild);
  }

  if (entity.included_children) {
    curatedEntity.includedChildren = entity.included_children.map(curateChild);
  }

  curatedEntity.scraper = resolveLayoutScraper(entity, resolveScraper(curatedEntity));

  return curatedEntity;
}
|
|
|
|
/**
 * Curate a list of raw entities.
 *
 * @param {Object[]} entities - Raw entities to pass through curateEntity.
 * @param {boolean} [includeParameters] - Forwarded to curateEntity for every entity.
 * @returns {Promise<Array<Object|null>>} Curated entities in input order.
 */
async function curateEntities(entities, includeParameters) {
  const pendingEntities = entities.map(async (rawEntity) => curateEntity(rawEntity, includeParameters));

  return Promise.all(pendingEntities);
}
|
|
|
|
/**
 * Derive a site slug from a URL: the hostname label directly before the final
 * dot-separated segment, with dashes and underscores removed.
 *
 * @param {string} url - URL to derive the slug from.
 * @returns {string|undefined|null} The slug; undefined when the hostname does not
 *   match the expected pattern; null (after logging a warning) when deriving fails.
 */
function urlToSiteSlug(url) {
  try {
    const { hostname } = new URL(url);
    const domainMatch = hostname.match(/([\w-]+)\.\w+$/);

    // optional chaining short-circuits the replace, so no match yields undefined
    return domainMatch?.[1].replace(/[-_]+/g, '');
  } catch (error) {
    logger.warn(`Failed to derive entity slug from '${url}': ${error.message}`);

    return null;
  }
}
|
|
|
|
/**
 * Fetch the entities selected for inclusion by the command line arguments and
 * the configuration, grouped per parent with their recursive parents attached.
 *
 * Command line arguments take precedence over the configuration: when either
 * `argv.networks` or `argv.channels` is given, the configured include lists are
 * ignored. Without any include list, all entities without a parent are selected.
 *
 * @returns {Promise<Object[]>} Curated entities (with parameters) as returned by curateEntity.
 */
async function fetchIncludedEntities() {
  const include = {
    includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels,
    includedNetworks: argv.networks || (!argv.channels && config.include?.networks) || [],
    includedChannels: argv.channels || (!argv.networks && config.include?.channels) || [],
    // `?.filter` guards against config.exclude being defined without this particular list,
    // which would otherwise throw a TypeError
    excludedNetworks: argv.excludeNetworks || config.exclude?.networks?.filter((network) => !argv.networks?.includes(network)) || [], // ignore explicitly included networks
    excludedChannels: argv.excludeChannels || config.exclude?.channels?.filter((channel) => !argv.channels?.includes(channel)) || [], // ignore explicitly included channels
  };

  const rawNetworks = await knex.raw(`
    WITH RECURSIVE included_entities AS (
      /* select configured channels and networks */
      SELECT
        entities.*
      FROM
        entities
      WHERE
        CASE WHEN :includeAll
        THEN
          /* select all top level networks and independent channels */
          entities.parent_id IS NULL
        ELSE
          ((entities.slug = ANY(:includedNetworks)
            AND entities.type = 'network')
          OR (entities.slug = ANY(:includedChannels)
            AND entities.type = 'channel'))
        END
        AND NOT (
          (entities.slug = ANY(:excludedNetworks)
            AND entities.type = 'network')
          OR (entities.slug = ANY(:excludedChannels)
            AND entities.type = 'channel'))

      UNION ALL

      /* select recursive children of configured networks */
      SELECT
        entities.*
      FROM
        entities
      INNER JOIN
        included_entities ON included_entities.id = entities.parent_id
      WHERE
        NOT ((entities.slug = ANY(:excludedNetworks)
          AND entities.type = 'network')
        OR (entities.slug = ANY(:excludedChannels)
          AND entities.type = 'channel'))
    ), included_per_network AS (
      /* select recursive channels as children of networks */
      SELECT
        parents.*,
        json_agg(included_entities ORDER BY included_entities.id) included_children,
        (SELECT json_agg(children)
        FROM entities AS children
        WHERE children.parent_id = parents.id) children
      FROM
        included_entities
      LEFT JOIN
        entities AS parents ON parents.id = included_entities.parent_id
      WHERE
        included_entities.type = 'channel'
      GROUP BY
        parents.id
    ), entity_tree as (
      /* get recursive parents of networks (necessary for scraper resolve) */
      SELECT to_jsonb(included_per_network) as entity,
        parent_id,
        array['parent'] as parent_path
      FROM included_per_network

      UNION ALL

      SELECT jsonb_set(entity_tree.entity, entity_tree.parent_path, to_jsonb(entities)),
        entities.parent_id,
        entity_tree.parent_path || array['parent']
      FROM entity_tree
      JOIN entities ON entity_tree.parent_id = entities.id
    )
    SELECT entity FROM entity_tree WHERE parent_id is null;
  `, include);

  const curatedNetworks = rawNetworks.rows.map(({ entity }) => curateEntity(entity, true));

  return curatedNetworks;
}
|
|
|
|
/**
 * Fetch entities by slug, including their recursive parents, direct children
 * and tags, and index them by both their slug and the slug derived from their URL.
 *
 * When several rows map to the same key, the first row wins; with the default
 * ascending sort on type, channels therefore take precedence over networks.
 *
 * @param {string[]} entitySlugs - Slugs to look up.
 * @param {string} [sort='asc'] - Sort direction for the entity type.
 *   NOTE(review): interpolated into the query as raw SQL, so callers must only
 *   pass trusted values such as 'asc' or 'desc'.
 * @returns {Promise<Object>} Curated entities (with parameters) keyed by slug.
 */
async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') {
  // nothing can match an empty slug list, skip the database round trip
  if (entitySlugs.length === 0) {
    return {};
  }

  // children and tags are joined side by side, which yields one row per
  // (child, tag) combination; aggregate DISTINCT so neither list contains duplicates
  const entities = await knex.raw(`
    WITH RECURSIVE entity_tree as (
      SELECT to_jsonb(entities) as entity,
        parent_id,
        array['parent'] as parent_path
      FROM entities
      WHERE slug = ANY(:entitySlugs)

      UNION ALL

      SELECT jsonb_set(entity_tree.entity, entity_tree.parent_path, to_jsonb(entities)),
        entities.parent_id,
        entity_tree.parent_path || array['parent']
      FROM entity_tree
      JOIN entities ON entity_tree.parent_id = entities.id
    )
    SELECT jsonb_set(
      jsonb_set(
        entity,
        '{children}',
        COALESCE(jsonb_agg(DISTINCT to_jsonb(children)) FILTER (WHERE children.id IS NOT NULL), '[]')),
      '{tags}',
      COALESCE(jsonb_agg(DISTINCT to_jsonb(tags)) FILTER (WHERE tags.id IS NOT NULL), '[]')
    ) entity
    FROM entity_tree
    LEFT JOIN entities AS children ON children.parent_id = (entity->>'id')::int
    LEFT JOIN entities_tags ON entities_tags.entity_id = (entity->>'id')::int
    LEFT JOIN tags ON tags.id = entities_tags.tag_id
    WHERE entity_tree.parent_id IS NULL
    GROUP BY entity_tree.entity
    ORDER BY entity->'type' :sort;
  `, { entitySlugs, sort: knex.raw(sort) });

  // the first entity registered under a key is kept, later rows never replace it
  const entitiesBySlug = {};

  entities.rows.forEach(({ entity }) => {
    // entities without a usable URL must not be registered under a 'null' or 'undefined' key
    const urlSlug = entity.url ? urlToSiteSlug(entity.url) : null;
    const keys = urlSlug ? [entity.slug, urlSlug] : [entity.slug];
    const unclaimedKeys = keys.filter((key) => !entitiesBySlug[key]);

    if (unclaimedKeys.length === 0) {
      return;
    }

    // curate once per entity, and only when it is actually going to be used
    const curatedEntity = curateEntity(entity, true);

    unclaimedKeys.forEach((key) => {
      entitiesBySlug[key] = curatedEntity;
    });
  });

  return entitiesBySlug;
}
|
|
|
|
/**
 * Fetch the entities for base releases that have a URL but no site or entity
 * assigned yet, using the slug derived from each release URL.
 *
 * @param {Object[]} baseReleases - Releases that may carry `url`, `site` and `entity`.
 * @returns {Promise<Object>} Result of fetchEntitiesBySlug for the unique derived slugs.
 */
async function fetchReleaseEntities(baseReleases) {
  const derivedSlugs = baseReleases
    .filter((release) => release.url && !release.site && !release.entity)
    .map((release) => urlToSiteSlug(release.url))
    .filter(Boolean);

  // de-duplicate while keeping first-seen order
  const uniqueSlugs = [...new Set(derivedSlugs)];

  return fetchEntitiesBySlug(uniqueSlugs);
}
|
|
|
|
/**
 * Fetch a single entity with its parent, direct children and tags.
 *
 * @param {number|string} entityId - Numeric entity ID, or a slug/alias when `type` is given.
 * @param {string} [type] - Entity type; required when `entityId` is not numeric.
 * @returns {Promise<Object|null>} The curated entity, or null when nothing matched.
 * @throws {Error} When `entityId` is not numeric and no `type` is specified.
 */
async function fetchEntity(entityId, type) {
  const entity = await knex('entities')
    // children and tags are joined side by side, producing one row per
    // (child, tag) combination; DISTINCT keeps each aggregate free of duplicates
    .select(knex.raw(`
      entities.*,
      COALESCE(jsonb_agg(DISTINCT to_jsonb(children)) FILTER (WHERE children.id IS NOT NULL), '[]') as children,
      COALESCE(jsonb_agg(DISTINCT to_jsonb(tags)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
      row_to_json(parents) as parent
    `))
    .modify((queryBuilder) => {
      // a numeric identifier is matched against the primary key
      if (Number(entityId)) {
        queryBuilder.where('entities.id', entityId);
        return;
      }

      // otherwise the identifier is a slug or alias, which needs a type to match within
      if (type) {
        queryBuilder
          .where('entities.type', type)
          .where((whereBuilder) => {
            whereBuilder
              .where('entities.slug', entityId)
              .orWhere(knex.raw(':entityId = ANY(entities.alias)', { entityId }));
          });

        return;
      }

      throw new Error('Invalid ID or unspecified entity type');
    })
    .leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
    .leftJoin('entities as children', 'children.parent_id', 'entities.id')
    .leftJoin('entities_tags', 'entities_tags.entity_id', 'entities.id')
    .leftJoin('tags', 'tags.id', 'entities_tags.tag_id')
    .groupBy('entities.id', 'parents.id')
    .first();

  return curateEntity(entity);
}
|
|
|
|
/**
 * Fetch entities with their parent and tags, optionally restricted to one type.
 *
 * @param {string} [type] - Entity type to filter on; all types when omitted.
 * @param {number} [limit] - Maximum number of entities; falls back to 100 when falsy.
 * @returns {Promise<Array<Object|null>>} Curated entities.
 */
async function fetchEntities(type, limit) {
  const entityQuery = knex('entities')
    .select(knex.raw(`
      entities.*,
      COALESCE(json_agg(tags) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
      row_to_json(parents) as parent
    `))
    .leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
    .leftJoin('entities_tags', 'entities_tags.entity_id', 'entities.id')
    .leftJoin('tags', 'tags.id', 'entities_tags.tag_id')
    .groupBy('entities.id', 'parents.id')
    .limit(limit || 100);

  // only narrow down by type when one was requested
  if (type) {
    entityQuery.where('entities.type', type);
  }

  const entities = await entityQuery;

  return curateEntities(entities);
}
|
|
|
|
/**
 * Search entities through the `search_entities` database function and return
 * the matches with their parent and tags.
 *
 * @param {string} query - Search query passed to `search_entities`.
 * @param {string} [type] - Entity type to filter on; all types when omitted.
 * @param {number} [limit] - Maximum number of entities; falls back to 100 when falsy.
 * @returns {Promise<Array<Object|null>>} Curated entities.
 */
async function searchEntities(query, type, limit) {
  // the same columns are both selected and grouped by
  const entityColumns = [
    'entities.id',
    'entities.name',
    'entities.slug',
    'entities.type',
    'entities.url',
    'entities.description',
    'entities.alias',
    'entities.has_logo',
  ];

  const searchQuery = knex
    .select(knex.raw(`
      ${entityColumns.join(', ')},
      COALESCE(json_agg(tags) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
      row_to_json(parents) as parent
    `))
    .from(knex.raw('search_entities(?) as entities', [query]))
    .leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
    .leftJoin('entities_tags', 'entities_tags.entity_id', 'entities.id')
    .leftJoin('tags', 'tags.id', 'entities_tags.tag_id')
    .groupBy(...entityColumns, 'parents.id')
    .limit(limit || 100);

  // only narrow down by type when one was requested
  if (type) {
    searchQuery.where('entities.type', type);
  }

  const entities = await searchQuery;

  return curateEntities(entities);
}
|
|
|
|
/**
 * Remove all scenes and movies belonging to the given networks and channels
 * and their recursive children, after interactive confirmation, then flush
 * orphaned media.
 *
 * @param {string[]} [networkSlugs=[]] - Slugs of networks to flush.
 * @param {string[]} [channelSlugs=[]] - Slugs of channels to flush.
 * @returns {Promise<void>}
 */
async function flushEntities(networkSlugs = [], channelSlugs = []) {
  // human readable list used in log and prompt messages
  const entitySlugs = networkSlugs.concat(channelSlugs).join(', ');

  const entityQuery = knex
    .withRecursive('selected_entities', knex.raw(`
      SELECT entities.*
      FROM entities
      WHERE
        entities.slug = ANY(:networkSlugs)
        AND entities.type = 'network'
        OR (entities.slug = ANY(:channelSlugs)
        AND entities.type = 'channel')
      UNION ALL
      SELECT entities.*
      FROM entities
      INNER JOIN selected_entities ON selected_entities.id = entities.parent_id
    `, {
      networkSlugs,
      channelSlugs,
    }));

  // collect the distinct IDs from a table whose rows reference the selected entities
  const fetchIdsFrom = (table) => entityQuery
    .clone()
    .select(`${table}.id`)
    .distinct(`${table}.id`)
    .whereNotNull(`${table}.id`)
    .from('selected_entities')
    .leftJoin(table, `${table}.entity_id`, 'selected_entities.id')
    .pluck(`${table}.id`);

  const sceneIds = await fetchIdsFrom('releases');
  const movieIds = await fetchIdsFrom('movies');

  if (sceneIds.length === 0 && movieIds.length === 0) {
    logger.info(`No scenes or movies found to remove for ${entitySlugs}`);
    return;
  }

  const { flushEntities: isConfirmed } = await inquirer.prompt([{
    type: 'confirm',
    name: 'flushEntities',
    message: `You are about to remove ${sceneIds.length} scenes and ${movieIds.length} movies for ${entitySlugs}. Are you sure?`,
    default: false,
  }]);

  if (!isConfirmed) {
    logger.warn(`Confirmation rejected, not flushing scenes or movies for: ${entitySlugs}`);
    return;
  }

  const [deletedScenesCount, deletedMoviesCount] = await Promise.all([
    deleteScenes(sceneIds),
    deleteMovies(movieIds),
  ]);

  logger.info(`Removed ${deletedScenesCount} scenes and ${deletedMoviesCount} movies for ${entitySlugs}`);

  await flushOrphanedMedia();
}
|
|
|
|
module.exports = {
|
|
curateEntity,
|
|
curateEntities,
|
|
fetchIncludedEntities,
|
|
fetchReleaseEntities,
|
|
fetchEntitiesBySlug,
|
|
fetchEntity,
|
|
fetchEntities,
|
|
getRecursiveParent,
|
|
searchEntities,
|
|
flushEntities,
|
|
urlToSiteSlug,
|
|
};
|