2020-06-08 01:41:12 +00:00
'use strict' ;
const config = require ( 'config' ) ;
2020-10-19 00:02:21 +00:00
const inquirer = require ( 'inquirer' ) ;
2020-06-08 01:41:12 +00:00
2020-10-19 00:02:21 +00:00
const logger = require ( './logger' ) ( _ _filename ) ;
2020-06-08 01:41:12 +00:00
const argv = require ( './argv' ) ;
const knex = require ( './knex' ) ;
2022-03-26 16:56:22 +00:00
const { deleteScenes , deleteMovies , deleteSeries } = require ( './releases' ) ;
2020-10-24 22:52:40 +00:00
const { flushOrphanedMedia } = require ( './media' ) ;
2021-10-26 21:42:32 +00:00
const { resolveScraper , resolveLayoutScraper } = require ( './scrapers/resolve' ) ;
2020-06-08 01:41:12 +00:00
2021-02-24 01:43:34 +00:00
/**
 * Walk up the `parent` chain of an entity and return the top-most ancestor.
 *
 * @param {Object|null|undefined} entity - Entity that may carry a nested `parent` chain.
 * @returns {Object|null} The entity at the top of the chain (the entity itself when it
 *   has no parent), or null when no entity was given.
 */
function getRecursiveParent(entity) {
	if (!entity) {
		return null;
	}

	let ancestor = entity;

	// climb until an entity without a (truthy) parent is reached
	while (ancestor.parent) {
		ancestor = ancestor.parent;
	}

	return ancestor;
}
2020-06-08 01:41:12 +00:00
/**
 * Convert a raw entity record into the curated shape returned by this module.
 *
 * Only records with an `id` get the core properties (id, name, url, logo paths, parent, ...);
 * tags, parameters, children, included children and the scraper are attached regardless.
 *
 * @param {Object|null|undefined} entity - Raw entity; may carry `parent`, `children`,
 *   `included_children` and `tags`.
 * @param {boolean} [includeParameters=false] - When truthy, copy `entity.parameters` onto the
 *   result; the flag is passed on to the parent and children as well.
 * @returns {Object|null} The curated entity, or null when no entity was given.
 */
function curateEntity(entity, includeParameters = false) {
	if (!entity) {
		return null;
	}

	// logo paths only exist for entities flagged with a logo; networks and independent
	// entities use their own directory, other entities live under their parent's directory
	let logo = null;

	if (entity.has_logo) {
		if (entity.independent || entity.type === 'network') {
			logo = {
				logo: `${entity.slug}/network.png`,
				thumbnail: `${entity.slug}/thumbs/network.png`,
				favicon: `${entity.slug}/favicon.png`,
			};
		} else if (entity.parent) {
			logo = {
				logo: `${entity.parent.slug}/${entity.slug}.png`,
				thumbnail: `${entity.parent.slug}/thumbs/${entity.slug}.png`,
				favicon: `${entity.parent.slug}/favicon.png`,
			};
		}
	}

	const curatedEntity = {};

	if (entity.id) {
		Object.assign(curatedEntity, {
			id: entity.id,
			name: entity.name,
			url: entity.url,
			description: entity.description,
			slug: entity.slug,
			type: entity.type,
			independent: Boolean(entity.independent),
			aliases: entity.alias,
			...logo,
			parent: curateEntity(entity.parent, includeParameters),
		});
	}

	if (entity.tags) {
		curatedEntity.tags = entity.tags.map(({ id, name, slug, priority }) => ({
			id,
			name,
			slug,
			priority,
		}));
	}

	if (includeParameters) {
		curatedEntity.parameters = entity.parameters;
	}

	// children are curated with the curated entity as their parent, or no parent if it has no ID
	const curateChild = (child) => curateEntity({
		...child,
		parent: curatedEntity.id ? curatedEntity : null,
	}, includeParameters);

	if (entity.children) {
		curatedEntity.children = entity.children.map(curateChild);
	}

	if (entity.included_children) {
		curatedEntity.includedChildren = entity.included_children.map(curateChild);
	}

	// the scraper is resolved from the curated entity, the layout scraper from the raw entity
	const scraper = resolveScraper(curatedEntity);

	curatedEntity.scraper = resolveLayoutScraper(entity, scraper);

	return curatedEntity;
}
/**
 * Curate a list of raw entities.
 *
 * @param {Object[]} entities - Raw entities to curate.
 * @param {boolean} [includeParameters] - Passed through to curateEntity for every entity.
 * @returns {Promise<Array<Object|null>>} Curated entities, in the same order as the input.
 */
async function curateEntities(entities, includeParameters) {
	const curateOne = async (entity) => curateEntity(entity, includeParameters);
	const pendingEntities = entities.map((entity) => curateOne(entity));

	return Promise.all(pendingEntities);
}
2023-06-04 19:50:59 +00:00
/* obsolete in favor of urlToHostname
function urlToSiteSlug(url) {
	try {
		const slug = new URL(url)
			.hostname
			.match(/([\w-]+)\.\w+$/)?.[1]
			.replace(/[-_]+/g, '');

		return slug;
	} catch (error) {
		logger.warn(`Failed to derive entity slug from '${url}': ${error.message}`);

		return null;
	}
}
*/
/**
 * Extract the hostname from a URL, dropping a leading `www.` when present.
 *
 * @param {string} url - Absolute URL to parse.
 * @returns {string|null} The hostname without a leading `www.`, or null (after logging a
 *   warning) when the URL cannot be parsed.
 */
function urlToHostname(url) {
	try {
		const { hostname: fullHostname } = new URL(url);

		// the last capture group holds whatever follows the optional 'www.' prefix
		const hostnameMatch = fullHostname.match(/(www\.)?(.*)/);

		return hostnameMatch?.at(-1);
	} catch (error) {
		logger.warn(`Failed to derive entity hostname from '${url}': ${error.message}`);

		return null;
	}
}
2021-02-01 00:45:30 +00:00
2020-08-13 22:32:59 +00:00
/**
 * Fetch the entities selected by the command line arguments and configuration, together with
 * their recursive children and parents, and curate them with their parameters included.
 *
 * Selection rules, as derived from `argv` and `config`:
 * - when no networks or channels are specified anywhere, every entity without a parent is selected;
 * - networks or channels given on the command line take precedence over the configured includes;
 * - exclusions given on the command line take precedence over configured exclusions, and
 *   configured exclusions are ignored for entities explicitly included on the command line.
 *
 * @returns {Promise<Object[]>} Curated top-level entities as returned by the entity tree query.
 */
async function fetchIncludedEntities() {
	const include = {
		includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels,
		includedNetworks: argv.networks || (!argv.channels && config.include?.networks) || [],
		includedChannels: argv.channels || (!argv.networks && config.include?.channels) || [],
		// the exclude section may define only one of the two lists, so the list itself is optional too
		excludedNetworks: argv.excludeNetworks || config.exclude?.networks?.filter((network) => !argv.networks?.includes(network)) || [], // ignore explicitly included networks
		excludedChannels: argv.excludeChannels || config.exclude?.channels?.filter((channel) => !argv.channels?.includes(channel)) || [], // ignore explicitly included channels
	};

	const rawNetworks = await knex.raw(`
		WITH RECURSIVE included_entities AS (
			/* select configured channels and networks */
			SELECT
				entities.*
			FROM
				entities
			WHERE
				CASE WHEN :includeAll
				THEN
					/* select all top level networks and independent channels */
					entities.parent_id IS NULL
				ELSE
					((entities.slug = ANY(:includedNetworks)
						AND entities.type = 'network')
					OR (entities.slug = ANY(:includedChannels)
						AND entities.type = 'channel'))
				END
				AND NOT (
					(entities.slug = ANY(:excludedNetworks)
						AND entities.type = 'network')
					OR (entities.slug = ANY(:excludedChannels)
						AND entities.type = 'channel'))

			UNION ALL

			/* select recursive children of configured networks */
			SELECT
				entities.*
			FROM
				entities
			INNER JOIN
				included_entities ON included_entities.id = entities.parent_id
			WHERE
				NOT ((entities.slug = ANY(:excludedNetworks)
						AND entities.type = 'network')
					OR (entities.slug = ANY(:excludedChannels)
						AND entities.type = 'channel'))
		), included_per_network AS (
			/* select recursive channels as children of networks */
			SELECT
				parents.*,
				json_agg(included_entities ORDER BY included_entities.id) included_children,
				(SELECT json_agg(children)
				FROM entities AS children
				WHERE children.parent_id = parents.id) children
			FROM
				included_entities
			LEFT JOIN
				entities AS parents ON parents.id = included_entities.parent_id
			WHERE
				included_entities.type = 'channel'
			GROUP BY
				parents.id
		), entity_tree as (
			/* get recursive parents of networks (necessary for scraper resolve) */
			SELECT to_jsonb(included_per_network) as entity,
				parent_id,
				array['parent'] as parent_path
			FROM included_per_network

			UNION ALL

			SELECT jsonb_set(entity_tree.entity, entity_tree.parent_path, to_jsonb(entities)),
				entities.parent_id,
				entity_tree.parent_path || array['parent']
			FROM entity_tree
			JOIN entities ON entity_tree.parent_id = entities.id
		)
		SELECT entity FROM entity_tree WHERE parent_id is null;
	`, include);

	const curatedNetworks = rawNetworks.rows.map(({ entity }) => curateEntity(entity, true));

	return curatedNetworks;
}
2023-07-01 20:24:21 +00:00
/**
 * Fetch channels and networks by slug or hostname, including their recursive parents,
 * direct children and tags, and index the curated results by both slug and hostname.
 *
 * Identifiers containing a dot are treated as hostnames and matched against the end of the
 * entity URL; all other identifiers are matched against the entity slug.
 *
 * @param {string[]} entitySlugs - Entity slugs and/or hostnames to look up.
 * @param {string} [prefer='channel'] - Entity type that wins when several rows share a slug or
 *   hostname; 'channel' sorts the rows by type ascending, any other value descending.
 * @returns {Promise<Object>} Curated entities keyed by slug and, when the entity URL yields
 *   one, by hostname.
 */
async function fetchEntitiesBySlug(entitySlugs, prefer = 'channel') {
	const entities = await knex.raw(`
		WITH RECURSIVE entity_tree as (
			SELECT to_jsonb(entities) as entity,
				parent_id,
				array['parent'] as parent_path
			FROM entities
			WHERE (slug = ANY(:entitySlugs)
				OR url ILIKE ANY(:entityHosts))
				AND type IN ('channel', 'network')

			UNION ALL

			SELECT jsonb_set(entity_tree.entity, entity_tree.parent_path, to_jsonb(entities)),
				entities.parent_id,
				entity_tree.parent_path || array['parent']
			FROM entity_tree
			JOIN entities ON entity_tree.parent_id = entities.id
		)
		SELECT jsonb_set(
			jsonb_set(
				entity,
				'{children}',
				to_jsonb(COALESCE(json_agg(children) FILTER (WHERE children.id IS NOT NULL), '[]'))),
			'{tags}',
			to_jsonb(COALESCE(json_agg(tags) FILTER (WHERE tags.id IS NOT NULL), '[]'))
		) entity
		FROM entity_tree
		LEFT JOIN entities AS children ON children.parent_id = (entity->>'id')::int
		LEFT JOIN entities_tags ON entities_tags.entity_id = (entity->>'id')::int
		LEFT JOIN tags ON tags.id = entities_tags.tag_id
		WHERE entity_tree.parent_id IS NULL
		GROUP BY entity_tree.entity
		ORDER BY entity->'type' :sort;
	`, {
		entitySlugs: entitySlugs.filter((slug) => !slug.includes('.')),
		entityHosts: entitySlugs.filter((slug) => slug.includes('.')).map((hostname) => `%${hostname}`),
		sort: knex.raw(prefer === 'channel' ? 'asc' : 'desc'),
	});

	// rows arrive ordered by type, so the preferred type is seen first; the first entity to
	// claim a slug or hostname keeps it, later rows reuse the existing entry
	const entitiesBySlug = {};

	for (const { entity } of entities.rows) {
		// entities without a URL have no hostname; they must not share a 'null' key,
		// or every URL-less entity would resolve to the first one encountered
		const host = entity.url ? urlToHostname(entity.url) : null;

		const curatedEntity = entitiesBySlug[entity.slug]
			|| (host && entitiesBySlug[host])
			|| curateEntity(entity, true);

		entitiesBySlug[entity.slug] = curatedEntity;

		if (host) {
			entitiesBySlug[host] = curatedEntity;
		}
	}

	return entitiesBySlug;
}
2021-02-02 23:46:59 +00:00
/**
 * Fetch the entities for releases that have a URL but no site or entity assigned yet,
 * based on the hostname of each release URL.
 *
 * @param {Object[]} baseReleases - Releases, each optionally carrying `url`, `site` and `entity`.
 * @returns {Promise<Object>} Result of fetchEntitiesBySlug for the unique hostnames, preferring
 *   the entity type set by `argv.prefer`, or 'network' when it is not set.
 */
async function fetchReleaseEntities(baseReleases) {
	const hostnames = new Set();

	for (const baseRelease of baseReleases) {
		const needsEntity = baseRelease.url && !baseRelease.site && !baseRelease.entity;

		if (needsEntity) {
			const hostname = urlToHostname(baseRelease.url);

			// URLs that could not be parsed yield no hostname and are skipped
			if (hostname) {
				hostnames.add(hostname);
			}
		}
	}

	return fetchEntitiesBySlug(Array.from(hostnames), argv.prefer || 'network');
}
2020-10-16 21:00:03 +00:00
/**
 * Fetch a single entity with its parent, children and tags.
 *
 * @param {number|string} entityId - Numeric entity ID, or a slug or alias when `type` is given.
 * @param {string} [type] - Entity type; required when `entityId` is not numeric.
 * @returns {Promise<Object|null>} The curated entity, as returned by curateEntity for the first
 *   matching row.
 * @throws {Error} When `entityId` is not numeric and no type was specified.
 */
async function fetchEntity(entityId, type) {
	// identifiers that convert to a non-zero number are IDs, anything else is a slug or alias
	function filterByIdentifier(queryBuilder) {
		if (Number(entityId)) {
			queryBuilder.where('entities.id', entityId);
		} else if (type) {
			queryBuilder
				.where('entities.type', type)
				.where((whereBuilder) => {
					whereBuilder
						.where('entities.slug', entityId)
						.orWhere(knex.raw(':entityId = ANY(entities.alias)', { entityId }));
				});
		} else {
			throw new Error('Invalid ID or unspecified entity type');
		}
	}

	const entityRow = await knex('entities')
		.select(knex.raw(`
			entities.*,
			COALESCE(json_agg(children) FILTER (WHERE children.id IS NOT NULL), '[]') as children,
			COALESCE(json_agg(tags) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
			row_to_json(parents) as parent
		`))
		.modify(filterByIdentifier)
		.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
		.leftJoin('entities as children', 'children.parent_id', 'entities.id')
		.leftJoin('entities_tags', 'entities_tags.entity_id', 'entities.id')
		.leftJoin('tags', 'tags.id', 'entities_tags.tag_id')
		.groupBy('entities.id', 'parents.id')
		.first();

	return curateEntity(entityRow);
}
2020-10-16 21:00:03 +00:00
/**
 * Fetch entities with their parent and tags, optionally restricted to one entity type.
 *
 * @param {string} [type] - Entity type to filter by; all types are returned when omitted.
 * @param {number} [limit] - Maximum number of entities to fetch; falls back to 100 when falsy.
 * @returns {Promise<Object[]>} The curated entities.
 */
async function fetchEntities(type, limit) {
	const restrictToType = (queryBuilder) => {
		if (type) {
			queryBuilder.where('entities.type', type);
		}
	};

	const entityRows = await knex('entities')
		.select(knex.raw(`
			entities.*,
			COALESCE(json_agg(tags) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
			row_to_json(parents) as parent
		`))
		.modify(restrictToType)
		.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
		.leftJoin('entities_tags', 'entities_tags.entity_id', 'entities.id')
		.leftJoin('tags', 'tags.id', 'entities_tags.tag_id')
		.groupBy('entities.id', 'parents.id')
		.limit(limit || 100);

	return curateEntities(entityRows);
}
2020-06-08 01:41:12 +00:00
2020-10-16 21:00:03 +00:00
/**
 * Search entities through the `search_entities` database function, returning each match
 * with its parent and tags.
 *
 * @param {string} query - Search query handed to `search_entities`.
 * @param {string} [type] - Entity type to filter by; all types are returned when omitted.
 * @param {number} [limit] - Maximum number of entities to fetch; falls back to 100 when falsy.
 * @returns {Promise<Object[]>} The curated entities.
 */
async function searchEntities(query, type, limit) {
	const restrictToType = (queryBuilder) => {
		if (type) {
			queryBuilder.where('entities.type', type);
		}
	};

	const entityRows = await knex
		.select(knex.raw(`
			entities.id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.alias, entities.has_logo,
			COALESCE(json_agg(tags) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
			row_to_json(parents) as parent
		`))
		.from(knex.raw('search_entities(?) as entities', [query]))
		.modify(restrictToType)
		.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
		.leftJoin('entities_tags', 'entities_tags.entity_id', 'entities.id')
		.leftJoin('tags', 'tags.id', 'entities_tags.tag_id')
		.groupBy('entities.id', 'entities.name', 'entities.slug', 'entities.type', 'entities.url', 'entities.description', 'entities.alias', 'entities.has_logo', 'parents.id')
		.limit(limit || 100);

	return curateEntities(entityRows);
}
2020-10-16 21:00:03 +00:00
2020-10-19 00:02:21 +00:00
/**
 * Remove all scenes, movies and series belonging to the given networks and channels and
 * their recursive children, after interactive confirmation.
 *
 * Orphaned media is flushed afterwards unless `argv.flushOrphanedMedia` is explicitly false.
 *
 * @param {string[]} [networkSlugs=[]] - Slugs of the networks to flush.
 * @param {string[]} [channelSlugs=[]] - Slugs of the channels to flush.
 * @returns {Promise<void>} Resolves once the flush completed, nothing was found, or the
 *   confirmation was rejected.
 */
async function flushEntities(networkSlugs = [], channelSlugs = []) {
	const allSlugs = networkSlugs.concat(channelSlugs);
	const entitySlugs = allSlugs.join(', ');

	// base query selecting the specified entities and all of their recursive children
	const entityQuery = knex
		.withRecursive('selected_entities', knex.raw(`
			SELECT entities.*
			FROM entities
			WHERE
				entities.slug = ANY(:networkSlugs)
				AND entities.type = 'network'
				OR (entities.slug = ANY(:channelSlugs)
					AND entities.type = 'channel')
			UNION ALL
			SELECT entities.*
			FROM entities
			INNER JOIN selected_entities ON selected_entities.id = entities.parent_id
		`, {
			networkSlugs,
			channelSlugs,
		}));

	// distinct IDs of the rows in the given table that belong to any selected entity
	const fetchIdsFrom = (table) => entityQuery
		.clone()
		.select(`${table}.id`)
		.distinct(`${table}.id`)
		.whereNotNull(`${table}.id`)
		.from('selected_entities')
		.leftJoin(table, `${table}.entity_id`, 'selected_entities.id')
		.pluck(`${table}.id`);

	const sceneIds = await fetchIdsFrom('releases');
	const movieIds = await fetchIdsFrom('movies');
	const serieIds = await fetchIdsFrom('series');

	const hasNothingToRemove = sceneIds.length === 0 && movieIds.length === 0 && serieIds.length === 0;

	if (hasNothingToRemove) {
		logger.info(`No scenes, movies or series found to remove for ${entitySlugs}`);
		return;
	}

	const { flushEntities: isConfirmed } = await inquirer.prompt([{
		type: 'confirm',
		name: 'flushEntities',
		message: `You are about to remove ${sceneIds.length} scenes, ${movieIds.length} movies and ${serieIds.length} series for ${entitySlugs}. Are you sure?`,
		default: false,
	}]);

	if (!isConfirmed) {
		logger.warn(`Confirmation rejected, not flushing scenes, movies or series for: ${entitySlugs}`);
		return;
	}

	const [deletedScenesCount, deletedMoviesCount, deletedSeriesCount] = await Promise.all([
		deleteScenes(sceneIds),
		deleteMovies(movieIds),
		deleteSeries(serieIds),
	]);

	logger.info(`Removed ${deletedScenesCount} scenes, ${deletedMoviesCount} movies and ${deletedSeriesCount} series for ${entitySlugs}`);

	// media is flushed by default, only an explicit false opts out
	if (argv.flushOrphanedMedia !== false) {
		await flushOrphanedMedia();
	}
}
2020-06-08 01:41:12 +00:00
// Public interface of the entities module: curation helpers, entity lookups and searches,
// the hostname helper, and the interactive flush of an entity's releases.
module . exports = {
curateEntity ,
curateEntities ,
fetchIncludedEntities ,
2021-02-01 00:45:30 +00:00
fetchReleaseEntities ,
2021-02-02 23:46:59 +00:00
fetchEntitiesBySlug ,
2020-10-16 21:00:03 +00:00
fetchEntity ,
fetchEntities ,
2021-02-24 01:43:34 +00:00
getRecursiveParent ,
2020-10-16 21:00:03 +00:00
searchEntities ,
2020-10-19 00:02:21 +00:00
flushEntities ,
2023-06-04 19:50:59 +00:00
urlToHostname ,
// urlToSiteSlug, (obsolete, no longer exported; superseded by urlToHostname)
2020-06-08 01:41:12 +00:00
} ;