diff --git a/README.md b/README.md index 022ef0df..dac13193 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,38 @@ Do not modify `config/default.js`, but instead create a copy at `config/local.js You can also use `npm run flush` to run both steps at once, and wipe the database completely later. +#### Networks and channels +To select which of the networks and channels available in the database are scraped, you can configure `include` and `exclude` lists. To include all available channels and only use the `exclude` list, leave the `include` parameter unconfigured. The `exclude` lists will exclude channels and child networks from networks on the `include` lists, but not vice versa. That is, if the `include` list includes a network and the `exclude` list excludes one of that network's channels, the channel will not be scraped. However, if the `include` list includes a channel, and the `exclude` list includes its parent network, the channel will still be scraped. + +This configuration will scrape Evil Angel and all XEmpire channels, except for LesbianX. +``` +include: { + networks: [ + 'xempire', + ], + channels: [ + 'evilangel', + ], +}, +exclude: { + channels: [ + 'lesbianx', + ], +} +``` + +This configuration will scrape all channels, except for BAM Visions and all channels that are part of the Vixen network. 
+``` +exclude: { + channels: [ + 'bamvisions', + ], + networks: [ + 'vixen' + ], +}, +``` + ### Building To build traxxx, run the following command: diff --git a/config/default.js b/config/default.js index 0eda6b49..c858c413 100644 --- a/config/default.js +++ b/config/default.js @@ -11,19 +11,7 @@ module.exports = { sfwHost: '0.0.0.0', sfwPort: 5001, }, - include: { - networks: [ - 'xempire', - 'julesjordan', - ], - channels: [], - }, exclude: { - networks: [ - 'hardx', - 'pornpros', - 'mindgeek', - ], channels: [ // 21sextreme, no longer updated 'mightymistress', diff --git a/src/entities.js b/src/entities.js index b014b9f3..0028ca45 100644 --- a/src/entities.js +++ b/src/entities.js @@ -1,6 +1,5 @@ 'use strict'; -const util = require('util'); const config = require('config'); const logger = require('./logger')(__filename); @@ -9,7 +8,11 @@ const knex = require('./knex'); const whereOr = require('./utils/where-or'); function curateEntity(entity, includeParameters = false) { - const curatedEntity = { + if (!entity) { + return null; + } + + const curatedEntity = entity.id ? { id: entity.id, name: entity.name, url: entity.url, @@ -17,12 +20,15 @@ function curateEntity(entity, includeParameters = false) { slug: entity.slug, type: entity.type, parameters: includeParameters ? entity.parameters : null, - parent: entity.parent_id && entity.parent, - children: (entity.children || []).map(child => curateEntity({ + parent: curateEntity(entity.parent), + } : {}; + + if (entity.children) { + curatedEntity.children = entity.children.map(child => curateEntity({ ...child, - parent: entity, - }, includeParameters)), - }; + parent: curatedEntity.id ? 
curatedEntity : null, + }, includeParameters)); + } return curatedEntity; } @@ -36,14 +42,14 @@ async function fetchChannelsFromArgv() { /* networks from argument with channels as children */ WITH RECURSIVE children AS ( SELECT - id, parent_id, name, slug, type, url, description, parameters + entities.* FROM entities WHERE slug = ANY(?) AND entities.type = 'network' UNION ALL SELECT - entities.id, entities.parent_id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.parameters + entities.* FROM entities INNER JOIN @@ -86,78 +92,44 @@ async function fetchChannelsFromArgv() { } async function fetchChannelsFromConfig() { - console.log(config.include); - - /* const rawNetworks = await knex.raw(` - WITH RECURSIVE children AS ( - SELECT - id, parent_id, name, slug, type, url, description, parameters - FROM - entities - WHERE - CASE WHEN array_length(?, 1) IS NOT NULL - THEN slug = ANY(?) - ELSE true - END - AND NOT - slug = ANY(?) - AND - entities.type = 'network' - UNION ALL - SELECT - entities.id, entities.parent_id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.parameters - FROM - entities - INNER JOIN - children ON children.id = entities.parent_id - ) - SELECT - entities.*, row_to_json(parents) as parent, json_agg(children) as children - FROM - children - LEFT JOIN - entities ON entities.id = children.parent_id - LEFT JOIN - entities AS parents ON parents.id = entities.parent_id - WHERE - children.type = 'channel' - GROUP BY - children.parent_id, entities.id, entities.name, parents.id - `, [ - config.include.networks, - config.include.networks, - config.exclude.networks, - ]); - */ - - const rawNetworks = await knex.raw(` - /* select channels associated to configured networks */ WITH RECURSIVE channels AS ( - /* select configured networks */ + /* select configured channels and networks */ SELECT - id, parent_id, name, type, slug + entities.* FROM entities WHERE - (slug = 
ANY(:includeNetworks) - AND NOT entities.slug = ANY(:excludedNetworks)) - AND entities.type = 'network' + CASE WHEN :includeAll + THEN + /* select all top level networks and independent channels */ + entities.parent_id IS NULL + ELSE + ((entities.slug = ANY(:includedNetworks) + AND entities.type = 'network') + OR (entities.slug = ANY(:includedChannels) + AND entities.type = 'channel')) + END + AND NOT ( + (entities.slug = ANY(:excludedNetworks) + AND entities.type = 'network') + OR (entities.slug = ANY(:excludedChannels) + AND entities.type = 'channel')) UNION ALL /* select recursive children of configured networks */ SELECT - entities.id, entities.parent_id, entities.name, entities.type, entities.slug + entities.* FROM entities INNER JOIN channels ON channels.id = entities.parent_id WHERE - NOT ( - (entities.slug = ANY(:excludedNetworks) AND entities.type = 'network') - OR (entities.slug = ANY(:excludedChannels) AND entities.type = 'channel') - ) + NOT ((entities.slug = ANY(:excludedNetworks) + AND entities.type = 'network') + OR (entities.slug = ANY(:excludedChannels) + AND entities.type = 'channel')) ) /* select recursive channels as children of networks */ SELECT @@ -170,51 +142,17 @@ async function fetchChannelsFromConfig() { channels.type = 'channel' GROUP BY entities.id - - UNION ALL - - /* select configured channels as children of networks */ - SELECT - entities.*, json_agg(children) as children - FROM - entities AS children - LEFT JOIN - entities ON entities.id = children.parent_id - WHERE - children.slug = ANY(:includedChannels) - AND - children.type = 'channel' - GROUP BY - entities.id `, { - includedNetworks: config.include.networks, - includedChannels: config.include.channels, - excludedNetworks: config.exclude.networks, - excludedChannels: config.exclude.channels, + includeAll: !config.include?.networks && !config.include?.channels, + includedNetworks: config.include?.networks || [], + includedChannels: config.include?.channels || [], + 
excludedNetworks: config.exclude?.networks || [], + excludedChannels: config.exclude?.channels || [], }); - console.log(util.inspect(rawNetworks.rows, null, null)); + const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true)); - /* - const curatedSites = await curateEntities(rawChannels, true); - logger.info(`Found ${curatedSites.length} entities in database`); - - const rawChannels = await knex('entities') - .select(knex.raw('entities.*, row_to_json(parents) as parent')) - .leftJoin('entities as parents', 'parents.id', 'entities.parent_id') - .where((builder) => { - if (config.include) { - builder.whereIn('entities.slug', config.include); - } - }) - .whereNot((builder) => { - builder.whereIn('entities.slug', config.exclude || []); - }); - - console.log(rawChannels); - */ - - // return curatedSites; + return curatedNetworks; } async function fetchIncludedEntities() {