Fixed and documented entity configuration and query.

This commit is contained in:
DebaucheryLibrarian 2020-08-13 23:59:54 +02:00
parent 59e2124407
commit 77566eae0d
3 changed files with 76 additions and 118 deletions

View File

@ -18,6 +18,38 @@ Do not modify `config/default.js`, but instead create a copy at `config/local.js
You can also use `npm run flush` to run both steps at once, and wipe the database completely later. You can also use `npm run flush` to run both steps at once, and wipe the database completely later.
#### Networks and channels
To scrape the networks and channels available in the database, you can configure `include` and `exclude` lists. To include all available channels and rely solely on the `exclude` list, leave the `include` parameter unconfigured. The `exclude` lists remove channels and child networks from the networks on the `include` lists, but not vice versa. That is, if the `include` list contains a network and the `exclude` list contains one of that network's channels, that channel will not be scraped. However, if the `include` list contains a channel and the `exclude` list contains its parent network, the channel will still be scraped.
This configuration will scrape Evil Angel and all XEmpire channels, except for LesbianX.
```
include: {
networks: [
'xempire',
],
channels: [
'evilangel',
],
},
exclude: {
channels: [
'lesbianx',
],
}
```
This configuration will scrape all channels except BAM Visions and every channel that is part of the Vixen network.
```
exclude: {
channels: [
'bamvisions',
],
networks: [
'vixen'
],
},
```
### Building ### Building
To build traxxx, run the following command: To build traxxx, run the following command:

View File

@ -11,19 +11,7 @@ module.exports = {
sfwHost: '0.0.0.0', sfwHost: '0.0.0.0',
sfwPort: 5001, sfwPort: 5001,
}, },
include: {
networks: [
'xempire',
'julesjordan',
],
channels: [],
},
exclude: { exclude: {
networks: [
'hardx',
'pornpros',
'mindgeek',
],
channels: [ channels: [
// 21sextreme, no longer updated // 21sextreme, no longer updated
'mightymistress', 'mightymistress',

View File

@ -1,6 +1,5 @@
'use strict'; 'use strict';
const util = require('util');
const config = require('config'); const config = require('config');
const logger = require('./logger')(__filename); const logger = require('./logger')(__filename);
@ -9,7 +8,11 @@ const knex = require('./knex');
const whereOr = require('./utils/where-or'); const whereOr = require('./utils/where-or');
function curateEntity(entity, includeParameters = false) { function curateEntity(entity, includeParameters = false) {
const curatedEntity = { if (!entity) {
return null;
}
const curatedEntity = entity.id ? {
id: entity.id, id: entity.id,
name: entity.name, name: entity.name,
url: entity.url, url: entity.url,
@ -17,12 +20,15 @@ function curateEntity(entity, includeParameters = false) {
slug: entity.slug, slug: entity.slug,
type: entity.type, type: entity.type,
parameters: includeParameters ? entity.parameters : null, parameters: includeParameters ? entity.parameters : null,
parent: entity.parent_id && entity.parent, parent: curateEntity(entity.parent),
children: (entity.children || []).map(child => curateEntity({ } : {};
if (entity.children) {
curatedEntity.children = entity.children.map(child => curateEntity({
...child, ...child,
parent: entity, parent: curatedEntity.id ? curatedEntity : null,
}, includeParameters)), }, includeParameters));
}; }
return curatedEntity; return curatedEntity;
} }
@ -36,14 +42,14 @@ async function fetchChannelsFromArgv() {
/* networks from argument with channels as children */ /* networks from argument with channels as children */
WITH RECURSIVE children AS ( WITH RECURSIVE children AS (
SELECT SELECT
id, parent_id, name, slug, type, url, description, parameters entities.*
FROM FROM
entities entities
WHERE WHERE
slug = ANY(?) AND entities.type = 'network' slug = ANY(?) AND entities.type = 'network'
UNION ALL UNION ALL
SELECT SELECT
entities.id, entities.parent_id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.parameters entities.*
FROM FROM
entities entities
INNER JOIN INNER JOIN
@ -86,78 +92,44 @@ async function fetchChannelsFromArgv() {
} }
async function fetchChannelsFromConfig() { async function fetchChannelsFromConfig() {
console.log(config.include);
/*
const rawNetworks = await knex.raw(` const rawNetworks = await knex.raw(`
WITH RECURSIVE children AS (
SELECT
id, parent_id, name, slug, type, url, description, parameters
FROM
entities
WHERE
CASE WHEN array_length(?, 1) IS NOT NULL
THEN slug = ANY(?)
ELSE true
END
AND NOT
slug = ANY(?)
AND
entities.type = 'network'
UNION ALL
SELECT
entities.id, entities.parent_id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.parameters
FROM
entities
INNER JOIN
children ON children.id = entities.parent_id
)
SELECT
entities.*, row_to_json(parents) as parent, json_agg(children) as children
FROM
children
LEFT JOIN
entities ON entities.id = children.parent_id
LEFT JOIN
entities AS parents ON parents.id = entities.parent_id
WHERE
children.type = 'channel'
GROUP BY
children.parent_id, entities.id, entities.name, parents.id
`, [
config.include.networks,
config.include.networks,
config.exclude.networks,
]);
*/
const rawNetworks = await knex.raw(`
/* select channels associated to configured networks */
WITH RECURSIVE channels AS ( WITH RECURSIVE channels AS (
/* select configured networks */ /* select configured channels and networks */
SELECT SELECT
id, parent_id, name, type, slug entities.*
FROM FROM
entities entities
WHERE WHERE
(slug = ANY(:includeNetworks) CASE WHEN :includeAll
AND NOT entities.slug = ANY(:excludedNetworks)) THEN
AND entities.type = 'network' /* select all top level networks and independent channels */
entities.parent_id IS NULL
ELSE
((entities.slug = ANY(:includedNetworks)
AND entities.type = 'network')
OR (entities.slug = ANY(:includedChannels)
AND entities.type = 'channel'))
END
AND NOT (
(entities.slug = ANY(:excludedNetworks)
AND entities.type = 'network')
OR (entities.slug = ANY(:excludedChannels)
AND entities.type = 'channel'))
UNION ALL UNION ALL
/* select recursive children of configured networks */ /* select recursive children of configured networks */
SELECT SELECT
entities.id, entities.parent_id, entities.name, entities.type, entities.slug entities.*
FROM FROM
entities entities
INNER JOIN INNER JOIN
channels ON channels.id = entities.parent_id channels ON channels.id = entities.parent_id
WHERE WHERE
NOT ( NOT ((entities.slug = ANY(:excludedNetworks)
(entities.slug = ANY(:excludedNetworks) AND entities.type = 'network') AND entities.type = 'network')
OR (entities.slug = ANY(:excludedChannels) AND entities.type = 'channel') OR (entities.slug = ANY(:excludedChannels)
) AND entities.type = 'channel'))
) )
/* select recursive channels as children of networks */ /* select recursive channels as children of networks */
SELECT SELECT
@ -170,51 +142,17 @@ async function fetchChannelsFromConfig() {
channels.type = 'channel' channels.type = 'channel'
GROUP BY GROUP BY
entities.id entities.id
UNION ALL
/* select configured channels as children of networks */
SELECT
entities.*, json_agg(children) as children
FROM
entities AS children
LEFT JOIN
entities ON entities.id = children.parent_id
WHERE
children.slug = ANY(:includedChannels)
AND
children.type = 'channel'
GROUP BY
entities.id
`, { `, {
includedNetworks: config.include.networks, includeAll: !config.include?.networks && !config.include?.channels,
includedChannels: config.include.channels, includedNetworks: config.include?.networks || [],
excludedNetworks: config.exclude.networks, includedChannels: config.include?.channels || [],
excludedChannels: config.exclude.channels, excludedNetworks: config.exclude?.networks || [],
excludedChannels: config.exclude?.channels || [],
}); });
console.log(util.inspect(rawNetworks.rows, null, null)); const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
/* return curatedNetworks;
const curatedSites = await curateEntities(rawChannels, true);
logger.info(`Found ${curatedSites.length} entities in database`);
const rawChannels = await knex('entities')
.select(knex.raw('entities.*, row_to_json(parents) as parent'))
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
.where((builder) => {
if (config.include) {
builder.whereIn('entities.slug', config.include);
}
})
.whereNot((builder) => {
builder.whereIn('entities.slug', config.exclude || []);
});
console.log(rawChannels);
*/
// return curatedSites;
} }
async function fetchIncludedEntities() { async function fetchIncludedEntities() {