Fixed and documented entity configuration and query.

This commit is contained in:
DebaucheryLibrarian 2020-08-13 23:59:54 +02:00
parent 59e2124407
commit 77566eae0d
3 changed files with 76 additions and 118 deletions

View File

@ -18,6 +18,38 @@ Do not modify `config/default.js`, but instead create a copy at `config/local.js
You can also use `npm run flush` to run both steps at once, and wipe the database completely later.
#### Networks and channels
To scrape the networks and channels available in the database, you can configure `include` and `exclude` lists. To include all available channels and rely on the `exclude` lists alone, leave the `include` parameter unconfigured. The `exclude` lists remove channels and child networks from the networks on the `include` lists, but not vice versa. That is, if the `include` list includes a network and the `exclude` list excludes one of that network's channels, that channel will not be scraped. However, if the `include` list includes a channel and the `exclude` list includes its parent network, the channel will still be scraped.
This configuration will scrape Evil Angel and all XEmpire channels, except for LesbianX.
```
include: {
networks: [
'xempire',
],
channels: [
'evilangel',
],
},
exclude: {
channels: [
'lesbianx',
],
}
```
This configuration will scrape all channels, except for BAM Visions and all channels that are part of the Vixen network.
```
exclude: {
channels: [
'bamvisions',
],
networks: [
'vixen'
],
},
```
### Building
To build traxxx, run the following command:

View File

@ -11,19 +11,7 @@ module.exports = {
sfwHost: '0.0.0.0',
sfwPort: 5001,
},
include: {
networks: [
'xempire',
'julesjordan',
],
channels: [],
},
exclude: {
networks: [
'hardx',
'pornpros',
'mindgeek',
],
channels: [
// 21sextreme, no longer updated
'mightymistress',

View File

@ -1,6 +1,5 @@
'use strict';
const util = require('util');
const config = require('config');
const logger = require('./logger')(__filename);
@ -9,7 +8,11 @@ const knex = require('./knex');
const whereOr = require('./utils/where-or');
function curateEntity(entity, includeParameters = false) {
const curatedEntity = {
if (!entity) {
return null;
}
const curatedEntity = entity.id ? {
id: entity.id,
name: entity.name,
url: entity.url,
@ -17,12 +20,15 @@ function curateEntity(entity, includeParameters = false) {
slug: entity.slug,
type: entity.type,
parameters: includeParameters ? entity.parameters : null,
parent: entity.parent_id && entity.parent,
children: (entity.children || []).map(child => curateEntity({
parent: curateEntity(entity.parent),
} : {};
if (entity.children) {
curatedEntity.children = entity.children.map(child => curateEntity({
...child,
parent: entity,
}, includeParameters)),
};
parent: curatedEntity.id ? curatedEntity : null,
}, includeParameters));
}
return curatedEntity;
}
@ -36,14 +42,14 @@ async function fetchChannelsFromArgv() {
/* networks from argument with channels as children */
WITH RECURSIVE children AS (
SELECT
id, parent_id, name, slug, type, url, description, parameters
entities.*
FROM
entities
WHERE
slug = ANY(?) AND entities.type = 'network'
UNION ALL
SELECT
entities.id, entities.parent_id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.parameters
entities.*
FROM
entities
INNER JOIN
@ -86,78 +92,44 @@ async function fetchChannelsFromArgv() {
}
async function fetchChannelsFromConfig() {
console.log(config.include);
/*
const rawNetworks = await knex.raw(`
WITH RECURSIVE children AS (
SELECT
id, parent_id, name, slug, type, url, description, parameters
FROM
entities
WHERE
CASE WHEN array_length(?, 1) IS NOT NULL
THEN slug = ANY(?)
ELSE true
END
AND NOT
slug = ANY(?)
AND
entities.type = 'network'
UNION ALL
SELECT
entities.id, entities.parent_id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.parameters
FROM
entities
INNER JOIN
children ON children.id = entities.parent_id
)
SELECT
entities.*, row_to_json(parents) as parent, json_agg(children) as children
FROM
children
LEFT JOIN
entities ON entities.id = children.parent_id
LEFT JOIN
entities AS parents ON parents.id = entities.parent_id
WHERE
children.type = 'channel'
GROUP BY
children.parent_id, entities.id, entities.name, parents.id
`, [
config.include.networks,
config.include.networks,
config.exclude.networks,
]);
*/
const rawNetworks = await knex.raw(`
/* select channels associated to configured networks */
WITH RECURSIVE channels AS (
/* select configured networks */
/* select configured channels and networks */
SELECT
id, parent_id, name, type, slug
entities.*
FROM
entities
WHERE
(slug = ANY(:includeNetworks)
AND NOT entities.slug = ANY(:excludedNetworks))
AND entities.type = 'network'
CASE WHEN :includeAll
THEN
/* select all top level networks and independent channels */
entities.parent_id IS NULL
ELSE
((entities.slug = ANY(:includedNetworks)
AND entities.type = 'network')
OR (entities.slug = ANY(:includedChannels)
AND entities.type = 'channel'))
END
AND NOT (
(entities.slug = ANY(:excludedNetworks)
AND entities.type = 'network')
OR (entities.slug = ANY(:excludedChannels)
AND entities.type = 'channel'))
UNION ALL
/* select recursive children of configured networks */
SELECT
entities.id, entities.parent_id, entities.name, entities.type, entities.slug
entities.*
FROM
entities
INNER JOIN
channels ON channels.id = entities.parent_id
WHERE
NOT (
(entities.slug = ANY(:excludedNetworks) AND entities.type = 'network')
OR (entities.slug = ANY(:excludedChannels) AND entities.type = 'channel')
)
NOT ((entities.slug = ANY(:excludedNetworks)
AND entities.type = 'network')
OR (entities.slug = ANY(:excludedChannels)
AND entities.type = 'channel'))
)
/* select recursive channels as children of networks */
SELECT
@ -170,51 +142,17 @@ async function fetchChannelsFromConfig() {
channels.type = 'channel'
GROUP BY
entities.id
UNION ALL
/* select configured channels as children of networks */
SELECT
entities.*, json_agg(children) as children
FROM
entities AS children
LEFT JOIN
entities ON entities.id = children.parent_id
WHERE
children.slug = ANY(:includedChannels)
AND
children.type = 'channel'
GROUP BY
entities.id
`, {
includedNetworks: config.include.networks,
includedChannels: config.include.channels,
excludedNetworks: config.exclude.networks,
excludedChannels: config.exclude.channels,
includeAll: !config.include?.networks && !config.include?.channels,
includedNetworks: config.include?.networks || [],
includedChannels: config.include?.channels || [],
excludedNetworks: config.exclude?.networks || [],
excludedChannels: config.exclude?.channels || [],
});
console.log(util.inspect(rawNetworks.rows, null, null));
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
/*
const curatedSites = await curateEntities(rawChannels, true);
logger.info(`Found ${curatedSites.length} entities in database`);
const rawChannels = await knex('entities')
.select(knex.raw('entities.*, row_to_json(parents) as parent'))
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
.where((builder) => {
if (config.include) {
builder.whereIn('entities.slug', config.include);
}
})
.whereNot((builder) => {
builder.whereIn('entities.slug', config.exclude || []);
});
console.log(rawChannels);
*/
// return curatedSites;
return curatedNetworks;
}
async function fetchIncludedEntities() {