forked from DebaucheryLibrarian/traxxx
Fixed and documented entity configuration and query.
This commit is contained in:
parent
59e2124407
commit
77566eae0d
32
README.md
32
README.md
|
@ -18,6 +18,38 @@ Do not modify `config/default.js`, but instead create a copy at `config/local.js
|
|||
|
||||
You can also use `npm run flush` to run both steps at once, and wipe the database completely later.
|
||||
|
||||
#### Networks and channels
|
||||
To control which of the networks and channels available in the database are scraped, you can configure `include` and `exclude` lists. To include all available channels and rely only on the `exclude` lists, leave the `include` parameter unconfigured. The `exclude` lists remove channels and child networks from the networks on the `include` lists, but an excluded network does not remove a channel that is explicitly included. That is, if the `include` list includes a network and the `exclude` list excludes one of that network's channels, the channel will not be scraped. However, if the `include` list includes a channel and the `exclude` list includes its parent network, the channel will still be scraped.
|
||||
|
||||
This configuration will scrape Evil Angel and all XEmpire channels, except for LesbianX.
|
||||
```
|
||||
include: {
|
||||
networks: [
|
||||
'xempire',
|
||||
],
|
||||
channels: [
|
||||
'evilangel',
|
||||
],
|
||||
},
|
||||
exclude: {
|
||||
channels: [
|
||||
'lesbianx',
|
||||
],
|
||||
}
|
||||
```
|
||||
|
||||
This configuration will scrape all channels, except for BAM Visions and all channels that are part of the Vixen network.
|
||||
```
|
||||
exclude: {
|
||||
channels: [
|
||||
'bamvisions',
|
||||
],
|
||||
networks: [
|
||||
'vixen'
|
||||
],
|
||||
},
|
||||
```
|
||||
|
||||
### Building
|
||||
To build traxxx, run the following command:
|
||||
|
||||
|
|
|
@ -11,19 +11,7 @@ module.exports = {
|
|||
sfwHost: '0.0.0.0',
|
||||
sfwPort: 5001,
|
||||
},
|
||||
include: {
|
||||
networks: [
|
||||
'xempire',
|
||||
'julesjordan',
|
||||
],
|
||||
channels: [],
|
||||
},
|
||||
exclude: {
|
||||
networks: [
|
||||
'hardx',
|
||||
'pornpros',
|
||||
'mindgeek',
|
||||
],
|
||||
channels: [
|
||||
// 21sextreme, no longer updated
|
||||
'mightymistress',
|
||||
|
|
150
src/entities.js
150
src/entities.js
|
@ -1,6 +1,5 @@
|
|||
'use strict';
|
||||
|
||||
const util = require('util');
|
||||
const config = require('config');
|
||||
|
||||
const logger = require('./logger')(__filename);
|
||||
|
@ -9,7 +8,11 @@ const knex = require('./knex');
|
|||
const whereOr = require('./utils/where-or');
|
||||
|
||||
function curateEntity(entity, includeParameters = false) {
|
||||
const curatedEntity = {
|
||||
if (!entity) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const curatedEntity = entity.id ? {
|
||||
id: entity.id,
|
||||
name: entity.name,
|
||||
url: entity.url,
|
||||
|
@ -17,12 +20,15 @@ function curateEntity(entity, includeParameters = false) {
|
|||
slug: entity.slug,
|
||||
type: entity.type,
|
||||
parameters: includeParameters ? entity.parameters : null,
|
||||
parent: entity.parent_id && entity.parent,
|
||||
children: (entity.children || []).map(child => curateEntity({
|
||||
parent: curateEntity(entity.parent),
|
||||
} : {};
|
||||
|
||||
if (entity.children) {
|
||||
curatedEntity.children = entity.children.map(child => curateEntity({
|
||||
...child,
|
||||
parent: entity,
|
||||
}, includeParameters)),
|
||||
};
|
||||
parent: curatedEntity.id ? curatedEntity : null,
|
||||
}, includeParameters));
|
||||
}
|
||||
|
||||
return curatedEntity;
|
||||
}
|
||||
|
@ -36,14 +42,14 @@ async function fetchChannelsFromArgv() {
|
|||
/* networks from argument with channels as children */
|
||||
WITH RECURSIVE children AS (
|
||||
SELECT
|
||||
id, parent_id, name, slug, type, url, description, parameters
|
||||
entities.*
|
||||
FROM
|
||||
entities
|
||||
WHERE
|
||||
slug = ANY(?) AND entities.type = 'network'
|
||||
UNION ALL
|
||||
SELECT
|
||||
entities.id, entities.parent_id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.parameters
|
||||
entities.*
|
||||
FROM
|
||||
entities
|
||||
INNER JOIN
|
||||
|
@ -86,78 +92,44 @@ async function fetchChannelsFromArgv() {
|
|||
}
|
||||
|
||||
async function fetchChannelsFromConfig() {
|
||||
console.log(config.include);
|
||||
|
||||
/*
|
||||
const rawNetworks = await knex.raw(`
|
||||
WITH RECURSIVE children AS (
|
||||
SELECT
|
||||
id, parent_id, name, slug, type, url, description, parameters
|
||||
FROM
|
||||
entities
|
||||
WHERE
|
||||
CASE WHEN array_length(?, 1) IS NOT NULL
|
||||
THEN slug = ANY(?)
|
||||
ELSE true
|
||||
END
|
||||
AND NOT
|
||||
slug = ANY(?)
|
||||
AND
|
||||
entities.type = 'network'
|
||||
UNION ALL
|
||||
SELECT
|
||||
entities.id, entities.parent_id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.parameters
|
||||
FROM
|
||||
entities
|
||||
INNER JOIN
|
||||
children ON children.id = entities.parent_id
|
||||
)
|
||||
SELECT
|
||||
entities.*, row_to_json(parents) as parent, json_agg(children) as children
|
||||
FROM
|
||||
children
|
||||
LEFT JOIN
|
||||
entities ON entities.id = children.parent_id
|
||||
LEFT JOIN
|
||||
entities AS parents ON parents.id = entities.parent_id
|
||||
WHERE
|
||||
children.type = 'channel'
|
||||
GROUP BY
|
||||
children.parent_id, entities.id, entities.name, parents.id
|
||||
`, [
|
||||
config.include.networks,
|
||||
config.include.networks,
|
||||
config.exclude.networks,
|
||||
]);
|
||||
*/
|
||||
|
||||
const rawNetworks = await knex.raw(`
|
||||
/* select channels associated to configured networks */
|
||||
WITH RECURSIVE channels AS (
|
||||
/* select configured networks */
|
||||
/* select configured channels and networks */
|
||||
SELECT
|
||||
id, parent_id, name, type, slug
|
||||
entities.*
|
||||
FROM
|
||||
entities
|
||||
WHERE
|
||||
(slug = ANY(:includeNetworks)
|
||||
AND NOT entities.slug = ANY(:excludedNetworks))
|
||||
AND entities.type = 'network'
|
||||
CASE WHEN :includeAll
|
||||
THEN
|
||||
/* select all top level networks and independent channels */
|
||||
entities.parent_id IS NULL
|
||||
ELSE
|
||||
((entities.slug = ANY(:includedNetworks)
|
||||
AND entities.type = 'network')
|
||||
OR (entities.slug = ANY(:includedChannels)
|
||||
AND entities.type = 'channel'))
|
||||
END
|
||||
AND NOT (
|
||||
(entities.slug = ANY(:excludedNetworks)
|
||||
AND entities.type = 'network')
|
||||
OR (entities.slug = ANY(:excludedChannels)
|
||||
AND entities.type = 'channel'))
|
||||
|
||||
UNION ALL
|
||||
|
||||
/* select recursive children of configured networks */
|
||||
SELECT
|
||||
entities.id, entities.parent_id, entities.name, entities.type, entities.slug
|
||||
entities.*
|
||||
FROM
|
||||
entities
|
||||
INNER JOIN
|
||||
channels ON channels.id = entities.parent_id
|
||||
WHERE
|
||||
NOT (
|
||||
(entities.slug = ANY(:excludedNetworks) AND entities.type = 'network')
|
||||
OR (entities.slug = ANY(:excludedChannels) AND entities.type = 'channel')
|
||||
)
|
||||
NOT ((entities.slug = ANY(:excludedNetworks)
|
||||
AND entities.type = 'network')
|
||||
OR (entities.slug = ANY(:excludedChannels)
|
||||
AND entities.type = 'channel'))
|
||||
)
|
||||
/* select recursive channels as children of networks */
|
||||
SELECT
|
||||
|
@ -170,51 +142,17 @@ async function fetchChannelsFromConfig() {
|
|||
channels.type = 'channel'
|
||||
GROUP BY
|
||||
entities.id
|
||||
|
||||
UNION ALL
|
||||
|
||||
/* select configured channels as children of networks */
|
||||
SELECT
|
||||
entities.*, json_agg(children) as children
|
||||
FROM
|
||||
entities AS children
|
||||
LEFT JOIN
|
||||
entities ON entities.id = children.parent_id
|
||||
WHERE
|
||||
children.slug = ANY(:includedChannels)
|
||||
AND
|
||||
children.type = 'channel'
|
||||
GROUP BY
|
||||
entities.id
|
||||
`, {
|
||||
includedNetworks: config.include.networks,
|
||||
includedChannels: config.include.channels,
|
||||
excludedNetworks: config.exclude.networks,
|
||||
excludedChannels: config.exclude.channels,
|
||||
includeAll: !config.include?.networks && !config.include?.channels,
|
||||
includedNetworks: config.include?.networks || [],
|
||||
includedChannels: config.include?.channels || [],
|
||||
excludedNetworks: config.exclude?.networks || [],
|
||||
excludedChannels: config.exclude?.channels || [],
|
||||
});
|
||||
|
||||
console.log(util.inspect(rawNetworks.rows, null, null));
|
||||
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
|
||||
|
||||
/*
|
||||
const curatedSites = await curateEntities(rawChannels, true);
|
||||
logger.info(`Found ${curatedSites.length} entities in database`);
|
||||
|
||||
const rawChannels = await knex('entities')
|
||||
.select(knex.raw('entities.*, row_to_json(parents) as parent'))
|
||||
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
|
||||
.where((builder) => {
|
||||
if (config.include) {
|
||||
builder.whereIn('entities.slug', config.include);
|
||||
}
|
||||
})
|
||||
.whereNot((builder) => {
|
||||
builder.whereIn('entities.slug', config.exclude || []);
|
||||
});
|
||||
|
||||
console.log(rawChannels);
|
||||
*/
|
||||
|
||||
// return curatedSites;
|
||||
return curatedNetworks;
|
||||
}
|
||||
|
||||
async function fetchIncludedEntities() {
|
||||
|
|
Loading…
Reference in New Issue