Fixed and documented entity configuration and query.
This commit is contained in:
		
							parent
							
								
									59e2124407
								
							
						
					
					
						commit
						77566eae0d
					
				
							
								
								
									
										32
									
								
								README.md
								
								
								
								
							
							
						
						
									
										32
									
								
								README.md
								
								
								
								
							|  | @ -18,6 +18,38 @@ Do not modify `config/default.js`, but instead create a copy at `config/local.js | |||
| 
 | ||||
| You can also use `npm run flush` to run both steps at once, and wipe the database completely later. | ||||
| 
 | ||||
| #### Networks and channels | ||||
| To control which of the networks and channels available in the database are scraped, you can configure `include` and `exclude` lists. To include all available channels and rely only on the `exclude` list, leave the `include` parameter unconfigured. The `exclude` lists will exclude channels and child networks from networks on the `include` lists, but not vice versa. That is, if the `include` list includes a network and the `exclude` list excludes one of that network's channels, that channel will not be scraped. However, if the `include` list includes a channel and the `exclude` list includes its parent network, the channel will still be scraped. | ||||
| 
 | ||||
| This configuration will scrape Evil Angel and all XEmpire channels, except for LesbianX. | ||||
| ``` | ||||
| include: { | ||||
| 	networks: [ | ||||
| 		'xempire', | ||||
| 	], | ||||
| 	channels: [ | ||||
| 		'evilangel', | ||||
| 	], | ||||
| }, | ||||
| exclude: { | ||||
| 	channels: [ | ||||
| 		'lesbianx', | ||||
| 	], | ||||
| } | ||||
| ``` | ||||
| 
 | ||||
| This configuration will scrape all channels, except for BAM Visions and all channels that are part of the Vixen network. | ||||
| ``` | ||||
| exclude: { | ||||
| 	channels: [ | ||||
| 		'bamvisions', | ||||
| 	], | ||||
| 	networks: [ | ||||
| 		'vixen' | ||||
| 	], | ||||
| }, | ||||
| ``` | ||||
| 
 | ||||
| ### Building | ||||
| To build traxxx, run the following command: | ||||
| 
 | ||||
|  |  | |||
|  | @ -11,19 +11,7 @@ module.exports = { | |||
| 		sfwHost: '0.0.0.0', | ||||
| 		sfwPort: 5001, | ||||
| 	}, | ||||
| 	include: { | ||||
| 		networks: [ | ||||
| 			'xempire', | ||||
| 			'julesjordan', | ||||
| 		], | ||||
| 		channels: [], | ||||
| 	}, | ||||
| 	exclude: { | ||||
| 		networks: [ | ||||
| 			'hardx', | ||||
| 			'pornpros', | ||||
| 			'mindgeek', | ||||
| 		], | ||||
| 		channels: [ | ||||
| 			// 21sextreme, no longer updated
 | ||||
| 			'mightymistress', | ||||
|  |  | |||
							
								
								
									
										150
									
								
								src/entities.js
								
								
								
								
							
							
						
						
									
										150
									
								
								src/entities.js
								
								
								
								
							|  | @ -1,6 +1,5 @@ | |||
| 'use strict'; | ||||
| 
 | ||||
| const util = require('util'); | ||||
| const config = require('config'); | ||||
| 
 | ||||
| const logger = require('./logger')(__filename); | ||||
|  | @ -9,7 +8,11 @@ const knex = require('./knex'); | |||
| const whereOr = require('./utils/where-or'); | ||||
| 
 | ||||
| function curateEntity(entity, includeParameters = false) { | ||||
| 	const curatedEntity = { | ||||
| 	if (!entity) { | ||||
| 		return null; | ||||
| 	} | ||||
| 
 | ||||
| 	const curatedEntity = entity.id ? { | ||||
| 		id: entity.id, | ||||
| 		name: entity.name, | ||||
| 		url: entity.url, | ||||
|  | @ -17,12 +20,15 @@ function curateEntity(entity, includeParameters = false) { | |||
| 		slug: entity.slug, | ||||
| 		type: entity.type, | ||||
| 		parameters: includeParameters ? entity.parameters : null, | ||||
| 		parent: entity.parent_id && entity.parent, | ||||
| 		children: (entity.children || []).map(child => curateEntity({ | ||||
| 		parent: curateEntity(entity.parent), | ||||
| 	} : {}; | ||||
| 
 | ||||
| 	if (entity.children) { | ||||
| 		curatedEntity.children = entity.children.map(child => curateEntity({ | ||||
| 			...child, | ||||
| 			parent: entity, | ||||
| 		}, includeParameters)), | ||||
| 	}; | ||||
| 			parent: curatedEntity.id ? curatedEntity : null, | ||||
| 		}, includeParameters)); | ||||
| 	} | ||||
| 
 | ||||
| 	return curatedEntity; | ||||
| } | ||||
|  | @ -36,14 +42,14 @@ async function fetchChannelsFromArgv() { | |||
| 		/* networks from argument with channels as children */ | ||||
| 		WITH RECURSIVE children AS ( | ||||
| 			SELECT | ||||
| 				id, parent_id, name, slug, type, url, description, parameters | ||||
| 				entities.* | ||||
| 			FROM | ||||
| 				entities | ||||
| 			WHERE | ||||
| 				slug = ANY(?) AND entities.type = 'network' | ||||
| 			UNION ALL | ||||
| 			SELECT | ||||
| 				entities.id, entities.parent_id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.parameters | ||||
| 				entities.* | ||||
| 			FROM | ||||
| 				entities | ||||
| 			INNER JOIN | ||||
|  | @ -86,78 +92,44 @@ async function fetchChannelsFromArgv() { | |||
| } | ||||
| 
 | ||||
| async function fetchChannelsFromConfig() { | ||||
| 	console.log(config.include); | ||||
| 
 | ||||
| 	/* | ||||
| 	const rawNetworks = await knex.raw(` | ||||
| 		WITH RECURSIVE children AS ( | ||||
| 			SELECT | ||||
| 				id, parent_id, name, slug, type, url, description, parameters | ||||
| 			FROM | ||||
| 				entities | ||||
| 			WHERE | ||||
| 				CASE WHEN array_length(?, 1) IS NOT NULL | ||||
| 				THEN slug = ANY(?) | ||||
| 				ELSE true | ||||
| 				END | ||||
| 			AND NOT | ||||
| 				slug = ANY(?) | ||||
| 			AND | ||||
| 				entities.type = 'network' | ||||
| 			UNION ALL | ||||
| 			SELECT | ||||
| 				entities.id, entities.parent_id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.parameters | ||||
| 			FROM | ||||
| 				entities | ||||
| 			INNER JOIN | ||||
| 				children ON children.id = entities.parent_id | ||||
| 		) | ||||
| 		SELECT | ||||
| 			entities.*, row_to_json(parents) as parent, json_agg(children) as children | ||||
| 		FROM | ||||
| 			children | ||||
| 		LEFT JOIN | ||||
| 			entities ON entities.id = children.parent_id | ||||
| 		LEFT JOIN | ||||
| 			entities AS parents ON parents.id = entities.parent_id | ||||
| 		WHERE | ||||
| 			children.type = 'channel' | ||||
| 		GROUP BY | ||||
| 			children.parent_id, entities.id, entities.name, parents.id | ||||
| 	`, [
 | ||||
| 		config.include.networks, | ||||
| 		config.include.networks, | ||||
| 		config.exclude.networks, | ||||
| 	]); | ||||
| 	*/ | ||||
| 
 | ||||
| 	const rawNetworks = await knex.raw(` | ||||
| 		/* select channels associated to configured networks */ | ||||
| 		WITH RECURSIVE channels AS ( | ||||
| 			/* select configured networks */ | ||||
| 			/* select configured channels and networks */ | ||||
| 			SELECT | ||||
| 				id, parent_id, name, type, slug | ||||
| 				entities.* | ||||
| 			FROM | ||||
| 				entities | ||||
| 			WHERE | ||||
| 				(slug = ANY(:includeNetworks) | ||||
| 				AND NOT entities.slug = ANY(:excludedNetworks)) | ||||
| 				AND entities.type = 'network' | ||||
| 				CASE WHEN :includeAll | ||||
| 				THEN | ||||
| 					/* select all top level networks and independent channels */ | ||||
| 					entities.parent_id IS NULL | ||||
| 				ELSE | ||||
| 					((entities.slug = ANY(:includedNetworks) | ||||
| 					AND entities.type = 'network') | ||||
| 					OR (entities.slug = ANY(:includedChannels) | ||||
| 					AND entities.type = 'channel')) | ||||
| 				END | ||||
| 				AND NOT ( | ||||
| 					(entities.slug = ANY(:excludedNetworks) | ||||
| 					AND entities.type = 'network') | ||||
| 					OR (entities.slug = ANY(:excludedChannels) | ||||
| 					AND entities.type = 'channel')) | ||||
| 
 | ||||
| 			UNION ALL | ||||
| 
 | ||||
| 			/* select recursive children of configured networks */ | ||||
| 			SELECT | ||||
| 				entities.id, entities.parent_id, entities.name, entities.type, entities.slug | ||||
| 				entities.* | ||||
| 			FROM | ||||
| 				entities | ||||
| 			INNER JOIN | ||||
| 				channels ON channels.id = entities.parent_id | ||||
| 			WHERE | ||||
| 				NOT ( | ||||
| 				(entities.slug = ANY(:excludedNetworks) AND entities.type = 'network') | ||||
| 				OR (entities.slug = ANY(:excludedChannels) AND entities.type = 'channel') | ||||
| 				) | ||||
| 				NOT ((entities.slug = ANY(:excludedNetworks) | ||||
| 				AND entities.type = 'network') | ||||
| 				OR (entities.slug = ANY(:excludedChannels) | ||||
| 				AND entities.type = 'channel')) | ||||
| 		) | ||||
| 		/* select recursive channels as children of networks */ | ||||
| 		SELECT | ||||
|  | @ -170,51 +142,17 @@ async function fetchChannelsFromConfig() { | |||
| 			channels.type = 'channel' | ||||
| 		GROUP BY | ||||
| 			entities.id | ||||
| 
 | ||||
| 		UNION ALL | ||||
| 
 | ||||
| 		/* select configured channels as children of networks */ | ||||
| 		SELECT | ||||
| 			entities.*, json_agg(children) as children | ||||
| 		FROM | ||||
| 			entities AS children | ||||
| 		LEFT JOIN | ||||
| 			entities ON entities.id = children.parent_id | ||||
| 		WHERE | ||||
| 			children.slug = ANY(:includedChannels) | ||||
| 		AND | ||||
| 			children.type = 'channel' | ||||
| 		GROUP BY | ||||
| 			entities.id | ||||
| 	`, {
 | ||||
| 		includedNetworks: config.include.networks, | ||||
| 		includedChannels: config.include.channels, | ||||
| 		excludedNetworks: config.exclude.networks, | ||||
| 		excludedChannels: config.exclude.channels, | ||||
| 		includeAll: !config.include?.networks && !config.include?.channels, | ||||
| 		includedNetworks: config.include?.networks || [], | ||||
| 		includedChannels: config.include?.channels || [], | ||||
| 		excludedNetworks: config.exclude?.networks || [], | ||||
| 		excludedChannels: config.exclude?.channels || [], | ||||
| 	}); | ||||
| 
 | ||||
| 	console.log(util.inspect(rawNetworks.rows, null, null)); | ||||
| 	const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true)); | ||||
| 
 | ||||
| 	/* | ||||
| 	const curatedSites = await curateEntities(rawChannels, true); | ||||
| 	logger.info(`Found ${curatedSites.length} entities in database`); | ||||
| 
 | ||||
| 	const rawChannels = await knex('entities') | ||||
| 		.select(knex.raw('entities.*, row_to_json(parents) as parent')) | ||||
| 		.leftJoin('entities as parents', 'parents.id', 'entities.parent_id') | ||||
| 		.where((builder) => { | ||||
| 			if (config.include) { | ||||
| 				builder.whereIn('entities.slug', config.include); | ||||
| 			} | ||||
| 		}) | ||||
| 		.whereNot((builder) => { | ||||
| 			builder.whereIn('entities.slug', config.exclude || []); | ||||
| 		}); | ||||
| 
 | ||||
| 		console.log(rawChannels); | ||||
| 	*/ | ||||
| 
 | ||||
| 	// return curatedSites;
 | ||||
| 	return curatedNetworks; | ||||
| } | ||||
| 
 | ||||
| async function fetchIncludedEntities() { | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue