forked from DebaucheryLibrarian/traxxx
				
			Allowing HTTP rate limits to be set by configuration or argument.
This commit is contained in:
		
							parent
							
								
									6a5063cf32
								
							
						
					
					
						commit
						3d427f7e1d
					
				|  | @ -202,6 +202,11 @@ module.exports = { | ||||||
| 			interval: 50, | 			interval: 50, | ||||||
| 			concurrency: 20, | 			concurrency: 20, | ||||||
| 		}, | 		}, | ||||||
|  | 		'www.deeper.com': { | ||||||
|  | 			enable: false, // false makes this host fall back to the default limits; omit the property to apply the limits below
 | ||||||
|  | 			interval: 1000, | ||||||
|  | 			concurrency: 1, | ||||||
|  | 		}, | ||||||
| 	}, | 	}, | ||||||
| 	fetchAfter: [1, 'week'], | 	fetchAfter: [1, 'week'], | ||||||
| 	missingDateLimit: 3, | 	missingDateLimit: 3, | ||||||
|  |  | ||||||
							
								
								
									
										10
									
								
								src/argv.js
								
								
								
								
							
							
						
						
									
										10
									
								
								src/argv.js
								
								
								
								
							|  | @ -153,6 +153,16 @@ const { argv } = yargs | ||||||
| 		type: 'number', | 		type: 'number', | ||||||
| 		default: 1, | 		default: 1, | ||||||
| 	}) | 	}) | ||||||
|  | 	.option('interval', { | ||||||
|  | 		describe: 'Minimum wait time between HTTP requests', | ||||||
|  | 		type: 'number', | ||||||
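|  | 		// don't set a default here: a yargs default would always be defined and so always override the config, while only an explicit argument should override config, and config should override the built-in default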
 | ||||||
|  | 	}) | ||||||
|  | 	.option('concurrency', { | ||||||
|  | 		describe: 'Maximum amount of parallel HTTP requests', | ||||||
|  | 		type: 'number', | ||||||
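|  | 		// don't set a default here: a yargs default would always be defined and so always override the config, while only an explicit argument should override config, and config should override the built-in default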
 | ||||||
|  | 	}) | ||||||
| 	.option('save', { | 	.option('save', { | ||||||
| 		describe: 'Save fetched releases to database', | 		describe: 'Save fetched releases to database', | ||||||
| 		type: 'boolean', | 		type: 'boolean', | ||||||
|  |  | ||||||
|  | @ -159,7 +159,7 @@ async function fetchActorReleases(pages, model, origin) { | ||||||
| 		const url = `${origin}/api${model.targetUrl}?page=${page}`; | 		const url = `${origin}/api${model.targetUrl}?page=${page}`; | ||||||
| 		const res = await http.get(url); | 		const res = await http.get(url); | ||||||
| 
 | 
 | ||||||
| 		if (res.code === 200) { | 		if (res.status === 200) { | ||||||
| 			return scrapeAll(res.body.data.videos.videos, null, origin); | 			return scrapeAll(res.body.data.videos.videos, null, origin); | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
|  | @ -207,22 +207,22 @@ async function fetchLatest(site, page = 1) { | ||||||
| 	const url = `${site.url}/api/videos?page=${page}`; | 	const url = `${site.url}/api/videos?page=${page}`; | ||||||
| 	const res = await http.get(url); | 	const res = await http.get(url); | ||||||
| 
 | 
 | ||||||
| 	if (res.code === 200) { | 	if (res.status === 200) { | ||||||
| 		return scrapeAll(res.body.data.videos, site); | 		return scrapeAll(res.body.data.videos, site); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	return res.code; | 	return res.status; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function fetchUpcoming(site) { | async function fetchUpcoming(site) { | ||||||
| 	const apiUrl = `${site.url}/api`; | 	const apiUrl = `${site.url}/api`; | ||||||
| 	const res = await http.get(apiUrl); | 	const res = await http.get(apiUrl); | ||||||
| 
 | 
 | ||||||
| 	if (res.code === 200) { | 	if (res.status === 200) { | ||||||
| 		return scrapeUpcoming(res.body.data.nextScene, site); | 		return scrapeUpcoming(res.body.data.nextScene, site); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	return res.code; | 	return res.status; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function fetchScene(url, site, baseRelease) { | async function fetchScene(url, site, baseRelease) { | ||||||
|  | @ -231,11 +231,11 @@ async function fetchScene(url, site, baseRelease) { | ||||||
| 
 | 
 | ||||||
| 	const res = await http.get(apiUrl); | 	const res = await http.get(apiUrl); | ||||||
| 
 | 
 | ||||||
| 	if (res.code === 200) { | 	if (res.status === 200) { | ||||||
| 		return scrapeScene(res.body.data, url, site, baseRelease); | 		return scrapeScene(res.body.data, url, site, baseRelease); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	return res.code; | 	return res.status; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function fetchProfile({ name: actorName }, { site }, include) { | async function fetchProfile({ name: actorName }, { site }, include) { | ||||||
|  | @ -244,7 +244,7 @@ async function fetchProfile({ name: actorName }, { site }, include) { | ||||||
| 	const url = `${origin}/api/${actorSlug}`; | 	const url = `${origin}/api/${actorSlug}`; | ||||||
| 	const res = await http.get(url); | 	const res = await http.get(url); | ||||||
| 
 | 
 | ||||||
| 	if (res.code === 200) { | 	if (res.status === 200) { | ||||||
| 		return scrapeProfile(res.body.data, origin, include.scenes); | 		return scrapeProfile(res.body.data, origin, include.scenes); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -9,6 +9,7 @@ const Bottleneck = require('bottleneck'); | ||||||
| const { JSDOM } = require('jsdom'); | const { JSDOM } = require('jsdom'); | ||||||
| 
 | 
 | ||||||
| const logger = require('../logger')(__filename); | const logger = require('../logger')(__filename); | ||||||
|  | const argv = require('../argv'); | ||||||
| 
 | 
 | ||||||
| const pipeline = util.promisify(stream.pipeline); | const pipeline = util.promisify(stream.pipeline); | ||||||
| const limiters = {}; | const limiters = {}; | ||||||
|  | @ -33,12 +34,31 @@ function useProxy(url) { | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	const { hostname } = new URL(url); | 	const { hostname } = new URL(url); | ||||||
|  | 
 | ||||||
| 	return config.proxy.hostnames.includes(hostname); | 	return config.proxy.hostnames.includes(hostname); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| function getLimiter(limit = {}) { | function getLimiterValue(prop, options, hostname) { | ||||||
| 	const interval = limit.interval === undefined ? config.limits.default.interval : limit.interval; | 	if (argv[prop] !== undefined) { | ||||||
| 	const concurrency = limit.concurrency === undefined ? config.limits.default.concurrency : limit.concurrency; | 		return argv[prop]; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (options[prop] !== undefined) { | ||||||
|  | 		return options[prop]; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (config.limits[hostname]?.enable !== false && config.limits[hostname]?.[prop] !== undefined) { | ||||||
|  | 		return config.limits[hostname][prop]; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return config.limits.default[prop]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | function getLimiter(options = {}, url) { | ||||||
|  | 	const { hostname } = new URL(url); | ||||||
|  | 
 | ||||||
|  | 	const interval = getLimiterValue('interval', options, hostname); | ||||||
|  | 	const concurrency = getLimiterValue('concurrency', options, hostname); | ||||||
| 
 | 
 | ||||||
| 	if (!limiters[interval]?.[concurrency]) { | 	if (!limiters[interval]?.[concurrency]) { | ||||||
| 		limiters[interval] = limiters[interval] || {}; | 		limiters[interval] = limiters[interval] || {}; | ||||||
|  | @ -52,7 +72,7 @@ function getLimiter(limit = {}) { | ||||||
| 	return limiters[interval][concurrency]; | 	return limiters[interval][concurrency]; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function request(method = 'get', url, body, requestOptions = {}) { | async function request(method = 'get', url, body, requestOptions = {}, limiter) { | ||||||
| 	const http = requestOptions.session || bhttp; | 	const http = requestOptions.session || bhttp; | ||||||
| 
 | 
 | ||||||
| 	const options = { | 	const options = { | ||||||
|  | @ -60,16 +80,16 @@ async function request(method = 'get', url, body, requestOptions = {}) { | ||||||
| 		...requestOptions, | 		...requestOptions, | ||||||
| 		responseTimeout: requestOptions.responseTimeout || requestOptions.timeout || 60000, | 		responseTimeout: requestOptions.responseTimeout || requestOptions.timeout || 60000, | ||||||
| 		stream: !!requestOptions.destination, | 		stream: !!requestOptions.destination, | ||||||
| 		interval: requestOptions.interval || config.limits.default.interval, |  | ||||||
| 		concurrency: requestOptions.concurrency || config.limits.default.concurrency, |  | ||||||
| 		session: null, | 		session: null, | ||||||
| 	}; | 	}; | ||||||
| 
 | 
 | ||||||
| 	if (useProxy(url)) { | 	const withProxy = useProxy(url); | ||||||
|  | 
 | ||||||
|  | 	if (withProxy) { | ||||||
| 		options.agent = proxyAgent; | 		options.agent = proxyAgent; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	logger.debug(`GET (${options.interval}ms/${options.concurrency}p) ${url}`); | 	logger.debug(`${method.toUpperCase()} (${limiter._store.storeOptions.minTime}ms/${limiter._store.storeOptions.maxConcurrent}p${withProxy ? ' proxy' : ''}) ${url}`); | ||||||
| 
 | 
 | ||||||
| 	const res = await (body | 	const res = await (body | ||||||
| 		? http[method](url, body, options) | 		? http[method](url, body, options) | ||||||
|  | @ -107,7 +127,9 @@ async function request(method = 'get', url, body, requestOptions = {}) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function scheduleRequest(method = 'get', url, body, options) { | async function scheduleRequest(method = 'get', url, body, options) { | ||||||
| 	return getLimiter(options || {}).schedule(() => request(method, url, body, options)); | 	const limiter = getLimiter(options, url); | ||||||
|  | 
 | ||||||
|  | 	return limiter.schedule(() => request(method, url, body, options, limiter)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function get(url, options) { | async function get(url, options) { | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue