forked from DebaucheryLibrarian/traxxx
Allowing HTTP rate limits to be set by configuration or argument.
This commit is contained in:
parent
6a5063cf32
commit
3d427f7e1d
|
@ -202,6 +202,11 @@ module.exports = {
|
||||||
interval: 50,
|
interval: 50,
|
||||||
concurrency: 20,
|
concurrency: 20,
|
||||||
},
|
},
|
||||||
|
'www.deeper.com': {
|
||||||
|
enable: false, // can be omitted to enable
|
||||||
|
interval: 1000,
|
||||||
|
concurrency: 1,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
fetchAfter: [1, 'week'],
|
fetchAfter: [1, 'week'],
|
||||||
missingDateLimit: 3,
|
missingDateLimit: 3,
|
||||||
|
|
10
src/argv.js
10
src/argv.js
|
@ -153,6 +153,16 @@ const { argv } = yargs
|
||||||
type: 'number',
|
type: 'number',
|
||||||
default: 1,
|
default: 1,
|
||||||
})
|
})
|
||||||
|
.option('interval', {
|
||||||
|
describe: 'Minimum wait time between HTTP requests',
|
||||||
|
type: 'number',
|
||||||
|
// don't set default, because argument has to override config, but config has to override default
|
||||||
|
})
|
||||||
|
.option('concurrency', {
|
||||||
|
describe: 'Maximum amount of parallel HTTP requests',
|
||||||
|
type: 'number',
|
||||||
|
// don't set default, because argument has to override config, but config has to override default
|
||||||
|
})
|
||||||
.option('save', {
|
.option('save', {
|
||||||
describe: 'Save fetched releases to database',
|
describe: 'Save fetched releases to database',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
|
|
|
@ -159,7 +159,7 @@ async function fetchActorReleases(pages, model, origin) {
|
||||||
const url = `${origin}/api${model.targetUrl}?page=${page}`;
|
const url = `${origin}/api${model.targetUrl}?page=${page}`;
|
||||||
const res = await http.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.code === 200) {
|
if (res.status === 200) {
|
||||||
return scrapeAll(res.body.data.videos.videos, null, origin);
|
return scrapeAll(res.body.data.videos.videos, null, origin);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -207,22 +207,22 @@ async function fetchLatest(site, page = 1) {
|
||||||
const url = `${site.url}/api/videos?page=${page}`;
|
const url = `${site.url}/api/videos?page=${page}`;
|
||||||
const res = await http.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.code === 200) {
|
if (res.status === 200) {
|
||||||
return scrapeAll(res.body.data.videos, site);
|
return scrapeAll(res.body.data.videos, site);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.code;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchUpcoming(site) {
|
async function fetchUpcoming(site) {
|
||||||
const apiUrl = `${site.url}/api`;
|
const apiUrl = `${site.url}/api`;
|
||||||
const res = await http.get(apiUrl);
|
const res = await http.get(apiUrl);
|
||||||
|
|
||||||
if (res.code === 200) {
|
if (res.status === 200) {
|
||||||
return scrapeUpcoming(res.body.data.nextScene, site);
|
return scrapeUpcoming(res.body.data.nextScene, site);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.code;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site, baseRelease) {
|
async function fetchScene(url, site, baseRelease) {
|
||||||
|
@ -231,11 +231,11 @@ async function fetchScene(url, site, baseRelease) {
|
||||||
|
|
||||||
const res = await http.get(apiUrl);
|
const res = await http.get(apiUrl);
|
||||||
|
|
||||||
if (res.code === 200) {
|
if (res.status === 200) {
|
||||||
return scrapeScene(res.body.data, url, site, baseRelease);
|
return scrapeScene(res.body.data, url, site, baseRelease);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.code;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchProfile({ name: actorName }, { site }, include) {
|
async function fetchProfile({ name: actorName }, { site }, include) {
|
||||||
|
@ -244,7 +244,7 @@ async function fetchProfile({ name: actorName }, { site }, include) {
|
||||||
const url = `${origin}/api/${actorSlug}`;
|
const url = `${origin}/api/${actorSlug}`;
|
||||||
const res = await http.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.code === 200) {
|
if (res.status === 200) {
|
||||||
return scrapeProfile(res.body.data, origin, include.scenes);
|
return scrapeProfile(res.body.data, origin, include.scenes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ const Bottleneck = require('bottleneck');
|
||||||
const { JSDOM } = require('jsdom');
|
const { JSDOM } = require('jsdom');
|
||||||
|
|
||||||
const logger = require('../logger')(__filename);
|
const logger = require('../logger')(__filename);
|
||||||
|
const argv = require('../argv');
|
||||||
|
|
||||||
const pipeline = util.promisify(stream.pipeline);
|
const pipeline = util.promisify(stream.pipeline);
|
||||||
const limiters = {};
|
const limiters = {};
|
||||||
|
@ -33,12 +34,31 @@ function useProxy(url) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const { hostname } = new URL(url);
|
const { hostname } = new URL(url);
|
||||||
|
|
||||||
return config.proxy.hostnames.includes(hostname);
|
return config.proxy.hostnames.includes(hostname);
|
||||||
}
|
}
|
||||||
|
|
||||||
function getLimiter(limit = {}) {
|
function getLimiterValue(prop, options, hostname) {
|
||||||
const interval = limit.interval === undefined ? config.limits.default.interval : limit.interval;
|
if (argv[prop] !== undefined) {
|
||||||
const concurrency = limit.concurrency === undefined ? config.limits.default.concurrency : limit.concurrency;
|
return argv[prop];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (options[prop] !== undefined) {
|
||||||
|
return options[prop];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (config.limits[hostname]?.enable !== false && config.limits[hostname]?.[prop] !== undefined) {
|
||||||
|
return config.limits[hostname][prop];
|
||||||
|
}
|
||||||
|
|
||||||
|
return config.limits.default[prop];
|
||||||
|
}
|
||||||
|
|
||||||
|
function getLimiter(options = {}, url) {
|
||||||
|
const { hostname } = new URL(url);
|
||||||
|
|
||||||
|
const interval = getLimiterValue('interval', options, hostname);
|
||||||
|
const concurrency = getLimiterValue('concurrency', options, hostname);
|
||||||
|
|
||||||
if (!limiters[interval]?.[concurrency]) {
|
if (!limiters[interval]?.[concurrency]) {
|
||||||
limiters[interval] = limiters[interval] || {};
|
limiters[interval] = limiters[interval] || {};
|
||||||
|
@ -52,7 +72,7 @@ function getLimiter(limit = {}) {
|
||||||
return limiters[interval][concurrency];
|
return limiters[interval][concurrency];
|
||||||
}
|
}
|
||||||
|
|
||||||
async function request(method = 'get', url, body, requestOptions = {}) {
|
async function request(method = 'get', url, body, requestOptions = {}, limiter) {
|
||||||
const http = requestOptions.session || bhttp;
|
const http = requestOptions.session || bhttp;
|
||||||
|
|
||||||
const options = {
|
const options = {
|
||||||
|
@ -60,16 +80,16 @@ async function request(method = 'get', url, body, requestOptions = {}) {
|
||||||
...requestOptions,
|
...requestOptions,
|
||||||
responseTimeout: requestOptions.responseTimeout || requestOptions.timeout || 60000,
|
responseTimeout: requestOptions.responseTimeout || requestOptions.timeout || 60000,
|
||||||
stream: !!requestOptions.destination,
|
stream: !!requestOptions.destination,
|
||||||
interval: requestOptions.interval || config.limits.default.interval,
|
|
||||||
concurrency: requestOptions.concurrency || config.limits.default.concurrency,
|
|
||||||
session: null,
|
session: null,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (useProxy(url)) {
|
const withProxy = useProxy(url);
|
||||||
|
|
||||||
|
if (withProxy) {
|
||||||
options.agent = proxyAgent;
|
options.agent = proxyAgent;
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.debug(`GET (${options.interval}ms/${options.concurrency}p) ${url}`);
|
logger.debug(`${method.toUpperCase()} (${limiter._store.storeOptions.minTime}ms/${limiter._store.storeOptions.maxConcurrent}p${withProxy ? ' proxy' : ''}) ${url}`);
|
||||||
|
|
||||||
const res = await (body
|
const res = await (body
|
||||||
? http[method](url, body, options)
|
? http[method](url, body, options)
|
||||||
|
@ -107,7 +127,9 @@ async function request(method = 'get', url, body, requestOptions = {}) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scheduleRequest(method = 'get', url, body, options) {
|
async function scheduleRequest(method = 'get', url, body, options) {
|
||||||
return getLimiter(options || {}).schedule(() => request(method, url, body, options));
|
const limiter = getLimiter(options, url);
|
||||||
|
|
||||||
|
return limiter.schedule(() => request(method, url, body, options, limiter));
|
||||||
}
|
}
|
||||||
|
|
||||||
async function get(url, options) {
|
async function get(url, options) {
|
||||||
|
|
Loading…
Reference in New Issue