2020-02-22 02:22:30 +00:00
|
|
|
'use strict';
|
|
|
|
|
2020-11-22 03:07:09 +00:00
|
|
|
const config = require('config');
|
|
|
|
const bhttp = require('bhttp');
|
2020-05-06 23:53:07 +00:00
|
|
|
const util = require('util');
|
|
|
|
const stream = require('stream');
|
2020-03-19 00:54:25 +00:00
|
|
|
const tunnel = require('tunnel');
|
2020-11-22 03:07:09 +00:00
|
|
|
const Bottleneck = require('bottleneck');
|
|
|
|
const { JSDOM } = require('jsdom');
|
2020-02-22 02:22:30 +00:00
|
|
|
|
|
|
|
const logger = require('../logger')(__filename);
|
|
|
|
|
2020-11-22 03:07:09 +00:00
|
|
|
// Promise-returning version of stream.pipeline, so a download can be awaited.
const pipeline = util.promisify(stream.pipeline);
|
|
|
|
// Cache of Bottleneck instances, keyed first by interval and then by concurrency.
const limiters = {};
|
2020-03-21 01:48:24 +00:00
|
|
|
|
|
|
|
// Browser-like user agent used in the default request headers.
const defaultUserAgent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1';

/**
 * Baseline options applied to every request; per-request options are spread
 * on top of these and therefore take precedence.
 */
const defaultOptions = {
  encodeJSON: true,
  headers: {
    'user-agent': defaultUserAgent,
  },
};
|
|
|
|
|
2020-03-19 00:54:25 +00:00
|
|
|
// Address of the HTTP proxy, taken from the application config.
const proxySettings = {
  host: config.proxy.host,
  port: config.proxy.port,
};

// Shared agent that tunnels HTTPS requests through the configured HTTP proxy.
const proxyAgent = tunnel.httpsOverHttp({ proxy: proxySettings });
|
|
|
|
|
|
|
|
/**
 * Decide whether a request to `url` should be routed through the proxy.
 *
 * @param {string} url - Absolute URL of the outgoing request.
 * @returns {boolean} `true` when proxying is enabled in the config and the
 *   URL's hostname is listed in `config.proxy.hostnames`; `false` otherwise.
 */
function useProxy(url) {
  const proxySettings = config.proxy;

  // The URL is only parsed when proxying is enabled.
  if (proxySettings.enable) {
    const target = new URL(url);

    return proxySettings.hostnames.includes(target.hostname);
  }

  return false;
}
|
|
|
|
|
2020-11-22 03:07:09 +00:00
|
|
|
/**
 * Look up the shared Bottleneck limiter for an interval/concurrency pair,
 * creating and caching it the first time that pair is requested.
 *
 * @param {object} [limit={}] - Rate limit settings.
 * @param {number} [limit.interval] - Used as Bottleneck's `minTime`; falls
 *   back to `config.limits.default.interval` when undefined.
 * @param {number} [limit.concurrency] - Used as Bottleneck's `maxConcurrent`;
 *   falls back to `config.limits.default.concurrency` when undefined.
 * @returns {Bottleneck} The cached limiter for the resolved pair.
 */
function getLimiter(limit = {}) {
  let interval = limit.interval;
  let concurrency = limit.concurrency;

  // Only `undefined` triggers the default, so an explicit 0 is honoured.
  if (interval === undefined) {
    interval = config.limits.default.interval;
  }

  if (concurrency === undefined) {
    concurrency = config.limits.default.concurrency;
  }

  if (!limiters[interval]) {
    limiters[interval] = {};
  }

  const limitersForInterval = limiters[interval];

  if (!limitersForInterval[concurrency]) {
    limitersForInterval[concurrency] = new Bottleneck({
      minTime: interval,
      maxConcurrent: concurrency,
    });
  }

  return limitersForInterval[concurrency];
}
|
|
|
|
|
|
|
|
/**
 * Perform a single HTTP request and normalise the response.
 *
 * @param {string} [method='get'] - Name of the method to call on the HTTP
 *   client (e.g. 'get', 'post', 'put').
 * @param {string} url - URL to request.
 * @param {*} [body] - Request payload; when falsy the client method is called
 *   without a body argument.
 * @param {object} [requestOptions={}] - Per-request options, spread over
 *   `defaultOptions` and forwarded to the HTTP client.
 * @param {object} [requestOptions.session] - Client to use instead of `bhttp`.
 * @param {number} [requestOptions.responseTimeout] - Response timeout in ms;
 *   falls back to `requestOptions.timeout`, then 60000.
 * @param {object} [requestOptions.destination] - Writable stream; when set the
 *   response is requested as a stream and piped into it.
 * @param {Array} [requestOptions.transforms] - Streams placed between the
 *   response and `destination` in the pipeline.
 * @param {number} [requestOptions.interval] - Rate limit interval (logged).
 * @param {number} [requestOptions.concurrency] - Rate limit concurrency (logged).
 * @returns {Promise<object>} The response's own enumerable properties plus
 *   `status` and `ok` (true for 2xx). When the body is a Buffer it is decoded
 *   to a string and additionally exposed as `html`, with a JSDOM `window` and
 *   `document` parsed from it.
 */
async function request(method = 'get', url, body, requestOptions = {}) {
  const http = requestOptions.session || bhttp;

  // Note: this is a shallow merge, so a caller-supplied `headers` object
  // replaces the default headers entirely.
  const options = {
    ...defaultOptions,
    ...requestOptions,
    responseTimeout: requestOptions.responseTimeout || requestOptions.timeout || 60000,
    stream: !!requestOptions.destination,
    // Resolve the limits exactly like getLimiter does (only `undefined` falls
    // back to the default), so the logged values match the limiter in use.
    interval: requestOptions.interval === undefined
      ? config.limits.default.interval
      : requestOptions.interval,
    concurrency: requestOptions.concurrency === undefined
      ? config.limits.default.concurrency
      : requestOptions.concurrency,
    session: null,
  };

  if (useProxy(url)) {
    options.agent = proxyAgent;
  }

  // Log the method actually used rather than a hard-coded "GET".
  logger.debug(`${String(method).toUpperCase()} (${options.interval}ms/${options.concurrency}p) ${url}`);

  const res = await (body
    ? http[method](url, body, options)
    : http[method](url, options));

  const resIsOk = res.statusCode >= 200 && res.statusCode <= 299;

  if (options.destination) {
    // Stream the response through any transforms into the destination and
    // wait until the whole pipeline has finished.
    await pipeline(res, ...(options.transforms || []), options.destination);
  }

  if (Buffer.isBuffer(res.body)) {
    const html = res.body.toString();
    const { window } = new JSDOM(html);

    return {
      ...res,
      body: html,
      html,
      status: res.statusCode,
      document: window.document,
      window,
      ok: resIsOk,
    };
  }

  return {
    ...res,
    body: res.body,
    status: res.statusCode,
    ok: resIsOk,
  };
}
|
|
|
|
|
2020-11-22 03:07:09 +00:00
|
|
|
/**
 * Queue a request on the limiter that matches its interval/concurrency options.
 *
 * @param {string} [method='get'] - HTTP method name.
 * @param {string} url - URL to request.
 * @param {*} body - Request payload, or null for body-less methods.
 * @param {object} [options] - Request options; also used to select the limiter.
 * @returns {Promise<object>} Result of `request` once the limiter runs the job.
 */
async function scheduleRequest(method = 'get', url, body, options) {
  const limiter = getLimiter(options || {});
  const job = () => request(method, url, body, options);

  return limiter.schedule(job);
}
|
2020-05-16 02:36:45 +00:00
|
|
|
|
2020-11-22 03:07:09 +00:00
|
|
|
/**
 * Schedule a rate-limited GET request.
 *
 * @param {string} url - URL to fetch.
 * @param {object} [options] - Request options.
 * @returns {Promise<object>} Normalised response.
 */
async function get(url, options) {
  const noBody = null;

  return scheduleRequest('get', url, noBody, options);
}
|
2020-02-22 02:22:30 +00:00
|
|
|
|
2020-11-22 03:07:09 +00:00
|
|
|
/**
 * Schedule a rate-limited POST request.
 *
 * @param {string} url - URL to post to.
 * @param {*} body - Request payload.
 * @param {object} [options] - Request options.
 * @returns {Promise<object>} Normalised response.
 */
async function post(url, body, options) {
  const method = 'post';

  return scheduleRequest(method, url, body, options);
}
|
2020-05-16 02:36:45 +00:00
|
|
|
|
2020-11-22 03:07:09 +00:00
|
|
|
/**
 * Schedule a rate-limited PUT request.
 *
 * @param {string} url - URL to put to.
 * @param {*} body - Request payload.
 * @param {object} [options] - Request options.
 * @returns {Promise<object>} Normalised response.
 */
async function put(url, body, options) {
  const method = 'put';

  return scheduleRequest(method, url, body, options);
}
|
2020-07-01 02:47:05 +00:00
|
|
|
|
2020-11-22 03:07:09 +00:00
|
|
|
/**
 * Schedule a rate-limited PATCH request.
 *
 * @param {string} url - URL to patch.
 * @param {*} body - Request payload.
 * @param {object} [options] - Request options.
 * @returns {Promise<object>} Normalised response.
 */
async function patch(url, body, options) {
  const method = 'patch';

  return scheduleRequest(method, url, body, options);
}
|
|
|
|
|
2020-11-22 03:07:09 +00:00
|
|
|
/**
 * Schedule a rate-limited DELETE request (exported as `delete`).
 *
 * @param {string} url - URL to delete.
 * @param {object} [options] - Request options.
 * @returns {Promise<object>} Normalised response.
 */
async function del(url, options) {
  const noBody = null;

  return scheduleRequest('delete', url, noBody, options);
}
|
|
|
|
|
2020-11-22 03:07:09 +00:00
|
|
|
/**
 * Schedule a rate-limited HEAD request.
 *
 * @param {string} url - URL to probe.
 * @param {object} [options] - Request options.
 * @returns {Promise<object>} Normalised response.
 */
async function head(url, options) {
  const noBody = null;

  return scheduleRequest('head', url, noBody, options);
}
|
|
|
|
|
2020-11-22 03:07:09 +00:00
|
|
|
/**
 * Create a bhttp session; pass it as `options.session` to the request
 * helpers to have them use it instead of the bare `bhttp` client.
 *
 * @param {object} [options] - Options forwarded to `bhttp.session`.
 * @returns {object} The session returned by `bhttp.session`.
 */
function getSession(options) {
  const session = bhttp.session(options);

  return session;
}
|
|
|
|
|
2020-02-22 02:22:30 +00:00
|
|
|
module.exports = {
|
2020-05-14 02:26:05 +00:00
|
|
|
get,
|
2020-07-22 02:12:20 +00:00
|
|
|
head,
|
2020-11-22 03:07:09 +00:00
|
|
|
post,
|
|
|
|
delete: del,
|
|
|
|
put,
|
|
|
|
patch,
|
|
|
|
session: getSession,
|
2020-02-22 02:22:30 +00:00
|
|
|
};
|