'use strict';

const config = require('config');
const bhttp = require('@thependulum/bhttp');
const util = require('util');
const stream = require('stream');
const tunnel = require('tunnel');
const Bottleneck = require('bottleneck');
const { JSDOM, toughCookie } = require('jsdom');

const logger = require('../logger')(__filename);
const virtualConsole = require('./virtual-console')(__filename);
const argv = require('../argv');
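
// promisified stream pipeline used to write streamed responses to a destination,
// plus a cache of shared Bottleneck limiters keyed by interval and concurrency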
const pipeline = util.promisify(stream.pipeline);
const limiters = {};
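
// baseline bhttp options merged into every request and session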
const defaultOptions = {
	encodeJSON: true,
	headers: {
		'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1',
	},
};
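
// agent that tunnels HTTPS requests through the HTTP proxy configured in config.proxy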
const proxyAgent = tunnel.httpsOverHttp({
	proxy: {
		host: config.proxy.host,
		port: config.proxy.port,
	},
});
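
// only use the proxy when it is enabled and the target hostname is explicitly listed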
function useProxy(url) {
	if (!config.proxy.enable) {
		return false;
	}

	const { hostname } = new URL(url);

	return config.proxy.hostnames.includes(hostname);
}
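
// resolve a rate-limit setting, preferring CLI arguments over per-request options,
// per-hostname config (unless disabled) and finally the default limits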
function getLimiterValue(prop, options, hostname) {
	if (argv[prop] !== undefined) {
		return argv[prop];
	}

	if (options[prop] !== undefined) {
		return options[prop];
	}

	if (config.limits[hostname]?.enable !== false && config.limits[hostname]?.[prop] !== undefined) {
		return config.limits[hostname][prop];
	}

	return config.limits.default[prop];
}
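
// return the shared limiter for this URL's interval/concurrency, creating and caching it on first use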
function getLimiter(options = {}, url) {
	const { hostname } = new URL(url);

	const interval = getLimiterValue('interval', options, hostname);
	const concurrency = getLimiterValue('concurrency', options, hostname);

	if (!limiters[interval]?.[concurrency]) {
		limiters[interval] = limiters[interval] || {};

		limiters[interval][concurrency] = new Bottleneck({
			minTime: interval,
			maxConcurrent: concurrency,
		});
	}

	return limiters[interval][concurrency];
}
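
// perform the actual HTTP request: apply default headers and timeout, route through the
// proxy when applicable, stream to options.destination if set, and expose buffered HTML
// responses as a JSDOM window/document alongside the raw bhttp response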
async function request(method = 'get', url, body, requestOptions = {}, limiter) {
	const http = requestOptions.session || bhttp;

	const options = {
		...defaultOptions,
		...requestOptions,
		headers: {
			...defaultOptions.headers,
			...requestOptions.headers,
		},
		responseTimeout: requestOptions.responseTimeout || requestOptions.timeout || 60000,
		stream: !!requestOptions.destination,
		session: null,
	};

	const withProxy = useProxy(url);

	if (withProxy) {
		options.agent = proxyAgent;
	}

	logger.debug(`${method.toUpperCase()} (${limiter._store.storeOptions.minTime}ms/${limiter._store.storeOptions.maxConcurrent}p${withProxy ? ' proxy' : ''}) ${url}`);

	const res = await (body
		? http[method](url, body, options)
		: http[method](url, options));

	const resIsOk = res.statusCode >= 200 && res.statusCode <= 299;

	if (options.destination) {
		// res.on('progress', (bytes, totalBytes) => logger.silly(`Downloaded ${Math.round((bytes / totalBytes) * 100)}% of ${url}`));

		await pipeline(res, ...(options.transforms || []), options.destination);
	}

	if (Buffer.isBuffer(res.body)) {
		const html = res.body.toString();
		const window = new JSDOM(html, { virtualConsole, ...options.extract }).window;

		return {
			...res,
			body: html,
			html,
			status: res.statusCode,
			document: window.document,
			window,
			ok: resIsOk,
		};
	}

	return {
		...res,
		body: res.body,
		status: res.statusCode,
		ok: resIsOk,
	};
}
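
// queue the request on the limiter that applies to its hostname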
async function scheduleRequest(method = 'get', url, body, options) {
	const limiter = getLimiter(options, url);

	return limiter.schedule(() => request(method, url, body, options, limiter));
}
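
// rate-limited convenience wrappers for the individual HTTP methods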
async function get(url, options) {
	return scheduleRequest('get', url, null, options);
}

async function post(url, body, options) {
	return scheduleRequest('post', url, body, options);
}

async function put(url, body, options) {
	return scheduleRequest('put', url, body, options);
}

async function patch(url, body, options) {
	return scheduleRequest('patch', url, body, options);
}

async function del(url, options) {
	return scheduleRequest('delete', url, null, options);
}

async function head(url, options) {
	return scheduleRequest('head', url, null, options);
}
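
// create a bhttp session that carries the default options across requests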
function getSession(options) {
	return bhttp.session({ ...defaultOptions, ...options });
}
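
// create a tough-cookie jar (loose mode by default) for use with a session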
function getCookieJar(store, options) {
	return new toughCookie.CookieJar(store, {
		looseMode: true,
		...options,
	});
}
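
// Example usage (hypothetical, assuming this module is required as `http`):
//   const res = await http.get('https://example.com', { interval: 1000 });
//   if (res.ok) console.log(res.document.title);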
module.exports = {
	toughCookie,
	get,
	head,
	post,
	delete: del,
	put,
	patch,
	session: getSession,
	cookieJar: getCookieJar,
	getSession,
	getCookieJar,
};