'use strict'; const config = require('config'); const bhttp = require('@thependulum/bhttp'); const util = require('util'); const stream = require('stream'); const tunnel = require('tunnel'); const Bottleneck = require('bottleneck'); const { JSDOM, toughCookie } = require('jsdom'); const logger = require('../logger')(__filename); const virtualConsole = require('./virtual-console')(__filename); const argv = require('../argv'); const pipeline = util.promisify(stream.pipeline); const limiters = {}; const defaultOptions = { encodeJSON: true, headers: { 'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1', }, }; const proxyAgent = tunnel.httpsOverHttp({ proxy: { host: config.proxy.host, port: config.proxy.port, }, }); function useProxy(url) { if (!config.proxy.enable) { return false; } const { hostname } = new URL(url); return config.proxy.hostnames.includes(hostname); } function getLimiterValue(prop, options, hostname) { if (argv[prop] !== undefined) { return argv[prop]; } if (options[prop] !== undefined) { return options[prop]; } if (config.limits[hostname]?.enable !== false && config.limits[hostname]?.[prop] !== undefined) { return config.limits[hostname][prop]; } return config.limits.default[prop]; } function getLimiter(options = {}, url) { const { hostname } = new URL(url); const interval = getLimiterValue('interval', options, hostname); const concurrency = getLimiterValue('concurrency', options, hostname); if (!limiters[interval]?.[concurrency]) { limiters[interval] = limiters[interval] || {}; limiters[interval][concurrency] = new Bottleneck({ minTime: interval, maxConcurrent: concurrency, }); } return limiters[interval][concurrency]; } async function request(method = 'get', url, body, requestOptions = {}, limiter) { const http = requestOptions.session || bhttp; const options = { ...defaultOptions, ...requestOptions, headers: { ...defaultOptions.headers, ...requestOptions.headers, }, responseTimeout: requestOptions.responseTimeout || requestOptions.timeout || 60000, stream: !!requestOptions.destination, session: null, }; const withProxy = useProxy(url); if (withProxy) { options.agent = proxyAgent; } logger.debug(`${method.toUpperCase()} (${limiter._store.storeOptions.minTime}ms/${limiter._store.storeOptions.maxConcurrent}p${withProxy ? ' proxy' : ''}) ${url}`); const res = await (body ? http[method](url, body, options) : http[method](url, options)); const resIsOk = res.statusCode >= 200 && res.statusCode <= 299; if (options.destination) { // res.on('progress', (bytes, totalBytes) => logger.silly(`Downloaded ${Math.round((bytes / totalBytes) * 100)}% of ${url}`)); await pipeline(res, ...(options.transforms || []), options.destination); } if (Buffer.isBuffer(res.body)) { const html = res.body.toString(); const window = new JSDOM(html, { virtualConsole, ...options.extract }).window; return { ...res, body: html, html, status: res.statusCode, document: window.document, window, ok: resIsOk, }; } return { ...res, body: res.body, status: res.statusCode, ok: res.statusCode >= 200 && res.statusCode <= 299, }; } async function scheduleRequest(method = 'get', url, body, options) { const limiter = getLimiter(options, url); return limiter.schedule(() => request(method, url, body, options, limiter)); } async function get(url, options) { return scheduleRequest('get', url, null, options); } async function post(url, body, options) { return scheduleRequest('post', url, body, options); } async function put(url, body, options) { return scheduleRequest('put', url, body, options); } async function patch(url, body, options) { return scheduleRequest('patch', url, body, options); } async function del(url, options) { return scheduleRequest('delete', url, null, options); } async function head(url, options) { return scheduleRequest('head', url, null, options); } function getSession(options) { return bhttp.session({ ...defaultOptions, ...options }); } function getCookieJar(store, options) { return new toughCookie.CookieJar(store, { looseMode: true, ...options, }); } module.exports = { toughCookie, get, head, post, delete: del, put, patch, session: getSession, cookieJar: getCookieJar, getSession, getCookieJar, };