'use strict';

const config = require('config');
const Promise = require('bluebird');
const bhttp = require('bhttp');
const fs = require('fs').promises;
const util = require('util');
const stream = require('stream');
const tunnel = require('tunnel');
const Bottleneck = require('bottleneck');
const { JSDOM, toughCookie } = require('jsdom');

const windows = require('./http-windows');

const logger = require('../logger')(__filename);
const virtualConsole = require('./virtual-console')(__filename);
const argv = require('../argv');

const pipeline = util.promisify(stream.pipeline);

// Cache of Bottleneck instances, indexed as limiters[interval][concurrency].
const limiters = {};

Promise.config({
	cancellation: true,
});

// Baseline request options; callers' options are layered on top of these.
const defaultOptions = {
	timeout: argv.requestTimeout,
	encodeJSON: true,
	parse: false,
	headers: {
		'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1',
	},
};

// Tunnelling agent pointed at the proxy configured under config.proxy.
const proxyAgent = tunnel.httpsOverHttp({
	proxy: {
		host: config.proxy.host,
		port: config.proxy.port,
	},
});

/**
 * Decide whether a URL should be requested through the proxy agent.
 *
 * @param {string} url - Absolute URL of the request.
 * @returns {boolean} True when the proxy is enabled and the URL's hostname is listed in config.proxy.hostnames.
 */
function useProxy(url) {
	if (config.proxy.enable) {
		const { hostname } = new URL(url);

		return config.proxy.hostnames.includes(hostname);
	}

	return false;
}

/**
 * Resolve one limiter setting, checking sources from most to least specific.
 *
 * Precedence: command line argument, request options, per-hostname config
 * (skipped when that entry has `enable: false`), then the default config.
 *
 * @param {string} prop - Name of the setting, e.g. 'interval' or 'concurrency'.
 * @param {Object} options - Request options that may override the setting.
 * @param {string} hostname - Hostname used to look up config.limits.
 * @returns {*} The first value found that is not undefined.
 */
function getLimiterValue(prop, options, hostname) {
	const argvValue = argv[prop];

	if (argvValue !== undefined) {
		return argvValue;
	}

	const optionValue = options[prop];

	if (optionValue !== undefined) {
		return optionValue;
	}

	const hostLimits = config.limits[hostname];
	const hostValue = hostLimits?.enable === false ? undefined : hostLimits?.[prop];

	if (hostValue !== undefined) {
		return hostValue;
	}

	return config.limits.default[prop];
}

/**
 * Fetch the Bottleneck limiter for a URL, creating it on first use.
 *
 * Limiters are cached per interval/concurrency pair, so every hostname that
 * resolves to the same pair shares a single limiter.
 *
 * @param {Object} [options={}] - Request options; may carry interval, concurrency and timeout.
 * @param {string} url - Absolute URL whose hostname selects the configured limits.
 * @returns {Bottleneck} The cached or newly created limiter.
 */
function getLimiter(options = {}, url) {
	const { hostname } = new URL(url);

	const interval = getLimiterValue('interval', options, hostname);
	const concurrency = getLimiterValue('concurrency', options, hostname);

	if (!limiters[interval]) {
		limiters[interval] = {};
	}

	const pool = limiters[interval];

	if (!pool[concurrency]) {
		pool[concurrency] = new Bottleneck({
			minTime: interval,
			maxConcurrent: concurrency,
			timeout: (options.timeout || defaultOptions.timeout) + 10000, // 10 seconds longer than the bhttp request timeout
		});
	}

	return pool[concurrency];
}
async function request(method = 'get', url, body, requestOptions = {}, limiter) { const http = requestOptions.session || bhttp; const options = { ...requestOptions, session: null, }; const withProxy = useProxy(url); if (withProxy) { options.agent = proxyAgent; } logger.debug(`${method.toUpperCase()} (${limiter._store.storeOptions.minTime}ms/${limiter._store.storeOptions.maxConcurrent}p${withProxy ? ' proxy' : ''}) ${url}`); const res = await (body ? http[method](url, body, options) : http[method](url, options)); return res; } async function finalizeResult(res, options) { if (options.destination) { // res.on('progress', (bytes, totalBytes) => logger.silly(`Downloaded ${Math.round((bytes / totalBytes) * 100)}% of ${url}`)); await pipeline(res, ...(options.transforms || []), options.destination); } if (Buffer.isBuffer(res.body)) { const html = res.body.toString(); const window = options?.parse ? new JSDOM(html, { virtualConsole, ...options.extract }).window : null; const pathname = new URL(res.request.url).pathname.replace(/\//g, '_'); if (window) { windows.set(pathname, window); } if (argv.saveHtml) { await fs.writeFile(`./html/${pathname}.html`, html); } return { ...res, body: html, html, status: res.statusCode, headers: res.headers, document: window?.document || null, window, ok: res.statusCode >= 200 && res.statusCode <= 299, }; } return { ...res, body: res.body, status: res.statusCode, headers: res.headers, ok: res.statusCode >= 200 && res.statusCode <= 299, }; } function getTimeout(options, url) { return new Promise((resolve, reject, onCancel) => { const timeout = setTimeout(() => { logger.debug(`Canceled timed out request to ${url}`); reject(new Error(`URL ${url} timed out`)); }, (options?.timeout || defaultOptions.timeout) + 10000); onCancel(() => { clearTimeout(timeout); }); }); } async function scheduleRequest(method = 'get', url, body, requestOptions = {}) { const options = { ...defaultOptions, ...requestOptions, headers: { ...defaultOptions.headers, 
...requestOptions.headers, }, responseTimeout: requestOptions.responseTimeout || requestOptions.timeout || defaultOptions.timeout, stream: !!requestOptions.destination, }; const limiter = getLimiter(options, url); const timeout = getTimeout(options, url); const result = await limiter.schedule(async () => Promise.race([request(method, url, body, options, limiter), timeout])); timeout.cancel(); const curatedResult = await finalizeResult(result, options); logger.silly(`Response ${curatedResult.status} for ${method.toUpperCase()} ${url}`); return curatedResult; } async function get(url, options) { return scheduleRequest('get', url, null, options); } async function post(url, body, options) { return scheduleRequest('post', url, body, options); } async function put(url, body, options) { return scheduleRequest('put', url, body, options); } async function patch(url, body, options) { return scheduleRequest('patch', url, body, options); } async function del(url, options) { return scheduleRequest('delete', url, null, options); } async function head(url, options) { return scheduleRequest('head', url, null, options); } function getSession(options) { return bhttp.session({ ...defaultOptions, ...options }); } function getCookieJar(store, options) { return new toughCookie.CookieJar(store, { looseMode: true, ...options, }); } module.exports = { toughCookie, get, head, post, delete: del, put, patch, session: getSession, cookieJar: getCookieJar, getSession, getCookieJar, };