Added configurable proxy to HTTP module (also used by qu). Added network and site URL to search documents.

This commit is contained in:
2020-03-19 01:54:25 +01:00
parent e4b269956e
commit 4b310e9dfa
8 changed files with 72 additions and 31 deletions

View File

@@ -1,10 +1,28 @@
'use strict';
const config = require('config');
const tunnel = require('tunnel');
const bhttp = require('bhttp');
const taskQueue = require('promise-task-queue');
const logger = require('../logger')(__filename);
// Shared agent for requests that must go through the configured proxy.
// It is built once at module load from config.proxy.host and config.proxy.port,
// whether or not config.proxy.enable is set; it only takes effect when it is
// assigned as the `agent` option of a request.
// NOTE(review): tunnel.httpsOverHttp presumably tunnels HTTPS requests through a
// plain-HTTP proxy — confirm against the tunnel package documentation.
const proxyAgent = tunnel.httpsOverHttp({
proxy: {
host: config.proxy.host,
port: config.proxy.port,
},
});
/**
 * Decide whether a request to the given URL should be routed through the proxy.
 *
 * @param {string} url - Absolute URL of the outgoing request.
 * @returns {boolean} false when config.proxy.enable is falsy; otherwise whether
 *   the URL's hostname is found in config.proxy.hostnames.
 * @throws {TypeError} When the proxy is enabled and the URL cannot be parsed.
 */
function useProxy(url) {
	const proxySettings = config.proxy;

	if (proxySettings.enable) {
		// Parse first, then look the hostname up, so a malformed URL fails here.
		const target = new URL(url);

		return proxySettings.hostnames.includes(target.hostname);
	}

	return false;
}
const queue = taskQueue();
queue.on('concurrencyReached:httpGet', () => {
@@ -15,47 +33,42 @@ queue.on('concurrencyReached:httpPost', () => {
logger.silly('Queueing POST requests');
});
// Queue task 'httpGet': issues a GET request through bhttp, registered with a
// concurrency of 20.
// Takes the target url, a response timeout (default 30000, presumably
// milliseconds) and extra bhttp options; the options are spread after the
// timeout, so they can override it.
// Resolves with the bhttp response, with its statusCode mirrored on `code`.
queue.define('httpGet', async ({ url, timeout = 30000, options = {} }) => {
	logger.silly(`GET ${url}`);

	const requestOptions = { responseTimeout: timeout, ...options };
	const response = await bhttp.get(url, requestOptions);

	response.code = response.statusCode;

	return response;
}, { concurrency: 20 });
// Queue task 'http': performs a request with any bhttp method, registered with
// a concurrency of 20.
//
// Task data:
//   url     - target URL; also checked by useProxy() to decide on proxying
//   method  - HTTP method name, case-insensitive; defaults to POST when a body
//             is supplied and to GET otherwise
//   body    - payload, only sent for POST, PUT and PATCH
//   timeout - response timeout (default 30000, presumably milliseconds)
//   options - extra bhttp options, spread after the timeout so they can override it
//
// Resolves with a shallow copy of the bhttp response plus `code`, mirroring
// statusCode.
queue.define('http', async ({
	url,
	method,
	body,
	timeout = 30000,
	options = {},
}) => {
	// A task that carries a body but names no method is sent as POST. With a
	// fixed GET default, callers pushing only { url, body, options } would be
	// issued as GET requests and their body silently dropped.
	const verb = (method || (body ? 'POST' : 'GET')).toUpperCase();
	const bhttpMethod = verb.toLowerCase();

	if (body) {
		logger.silly(`${verb} ${url} with ${body}`);
	} else {
		logger.silly(`${verb} ${url}`);
	}

	const reqOptions = {
		responseTimeout: timeout,
		...options,
	};

	// Route through the shared proxy agent only for URLs approved by useProxy().
	if (useProxy(url)) {
		reqOptions.agent = proxyAgent;
	}

	// Only these methods take a body argument; the others are called as (url, options).
	const res = ['POST', 'PUT', 'PATCH'].includes(verb)
		? await bhttp[bhttpMethod](url, body, reqOptions)
		: await bhttp[bhttpMethod](url, reqOptions);

	// NOTE(review): the spread copies only own enumerable properties of the
	// response — confirm no caller relies on prototype methods or accessors.
	return {
		...res,
		code: res.statusCode,
	};
}, {
	concurrency: 20,
});
async function get(url, options) {
return queue.push('httpGet', {
return queue.push('http', {
method: 'get',
url,
options,
@@ -63,7 +76,7 @@ async function get(url, options) {
}
async function post(url, body, options) {
return queue.push('httpPost', {
return queue.push('http', {
url,
body,
options,

View File

@@ -287,7 +287,7 @@ function extractAll(htmlValue, selector) {
return initAll(window.document, selector, window);
}
async function get(urlValue, selector, headers, queryAll = false) {
async function get(urlValue, selector, headers, options, queryAll = false) {
const res = await http.get(urlValue, {
headers,
});
@@ -315,8 +315,8 @@ async function get(urlValue, selector, headers, queryAll = false) {
};
}
/**
 * Call get() with its queryAll flag switched on.
 *
 * All arguments are forwarded unchanged and in order; `true` is passed as the
 * fifth argument, which is get()'s `queryAll` parameter. `options` must be
 * forwarded explicitly so that `true` does not land in the `options` slot.
 *
 * @param {string} urlValue - URL passed to get().
 * @param {string} selector - Selector passed to get().
 * @param {Object} [headers] - Request headers passed to get().
 * @param {Object} [options] - Options passed to get().
 * @returns {Promise<*>} Whatever get() resolves with.
 */
async function getAll(urlValue, selector, headers, options) {
	return get(urlValue, selector, headers, options, true);
}
module.exports = {