Added configurable proxy to HTTP module (also used by qu). Added network and site URL to search documents.
This commit is contained in:
@@ -1,10 +1,28 @@
|
||||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
const tunnel = require('tunnel');
|
||||
const bhttp = require('bhttp');
|
||||
const taskQueue = require('promise-task-queue');
|
||||
|
||||
const logger = require('../logger')(__filename);
|
||||
|
||||
// Address of the HTTP proxy, read from the `proxy` section of the configuration.
const proxyEndpoint = {
	host: config.proxy.host,
	port: config.proxy.port,
};

// Agent created once at module load via tunnel's HTTPS-over-HTTP helper;
// requests opt in to it by setting it as their `agent` option.
const proxyAgent = tunnel.httpsOverHttp({ proxy: proxyEndpoint });
|
||||
|
||||
/**
 * Decide whether a request to `url` should be routed through the proxy agent.
 *
 * The proxy is used only when `config.proxy.enable` is truthy and the URL's
 * hostname appears in the `config.proxy.hostnames` array.
 *
 * @param {string|URL} url - Absolute URL of the outgoing request.
 * @returns {boolean} `true` when the request should use the proxy.
 */
function useProxy(url) {
	const { enable, hostnames } = config.proxy;

	// Nothing can match when the proxy is disabled or no hostname list is configured.
	if (!enable || !Array.isArray(hostnames)) {
		return false;
	}

	let hostname;

	try {
		({ hostname } = new URL(url));
	} catch (error) {
		// A malformed or relative URL cannot match a proxied hostname. Leave it to
		// the HTTP client to report the bad URL instead of failing in this check.
		return false;
	}

	return hostnames.includes(hostname);
}
|
||||
|
||||
// Shared task queue; the HTTP task types and their concurrency limits are defined on it below.
const queue = taskQueue();
|
||||
|
||||
queue.on('concurrencyReached:httpGet', () => {
|
||||
@@ -15,47 +33,42 @@ queue.on('concurrencyReached:httpPost', () => {
|
||||
logger.silly('Queueing POST requests');
|
||||
});
|
||||
|
||||
/**
 * Queue task 'httpGet': issue a GET request through bhttp.
 *
 * Task fields:
 *   url     - address to request.
 *   timeout - passed to bhttp as `responseTimeout` (default 30000).
 *   options - extra bhttp options; spread last, so they override `responseTimeout`.
 *
 * Resolves with the bhttp response, with `code` set to its `statusCode`.
 * At most 20 of these tasks run concurrently.
 */
queue.define('httpGet', async (task) => {
	const { url, timeout = 30000, options = {} } = task;

	logger.silly(`GET ${url}`);

	const requestOptions = { responseTimeout: timeout, ...options };
	const response = await bhttp.get(url, requestOptions);

	response.code = response.statusCode;

	return response;
}, { concurrency: 20 });
|
||||
|
||||
/**
 * Queue task 'http': perform an HTTP request with bhttp, going through the
 * proxy agent when `useProxy(url)` says so.
 *
 * @param {Object} task
 * @param {string} task.url - Address to request.
 * @param {string} [task.method] - HTTP method, case-insensitive. When omitted,
 *   POST is used if a body was supplied and GET otherwise, so a supplied body
 *   is never silently dropped.
 * @param {*} [task.body] - Payload; only passed to bhttp for POST, PUT and PATCH.
 * @param {number} [task.timeout=30000] - Passed to bhttp as `responseTimeout`.
 * @param {Object} [task.options={}] - Extra bhttp options; spread last, so they
 *   override `responseTimeout`. The proxy agent, when used, overrides `options.agent`.
 * @returns {Promise<Object>} The bhttp response, with `code` set to its `statusCode`.
 *
 * At most 20 of these tasks run concurrently.
 */
queue.define('http', async ({
	url,
	method,
	body,
	timeout = 30000,
	options = {},
}) => {
	const hasBody = body !== undefined && body !== null;
	const verb = (method || (hasBody ? 'POST' : 'GET')).toUpperCase();
	const requestName = verb.toLowerCase();

	if (body) {
		logger.silly(`${verb} ${url} with ${body}`);
	} else {
		logger.silly(`${verb} ${url}`);
	}

	const reqOptions = {
		responseTimeout: timeout,
		...options,
	};

	if (useProxy(url)) {
		reqOptions.agent = proxyAgent;
	}

	// Only these methods take a payload argument; the others are called with (url, options).
	const res = ['POST', 'PUT', 'PATCH'].includes(verb)
		? await bhttp[requestName](url, body, reqOptions)
		: await bhttp[requestName](url, reqOptions);

	// Annotate the response itself instead of returning `{ ...res }`: object spread
	// copies only own enumerable properties, so anything the response exposes via
	// its prototype (methods, or accessors such as `headers` on recent Node
	// versions) would be missing from a copy.
	res.code = res.statusCode;

	return res;
}, {
	concurrency: 20,
});
|
||||
|
||||
async function get(url, options) {
|
||||
return queue.push('httpGet', {
|
||||
return queue.push('http', {
|
||||
method: 'get',
|
||||
url,
|
||||
options,
|
||||
@@ -63,7 +76,7 @@ async function get(url, options) {
|
||||
}
|
||||
|
||||
async function post(url, body, options) {
|
||||
return queue.push('httpPost', {
|
||||
return queue.push('http', {
|
||||
url,
|
||||
body,
|
||||
options,
|
||||
|
||||
@@ -287,7 +287,7 @@ function extractAll(htmlValue, selector) {
|
||||
return initAll(window.document, selector, window);
|
||||
}
|
||||
|
||||
async function get(urlValue, selector, headers, queryAll = false) {
|
||||
async function get(urlValue, selector, headers, options, queryAll = false) {
|
||||
const res = await http.get(urlValue, {
|
||||
headers,
|
||||
});
|
||||
@@ -315,8 +315,8 @@ async function get(urlValue, selector, headers, queryAll = false) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Same as `get`, but with its `queryAll` flag set to `true`.
 *
 * All arguments are forwarded to `get` unchanged.
 *
 * @param {string} urlValue - Forwarded as the first argument of `get`.
 * @param {*} selector - Forwarded to `get`.
 * @param {Object} [headers] - Forwarded to `get`.
 * @param {Object} [options] - Forwarded to `get`.
 * @returns {Promise<*>} Whatever `get` resolves with.
 */
async function getAll(urlValue, selector, headers, options) {
	return get(urlValue, selector, headers, options, true);
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
Reference in New Issue
Block a user