Removed superfluous MindGeek scrapers.
This commit is contained in:
@@ -1,146 +0,0 @@
|
||||
'use strict';
|
||||
|
||||
const util = require('util');
|
||||
const stream = require('stream');
|
||||
const config = require('config');
|
||||
const tunnel = require('tunnel');
|
||||
const bhttp = require('@thependulum/bhttp');
|
||||
const taskQueue = require('promise-task-queue');
|
||||
|
||||
const pipeline = util.promisify(stream.pipeline);
|
||||
const logger = require('../logger')(__filename);
|
||||
|
||||
const defaultHeaders = {
|
||||
'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1',
|
||||
};
|
||||
|
||||
const defaultOptions = {
|
||||
responseTimeout: 30000,
|
||||
};
|
||||
|
||||
const proxyAgent = tunnel.httpsOverHttp({
|
||||
proxy: {
|
||||
host: config.proxy.host,
|
||||
port: config.proxy.port,
|
||||
},
|
||||
});
|
||||
|
||||
function useProxy(url) {
|
||||
if (!config.proxy.enable) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const { hostname } = new URL(url);
|
||||
return config.proxy.hostnames.includes(hostname);
|
||||
}
|
||||
|
||||
const queue = taskQueue();
|
||||
const defaultQueueMethod = '20p';
|
||||
|
||||
async function handler({
|
||||
url,
|
||||
method = 'GET',
|
||||
body,
|
||||
headers = {},
|
||||
options = {},
|
||||
}) {
|
||||
if (body) {
|
||||
logger.silly(`${method.toUpperCase()} ${url} with ${JSON.stringify(body)} ${options.queueMethod || defaultQueueMethod}`);
|
||||
} else {
|
||||
logger.silly(`${method.toUpperCase()} ${url} ${options.queueMethod || defaultQueueMethod}`);
|
||||
}
|
||||
|
||||
const reqOptions = {
|
||||
headers: {
|
||||
...(options?.defaultHeaders !== false && defaultHeaders),
|
||||
...headers,
|
||||
},
|
||||
...defaultOptions,
|
||||
...options,
|
||||
...(options?.timeout && { responseTimeout: options?.timeout }),
|
||||
};
|
||||
|
||||
if (useProxy(url)) {
|
||||
reqOptions.agent = proxyAgent;
|
||||
}
|
||||
|
||||
const res = ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase())
|
||||
? await (options.useSession || bhttp)[method.toLowerCase()](url, body, reqOptions)
|
||||
: await (options.useSession || bhttp)[method.toLowerCase()](url, reqOptions);
|
||||
|
||||
if (options?.stream && options?.destination) {
|
||||
await pipeline(res, ...(options?.transforms || []), options?.destination);
|
||||
}
|
||||
|
||||
const html = Buffer.isBuffer(res.body) ? res.body.toString() : null;
|
||||
const json = Buffer.isBuffer(res.body) ? null : res.body;
|
||||
|
||||
return {
|
||||
...res,
|
||||
originalRes: res,
|
||||
html,
|
||||
json,
|
||||
pipe: res.pipe,
|
||||
ok: res.statusCode >= 200 && res.statusCode <= 299,
|
||||
code: res.statusCode,
|
||||
status: res.statusCode,
|
||||
};
|
||||
}
|
||||
|
||||
queue.on('concurrencyReached:http', () => {
|
||||
logger.silly('Queueing requests');
|
||||
});
|
||||
|
||||
queue.define('20p', handler, {
|
||||
concurrency: 20,
|
||||
});
|
||||
|
||||
queue.define('1s', handler, {
|
||||
interval: 1,
|
||||
});
|
||||
|
||||
queue.define('5s', handler, {
|
||||
interval: 5,
|
||||
});
|
||||
|
||||
async function get(url, headers, options) {
|
||||
return queue.push(options?.queueMethod || defaultQueueMethod, {
|
||||
method: 'GET',
|
||||
url,
|
||||
headers,
|
||||
options,
|
||||
});
|
||||
}
|
||||
|
||||
async function head(url, headers, options) {
|
||||
return queue.push(options?.queueMethod || defaultQueueMethod, {
|
||||
method: 'HEAD',
|
||||
url,
|
||||
headers,
|
||||
options,
|
||||
});
|
||||
}
|
||||
|
||||
async function post(url, body, headers, options) {
|
||||
return queue.push(options?.queueMethod || defaultQueueMethod, {
|
||||
method: 'POST',
|
||||
url,
|
||||
body,
|
||||
headers,
|
||||
options,
|
||||
});
|
||||
}
|
||||
|
||||
function session(headers, options) {
|
||||
return bhttp.session({
|
||||
headers,
|
||||
options,
|
||||
});
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
get,
|
||||
post,
|
||||
head,
|
||||
session,
|
||||
};
|
||||
Reference in New Issue
Block a user