Added CF resolver to http module. Using priority lookup in tags seed.

This commit is contained in:
DebaucheryLibrarian
2022-08-15 23:51:51 +02:00
parent b7fbcdec94
commit 3db8b80164
6 changed files with 100 additions and 64 deletions

View File

@@ -119,7 +119,11 @@ function scrapeProfile(actor, entity) {
}
async function fetchLatest(channel, page = 1, { parameters }) {
const res = await http.get(`${parameters.videos}/_search?q=site.seo.seoSlug:"${parameters.id}"&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`);
const res = await http.get(`${parameters.videos}/_search?q=site.seo.seoSlug:"${parameters.id}"&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`, {
bypassCloudflare: true,
});
console.log(res.status);
if (res.ok) {
return scrapeAll(res.body.hits.hits, channel);

View File

@@ -17,7 +17,14 @@ const virtualConsole = require('./virtual-console')(__filename);
const argv = require('../argv');
const pipeline = util.promisify(stream.pipeline);
const limiters = {};
// Registry of request limiters. `bypass` is the dedicated limiter that
// bypassCloudflareRequest schedules its proxy calls on; other entries are
// looked up elsewhere as limiters[interval][concurrency].
const limiters = {
// Bottleneck `minTime` is the minimum gap between job launches (ms) and
// `maxConcurrent` the number of jobs allowed to run at once, so bypass
// requests run strictly one at a time, at most one launch per second.
bypass: new Bottleneck({
minTime: 1000,
maxConcurrent: 1,
// NOTE(review): confirm what `timeout` governs for this Bottleneck instance;
// it may not act as a per-job timeout.
timeout: 60000,
}),
};
Promise.config({
cancellation: true,
@@ -84,6 +91,47 @@ function getLimiter(options = {}, url) {
return limiters[interval][concurrency];
}
/**
 * Unwrap the payload of a bypass `solution` object.
 *
 * When the solution's content-type header mentions `application/json`, the
 * response text is parsed as an HTML document and the text of its
 * `body > pre` element is parsed as JSON (presumably because the bypass
 * browser renders raw JSON inside a <pre> element; verify against the proxy).
 * In every other case the response is returned untouched.
 *
 * @param {object} solution - Solution object; `headers` and `response` are read.
 * @returns {*} The parsed JSON data, or `solution.response` as-is when the
 *   content type is not JSON, is missing, or no `body > pre` text is found.
 * @throws {SyntaxError} When the `body > pre` text is not valid JSON.
 */
function extractJson(solution) {
	// The header (or the whole headers object) may be absent; treat that as
	// "not JSON" rather than throwing and losing an otherwise usable response.
	const contentType = solution.headers?.['content-type'];

	if (contentType?.includes('application/json')) {
		const { document } = new JSDOM(solution.response, { virtualConsole }).window;
		const dataString = document.querySelector('body > pre')?.textContent;

		if (dataString) {
			return JSON.parse(dataString);
		}
	}

	return solution.response;
}
/**
 * Perform a request through the Cloudflare bypass proxy configured at
 * `config.bypass.cloudflare.path`.
 *
 * The call is scheduled on the dedicated `limiters.bypass` limiter and sent
 * to the proxy as a JSON-encoded POST.
 *
 * @param {string} url - Target URL to fetch through the proxy.
 * @param {string} method - Method name, sent to the proxy as `request.${method}`.
 * @param {*} body - Currently unused: no request body is forwarded to the proxy.
 * @param {object} options - Request options; only `timeout` is read (sent as `maxTimeout`).
 * @returns {Promise<{ body: *, statusCode: *, headers: * }>} The solution's
 *   payload (run through extractJson), status and headers.
 * @throws {Error} When the proxy does not answer with HTTP 200 or its reply
 *   status is not `'ok'`.
 */
async function bypassCloudflareRequest(url, method, body, options) {
	// the bypass proxy opens a new browser for each request, throttle beyond default limits for this URL
	const res = await limiters.bypass.schedule(async () => bhttp.post(config.bypass.cloudflare.path, {
		cmd: `request.${method}`,
		url,
		maxTimeout: options.timeout,
		proxy: useProxy(url) ? {
			url: `${config.proxy.host}:${config.proxy.port}`,
		} : null,
	}, {
		encodeJSON: true,
	}));

	// `!res.statusCode === 200` compared a boolean to 200 and was always false,
	// so a failing proxy reply slipped through; compare the status code itself.
	if (res.statusCode !== 200 || res.body?.status !== 'ok') {
		throw new Error(`CloudFlare bypass failed for ${url} (${res.statusCode}): ${res.body?.message}`);
	}

	const { solution } = res.body;

	return {
		body: extractJson(solution),
		statusCode: solution.status,
		headers: solution.headers,
	};
}
async function request(method = 'get', url, body, requestOptions = {}, limiter) {
const http = requestOptions.session || bhttp;
@@ -93,12 +141,17 @@ async function request(method = 'get', url, body, requestOptions = {}, limiter)
};
const withProxy = useProxy(url);
const withCloudflareBypass = options.bypassCloudflare && config.bypass.cloudflare.enable;
if (withProxy) {
options.agent = proxyAgent;
}
logger.debug(`${method.toUpperCase()} (${limiter._store.storeOptions.minTime}ms/${limiter._store.storeOptions.maxConcurrent}p${withProxy ? ' proxy' : ''}) ${url}`);
logger.debug(`${method.toUpperCase()} (${limiter._store.storeOptions.minTime}ms/${limiter._store.storeOptions.maxConcurrent}p${withProxy ? ' proxy' : ''}${withCloudflareBypass ? ' bypass' : ''}) ${url}`);
if (withCloudflareBypass) {
return bypassCloudflareRequest(url, method, body, options);
}
const res = await (body
? http[method](url, body, options)