forked from DebaucheryLibrarian/traxxx
Added a Cloudflare resolver to the HTTP module. The tags seed now uses a priority lookup.
This commit is contained in:
@@ -119,7 +119,11 @@ function scrapeProfile(actor, entity) {
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page = 1, { parameters }) {
|
||||
const res = await http.get(`${parameters.videos}/_search?q=site.seo.seoSlug:"${parameters.id}"&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`);
|
||||
const res = await http.get(`${parameters.videos}/_search?q=site.seo.seoSlug:"${parameters.id}"&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`, {
|
||||
bypassCloudflare: true,
|
||||
});
|
||||
|
||||
console.log(res.status);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.body.hits.hits, channel);
|
||||
|
||||
@@ -17,7 +17,14 @@ const virtualConsole = require('./virtual-console')(__filename);
|
||||
const argv = require('../argv');
|
||||
|
||||
const pipeline = util.promisify(stream.pipeline);

// Dedicated limiter for Cloudflare-bypass requests: the bypass proxy opens a
// full browser per request, so serialize them (one at a time, >= 1 s apart,
// 60 s job timeout). getLimiter() reads limiters[interval][concurrency] for
// ordinary requests — presumably those entries are populated elsewhere; the
// duplicate bare `const limiters = {};` declaration from the diff is dropped
// here, since two `const` declarations of the same name are a SyntaxError.
const limiters = {
  bypass: new Bottleneck({
    minTime: 1000,
    maxConcurrent: 1,
    timeout: 60000,
  }),
};
|
||||
|
||||
Promise.config({
|
||||
cancellation: true,
|
||||
@@ -84,6 +91,47 @@ function getLimiter(options = {}, url) {
|
||||
return limiters[interval][concurrency];
|
||||
}
|
||||
|
||||
/**
 * Extracts the JSON payload from a bypass-proxy "solution" object.
 *
 * The bypass proxy renders the target page in a browser, so even JSON
 * endpoints come back as an HTML document whose payload sits in
 * `<body><pre>…</pre></body>`. When the solution's content-type is JSON,
 * parse that text; otherwise return the raw response unchanged.
 *
 * @param {Object} solution - Proxy solution with `headers` and `response`.
 * @returns {*} Parsed JSON data when present, otherwise the raw response.
 * @throws {SyntaxError} If the <pre> text is present but not valid JSON.
 */
function extractJson(solution) {
  // FIX: the headers map may have no content-type entry at all; the previous
  // unguarded `.includes` call threw a TypeError instead of falling through.
  if (solution.headers['content-type']?.includes('application/json')) {
    const { document } = new JSDOM(solution.response, { virtualConsole }).window;
    const dataString = document.querySelector('body > pre')?.textContent;

    if (dataString) {
      return JSON.parse(dataString);
    }
  }

  return solution.response;
}
|
||||
|
||||
/**
 * Performs an HTTP request through the Cloudflare bypass proxy configured at
 * `config.bypass.cloudflare.path` (FlareSolverr-style API — `cmd`,
 * `maxTimeout`, `solution` fields; TODO confirm which proxy is deployed).
 *
 * @param {string} url - Target URL.
 * @param {string} method - Lowercase HTTP verb (becomes `request.<method>`).
 * @param {*} body - Request body; currently not forwarded to the proxy command.
 * @param {Object} options - Request options; `timeout` maps to `maxTimeout`.
 * @returns {Promise<{body: *, statusCode: number, headers: Object}>}
 *   Response normalized to the shape the rest of the http module expects.
 * @throws {Error} When the proxy call fails or reports a non-ok status.
 */
async function bypassCloudflareRequest(url, method, body, options) {
  // the bypass proxy opens a new browser for each request, throttle beyond default limits for this URL
  const res = await limiters.bypass.schedule(async () => bhttp.post(config.bypass.cloudflare.path, {
    cmd: `request.${method}`,
    url,
    maxTimeout: options.timeout,
    proxy: useProxy(url) ? {
      url: `${config.proxy.host}:${config.proxy.port}`,
    } : null,
  }, {
    encodeJSON: true,
  }));

  // BUG FIX: `!res.statusCode === 200` negated statusCode first, yielding
  // `false === 200` — always false — so proxy-level HTTP failures were never
  // detected by this clause. Compare the status code directly instead.
  if (res.statusCode !== 200 || res.body?.status !== 'ok') {
    throw new Error(`CloudFlare bypass failed for ${url} (${res.statusCode}): ${res.body?.message}`);
  }

  const resBody = extractJson(res.body.solution);

  return {
    body: resBody,
    statusCode: res.body.solution.status,
    headers: res.body.solution.headers,
  };
}
|
||||
|
||||
async function request(method = 'get', url, body, requestOptions = {}, limiter) {
|
||||
const http = requestOptions.session || bhttp;
|
||||
|
||||
@@ -93,12 +141,17 @@ async function request(method = 'get', url, body, requestOptions = {}, limiter)
|
||||
};
|
||||
|
||||
const withProxy = useProxy(url);
|
||||
const withCloudflareBypass = options.bypassCloudflare && config.bypass.cloudflare.enable;
|
||||
|
||||
if (withProxy) {
|
||||
options.agent = proxyAgent;
|
||||
}
|
||||
|
||||
logger.debug(`${method.toUpperCase()} (${limiter._store.storeOptions.minTime}ms/${limiter._store.storeOptions.maxConcurrent}p${withProxy ? ' proxy' : ''}) ${url}`);
|
||||
logger.debug(`${method.toUpperCase()} (${limiter._store.storeOptions.minTime}ms/${limiter._store.storeOptions.maxConcurrent}p${withProxy ? ' proxy' : ''}${withCloudflareBypass ? ' bypass' : ''}) ${url}`);
|
||||
|
||||
if (withCloudflareBypass) {
|
||||
return bypassCloudflareRequest(url, method, body, options);
|
||||
}
|
||||
|
||||
const res = await (body
|
||||
? http[method](url, body, options)
|
||||
|
||||
Reference in New Issue
Block a user