Added puppeteer to http module, refactored Kink scraper to utilize it.
This commit is contained in:
@@ -9,7 +9,8 @@ const stream = require('stream');
|
||||
const tunnel = require('tunnel');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const { JSDOM, toughCookie } = require('jsdom');
|
||||
const puppeteer = require('puppeteer');
|
||||
const puppeteer = require('puppeteer-extra');
|
||||
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
||||
|
||||
const windows = require('./http-windows');
|
||||
|
||||
@@ -30,12 +31,13 @@ const limiters = {
|
||||
const bypassSessions = new Map();
|
||||
|
||||
let browser = null;
|
||||
const browserSessions = new Map();
|
||||
|
||||
Promise.config({
|
||||
cancellation: true,
|
||||
});
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
const defaultOptions = {
|
||||
timeout: argv.requestTimeout,
|
||||
encodeJSON: true,
|
||||
@@ -156,26 +158,27 @@ function extractJson(solution) {
|
||||
return solution.response;
|
||||
}
|
||||
|
||||
/**
 * Open a new tab in the shared puppeteer browser, launching the browser first
 * if it has not been started yet.
 *
 * The launch and tab creation run inside `limiters.bypass.schedule`, so they
 * are queued on the same limiter as the other bypass requests in this module.
 *
 * NOTE(review): bypassBrowserRequest in this file still uses the resolved
 * value directly as a page (`page.goto`, `page.content`); confirm it is
 * updated to read `tab` from the returned object.
 *
 * @param {string} [identifier] - Label used only in the verbose log message.
 * @param {object} [options]
 * @param {boolean} [options.headless] - Forwarded to `puppeteer.launch`;
 *   treated as `true` when undefined. Only read when this call is the one
 *   that launches the browser.
 * @returns {Promise<{browser: object, tab: object}>} The shared browser and the newly opened tab.
 */
async function getBrowserSession(identifier, options = {}) {
	return limiters.bypass.schedule(async () => {
		if (!browser) {
			browser = await puppeteer.launch({
				headless: typeof options.headless === 'undefined' ? true : options.headless,
			});

			logger.info('Initialized puppeteer browser');
		}

		const tab = await browser.newPage();

		// Leading space inside the template keeps the message from rendering as "tabfor <identifier>".
		logger.verbose(`Opened puppeteer tab${identifier ? ` for ${identifier}` : ''}`);

		return { browser, tab };
	});
}
|
||||
|
||||
async function bypassBrowserRequest(url, _options) {
|
||||
const page = await limiters.bypass.schedule(async () => getBrowserSession(new URL(url).hostname));
|
||||
const page = await getBrowserSession(new URL(url).hostname);
|
||||
|
||||
const res = await page.goto(url);
|
||||
const body = await page.content();
|
||||
@@ -254,6 +257,10 @@ async function destroyBypassSessions() {
|
||||
await Promise.map(sessionListRes.body.sessions, async (sessionId) => destroyBypassSession(sessionId), { concurrency: 5 });
|
||||
}
|
||||
|
||||
/**
 * Close the shared puppeteer browser if one has been launched.
 * Does nothing when no browser instance exists.
 *
 * @returns {Promise<void>}
 */
async function destroyBrowserSessions() {
	if (browser === null || browser === undefined) {
		return;
	}

	await browser.close();
}
|
||||
|
||||
async function bypassCloudflareRequest(url, method, body, cloudflareBypass, options, attempts = 0) {
|
||||
const sessionId = await limiters.bypass.schedule(async () => getBypassSession(url, cloudflareBypass));
|
||||
|
||||
@@ -456,9 +463,11 @@ module.exports = {
|
||||
patch,
|
||||
session: getSession,
|
||||
cookieJar: getCookieJar,
|
||||
browser,
|
||||
getBrowserSession,
|
||||
getBypassSession,
|
||||
getSession,
|
||||
getCookieJar,
|
||||
destroyBypassSessions,
|
||||
destroyBrowserSessions,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user