From 3c595bf78035ccbafcd88d84268403d19fc23a4f Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sun, 25 May 2025 22:27:43 +0200 Subject: [PATCH] Added proxy support for puppeteer session, used in Kink scraper. --- src/scrapers/kink.js | 2 +- src/utils/http.js | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/scrapers/kink.js b/src/scrapers/kink.js index d659558e..6c4c3839 100755 --- a/src/scrapers/kink.js +++ b/src/scrapers/kink.js @@ -64,7 +64,7 @@ function scrapeAll(scenes, entity) { } async function fetchLatest(channel, page = 1) { - const { tab } = await http.getBrowserSession('kink'); + const { tab } = await http.getBrowserSession('kink', { headless: false, useGlobalBrowser: false, useProxy: true }); const url = `${channel.parent.url}/search?type=shoots&channelIds=${channel.parameters?.slug || channel.slug}&sort=published&page=${page}`; const res = await tab.goto(url); const status = res.status(); diff --git a/src/utils/http.js b/src/utils/http.js index c5024124..4965261b 100755 --- a/src/utils/http.js +++ b/src/utils/http.js @@ -30,7 +30,7 @@ const limiters = { const bypassSessions = new Map(); -let browser = null; +const browsers = new Map(); Promise.config({ cancellation: true, @@ -169,15 +169,23 @@ function extractJson(body, headers) { async function getBrowserSession(identifier, options = {}) { return limiters.bypass.schedule(async () => { - if (!browser) { - browser = await puppeteer.launch({ - headless: typeof options.bypass?.headless === 'undefined' ? 'new' : options.bypass.headless, + const browserIdentifier = options.useGlobalBrowser === false ? identifier : null; + + if (!browsers.has(browserIdentifier)) { + const newBrowser = await puppeteer.launch({ + headless: typeof options.headless === 'undefined' ? 'new' : options.headless, + args: [ + ...(options.useProxy ? [`--proxy-server=${config.proxy.host}:${config.proxy.port}`] : []), + ], // headless: false, }); + browsers.set(browserIdentifier, newBrowser); + logger.info('Initialized puppeteer browser'); } + const browser = browsers.get(browserIdentifier); const tab = await browser.newPage(); logger.verbose(`Opened puppeteer tab${identifier ? ` for ${identifier}` : ''}`); @@ -280,7 +288,7 @@ async function destroyBypassSessions() { } async function destroyBrowserSessions() { - await browser?.close(); + await Promise.all(Array.from(browsers.values(), async (browser) => browser?.close())); } async function bypassCloudflareRequest(url, method, body, cloudflareBypass, options, attempts = 0) { @@ -485,7 +493,7 @@ module.exports = { patch, session: getSession, cookieJar: getCookieJar, - browser, + browsers, getBrowserSession, getBypassSession, getSession,