Added proxy support for puppeteer session, used in Kink scraper.

This commit is contained in:
DebaucheryLibrarian 2025-05-25 22:27:43 +02:00
parent 091149a75b
commit 3c595bf780
2 changed files with 15 additions and 7 deletions

View File

@@ -64,7 +64,7 @@ function scrapeAll(scenes, entity) {
} }
async function fetchLatest(channel, page = 1) { async function fetchLatest(channel, page = 1) {
const { tab } = await http.getBrowserSession('kink'); const { tab } = await http.getBrowserSession('kink', { headless: false, useGlobalBrowser: false, useProxy: true });
const url = `${channel.parent.url}/search?type=shoots&channelIds=${channel.parameters?.slug || channel.slug}&sort=published&page=${page}`; const url = `${channel.parent.url}/search?type=shoots&channelIds=${channel.parameters?.slug || channel.slug}&sort=published&page=${page}`;
const res = await tab.goto(url); const res = await tab.goto(url);
const status = res.status(); const status = res.status();

View File

@@ -30,7 +30,7 @@ const limiters = {
const bypassSessions = new Map(); const bypassSessions = new Map();
let browser = null; const browsers = new Map();
Promise.config({ Promise.config({
cancellation: true, cancellation: true,
@@ -169,15 +169,23 @@ function extractJson(body, headers) {
async function getBrowserSession(identifier, options = {}) { async function getBrowserSession(identifier, options = {}) {
return limiters.bypass.schedule(async () => { return limiters.bypass.schedule(async () => {
if (!browser) { const browserIdentifier = options.useGlobalBrowser === false ? identifier : null;
browser = await puppeteer.launch({
headless: typeof options.bypass?.headless === 'undefined' ? 'new' : options.bypass.headless, if (!browsers.has(browserIdentifier)) {
const newBrowser = await puppeteer.launch({
headless: typeof options.headless === 'undefined' ? 'new' : options.headless,
args: [
...(options.useProxy ? [`--proxy-server=${config.proxy.host}:${config.proxy.port}`] : []),
],
// headless: false, // headless: false,
}); });
browsers.set(browserIdentifier, newBrowser);
logger.info('Initialized puppeteer browser'); logger.info('Initialized puppeteer browser');
} }
const browser = browsers.get(browserIdentifier);
const tab = await browser.newPage(); const tab = await browser.newPage();
logger.verbose(`Opened puppeteer tab${identifier ? ` for ${identifier}` : ''}`); logger.verbose(`Opened puppeteer tab${identifier ? ` for ${identifier}` : ''}`);
@@ -280,7 +288,7 @@ async function destroyBypassSessions() {
} }
async function destroyBrowserSessions() { async function destroyBrowserSessions() {
await browser?.close(); await Promise.all(Array.from(browsers.values(), async (browser) => browser?.close()));
} }
async function bypassCloudflareRequest(url, method, body, cloudflareBypass, options, attempts = 0) { async function bypassCloudflareRequest(url, method, body, cloudflareBypass, options, attempts = 0) {
@@ -485,7 +493,7 @@ module.exports = {
patch, patch,
session: getSession, session: getSession,
cookieJar: getCookieJar, cookieJar: getCookieJar,
browser, browsers,
getBrowserSession, getBrowserSession,
getBypassSession, getBypassSession,
getSession, getSession,