Added proxy support for Puppeteer browser sessions, now used by the Kink scraper.
This commit is contained in:
parent
091149a75b
commit
3c595bf780
|
@ -64,7 +64,7 @@ function scrapeAll(scenes, entity) {
|
|||
}
|
||||
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
const { tab } = await http.getBrowserSession('kink');
|
||||
const { tab } = await http.getBrowserSession('kink', { headless: false, useGlobalBrowser: false, useProxy: true });
|
||||
const url = `${channel.parent.url}/search?type=shoots&channelIds=${channel.parameters?.slug || channel.slug}&sort=published&page=${page}`;
|
||||
const res = await tab.goto(url);
|
||||
const status = res.status();
|
||||
|
|
|
@ -30,7 +30,7 @@ const limiters = {
|
|||
|
||||
const bypassSessions = new Map();
|
||||
|
||||
let browser = null;
|
||||
const browsers = new Map();
|
||||
|
||||
Promise.config({
|
||||
cancellation: true,
|
||||
|
@ -169,15 +169,23 @@ function extractJson(body, headers) {
|
|||
|
||||
async function getBrowserSession(identifier, options = {}) {
|
||||
return limiters.bypass.schedule(async () => {
|
||||
if (!browser) {
|
||||
browser = await puppeteer.launch({
|
||||
headless: typeof options.bypass?.headless === 'undefined' ? 'new' : options.bypass.headless,
|
||||
const browserIdentifier = options.useGlobalBrowser === false ? identifier : null;
|
||||
|
||||
if (!browsers.has(browserIdentifier)) {
|
||||
const newBrowser = await puppeteer.launch({
|
||||
headless: typeof options.headless === 'undefined' ? 'new' : options.headless,
|
||||
args: [
|
||||
...(options.useProxy ? [`--proxy-server=${config.proxy.host}:${config.proxy.port}`] : []),
|
||||
],
|
||||
// headless: false,
|
||||
});
|
||||
|
||||
browsers.set(browserIdentifier, newBrowser);
|
||||
|
||||
logger.info('Initialized puppeteer browser');
|
||||
}
|
||||
|
||||
const browser = browsers.get(browserIdentifier);
|
||||
const tab = await browser.newPage();
|
||||
|
||||
logger.verbose(`Opened puppeteer tab${identifier ? ` for ${identifier}` : ''}`);
|
||||
|
@ -280,7 +288,7 @@ async function destroyBypassSessions() {
|
|||
}
|
||||
|
||||
async function destroyBrowserSessions() {
|
||||
await browser?.close();
|
||||
await Promise.all(Array.from(browsers.values(), async (browser) => browser?.close()));
|
||||
}
|
||||
|
||||
async function bypassCloudflareRequest(url, method, body, cloudflareBypass, options, attempts = 0) {
|
||||
|
@ -485,7 +493,7 @@ module.exports = {
|
|||
patch,
|
||||
session: getSession,
|
||||
cookieJar: getCookieJar,
|
||||
browser,
|
||||
browsers,
|
||||
getBrowserSession,
|
||||
getBypassSession,
|
||||
getSession,
|
||||
|
|
Loading…
Reference in New Issue