Added proxy support for puppeteer session, used in Kink scraper.
This commit is contained in:
parent
091149a75b
commit
3c595bf780
|
@ -64,7 +64,7 @@ function scrapeAll(scenes, entity) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(channel, page = 1) {
|
async function fetchLatest(channel, page = 1) {
|
||||||
const { tab } = await http.getBrowserSession('kink');
|
const { tab } = await http.getBrowserSession('kink', { headless: false, useGlobalBrowser: false, useProxy: true });
|
||||||
const url = `${channel.parent.url}/search?type=shoots&channelIds=${channel.parameters?.slug || channel.slug}&sort=published&page=${page}`;
|
const url = `${channel.parent.url}/search?type=shoots&channelIds=${channel.parameters?.slug || channel.slug}&sort=published&page=${page}`;
|
||||||
const res = await tab.goto(url);
|
const res = await tab.goto(url);
|
||||||
const status = res.status();
|
const status = res.status();
|
||||||
|
|
|
@ -30,7 +30,7 @@ const limiters = {
|
||||||
|
|
||||||
const bypassSessions = new Map();
|
const bypassSessions = new Map();
|
||||||
|
|
||||||
let browser = null;
|
const browsers = new Map();
|
||||||
|
|
||||||
Promise.config({
|
Promise.config({
|
||||||
cancellation: true,
|
cancellation: true,
|
||||||
|
@ -169,15 +169,23 @@ function extractJson(body, headers) {
|
||||||
|
|
||||||
async function getBrowserSession(identifier, options = {}) {
|
async function getBrowserSession(identifier, options = {}) {
|
||||||
return limiters.bypass.schedule(async () => {
|
return limiters.bypass.schedule(async () => {
|
||||||
if (!browser) {
|
const browserIdentifier = options.useGlobalBrowser === false ? identifier : null;
|
||||||
browser = await puppeteer.launch({
|
|
||||||
headless: typeof options.bypass?.headless === 'undefined' ? 'new' : options.bypass.headless,
|
if (!browsers.has(browserIdentifier)) {
|
||||||
|
const newBrowser = await puppeteer.launch({
|
||||||
|
headless: typeof options.headless === 'undefined' ? 'new' : options.headless,
|
||||||
|
args: [
|
||||||
|
...(options.useProxy ? [`--proxy-server=${config.proxy.host}:${config.proxy.port}`] : []),
|
||||||
|
],
|
||||||
// headless: false,
|
// headless: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
browsers.set(browserIdentifier, newBrowser);
|
||||||
|
|
||||||
logger.info('Initialized puppeteer browser');
|
logger.info('Initialized puppeteer browser');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const browser = browsers.get(browserIdentifier);
|
||||||
const tab = await browser.newPage();
|
const tab = await browser.newPage();
|
||||||
|
|
||||||
logger.verbose(`Opened puppeteer tab${identifier ? ` for ${identifier}` : ''}`);
|
logger.verbose(`Opened puppeteer tab${identifier ? ` for ${identifier}` : ''}`);
|
||||||
|
@ -280,7 +288,7 @@ async function destroyBypassSessions() {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function destroyBrowserSessions() {
|
async function destroyBrowserSessions() {
|
||||||
await browser?.close();
|
await Promise.all(Array.from(browsers.values(), async (browser) => browser?.close()));
|
||||||
}
|
}
|
||||||
|
|
||||||
async function bypassCloudflareRequest(url, method, body, cloudflareBypass, options, attempts = 0) {
|
async function bypassCloudflareRequest(url, method, body, cloudflareBypass, options, attempts = 0) {
|
||||||
|
@ -485,7 +493,7 @@ module.exports = {
|
||||||
patch,
|
patch,
|
||||||
session: getSession,
|
session: getSession,
|
||||||
cookieJar: getCookieJar,
|
cookieJar: getCookieJar,
|
||||||
browser,
|
browsers,
|
||||||
getBrowserSession,
|
getBrowserSession,
|
||||||
getBypassSession,
|
getBypassSession,
|
||||||
getSession,
|
getSession,
|
||||||
|
|
Loading…
Reference in New Issue