From d4bcf96c64feecc73451cf7bfd8e1eb387ecb69a Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Tue, 30 Dec 2025 02:28:03 +0100 Subject: [PATCH] Refactored Naughty America, using unprint browser. --- package-lock.json | 8 +-- package.json | 2 +- src/app.js | 4 ++ src/scrapers/naughtyamerica.js | 105 ++++++++++++--------------------- 4 files changed, 46 insertions(+), 73 deletions(-) diff --git a/package-lock.json b/package-lock.json index 92adde18..ff8882aa 100644 --- a/package-lock.json +++ b/package-lock.json @@ -92,7 +92,7 @@ "tunnel": "0.0.6", "ua-parser-js": "^1.0.37", "undici": "^5.28.1", - "unprint": "^0.17.5", + "unprint": "^0.17.6", "url-pattern": "^1.0.3", "v-tooltip": "^2.1.3", "video.js": "^8.6.1", @@ -18403,9 +18403,9 @@ } }, "node_modules/unprint": { - "version": "0.17.5", - "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.17.5.tgz", - "integrity": "sha512-To51YdA26KMZbpI4arqmQiq42EId5jyBwoOGIs3AIP8vNlis/z2dwBCxINi7h5lUfcEhKGcLmlPyk5Oo9dSKJA==", + "version": "0.17.6", + "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.17.6.tgz", + "integrity": "sha512-ZaoWMq46WPMJ2k6HSW/Ue8OQ/UTrGWUfPbWCTjHvLlk02Rnguodi/vC8dNJjfBCBATXiqwVmnXp03nmSfqqEKg==", "dependencies": { "axios": "^0.27.2", "bottleneck": "^2.19.5", diff --git a/package.json b/package.json index 036969eb..12dce26b 100755 --- a/package.json +++ b/package.json @@ -151,7 +151,7 @@ "tunnel": "0.0.6", "ua-parser-js": "^1.0.37", "undici": "^5.28.1", - "unprint": "^0.17.5", + "unprint": "^0.17.6", "url-pattern": "^1.0.3", "v-tooltip": "^2.1.3", "video.js": "^8.6.1", diff --git a/src/app.js b/src/app.js index 6d50290f..7fb1e716 100755 --- a/src/app.js +++ b/src/app.js @@ -35,6 +35,10 @@ unprint.options({ headers: { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36', }, + context: { + // browser requests + userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36', + }, limits: { ...config.limits, default: { diff --git a/src/scrapers/naughtyamerica.js b/src/scrapers/naughtyamerica.js index 1beccc1e..a13ba30d 100755 --- a/src/scrapers/naughtyamerica.js +++ b/src/scrapers/naughtyamerica.js @@ -2,10 +2,9 @@ const unprint = require('unprint'); -const http = require('../utils/http'); const slugify = require('../utils/slugify'); +const { stripQuery } = require('../utils/url'); -// Naughty America network function scrapeLatest(scenes, channel) { return scenes.map(({ query }) => { const release = {}; @@ -25,11 +24,16 @@ function scrapeLatest(scenes, channel) { })); release.poster = [ - ...(query.sourceSet('source[data-srcset*="scenes/"][type="image/jpeg"]', 'data-srcset') || []), + ...(query + .sourceSet('source[data-srcset*="scenes/"][type="image/jpeg"]', 'data-srcset', { includeDescriptor: true }) + ?.toSorted((sourceA, sourceB) => sourceB.density - sourceA.density) + .map((source) => source.url) || []), query.img('.main-scene-img', { attribute: 'srcset' }), query.img('.scene-thumb'), ].filter(Boolean); + release.teaser = query.video('a[data-desktop-video]', { attribute: 'data-desktop-video' }); + release.tags = query.contents('.flag-bg'); release.qualities = [ @@ -44,38 +48,22 @@ function scrapeLatest(scenes, channel) { } async function fetchLatest(channel, page = 1) { - const { tab } = await http.getBrowserSession('naughtyamerica', { useGlobalBrowser: false, useProxy: true, headless: false }); const url = `${channel.url}${channel.parameters?.scenes || ''}?page=${page}`; - const res = await tab.goto(url); - const status = res.status(); + const res = await unprint.browserRequest(url, { + selectAll: '.site-list .scene-item, .panel-body', + async control(ctx) { + await ctx.locator('.site-list').hover({ trial: true, timeout: 10000 }); // wait for trailer to initialize + }, + }); - if (status === 200) { - const html = await tab.content(); - const items = unprint.initAll(html, '.site-list .scene-item, .panel-body'); - - const scenes = scrapeLatest(items, channel); - - await tab.close(); + if (res.ok) { + const scenes = scrapeLatest(res.context, channel); return scenes; } - await tab.close(); - - return status; -} - -/* -async function fetchLatest(site, page = 1) { - const res = await unprint.get(`${site.url}${site.parameters?.scenes || ''}?page=${page}`, { selectAll: '.site-list .scene-item, .panel-body' }); - - if (res.ok) { - return scrapeLatest(res.context, site); - } - return res.status; } -*/ function scrapeScene({ query }, { url }) { const release = {}; @@ -91,7 +79,7 @@ function scrapeScene({ query }, { url }) { release.actors = query.exists('.performer-list') || query.exists('.scene-info a[href*="/pornstar"].scene-title') // title links to performer in live scenes ? query.all('.performer-list a, .grey-performers a, .scene-info a[href*="/pornstar"].scene-title').map((actorEl) => ({ name: unprint.query.content(actorEl), - url: unprint.query.url(actorEl, null), + url: stripQuery(unprint.query.url(actorEl, null)), })) : query.content('.grey-performers')?.split(',').map((actorName) => actorName.trim()); @@ -108,7 +96,7 @@ function scrapeScene({ query }, { url }) { unprint.query.img(imgEl, 'img', { attribute: 'srcset' }), ]); - const trailer = query.video('video source'); + const trailer = query.video('#triggerPlay video source'); if (trailer) { release.trailer = [ @@ -134,25 +122,19 @@ function scrapeScene({ query }, { url }) { } async function fetchScene(url, _channel) { - const { tab } = await http.getBrowserSession('naughtyamerica', { useGlobalBrowser: false, useProxy: true }); - const res = await tab.goto(url); + const res = await unprint.browserRequest(url, { + async control(ctx) { + await ctx.locator('.scene-info').hover({ trial: true, timeout: 30000 }); // wait for trailer to initialize + }, + }); - const status = res.status(); - - if (status === 200) { - const html = await tab.content(); - const item = unprint.init(html); - - const scene = scrapeScene(item, { url }); - - await tab.close(); + if (res.ok) { + const scene = scrapeScene(res.context, { url }); return scene; } - await tab.close(); - - return status; + return res.controlError || res.status; } async function scrapeProfile({ query }) { @@ -165,39 +147,26 @@ async function scrapeProfile({ query }) { } async function fetchProfile({ slug }, { channel }) { - const { tab } = await http.getBrowserSession('naughtyamerica', { useGlobalBrowser: false, useProxy: true }); - const url = `${channel.url}/pornstar/${slug}`; - const res = await tab.goto(url); + const url = unprint.prefixUrl(`/pornstar/${slug}`, channel.url); - const status = res.status(); + const res = await unprint.browserRequest(url, { + browser: { + headless: false, + }, + select: '.bio-info, .performer-details', + async control(ctx) { + await ctx.locator('.bio-info').hover({ trial: true, timeout: 30000 }); // wait for trailer to initialize + }, + }); - if (status === 200) { - const html = await tab.content(); - const item = unprint.init(html, '.bio-info, .performer-details'); - - const profile = scrapeProfile(item, { url }); - - await tab.close(); + if (res.ok) { + const profile = scrapeProfile(res.context, { url }); return profile; } - await tab.close(); - - return status; -} - -/* -async function fetchProfile({ slug }, { channel }) { - const res = await unprint.get(`${channel.url}/pornstar/${slug}`, { select: '.bio-info, .performer-details' }); - - if (res.ok) { - return scrapeProfile(res.context); - } - return res.status; } -*/ module.exports = { fetchLatest,