From 0511b5a4a47c145d237e2a433bc13d878c4ad998 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sun, 1 Feb 2026 03:03:21 +0100 Subject: [PATCH] Updated Karups scraper. --- .eslintrc | 2 +- package-lock.json | 9 ++-- package.json | 2 +- src/scrapers/karups.js | 106 +++++++++++++++++++++++++++++------------ tests/.eslintrc | 1 + tests/profiles.js | 1 + 6 files changed, 85 insertions(+), 36 deletions(-) diff --git a/.eslintrc b/.eslintrc index cb9654e3..202d4f21 100755 --- a/.eslintrc +++ b/.eslintrc @@ -6,7 +6,7 @@ }, "parserOptions": { "parser": "@babel/eslint-parser", - "ecmaVersion": "latest", + "ecmaVersion": 2024, "sourceType": "script" }, "rules": { diff --git a/package-lock.json b/package-lock.json index 06a0e2eb..51d0cef5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -94,7 +94,7 @@ "tunnel": "0.0.6", "ua-parser-js": "^1.0.37", "undici": "^5.28.1", - "unprint": "^0.18.13", + "unprint": "^0.18.14", "url-pattern": "^1.0.3", "v-tooltip": "^2.1.3", "video.js": "^8.6.1", @@ -20380,9 +20380,10 @@ } }, "node_modules/unprint": { - "version": "0.18.13", - "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.13.tgz", - "integrity": "sha512-vjUF7X7/dg2Os/zesJ0+23eVc7NH2oKzspPSyBzcIx6IuEcVm1rdlD9dAxdaRMUNBWEeA5ekyk263CBI3lyaBQ==", + "version": "0.18.14", + "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.14.tgz", + "integrity": "sha512-6sHW3/2W2hNTuE/EcxM8CJ7ZX+JWFmWS0G7OuCYz9CAYX2bb6pAQ9Eaz0FvqCqqk1GjaxHEjWBhQjoACfIuiCA==", + "license": "ISC", "dependencies": { "bottleneck": "^2.19.5", "cookie": "^1.1.1", diff --git a/package.json b/package.json index 022486fd..6b00ad9e 100755 --- a/package.json +++ b/package.json @@ -153,7 +153,7 @@ "tunnel": "0.0.6", "ua-parser-js": "^1.0.37", "undici": "^5.28.1", - "unprint": "^0.18.13", + "unprint": "^0.18.14", "url-pattern": "^1.0.3", "v-tooltip": "^2.1.3", "video.js": "^8.6.1", diff --git a/src/scrapers/karups.js b/src/scrapers/karups.js index ee49591f..70bcc515 100755 --- a/src/scrapers/karups.js +++ b/src/scrapers/karups.js @@ -1,6 +1,7 @@ 'use strict'; -const qu = require('../utils/qu'); +const unprint = require('unprint'); + const slugify = require('../utils/slugify'); const channelSlugs = { @@ -16,34 +17,66 @@ function scrapeAll(scenes) { release.url = query.url('a'); release.entryId = new URL(release.url).pathname.match(/(\d+)\.html/)?.[1]; - release.title = query.cnt('.title'); + release.title = query.content('.title'); release.date = query.date('.date', 'MMM Do, YYYY'); - release.channel = channelSlugs[query.cnt('.site')]; + release.channel = channelSlugs[query.content('.site')]; - release.poster = query.img('.thumb img'); + const poster = query.img('.thumb img'); + + if (poster) { + release.poster = Array.from(new Set([ + poster.replace('.jpg', '-feat_lg.jpg'), + poster, + ])); + } return release; }); } -function scrapeScene({ query }, url) { +async function fetchLatest(channel, page) { + const res = await unprint.get(`${channel.url}/videos/page${page}.html`, { + selectAll: '.listing-videos .item', + cookies: { + warningHidden: 'hide', + }, + }); + + if (res.ok) { + return scrapeAll(res.context, channel); + } + + return res.status; +} + +function scrapeScene({ query }, { url }) { const release = {}; release.entryId = new URL(url).pathname.match(/(\d+)\.html/)?.[1]; - release.title = query.cnt('.title'); + release.title = query.content('.title'); + release.description = query.content('.content-information-description p'); + release.date = query.date('.date .content', 'MMM Do, YYYY'); release.actors = query.all('.models .content a').map((modelEl) => ({ - name: query.cnt(modelEl), - url: query.url(modelEl, null), + name: unprint.query.content(modelEl), + url: unprint.query.url(modelEl, null), })); - release.poster = query.poster(); - release.photos = query.imgs('.video-thumbs img').slice(1); + // videos and photos seem to be removed, query educated guess just in case + const poster = query.poster('.video-player video') || query.img('.video-poster img'); - release.trailer = query.video(); + if (poster) { + release.poster = Array.from(new Set([ + poster, + poster.replace('-feat_lg', ''), + ])); + } + + release.photos = query.imgs('.video-thumbs img').slice(1); + release.trailer = query.video('.video-player source'); return release; } @@ -54,38 +87,47 @@ function scrapeProfile({ query }, entity) { profile.gender = 'female'; profile.avatar = query.img('.model-thumb img[src*=".jpg"]'); - profile.scenes = scrapeAll(qu.initAll(query.all('.listing-videos .item')), entity); + profile.scenes = scrapeAll(unprint.initAll(query.all('.listing-videos .item')), entity); return profile; } -async function fetchLatest(channel, page) { - const res = await qu.getAll(`${channel.url}/videos/page${page}.html`, '.listing-videos .item'); - - if (res.ok) { - return scrapeAll(res.items, channel); +async function getActorUrl(actor) { + if (actor.url) { + return actor.url; } - return res.status; + const res = await unprint.get(`https://www.karups.com/models/search/${actor.slug}/`, { + selectAll: '.listing-models .item', + cookies: { + warningHidden: 'hide', + }, + }); + + if (!res.ok) { + return res.status; + } + + const actorUrl = res.context.find((item) => slugify(item.query.content('.title')) === actor.slug)?.query.url('a'); + + return actorUrl; } -async function fetchProfile(baseActor, entity) { - const searchRes = await qu.getAll(`https://www.karups.com/models/search/${baseActor.slug}/`, '.listing-models .item'); - - if (!searchRes.ok) { - return searchRes.status; - } - - const actorUrl = searchRes.items.find((item) => slugify(item.query.cnt('.title')) === baseActor.slug)?.query.url('a'); +async function fetchProfile(actor, entity) { + const actorUrl = await getActorUrl(actor); if (!actorUrl) { return null; } - const actorRes = await qu.get(actorUrl); + const actorRes = await unprint.get(actorUrl, { + cookies: { + warningHidden: 'hide', + }, + }); if (actorRes.ok) { - return scrapeProfile(actorRes.item, entity); + return scrapeProfile(actorRes.context, entity); } return actorRes.status; @@ -94,6 +136,10 @@ async function fetchProfile(baseActor, entity) { module.exports = { fetchLatest, fetchProfile, - scrapeScene, - deprecated: true, + scrapeScene: { + scraper: scrapeScene, + cookies: { + warningHidden: 'hide', + }, + }, }; diff --git a/tests/.eslintrc b/tests/.eslintrc index e4db6f58..eb0e14f9 100755 --- a/tests/.eslintrc +++ b/tests/.eslintrc @@ -2,6 +2,7 @@ "extends": "airbnb-base", "parserOptions": { "parser": "@babel/eslint-parser", + "ecmaVersion": "latest", "sourceType": "script" }, "rules": { diff --git a/tests/profiles.js b/tests/profiles.js index 8c21e808..ef964788 100644 --- a/tests/profiles.js +++ b/tests/profiles.js @@ -227,6 +227,7 @@ const actors = [ { entity: 'pornhub', name: 'Lexi Luna', fields: ['avatar', 'gender', 'ethnicity', 'description', 'birthPlace', 'measurements', 'naturalBoobs', 'height', 'weight', 'hairColor', 'hasPiercings', 'hasTattoos'] }, { entity: 'fullpornnetwork', name: 'Kenzie Reeves', fields: ['avatar', 'description'] }, { entity: 'meidenvanholland', name: 'Izzy Bizzy Bang Bang', fields: ['avatar', 'description'] }, + { entity: 'karups', name: 'Peach Lollypop', fields: ['avatar', 'gender'] }, ]; const actorScrapers = scrapers.actors;