Updated Karups scraper.

This commit is contained in:
DebaucheryLibrarian 2026-02-01 03:03:21 +01:00
parent b4877d16da
commit 0511b5a4a4
6 changed files with 85 additions and 36 deletions

View File

@ -6,7 +6,7 @@
},
"parserOptions": {
"parser": "@babel/eslint-parser",
"ecmaVersion": "latest",
"ecmaVersion": 2024,
"sourceType": "script"
},
"rules": {

9
package-lock.json generated
View File

@ -94,7 +94,7 @@
"tunnel": "0.0.6",
"ua-parser-js": "^1.0.37",
"undici": "^5.28.1",
"unprint": "^0.18.13",
"unprint": "^0.18.14",
"url-pattern": "^1.0.3",
"v-tooltip": "^2.1.3",
"video.js": "^8.6.1",
@ -20380,9 +20380,10 @@
}
},
"node_modules/unprint": {
"version": "0.18.13",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.13.tgz",
"integrity": "sha512-vjUF7X7/dg2Os/zesJ0+23eVc7NH2oKzspPSyBzcIx6IuEcVm1rdlD9dAxdaRMUNBWEeA5ekyk263CBI3lyaBQ==",
"version": "0.18.14",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.14.tgz",
"integrity": "sha512-6sHW3/2W2hNTuE/EcxM8CJ7ZX+JWFmWS0G7OuCYz9CAYX2bb6pAQ9Eaz0FvqCqqk1GjaxHEjWBhQjoACfIuiCA==",
"license": "ISC",
"dependencies": {
"bottleneck": "^2.19.5",
"cookie": "^1.1.1",

View File

@ -153,7 +153,7 @@
"tunnel": "0.0.6",
"ua-parser-js": "^1.0.37",
"undici": "^5.28.1",
"unprint": "^0.18.13",
"unprint": "^0.18.14",
"url-pattern": "^1.0.3",
"v-tooltip": "^2.1.3",
"video.js": "^8.6.1",

View File

@ -1,6 +1,7 @@
'use strict';
const qu = require('../utils/qu');
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const channelSlugs = {
@ -16,34 +17,66 @@ function scrapeAll(scenes) {
release.url = query.url('a');
release.entryId = new URL(release.url).pathname.match(/(\d+)\.html/)?.[1];
release.title = query.cnt('.title');
release.title = query.content('.title');
release.date = query.date('.date', 'MMM Do, YYYY');
release.channel = channelSlugs[query.cnt('.site')];
release.channel = channelSlugs[query.content('.site')];
release.poster = query.img('.thumb img');
const poster = query.img('.thumb img');
if (poster) {
release.poster = Array.from(new Set([
poster.replace('.jpg', '-feat_lg.jpg'),
poster,
]));
}
return release;
});
}
function scrapeScene({ query }, url) {
/**
 * Fetch one page of a channel's video listing and scrape the scenes on it.
 *
 * @param {object} channel - entity whose `url` is the base of the listing URL; also passed through to `scrapeAll`
 * @param {number|string} page - page identifier interpolated into `/videos/page${page}.html`
 * @returns {Promise<*>} the result of `scrapeAll` when the response is ok, otherwise `res.status`
 */
async function fetchLatest(channel, page) {
const res = await unprint.get(`${channel.url}/videos/page${page}.html`, {
// NOTE(review): presumably makes `res.context` a list with one entry per matched listing item — confirm against unprint docs
selectAll: '.listing-videos .item',
cookies: {
// NOTE(review): presumably pre-dismisses a warning interstitial so the listing is served — confirm
warningHidden: 'hide',
},
});
if (res.ok) {
return scrapeAll(res.context, channel);
}
// request was not ok: hand the status back to the caller instead of scraped scenes
return res.status;
}
function scrapeScene({ query }, { url }) {
const release = {};
release.entryId = new URL(url).pathname.match(/(\d+)\.html/)?.[1];
release.title = query.cnt('.title');
release.title = query.content('.title');
release.description = query.content('.content-information-description p');
release.date = query.date('.date .content', 'MMM Do, YYYY');
release.actors = query.all('.models .content a').map((modelEl) => ({
name: query.cnt(modelEl),
url: query.url(modelEl, null),
name: unprint.query.content(modelEl),
url: unprint.query.url(modelEl, null),
}));
release.poster = query.poster();
release.photos = query.imgs('.video-thumbs img').slice(1);
// videos and photos seem to have been removed; query with educated-guess selectors just in case
const poster = query.poster('.video-player video') || query.img('.video-poster img');
release.trailer = query.video();
if (poster) {
release.poster = Array.from(new Set([
poster,
poster.replace('-feat_lg', ''),
]));
}
release.photos = query.imgs('.video-thumbs img').slice(1);
release.trailer = query.video('.video-player source');
return release;
}
@ -54,38 +87,47 @@ function scrapeProfile({ query }, entity) {
profile.gender = 'female';
profile.avatar = query.img('.model-thumb img[src*=".jpg"]');
profile.scenes = scrapeAll(qu.initAll(query.all('.listing-videos .item')), entity);
profile.scenes = scrapeAll(unprint.initAll(query.all('.listing-videos .item')), entity);
return profile;
}
async function fetchLatest(channel, page) {
const res = await qu.getAll(`${channel.url}/videos/page${page}.html`, '.listing-videos .item');
if (res.ok) {
return scrapeAll(res.items, channel);
async function getActorUrl(actor) {
if (actor.url) {
return actor.url;
}
return res.status;
const res = await unprint.get(`https://www.karups.com/models/search/${actor.slug}/`, {
selectAll: '.listing-models .item',
cookies: {
warningHidden: 'hide',
},
});
if (!res.ok) {
return res.status;
}
const actorUrl = res.context.find((item) => slugify(item.query.content('.title')) === actor.slug)?.query.url('a');
return actorUrl;
}
async function fetchProfile(baseActor, entity) {
const searchRes = await qu.getAll(`https://www.karups.com/models/search/${baseActor.slug}/`, '.listing-models .item');
if (!searchRes.ok) {
return searchRes.status;
}
const actorUrl = searchRes.items.find((item) => slugify(item.query.cnt('.title')) === baseActor.slug)?.query.url('a');
async function fetchProfile(actor, entity) {
const actorUrl = await getActorUrl(actor);
if (!actorUrl) {
return null;
}
const actorRes = await qu.get(actorUrl);
const actorRes = await unprint.get(actorUrl, {
cookies: {
warningHidden: 'hide',
},
});
if (actorRes.ok) {
return scrapeProfile(actorRes.item, entity);
return scrapeProfile(actorRes.context, entity);
}
return actorRes.status;
@ -94,6 +136,10 @@ async function fetchProfile(baseActor, entity) {
module.exports = {
fetchLatest,
fetchProfile,
scrapeScene,
deprecated: true,
scrapeScene: {
scraper: scrapeScene,
cookies: {
warningHidden: 'hide',
},
},
};

View File

@ -2,6 +2,7 @@
"extends": "airbnb-base",
"parserOptions": {
"parser": "@babel/eslint-parser",
"ecmaVersion": "latest",
"sourceType": "script"
},
"rules": {

View File

@ -227,6 +227,7 @@ const actors = [
{ entity: 'pornhub', name: 'Lexi Luna', fields: ['avatar', 'gender', 'ethnicity', 'description', 'birthPlace', 'measurements', 'naturalBoobs', 'height', 'weight', 'hairColor', 'hasPiercings', 'hasTattoos'] },
{ entity: 'fullpornnetwork', name: 'Kenzie Reeves', fields: ['avatar', 'description'] },
{ entity: 'meidenvanholland', name: 'Izzy Bizzy Bang Bang', fields: ['avatar', 'description'] },
{ entity: 'karups', name: 'Peach Lollypop', fields: ['avatar', 'gender'] },
];
const actorScrapers = scrapers.actors;