Updated Karups scraper.

This commit is contained in:
DebaucheryLibrarian
2026-02-01 03:03:21 +01:00
parent b4877d16da
commit 0511b5a4a4
6 changed files with 85 additions and 36 deletions

View File

@@ -1,6 +1,7 @@
'use strict';
const qu = require('../utils/qu');
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const channelSlugs = {
@@ -16,34 +17,66 @@ function scrapeAll(scenes) {
release.url = query.url('a');
release.entryId = new URL(release.url).pathname.match(/(\d+)\.html/)?.[1];
release.title = query.cnt('.title');
release.title = query.content('.title');
release.date = query.date('.date', 'MMM Do, YYYY');
release.channel = channelSlugs[query.cnt('.site')];
release.channel = channelSlugs[query.content('.site')];
release.poster = query.img('.thumb img');
const poster = query.img('.thumb img');
if (poster) {
release.poster = Array.from(new Set([
poster.replace('.jpg', '-feat_lg.jpg'),
poster,
]));
}
return release;
});
}
function scrapeScene({ query }, url) {
/**
 * Fetch one page of a channel's video listing and scrape the scenes on it.
 *
 * Requests `<channel.url>/videos/page<page>.html` through `unprint.get`,
 * selecting every `.listing-videos .item` element and sending the
 * `warningHidden=hide` cookie with the request.
 *
 * @param {object} channel - Channel entity; `channel.url` is used as the base URL.
 * @param {number} page - Page number interpolated into the listing URL.
 * @returns {Promise<*>} Result of `scrapeAll` when the response is OK,
 *   otherwise the response status.
 */
async function fetchLatest(channel, page) {
  const listingUrl = `${channel.url}/videos/page${page}.html`;

  const requestOptions = {
    selectAll: '.listing-videos .item',
    cookies: {
      warningHidden: 'hide',
    },
  };

  const res = await unprint.get(listingUrl, requestOptions);

  // A failed request yields the status instead of a release list.
  return res.ok
    ? scrapeAll(res.context, channel)
    : res.status;
}
function scrapeScene({ query }, { url }) {
const release = {};
release.entryId = new URL(url).pathname.match(/(\d+)\.html/)?.[1];
release.title = query.cnt('.title');
release.title = query.content('.title');
release.description = query.content('.content-information-description p');
release.date = query.date('.date .content', 'MMM Do, YYYY');
release.actors = query.all('.models .content a').map((modelEl) => ({
name: query.cnt(modelEl),
url: query.url(modelEl, null),
name: unprint.query.content(modelEl),
url: unprint.query.url(modelEl, null),
}));
release.poster = query.poster();
release.photos = query.imgs('.video-thumbs img').slice(1);
// videos and photos seem to have been removed; query educated-guess selectors just in case
const poster = query.poster('.video-player video') || query.img('.video-poster img');
release.trailer = query.video();
if (poster) {
release.poster = Array.from(new Set([
poster,
poster.replace('-feat_lg', ''),
]));
}
release.photos = query.imgs('.video-thumbs img').slice(1);
release.trailer = query.video('.video-player source');
return release;
}
@@ -54,38 +87,47 @@ function scrapeProfile({ query }, entity) {
profile.gender = 'female';
profile.avatar = query.img('.model-thumb img[src*=".jpg"]');
profile.scenes = scrapeAll(qu.initAll(query.all('.listing-videos .item')), entity);
profile.scenes = scrapeAll(unprint.initAll(query.all('.listing-videos .item')), entity);
return profile;
}
async function fetchLatest(channel, page) {
const res = await qu.getAll(`${channel.url}/videos/page${page}.html`, '.listing-videos .item');
if (res.ok) {
return scrapeAll(res.items, channel);
async function getActorUrl(actor) {
if (actor.url) {
return actor.url;
}
return res.status;
const res = await unprint.get(`https://www.karups.com/models/search/${actor.slug}/`, {
selectAll: '.listing-models .item',
cookies: {
warningHidden: 'hide',
},
});
if (!res.ok) {
return res.status;
}
const actorUrl = res.context.find((item) => slugify(item.query.content('.title')) === actor.slug)?.query.url('a');
return actorUrl;
}
async function fetchProfile(baseActor, entity) {
const searchRes = await qu.getAll(`https://www.karups.com/models/search/${baseActor.slug}/`, '.listing-models .item');
if (!searchRes.ok) {
return searchRes.status;
}
const actorUrl = searchRes.items.find((item) => slugify(item.query.cnt('.title')) === baseActor.slug)?.query.url('a');
async function fetchProfile(actor, entity) {
const actorUrl = await getActorUrl(actor);
if (!actorUrl) {
return null;
}
const actorRes = await qu.get(actorUrl);
const actorRes = await unprint.get(actorUrl, {
cookies: {
warningHidden: 'hide',
},
});
if (actorRes.ok) {
return scrapeProfile(actorRes.item, entity);
return scrapeProfile(actorRes.context, entity);
}
return actorRes.status;
@@ -94,6 +136,10 @@ async function fetchProfile(baseActor, entity) {
module.exports = {
fetchLatest,
fetchProfile,
scrapeScene,
deprecated: true,
scrapeScene: {
scraper: scrapeScene,
cookies: {
warningHidden: 'hide',
},
},
};