Updated Karups scraper.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
'use strict';
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
const channelSlugs = {
|
||||
@@ -16,34 +17,66 @@ function scrapeAll(scenes) {
|
||||
release.url = query.url('a');
|
||||
release.entryId = new URL(release.url).pathname.match(/(\d+)\.html/)?.[1];
|
||||
|
||||
release.title = query.cnt('.title');
|
||||
release.title = query.content('.title');
|
||||
release.date = query.date('.date', 'MMM Do, YYYY');
|
||||
|
||||
release.channel = channelSlugs[query.cnt('.site')];
|
||||
release.channel = channelSlugs[query.content('.site')];
|
||||
|
||||
release.poster = query.img('.thumb img');
|
||||
const poster = query.img('.thumb img');
|
||||
|
||||
if (poster) {
|
||||
release.poster = Array.from(new Set([
|
||||
poster.replace('.jpg', '-feat_lg.jpg'),
|
||||
poster,
|
||||
]));
|
||||
}
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene({ query }, url) {
|
||||
async function fetchLatest(channel, page) {
|
||||
const res = await unprint.get(`${channel.url}/videos/page${page}.html`, {
|
||||
selectAll: '.listing-videos .item',
|
||||
cookies: {
|
||||
warningHidden: 'hide',
|
||||
},
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.context, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
function scrapeScene({ query }, { url }) {
|
||||
const release = {};
|
||||
|
||||
release.entryId = new URL(url).pathname.match(/(\d+)\.html/)?.[1];
|
||||
|
||||
release.title = query.cnt('.title');
|
||||
release.title = query.content('.title');
|
||||
release.description = query.content('.content-information-description p');
|
||||
|
||||
release.date = query.date('.date .content', 'MMM Do, YYYY');
|
||||
|
||||
release.actors = query.all('.models .content a').map((modelEl) => ({
|
||||
name: query.cnt(modelEl),
|
||||
url: query.url(modelEl, null),
|
||||
name: unprint.query.content(modelEl),
|
||||
url: unprint.query.url(modelEl, null),
|
||||
}));
|
||||
|
||||
release.poster = query.poster();
|
||||
release.photos = query.imgs('.video-thumbs img').slice(1);
|
||||
// videos and photos seem to be removed, query educated guess just in case
|
||||
const poster = query.poster('.video-player video') || query.img('.video-poster img');
|
||||
|
||||
release.trailer = query.video();
|
||||
if (poster) {
|
||||
release.poster = Array.from(new Set([
|
||||
poster,
|
||||
poster.replace('-feat_lg', ''),
|
||||
]));
|
||||
}
|
||||
|
||||
release.photos = query.imgs('.video-thumbs img').slice(1);
|
||||
release.trailer = query.video('.video-player source');
|
||||
|
||||
return release;
|
||||
}
|
||||
@@ -54,38 +87,47 @@ function scrapeProfile({ query }, entity) {
|
||||
profile.gender = 'female';
|
||||
|
||||
profile.avatar = query.img('.model-thumb img[src*=".jpg"]');
|
||||
profile.scenes = scrapeAll(qu.initAll(query.all('.listing-videos .item')), entity);
|
||||
profile.scenes = scrapeAll(unprint.initAll(query.all('.listing-videos .item')), entity);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page) {
|
||||
const res = await qu.getAll(`${channel.url}/videos/page${page}.html`, '.listing-videos .item');
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.items, channel);
|
||||
async function getActorUrl(actor) {
|
||||
if (actor.url) {
|
||||
return actor.url;
|
||||
}
|
||||
|
||||
return res.status;
|
||||
const res = await unprint.get(`https://www.karups.com/models/search/${actor.slug}/`, {
|
||||
selectAll: '.listing-models .item',
|
||||
cookies: {
|
||||
warningHidden: 'hide',
|
||||
},
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
const actorUrl = res.context.find((item) => slugify(item.query.content('.title')) === actor.slug)?.query.url('a');
|
||||
|
||||
return actorUrl;
|
||||
}
|
||||
|
||||
async function fetchProfile(baseActor, entity) {
|
||||
const searchRes = await qu.getAll(`https://www.karups.com/models/search/${baseActor.slug}/`, '.listing-models .item');
|
||||
|
||||
if (!searchRes.ok) {
|
||||
return searchRes.status;
|
||||
}
|
||||
|
||||
const actorUrl = searchRes.items.find((item) => slugify(item.query.cnt('.title')) === baseActor.slug)?.query.url('a');
|
||||
async function fetchProfile(actor, entity) {
|
||||
const actorUrl = await getActorUrl(actor);
|
||||
|
||||
if (!actorUrl) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const actorRes = await qu.get(actorUrl);
|
||||
const actorRes = await unprint.get(actorUrl, {
|
||||
cookies: {
|
||||
warningHidden: 'hide',
|
||||
},
|
||||
});
|
||||
|
||||
if (actorRes.ok) {
|
||||
return scrapeProfile(actorRes.item, entity);
|
||||
return scrapeProfile(actorRes.context, entity);
|
||||
}
|
||||
|
||||
return actorRes.status;
|
||||
@@ -94,6 +136,10 @@ async function fetchProfile(baseActor, entity) {
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
scrapeScene,
|
||||
deprecated: true,
|
||||
scrapeScene: {
|
||||
scraper: scrapeScene,
|
||||
cookies: {
|
||||
warningHidden: 'hide',
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user