158 lines
4.0 KiB
JavaScript
Executable File
158 lines
4.0 KiB
JavaScript
Executable File
'use strict';
|
|
|
|
const unprint = require('unprint');
|
|
|
|
const slugify = require('../utils/slugify');
|
|
|
|
// Maps the abbreviation read from a listing tile's `.site` element to the channel slug.
// Frozen because it is a shared lookup table that is only ever read.
const channelSlugs = Object.freeze({
  kpc: 'karupsprivatecollection',
  kha: 'karupshometownamateurs',
  kow: 'karupsolderwomen',
});
|
|
|
|
/**
 * Scrape a list of scene tiles into release objects.
 *
 * @param {Array<{ query: object }>} scenes - One item per tile, each exposing a `query` helper
 *   (in this file they come from `selectAll` results or `unprint.initAll`).
 * @returns {object[]} One release per tile with `url`, `entryId`, `title`, `date`, `channel`
 *   and, when the thumbnail points to an actual file, `poster`.
 */
function scrapeAll(scenes) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('a');

    // A tile without a link must not abort the whole listing: new URL() throws on a missing URL.
    release.entryId = release.url
      ? new URL(release.url).pathname.match(/(\d+)\.html/)?.[1]
      : null;

    release.title = query.content('.title');
    release.date = query.date('.date', ['MMM Do, YYYY', 'DD MMM YYYY'], { match: null });

    release.channel = channelSlugs[query.content('.site')];

    const poster = query.img('.thumb img');

    if (poster && /\.\w{3,4}$/.test(poster)) { // missing poster points to https://media.karups.com/thumbs_pg/
      // Try the large featured variant first; the Set drops the duplicate when the replace is a no-op.
      release.poster = Array.from(new Set([
        poster.replace('.jpg', '-feat_lg.jpg'),
        poster,
      ]));
    }

    return release;
  });
}
|
|
|
|
/**
 * Fetch one page of a channel's video listing and scrape its tiles.
 *
 * @param {object} channel - Channel entity; its `url` is the base the listing path is resolved against.
 * @param {number} page - Page number interpolated into the listing path.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request failed.
 */
async function fetchLatest(channel, page) {
  // Resolve relative to the full channel URL rather than its origin: some sites need the
  // trailing slash respected, and a channel may live under a path.
  const listingUrl = new URL(`./videos/page${page}.html`, channel.url);

  const res = await unprint.get(listingUrl.href, {
    selectAll: '.listing-videos .item',
    cookies: { warningHidden: 'hide' },
  });

  return res.ok
    ? scrapeAll(res.context, channel)
    : res.status;
}
|
|
|
|
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ query: object }} context - Page context exposing the `query` helper.
 * @param {{ url: string }} base - Only `url` is used; the entry ID is taken from its path.
 * @returns {object} Release with `entryId`, `title`, `description`, `date`, `actors`,
 *   optionally `poster`, then `photos` and `trailer`.
 */
function scrapeScene({ query }, { url }) {
  const [, entryId] = new URL(url).pathname.match(/(\d+)\.html/) ?? [];

  const title = query.content('.title');
  const description = query.content('.content-information-description p');
  const date = query.date('.date .content', 'MMM Do, YYYY');

  const actors = query.all('.models .content a').map((actorEl) => {
    const name = unprint.query.content(actorEl);
    const actorUrl = unprint.query.url(actorEl, null);

    return { name, url: actorUrl };
  });

  const release = {
    entryId,
    title,
    description,
    date,
    actors,
  };

  // Videos and photos seem to have been removed from the site; these selectors are an educated guess.
  const posterSource = query.poster('.video-player video') || query.img('.video-poster img');

  // A missing poster points to https://media.karups.com/thumbs_pg/, which has no file extension.
  const hasPosterFile = Boolean(posterSource) && /\.\w{3,4}$/.test(posterSource);

  if (hasPosterFile) {
    const candidates = [posterSource, posterSource.replace('-feat_lg', '')];

    release.poster = [...new Set(candidates)];
  }

  const thumbs = query.imgs('.video-thumbs img');

  release.photos = thumbs.slice(1);
  release.trailer = query.video('.video-player source');

  return release;
}
|
|
|
|
/**
 * Scrape a model page into a profile object.
 *
 * @param {{ query: object }} context - Page context exposing the `query` helper.
 * @param {object} entity - Forwarded to `scrapeAll` together with the model's scene tiles.
 * @returns {object} Profile with `age`, `height`, `weight`, `penisLength`, optionally
 *   `isCircumcised`, plus `avatar` and `scenes`.
 */
function scrapeProfile({ query }, entity) {
  const profile = {};

  // Key the bio table by slugified label so rows can be read as bio.date_of_birth, bio.height, etc.
  const bio = Object.fromEntries(query.all('.model-table .item').map((bioEl) => [
    slugify(unprint.query.content(bioEl, '.label'), '_'),
    unprint.query.content(bioEl, '.value'),
  ]));

  profile.age = unprint.extractNumber(bio.date_of_birth); // seemingly only used on Boyfun and always age
  profile.height = unprint.extractNumber(bio.height);
  profile.weight = unprint.extractNumber(bio.weight); // read the weight row, not the height row

  profile.penisLength = unprint.extractNumber(bio.dick_size);

  // Leave isCircumcised unset when the row is missing or holds any other value.
  const cutStatus = bio.cut_uncut?.toLowerCase();

  if (cutStatus === 'cut') profile.isCircumcised = true;
  if (cutStatus === 'uncut') profile.isCircumcised = false;

  profile.avatar = query.img('.model-thumb img[src*=".jpg"]');
  profile.scenes = scrapeAll(unprint.initAll(query.all('.listing-videos .item')), entity);

  return profile;
}
|
|
|
|
/**
 * Resolve the profile page URL for an actor.
 *
 * @param {object} actor - Uses `actor.url` when set, otherwise searches by `actor.slug`.
 * @param {object} entity - Its `origin` is the base of the model search URL.
 * @returns {Promise<string|number|undefined>} The known or found URL, the response status when the
 *   search request failed, or `undefined` when no search result matches the slug.
 */
async function getActorUrl(actor, entity) {
  if (actor.url) return actor.url;

  const searchUrl = `${entity.origin}/models/search/${actor.slug}/`;

  const res = await unprint.get(searchUrl, {
    selectAll: '.listing-models .item',
    cookies: { warningHidden: 'hide' },
  });

  if (!res.ok) return res.status;

  // Only accept the result whose slugified title equals the slug that was searched for.
  const matchingItem = res.context.find(({ query }) => {
    const titleSlug = slugify(query.content('.title'));

    return titleSlug === actor.slug;
  });

  return matchingItem?.query.url('a');
}
|
|
|
|
/**
 * Fetch and scrape an actor's profile page.
 *
 * @param {object} actor - Actor to look up; passed to `getActorUrl`.
 * @param {object} entity - Entity passed to `getActorUrl` and `scrapeProfile`.
 * @returns {Promise<object|number|null>} The scraped profile, `null` when no profile URL was found,
 *   or a response status when the search or the profile request failed.
 */
async function fetchProfile(actor, entity) {
  const actorUrl = await getActorUrl(actor, entity);

  if (!actorUrl) {
    return null;
  }

  // getActorUrl returns the response status (a number) when its search request fails;
  // pass that on instead of trying to fetch it as if it were a URL.
  if (typeof actorUrl !== 'string') {
    return actorUrl;
  }

  const actorRes = await unprint.get(actorUrl, {
    cookies: {
      warningHidden: 'hide',
    },
  });

  if (actorRes.ok) {
    return scrapeProfile(actorRes.context, entity);
  }

  return actorRes.status;
}
|
|
|
|
module.exports = {
|
|
fetchLatest,
|
|
fetchProfile,
|
|
scrapeScene: {
|
|
scraper: scrapeScene,
|
|
cookies: {
|
|
warningHidden: 'hide',
|
|
},
|
|
},
|
|
};
|