'use strict'; const unprint = require('unprint'); const slugify = require('../utils/slugify'); const channelSlugs = { kpc: 'karupsprivatecollection', kha: 'karupshometownamateurs', kow: 'karupsolderwomen', }; function scrapeAll(scenes) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('a'); release.entryId = new URL(release.url).pathname.match(/(\d+)\.html/)?.[1]; release.title = query.content('.title'); release.date = query.date('.date', ['MMM Do, YYYY', 'DD MMM YYYY'], { match: null }); release.channel = channelSlugs[query.content('.site')]; const poster = query.img('.thumb img'); if (poster && /\.\w{3,4}$/.test(poster)) { // missing poster points to https://media.karups.com/thumbs_pg/ release.poster = Array.from(new Set([ poster.replace('.jpg', '-feat_lg.jpg'), poster, ])); } return release; }); } async function fetchLatest(channel, page) { const res = await unprint.get(new URL(`./videos/page${page}.html`, channel.url).href, { // some sites require a trailing slash, join paths properly; don't use origin in case channel path is used selectAll: '.listing-videos .item', cookies: { warningHidden: 'hide', }, }); if (res.ok) { return scrapeAll(res.context, channel); } return res.status; } function scrapeScene({ query }, { url }) { const release = {}; release.entryId = new URL(url).pathname.match(/(\d+)\.html/)?.[1]; release.title = query.content('.title'); release.description = query.content('.content-information-description p'); release.date = query.date('.date .content', 'MMM Do, YYYY'); release.actors = query.all('.models .content a').map((modelEl) => ({ name: unprint.query.content(modelEl), url: unprint.query.url(modelEl, null), })); // videos and photos seem to be removed, query educated guess just in case const poster = query.poster('.video-player video') || query.img('.video-poster img'); if (poster && /\.\w{3,4}$/.test(poster)) { // missing poster points to https://media.karups.com/thumbs_pg/ release.poster = Array.from(new Set([ poster, poster.replace('-feat_lg', ''), ])); } release.photos = query.imgs('.video-thumbs img').slice(1); release.trailer = query.video('.video-player source'); return release; } function scrapeProfile({ query }, entity) { const profile = {}; const bio = Object.fromEntries(query.all('.model-table .item').map((bioEl) => [ slugify(unprint.query.content(bioEl, '.label'), '_'), unprint.query.content(bioEl, '.value'), ])); profile.age = unprint.extractNumber(bio.date_of_birth); // seemingly only used on Boyfun and always age profile.height = unprint.extractNumber(bio.height); profile.weight = unprint.extractNumber(bio.height); profile.penisLength = unprint.extractNumber(bio.dick_size); if (bio.cut_uncut?.toLowerCase() === 'cut') profile.isCircumcised = true; if (bio.cut_uncut?.toLowerCase() === 'uncut') profile.isCircumcised = false; profile.avatar = query.img('.model-thumb img[src*=".jpg"]'); profile.scenes = scrapeAll(unprint.initAll(query.all('.listing-videos .item')), entity); return profile; } async function getActorUrl(actor, entity) { if (actor.url) { return actor.url; } const res = await unprint.get(`${entity.origin}/models/search/${actor.slug}/`, { selectAll: '.listing-models .item', cookies: { warningHidden: 'hide', }, }); if (!res.ok) { return res.status; } const actorUrl = res.context.find((item) => slugify(item.query.content('.title')) === actor.slug)?.query.url('a'); return actorUrl; } async function fetchProfile(actor, entity) { const actorUrl = await getActorUrl(actor, entity); if (!actorUrl) { return null; } const actorRes = await unprint.get(actorUrl, { cookies: { warningHidden: 'hide', }, }); if (actorRes.ok) { return scrapeProfile(actorRes.context, entity); } return actorRes.status; } module.exports = { fetchLatest, fetchProfile, scrapeScene: { scraper: scrapeScene, cookies: { warningHidden: 'hide', }, }, };