From f42d79d521a95a0f1cae5d4bd19466b23e41e3f5 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sun, 1 Feb 2026 19:05:20 +0100 Subject: [PATCH] Refactored Little Caprice Dreams. Fixed Karups breaking on BoyFun domain. --- seeds/02_sites.js | 37 ++-- src/scrapers/actors.js | 2 +- src/scrapers/karups.js | 2 +- src/scrapers/littlecapricedreams.js | 267 ++++++++++++++-------------- src/scrapers/releases.js | 2 +- src/utils/convert.js | 2 +- tests/profiles.js | 1 + 7 files changed, 163 insertions(+), 150 deletions(-) diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 180efdac..e243774d 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -7431,70 +7431,81 @@ const sites = [ { name: 'Caprice Casting', slug: 'capricecasting', - url: 'https://www.littlecaprice-dreams.com/caprice-casting', + url: 'https://www.littlecaprice-dreams.com/collection/caprice-casting', + parent: 'littlecapricedreams', + }, + { + name: 'Buttmuse', + slug: 'buttmuse', + url: 'https://www.littlecaprice-dreams.com/collection/buttmuse', parent: 'littlecapricedreams', }, { name: 'Caprice Divas', slug: 'capricedivas', - url: 'https://www.littlecaprice-dreams.com/caprice-divas', + url: 'https://www.littlecaprice-dreams.com/collection/caprice-divas', parent: 'littlecapricedreams', }, { name: 'Nassty', slug: 'nassty', - url: 'https://www.littlecaprice-dreams.com/nassty', + url: 'https://www.littlecaprice-dreams.com/collection/nassty', parent: 'littlecapricedreams', }, { name: 'POV Dreams', slug: 'povdreams', - url: 'https://www.littlecaprice-dreams.com/pov-dreams', + url: 'https://www.littlecaprice-dreams.com/collection/pov-dreams', parent: 'littlecapricedreams', }, { name: 'Porn Lifestyle', slug: 'pornlifestyle', - url: 'https://www.littlecaprice-dreams.com/porn-lifestyle', + url: 'https://www.littlecaprice-dreams.com/collection/porn-lifestyle', parent: 'littlecapricedreams', }, { name: 'Public Sex', slug: 'publicsex', - url: 'https://www.littlecaprice-dreams.com/public-sex', + url: 'https://www.littlecaprice-dreams.com/collection/public-sex', parent: 'littlecapricedreams', }, { name: 'Super Private X', slug: 'superprivatex', - url: 'https://www.littlecaprice-dreams.com/superprivatex', + url: 'https://www.littlecaprice-dreams.com/collection/superprivatex', parent: 'littlecapricedreams', }, { name: 'Sex Lessons', slug: 'sexlessons', - url: 'https://www.littlecaprice-dreams.com/sexlessons', + url: 'https://www.littlecaprice-dreams.com/collection/sexlessons', parent: 'littlecapricedreams', }, { - name: 'Virtual Reality', + name: 'Streetfuck', + slug: 'streetfuck', + url: 'https://www.littlecaprice-dreams.com/collection/streetfuck/', + parent: 'littlecapricedreams', + }, + { + name: 'Little Caprice VR', slug: 'littlecapricevr', - url: 'https://www.littlecaprice-dreams.com/virtual-reality-little-caprice', + url: 'https://www.littlecaprice-dreams.com/collection/virtual-reality-little-caprice', tags: ['vr'], - hasLogo: false, parent: 'littlecapricedreams', }, { name: 'We Cum To You', slug: 'wecumtoyou', - url: 'https://www.littlecaprice-dreams.com/wecumtoyou-swingers', + url: 'https://www.littlecaprice-dreams.com/collection/wecumtoyou-swingers', tags: ['swinging', 'orgy'], parent: 'littlecapricedreams', }, { name: 'Xpervo', slug: 'xpervo', - url: 'https://www.littlecaprice-dreams.com/xpervo', + url: 'https://www.littlecaprice-dreams.com/collection/xpervo', parent: 'littlecapricedreams', }, // LOVE HER FILMS diff --git a/src/scrapers/actors.js b/src/scrapers/actors.js index 242d97e4..ea839371 100644 --- a/src/scrapers/actors.js +++ b/src/scrapers/actors.js @@ -205,7 +205,7 @@ module.exports = { bamvisions, bang, bluedonkeymedia, - delphine: modelmedia, + // delphine: modelmedia, meidenvanholland: bluedonkeymedia, // Vurig Vlaanderen uses same database boobpedia, bradmontana, diff --git a/src/scrapers/karups.js b/src/scrapers/karups.js index 07c12e06..fb6f949c 100755 --- a/src/scrapers/karups.js +++ b/src/scrapers/karups.js @@ -36,7 +36,7 @@ function scrapeAll(scenes) { } async function fetchLatest(channel, page) { - const res = await unprint.get(`${channel.url}videos/page${page}.html`, { + const res = await unprint.get(new URL(`./videos/page${page}.html`, channel.url).href, { // some sites require a trailing slash, join paths properly selectAll: '.listing-videos .item', cookies: { warningHidden: 'hide', diff --git a/src/scrapers/littlecapricedreams.js b/src/scrapers/littlecapricedreams.js index 5051506c..1ab71126 100755 --- a/src/scrapers/littlecapricedreams.js +++ b/src/scrapers/littlecapricedreams.js @@ -1,7 +1,19 @@ 'use strict'; -const qu = require('../utils/qu'); +const unprint = require('unprint'); + const slugify = require('../utils/slugify'); +const { stripQuery } = require('../utils/url'); +const { convert } = require('../utils/convert'); + +const channelMap = { + vr: 'littlecapricevr', + vrporn: 'littlecapricevr', + superprivat: 'superprivatex', + superprivate: 'superprivatex', + nasst: 'nassty', + sexlesson: 'sexlessons', +}; function matchChannel(release, channel) { const series = channel.children || channel.parent?.children; @@ -16,188 +28,176 @@ function matchChannel(release, channel) { [serie.slug]: serie, }), {}); - serieNames.vr = serieNames.littlecapricevr; - serieNames.superprivat = serieNames.superprivatex; - serieNames.superprivate = serieNames.superprivatex; - serieNames.nasst = serieNames.nassty; - serieNames.sexlesson = serieNames.sexlessons; - // ensure longest key matches first const serieKeys = Object.keys(serieNames).sort((nameA, nameB) => nameB.length - nameA.length); - const serieName = release.title.match(new RegExp(serieKeys.join('|'), 'i'))?.[0]; - const serie = serieName && serieNames[slugify(serieName, '')]; + const serieName = release.title?.match(new RegExp(serieKeys.join('|'), 'i'))?.[0]; + const serieSlug = slugify(serieName, ''); + const serie = serieName && serieNames[channelMap[serieSlug] || serieSlug]; if (serie) { - return { - channel: serie.slug, - title: release.title.replace(new RegExp(`(${serieName}|${serie.name}|${serie.slug})\\s*[-–:/]+\\s*`, 'ig'), ''), - }; + return serie.slug; } return null; } function scrapeAll(scenes, channel) { - return scenes.map(({ query, el }) => { + return scenes.map(({ query }) => { const release = {}; - release.url = query.url('a'); - release.entryId = query.q(el, null, 'id')?.match(/post-(\d+)/)?.[1]; + release.url = query.url(null); + release.entryId = query.attribute(null, 'class').match(/project-(\d{3,})/)?.[1]; - release.title = query.cnt('.meta h3'); - release.date = query.date('.meta .post-meta', 'MMMM D, YYYY'); + release.title = query.content('h2')?.trim().replace(/\.\.\.$/, ''); - release.poster = { - src: query.img('img'), - referer: channel.url, - }; + const poster = query.img('img'); - return { - ...release, - ...matchChannel(release, channel), - }; - }); -} - -async function fetchPhotos(url) { - if (url) { - const res = await qu.get(url, '.et_post_gallery'); - - if (res.ok) { - return res.item.query.urls('a').map((imgUrl) => ({ - src: imgUrl, - referer: url, + if (poster) { + release.poster = [ + stripQuery(poster), + poster, + ].map((src) => ({ + src, + referer: channel.url, })); } - } - return null; -} + release.channel = matchChannel(release, channel); -async function scrapeScene({ query }, url, channel, include) { - const release = {}; - - const script = query.cnt('script.yoast-schema-graph'); - const data = script && JSON.parse(script); - - release.entryId = query.q('article.project', 'id')?.match(/post-(\d+)/)?.[1]; - - release.title = query.cnt('.vid_title'); - release.description = query.cnt('.vid_desc p'); - - release.date = query.date('.vid_date', 'MMMM D, YYYY'); - release.duration = query.dur('.vid_length'); - - release.actors = query.all('.vid_infos a[href*="author/"]').map((actorEl) => ({ - name: query.cnt(actorEl), - url: query.url(actorEl, null), - })); - - release.tags = query.cnts('.vid_infos a[rel="tag"]'); - - const posterData = data['@graph']?.find((item) => item['@type'] === 'ImageObject'); - - const poster = posterData?.url - || query.q('meta[property="og:image"]', 'content') - || query.q('meta[name="twitter:image"]', 'content'); - - release.poster = { - src: poster, - referer: url, - }; - - release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null; // rating out of 5, yet sometimes 5.07? - - if (include.photos) { - release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]')); - } - - release.trailer = { - src: query.video(), - type: query.video('source', 'type'), - quality: query.video('source', 'data-res'), - referer: url, - }; - - return { - ...release, - ...matchChannel(release, channel), - }; -} - -function scrapeProfile({ query, el }, { url, gender }, baseActor, entity) { - const profile = { url, gender }; - - profile.age = query.number('div:nth-child(2) > p'); - profile.birthPlace = query.cnt('div:nth-child(3) > p')?.match(/nationality[\s:]+(\w+)/i)?.[1]; - - profile.description = query.cnt('div:nth-child(4) > p'); - - profile.avatar = { - src: query.img('.model-page'), - referer: url, - }; - - profile.scenes = scrapeAll(qu.initAll(el, '.project_category-videos'), entity); - - return profile; + return release; + }); } async function fetchLatest(channel) { // no apparent pagination, all updates on one page // using channels in part because main overview contains indistinguishable photo albums // however, some serie pages contain videos from other series - const res = await qu.getAll(channel.url, '.project'); + const res = await unprint.get(channel.url, { selectAll: '.project-type-video' }); if (res.ok) { - return scrapeAll(res.items, channel); + return scrapeAll(res.context, channel); } return res.status; } -async function fetchScene(url, channel, baseRelease, include) { - const res = await qu.get(url); +async function attachPhotos(url, release) { + if (url) { + const res = await unprint.get(url); - if (res.ok) { - return scrapeScene(res.item, url, channel, include); + if (res.ok) { + release.photos = res.context.query.imgs('.gallery img').map((imgUrl) => ({ // eslint-disable-line no-param-reassign + src: imgUrl, + referer: url, + })); + + release.photoCount = res.context.query.number('.image-amount'); // eslint-disable-line no-param-reassign + } } - return res.status; + return null; } -async function getActorUrl(baseActor, gender = 'female') { - if (baseActor.url) { - return baseActor.url; +async function scrapeScene({ query }, { url, include }) { + const release = {}; + + release.entryId = query.attribute('#main-project-content', 'class').match(/project-(\d{3,})/)?.[1]; + + release.title = query.content('.project-header h1'); + release.description = query.content('.desc-text'); + + release.date = query.date('.relese-date', 'D. MMM YYYY', { match: /\d{1,2}\. \w{3} \d{4}/ }); // sic + release.duration = query.duration('.video-duration'); + + release.actors = query.all('.project-models .list a').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null), + })); + + release.tags = query.contents('.project-tags a[href*="videos/#"]'); + + const poster = query.attribute('meta[property="og:image"]', 'content') + || query.attribute('meta[name="twitter:image"]', 'content'); + + release.poster = { + src: poster, + referer: url, + }; + + if (include.photos) { + await attachPhotos(url.replace(/(\/)?$/, '-2$1'), release); } - const overviewUrl = gender === 'female' - ? 'https://www.littlecaprice-dreams.com/pornstars/' - : 'https://www.littlecaprice-dreams.com/male-models-pornstars/'; + const trailerFrame = query.url('.video iframe', { attribute: 'src' }); + const trailerId = trailerFrame?.match(/\/embed\/\d+\/([a-z0-9-]+)/)?.[1]; - const overviewRes = await qu.getAll(overviewUrl, '.models'); + if (trailerId) { + release.trailer = { + stream: `https://trailer.littlecaprice-dreams.com/${trailerId}/1920x1080/video.m3u8`, + quality: 1080, + referer: url, + }; + } + + const channelSlug = slugify(query.content('.project-tags a[href*="collection/"]'), ''); + + release.channel = channelMap[channelSlug] || channelSlug; + + return release; +} + +function scrapeProfile({ query }, { url, avatar }, entity) { + const profile = { url }; + + profile.nationality = query.content('.info h2').match(/nationality: (\w+)/i)?.[1]; + profile.cup = query.content('.info h2').match(/cu[pb]-size: (\w{1,2})/i)?.[1]; // sic + profile.measurements = query.content('.info h2').match(/\d{2}-\d{2}-\d{2}/i)?.[0]; // sic + profile.height = convert(query.content('.info h2')?.match(/\d′ \d{1,2}″/)?.[0], 'cm'); + + const description = query.content('.info div:last-child'); + + if (!/coming soon/i.test(description) || description.length > 50) { + profile.description = description; + } + + if (avatar) { + profile.avatar = [ + stripQuery(avatar), + avatar, + ].map((src) => ({ + src, + referer: url, + })); + } + + profile.photos = query.imgs('.img-poster'); + profile.scenes = scrapeAll(unprint.initAll(query.all('.project-type-video')), entity); + + return profile; +} + +async function getActorUrl(baseActor) { + // male performers are listed, but hidden + const overviewRes = await unprint.get('https://www.littlecaprice-dreams.com/models/', { selectAll: '.model-preview' }); if (!overviewRes.ok) { return overviewRes.status; } - const actorItem = overviewRes.items.find(({ query }) => slugify(query.q('img', 'title')) === baseActor.slug); + const actorItem = overviewRes.context.find(({ query }) => slugify(query.text('h2')) === baseActor.slug); if (!actorItem) { - if (gender === 'female') { - return getActorUrl(baseActor, 'male'); - } - return null; } - const actorUrl = actorItem.query.url('a'); + const actorUrl = actorItem.query.url(null); + const actorAvatar = actorItem.query.img(); if (actorUrl) { return { url: actorUrl, - gender, + avatar: actorAvatar, }; } @@ -205,16 +205,17 @@ async function getActorUrl(baseActor, gender = 'female') { } async function fetchProfile(baseActor, { entity }) { - const actorUrl = await getActorUrl(baseActor); + // using search for avatar, not on model page + const actorResult = await getActorUrl(baseActor); - if (!actorUrl) { + if (!actorResult) { return null; } - const actorRes = await qu.get(actorUrl.url, '#main-content'); + const actorRes = await unprint.get(actorResult.url, { select: '.model-page' }); if (actorRes.ok) { - return scrapeProfile(actorRes.item, actorUrl, baseActor, entity); + return scrapeProfile(actorRes.context, actorResult, entity); } return actorRes.status; @@ -222,6 +223,6 @@ async function fetchProfile(baseActor, { entity }) { module.exports = { fetchLatest, - fetchScene, fetchProfile, + scrapeScene, }; diff --git a/src/scrapers/releases.js b/src/scrapers/releases.js index 54b6ec37..3a5b538d 100644 --- a/src/scrapers/releases.js +++ b/src/scrapers/releases.js @@ -111,7 +111,7 @@ module.exports = { cumlouder, czechav, pornworld, - delphine: modelmedia, + // delphine: modelmedia, dorcel, elegantangel: adultempire, exploitedx, diff --git a/src/utils/convert.js b/src/utils/convert.js index 5bafc8d6..47efb762 100755 --- a/src/utils/convert.js +++ b/src/utils/convert.js @@ -60,7 +60,7 @@ function kgToLbs(kgs) { } function curateConvertInput(string) { - if (/['’]|(fe*o*t)/.test(string)) { + if (/['’′]|(fe*o*t)/.test(string)) { const result = string.match(/(\d+).*?(\d+)/); if (result) { diff --git a/tests/profiles.js b/tests/profiles.js index 1e3f8d62..932a053f 100644 --- a/tests/profiles.js +++ b/tests/profiles.js @@ -230,6 +230,7 @@ const actors = [ { entity: 'karups', name: 'Peach Lollypop', fields: ['avatar'] }, { entity: 'boyfun', name: 'Amahd Passer', fields: ['avatar', 'age', 'height', 'weight', 'penisLength', 'isCircumcised'] }, { entity: 'bang', name: 'Riley Reid', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'ethnicity', 'hairColor', 'eyes'] }, + { entity: 'littlecapricedreams', name: 'Littlecaprice', fields: ['avatar', 'nationality', 'cup', 'measurements', 'height', 'description'] }, // sic ]; const actorScrapers = scrapers.actors;