Refactored Little Caprice Dreams. Fixed Karups breaking on BoyFun domain.

2026-02-01 19:05:20 +01:00
parent 3189652fc8
commit f42d79d521
7 changed files with 163 additions and 150 deletions
--- a/seeds/02_sites.js
+++ b/seeds/02_sites.js
@@ -7431,70 +7431,81 @@ const sites = [
 	{
 		name: 'Caprice Casting',
 		slug: 'capricecasting',
-		url: 'https://www.littlecaprice-dreams.com/caprice-casting',
+		url: 'https://www.littlecaprice-dreams.com/collection/caprice-casting',
+		parent: 'littlecapricedreams',
+	},
+	{
+		name: 'Buttmuse',
+		slug: 'buttmuse',
+		url: 'https://www.littlecaprice-dreams.com/collection/buttmuse',
 		parent: 'littlecapricedreams',
 	},
 	{
 		name: 'Caprice Divas',
 		slug: 'capricedivas',
-		url: 'https://www.littlecaprice-dreams.com/caprice-divas',
+		url: 'https://www.littlecaprice-dreams.com/collection/caprice-divas',
 		parent: 'littlecapricedreams',
 	},
 	{
 		name: 'Nassty',
 		slug: 'nassty',
-		url: 'https://www.littlecaprice-dreams.com/nassty',
+		url: 'https://www.littlecaprice-dreams.com/collection/nassty',
 		parent: 'littlecapricedreams',
 	},
 	{
 		name: 'POV Dreams',
 		slug: 'povdreams',
-		url: 'https://www.littlecaprice-dreams.com/pov-dreams',
+		url: 'https://www.littlecaprice-dreams.com/collection/pov-dreams',
 		parent: 'littlecapricedreams',
 	},
 	{
 		name: 'Porn Lifestyle',
 		slug: 'pornlifestyle',
-		url: 'https://www.littlecaprice-dreams.com/porn-lifestyle',
+		url: 'https://www.littlecaprice-dreams.com/collection/porn-lifestyle',
 		parent: 'littlecapricedreams',
 	},
 	{
 		name: 'Public Sex',
 		slug: 'publicsex',
-		url: 'https://www.littlecaprice-dreams.com/public-sex',
+		url: 'https://www.littlecaprice-dreams.com/collection/public-sex',
 		parent: 'littlecapricedreams',
 	},
 	{
 		name: 'Super Private X',
 		slug: 'superprivatex',
-		url: 'https://www.littlecaprice-dreams.com/superprivatex',
+		url: 'https://www.littlecaprice-dreams.com/collection/superprivatex',
 		parent: 'littlecapricedreams',
 	},
 	{
 		name: 'Sex Lessons',
 		slug: 'sexlessons',
-		url: 'https://www.littlecaprice-dreams.com/sexlessons',
+		url: 'https://www.littlecaprice-dreams.com/collection/sexlessons',
 		parent: 'littlecapricedreams',
 	},
 	{
-		name: 'Virtual Reality',
+		name: 'Streetfuck',
+		slug: 'streetfuck',
+		url: 'https://www.littlecaprice-dreams.com/collection/streetfuck/',
+		parent: 'littlecapricedreams',
+	},
+	{
+		name: 'Little Caprice VR',
 		slug: 'littlecapricevr',
-		url: 'https://www.littlecaprice-dreams.com/virtual-reality-little-caprice',
+		url: 'https://www.littlecaprice-dreams.com/collection/virtual-reality-little-caprice',
 		tags: ['vr'],
-		hasLogo: false,
 		parent: 'littlecapricedreams',
 	},
 	{
 		name: 'We Cum To You',
 		slug: 'wecumtoyou',
-		url: 'https://www.littlecaprice-dreams.com/wecumtoyou-swingers',
+		url: 'https://www.littlecaprice-dreams.com/collection/wecumtoyou-swingers',
 		tags: ['swinging', 'orgy'],
 		parent: 'littlecapricedreams',
 	},
 	{
 		name: 'Xpervo',
 		slug: 'xpervo',
-		url: 'https://www.littlecaprice-dreams.com/xpervo',
+		url: 'https://www.littlecaprice-dreams.com/collection/xpervo',
 		parent: 'littlecapricedreams',
 	},
 	// LOVE HER FILMS
--- a/src/scrapers/actors.js
+++ b/src/scrapers/actors.js
@@ -205,7 +205,7 @@ module.exports = {
 	bamvisions,
 	bang,
 	bluedonkeymedia,
-	delphine: modelmedia,
+	// delphine: modelmedia,
 	meidenvanholland: bluedonkeymedia, // Vurig Vlaanderen uses same database
 	boobpedia,
 	bradmontana,
--- a/src/scrapers/karups.js
+++ b/src/scrapers/karups.js
@@ -36,7 +36,7 @@ function scrapeAll(scenes) {
 }

 async function fetchLatest(channel, page) {
-	const res = await unprint.get(`${channel.url}videos/page${page}.html`, {
+	const res = await unprint.get(new URL(`./videos/page${page}.html`, channel.url).href, { // some sites require a trailing slash, join paths properly
 		selectAll: '.listing-videos .item',
 		cookies: {
 			warningHidden: 'hide',
--- a/src/scrapers/littlecapricedreams.js
+++ b/src/scrapers/littlecapricedreams.js
@@ -1,7 +1,19 @@
 'use strict';

-const qu = require('../utils/qu');
+const unprint = require('unprint');
+
 const slugify = require('../utils/slugify');
+const { stripQuery } = require('../utils/url');
+const { convert } = require('../utils/convert');
+
+const channelMap = {
+	vr: 'littlecapricevr',
+	vrporn: 'littlecapricevr',
+	superprivat: 'superprivatex',
+	superprivate: 'superprivatex',
+	nasst: 'nassty',
+	sexlesson: 'sexlessons',
+};

 function matchChannel(release, channel) {
 	const series = channel.children || channel.parent?.children;
@@ -16,188 +28,176 @@ function matchChannel(release, channel) {
 		[serie.slug]: serie,
 	}), {});

-	serieNames.vr = serieNames.littlecapricevr;
-	serieNames.superprivat = serieNames.superprivatex;
-	serieNames.superprivate = serieNames.superprivatex;
-	serieNames.nasst = serieNames.nassty;
-	serieNames.sexlesson = serieNames.sexlessons;
-
 	// ensure longest key matches first
 	const serieKeys = Object.keys(serieNames).sort((nameA, nameB) => nameB.length - nameA.length);

-	const serieName = release.title.match(new RegExp(serieKeys.join('|'), 'i'))?.[0];
-	const serie = serieName && serieNames[slugify(serieName, '')];
+	const serieName = release.title?.match(new RegExp(serieKeys.join('|'), 'i'))?.[0];
+	const serieSlug = slugify(serieName, '');
+	const serie = serieName && serieNames[channelMap[serieSlug] || serieSlug];

 	if (serie) {
-		return {
-			channel: serie.slug,
-			title: release.title.replace(new RegExp(`(${serieName}|${serie.name}|${serie.slug})\\s*[-–:/]+\\s*`, 'ig'), ''),
-		};
+		return serie.slug;
 	}

 	return null;
 }

 function scrapeAll(scenes, channel) {
-	return scenes.map(({ query, el }) => {
+	return scenes.map(({ query }) => {
 		const release = {};

-		release.url = query.url('a');
-		release.entryId = query.q(el, null, 'id')?.match(/post-(\d+)/)?.[1];
+		release.url = query.url(null);
+		release.entryId = query.attribute(null, 'class').match(/project-(\d{3,})/)?.[1];

-		release.title = query.cnt('.meta h3');
-		release.date = query.date('.meta .post-meta', 'MMMM D, YYYY');
+		release.title = query.content('h2')?.trim().replace(/\.\.\.$/, '');

-		release.poster = {
-			src: query.img('img'),
-			referer: channel.url,
-		};
+		const poster = query.img('img');

-		return {
-			...release,
-			...matchChannel(release, channel),
-		};
-	});
-}
-
-async function fetchPhotos(url) {
-	if (url) {
-		const res = await qu.get(url, '.et_post_gallery');
-
-		if (res.ok) {
-			return res.item.query.urls('a').map((imgUrl) => ({
-				src: imgUrl,
-				referer: url,
+		if (poster) {
+			release.poster = [
+				stripQuery(poster),
+				poster,
+			].map((src) => ({
+				src,
+				referer: channel.url,
 			}));
 		}
-	}

-	return null;
-}
+		release.channel = matchChannel(release, channel);

-async function scrapeScene({ query }, url, channel, include) {
-	const release = {};
-
-	const script = query.cnt('script.yoast-schema-graph');
-	const data = script && JSON.parse(script);
-
-	release.entryId = query.q('article.project', 'id')?.match(/post-(\d+)/)?.[1];
-
-	release.title = query.cnt('.vid_title');
-	release.description = query.cnt('.vid_desc p');
-
-	release.date = query.date('.vid_date', 'MMMM D, YYYY');
-	release.duration = query.dur('.vid_length');
-
-	release.actors = query.all('.vid_infos a[href*="author/"]').map((actorEl) => ({
-		name: query.cnt(actorEl),
-		url: query.url(actorEl, null),
-	}));
-
-	release.tags = query.cnts('.vid_infos a[rel="tag"]');
-
-	const posterData = data['@graph']?.find((item) => item['@type'] === 'ImageObject');
-
-	const poster = posterData?.url
-		|| query.q('meta[property="og:image"]', 'content')
-		|| query.q('meta[name="twitter:image"]', 'content');
-
-	release.poster = {
-		src: poster,
-		referer: url,
-	};
-
-	release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null; // rating out of 5, yet sometimes 5.07?
-
-	if (include.photos) {
-		release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
-	}
-
-	release.trailer = {
-		src: query.video(),
-		type: query.video('source', 'type'),
-		quality: query.video('source', 'data-res'),
-		referer: url,
-	};
-
-	return {
-		...release,
-		...matchChannel(release, channel),
-	};
-}
-
-function scrapeProfile({ query, el }, { url, gender }, baseActor, entity) {
-	const profile = { url, gender };
-
-	profile.age = query.number('div:nth-child(2) > p');
-	profile.birthPlace = query.cnt('div:nth-child(3) > p')?.match(/nationality[\s:]+(\w+)/i)?.[1];
-
-	profile.description = query.cnt('div:nth-child(4) > p');
-
-	profile.avatar = {
-		src: query.img('.model-page'),
-		referer: url,
-	};
-
-	profile.scenes = scrapeAll(qu.initAll(el, '.project_category-videos'), entity);
-
-	return profile;
+		return release;
+	});
 }

 async function fetchLatest(channel) {
 	// no apparent pagination, all updates on one page
 	// using channels in part because main overview contains indistinguishable photo albums
 	// however, some serie pages contain videos from other series
-	const res = await qu.getAll(channel.url, '.project');
+	const res = await unprint.get(channel.url, { selectAll: '.project-type-video' });

 	if (res.ok) {
-		return scrapeAll(res.items, channel);
+		return scrapeAll(res.context, channel);
 	}

 	return res.status;
 }

-async function fetchScene(url, channel, baseRelease, include) {
-	const res = await qu.get(url);
+async function attachPhotos(url, release) {
+	if (url) {
+		const res = await unprint.get(url);

-	if (res.ok) {
-		return scrapeScene(res.item, url, channel, include);
+		if (res.ok) {
+			release.photos = res.context.query.imgs('.gallery img').map((imgUrl) => ({ // eslint-disable-line no-param-reassign
+				src: imgUrl,
+				referer: url,
+			}));
+
+			release.photoCount = res.context.query.number('.image-amount'); // eslint-disable-line no-param-reassign
+		}
 	}

-	return res.status;
+	return null;
 }

-async function getActorUrl(baseActor, gender = 'female') {
-	if (baseActor.url) {
-		return baseActor.url;
+async function scrapeScene({ query }, { url, include }) {
+	const release = {};
+
+	release.entryId = query.attribute('#main-project-content', 'class').match(/project-(\d{3,})/)?.[1];
+
+	release.title = query.content('.project-header h1');
+	release.description = query.content('.desc-text');
+
+	release.date = query.date('.relese-date', 'D. MMM YYYY', { match: /\d{1,2}\. \w{3} \d{4}/ }); // sic
+	release.duration = query.duration('.video-duration');
+
+	release.actors = query.all('.project-models .list a').map((actorEl) => ({
+		name: unprint.query.content(actorEl),
+		url: unprint.query.url(actorEl, null),
+	}));
+
+	release.tags = query.contents('.project-tags a[href*="videos/#"]');
+
+	const poster = query.attribute('meta[property="og:image"]', 'content')
+		|| query.attribute('meta[name="twitter:image"]', 'content');
+
+	release.poster = {
+		src: poster,
+		referer: url,
+	};
+
+	if (include.photos) {
+		await attachPhotos(url.replace(/(\/)?$/, '-2$1'), release);
 	}

-	const overviewUrl = gender === 'female'
-		? 'https://www.littlecaprice-dreams.com/pornstars/'
-		: 'https://www.littlecaprice-dreams.com/male-models-pornstars/';
+	const trailerFrame = query.url('.video iframe', { attribute: 'src' });
+	const trailerId = trailerFrame?.match(/\/embed\/\d+\/([a-z0-9-]+)/)?.[1];

-	const overviewRes = await qu.getAll(overviewUrl, '.models');
+	if (trailerId) {
+		release.trailer = {
+			stream: `https://trailer.littlecaprice-dreams.com/${trailerId}/1920x1080/video.m3u8`,
+			quality: 1080,
+			referer: url,
+		};
+	}
+
+	const channelSlug = slugify(query.content('.project-tags a[href*="collection/"]'), '');
+
+	release.channel = channelMap[channelSlug] || channelSlug;
+
+	return release;
+}
+
+function scrapeProfile({ query }, { url, avatar }, entity) {
+	const profile = { url };
+
+	profile.nationality = query.content('.info h2').match(/nationality: (\w+)/i)?.[1];
+	profile.cup = query.content('.info h2').match(/cu[pb]-size: (\w{1,2})/i)?.[1]; // sic
+	profile.measurements = query.content('.info h2').match(/\d{2}-\d{2}-\d{2}/i)?.[0]; // sic
+	profile.height = convert(query.content('.info h2')?.match(/\d′ \d{1,2}″/)?.[0], 'cm');
+
+	const description = query.content('.info div:last-child');
+
+	if (!/coming soon/i.test(description) || description.length > 50) {
+		profile.description = description;
+	}
+
+	if (avatar) {
+		profile.avatar = [
+			stripQuery(avatar),
+			avatar,
+		].map((src) => ({
+			src,
+			referer: url,
+		}));
+	}
+
+	profile.photos = query.imgs('.img-poster');
+	profile.scenes = scrapeAll(unprint.initAll(query.all('.project-type-video')), entity);
+
+	return profile;
+}
+
+async function getActorUrl(baseActor) {
+	// male performers are listed, but hidden
+	const overviewRes = await unprint.get('https://www.littlecaprice-dreams.com/models/', { selectAll: '.model-preview' });

 	if (!overviewRes.ok) {
 		return overviewRes.status;
 	}

-	const actorItem = overviewRes.items.find(({ query }) => slugify(query.q('img', 'title')) === baseActor.slug);
+	const actorItem = overviewRes.context.find(({ query }) => slugify(query.text('h2')) === baseActor.slug);

 	if (!actorItem) {
-		if (gender === 'female') {
-			return getActorUrl(baseActor, 'male');
-		}
-
 		return null;
 	}

-	const actorUrl = actorItem.query.url('a');
+	const actorUrl = actorItem.query.url(null);
+	const actorAvatar = actorItem.query.img();

 	if (actorUrl) {
 		return {
 			url: actorUrl,
-			gender,
+			avatar: actorAvatar,
 		};
 	}

@@ -205,16 +205,17 @@ async function getActorUrl(baseActor, gender = 'female') {
 }

 async function fetchProfile(baseActor, { entity }) {
-	const actorUrl = await getActorUrl(baseActor);
+	// the avatar is taken from the models overview listing, because it is not available on the model page itself
+	const actorResult = await getActorUrl(baseActor);

-	if (!actorUrl) {
+	if (!actorResult) {
 		return null;
 	}

-	const actorRes = await qu.get(actorUrl.url, '#main-content');
+	const actorRes = await unprint.get(actorResult.url, { select: '.model-page' });

 	if (actorRes.ok) {
-		return scrapeProfile(actorRes.item, actorUrl, baseActor, entity);
+		return scrapeProfile(actorRes.context, actorResult, entity);
 	}

 	return actorRes.status;
@@ -222,6 +223,6 @@ async function fetchProfile(baseActor, { entity }) {

 module.exports = {
 	fetchLatest,
-	fetchScene,
 	fetchProfile,
+	scrapeScene,
 };
--- a/src/scrapers/releases.js
+++ b/src/scrapers/releases.js
@@ -111,7 +111,7 @@ module.exports = {
 	cumlouder,
 	czechav,
 	pornworld,
-	delphine: modelmedia,
+	// delphine: modelmedia,
 	dorcel,
 	elegantangel: adultempire,
 	exploitedx,
--- a/src/utils/convert.js
+++ b/src/utils/convert.js
@@ -60,7 +60,7 @@ function kgToLbs(kgs) {
 }

 function curateConvertInput(string) {
-	if (/['’]|(fe*o*t)/.test(string)) {
+	if (/['’′]|(fe*o*t)/.test(string)) {
 		const result = string.match(/(\d+).*?(\d+)/);

 		if (result) {
--- a/tests/profiles.js
+++ b/tests/profiles.js
@@ -230,6 +230,7 @@ const actors = [
 	{ entity: 'karups', name: 'Peach Lollypop', fields: ['avatar'] },
 	{ entity: 'boyfun', name: 'Amahd Passer', fields: ['avatar', 'age', 'height', 'weight', 'penisLength', 'isCircumcised'] },
 	{ entity: 'bang', name: 'Riley Reid', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'ethnicity', 'hairColor', 'eyes'] },
+	{ entity: 'littlecapricedreams', name: 'Littlecaprice', fields: ['avatar', 'nationality', 'cup', 'measurements', 'height', 'description'] }, // sic
 ];

 const actorScrapers = scrapers.actors;