Refactored Little Caprice Dreams. Fixed Karups breaking on BoyFun domain.

This commit is contained in:
DebaucheryLibrarian 2026-02-01 19:05:20 +01:00
parent 3189652fc8
commit f42d79d521
7 changed files with 163 additions and 150 deletions

View File

@ -7431,70 +7431,81 @@ const sites = [
{
name: 'Caprice Casting',
slug: 'capricecasting',
url: 'https://www.littlecaprice-dreams.com/caprice-casting',
url: 'https://www.littlecaprice-dreams.com/collection/caprice-casting',
parent: 'littlecapricedreams',
},
{
name: 'Buttmuse',
slug: 'buttmuse',
url: 'https://www.littlecaprice-dreams.com/collection/buttmuse',
parent: 'littlecapricedreams',
},
{
name: 'Caprice Divas',
slug: 'capricedivas',
url: 'https://www.littlecaprice-dreams.com/caprice-divas',
url: 'https://www.littlecaprice-dreams.com/collection/caprice-divas',
parent: 'littlecapricedreams',
},
{
name: 'Nassty',
slug: 'nassty',
url: 'https://www.littlecaprice-dreams.com/nassty',
url: 'https://www.littlecaprice-dreams.com/collection/nassty',
parent: 'littlecapricedreams',
},
{
name: 'POV Dreams',
slug: 'povdreams',
url: 'https://www.littlecaprice-dreams.com/pov-dreams',
url: 'https://www.littlecaprice-dreams.com/collection/pov-dreams',
parent: 'littlecapricedreams',
},
{
name: 'Porn Lifestyle',
slug: 'pornlifestyle',
url: 'https://www.littlecaprice-dreams.com/porn-lifestyle',
url: 'https://www.littlecaprice-dreams.com/collection/porn-lifestyle',
parent: 'littlecapricedreams',
},
{
name: 'Public Sex',
slug: 'publicsex',
url: 'https://www.littlecaprice-dreams.com/public-sex',
url: 'https://www.littlecaprice-dreams.com/collection/public-sex',
parent: 'littlecapricedreams',
},
{
name: 'Super Private X',
slug: 'superprivatex',
url: 'https://www.littlecaprice-dreams.com/superprivatex',
url: 'https://www.littlecaprice-dreams.com/collection/superprivatex',
parent: 'littlecapricedreams',
},
{
name: 'Sex Lessons',
slug: 'sexlessons',
url: 'https://www.littlecaprice-dreams.com/sexlessons',
url: 'https://www.littlecaprice-dreams.com/collection/sexlessons',
parent: 'littlecapricedreams',
},
{
name: 'Virtual Reality',
name: 'Streetfuck',
slug: 'streetfuck',
url: 'https://www.littlecaprice-dreams.com/collection/streetfuck/',
parent: 'littlecapricedreams',
},
{
name: 'Little Caprice VR',
slug: 'littlecapricevr',
url: 'https://www.littlecaprice-dreams.com/virtual-reality-little-caprice',
url: 'https://www.littlecaprice-dreams.com/collection/virtual-reality-little-caprice',
tags: ['vr'],
hasLogo: false,
parent: 'littlecapricedreams',
},
{
name: 'We Cum To You',
slug: 'wecumtoyou',
url: 'https://www.littlecaprice-dreams.com/wecumtoyou-swingers',
url: 'https://www.littlecaprice-dreams.com/collection/wecumtoyou-swingers',
tags: ['swinging', 'orgy'],
parent: 'littlecapricedreams',
},
{
name: 'Xpervo',
slug: 'xpervo',
url: 'https://www.littlecaprice-dreams.com/xpervo',
url: 'https://www.littlecaprice-dreams.com/collection/xpervo',
parent: 'littlecapricedreams',
},
// LOVE HER FILMS

View File

@ -205,7 +205,7 @@ module.exports = {
bamvisions,
bang,
bluedonkeymedia,
delphine: modelmedia,
// delphine: modelmedia,
meidenvanholland: bluedonkeymedia, // Vurig Vlaanderen uses same database
boobpedia,
bradmontana,

View File

@ -36,7 +36,7 @@ function scrapeAll(scenes) {
}
async function fetchLatest(channel, page) {
const res = await unprint.get(`${channel.url}videos/page${page}.html`, {
const res = await unprint.get(new URL(`./videos/page${page}.html`, channel.url).href, { // some sites require a trailing slash, join paths properly
selectAll: '.listing-videos .item',
cookies: {
warningHidden: 'hide',

View File

@ -1,7 +1,19 @@
'use strict';
const qu = require('../utils/qu');
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const { stripQuery } = require('../utils/url');
const { convert } = require('../utils/convert');
// Alias table: maps a slugified series label to the canonical channel slug.
// Looked up as `channelMap[slug] || slug`, so labels that already equal a
// channel slug need no entry here.
// NOTE(review): the keys look like shortened or alternate spellings found in
// scene titles and collection tags — confirm against the live site.
const channelMap = {
vr: 'littlecapricevr',
vrporn: 'littlecapricevr',
superprivat: 'superprivatex',
superprivate: 'superprivatex',
nasst: 'nassty',
sexlesson: 'sexlessons',
};
function matchChannel(release, channel) {
const series = channel.children || channel.parent?.children;
@ -16,188 +28,176 @@ function matchChannel(release, channel) {
[serie.slug]: serie,
}), {});
serieNames.vr = serieNames.littlecapricevr;
serieNames.superprivat = serieNames.superprivatex;
serieNames.superprivate = serieNames.superprivatex;
serieNames.nasst = serieNames.nassty;
serieNames.sexlesson = serieNames.sexlessons;
// ensure longest key matches first
const serieKeys = Object.keys(serieNames).sort((nameA, nameB) => nameB.length - nameA.length);
const serieName = release.title.match(new RegExp(serieKeys.join('|'), 'i'))?.[0];
const serie = serieName && serieNames[slugify(serieName, '')];
const serieName = release.title?.match(new RegExp(serieKeys.join('|'), 'i'))?.[0];
const serieSlug = slugify(serieName, '');
const serie = serieName && serieNames[channelMap[serieSlug] || serieSlug];
if (serie) {
return {
channel: serie.slug,
title: release.title.replace(new RegExp(`(${serieName}|${serie.name}|${serie.slug})\\s*[-:/]+\\s*`, 'ig'), ''),
};
return serie.slug;
}
return null;
}
function scrapeAll(scenes, channel) {
return scenes.map(({ query, el }) => {
return scenes.map(({ query }) => {
const release = {};
release.url = query.url('a');
release.entryId = query.q(el, null, 'id')?.match(/post-(\d+)/)?.[1];
release.url = query.url(null);
release.entryId = query.attribute(null, 'class').match(/project-(\d{3,})/)?.[1];
release.title = query.cnt('.meta h3');
release.date = query.date('.meta .post-meta', 'MMMM D, YYYY');
release.title = query.content('h2')?.trim().replace(/\.\.\.$/, '');
release.poster = {
src: query.img('img'),
referer: channel.url,
};
const poster = query.img('img');
return {
...release,
...matchChannel(release, channel),
};
});
}
async function fetchPhotos(url) {
if (url) {
const res = await qu.get(url, '.et_post_gallery');
if (res.ok) {
return res.item.query.urls('a').map((imgUrl) => ({
src: imgUrl,
referer: url,
if (poster) {
release.poster = [
stripQuery(poster),
poster,
].map((src) => ({
src,
referer: channel.url,
}));
}
}
return null;
}
release.channel = matchChannel(release, channel);
async function scrapeScene({ query }, url, channel, include) {
const release = {};
const script = query.cnt('script.yoast-schema-graph');
const data = script && JSON.parse(script);
release.entryId = query.q('article.project', 'id')?.match(/post-(\d+)/)?.[1];
release.title = query.cnt('.vid_title');
release.description = query.cnt('.vid_desc p');
release.date = query.date('.vid_date', 'MMMM D, YYYY');
release.duration = query.dur('.vid_length');
release.actors = query.all('.vid_infos a[href*="author/"]').map((actorEl) => ({
name: query.cnt(actorEl),
url: query.url(actorEl, null),
}));
release.tags = query.cnts('.vid_infos a[rel="tag"]');
const posterData = data['@graph']?.find((item) => item['@type'] === 'ImageObject');
const poster = posterData?.url
|| query.q('meta[property="og:image"]', 'content')
|| query.q('meta[name="twitter:image"]', 'content');
release.poster = {
src: poster,
referer: url,
};
release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null; // rating out of 5, yet sometimes 5.07?
if (include.photos) {
release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
}
release.trailer = {
src: query.video(),
type: query.video('source', 'type'),
quality: query.video('source', 'data-res'),
referer: url,
};
return {
...release,
...matchChannel(release, channel),
};
}
function scrapeProfile({ query, el }, { url, gender }, baseActor, entity) {
const profile = { url, gender };
profile.age = query.number('div:nth-child(2) > p');
profile.birthPlace = query.cnt('div:nth-child(3) > p')?.match(/nationality[\s:]+(\w+)/i)?.[1];
profile.description = query.cnt('div:nth-child(4) > p');
profile.avatar = {
src: query.img('.model-page'),
referer: url,
};
profile.scenes = scrapeAll(qu.initAll(el, '.project_category-videos'), entity);
return profile;
return release;
});
}
async function fetchLatest(channel) {
// no apparent pagination, all updates on one page
// using channels in part because main overview contains indistinguishable photo albums
// however, some serie pages contain videos from other series
const res = await qu.getAll(channel.url, '.project');
const res = await unprint.get(channel.url, { selectAll: '.project-type-video' });
if (res.ok) {
return scrapeAll(res.items, channel);
return scrapeAll(res.context, channel);
}
return res.status;
}
async function fetchScene(url, channel, baseRelease, include) {
const res = await qu.get(url);
async function attachPhotos(url, release) {
if (url) {
const res = await unprint.get(url);
if (res.ok) {
return scrapeScene(res.item, url, channel, include);
if (res.ok) {
release.photos = res.context.query.imgs('.gallery img').map((imgUrl) => ({ // eslint-disable-line no-param-reassign
src: imgUrl,
referer: url,
}));
release.photoCount = res.context.query.number('.image-amount'); // eslint-disable-line no-param-reassign
}
}
return res.status;
return null;
}
async function getActorUrl(baseActor, gender = 'female') {
if (baseActor.url) {
return baseActor.url;
async function scrapeScene({ query }, { url, include }) {
const release = {};
release.entryId = query.attribute('#main-project-content', 'class').match(/project-(\d{3,})/)?.[1];
release.title = query.content('.project-header h1');
release.description = query.content('.desc-text');
release.date = query.date('.relese-date', 'D. MMM YYYY', { match: /\d{1,2}\. \w{3} \d{4}/ }); // sic
release.duration = query.duration('.video-duration');
release.actors = query.all('.project-models .list a').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: unprint.query.url(actorEl, null),
}));
release.tags = query.contents('.project-tags a[href*="videos/#"]');
const poster = query.attribute('meta[property="og:image"]', 'content')
|| query.attribute('meta[name="twitter:image"]', 'content');
release.poster = {
src: poster,
referer: url,
};
if (include.photos) {
await attachPhotos(url.replace(/(\/)?$/, '-2$1'), release);
}
const overviewUrl = gender === 'female'
? 'https://www.littlecaprice-dreams.com/pornstars/'
: 'https://www.littlecaprice-dreams.com/male-models-pornstars/';
const trailerFrame = query.url('.video iframe', { attribute: 'src' });
const trailerId = trailerFrame?.match(/\/embed\/\d+\/([a-z0-9-]+)/)?.[1];
const overviewRes = await qu.getAll(overviewUrl, '.models');
if (trailerId) {
release.trailer = {
stream: `https://trailer.littlecaprice-dreams.com/${trailerId}/1920x1080/video.m3u8`,
quality: 1080,
referer: url,
};
}
const channelSlug = slugify(query.content('.project-tags a[href*="collection/"]'), '');
release.channel = channelMap[channelSlug] || channelSlug;
return release;
}
/**
 * Build an actor profile from a model page.
 *
 * @param {object} context - Page context; only its `query` helper is used.
 * @param {object} actorResult - `{ url, avatar }` for the actor; `avatar` may be empty.
 * @param {object} entity - Passed through to `scrapeAll` for the scenes listed on the page.
 * @returns {object} Profile with `url`, `nationality`, `cup`, `measurements`, `height`,
 *   `photos`, `scenes`, and `description` / `avatar` when available.
 */
function scrapeProfile({ query }, { url, avatar }, entity) {
	const profile = { url };

	// All bio stats are parsed out of one heading. Read it once and fall back to an
	// empty string so a page without that heading yields an empty profile instead
	// of throwing on `null.match(...)`.
	const info = query.content('.info h2') || '';

	profile.nationality = info.match(/nationality: (\w+)/i)?.[1];
	profile.cup = info.match(/cu[pb]-size: (\w{1,2})/i)?.[1]; // sic
	profile.measurements = info.match(/\d{2}-\d{2}-\d{2}/i)?.[0]; // sic
	profile.height = convert(info.match(/\d \d{1,2}″/)?.[0], 'cm');

	const description = query.content('.info div:last-child');

	// Skip short "coming soon" placeholders; keep any text longer than 50 characters
	// even if it happens to contain that phrase.
	if (!/coming soon/i.test(description) || description.length > 50) {
		profile.description = description;
	}

	if (avatar) {
		// Two candidates per avatar: the URL passed through stripQuery first, then the
		// original URL. NOTE(review): presumably the second is a fallback — confirm.
		profile.avatar = [
			stripQuery(avatar),
			avatar,
		].map((src) => ({
			src,
			referer: url,
		}));
	}

	profile.photos = query.imgs('.img-poster');
	profile.scenes = scrapeAll(unprint.initAll(query.all('.project-type-video')), entity);

	return profile;
}
async function getActorUrl(baseActor) {
// male performers are listed, but hidden
const overviewRes = await unprint.get('https://www.littlecaprice-dreams.com/models/', { selectAll: '.model-preview' });
if (!overviewRes.ok) {
return overviewRes.status;
}
const actorItem = overviewRes.items.find(({ query }) => slugify(query.q('img', 'title')) === baseActor.slug);
const actorItem = overviewRes.context.find(({ query }) => slugify(query.text('h2')) === baseActor.slug);
if (!actorItem) {
if (gender === 'female') {
return getActorUrl(baseActor, 'male');
}
return null;
}
const actorUrl = actorItem.query.url('a');
const actorUrl = actorItem.query.url(null);
const actorAvatar = actorItem.query.img();
if (actorUrl) {
return {
url: actorUrl,
gender,
avatar: actorAvatar,
};
}
@ -205,16 +205,17 @@ async function getActorUrl(baseActor, gender = 'female') {
}
async function fetchProfile(baseActor, { entity }) {
const actorUrl = await getActorUrl(baseActor);
// using search for avatar, not on model page
const actorResult = await getActorUrl(baseActor);
if (!actorUrl) {
if (!actorResult) {
return null;
}
const actorRes = await qu.get(actorUrl.url, '#main-content');
const actorRes = await unprint.get(actorResult.url, { select: '.model-page' });
if (actorRes.ok) {
return scrapeProfile(actorRes.item, actorUrl, baseActor, entity);
return scrapeProfile(actorRes.context, actorResult, entity);
}
return actorRes.status;
@ -222,6 +223,6 @@ async function fetchProfile(baseActor, { entity }) {
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
scrapeScene,
};

View File

@ -111,7 +111,7 @@ module.exports = {
cumlouder,
czechav,
pornworld,
delphine: modelmedia,
// delphine: modelmedia,
dorcel,
elegantangel: adultempire,
exploitedx,

View File

@ -60,7 +60,7 @@ function kgToLbs(kgs) {
}
function curateConvertInput(string) {
if (/[']|(fe*o*t)/.test(string)) {
if (/[']|(fe*o*t)/.test(string)) {
const result = string.match(/(\d+).*?(\d+)/);
if (result) {

View File

@ -230,6 +230,7 @@ const actors = [
{ entity: 'karups', name: 'Peach Lollypop', fields: ['avatar'] },
{ entity: 'boyfun', name: 'Amahd Passer', fields: ['avatar', 'age', 'height', 'weight', 'penisLength', 'isCircumcised'] },
{ entity: 'bang', name: 'Riley Reid', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'ethnicity', 'hairColor', 'eyes'] },
{ entity: 'littlecapricedreams', name: 'Littlecaprice', fields: ['avatar', 'nationality', 'cup', 'measurements', 'height', 'description'] }, // sic
];
const actorScrapers = scrapers.actors;