Refactored Cherry Pimps to use unprint, added series as channels.

This commit is contained in:
DebaucheryLibrarian
2026-01-11 23:19:31 +01:00
parent d0bb56e436
commit b7beea60ce
5 changed files with 284 additions and 127 deletions

View File

@@ -1,92 +1,119 @@
'use strict';
const qu = require('../utils/qu');
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const tryUrls = require('../utils/try-urls');
function scrapeAll(scenes, site) {
function scrapeAll(scenes) {
return scenes.map(({ query }) => {
const url = query.url('.text-thumb a');
const { pathname } = new URL(url);
const channelUrl = query.url('.badge');
if (site?.parameters?.extract && query.q('.badge', true) !== site.name) {
return null;
}
const release = {};
release.url = channelUrl ? `${channelUrl}${pathname}` : url;
const url = query.url('.item-title a');
const { pathname } = new URL(url);
release.url = url;
release.entryId = pathname.match(/\/trailers\/(.*).html/)[1];
release.title = query.q('.text-thumb a', true);
release.date = query.date('.date', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
release.duration = query.dur('.date', /(\d{2}:)?\d{2}:\d{2}/);
release.title = query.content('.item-title a');
release.actors = query.all('.category a', true);
release.date = query.date('.item-date', 'MMMM D, YYYY');
release.duration = query.duration('.item-date', /(\d{2}:)?\d{2}:\d{2}/);
release.poster = query.img('img.video_placeholder, .video-images img');
release.teaser = { src: query.trailer() };
release.actors = query.all('.item-models a').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: unprint.query.url(actorEl, null),
}));
release.poster = query.img('.item-video-thumb', { attribute: 'data-videoposter' }) || query.img('img.video_placeholder');
release.teaser = query.video('.item-video-thumb', { attribute: 'data-videosrc' });
release.photoCount = query.number('.item-date', { match: /(\d+) photos/i, matchIndex: 1 });
release.channel = slugify(query.content('.item-sitename a'), '');
return release;
}).filter(Boolean);
});
}
function scrapeScene({ query, html }, url, _site, baseRelease) {
const release = { url };
async function fetchLatest(channel, page = 1) {
const slug = channel.parameters?.slug || new URL(channel.url).pathname.match(/\/series\/([\w-]+)/)[1];
const res = await unprint.get(`https://cherrypimps.com/categories/${slug}_${page}_d.html`, { selectAll: '.item-updates .item-video' });
const { pathname } = new URL(url);
release.entryId = pathname.match(/\/\d+/)[0].slice(1);
release.title = query.q('.trailer-block_title', true);
release.description = query.q('.info-block:nth-child(3) .text', true);
release.date = query.date('.info-block_data .text', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
const duration = baseRelease?.duration || Number(query.q('.info-block_data .text', true).match(/(\d+)\s+min/)?.[1]) * 60;
if (duration) release.duration = duration;
release.actors = query.all('.info-block_data a[href*="/models"]', true);
release.tags = query.all('.info-block a[href*="/categories"]', true);
const posterEl = query.q('.update_thumb');
const poster = posterEl?.getAttribute('src0_3x') || posterEl?.getAttribute('src0_2x') || posterEl?.dataset.src;
if (poster && baseRelease?.poster) release.photos = [poster];
else if (poster) release.poster = poster;
const trailer = html.match(/video src="(.*?)"/);
if (trailer) {
release.trailer = trailer[1];
if (res.ok) {
return scrapeAll(res.context, channel);
}
return res.status;
}
/**
 * Build a release object from a single scene (trailer) page.
 *
 * @param {object} context - page context; only its `query` helper is used here
 * @param {string} url - absolute scene URL, expected to look like `.../trailers/<slug>.html`
 * @returns {object} release carrying url, entryId, title, description, date,
 *   duration, photoCount, actors, tags and poster
 * @throws {TypeError} when `url` cannot be parsed by `new URL`
 */
function scrapeScene({ query }, url) {
	// Image attributes probed from highest to lowest resolution.
	// NOTE(review): presumably consumers treat these lists as ordered fallbacks — confirm.
	const sourceAttributes = ['src0_3x', 'src0_2x', 'src0_1x'];

	const release = { url };
	const { pathname } = new URL(url);

	// The dot is escaped so only a literal ".html" suffix matches, and a path that
	// is not a /trailers/ page yields null instead of throwing on `null[1]`.
	release.entryId = pathname.match(/\/trailers\/(.*)\.html/)?.[1] ?? null;

	release.title = query.content('.item-title h1');
	release.description = query.content('.update-info-block p');

	release.date = query.date('.update-info-row:first-child', 'MMMM D, YYYY');
	release.duration = query.duration('.update-info-row:last-child');

	// The photo count is read from the same info row as the duration.
	release.photoCount = query.number('.update-info-row:last-child', { match: /(\d+) photos/i, matchIndex: 1 });

	release.actors = query.all('.models-list-thumbs .model-list-item').map((actorEl) => ({
		name: unprint.query.content(actorEl, 'span'),
		url: unprint.query.url(actorEl, 'a'),
		// Left unfiltered, exactly as before: missing resolutions stay in the list.
		avatar: sourceAttributes.map((attribute) => unprint.query.img(actorEl, 'img', { attribute })),
	}));

	release.tags = query.contents('.update-info-block a[href*="categories/"]');

	// Usually only the src0_1x source is available; empty results are dropped.
	release.poster = sourceAttributes
		.map((attribute) => query.img('.update_thumb', { attribute }))
		.filter(Boolean);

	// faux video trailer player redirects to signup

	return release;
}
function scrapeProfile({ query }) {
const profile = {};
async function fetchScene(url, site, release) {
const res = await unprint.get(url);
const keys = query.all('.model-descr_line:not(.model-descr_rait) p.text span', true);
const values = query.all('.model-descr_line:not(.model-descr_rait) p.text').map((el) => query.text(el));
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
if (bio.height) profile.height = Number(bio.height.match(/\((\d+)\s*cm\)/)?.[1]);
if (bio.weight) profile.weight = Number(bio.weight.match(/\((\d+)kg\)/)?.[1]);
if (bio.race) profile.ethnicity = bio.race;
if (bio.date_of_birth) profile.birthdate = qu.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
if (bio.birthplace) profile.birthPlace = bio.birthplace;
if (bio.measurements) {
const [bust, waist, hip] = bio.measurements.split('-');
if (!/\?/.test(bust)) profile.bust = bust;
if (!/\?/.test(waist)) profile.waist = waist;
if (!/\?/.test(hip)) profile.hip = hip;
if (res.ok) {
return scrapeScene(res.context, url, site, release);
}
if (bio.hair) profile.hair = bio.hair;
if (bio.eyes) profile.eyes = bio.eyes;
return res.status;
}
if (/various/i.test(bio.tattoos)) profile.hasTattoos = true;
function scrapeProfile({ query }, url) {
const profile = { url };
const bio = Object.fromEntries(query.all('.model-stats li').map((bioEl) => [
slugify(unprint.query.content(bioEl, 'strong'), '_'),
unprint.query.text(bioEl),
]));
profile.height = Number(bio.height?.match(/\((\d+)\s*cm\)/)?.[1]) || null;
profile.weight = Number(bio.weight?.match(/\((\d+)\s*kg\)/)?.[1]) || null;
profile.age = parseInt(bio.age, 10) || null;
profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
profile.birthPlace = bio.birthplace;
profile.ethnicity = bio.race;
profile.measurements = bio.measurements;
profile.hair = bio.hair_color;
profile.eyes = bio.eye_color;
if (/various|several/i.test(bio.tattoos)) profile.hasTattoos = true;
else if (/none/i.test(bio.tattoos)) profile.hasTattoos = false;
else if (bio.tattoos) {
profile.hasTattoos = true;
@@ -100,47 +127,31 @@ function scrapeProfile({ query }) {
profile.piercings = bio.piercings;
}
if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim());
profile.aliases = bio.aliases?.split(',').map((alias) => alias.trim());
const avatar = query.q('.model-img img');
profile.avatar = avatar.getAttribute('src0_3x') || avatar.getAttribute('src0_2x') || avatar.dataset.src;
const releases = query.all('.video-thumb');
profile.releases = scrapeAll(qu.initAll(releases));
profile.avatar = [
query.img('.model-img img, .model_bio_thumb', { attribute: 'src0_3x' }),
query.img('.model-img img, .model_bio_thumb', { attribute: 'src0_2x' }),
query.img('.model-img img, .model_bio_thumb', { attribute: 'src0_1x' }),
];
return profile;
}
async function fetchLatest(site, page = 1) {
const url = site.parameters?.extract
? `https://cherrypimps.com/categories/movies_${page}.html`
: `${site.url}/categories/movies_${page}.html`;
const res = await qu.getAll(url, 'div.video-thumb');
async function fetchProfile({ name: actorName, url: actorUrl }, { channel, network }) {
const origin = new URL(channel?.url || network.url).origin;
return res.ok ? scrapeAll(res.items, site) : res.status;
}
const { res, url } = await tryUrls([
actorUrl,
`${origin}/models/${slugify(actorName, '')}.html`,
`${origin}/models/${slugify(actorName)}.html`,
]);
async function fetchScene(url, site, release) {
const res = await qu.get(url);
if (res.ok) {
return scrapeProfile(res.context, url);
}
return res.ok ? scrapeScene(res.item, url, site, release) : res.status;
}
async function fetchProfile({ name: actorName }, { site, network, scraper }) {
const actorSlug = slugify(actorName);
const actorSlug2 = slugify(actorName, '');
const origin = site?.url || network.url;
const [url, url2] = ['cherrypimps', 'wildoncam'].includes(scraper)
? [`${origin}/models/${actorSlug}.html`, `${origin}/models/${actorSlug2}.html`]
: [`${origin}/models/${actorSlug}.html`, `${origin}/models/${actorSlug2}.html`];
const res = await qu.get(url);
if (res.ok) return scrapeProfile(res.item);
const res2 = await qu.get(url2);
return res2.ok ? scrapeProfile(res2.item) : res2.status;
return res.status;
}
module.exports = {

View File

@@ -113,7 +113,7 @@ async function scrapeProfile({ query }, url, include) {
if (tags.includes('tattoo') || tags.includes('tattoos')) profile.hasTattoos = true;
if (tags.includes('piercing') || tags.includes('piercings')) profile.hasPiercings = true;
profile.description = query.text('[class$="description"] [class*="more-less"]');
profile.description = query.content('[class$="description"] [class*="more-less"]');
profile.avatar = query.img('[class*="poster"] img') || null;
if (include.releases) {

View File

@@ -60,13 +60,19 @@ function scrapeScene({ query }, { url }) {
return release;
}
function scrapeProfile({ query }) {
function scrapeProfile({ query }, searchAvatar) {
const profile = {};
profile.nationality = query.content('//h3[contains(text(), "Nationality:")]/span') || null;
profile.age = query.number('//h3[contains(text(), "Age:")]/span');
profile.avatar = query.img();
const pageAvatar = query.img();
profile.avatar = searchAvatar || pageAvatar;
if (searchAvatar) {
profile.photos = [pageAvatar];
}
return profile;
}
@@ -94,18 +100,20 @@ async function fetchUpcoming(channel) {
}
async function fetchProfile({ name: actorName }, entity) {
// don't skip search, avatar not available on actor page
const searchUrl = `${entity.url}/models?name=${actorName}&sort=popularity`;
const searchRes = await unprint.get(searchUrl);
if (searchRes.ok) {
const actorEl = searchRes.context.query.all('.pagination-items .model a').find((resultEl) => unprint.query.attribute(resultEl, null, 'title') === actorName);
const actorUrl = unprint.query.url(actorEl, null);
const avatar = unprint.query.img(actorEl, '.card-img');
if (actorUrl) {
const res = await unprint.get(actorUrl, { select: '.model-detail-card' });
if (res.ok) {
return scrapeProfile(res.context, actorName, entity);
return scrapeProfile(res.context, avatar, entity);
}
return res.status;