Added DarkkoTV scraper. Removed some obsolete web components.

2026-03-31 06:29:30 +02:00
parent 64cdba6f6d
commit ebb8832096
31 changed files with 199 additions and 1098 deletions
--- a/src/scrapers/actors.js
+++ b/src/scrapers/actors.js
@@ -11,6 +11,7 @@ const bradmontana = require('./bradmontana');
 const cherrypimps = require('./cherrypimps');
 const cumlouder = require('./cumlouder');
 const modelmedia = require('./modelmedia');
+const darkkotv = require('./darkkotv');
 const dorcel = require('./dorcel');
 // const famedigital = require('./famedigital');
 const firstanalquest = require('./firstanalquest');
@@ -223,6 +224,7 @@ module.exports = {
 	bradmontana,
 	cherrypimps,
 	cumlouder,
+	darkkotv,
 	dorcelclub: dorcel,
 	freeones,
 	hitzefrei,
--- a/src/scrapers/darkkotv.js
+++ b/src/scrapers/darkkotv.js
@@ -0,0 +1,157 @@
+'use strict';
+
+const unprint = require('unprint');
+
+const slugify = require('../utils/slugify');
+const tryUrls = require('../utils/try-urls');
+const { convert } = require('../utils/convert');
+
+/**
+ * Derives an entry ID from a scene URL.
+ *
+ * The ID is the slugified path segment that follows `/scenes/`, without the
+ * optional `_vids` suffix and the `.html` extension.
+ *
+ * @param {string} url - Absolute scene URL; it is parsed with `new URL(url)`.
+ * @returns {string|null} The slugified entry ID, or null when no URL is given
+ *   or its path does not look like a scene path.
+ */
+function getEntryId(url) {
+	if (!url) {
+		return null;
+	}
+
+	// The dot is escaped so only a literal `.html` extension is accepted, and
+	// the optional chain keeps a non-matching path from throwing on `null[1]`.
+	const sceneName = new URL(url).pathname.match(/\/scenes\/(.*?)(_vids)?\.html/)?.[1];
+
+	return sceneName ? slugify(sceneName) : null;
+}
+
+/**
+ * Builds one release object per scene tile of a listing page.
+ *
+ * @param {Array<{query: Object}>} scenes - Query contexts, one per scene tile.
+ * @param {Object} channel - Channel entity; only `channel.origin` is read, as the origin passed to the poster lookup.
+ * @returns {Object[]} Releases with url, entryId, title, date, duration, actors and poster.
+ */
+function scrapeAll(scenes, channel) {
+	// Poster candidates are read from the `src0_4x` attribute down to `src0_1x`.
+	const posterAttributes = Array.from({ length: 4 }, (_value, index) => `src0_${4 - index}x`);
+
+	return scenes.map(({ query }) => {
+		const url = query.url('.videoPic a, h4 a');
+
+		return {
+			url,
+			entryId: getEntryId(url),
+			title: query.content('h4 a'),
+			date: query.date('.videoInfo li:first-child ', 'MM-DD-YYYY'),
+			// Presumably minutes converted to seconds; a falsy result becomes null.
+			duration: query.number('.videoInfo li:nth-child(2)') * 60 || null,
+			actors: query.all('a[href*="models/"]').map((actorEl) => {
+				const name = unprint.query.content(actorEl);
+				const actorUrl = unprint.query.url(actorEl, null);
+
+				return { name, url: actorUrl };
+			}),
+			poster: posterAttributes.map((attribute) => query.img('.videoPic img', { attribute, origin: channel.origin })),
+		};
+	});
+}
+
+/**
+ * Fetches one page of the channel's movie listing and scrapes its scene tiles.
+ *
+ * @param {Object} channel - Channel entity; `channel.url` is the base of the listing URL.
+ * @param {number} [page=1] - Listing page number.
+ * @returns {Promise<Object[]|number>} Scraped releases, or the response status when the request was not OK.
+ */
+async function fetchLatest(channel, page = 1) {
+	const res = await unprint.get(`${channel.url}/categories/movies_${page}.html`, { selectAll: '.latestUpdateB' });
+
+	if (!res.ok) {
+		return res.status;
+	}
+
+	return scrapeAll(res.context, channel);
+}
+
+/**
+ * Fetches a caps page and collects the images found in it.
+ *
+ * @param {string} [url] - URL of the caps page.
+ * @returns {Promise<Array|null>} Result of the image query, or null when no URL is given or the request was not OK.
+ */
+async function fetchCaps(url) {
+	if (url) {
+		const res = await unprint.get(url, { select: '.photoDetailsArea' });
+
+		if (res.ok) {
+			return res.context.query.imgs('.photoDPic img');
+		}
+	}
+
+	return null;
+}
+
+/**
+ * Scrapes a single scene page into a release object.
+ *
+ * @param {Object} context - Page context; `query` is used for page-wide lookups and `html` for the poster path.
+ * @param {Object} options - `url` of the scene, `entity` whose `url` prefixes the poster path, and `include` flags.
+ * @returns {Promise<Object>} Release with entryId, title, description, date, duration, actors, tags, trailer,
+ *   plus poster and caps when available.
+ */
+async function scrapeScene({ query: pageQuery, html }, { url, entity, include }) {
+	// Most details live inside the info box, so it gets its own query context.
+	const { query } = unprint.init(pageQuery.element('.latestUpdateBinfo'));
+
+	const release = {
+		entryId: getEntryId(url),
+		title: pageQuery.content('.vidImgTitle h4'),
+		description: query.content('.vidImgContent p'),
+		date: query.date('.videoInfo li:first-child ', 'MM-DD-YYYY'),
+		// Presumably minutes converted to seconds; a falsy result becomes null.
+		duration: query.number('.videoInfo li:nth-child(2)') * 60 || null,
+		actors: query.all('a[href*="models/"]').map((actorEl) => {
+			const name = unprint.query.content(actorEl);
+			const actorUrl = unprint.query.url(actorEl, null);
+
+			return { name, url: actorUrl };
+		}),
+		tags: query.contents('.blogTags a'),
+	};
+
+	const posterPath = html.match(/useimage\s*=\s*"(.*?)"/i)?.[1];
+	const capsUrl = pageQuery.url('a[href*="_caps"]');
+
+	if (posterPath) {
+		// Candidate poster URLs, from the -4x variant down to the -1x variant.
+		release.poster = [4, 3, 2, 1].map((scale) => unprint.prefixUrl(posterPath.replace('-4x', `-${scale}x`), entity.url));
+	}
+
+	if (include.photos && capsUrl) {
+		release.caps = await fetchCaps(capsUrl);
+	}
+
+	release.trailer = pageQuery.video('#download_select option[value*=".mp4"]', { attribute: 'value' });
+
+	return release;
+}
+
+/**
+ * Scrapes a model page into an actor profile.
+ *
+ * @param {Object} context - Page context; only `query` is used.
+ * @param {Object} options - `url` of the profile page and the `actorName` stripped from the bio heading.
+ * @returns {Object} Profile with url, description, vital stats, socials and avatar candidates.
+ */
+function scrapeProfile({ query }, { url, actorName }) {
+	const profile = { url };
+
+	const bio = Object.fromEntries(query.contents('.vitalStats li').map((entry) => {
+		// Split on the first colon only, so values that contain colons stay intact.
+		const separatorIndex = entry.indexOf(':');
+
+		if (separatorIndex === -1) {
+			return null;
+		}
+
+		const key = entry.slice(0, separatorIndex);
+		const value = entry.slice(separatorIndex + 1).trim();
+
+		// The value is trimmed before the check so whitespace-only values are dropped.
+		if (!key || !value) {
+			return null;
+		}
+
+		return [slugify(key, '_'), value];
+	}).filter(Boolean));
+
+	// The name is escaped because it goes into a RegExp; names with characters
+	// such as `(` or `.` would otherwise throw or match the wrong text.
+	const escapedName = String(actorName).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+	const description = query.content('.modelBioInfo')?.replace(new RegExp(`professional bio of ${escapedName}`, 'i'), '');
+	const awards = bio.awards && `Awards: ${bio.awards}`;
+
+	// Only parts that are present are joined, so a missing bio no longer yields a literal "undefined".
+	profile.description = [description, awards].filter(Boolean).join(' ') || null;
+
+	profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
+	profile.birthPlace = bio.birthplace;
+	profile.ethnicity = bio.ethnicity;
+
+	// Metric values are preferred; the imperial notation is passed to convert() as a fallback.
+	profile.height = unprint.extractNumber(bio.height, { match: /(\d+)\s*cm/i, matchIndex: 1 })
+		|| convert(bio.height?.match(/\d+\s*ft \d+\s*in/)?.[0], 'cm');
+
+	// `?.[0]` keeps a weight given in neither kg nor lbs from throwing on a null match.
+	profile.weight = unprint.extractNumber(bio.weight, { match: /(\d+)\s*kg/i, matchIndex: 1 })
+		|| convert(bio.weight?.match(/\d+\s*lbs/)?.[0], 'lb', 'kg');
+
+	profile.measurements = bio.measurements;
+
+	// The boolean fields stay unset unless the page states yes or no.
+	if (/yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
+	if (/no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;
+
+	if (/yes/i.test(bio.tattoos)) profile.hasTattoos = true;
+	if (/no/i.test(bio.tattoos)) profile.hasTattoos = false;
+
+	if (/yes/i.test(bio.piercings)) profile.hasPiercings = true;
+	if (/no/i.test(bio.piercings)) profile.hasPiercings = false;
+
+	profile.socials = query.urls('.vitalStats a[href*="onlyfans"], .vitalStats a[href*="twitter"], .vitalStats a[href*="instagram"]');
+	// Avatar candidates are read from the `src0_4x` attribute down to `src0_1x`.
+	profile.avatar = Array.from({ length: 4 }, (_value, index) => query.img('.modelBioPic img', { attribute: `src0_${4 - index}x` }));
+
+	return profile;
+}
+
+/**
+ * Looks up an actor's model page and scrapes it into a profile.
+ *
+ * The known actor URL is tried first, followed by model page URLs built from
+ * the name slugified with `-`, no delimiter and `_`, in that order.
+ *
+ * @param {Object} actor - Actor with `name` and optional `url`.
+ * @param {Object} entity - Entity whose `url` is the base of the model page URLs.
+ * @returns {Promise<Object|number>} Scraped profile, or the response status when the request was not OK.
+ */
+async function fetchProfile({ name: actorName, url: actorUrl }, entity) {
+	const slugUrls = ['-', '', '_'].map((delimiter) => `${entity.url}/models/${slugify(actorName, delimiter)}.html`);
+	const { res, url } = await tryUrls([actorUrl, ...slugUrls]);
+
+	if (!res.ok) {
+		return res.status;
+	}
+
+	return scrapeProfile(res.context, { url, entity, actorName });
+}
+
+// Public scraper interface; getEntryId, scrapeAll, fetchCaps and scrapeProfile stay module-private.
+module.exports = {
+	fetchLatest,
+	fetchProfile,
+	scrapeScene,
+};
--- a/src/scrapers/releases.js
+++ b/src/scrapers/releases.js
@@ -16,6 +16,7 @@ const cherrypimps = require('./cherrypimps');
 const cliffmedia = require('./cliffmedia');
 const cumlouder = require('./cumlouder');
 const czechav = require('./czechav');
+const darkkotv = require('./darkkotv');
 const modelmedia = require('./modelmedia');
 const dorcel = require('./dorcel');
 const fabulouscash = require('./fabulouscash');
@@ -118,6 +119,7 @@ module.exports = {
 	cumlouder,
 	czechav,
 	pornworld,
+	darkkotv,
 	delphine: modelmedia,
 	dorcel,
 	elegantangel: adultempire,
--- a/src/scrapers/template.js
+++ b/src/scrapers/template.js
@@ -30,6 +30,17 @@ function scrapeAll(scenes) {
 	});
 }

+/**
+ * Template: fetches one listing page and scrapes its scene elements.
+ *
+ * @param {Object} channel - Channel entity; `channel.url` is the base of the page URL.
+ * @param {number} [page=1] - Listing page number, appended to the channel URL.
+ * @returns {Promise<Object[]|number>} Result of scrapeAll, or the response status when the request was not OK.
+ */
+async function fetchLatest(channel, page = 1) {
+	const res = await unprint.get(`${channel.url}/${page}`, { selectAll: '.scene' });
+
+	if (!res.ok) {
+		return res.status;
+	}
+
+	return scrapeAll(res.context, channel);
+}
+
 function scrapeScene({ query }, { url }) {
 	const release = {};

@@ -62,17 +73,6 @@ function scrapeProfile({ query }) {
 	return profile;
 }

-async function fetchLatest(channel, page = 1) {
-	const url = `${channel.url}/${page}`;
-	const res = await unprint.get(url, { selectAll: '.scene' });
-
-	if (res.ok) {
-		return scrapeAll(res.context, channel);
-	}
-
-	return res.status;
-}
-
 async function fetchProfile({ name: actorName }, entity) {
 	const url = `${entity.url}/actors/${slugify(actorName, '_')}`;
 	const res = await unprint.get(url);