From 762e605bd1c6b81a0864a63f675881df3c85ce04 Mon Sep 17 00:00:00 2001
From: DebaucheryLibrarian <moonloop.adult@protonmail.com>
Date: Sun, 1 Feb 2026 01:31:45 +0100
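Subject: [PATCH] Extracting shoot IDs from title in PornBox scraper.

Also replacing the http util with unprint for the AnalVids actor search,
removing the commented-out browser-session fetchLatest, making slugify
return an empty string instead of echoing back missing or non-string,
non-array input, and bumping unprint to ^0.18.13.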
---
 package-lock.json        |   8 ++--
 package.json             |   2 +-
 src/scrapers/analvids.js | 100 +++++++++++++--------------------------
 src/scrapers/pornbox.js  |  14 +++++-
 src/utils/slugify.js     |   2 +-
 5 files changed, 51 insertions(+), 75 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index a19753d6..f3a5b41b 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -94,7 +94,7 @@
                 "tunnel": "0.0.6",
                 "ua-parser-js": "^1.0.37",
                 "undici": "^5.28.1",
-                "unprint": "^0.18.11",
+                "unprint": "^0.18.13",
                 "url-pattern": "^1.0.3",
                 "v-tooltip": "^2.1.3",
                 "video.js": "^8.6.1",
@@ -20380,9 +20380,9 @@
             }
         },
         "node_modules/unprint": {
-            "version": "0.18.11",
-            "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.11.tgz",
-            "integrity": "sha512-mHOfweWWLqhEIRnjhdqCzEpHhIx+m/GwE2eDvJNNbnVEPbV8q8EaN6eGH3vkcAwDVgNIOakZaTZFK+VKy13Lsg==",
+            "version": "0.18.13",
+            "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.13.tgz",
+            "integrity": "sha512-vjUF7X7/dg2Os/zesJ0+23eVc7NH2oKzspPSyBzcIx6IuEcVm1rdlD9dAxdaRMUNBWEeA5ekyk263CBI3lyaBQ==",
             "dependencies": {
                 "bottleneck": "^2.19.5",
                 "cookie": "^1.1.1",
diff --git a/package.json b/package.json
index 3135623e..d1e86289 100755
--- a/package.json
+++ b/package.json
@@ -153,7 +153,7 @@
         "tunnel": "0.0.6",
         "ua-parser-js": "^1.0.37",
         "undici": "^5.28.1",
-        "unprint": "^0.18.11",
+        "unprint": "^0.18.13",
         "url-pattern": "^1.0.3",
         "v-tooltip": "^2.1.3",
         "video.js": "^8.6.1",
diff --git a/src/scrapers/analvids.js b/src/scrapers/analvids.js
index 4b3836de..dd2670ad 100644
--- a/src/scrapers/analvids.js
+++ b/src/scrapers/analvids.js
@@ -2,7 +2,6 @@
 
 const unprint = require('unprint');
 
-const http = require('../utils/http');
 const slugify = require('../utils/slugify');
 
 function extractTitle(originalTitle) {
@@ -43,6 +42,25 @@ function scrapeAll(scenes, channel) {
 	});
 }
 
+async function fetchLatest(channel, page) {
+	// const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel
+	// studios as channels
+	const url = `${channel.url}/latest/${page}`;
+
+	const res = await unprint.get(url, {
+		selectAll: '.card-scene',
+		headers: {
+			Referer: url,
+		},
+	});
+
+	if (res.ok) {
+		return scrapeAll(res.context, channel);
+	}
+
+	return res.status;
+}
+
 function scrapeScene({ query }, url) {
 	const release = {};
 
@@ -76,71 +94,6 @@ function scrapeScene({ query }, url) {
 	return release;
 }
 
-function scrapeProfile({ query }, url, channel) {
-	const profile = { url };
-
-	profile.nationality = query.content('.model__info a[href*="/nationality"]');
-	profile.age = query.number('//td[contains(text(), "Age")]/following-sibling::td');
-
-	profile.avatar = query.img('.model__left img');
-
-	profile.scenes = scrapeAll(unprint.initAll(query.all('.card-scene')), channel);
-
-	return profile;
-}
-
-async function fetchLatest(channel, page) {
-	// const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel
-	// studios as channels
-	const url = `${channel.url}/latest/${page}`;
-
-	const res = await unprint.get(url, {
-		selectAll: '.card-scene',
-		headers: {
-			Referer: url,
-		},
-	});
-
-	if (res.ok) {
-		return scrapeAll(res.context, channel);
-	}
-
-	return res.status;
-}
-
-/*
-async function fetchLatest(channel, page) {
-	// const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel
-	// const res = await unprint.get(`${channel.url}/latest/${page}`, { selectAll: '.card-scene' }); // studios as channels
-	const url = `${channel.url}/latest/${page}`; // studios as channels
-
-	const { tab } = await http.getBrowserSession('analvids', {
-		bypass: {
-			headless: false,
-		},
-	});
-
-	const res = await tab.goto(url);
-
-	const status = res.status();
-
-	console.log('STATUS', status);
-
-	if (status === 200) {
-		const html = await tab.content();
-		const context = unprint.initAll(html, '.card-scene'); // studios as channels
-
-		const scenes = scrapeAll(context, channel);
-
-		tab.close();
-
-		return scenes;
-	}
-
-	return res.status;
-}
-*/
-
 async function fetchScene(url) {
 	const res = await unprint.get(url, {
 		headers: {
@@ -155,6 +108,19 @@ async function fetchScene(url) {
 	return res.status;
 }
 
+function scrapeProfile({ query }, url, channel) {
+	const profile = { url };
+
+	profile.nationality = query.content('.model__info a[href*="/nationality"]');
+	profile.age = query.number('//td[contains(text(), "Age")]/following-sibling::td');
+
+	profile.avatar = query.img('.model__left img');
+
+	profile.scenes = scrapeAll(unprint.initAll(query.all('.card-scene')), channel);
+
+	return profile;
+}
+
 async function getActorUrl(actor, channel) {
 	if (actor.url) {
 		return actor.url;
@@ -162,7 +128,7 @@ async function getActorUrl(actor, channel) {
 
 	const searchUrl = `${channel.url}/api/autocomplete/search?q=${slugify(actor.name, '+')}`;
 
-	const searchRes = await http.get(searchUrl, {
+	const searchRes = await unprint.get(searchUrl, {
 		headers: {
 			Referer: actor.url,
 		},
diff --git a/src/scrapers/pornbox.js b/src/scrapers/pornbox.js
index 87364cdd..35bc0738 100755
--- a/src/scrapers/pornbox.js
+++ b/src/scrapers/pornbox.js
@@ -25,14 +25,24 @@ async function getTrailer(data) {
 	return null;
 }
 
+function extractShootId(title) {
+	if (!title) {
+		return null;
+	}
+
+	return title.trim().match(/[A-Z]{2,3}\d{3,4}\w?/)?.[0].toUpperCase();
+}
+
 async function scrapeScene(data, channel, include) {
 	const release = {};
 	const entityUrl = new URL(channel.url).origin;
 
+	release.entryId = data.id;
+
 	release.title = data.scene_name || data.custom_name;
 
-	release.entryId = data.id;
-	release.url = `${entityUrl}/watch/${data.id}/${slugify(release.title, '_')}`;
+	release.url = `${entityUrl}/watch/${data.id}/${slugify(release.title, '_') || ''}`;
+	release.shootId = extractShootId(release.title);
 
 	release.date = new Date(data.release_date || data.publish_date);
 	release.duration = unprint.extractDuration(data.runtime);
diff --git a/src/utils/slugify.js b/src/utils/slugify.js
index 5c7451d2..92fd64c1 100755
--- a/src/utils/slugify.js
+++ b/src/utils/slugify.js
@@ -56,7 +56,7 @@ function slugify(strings, delimiter = '-', {
 	symbolRegex = defaultSymbolRegex,
 } = {}) {
 	if (!strings || (typeof strings !== 'string' && !Array.isArray(strings))) {
-		return strings;
+		return '';
 	}
 
 	const string = [].concat(strings).join(' ');