Updated Woodman scraper to use unprint.

2026-01-18 01:51:13 +01:00
parent 9ec2ad25a7
commit bfb48abfdd
4 changed files with 85 additions and 69 deletions
--- a/src/scrapers/pierrewoodman.js
+++ b/src/scrapers/pierrewoodman.js
@@ -1,154 +1,165 @@
 'use strict';

-const qu = require('../utils/qu');
+const unprint = require('unprint');
+
 const slugify = require('../utils/slugify');
 const capitalize = require('../utils/capitalize');

-function removeImageBorder(source) {
-	if (!source) {
-		return null;
-	}
-
-	const sourceNoId = source.replace(/_[a-z0-9]+(_v\d)?\.jpg/, '.jpg');
-
-	return [
-		sourceNoId
-			.replace(/actoravatar_/, 'actoravatarnoborder_')
-			.replace(/scenedefault/, 'scenenoborder'),
-		sourceNoId,
-		source,
-	];
-}
-
-function mapActor(actorEl, query, entity) {
-	const avatar = query.img(actorEl);
-
-	return {
-		name: capitalize(query.cnt(actorEl, '.name, .informations p'), { uncapitalize: true }),
-		url: query.url(actorEl, null, 'href', { origin: entity.url }),
-		avatar: removeImageBorder(avatar),
-	};
-}
-
 function scrapeAll(scenes, channel, discard = true) {
-	return scenes.map(({ query, el }) => {
+	return scenes.reduce((acc, { query, element }) => {
 		const release = {};

-		release.url = query.url(el, null, 'href', { origin: channel.url });
+		release.url = unprint.query.url(element, null, { attribute: 'href', origin: channel.url });
+
 		const { hostname, pathname } = new URL(release.url);

 		release.entryId = pathname.match(/_(\d+)/)?.[1];
 		release.channel = hostname.match(/(\w+)\.com/)?.[1];

 		if (discard && release.channel !== channel.slug) {
-			return null;
+			acc.unextracted.push(release); // push() mutates the accumulator; concat() would return a new array and drop the release
+
+			return acc;
 		}

 		release.title = query.content('.title, .informations h3');
-		release.duration = query.duration('.duration, .timer');
+		release.duration = query.duration('.duration, .timer, .infos');

-		release.actors = query.cnt('.sub')?.split(/,\s*/);
+		release.actors = query.content('.sub')?.split(/,\s*/);

-		release.poster = removeImageBorder(query.img('.thumb, picture img'));
+		release.poster = query.img('.thumb, picture img');

-		return release;
-	}).filter(Boolean);
+		acc.scenes.push(release); // push() so the scraped release actually ends up in the returned scenes list
+
+		return acc;
+	}, {
+		scenes: [],
+		unextracted: [],
+	});
 }

-function scrapeScene({ query, html }, url, entity) {
+async function fetchLatest(channel, page) {
+	const res = await unprint.get(channel.parameters?.latest
+		? `${channel.parameters.latest}?page=${page}`
+		: `${channel.url}/videos?page=${page}`, { selectAll: '.items .scene' });
+
+	if (res.ok) {
+		return scrapeAll(res.context, channel);
+	}
+
+	return res.status;
+}
+
+function scrapeScene({ query, html }, { url, entity }) {
 	const release = {};

 	release.entryId = new URL(url).pathname.match(/_(\d+)/)?.[1];

-	const title = query.cnt('.page_title h1, h2');
+	const title = query.content('.page_title h1, h2');
 	const wunfTitle = title.match(/wunf \d+/i)?.[0];

 	release.title = wunfTitle ? wunfTitle.toUpperCase() : title;
-	release.description = query.cnt('.info_container .description');
+	release.description = query.content('.info_container .description');

-	release.date = query.date('.info_container .info_line:nth-child(1)', 'YYYY-MM-DD') || query.date('.description', 'DD MMMM YYYY', /\d{1,2} \w+ \d{4}/);
-	release.actors = query.all('.girl_item, .starring .item').map((actorEl) => mapActor(actorEl, query, entity));
+	release.date = query.date('.info_container .info_line:nth-child(1)', 'YYYY-MM-DD')
+		|| query.date('.description', 'D MMMM YYYY', { match: /\d{1,2} \w+ \d{4}/ });
+
+	release.actors = query.all('.girl_item, .starring .item').map((actorEl) => {
+		const avatar = unprint.query.img(actorEl);
+
+		return {
+			name: capitalize(unprint.query.content(actorEl, '.name, .informations p'), { uncapitalize: true }),
+			url: unprint.query.url(actorEl, null, { origin: entity.url }),
+			avatar,
+		};
+	});

 	release.duration = query.duration('.infos .description');

 	if (!release.duration) {
-		const duration = query.cnt('.info_container .info_line:nth-child(2)');
+		const duration = query.content('.info_container .info_line:nth-child(2)');

 		release.duration = (duration.match(/(\d+) hour/)?.[1] || 0) * 3600
 			+ (duration.match(/(\d+) minutes/)?.[1] || 0) * 60;
 	}

-	release.tags = query.cnts('.tags a:not(.more_tag)');
-	release.poster = removeImageBorder(html.match(/image: "(.*?)"/)?.[1]);
+	release.tags = query.contents('.tags a:not(.more_tag)');
+	release.poster = html.match(/image: "(.*?)"/)?.[1];

 	release.trailer = html.match(/url: "(.*mp4.*)"/g)?.map((src) => ({
 		src: src.match(/"(.*)"/)?.[1],
 		quality: Number(src.match(/[-/](\d+)p/)?.[1]),
 	}));

+	if (query.exists('.download-icon-4k')) {
+		release.qualities = [2160];
+	}
+
 	return release;
 }

 function scrapeProfile({ query }, entity) {
 	const profile = {};

-	profile.avatar = removeImageBorder(query.img('.actor img'));
-	profile.nationality = query.cnt('.nationality, .nationnality'); // sic
+	profile.avatar = query.img('.actor img');
+	profile.nationality = query.content('.nationality, .nationnality'); // sic

-	profile.scenes = scrapeAll(qu.initAll(query.all('.videos .item')), entity, false);
+	profile.scenes = scrapeAll(unprint.initAll(query.all('.videos .item')), entity, false).scenes; // scrapeAll returns { scenes, unextracted }; keep profile.scenes a plain array

 	return profile;
 }

-async function fetchLatest(channel, page) {
-	const res = await qu.getAll(channel.parameters?.latest
-		? `${channel.parameters.latest}?page=${page}`
-		: `${channel.url}/videos?page=${page}`, '.items .scene');
-
-	if (res.ok) {
-		return scrapeAll(res.items, channel);
+async function getActorUrl(actor) {
+	if (actor.url) {
+		return actor.url;
 	}

-	return res.status;
-}
-
-async function fetchProfile(baseActor, entity) {
-	const res = await qu.get('https://www.woodmancastingx.com');
+	// Wake Up'n Fuck has higher quality images, but not every performer is listed there, whereas Woodman Casting X does list them
+	const res = await unprint.get('https://www.woodmancastingx.com');

 	if (!res.ok) {
 		return res.status;
 	}

-	const searchUrl = qu.prefixUrl(res.html.match(/"(.*searchCompletion\.js)"/)?.[1], 'https://www.woodmancastingx.com');
+	const searchUrl = unprint.prefixUrl(res.context.html.match(/"(.*searchCompletion\.js)"/)?.[1], 'https://www.woodmancastingx.com');

 	if (!searchUrl) {
 		return null;
 	}

-	const searchRes = await qu.get(searchUrl, null, null, { decodeJSON: true });
+	const searchRes = await unprint.get(searchUrl);

 	if (!searchRes.ok) {
 		return searchRes.status;
 	}

-	const [actorId] = searchRes.body.actors.find(([_actorId, actorName]) => slugify(actorName) === baseActor.slug) || [];
+	const [actorId] = searchRes.data.actors.find(([_actorId, actorName]) => slugify(actorName) === actor.slug) || [];

 	if (!actorId) {
 		return null;
 	}

-	const actorRes = await qu.get(`https://www.woodmancastingx.com/search/redirection/actors/${actorId}`);
+	return `https://www.woodmancastingx.com/search/redirection/actors/${actorId}`;
+}

-	if (actorRes.ok) {
-		return scrapeProfile(actorRes.item, entity);
+async function fetchProfile(actor, entity) {
+	const actorUrl = await getActorUrl(actor);
+
+	if (typeof actorUrl !== 'string') {
+		return actorUrl;
 	}

-	return actorRes.status;
+	const res = await unprint.get(actorUrl);
+
+	if (res.ok) {
+		return scrapeProfile(res.context, entity);
+	}
+
+	return res.status;
 }

 module.exports = {
 	fetchLatest,
 	scrapeScene,
 	fetchProfile,
-	deprecated: true,
 };
--- a/src/scrapers/score.js
+++ b/src/scrapers/score.js
@@ -24,7 +24,7 @@ function resizeSrc(src) {
 function deriveDate(query) {
 	const now = new Date();

-	// Nov. 12th
+	// Nov. 2025
 	const dateMY = query.date('.i-date', 'MMM. YYYY', { match: /(\w+\.? \d{4})/ });

 	if (dateMY) {
@@ -34,6 +34,7 @@ function deriveDate(query) {
 		};
 	}

+	// Nov. 12th
 	const dateMDo = query.date('.i-date', 'MMM. Do', { match: /(\w+\.? \d{1,2}\w+)/ });

 	if (dateMDo) {
@@ -47,6 +48,7 @@ function deriveDate(query) {
 		};
 	}

+	// 8 Weeks Ago
 	const dateAgo = query.dateAgo('.i-date');

 	if (dateAgo) {
--- a/src/scrapers/scrapers.js
+++ b/src/scrapers/scrapers.js
@@ -279,7 +279,7 @@ const scrapers = {
 		wankzvr,
 		tranzvr: wankzvr,
 		milfvr: wankzvr,
-		// nubilus
+		// nubiles
 		anilos: nubiles,
 		brattysis: nubiles,
 		deeplush: nubiles,
@@ -298,6 +298,9 @@ const scrapers = {
 		aziani,
 		'2poles1hole': aziani,
 		creampiled: aziani,
+		// woodman
+		pierrewoodman,
+		wakeupnfuck: pierrewoodman,
 		// etc
 		'18vr': badoink,
 		theflourishxxx: theflourish,
@@ -362,7 +365,6 @@ const scrapers = {
 		pervcity,
 		dpdiva: pervcity,
 		pervertgallery: fullpornnetwork,
-		pierrewoodman,
 		porncz,
 		pornhub,
 		pornworld,
--- a/tests/profiles.js
+++ b/tests/profiles.js
@@ -176,6 +176,7 @@ const actors = [
 	{ entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] },
 	{ entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] },
 	{ entity: 'score', name: 'Vanessa Blue', fields: ['avatar', 'gender', 'placeOfResidence', 'ethnicity', 'height', 'weight', 'measurements', 'hairColor', 'dateOfBirth'] },
+	{ entity: 'pierrewoodman', name: 'Makayla Cox', fields: ['avatar', 'nationality'] },
 ];

 const actorScrapers = scrapers.actors;