From ec85faf93af47c8228c7c61962129325ccb94559 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sun, 18 Jan 2026 02:14:39 +0100 Subject: [PATCH] Improved Woodman profile scraper. --- seeds/01_networks.js | 2 +- src/app.js | 1 + src/scrapers/pierrewoodman.js | 30 +++++++++++++++++------------- tests/profiles.js | 7 ++++++- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/seeds/01_networks.js b/seeds/01_networks.js index 36cf5c84..f5ca8352 100755 --- a/seeds/01_networks.js +++ b/seeds/01_networks.js @@ -818,7 +818,7 @@ const networks = [ { slug: 'pierrewoodman', name: 'Pierre Woodman', - url: 'http://www.woodmancastingx.com', + url: 'https://www.woodmanfilms.com', }, { slug: 'xempire', diff --git a/src/app.js b/src/app.js index b85fc14d..1ff5bfbf 100755 --- a/src/app.js +++ b/src/app.js @@ -31,6 +31,7 @@ const inspector = new Inspector(); let done = false; unprint.options({ + logErrors: false, timeout: argv.requestTimeout, userAgent: 'traxxx', browserUserAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36', diff --git a/src/scrapers/pierrewoodman.js b/src/scrapers/pierrewoodman.js index 7de34326..0acc7f83 100755 --- a/src/scrapers/pierrewoodman.js +++ b/src/scrapers/pierrewoodman.js @@ -4,6 +4,7 @@ const unprint = require('unprint'); const slugify = require('../utils/slugify'); const capitalize = require('../utils/capitalize'); +const tryUrls = require('../utils/try-urls'); function scrapeAll(scenes, channel, discard = true) { return scenes.reduce((acc, { query, element }) => { @@ -98,23 +99,22 @@ function scrapeScene({ query, html }, { url, entity }) { return release; } -function scrapeProfile({ query }, entity) { - const profile = {}; +function scrapeProfile({ query }, url, entity) { + const profile = { url }; - profile.avatar = query.img('.actor img'); - profile.nationality = query.content('.nationality, .nationnality'); // sic + profile.avatar = query.img('.actor img, .avatar img'); + profile.nationality = query.content(['.nationality, .nationnality', '//strong[contains(text(), "Nationnality")]'])?.replace(/nationn?ality\s*:/i, '').trim(); // sic - profile.scenes = scrapeAll(unprint.initAll(query.all('.videos .item')), entity, false); + profile.scenes = scrapeAll(unprint.initAll(query.all('.videos .item, .list .scene.item')), entity, false); return profile; } async function getActorUrl(actor) { if (actor.url) { - return actor.url; + return [actor.url]; } - // Wake up'n Fuck has higher quality images, but not all performers are available, vice versa they are const res = await unprint.get('https://www.woodmancastingx.com'); if (!res.ok) { @@ -139,20 +139,24 @@ async function getActorUrl(actor) { return null; } - return `https://www.woodmancastingx.com/search/redirection/actors/${actorId}`; + // WUNF has the same avatars at higher quality, but not all performers + return [ + `https://www.wakeupnfuck.com/actor/${actor.slug}_${actorId}`, + `https://www.woodmancastingx.com/girl/${actor.slug}_${actorId}`, + ]; } async function fetchProfile(actor, entity) { - const actorUrl = await getActorUrl(actor); + const actorUrls = await getActorUrl(actor); - if (typeof actorUrl !== 'string') { - return actorUrl; + if (!Array.isArray(actorUrls)) { + return actorUrls; } - const res = await unprint.get(actorUrl); + const { res, url } = await tryUrls(actorUrls); if (res.ok) { - return scrapeProfile(res.context, entity); + return scrapeProfile(res.context, url, entity); } return res.status; diff --git a/tests/profiles.js b/tests/profiles.js index 83313e04..86dbb127 100644 --- a/tests/profiles.js +++ b/tests/profiles.js @@ -2,6 +2,7 @@ const test = require('node:test'); const assert = require('node:assert/strict'); +const unprint = require('unprint'); const argv = require('../src/argv'); const include = require('../src/utils/argv-include')(argv); @@ -12,6 +13,10 @@ const { resolveLayoutScraper } = require('../src/scrapers/resolve'); const getRecursiveParameters = require('../src/utils/get-recursive-parameters'); const knex = require('../src/knex'); +unprint.options({ + logErrors: false, +}); + const actors = [ // vixen { entity: 'vixen', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] }, @@ -176,7 +181,7 @@ const actors = [ { entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] }, { entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] }, { entity: 'score', name: 'Vanessa Blue', fields: ['avatar', 'gender', 'placeOfResidence', 'ethnicity', 'height', 'weight', 'measurements', 'hairColor', 'dateOfBirth'] }, - { entity: 'pierrewoodman', name: 'Makayla Cox', fields: ['avatar', 'nationality'] }, + { entity: 'pierrewoodman', name: 'Abby Lee Brazil', fields: ['avatar', 'nationality'] }, ]; const actorScrapers = scrapers.actors;