Improved Woodman profile scraper.

This commit is contained in:
DebaucheryLibrarian 2026-01-18 02:14:39 +01:00
parent 841d3a8475
commit ec85faf93a
4 changed files with 25 additions and 15 deletions

View File

@ -818,7 +818,7 @@ const networks = [
{
slug: 'pierrewoodman',
name: 'Pierre Woodman',
url: 'http://www.woodmancastingx.com',
url: 'https://www.woodmanfilms.com',
},
{
slug: 'xempire',

View File

@ -31,6 +31,7 @@ const inspector = new Inspector();
let done = false;
unprint.options({
logErrors: false,
timeout: argv.requestTimeout,
userAgent: 'traxxx',
browserUserAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',

View File

@ -4,6 +4,7 @@ const unprint = require('unprint');
const slugify = require('../utils/slugify');
const capitalize = require('../utils/capitalize');
const tryUrls = require('../utils/try-urls');
function scrapeAll(scenes, channel, discard = true) {
return scenes.reduce((acc, { query, element }) => {
@ -98,23 +99,22 @@ function scrapeScene({ query, html }, { url, entity }) {
return release;
}
function scrapeProfile({ query }, entity) {
const profile = {};
function scrapeProfile({ query }, url, entity) {
const profile = { url };
profile.avatar = query.img('.actor img');
profile.nationality = query.content('.nationality, .nationnality'); // sic
profile.avatar = query.img('.actor img, .avatar img');
profile.nationality = query.content(['.nationality, .nationnality', '//strong[contains(text(), "Nationnality")]'])?.replace(/nationn?ality\s*:/i, '').trim(); // sic
profile.scenes = scrapeAll(unprint.initAll(query.all('.videos .item')), entity, false);
profile.scenes = scrapeAll(unprint.initAll(query.all('.videos .item, .list .scene.item')), entity, false);
return profile;
}
async function getActorUrl(actor) {
if (actor.url) {
return actor.url;
return [actor.url];
}
// Wake up'n Fuck has higher quality images, but not all performers are available, vice versa they are
const res = await unprint.get('https://www.woodmancastingx.com');
if (!res.ok) {
@ -139,20 +139,24 @@ async function getActorUrl(actor) {
return null;
}
return `https://www.woodmancastingx.com/search/redirection/actors/${actorId}`;
// WUNF has the same avatars at higher quality, but does not list all performers, so the Woodman Casting X URL follows as the second candidate
return [
`https://www.wakeupnfuck.com/actor/${actor.slug}_${actorId}`,
`https://www.woodmancastingx.com/girl/${actor.slug}_${actorId}`,
];
}
async function fetchProfile(actor, entity) {
const actorUrl = await getActorUrl(actor);
const actorUrls = await getActorUrl(actor);
if (typeof actorUrl !== 'string') {
return actorUrl;
if (!Array.isArray(actorUrls)) {
return actorUrls;
}
const res = await unprint.get(actorUrl);
const { res, url } = await tryUrls(actorUrls);
if (res.ok) {
return scrapeProfile(res.context, entity);
return scrapeProfile(res.context, url, entity);
}
return res.status;

View File

@ -2,6 +2,7 @@
const test = require('node:test');
const assert = require('node:assert/strict');
const unprint = require('unprint');
const argv = require('../src/argv');
const include = require('../src/utils/argv-include')(argv);
@ -12,6 +13,10 @@ const { resolveLayoutScraper } = require('../src/scrapers/resolve');
const getRecursiveParameters = require('../src/utils/get-recursive-parameters');
const knex = require('../src/knex');
unprint.options({
logErrors: false,
});
const actors = [
// vixen
{ entity: 'vixen', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
@ -176,7 +181,7 @@ const actors = [
{ entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] },
{ entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] },
{ entity: 'score', name: 'Vanessa Blue', fields: ['avatar', 'gender', 'placeOfResidence', 'ethnicity', 'height', 'weight', 'measurements', 'hairColor', 'dateOfBirth'] },
{ entity: 'pierrewoodman', name: 'Makayla Cox', fields: ['avatar', 'nationality'] },
{ entity: 'pierrewoodman', name: 'Abby Lee Brazil', fields: ['avatar', 'nationality'] },
];
const actorScrapers = scrapers.actors;