Improved Woodman profile scraper.

This commit is contained in:
DebaucheryLibrarian
2026-01-18 02:14:39 +01:00
parent 841d3a8475
commit ec85faf93a
4 changed files with 25 additions and 15 deletions

View File

@@ -31,6 +31,7 @@ const inspector = new Inspector();
let done = false;
unprint.options({
logErrors: false,
timeout: argv.requestTimeout,
userAgent: 'traxxx',
browserUserAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',

View File

@@ -4,6 +4,7 @@ const unprint = require('unprint');
const slugify = require('../utils/slugify');
const capitalize = require('../utils/capitalize');
const tryUrls = require('../utils/try-urls');
function scrapeAll(scenes, channel, discard = true) {
return scenes.reduce((acc, { query, element }) => {
@@ -98,23 +99,22 @@ function scrapeScene({ query, html }, { url, entity }) {
return release;
}
/**
 * Build a performer profile from a parsed profile page.
 *
 * @param {object} context - Page context; only its `query` helper is used.
 * @param {string} url - URL the page was fetched from; stored on the profile as-is.
 * @param {object} entity - Passed through to `scrapeAll` for the scenes listed on the page.
 * @returns {object} Profile with `url`, `avatar`, `nationality` and `scenes`.
 */
function scrapeProfile({ query }, url, entity) {
	const profile = { url };

	// Two avatar containers are queried; presumably one per site layout (TODO confirm).
	profile.avatar = query.img('.actor img, .avatar img');

	// The misspelled "nationnality" is matched on purpose (sic). The XPath fallback
	// selects the label element itself, so the "Nationnality:" prefix is stripped
	// from the text. Stays undefined when neither selector yields content.
	profile.nationality = query.content(['.nationality, .nationnality', '//strong[contains(text(), "Nationnality")]'])?.replace(/nationn?ality\s*:/i, '').trim(); // sic

	// `false` is scrapeAll's `discard` argument.
	profile.scenes = scrapeAll(unprint.initAll(query.all('.videos .item, .list .scene.item')), entity, false);

	return profile;
}
async function getActorUrl(actor) {
if (actor.url) {
return actor.url;
return [actor.url];
}
// Wake Up'n Fuck has higher quality images, but not every performer is available there, and vice versa
const res = await unprint.get('https://www.woodmancastingx.com');
if (!res.ok) {
@@ -139,20 +139,24 @@ async function getActorUrl(actor) {
return null;
}
return `https://www.woodmancastingx.com/search/redirection/actors/${actorId}`;
// WUNF (Wake Up'n Fuck) has the same avatars at higher quality but does not list every performer, so both URLs are returned with WUNF first
return [
`https://www.wakeupnfuck.com/actor/${actor.slug}_${actorId}`,
`https://www.woodmancastingx.com/girl/${actor.slug}_${actorId}`,
];
}
async function fetchProfile(actor, entity) {
const actorUrl = await getActorUrl(actor);
const actorUrls = await getActorUrl(actor);
if (typeof actorUrl !== 'string') {
return actorUrl;
if (!Array.isArray(actorUrls)) {
return actorUrls;
}
const res = await unprint.get(actorUrl);
const { res, url } = await tryUrls(actorUrls);
if (res.ok) {
return scrapeProfile(res.context, entity);
return scrapeProfile(res.context, url, entity);
}
return res.status;