Improved Woodman profile scraper.
This commit is contained in:
@@ -31,6 +31,7 @@ const inspector = new Inspector();
|
||||
let done = false;
|
||||
|
||||
unprint.options({
|
||||
logErrors: false,
|
||||
timeout: argv.requestTimeout,
|
||||
userAgent: 'traxxx',
|
||||
browserUserAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',
|
||||
|
||||
@@ -4,6 +4,7 @@ const unprint = require('unprint');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
const capitalize = require('../utils/capitalize');
|
||||
const tryUrls = require('../utils/try-urls');
|
||||
|
||||
function scrapeAll(scenes, channel, discard = true) {
|
||||
return scenes.reduce((acc, { query, element }) => {
|
||||
@@ -98,23 +99,22 @@ function scrapeScene({ query, html }, { url, entity }) {
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfile({ query }, entity) {
|
||||
const profile = {};
|
||||
function scrapeProfile({ query }, url, entity) {
|
||||
const profile = { url };
|
||||
|
||||
profile.avatar = query.img('.actor img');
|
||||
profile.nationality = query.content('.nationality, .nationnality'); // sic
|
||||
profile.avatar = query.img('.actor img, .avatar img');
|
||||
profile.nationality = query.content(['.nationality, .nationnality', '//strong[contains(text(), "Nationnality")]'])?.replace(/nationn?ality\s*:/i, '').trim(); // sic
|
||||
|
||||
profile.scenes = scrapeAll(unprint.initAll(query.all('.videos .item')), entity, false);
|
||||
profile.scenes = scrapeAll(unprint.initAll(query.all('.videos .item, .list .scene.item')), entity, false);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function getActorUrl(actor) {
|
||||
if (actor.url) {
|
||||
return actor.url;
|
||||
return [actor.url];
|
||||
}
|
||||
|
||||
// Wake up'n Fuck has higher quality images, but not all performers are available, vice versa they are
|
||||
const res = await unprint.get('https://www.woodmancastingx.com');
|
||||
|
||||
if (!res.ok) {
|
||||
@@ -139,20 +139,24 @@ async function getActorUrl(actor) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return `https://www.woodmancastingx.com/search/redirection/actors/${actorId}`;
|
||||
// WUNF (Wake Up'n Fuck) has the same avatars at higher quality, but does not list all performers, so the Woodman Casting X URL is included as well
|
||||
return [
|
||||
`https://www.wakeupnfuck.com/actor/${actor.slug}_${actorId}`,
|
||||
`https://www.woodmancastingx.com/girl/${actor.slug}_${actorId}`,
|
||||
];
|
||||
}
|
||||
|
||||
async function fetchProfile(actor, entity) {
|
||||
const actorUrl = await getActorUrl(actor);
|
||||
const actorUrls = await getActorUrl(actor);
|
||||
|
||||
if (typeof actorUrl !== 'string') {
|
||||
return actorUrl;
|
||||
if (!Array.isArray(actorUrls)) {
|
||||
return actorUrls;
|
||||
}
|
||||
|
||||
const res = await unprint.get(actorUrl);
|
||||
const { res, url } = await tryUrls(actorUrls);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.context, entity);
|
||||
return scrapeProfile(res.context, url, entity);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
|
||||
Reference in New Issue
Block a user