Improved Woodman profile scraper.
This commit is contained in:
parent
841d3a8475
commit
ec85faf93a
|
|
@ -818,7 +818,7 @@ const networks = [
|
|||
{
|
||||
slug: 'pierrewoodman',
|
||||
name: 'Pierre Woodman',
|
||||
url: 'http://www.woodmancastingx.com',
|
||||
url: 'https://www.woodmanfilms.com',
|
||||
},
|
||||
{
|
||||
slug: 'xempire',
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ const inspector = new Inspector();
|
|||
let done = false;
|
||||
|
||||
unprint.options({
|
||||
logErrors: false,
|
||||
timeout: argv.requestTimeout,
|
||||
userAgent: 'traxxx',
|
||||
browserUserAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ const unprint = require('unprint');
|
|||
|
||||
const slugify = require('../utils/slugify');
|
||||
const capitalize = require('../utils/capitalize');
|
||||
const tryUrls = require('../utils/try-urls');
|
||||
|
||||
function scrapeAll(scenes, channel, discard = true) {
|
||||
return scenes.reduce((acc, { query, element }) => {
|
||||
|
|
@ -98,23 +99,22 @@ function scrapeScene({ query, html }, { url, entity }) {
|
|||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfile({ query }, entity) {
|
||||
const profile = {};
|
||||
function scrapeProfile({ query }, url, entity) {
|
||||
const profile = { url };
|
||||
|
||||
profile.avatar = query.img('.actor img');
|
||||
profile.nationality = query.content('.nationality, .nationnality'); // sic
|
||||
profile.avatar = query.img('.actor img, .avatar img');
|
||||
profile.nationality = query.content(['.nationality, .nationnality', '//strong[contains(text(), "Nationnality")]'])?.replace(/nationn?ality\s*:/i, '').trim(); // sic
|
||||
|
||||
profile.scenes = scrapeAll(unprint.initAll(query.all('.videos .item')), entity, false);
|
||||
profile.scenes = scrapeAll(unprint.initAll(query.all('.videos .item, .list .scene.item')), entity, false);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function getActorUrl(actor) {
|
||||
if (actor.url) {
|
||||
return actor.url;
|
||||
return [actor.url];
|
||||
}
|
||||
|
||||
// Wake up'n Fuck has higher quality images, but not all performers are available, vice versa they are
|
||||
const res = await unprint.get('https://www.woodmancastingx.com');
|
||||
|
||||
if (!res.ok) {
|
||||
|
|
@ -139,20 +139,24 @@ async function getActorUrl(actor) {
|
|||
return null;
|
||||
}
|
||||
|
||||
return `https://www.woodmancastingx.com/search/redirection/actors/${actorId}`;
|
||||
// WUNF has the same avatars at higher quality, but not all performers
|
||||
return [
|
||||
`https://www.wakeupnfuck.com/actor/${actor.slug}_${actorId}`,
|
||||
`https://www.woodmancastingx.com/girl/${actor.slug}_${actorId}`,
|
||||
];
|
||||
}
|
||||
|
||||
async function fetchProfile(actor, entity) {
|
||||
const actorUrl = await getActorUrl(actor);
|
||||
const actorUrls = await getActorUrl(actor);
|
||||
|
||||
if (typeof actorUrl !== 'string') {
|
||||
return actorUrl;
|
||||
if (!Array.isArray(actorUrls)) {
|
||||
return actorUrls;
|
||||
}
|
||||
|
||||
const res = await unprint.get(actorUrl);
|
||||
const { res, url } = await tryUrls(actorUrls);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.context, entity);
|
||||
return scrapeProfile(res.context, url, entity);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
const test = require('node:test');
|
||||
const assert = require('node:assert/strict');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const argv = require('../src/argv');
|
||||
const include = require('../src/utils/argv-include')(argv);
|
||||
|
|
@ -12,6 +13,10 @@ const { resolveLayoutScraper } = require('../src/scrapers/resolve');
|
|||
const getRecursiveParameters = require('../src/utils/get-recursive-parameters');
|
||||
const knex = require('../src/knex');
|
||||
|
||||
unprint.options({
|
||||
logErrors: false,
|
||||
});
|
||||
|
||||
const actors = [
|
||||
// vixen
|
||||
{ entity: 'vixen', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
|
||||
|
|
@ -176,7 +181,7 @@ const actors = [
|
|||
{ entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] },
|
||||
{ entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] },
|
||||
{ entity: 'score', name: 'Vanessa Blue', fields: ['avatar', 'gender', 'placeOfResidence', 'ethnicity', 'height', 'weight', 'measurements', 'hairColor', 'dateOfBirth'] },
|
||||
{ entity: 'pierrewoodman', name: 'Makayla Cox', fields: ['avatar', 'nationality'] },
|
||||
{ entity: 'pierrewoodman', name: 'Abby Lee Brazil', fields: ['avatar', 'nationality'] },
|
||||
];
|
||||
|
||||
const actorScrapers = scrapers.actors;
|
||||
|
|
|
|||
Loading…
Reference in New Issue