Refactored MariskaX to use Next data. Fixed Naughty America profile scraper breaking on Tonight's Girlfriend.

This commit is contained in:
DebaucheryLibrarian 2026-01-19 19:19:24 +01:00
parent 157667f771
commit dde3ea3a34
4 changed files with 151 additions and 58 deletions

View File

@ -175,6 +175,9 @@ module.exports = {
// woodman
pierrewoodman,
wakeupnfuck: pierrewoodman,
// naughty america
naughtyamerica,
tonightsgirlfriend: naughtyamerica,
// etc
'18vr': badoink,
theflourishxxx: theflourish,
@ -231,8 +234,6 @@ module.exports = {
missax,
mylf: teamskeet,
mugfucked: fullpornnetwork,
naughtyamerica,
tonightsgirlfriend: naughtyamerica,
nebraskacoeds: elevatedx,
onlyprince: fullpornnetwork,
pascalssubsluts,

View File

@ -3,84 +3,161 @@
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const { convert } = require('../utils/convert');
function scrapeLatest(scenes) {
return scenes.map(({ query }) => {
const release = {};
function scrapeScene(data, channel) {
const release = {};
release.title = query.content('.title a');
release.url = query.url('.title a') || query.url('.thumb-wrap a');
release.entryId = data.id;
release.url = `${channel.origin}/scenes/${data.slug}`;
release.entryId = new URL(release.url).pathname.match(/view\/(\d+)\//)[1];
release.title = data.title;
release.description = data.description;
release.date = query.date('time', 'Do MMM YYYY', { match: /\d+\w+ \w+ \d{4}/ });
release.duration = query.duration('.total-time');
release.date = unprint.extractDate(data.publish_date, 'YYYY/MM/DD hh:mm:ss');
release.duration = unprint.extractDuration(data.videos_duration);
release.actors = query.all('.models a').map((el) => ({
name: unprint.query.content(el),
url: unprint.query.url(el, null),
}));
release.actors = (data.models_thumbs || data.models_slugs)?.map((actor) => ({
name: actor.name,
url: actor.slug && `${channel.origin}/models/${actor.slug}`,
avatar: actor.thumb,
})) || data.models;
[release.poster, ...release.photos] = query.json('.thumb-wrap a', { attribute: 'data-images' });
release.tags = data.tags;
release.photoCount = query.number('.total-photos');
release.poster = data.thumb || data.trailer_screencap;
return release;
});
const posterPath = release.poster && new URL(release.poster).pathname.replace('//', '/');
release.photos = data.extra_thumbnails.filter((src) => !src.includes(posterPath));
release.caps = data.thumbs;
release.teaser = data.special_thumbnails;
release.trailer = data.trailer_url;
release.photoCount = data.photos_duration;
release.channel = data.site?.toLowerCase();
release.qualities = data.videos && Array.from(new Set(Object.values(data.videos).map((video) => video.height))).filter(Boolean);
return release;
}
/**
 * Fetch one page of the latest scenes and scrape them from the JSON embedded
 * in the page's `#__NEXT_DATA__` element.
 *
 * @param {object} channel - Channel entity, handed through to `scrapeScene`.
 * @param {number} page - Value for the `page` query parameter.
 * @returns {Promise<object[]|null|number>} The scraped releases, `null` when the
 *   page carries no scene list, or the response status when the request failed.
 */
async function fetchLatest(channel, page) {
	const res = await unprint.get(`https://tour.mariskax.com/scenes?page=${page}`, {
		timeout: 30000, // slow site
	});

	if (!res.ok) {
		return res.status;
	}

	// Every step of the path is optional so an unexpected payload yields null instead of a TypeError.
	const data = res.context.query.json('#__NEXT_DATA__')?.props?.pageProps?.contents?.data;

	if (!Array.isArray(data)) {
		return null;
	}

	return data.map((scene) => scrapeScene(scene, channel));
}
function scrapeScene({ query }, { url }) {
const release = {};
async function fetchScene(url, entity, baseRelease) {
if (baseRelease.entryId) {
// same as deep data
return baseRelease;
}
release.title = query.content('.content-meta .title');
release.entryId = new URL(url).pathname.match(/view\/(\d+)\//)[1];
release.date = query.date('.post-date', 'Do MMM YYYY', { match: /\d+\w+ \w+ \d{4}/ });
release.actors = query.all('.content-meta .models a').map((el) => ({
name: unprint.query.content(el),
url: unprint.query.url(el, null),
}));
release.poster = query.poster('.trailer-wrap video');
release.trailer = query.video('.trailer-wrap source') || query.video('.download-trailer-wrap a', { attribute: 'href' });
return release;
}
async function fetchProfile(actor) {
const res = await unprint.post('https://tour.mariskax.com/search-preview-mrx', `q=${slugify(actor.name, '+')}`, {
headers: {
'Accept-Language': 'en-US,en', // necessary for some reason
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
},
const res = await unprint.get(url, {
timeout: 30000, // slow site
});
if (res.ok) {
const model = res.data.find((result) => result.type === 'model' && slugify(result.title) === actor.slug);
const data = res.context.query.json('#__NEXT_DATA__')?.props.pageProps.content;
if (model) {
const curatedModel = {
entryId: model.url?.match(/model\/(\d+)\//)?.[1],
url: model.url,
avatar: model.thumb,
};
return curatedModel;
if (data) {
return scrapeScene(data, entity);
}
return null;
}
return res.status;
}
/**
 * Build a profile object from a model record.
 *
 * The record's keys are passed through `slugify(key, '_')` so the bio fields
 * can be read under fixed names (`hair_color`, `eye_color`, ...), while `id`,
 * `slug` and `thumb` are read from the raw record.
 *
 * @param {object} data - Model record (from the search API or the page JSON).
 * @returns {object} Profile with url, entryId, bio fields, avatar and socials.
 */
function scrapeProfile(data) {
	const normalizedEntries = Object.entries(data).map(([field, content]) => [slugify(field, '_'), content]);
	const bio = Object.fromEntries(normalizedEntries);

	return {
		url: `https://tour.mariskax.com/models/${data.slug}`,
		entryId: data.id,
		gender: bio.gender,
		dateOfBirth: bio.birthdate,
		age: bio.age,
		placeOfBirth: bio.born,
		measurements: bio.measurements,
		// NOTE(review): unit handling is delegated to convert(); presumably cm and lb -> kg, confirm in utils/convert
		height: convert(bio.height, 'cm'),
		weight: convert(bio.weight, 'lb', 'kg'),
		hairColor: bio.hair_color,
		eyes: bio.eye_color,
		avatar: data.thumb,
		// the X handle is stored with a leading @, which has no place in the profile URL
		socials: bio.x ? [`https://x.com/${bio.x.replace('@', '')}`] : [],
	};
}
/**
 * Resolve the profile URL for an actor, searching the site when the actor has
 * no URL of its own.
 *
 * @param {object} actor - Actor with `name` and `slug`, and optionally `url`.
 * @param {object} entity - Entity whose `origin` hosts the search API and model pages.
 * @returns {Promise<{ url?: string, model?: object }>} `{ url }` when the actor
 *   already has one, `{ url, model }` when the search found a match, or an
 *   empty object when nothing was found. The result is always an object so
 *   that callers can destructure it without a null check.
 */
async function getActorUrl(actor, entity) {
	if (actor.url) {
		return { url: actor.url };
	}

	// the name becomes a path segment, so reserved characters such as / ? # must be escaped
	const res = await unprint.post(`${entity.origin}/api/search/${encodeURIComponent(actor.name)}`);

	if (res.ok) {
		// the response may lack a models list; treat that as no match
		const model = res.data?.models?.find((result) => slugify(result.name) === actor.slug);

		if (model?.slug) {
			return {
				url: `${entity.origin}/models/${model.slug}`,
				model,
			};
		}
	}

	// not null: fetchProfile destructures `{ url, model }` straight from this result
	return {};
}
async function fetchProfile(actor, entity) {
const { url, model } = await getActorUrl(actor, entity);
if (model) {
// search data already contains everything except for age, but DOB is included
return scrapeProfile(model);
}
if (url) {
const res = await unprint.get(url, {
parser: {
runScripts: 'dangerously',
},
});
if (res.ok) {
const data = res.context.query.json('#__NEXT_DATA__')?.props.pageProps.model;
if (data) {
return scrapeProfile(data);
}
return null;
}
return res.status;
}
return null;
@ -89,5 +166,6 @@ async function fetchProfile(actor) {
module.exports = {
fetchLatest,
scrapeScene,
fetchScene,
fetchProfile,
};

View File

@ -141,7 +141,7 @@ async function fetchScene(url, _channel) {
async function scrapeProfile({ query }) {
const profile = {};
profile.description = query.content('.bio_about_text');
profile.description = query.content('.bio_about_text, .performer-description');
profile.avatar = query.img('img.performer-pic, img.performer-img, img.peformer-img'); // sic peformer
return profile;
@ -151,12 +151,9 @@ async function fetchProfile({ slug }, { channel }) {
const url = unprint.prefixUrl(`/pornstar/${slug}`, channel.url);
const res = await unprint.browserRequest(url, {
browser: {
headless: false,
},
select: '.bio-info, .performer-details',
async control(ctx) {
await ctx.locator('.bio-info').hover({ trial: true, timeout: 30000 }); // wait for bio to initialize
await ctx.locator('.bio-info, .performer-details').hover({ trial: true, timeout: 30000 }); // wait for bio to initialize
},
});

View File

@ -1,5 +1,6 @@
'use strict';
const config = require('config');
const test = require('node:test');
const assert = require('node:assert/strict');
const unprint = require('unprint');
@ -15,6 +16,19 @@ const knex = require('../src/knex');
unprint.options({
logErrors: false,
timeout: argv.requestTimeout,
userAgent: 'traxxx',
browserUserAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',
apiUserAgent: 'traxxx',
limits: {
...config.limits,
default: {
interval: argv.interval || config.limits.default.interval,
concurrency: argv.concurrency || config.limits.default.concurrency,
},
browser: config.limits.browser,
},
proxy: config.proxy,
});
const actors = [
@ -188,6 +202,9 @@ const actors = [
{ entity: 'pierrewoodman', name: 'Abby Lee Brazil', fields: ['avatar', 'nationality'] },
{ entity: 'dorcelclub', name: 'Clea Gaultier', fields: ['avatar'] },
{ entity: 'hitzefrei', name: 'Jolee Love', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hair', 'description'] },
{ entity: 'naughtyamerica', name: 'Nicole Aniston', fields: ['avatar', 'description'] },
{ entity: 'tonightsgirlfriend', name: 'Abella Danger', fields: ['avatar'] },
{ entity: 'mariskax', name: 'Honey Demon', fields: ['avatar', 'gender', 'dateOfBirth', 'placeOfBirth', 'measurements', 'height', 'weight', 'hairColor', 'eyes'] },
];
const actorScrapers = scrapers.actors;