Files
traxxx/src/scrapers/naughtyamerica.js
2026-01-24 01:30:17 +01:00

195 lines
6.5 KiB
JavaScript
Executable File

'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const { stripQuery } = require('../utils/url');
// Lookup from the slugified `.site-title` text found on a page to the value
// stored as `release.channel`, for the sites where the two differ. Slugs that
// are not listed here are used unchanged by the scrapers below.
const channelMap = {
  dormroom: 'thedormroom',
  dressingroom: 'thedressingroom',
  gym: 'thegym',
  office: 'theoffice',
  psepornstarexperience: 'pornstarexperience',
  spa: 'thespa',
  ta: 'tanda',
};
/**
 * Build release objects from the scene tiles of an overview page.
 *
 * @param {Array<{ query: object }>} scenes - contexts exposing a `query` helper, one per scene tile
 * @param {object} channel - channel being scraped; `channel.parameters.useActorUrl === false` suppresses actor URLs
 * @returns {object[]} one release per entry in `scenes`
 */
function scrapeLatest(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};
    const url = query.url('a');

    // prefer the explicit scene ID attribute, fall back to the numeric suffix of the scene URL
    release.entryId = query.attribute('a', 'data-scene-id') || (url && new URL(url).pathname.match(/-(\d+)$/)?.[1]) || null;

    release.date = query.date('.entry-date, .scene-date', 'MMM D, YYYY');
    release.duration = query.duration('.scene-runtime');

    release.actors = query.all('.contain-actors a, .scene-actors a').map((actorEl) => ({
      name: unprint.query.content(actorEl),
      url: channel.parameters?.useActorUrl === false
        ? null // actor URL is scene link in native layout
        : unprint.query.url(actorEl, null),
    }));

    release.poster = [
      // highest density source first
      ...(query
        .sourceSet('source[data-srcset*="scenes/"][type="image/jpeg"]', 'data-srcset', { includeDescriptor: true })
        ?.toSorted((sourceA, sourceB) => sourceB.density - sourceA.density)
        .map((source) => source.url) || []),
      query.img('.main-scene-img', { attribute: 'srcset' }),
      query.img('.scene-thumb'),
    ].filter(Boolean);

    release.teaser = query.video('a[data-desktop-video]', { attribute: 'data-desktop-video' });
    release.tags = query.contents('.flag-bg');

    release.qualities = [
      query.exists('//a[contains(@class, "label-four-k") and contains(text(), "4K")] | //span[contains(@class, "bug-4k")]') && 2160, // label-four-k is also used for non-4K tags
      query.exists('//a[contains(@class, "label-hd") and contains(text(), "HD")]') && 720,
    ].filter(Boolean);

    const channelSlug = slugify(query.content('.site-title'), '');

    release.channel = channelMap[channelSlug] || channelSlug;

    // NA affiliate prefers to push more traffic to Naughty America VR, all scenes labeled VR seem to be available on NAVR
    const isVr = release.tags?.some((tag) => tag.toLowerCase() === 'vr');

    // a tile without a link yields no URL; only rewrite the domain when there is one to rewrite
    release.url = url && isVr
      ? url.replace('naughtyamerica.com', 'naughtyamericavr.com')
      : url;

    return release;
  });
}
/**
 * Fetch one page of the scene overview and scrape its tiles.
 *
 * @param {object} channel - channel to scrape; `channel.url` is the base when no explicit latest URL is configured
 * @param {number} [page=1] - page number sent as the `page` query parameter
 * @param {object} [options] - scrape options
 * @param {object} [options.parameters] - optional `latest` (full overview URL) or `scenes` (path appended to the channel URL)
 * @returns {Promise<object[]|*>} scraped releases, or the response status when the request did not succeed
 */
async function fetchLatest(channel, page = 1, { parameters } = {}) {
  // parameters may be absent altogether, so guard both lookups
  const url = parameters?.latest || `${channel.url}${parameters?.scenes || ''}`;

  // a configured URL may already carry a query string; extend it rather than starting a second one
  const separator = url.includes('?') ? '&' : '?';

  const res = await unprint.browserRequest(`${url}${separator}page=${page}`, {
    selectAll: '.site-list .scene-item, .panel-body',
    async control(ctx) {
      await ctx.locator('.site-list, .grid-three').hover({ trial: true, timeout: 10000 }); // wait for overview to initialize
    },
  });

  if (res.ok) {
    return scrapeLatest(res.context, channel);
  }

  return res.status;
}
/**
 * Extract release details from a scene page.
 *
 * @param {{ query: object }} context - page context exposing the `query` helper
 * @param {{ url: string }} options - `url` is the scene URL; its numeric suffix becomes the entry ID
 * @returns {object} release; `trailer` is only present when a trailer source was found
 */
function scrapeScene({ query }, { url }) {
  const entryId = new URL(url).pathname.match(/-(\d+)$/)?.[1];

  // the breadcrumb often lacks the title, so the heading is used even though
  // live scenes show the performer name there instead of a scene title
  const title = query.content('.scene-title, .grey-title');
  const description = query.text('.synopsis, .scene-description');
  const date = query.date('.entry-date, .released-date', ['MMM D, YYYY', 'MM/DD/YY']);
  const duration = query.duration('.duration');

  // title links to performer in live scenes
  const hasLinkedPerformers = query.exists('.performer-list') || query.exists('.scene-info a[href*="/pornstar"].scene-title');

  let actors;

  if (hasLinkedPerformers) {
    const performerEls = query.all('.performer-list a, .grey-performers a, .scene-info a[href*="/pornstar"].scene-title');

    actors = performerEls.map((performerEl) => ({
      name: unprint.query.content(performerEl),
      url: stripQuery(unprint.query.url(performerEl, null)),
    }));
  } else {
    // not all performers are linked, fall back to the comma-separated names
    actors = query.content('.grey-performers')?.split(',').map((performerName) => performerName.trim());
  }

  const trailerPosterSources = query.sourceSet('.play-trailer source[data-srcset*="scenes/"][type="image/jpeg"]', 'data-srcset') || [];
  const videoPosterSources = query.sourceSet('.scenepage-video source[srcset*="scenes/"][type="image/jpeg"]', 'srcset') || [];

  const posterCandidates = [
    ...trailerPosterSources,
    ...videoPosterSources,
    query.img('.play-trailer img[data-srcset*="scenes/"]', { attribute: 'data-srcset' }),
    query.img('.scenepage-video .playcard'),
    query.img('.scene-page .start-card'),
    query.poster('dl8-video[poster]'),
  ];

  // each photo is a pair: the linked image first, then the thumbnail's srcset image
  const photos = query.els('.contain-scene-images.desktop-only .scene-image').map((photoEl) => {
    const linked = unprint.query.url(photoEl, null);
    const thumb = unprint.query.img(photoEl, 'img', { attribute: 'srcset' });

    return [linked, thumb];
  });

  const trailerUrl = query.video('#triggerPlay video source');

  // offer a 1080p variant derived from the file name first, then the source as found
  const trailerEntry = trailerUrl
    ? {
      trailer: [
        {
          source: trailerUrl.replace(/_\d+\.mp4/, '_1080.mp4'),
          quality: 1080,
        },
        trailerUrl,
      ],
    }
    : {};

  const siteSlug = slugify(query.content('.site-title'), '');
  const tags = query.contents('.categories a, .category a');

  const qualityChecks = [
    ['//a[contains(@class, "label-four-k") and contains(text(), "4K")]', 2160], // label-four-k is also used for non-4K tags
    ['img.icon-1080', 1080],
    ['//a[contains(@class, "label-hd") and contains(text(), "HD")]', 720],
  ];

  const qualities = qualityChecks
    .filter(([selector]) => query.exists(selector))
    .map(([, quality]) => quality);

  return {
    entryId,
    title,
    description,
    date,
    duration,
    actors,
    poster: posterCandidates.filter(Boolean),
    photos,
    ...trailerEntry,
    channel: channelMap[siteSlug] || siteSlug,
    tags,
    qualities,
  };
}
/**
 * Load a scene page in the browser and scrape it.
 *
 * @param {string} url - scene URL; a Naughty America VR domain is swapped for the regular site before requesting
 * @param {object} _channel - unused
 * @returns {Promise<object|*>} scraped release, or the control error / response status when the request did not succeed
 */
async function fetchScene(url, _channel) {
  // latest set NaughtyAmericaVR URL, but try deep scrape from regular NA website
  const regularUrl = url.replace('naughtyamericavr.com', 'naughtyamerica.com');

  const res = await unprint.browserRequest(regularUrl, {
    control: async (ctx) => {
      // wait for trailer to initialize
      const sceneLocator = ctx.locator('.scene-info, .scene').first();

      await sceneLocator.hover({ trial: true, timeout: 30000 });
    },
  });

  if (!res.ok) {
    return res.controlError || res.status;
  }

  // the release is scraped against the URL as given, not the rewritten one
  return scrapeScene(res.context, { url });
}
/**
 * Read the performer bio text and avatar from a profile page.
 *
 * @param {{ query: object }} context - page context exposing the `query` helper
 * @returns {Promise<{ description: *, avatar: * }>} profile fields as returned by the query helper
 */
async function scrapeProfile({ query }) {
  const description = query.content('.bio_about_text, .performer-description');
  const avatar = query.img('img.performer-pic, img.performer-img, img.peformer-img'); // sic peformer

  return { description, avatar };
}
/**
 * Load a performer page in the browser and scrape the profile.
 *
 * @param {{ slug: string }} actor - performer; the slug forms the `/pornstar/<slug>` path
 * @param {{ channel: object }} context - `channel.url` is the base the path is prefixed with
 * @returns {Promise<object|*>} scraped profile, or the response status when the request did not succeed
 */
async function fetchProfile({ slug }, { channel }) {
  const bioSelector = '.bio-info, .performer-details';
  const url = unprint.prefixUrl(`/pornstar/${slug}`, channel.url);

  const res = await unprint.browserRequest(url, {
    select: bioSelector,
    control: async (ctx) => {
      // wait for bio to initialize
      await ctx.locator(bioSelector).hover({ trial: true, timeout: 30000 });
    },
  });

  if (!res.ok) {
    return res.status;
  }

  return scrapeProfile(res.context, { url });
}
// Only the three fetch entry points are exported; the scrape* helpers and
// channelMap stay private to this module.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
};