Files
traxxx/src/scrapers/nubiles.js
2026-01-08 02:12:59 +01:00

240 lines
5.7 KiB
JavaScript
Executable File

'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const { heightToCm } = require('../utils/convert');
// Site origins keyed by entity slug; findModel prefers these over entity.url
// when building the model index URL.
const slugUrlMap = {
  nubiles: 'https://www.nubiles.net',
  nubilesporn: 'https://www.nubiles-porn.com',
};
/**
 * Reduce an absolute URL to its origin and path, dropping the query string and fragment.
 *
 * @param {string|null|undefined} link - Absolute URL; any falsy value yields null.
 * @returns {string|null} Origin followed by pathname, or null when no link was given.
 * @throws {TypeError} When `link` is non-empty but cannot be parsed as an absolute URL.
 */
function stripQuery(link) {
  if (link) {
    const { origin, pathname } = new URL(link);

    return `${origin}${pathname}`;
  }

  return null;
}
/**
 * Fetch an album page and collect the `srcset` of the `source` element inside
 * each `.photo-thumb`, passed through `unprint.prefixUrl`.
 *
 * @param {string} albumUrl - URL of the album page.
 * @returns {Promise<Array>} One entry per thumbnail, or an empty array when the request is not ok.
 */
async function getPhotos(albumUrl) {
  const res = await unprint.get(albumUrl, {
    interface: 'request',
    selectAll: '.photo-thumb',
  });

  if (!res.ok) {
    return [];
  }

  return res.context.map((thumb) => {
    const { srcset } = thumb.query.element('source');

    return unprint.prefixUrl(srcset);
  });
}
/**
 * Scrape release summaries from the tiles of a listing page.
 *
 * @param {Array<{ query: object }>} scenes - unprint contexts, one per tile.
 * @param {{ url: string }} entity - Site entity; its URL is the base for relative links.
 * @returns {Array<object>} One release object per tile. `entryId` is a number, or null
 *   when the tile has no link or the link does not contain `/watch/<digits>`.
 */
function scrapeAll(scenes, entity) {
  return scenes.map(({ query }) => {
    const release = {};

    release.title = query.content('.title a');
    release.url = stripQuery(unprint.prefixUrl(query.url('.title a'), entity.url));

    // A tile without a usable /watch/<id> link must not abort the whole page with a TypeError.
    const entryId = release.url && new URL(release.url).pathname.match(/\/watch\/(\d+)/)?.[1];

    release.entryId = entryId ? Number(entryId) : null;
    release.date = query.date('.date', 'MMM D, YYYY');

    if (query.exists('.models a.model')) {
      release.actors = query.all('.models a.model').map((actorEl) => ({
        name: unprint.query.content(actorEl),
        url: unprint.prefixUrl(unprint.query.url(actorEl, null), entity.url),
      }));
    } else {
      // upcoming page has single string of actors, implicitly separated by a lot of whitespace;
      // drop empty names so blank content does not produce an actor called ''
      release.actors = query.content('.models', { trim: false })
        ?.trim()
        .split(/\s{2,}/)
        .filter(Boolean);
    }

    const poster = query.sourceSet('img', 'data-srcset')?.[0];

    // Try the _1280 variant first, with the listed _640 source as fallback.
    release.poster = poster && [
      poster.replace('_640', '_1280'),
      poster,
    ];

    release.stars = query.number('.rating');
    release.likes = query.number('.likes');

    release.comment = `${unprint.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;

    return release;
  });
}
/**
 * Fetch one page of the site's video gallery and scrape its tiles.
 *
 * @param {{ url: string }} site - Site entity.
 * @param {number} [page=1] - 1-based page number, translated to an offset of 12 items per page.
 * @returns {Promise<Array<object>|number>} Scraped releases, or the response status when the request is not ok.
 */
async function fetchLatest(site, page = 1) {
  const offset = (page - 1) * 12;

  // /0 redirects back to /
  const res = await unprint.get(`${site.url}/video/gallery/${offset}`, {
    selectAll: '.content-grid-item',
    interface: 'request',
  });

  return res.ok
    ? scrapeAll(res.context, site)
    : res.status;
}
/**
 * Fetch and scrape the upcoming-videos page, but only for sites that opt in
 * through `parameters.upcoming`.
 *
 * @param {{ url: string, parameters?: { upcoming?: boolean } }} site - Site entity.
 * @returns {Promise<Array<object>|number>} Scraped releases, an empty array when the site
 *   has not opted in, or the response status when the request is not ok.
 */
async function fetchUpcoming(site) {
  if (!site.parameters?.upcoming) {
    return [];
  }

  const res = await unprint.get(`${site.url}/video/upcoming`, {
    interface: 'request',
    selectAll: '.content-grid-item',
  });

  return res.ok
    ? scrapeAll(res.context, site)
    : res.status;
}
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ query: object }} context - unprint context of the scene page.
 * @param {object} options
 * @param {string} options.url - Absolute URL the page was fetched from.
 * @param {{ url: string }} options.entity - Site entity; base for relative links.
 * @param {{ photos?: boolean }} [options.include] - Optional extras; the photo album is only
 *   fetched when `include.photos` is truthy.
 * @returns {Promise<object>} The scraped release.
 */
async function scrapeScene({ query }, { url, entity, include }) {
  const release = {};
  const { origin, pathname } = new URL(url); // parsed once, reused for both url and entryId

  release.url = `${origin}${pathname}`;
  // Fourth path segment, kept as a string; presumably paths look like /video/watch/<id>/<slug>.
  release.entryId = pathname.split('/')[3];

  release.title = query.content('.content-pane-title h2');
  release.description = query.content('.content-pane-column div');
  release.date = query.date('.date', 'MMM D, YYYY');

  release.actors = query.all('.content-pane-performers .model').map((actorEl) => ({
    name: unprint.query.content(actorEl),
    url: unprint.prefixUrl(unprint.query.url(actorEl, null), entity.url),
  }));

  release.tags = query.contents('.categories a');

  release.poster = query.poster() || query.img('.fake-video-player img');
  release.trailer = query.all('source').map((source) => ({
    src: source.src,
    quality: Number(source.getAttribute('res')),
  }));

  release.stars = query.number('.score');
  release.likes = query.number('#likecount');

  const albumLink = query.url('.content-pane-related-links a[href*="gallery"]');

  // `include` may be omitted by the caller; treat that the same as photos not being requested.
  if (albumLink && include?.photos) {
    release.photos = await getPhotos(unprint.prefixUrl(albumLink, new URL(entity.url).origin));
  }

  return release;
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Scene URL.
 * @param {object} entity - Site entity, forwarded to the scraper.
 * @param {object} _baseRelease - Not used by this scraper.
 * @param {object} include - Optional extras, forwarded to the scraper.
 * @returns {Promise<object|number>} The scraped release, or the response status when the request is not ok.
 */
async function fetchScene(url, entity, _baseRelease, include) {
  const res = await unprint.get(url, { interface: 'request' });

  if (!res.ok) {
    return res.status;
  }

  return scrapeScene(res.context, { url, entity, include });
}
function scrapeProfile({ query }, avatar) {
const profile = {};
const keys = query.contents('.model-profile .model-profile-subheading');
const values = query.contents('.model-profile .model-profile-subheading + p');
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
profile.age = Number(bio.age);
profile.description = query.content('.model-bio');
profile.residencePlace = bio.location;
profile.height = heightToCm(bio.height);
profile.measurements = bio.figure;
const photo = query.img('.model-profile img');
// avatar on profile page is different, index avatar preferred
if (avatar?.length > 0) {
profile.avatar = avatar;
profile.photos = [photo];
} else {
profile.avatar = photo;
}
return profile;
}
/**
 * Locate a model's profile URL (and index avatar) by name.
 *
 * The alphabetical model index is searched first because it is the only source of the index
 * avatar; the actor's own URL is used as a fallback.
 *
 * @param {{ name: string, url?: string }} actor - Actor to look up.
 * @param {{ slug: string, url: string }} entity - Site entity; `slugUrlMap` takes precedence over `entity.url`.
 * @returns {Promise<{ url: string, avatar?: Array }|null>} The model location, or null when the
 *   actor is not listed and has no URL of their own.
 */
async function findModel(actor, entity) {
  const firstLetter = actor.name.charAt(0).toLowerCase();
  const origin = slugUrlMap[entity.slug] || entity.url;
  const actorSlug = slugify(actor.name); // hoisted: identical for every index tile

  const resModels = await unprint.get(`${origin}/model/alpha/${firstLetter}`, {
    interface: 'request',
  });

  // A failed index request is not fatal: fall through to the actor's own URL rather than
  // returning a numeric status where callers expect a model object or null.
  const modelEl = resModels.ok
    ? resModels.context.query
      .all('.content-grid-item')
      .find((el) => slugify(unprint.query.content(el, 'a.title')) === actorSlug)
    : null;

  if (modelEl) {
    return {
      url: `${origin}${unprint.query.url(modelEl, 'a.title')}`,
      avatar: unprint.query.sourceSet(modelEl, 'a picture img', 'data-srcset'),
    };
  }

  // try actor URL last in order to grab avatar
  if (actor.url) {
    return { url: actor.url };
  }

  return null;
}
/**
 * Look up an actor on the site and scrape their profile page.
 *
 * @param {{ name: string, url?: string }} actor - Actor to look up.
 * @param {{ entity: object }} context - Carries the site entity used for the lookup.
 * @returns {Promise<object|number|null>} The scraped profile, a response status when a request
 *   failed, or null when the actor could not be located.
 */
async function fetchProfile(actor, { entity }) {
  const model = await findModel(actor, entity);

  // Defensive: anything without a URL (null, or a bare status number from a failed lookup)
  // cannot be fetched, so do not pass `undefined` to unprint.get.
  if (!model?.url) {
    return typeof model === 'number' ? model : null;
  }

  const resModel = await unprint.get(model.url, {
    interface: 'request',
  });

  if (resModel.ok) {
    return scrapeProfile(resModel.context, model.avatar);
  }

  return resModel.status;
}
// Functions exposed by this scraper module: latest and upcoming listings, actor profiles, single scenes.
module.exports = {
fetchLatest,
fetchUpcoming,
fetchProfile,
fetchScene,
};