Fixed comment field not updated. Refactored Nubiles scraper.

This commit is contained in:
DebaucheryLibrarian
2025-10-06 03:26:17 +02:00
parent e13c8ccfe0
commit 19c892ab13
7 changed files with 217 additions and 88 deletions

View File

@@ -2,6 +2,7 @@
const unprint = require('unprint');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
// Naughty America network
@@ -42,6 +43,40 @@ function scrapeLatest(scenes, channel) {
});
}
/**
 * Fetch one page of a channel's latest scenes through a dedicated browser session.
 *
 * @param {object} channel - Channel entity; `channel.url` is the base URL and the optional
 *   `channel.parameters.scenes` is appended as the listing path.
 * @param {number} [page=1] - Page number, sent as the `page` query parameter.
 * @returns {Promise<Array|number>} Result of `scrapeLatest` when the page responds with 200,
 *   otherwise the HTTP status code.
 */
async function fetchLatest(channel, page = 1) {
	// NOTE(review): `headless: false` is only passed here, not by fetchScene — confirm it is
	// intentional and not a debugging leftover.
	const { tab } = await http.getBrowserSession('naughtyamerica', { useGlobalBrowser: false, useProxy: true, headless: false });

	try {
		const url = `${channel.url}${channel.parameters?.scenes || ''}?page=${page}`;
		const res = await tab.goto(url);
		const status = res.status();

		if (status !== 200) {
			return status;
		}

		const html = await tab.content();
		const items = unprint.initAll(html, '.site-list .scene-item, .panel-body');

		return scrapeLatest(items, channel);
	} finally {
		// always release the tab, including when navigation or scraping throws
		await tab.close();
	}
}
/*
async function fetchLatest(site, page = 1) {
const res = await unprint.get(`${site.url}${site.parameters?.scenes || ''}?page=${page}`, { selectAll: '.site-list .scene-item, .panel-body' });
if (res.ok) {
return scrapeLatest(res.context, site);
}
return res.status;
}
*/
function scrapeScene({ query }, { url }) {
const release = {};
@@ -98,6 +133,28 @@ function scrapeScene({ query }, { url }) {
return release;
}
/**
 * Load a scene page in a dedicated browser session and scrape it.
 *
 * @param {string} url - Scene page URL to navigate to.
 * @param {object} _channel - Unused; kept so the signature stays unchanged.
 * @returns {Promise<object|number>} Result of `scrapeScene` when the page responds with 200,
 *   otherwise the HTTP status code.
 */
async function fetchScene(url, _channel) {
	const { tab } = await http.getBrowserSession('naughtyamerica', { useGlobalBrowser: false, useProxy: true });

	try {
		const res = await tab.goto(url);
		const status = res.status();

		if (status !== 200) {
			return status;
		}

		const html = await tab.content();
		const item = unprint.init(html);

		return scrapeScene(item, { url });
	} finally {
		// always release the tab, including when navigation or scraping throws
		await tab.close();
	}
}
async function scrapeProfile({ query }) {
const profile = {};
@@ -107,16 +164,30 @@ async function scrapeProfile({ query }) {
return profile;
}
async function fetchLatest(site, page = 1) {
const res = await unprint.get(`${site.url}${site.parameters?.scenes || ''}?page=${page}`, { selectAll: '.site-list .scene-item, .panel-body' });
async function fetchProfile({ slug }, { channel }) {
const { tab } = await http.getBrowserSession('naughtyamerica', { useGlobalBrowser: false, useProxy: true });
const url = `${channel.url}/pornstar/${slug}`;
const res = await tab.goto(url);
if (res.ok) {
return scrapeLatest(res.context, site);
const status = res.status();
if (status === 200) {
const html = await tab.content();
const item = unprint.init(html, '.bio-info, .performer-details');
const profile = scrapeProfile(item, { url });
await tab.close();
return profile;
}
return res.status;
await tab.close();
return status;
}
/*
async function fetchProfile({ slug }, { channel }) {
const res = await unprint.get(`${channel.url}/pornstar/${slug}`, { select: '.bio-info, .performer-details' });
@@ -126,9 +197,10 @@ async function fetchProfile({ slug }, { channel }) {
return res.status;
}
*/
// Scraper interface exposed by this module: the three fetch* entry points plus scrapeScene.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
scrapeScene,
};

View File

@@ -1,6 +1,7 @@
'use strict';
const qu = require('../utils/qu');
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const { heightToCm } = require('../utils/convert');
@@ -9,44 +10,43 @@ const slugUrlMap = {
nubilesporn: 'https://www.nubiles-porn.com',
};
/**
 * Reduce an absolute URL to its origin and path, dropping the query string and fragment.
 *
 * @param {string|null|undefined} link - Absolute URL; any falsy value yields null.
 * @returns {string|null} `origin + pathname` of the link, or null when no link was given.
 * @throws {TypeError} When `link` is not a valid absolute URL (raised by the URL constructor).
 */
function stripQuery(link) {
	if (link) {
		const { origin, pathname } = new URL(link);

		return `${origin}${pathname}`;
	}

	return null;
}
async function getPhotos(albumUrl) {
const res = await qu.getAll(albumUrl, '.photo-thumb');
const res = await unprint.get(albumUrl, { selectAll: '.photo-thumb' });
return res.ok
? res.items.map(({ query }) => qu.prefixUrl(query.q('source').srcset))
? res.context.map(({ query }) => unprint.prefixUrl(query.element('source').srcset))
: [];
}
function scrapeAll(scenes, site, origin) {
function scrapeAll(scenes, entity) {
return scenes.map(({ query }) => {
const release = {};
release.title = query.q('.title a', true);
const url = query.url('.title a').split('?')[0];
const channelUrl = query.url('.site-link');
if (/^http/.test(url)) {
const { pathname } = new URL(url);
// release.entryId = pathname.split('/')[3];
if (channelUrl) release.url = `${channelUrl}${pathname}`;
else release.url = url;
} else if (!/\/join/.test(url)) {
// release.entryId = url.split('/')[3];
if (channelUrl) release.url = `${channelUrl}${url}`;
else if (site?.url) release.url = `${site.url}${url}`;
else if (origin) release.url = `${origin}${url}`;
} else {
// release.entryId = qu.q('a img', 'tube_tour_thumb_id');
}
release.title = query.content('.title a');
release.url = stripQuery(unprint.prefixUrl(query.url('.title a'), entity.url));
release.entryId = Number(new URL(release.url).pathname.match(/\/watch\/(\d+)/)[1]);
release.date = query.date('.date', 'MMM D, YYYY');
release.actors = query.all('.models a.model', true);
// no reliable entry ID between upcoming and released scenes
release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
if (query.exists('.models a.model')) {
release.actors = query.all('.models a.model').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: unprint.prefixUrl(unprint.query.url(actorEl, null), entity.url),
}));
} else {
// upcoming page has single string of actors, implicitly separated by a lot of whitespace
release.actors = query.content('.models', { trim: false })?.trim().split(/\s{2,}/);
}
const poster = query.sourceSet('img', 'data-srcset')?.[0];
@@ -58,24 +58,56 @@ function scrapeAll(scenes, site, origin) {
release.stars = query.number('.rating');
release.likes = query.number('.likes');
release.comment = `${unprint.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
return release;
});
}
async function scrapeScene({ query }, url, site) {
/**
 * Fetch one page of the site's video gallery and scrape its grid items.
 *
 * @param {object} site - Entity whose `url` is the base of the gallery URL.
 * @param {number} [page=1] - 1-based page number; the gallery path takes an item offset of
 *   `(page - 1) * 12`.
 * @returns {Promise<Array|number>} Result of `scrapeAll` on success, otherwise the response status.
 */
async function fetchLatest(site, page = 1) {
	const offset = (page - 1) * 12;
	const res = await unprint.get(`${site.url}/video/gallery/${offset}`, { selectAll: '.content-grid-item' });

	return res.ok
		? scrapeAll(res.context, site)
		: res.status;
}
/**
 * Fetch and scrape the site's upcoming videos page, if the site opts in.
 *
 * @param {object} site - Entity; the page is only requested when `site.parameters.upcoming`
 *   is truthy.
 * @returns {Promise<Array|number>} An empty array when upcoming is not enabled, the result of
 *   `scrapeAll` on success, otherwise the response status.
 */
async function fetchUpcoming(site) {
	if (!site.parameters?.upcoming) {
		return [];
	}

	const res = await unprint.get(`${site.url}/video/upcoming`, { selectAll: '.content-grid-item' });

	return res.ok
		? scrapeAll(res.context, site)
		: res.status;
}
async function scrapeScene({ query }, { url, entity, include }) {
const release = {};
const { origin, pathname } = new URL(url);
release.url = `${origin}${pathname}`;
release.entryId = new URL(url).pathname.split('/')[3];
release.title = query.q('.content-pane-title h2', true);
release.description = query.q('.content-pane-column div', true);
release.title = query.content('.content-pane-title h2');
release.description = query.content('.content-pane-column div');
release.date = query.date('.date', 'MMM D, YYYY');
release.actors = query.all('.content-pane-performers .model', true);
release.tags = query.all('.categories a', true);
release.actors = query.all('.content-pane-performers .model').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: unprint.prefixUrl(unprint.query.url(actorEl, null), entity.url),
}));
release.tags = query.contents('.categories a');
release.poster = query.poster() || query.img('.fake-video-player img');
release.trailer = query.all('source').map((source) => ({
@@ -83,76 +115,89 @@ async function scrapeScene({ query }, url, site) {
quality: Number(source.getAttribute('res')),
}));
release.stars = Number(query.q('.score', true));
release.likes = Number(query.q('#likecount', true));
release.stars = query.number('.score');
release.likes = query.number('#likecount');
const albumLink = query.url('.content-pane-related-links a[href*="gallery"]');
if (albumLink) {
release.photos = await getPhotos(`${site.url}${albumLink}`);
if (albumLink && include.photos) {
release.photos = await getPhotos(albumLink);
}
return release;
}
function scrapeProfile({ query }, _actorName, origin) {
function scrapeProfile({ query }, avatar) {
const profile = {};
const keys = query.all('.model-profile h5', true);
const values = query.all('.model-profile h5 + p', true);
const keys = query.contents('.model-profile .model-profile-subheading');
const values = query.contents('.model-profile .model-profile-subheading + p');
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
profile.age = Number(bio.age);
profile.description = query.q('.model-bio', true);
profile.description = query.content('.model-bio');
profile.residencePlace = bio.location;
profile.height = heightToCm(bio.height);
[profile.bust, profile.waist, profile.hip] = bio.figure.split('-').map((v) => Number(v) || v);
profile.measurements = bio.figure;
profile.avatar = query.img('.model-profile img');
const photo = query.img('.model-profile img');
const releases = query.all('.content-grid-item').filter((el) => /video\//.test(query.url(el, '.img-wrapper a'))); // filter out photos
profile.releases = scrapeAll(query.initAll(releases), null, origin);
// avatar on profile page is different, index avatar preferred
if (avatar?.length > 0) {
profile.avatar = avatar;
profile.photos = [photo];
} else {
profile.avatar = photo;
}
return profile;
}
async function fetchLatest(site, page = 1) {
const url = `${site.url}/video/gallery/${(page - 1) * 12}`;
const res = await qu.getAll(url, '.content-grid-item');
return res.ok ? scrapeAll(res.items, site) : res.status;
}
async function fetchUpcoming(site) {
if (site.parameters?.upcoming) {
const url = `${site.url}/video/upcoming`;
const res = await qu.getAll(url, '.content-grid-item');
return res.ok ? scrapeAll(res.items, site) : res.status;
}
return [];
}
async function fetchProfile({ name: actorName }, { site }) {
const firstLetter = actorName.charAt(0).toLowerCase();
const origin = slugUrlMap[site.slug] || site.url;
async function findModel(actor, entity) {
const firstLetter = actor.name.charAt(0).toLowerCase();
const origin = slugUrlMap[entity.slug] || entity.url;
const url = `${origin}/model/alpha/${firstLetter}`;
const resModels = await qu.get(url);
const resModels = await unprint.get(url);
if (!resModels.ok) return resModels.status;
if (!resModels.ok) {
return resModels.status;
}
const modelPath = resModels.item.qu.all('.content-grid-item a.title').find((el) => slugify(el.textContent) === slugify(actorName));
const modelEl = resModels.context.query.all('.content-grid-item').find((el) => slugify(unprint.query.content(el, 'a.title')) === slugify(actor.name));
if (modelPath) {
const modelUrl = `${origin}${modelPath}`;
const resModel = await qu.get(modelUrl);
if (modelEl) {
const modelUrl = `${origin}${unprint.query.url(modelEl, 'a.title')}`;
const modelAvatar = unprint.query.sourceSet(modelEl, 'a picture img', 'data-srcset');
return resModel.ok ? scrapeProfile(resModel.item, actorName, origin) : resModel.status;
return {
url: modelUrl,
avatar: modelAvatar,
};
}
// try actor URL last in order to grab avatar
if (actor.url) {
return { url: actor.url };
}
return null;
}
async function fetchProfile(actor, { entity }) {
const model = await findModel(actor, entity);
if (model) {
const resModel = await unprint.get(model.url);
if (resModel.ok) {
return scrapeProfile(resModel.context, model.avatar);
}
return resModel.status;
}
return null;
@@ -163,5 +208,4 @@ module.exports = {
fetchUpcoming,
fetchProfile,
scrapeScene,
deprecated: true,
};