Compare commits

..

No commits in common. "b1f663d4b7849f583f4a075fef10e38b81cc937d" and "e13c8ccfe04a0c46a91ef4918a4b1b153a872d4b" have entirely different histories.

7 changed files with 91 additions and 220 deletions

24
package-lock.json generated
View File

@ -1,12 +1,12 @@
{
"name": "traxxx",
"version": "1.244.73",
"version": "1.244.72",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "traxxx",
"version": "1.244.73",
"version": "1.244.72",
"license": "ISC",
"dependencies": {
"@aws-sdk/client-s3": "^3.458.0",
@ -89,7 +89,7 @@
"tunnel": "0.0.6",
"ua-parser-js": "^1.0.37",
"undici": "^5.28.1",
"unprint": "^0.16.1",
"unprint": "^0.15.7",
"url-pattern": "^1.0.3",
"v-tooltip": "^2.1.3",
"video.js": "^8.6.1",
@ -17137,17 +17137,6 @@
"node": ">= 0.6"
}
},
"node_modules/srcset": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/srcset/-/srcset-4.0.0.tgz",
"integrity": "sha512-wvLeHgcVHKO8Sc/H/5lkGreJQVeYMm9rlmt8PuR1xE31rIuXhuzznUUqAt8MqLhB3MqJdFzlNAfpcWnxiFUcPw==",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/sshpk": {
"version": "1.18.0",
"resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.18.0.tgz",
@ -18370,9 +18359,9 @@
}
},
"node_modules/unprint": {
"version": "0.16.1",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.16.1.tgz",
"integrity": "sha512-vOT6kdoZwVae9iHS5H+eBOqTZaVJRJWrBJrfnAEIzqPO8KseFvajd+kLZSL9iCE6Al5S0hi2TuMW89c8YK3Baw==",
"version": "0.15.7",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.15.7.tgz",
"integrity": "sha512-sR4HhdJbPxkcQlQem/Hl3N67Nhn47wiK71qvl+yCT1N31tknA+mhtD+aWW5MG5F9fnJpCTlr/s4mCLxalj6XEA==",
"dependencies": {
"axios": "^0.27.2",
"bottleneck": "^2.19.5",
@ -18382,7 +18371,6 @@
"eslint-config-airbnb-base": "^15.0.0",
"jsdom": "^17.0.0",
"moment-timezone": "^0.5.34",
"srcset": "^4.0.0",
"tunnel": "^0.0.6"
}
},

View File

@ -1,6 +1,6 @@
{
"name": "traxxx",
"version": "1.244.73",
"version": "1.244.72",
"description": "All the latest porn releases in one place",
"main": "src/app.js",
"scripts": {
@ -148,7 +148,7 @@
"tunnel": "0.0.6",
"ua-parser-js": "^1.0.37",
"undici": "^5.28.1",
"unprint": "^0.16.1",
"unprint": "^0.15.7",
"url-pattern": "^1.0.3",
"v-tooltip": "^2.1.3",
"video.js": "^8.6.1",

View File

@ -8663,7 +8663,7 @@ const sites = [
url: 'https://www.petitehdporn.com',
parent: 'nubiles',
parameters: {
upcoming: false,
upcoming: true,
},
},
{
@ -11445,7 +11445,7 @@ const sites = [
slug: 'danejones',
name: 'Dane Jones',
alias: ['dnj'],
url: 'https://www.danejones.com',
url: 'https://www.danejones.com/',
parameters: {
siteId: 290,
native: true,

View File

@ -2,7 +2,6 @@
const unprint = require('unprint');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
// Naughty America network
@ -43,40 +42,6 @@ function scrapeLatest(scenes, channel) {
});
}
/**
 * Fetch one page of a channel's latest scenes through a dedicated browser session.
 *
 * @param {object} channel - Channel entity; `channel.url` is the base URL and the
 *   optional `channel.parameters.scenes` is appended as the listing path.
 * @param {number} [page=1] - Page number passed as the `page` query parameter.
 * @returns {Promise<Array|number>} The result of `scrapeLatest` on a 200 response,
 *   otherwise the HTTP status code reported by the navigation.
 */
async function fetchLatest(channel, page = 1) {
  // NOTE(review): this session is requested with `headless: false`, unlike the other fetchers in this module — confirm that is intentional
  const { tab } = await http.getBrowserSession('naughtyamerica', { useGlobalBrowser: false, useProxy: true, headless: false });
  const url = `${channel.url}${channel.parameters?.scenes || ''}?page=${page}`;

  try {
    const res = await tab.goto(url);
    const status = res.status();

    if (status !== 200) {
      return status;
    }

    const html = await tab.content();
    const items = unprint.initAll(html, '.site-list .scene-item, .panel-body');

    return scrapeLatest(items, channel);
  } finally {
    // always release the tab, also when navigation or scraping throws
    await tab.close();
  }
}
/*
async function fetchLatest(site, page = 1) {
const res = await unprint.get(`${site.url}${site.parameters?.scenes || ''}?page=${page}`, { selectAll: '.site-list .scene-item, .panel-body' });
if (res.ok) {
return scrapeLatest(res.context, site);
}
return res.status;
}
*/
function scrapeScene({ query }, { url }) {
const release = {};
@ -133,28 +98,6 @@ function scrapeScene({ query }, { url }) {
return release;
}
/**
 * Fetch and scrape a single scene page through a dedicated browser session.
 *
 * @param {string} url - Scene page URL to navigate to.
 * @param {object} _channel - Unused.
 * @returns {Promise<object|number>} The result of `scrapeScene` on a 200 response,
 *   otherwise the HTTP status code reported by the navigation.
 */
async function fetchScene(url, _channel) {
  const { tab } = await http.getBrowserSession('naughtyamerica', { useGlobalBrowser: false, useProxy: true });

  try {
    const res = await tab.goto(url);
    const status = res.status();

    if (status !== 200) {
      return status;
    }

    const html = await tab.content();
    const item = unprint.init(html);

    return scrapeScene(item, { url });
  } finally {
    // always release the tab, also when navigation or scraping throws
    await tab.close();
  }
}
async function scrapeProfile({ query }) {
const profile = {};
@ -164,30 +107,16 @@ async function scrapeProfile({ query }) {
return profile;
}
async function fetchProfile({ slug }, { channel }) {
const { tab } = await http.getBrowserSession('naughtyamerica', { useGlobalBrowser: false, useProxy: true });
const url = `${channel.url}/pornstar/${slug}`;
const res = await tab.goto(url);
async function fetchLatest(site, page = 1) {
const res = await unprint.get(`${site.url}${site.parameters?.scenes || ''}?page=${page}`, { selectAll: '.site-list .scene-item, .panel-body' });
const status = res.status();
if (status === 200) {
const html = await tab.content();
const item = unprint.init(html, '.bio-info, .performer-details');
const profile = scrapeProfile(item, { url });
await tab.close();
return profile;
if (res.ok) {
return scrapeLatest(res.context, site);
}
await tab.close();
return status;
return res.status;
}
/*
async function fetchProfile({ slug }, { channel }) {
const res = await unprint.get(`${channel.url}/pornstar/${slug}`, { select: '.bio-info, .performer-details' });
@ -197,10 +126,9 @@ async function fetchProfile({ slug }, { channel }) {
return res.status;
}
*/
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
scrapeScene,
};

View File

@ -1,7 +1,6 @@
'use strict';
const unprint = require('unprint');
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
const { heightToCm } = require('../utils/convert');
@ -10,43 +9,44 @@ const slugUrlMap = {
nubilesporn: 'https://www.nubiles-porn.com',
};
/**
 * Reduce a link to its origin and path, dropping the query string and fragment.
 *
 * @param {string} [link] - Absolute URL; falsy values are passed through as null.
 * @returns {string|null} `origin + pathname` of the link, or null when no link is given.
 * @throws {TypeError} When `link` is not a valid absolute URL (from the URL constructor).
 */
function stripQuery(link) {
  if (link) {
    const { origin, pathname } = new URL(link);

    return `${origin}${pathname}`;
  }

  return null;
}
async function getPhotos(albumUrl) {
const res = await unprint.get(albumUrl, { selectAll: '.photo-thumb' });
const res = await qu.getAll(albumUrl, '.photo-thumb');
return res.ok
? res.context.map(({ query }) => unprint.prefixUrl(query.element('source').srcset))
? res.items.map(({ query }) => qu.prefixUrl(query.q('source').srcset))
: [];
}
function scrapeAll(scenes, entity) {
function scrapeAll(scenes, site, origin) {
return scenes.map(({ query }) => {
const release = {};
release.title = query.content('.title a');
release.url = stripQuery(unprint.prefixUrl(query.url('.title a'), entity.url));
release.entryId = Number(new URL(release.url).pathname.match(/\/watch\/(\d+)/)[1]);
release.title = query.q('.title a', true);
const url = query.url('.title a').split('?')[0];
const channelUrl = query.url('.site-link');
if (/^http/.test(url)) {
const { pathname } = new URL(url);
// release.entryId = pathname.split('/')[3];
if (channelUrl) release.url = `${channelUrl}${pathname}`;
else release.url = url;
} else if (!/\/join/.test(url)) {
// release.entryId = url.split('/')[3];
if (channelUrl) release.url = `${channelUrl}${url}`;
else if (site?.url) release.url = `${site.url}${url}`;
else if (origin) release.url = `${origin}${url}`;
} else {
// release.entryId = qu.q('a img', 'tube_tour_thumb_id');
}
release.date = query.date('.date', 'MMM D, YYYY');
release.actors = query.all('.models a.model', true);
if (query.exists('.models a.model')) {
release.actors = query.all('.models a.model').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: unprint.prefixUrl(unprint.query.url(actorEl, null), entity.url),
}));
} else {
// upcoming page has single string of actors, implicitly separated by a lot of whitespace
release.actors = query.content('.models', { trim: false })?.trim().split(/\s{2,}/);
}
// no reliable entry ID between upcoming and released scenes
release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
const poster = query.sourceSet('img', 'data-srcset')?.[0];
@ -58,56 +58,24 @@ function scrapeAll(scenes, entity) {
release.stars = query.number('.rating');
release.likes = query.number('.likes');
release.comment = `${unprint.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
return release;
});
}
/**
 * Fetch one page of the site's video gallery and scrape its grid items.
 *
 * @param {object} site - Site entity; `site.url` is the base URL.
 * @param {number} [page=1] - 1-based page number; the gallery offset advances by 12 per page.
 * @returns {Promise<Array|number>} The result of `scrapeAll` when the request succeeds,
 *   otherwise the response status.
 */
async function fetchLatest(site, page = 1) {
  const offset = (page - 1) * 12;
  const res = await unprint.get(`${site.url}/video/gallery/${offset}`, { selectAll: '.content-grid-item' });

  return res.ok
    ? scrapeAll(res.context, site)
    : res.status;
}
/**
 * Fetch and scrape the site's upcoming videos page, when enabled for the site.
 *
 * @param {object} site - Site entity; the page is only requested when
 *   `site.parameters.upcoming` is truthy.
 * @returns {Promise<Array|number>} An empty array when upcoming scenes are disabled,
 *   the result of `scrapeAll` when the request succeeds, otherwise the response status.
 */
async function fetchUpcoming(site) {
  if (!site.parameters?.upcoming) {
    return [];
  }

  const res = await unprint.get(`${site.url}/video/upcoming`, { selectAll: '.content-grid-item' });

  return res.ok
    ? scrapeAll(res.context, site)
    : res.status;
}
async function scrapeScene({ query }, { url, entity, include }) {
async function scrapeScene({ query }, url, site) {
const release = {};
const { origin, pathname } = new URL(url);
release.url = `${origin}${pathname}`;
release.entryId = new URL(url).pathname.split('/')[3];
release.title = query.content('.content-pane-title h2');
release.description = query.content('.content-pane-column div');
release.title = query.q('.content-pane-title h2', true);
release.description = query.q('.content-pane-column div', true);
release.date = query.date('.date', 'MMM D, YYYY');
release.actors = query.all('.content-pane-performers .model').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: unprint.prefixUrl(unprint.query.url(actorEl, null), entity.url),
}));
release.tags = query.contents('.categories a');
release.actors = query.all('.content-pane-performers .model', true);
release.tags = query.all('.categories a', true);
release.poster = query.poster() || query.img('.fake-video-player img');
release.trailer = query.all('source').map((source) => ({
@ -115,89 +83,76 @@ async function scrapeScene({ query }, { url, entity, include }) {
quality: Number(source.getAttribute('res')),
}));
release.stars = query.number('.score');
release.likes = query.number('#likecount');
release.stars = Number(query.q('.score', true));
release.likes = Number(query.q('#likecount', true));
const albumLink = query.url('.content-pane-related-links a[href*="gallery"]');
if (albumLink && include.photos) {
release.photos = await getPhotos(albumLink);
if (albumLink) {
release.photos = await getPhotos(`${site.url}${albumLink}`);
}
return release;
}
function scrapeProfile({ query }, avatar) {
function scrapeProfile({ query }, _actorName, origin) {
const profile = {};
const keys = query.contents('.model-profile .model-profile-subheading');
const values = query.contents('.model-profile .model-profile-subheading + p');
const keys = query.all('.model-profile h5', true);
const values = query.all('.model-profile h5 + p', true);
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
profile.age = Number(bio.age);
profile.description = query.content('.model-bio');
profile.description = query.q('.model-bio', true);
profile.residencePlace = bio.location;
profile.height = heightToCm(bio.height);
profile.measurements = bio.figure;
[profile.bust, profile.waist, profile.hip] = bio.figure.split('-').map((v) => Number(v) || v);
const photo = query.img('.model-profile img');
profile.avatar = query.img('.model-profile img');
// avatar on profile page is different, index avatar preferred
if (avatar?.length > 0) {
profile.avatar = avatar;
profile.photos = [photo];
} else {
profile.avatar = photo;
}
const releases = query.all('.content-grid-item').filter((el) => /video\//.test(query.url(el, '.img-wrapper a'))); // filter out photos
profile.releases = scrapeAll(query.initAll(releases), null, origin);
return profile;
}
async function findModel(actor, entity) {
const firstLetter = actor.name.charAt(0).toLowerCase();
const origin = slugUrlMap[entity.slug] || entity.url;
/**
 * Fetch one page of the site's video gallery and scrape its grid items.
 *
 * @param {object} site - Site entity; `site.url` is the base URL.
 * @param {number} [page=1] - 1-based page number; the gallery offset advances by 12 per page.
 * @returns {Promise<Array|number>} The result of `scrapeAll` when the request succeeds,
 *   otherwise the response status.
 */
async function fetchLatest(site, page = 1) {
  const offset = (page - 1) * 12;
  const res = await qu.getAll(`${site.url}/video/gallery/${offset}`, '.content-grid-item');

  if (!res.ok) {
    return res.status;
  }

  return scrapeAll(res.items, site);
}
/**
 * Fetch and scrape the site's upcoming videos page, when enabled for the site.
 *
 * @param {object} site - Site entity; the page is only requested when
 *   `site.parameters.upcoming` is truthy.
 * @returns {Promise<Array|number>} An empty array when upcoming scenes are disabled,
 *   the result of `scrapeAll` when the request succeeds, otherwise the response status.
 */
async function fetchUpcoming(site) {
  if (!site.parameters?.upcoming) {
    return [];
  }

  const res = await qu.getAll(`${site.url}/video/upcoming`, '.content-grid-item');

  if (!res.ok) {
    return res.status;
  }

  return scrapeAll(res.items, site);
}
async function fetchProfile({ name: actorName }, { site }) {
const firstLetter = actorName.charAt(0).toLowerCase();
const origin = slugUrlMap[site.slug] || site.url;
const url = `${origin}/model/alpha/${firstLetter}`;
const resModels = await unprint.get(url);
const resModels = await qu.get(url);
if (!resModels.ok) {
return resModels.status;
}
if (!resModels.ok) return resModels.status;
const modelEl = resModels.context.query.all('.content-grid-item').find((el) => slugify(unprint.query.content(el, 'a.title')) === slugify(actor.name));
const modelPath = resModels.item.qu.all('.content-grid-item a.title').find((el) => slugify(el.textContent) === slugify(actorName));
if (modelEl) {
const modelUrl = `${origin}${unprint.query.url(modelEl, 'a.title')}`;
const modelAvatar = unprint.query.sourceSet(modelEl, 'a picture img', 'data-srcset');
if (modelPath) {
const modelUrl = `${origin}${modelPath}`;
const resModel = await qu.get(modelUrl);
return {
url: modelUrl,
avatar: modelAvatar,
};
}
// try actor URL last in order to grab avatar
if (actor.url) {
return { url: actor.url };
}
return null;
}
async function fetchProfile(actor, { entity }) {
const model = await findModel(actor, entity);
if (model) {
const resModel = await unprint.get(model.url);
if (resModel.ok) {
return scrapeProfile(resModel.context, model.avatar);
}
return resModel.status;
return resModel.ok ? scrapeProfile(resModel.item, actorName, origin) : resModel.status;
}
return null;
@ -208,4 +163,5 @@ module.exports = {
fetchUpcoming,
fetchProfile,
scrapeScene,
deprecated: true,
};

View File

@ -437,11 +437,10 @@ async function storeScenes(releases, useBatchId) {
title = COALESCE(new.title, releases.title),
description = COALESCE(new.description, releases.description),
duration = COALESCE(new.duration, releases.duration),
comment = COALESCE(new.comment, releases.comment),
deep = new.url IS NOT NULL,
updated_at = NOW()
FROM json_to_recordset(:scenes)
AS new(id int, url text, date timestamptz, entity json, title text, description text, duration integer, comment text, deep boolean)
AS new(id int, url text, date timestamptz, entity json, title text, description text, duration integer, deep boolean)
WHERE releases.id = new.id
`, {
scenes: JSON.stringify(curatedDuplicateReleases),

View File

@ -175,7 +175,7 @@ async function getBrowserSession(identifier, options = {}) {
const newBrowser = await puppeteer.launch({
headless: typeof options.headless === 'undefined' ? 'new' : options.headless,
args: [
...(options.useProxy && config.proxy.enabled ? [`--proxy-server=${config.proxy.host}:${config.proxy.port}`] : []),
...(options.useProxy ? [`--proxy-server=${config.proxy.host}:${config.proxy.port}`] : []),
],
// headless: false,
});