4 Commits

Author SHA1 Message Date
DebaucheryLibrarian
818669011e 1.252.7 2026-04-16 04:01:34 +02:00
DebaucheryLibrarian
f962d71d10 Fixed Aylo session acquire, migrated to unprint. Fixed Jules Jordan profile test. 2026-04-16 04:01:31 +02:00
DebaucheryLibrarian
04278284af 1.252.6 2026-04-16 01:16:02 +02:00
DebaucheryLibrarian
946afcb229 Updated Jules Jordan scraper for new site, changed entryId to always use URLs (upcoming WIP). 2026-04-16 01:15:58 +02:00
5 changed files with 175 additions and 222 deletions

4
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "traxxx",
"version": "1.252.5",
"version": "1.252.7",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "traxxx",
"version": "1.252.5",
"version": "1.252.7",
"license": "ISC",
"dependencies": {
"@aws-sdk/client-s3": "^3.458.0",

View File

@@ -1,6 +1,6 @@
{
"name": "traxxx",
"version": "1.252.5",
"version": "1.252.7",
"description": "All the latest porn releases in one place",
"main": "src/app.js",
"scripts": {

View File

@@ -1,15 +1,10 @@
'use strict';
/* eslint-disable newline-per-chained-call */
const Promise = require('bluebird');
const { CookieJar } = Promise.promisifyAll(require('tough-cookie'));
const cookie = require('cookie');
const moment = require('moment');
// const unprint = require('unprint');
const unprint = require('unprint');
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
const http = require('../utils/http');
const { inchesToCm, lbsToKg } = require('../utils/convert');
function getBasePath(parameters, channel, path = '/scene') {
@@ -126,6 +121,119 @@ async function scrapeLatest(items, site, filterChannel, options) {
};
}
// Resolve the scene-listing URL for an Aylo/MindGeek site.
// Returns site.url unchanged when it already points at a specific site listing
// (?site=<id> query or /site/<id> path segment); otherwise derives the listing
// URL from the entity's parameters. Throws for unsupported configurations.
function getUrl(site) {
  const { searchParams, pathname } = new URL(site.url);

  // URL already targets a specific site listing.
  if (searchParams.has('site') || /\/site\/\d+/.test(pathname)) {
    return site.url;
  }

  // Native and extract sites both expose their own /scenes listing
  // (merged from two identical branches).
  if (site.parameters?.native || site.parameters?.extract) {
    return `${site.url}/scenes`;
  }

  // Child channels are listed on the parent network, filtered by site id.
  if (site.parameters?.siteId) {
    return `${site.parent.url}/scenes?site=${site.parameters.siteId}`;
  }

  throw new Error(`Aylo site '${site.name}' (${site.url}) not supported`);
}
// Acquire an Aylo/MindGeek API session by requesting the site (or parent
// network) page and reading the instance_token cookie set on the response.
// Returns { instanceToken } on success, null for the independent 'aylo'
// entity, and throws when no token could be acquired.
async function getSession(site, _parameters, url) {
// if (site.slug === 'aylo' || site.parameters?.parentSession === false) {
if (site.slug === 'aylo') {
// most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels
return null;
}
// Child channels normally acquire the session from the parent network unless
// the entity opts out via native/childSession/parentSession parameters.
const sessionUrl = site.parameters?.siteId && !(site.parameters?.native || site.parameters?.childSession || site.parent?.parameters?.childSession || site.parameters?.parentSession === false)
? site.parent.url
: (url || site.url);
const res = await unprint.get(sessionUrl, {
headers: {
'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
Connection: 'keep-alive',
},
});
if (res.status === 200) {
const instanceToken = res.cookies.instance_token;
if (instanceToken) {
return { instanceToken };
}
}
// Fixed: was res.statusCode, which the unprint response does not expose
// (the success check above reads res.status), so the message always said
// 'undefined'.
throw new Error(`Failed to acquire Aylo session (${res.status})`);
}
// Fetch the latest scene releases for an Aylo/MindGeek site from the
// project1service API. Returns scraped releases on success, null when the
// site cannot be resolved to a collection, or the HTTP status code otherwise.
async function fetchLatest(site, page = 1, options) {
const url = getUrl(site);
const { searchParams, pathname } = new URL(url);
// Site id may appear as a ?site= query parameter or a /site/<id>/ path segment.
const siteId = searchParams.get('site') || Number(pathname.match(/\/site\/(\d+)\//)?.[1]);
if (!siteId && !site.parameters?.native && !site.parameters?.extract) {
return null;
}
// Reuse the token provided via beforeNetwork when the entity allows sharing a
// parent session; otherwise acquire a fresh session for this site.
// NOTE(review): getSession returns null for the 'aylo' slug, which would make
// this destructuring throw — presumably fetchLatest is never invoked for that
// entity; confirm against the scraper registry.
const { instanceToken } = options.beforeNetwork?.instanceToken && !(options.parameters?.native || options.parameters?.childSession || options.parameters?.parentSession === false)
? options.beforeNetwork
: await getSession(site, options.parameters, url);
// Query up to tomorrow so the newest releases aren't cut off by timezone skew.
const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');
const limit = 24; // releases per page
// Native/extract sites query the releases API without a collectionId filter.
const apiUrl = site.parameters?.native || site.parameters?.extract
? `https://site-api.project1service.com/v2/releases?dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`
: `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;
const res = await unprint.get(apiUrl, {
interval: options.parameters.interval,
concurrency: options.parameters.concurrency,
headers: {
Instance: instanceToken,
Origin: site.url,
Referer: url,
'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
},
});
if (res.status === 200 && res.data.result) {
return scrapeLatest(res.data.result, site, false, options);
}
return res.status;
}
// Fetch upcoming releases for an Aylo/MindGeek site from the project1service
// API. Returns scraped upcoming scenes on success, the HTTP status otherwise.
async function fetchUpcoming(site, _page, options) {
  const referer = getUrl(site);
  const session = await getSession(site, options.parameters);

  const response = await unprint.get('https://site-api.project1service.com/v2/upcoming-releases', {
    interval: options.parameters.interval,
    concurrency: options.parameters.concurrency,
    headers: {
      Instance: session.instanceToken,
      Origin: site.url,
      Referer: referer,
      'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
    },
  });

  if (response.status !== 200 || !response.data.result) {
    return response.status;
  }

  return scrapeLatest(response.data.result, site, true, options);
}
function scrapeRelease(data, url, channel, networkName, options) {
if (Array.isArray(data)) {
return null;
@@ -192,139 +300,6 @@ function scrapeRelease(data, url, channel, networkName, options) {
return release;
}
// Resolve the scene-listing URL for an Aylo/MindGeek site, or throw when the
// site configuration is not supported.
function getUrl(site) {
  const parsed = new URL(site.url);

  // Already pointing at a specific site listing (?site=<id> or /site/<id>).
  const pointsAtSite = parsed.searchParams.has('site') || /\/site\/\d+/.test(parsed.pathname);

  if (pointsAtSite) {
    return site.url;
  }

  // Native and extract sites expose their own /scenes listing.
  if (site.parameters?.native || site.parameters?.extract) {
    return `${site.url}/scenes`;
  }

  // Child channels are listed on the parent network, filtered by site id.
  if (site.parameters?.siteId) {
    return `${site.parent.url}/scenes?site=${site.parameters.siteId}`;
  }

  throw new Error(`Aylo site '${site.name}' (${site.url}) not supported`);
}
// Legacy (pre-unprint) session acquisition: requests the site or parent
// network page through the http util with a tough-cookie jar, then extracts
// the instance_token cookie. Returns { session, instanceToken } on success,
// null for the independent 'aylo' entity, throws otherwise.
async function getSession(site, parameters, url) {
// if (site.slug === 'aylo' || site.parameters?.parentSession === false) {
if (site.slug === 'aylo') {
// most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels
return null;
}
const cookieJar = new CookieJar();
const session = http.session({ cookieJar });
// Child channels normally acquire the session from the parent network unless
// the entity opts out via native/childSession/parentSession parameters.
const sessionUrl = site.parameters?.siteId && !(site.parameters?.native || site.parameters?.childSession || site.parent?.parameters?.childSession || site.parameters?.parentSession === false)
? site.parent.url
: (url || site.url);
/*
await unprint.browserRequest(sessionUrl, {
browser: {
headless: false,
},
async control() {
await new Promise((resolve) => { setTimeout(() => resolve(), 10000); });
},
});
*/
const res = await http.get(sessionUrl, {
session,
headers: {
'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
Connection: 'keep-alive',
},
interval: parameters?.interval,
concurrency: parameters?.concurrency,
parse: false, // raw response; only the cookies are needed
});
if (res.status === 200) {
const cookieString = await cookieJar.getCookieStringAsync(sessionUrl);
const { instance_token: instanceToken } = cookie.parse(cookieString);
if (instanceToken) {
return { session, instanceToken };
}
}
// NOTE(review): success path reads res.status but this reads res.statusCode —
// confirm the http util exposes both, otherwise the message prints undefined.
throw new Error(`Failed to acquire Aylo session (${res.statusCode})`);
}
// Legacy (pre-unprint) latest-release fetch against the project1service API.
// Returns scraped releases on success, null when the site cannot be resolved
// to a collection, or the HTTP status code otherwise.
async function fetchLatest(site, page = 1, options) {
const url = getUrl(site);
const { searchParams, pathname } = new URL(url);
// Site id may appear as a ?site= query parameter or a /site/<id>/ path segment.
const siteId = searchParams.get('site') || Number(pathname.match(/\/site\/(\d+)\//)?.[1]);
if (!siteId && !site.parameters?.native && !site.parameters?.extract) {
return null;
}
// Reuse the token provided via beforeNetwork when the entity allows sharing a
// parent session; otherwise acquire a fresh session for this site.
const { instanceToken } = options.beforeNetwork?.instanceToken && !(options.parameters?.native || options.parameters?.childSession || options.parameters?.parentSession === false)
? options.beforeNetwork
: await getSession(site, options.parameters, url);
// Query up to tomorrow so the newest releases aren't cut off by timezone skew.
const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');
const limit = 24; // releases per page
// Native/extract sites query the releases API without a collectionId filter.
const apiUrl = site.parameters?.native || site.parameters?.extract
? `https://site-api.project1service.com/v2/releases?dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`
: `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;
const res = await http.get(apiUrl, {
interval: options.parameters.interval,
concurrency: options.parameters.concurrency,
headers: {
Instance: instanceToken,
Origin: site.url,
Referer: url,
'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
},
});
if (res.status === 200 && res.body.result) {
return scrapeLatest(res.body.result, site, false, options);
}
// NOTE(review): success check reads res.status but the failure path returns
// res.statusCode — confirm the http util exposes both fields.
return res.statusCode;
}
// Legacy (pre-unprint) upcoming-release fetch against the project1service
// API, using the cookie-jar session from getSession. Returns scraped upcoming
// scenes on success, the HTTP status code otherwise.
async function fetchUpcoming(site, page, options) {
const url = getUrl(site);
// NOTE(review): getSession returns null for the 'aylo' slug, which would make
// this destructuring throw — presumably never invoked for that entity; confirm.
const { session, instanceToken } = await getSession(site, options.parameters);
const apiUrl = 'https://site-api.project1service.com/v2/upcoming-releases';
const res = await http.get(apiUrl, {
session,
interval: options.parameters.interval,
concurrency: options.parameters.concurrency,
headers: {
Instance: instanceToken,
Origin: site.url,
Referer: url,
'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
},
});
if (res.statusCode === 200 && res.body.result) {
return scrapeLatest(res.body.result, site, true, options);
}
return res.statusCode;
}
async function fetchRelease(url, site, baseScene, options) {
if (baseScene?.entryId && !baseScene.shallow && !options.parameters.forceDeep) {
// overview and deep data is the same, don't hit server unnecessarily
@@ -332,10 +307,9 @@ async function fetchRelease(url, site, baseScene, options) {
}
const entryId = new URL(url).pathname.match(/\/(\d+)/)?.[1];
const { session, instanceToken } = options.beforeFetchScenes || await getSession(site, options.parameters);
const { instanceToken } = options.beforeFetchScenes || await getSession(site, options.parameters);
const res = await http.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
session,
const res = await unprint.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
interval: options.parameters.interval,
concurrency: options.parameters.concurrency,
headers: {
@@ -344,16 +318,16 @@ async function fetchRelease(url, site, baseScene, options) {
},
});
if (res.status === 200 && res.body.result) {
if (res.status === 200 && res.data.result) {
return {
scene: scrapeRelease(res.body.result, url, site, null, options),
scene: scrapeRelease(res.data.result, url, site, null, options),
};
}
return null;
}
function scrapeProfile(data, networkName, _releases = []) {
function scrapeProfile(data, _networkName, _releases = []) {
const profile = {
description: data.bio,
aliases: data.aliases.filter(Boolean),
@@ -367,7 +341,7 @@ function scrapeProfile(data, networkName, _releases = []) {
profile.measurements = data.measurements;
}
profile.dateOfBirth = qu.parseDate(data.birthday);
profile.dateOfBirth = unprint.extractDate(data.birthday);
profile.birthPlace = data.birthPlace;
profile.height = inchesToCm(data.height);
profile.weight = lbsToKg(data.weight);
@@ -406,10 +380,9 @@ function scrapeProfile(data, networkName, _releases = []) {
async function fetchProfile({ name: actorName }, { entity, parameters }, include) {
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
const { session, instanceToken } = await getSession(entity, parameters);
const { instanceToken } = await getSession(entity, parameters);
const res = await http.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, {
session,
const res = await unprint.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, {
interval: parameters.interval,
concurrency: parameters.concurrency,
headers: {
@@ -418,14 +391,13 @@ async function fetchProfile({ name: actorName }, { entity, parameters }, include
},
});
if (res.statusCode === 200) {
const actorData = res.body.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase());
if (res.status === 200) {
const actorData = res.data.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase());
if (actorData) {
const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;
const actorReleasesRes = include.includeActorScenes && await http.get(actorReleasesUrl, {
session,
const actorReleasesRes = include.includeActorScenes && await unprint.get(actorReleasesUrl, {
interval: parameters.interval,
concurrency: parameters.concurrency,
headers: {
@@ -433,8 +405,8 @@ async function fetchProfile({ name: actorName }, { entity, parameters }, include
},
});
if (actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) {
return scrapeProfile(actorData, entity.slug, actorReleasesRes.body.result);
if (actorReleasesRes.status === 200 && actorReleasesRes.data.result) {
return scrapeProfile(actorData, entity.slug, actorReleasesRes.data.result);
}
return scrapeProfile(actorData, entity.slug, []);

View File

@@ -1,44 +1,38 @@
'use strict';
const util = require('util');
const Promise = require('bluebird');
const unprint = require('unprint');
const argv = require('../argv');
const { heightToCm } = require('../utils/convert');
const slugify = require('../utils/slugify');
const tryUrls = require('../utils/try-urls');
function getEntryId(html) {
const entryId = html.match(/showtagform\((\d+)\)/);
if (entryId) {
return entryId[1];
function getEntryId(url) {
if (!url) {
return null;
}
const setIdIndex = html.indexOf('setid:"');
const entryId = new URL(url).pathname.split('/').at(-1).match('(.*?)_vids.html');
if (setIdIndex) {
return html.slice(setIdIndex, html.indexOf(',', setIdIndex)).match(/\d+/)?.[0];
if (entryId) {
return slugify(entryId[1]);
}
return null;
}
// Derive a stable entry id from a release's title plus its sorted actor names.
// (Title alone, or title + date, proved insufficient: the updates page does
// not show a date, so cast membership disambiguates same-title scenes.)
function getEntryIdFromTitle(release) {
  const actorNames = release.actors?.map((actor) => actor.name || actor).toSorted() || [];

  return slugify([release.title, ...actorNames]);
}
function scrapeAll(scenes, site, entryIdFromTitle) {
return scenes.map(({ element, query }) => {
function scrapeAll(scenes, site) {
return scenes.map(({ query }) => {
const release = {};
const title = query.content('.content_img div, .dvd_info > a, a.update_title, .update_title a, a[title] + a[title], .overlay-text')
const title = query.content('.jj-card-title, .content_img div, .dvd_info > a, a.update_title, .update_title a, a[title] + a[title], .overlay-text')
|| query.content('a[title*=" "]');
release.title = title?.slice(0, title.match(/starring:/i)?.index || Infinity).trim();
release.url = query.url('.content_img a, .dvd_info > a, a.update_title, .update_title a, a[title]');
release.date = query.date('.update_date', ['MM/DD/YYYY', 'YYYY-MM-DD']);
release.url = query.url('.jj-card-thumb, .content_img a, .dvd_info > a, a.update_title, .update_title a, a[title]');
release.date = query.date('.jj-card-date, .update_date', ['MM/DD/YYYY', 'YYYY-MM-DD', 'MMMM D, YYYY']);
release.entryId = getEntryId(release.url);
release.actors = query.all('.content_img .update_models a, .update_models a').map((actorEl) => ({
name: unprint.query.content(actorEl),
@@ -70,17 +64,16 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
return null;
}).filter(Boolean);
const teaserScript = query.html('script');
release.teaser = query.video('.jj-card-video', { attribute: 'data-src' });
if (teaserScript) {
release.teaser = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
if (!release.teaser) {
const teaserScript = query.html('script');
if (teaserScript) {
release.teaser = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
}
}
release.entryId = (entryIdFromTitle && getEntryIdFromTitle(release))
|| element.dataset.setid
|| query.element('.rating_box')?.dataset.id
|| query.attribute('a img', 'id')?.match(/set-target-(\d+)/)?.[1];
return release;
});
}
@@ -91,7 +84,7 @@ async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle =
: `${site.url}/trial/categories/movies_${page}_d.html`;
// const res = await http.get(url);
const res = await unprint.get(url, { selectAll: '.update_details, .grid-item' });
const res = await unprint.get(url, { selectAll: '.scenes-listing-grid .jj-content-card' });
if (res.ok) {
return scrapeAll(res.context, site, typeof site.parameters?.entryIdFromTitle === 'boolean' ? site.parameters.entryIdFromTitle : entryIdFromTitle);
@@ -100,7 +93,8 @@ async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle =
return res.status;
}
function scrapeUpcoming(scenes, channel) {
/* disable until we have entryId solution
function scrapeUpcoming(scenes, _channel) {
return scenes.map(({ query, html }) => {
const release = {};
@@ -135,6 +129,7 @@ async function fetchUpcoming(site) {
return res.status;
}
*/
function extractLegacyTrailer(html, context) {
const trailerLines = html.split('\n').filter((line) => /movie\["trailer\w*"\]\[/i.test(line));
@@ -194,17 +189,19 @@ function getPhotos(query, release, context) {
async function scrapeScene({ html, query }, context) {
const release = {};
release.title = query.content('.title_bar_hilite, .movie_title');
release.description = query.content('.update_description') || query.text('//div[./span[contains(text(), "Description")]]');
release.entryId = getEntryId(context.url);
release.date = query.date(['.update_date', '//div[./span[contains(text(), "Date")]]'], ['MM/DD/YYYY', 'YYYY-MM-DD']);
release.title = query.content('.scene-title, .title_bar_hilite, .movie_title');
release.description = query.content('.scene-desc, .update_description') || query.text('//div[./span[contains(text(), "Description")]]');
release.actors = query.all('.backgroundcolor_info > .update_models a, .item .update_models a, .player-scene-description .update_models a').map((actorEl) => ({
release.date = query.date(['.meta-item:nth-child(2) .val, .update_date', '//div[./span[contains(text(), "Date")]]'], ['MM/DD/YYYY', 'YYYY-MM-DD', 'MMMM D, YYYY']);
release.actors = query.all('.meta-item .update_models a, .backgroundcolor_info > .update_models a, .item .update_models a, .player-scene-description .update_models a').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: unprint.query.url(actorEl, null),
}));
release.tags = query.contents('.update_tags a, .player-scene-description a[href*="/categories"]');
release.tags = query.contents('.scene-cats a, .update_tags a, .player-scene-description a[href*="/categories"]');
release.director = release.tags?.find((tag) => ['mike john', 'van styles'].includes(tag?.trim().toLowerCase()));
const posterPath = query.poster('#video-player', { forceGetAttribute: true }) // without getAttribute, missing poster is returned as page URL
@@ -245,7 +242,7 @@ async function scrapeScene({ html, query }, context) {
// #images img selects a list of images that is present on every page; the JJ website removes the ones that failed to load with JS (lol)
release.photos = [
...context.baseRelease?.photos?.map((sources) => sources.at(-1).src) || [],
...query.imgs('#images img'),
...query.imgs('.tp-photos-strip img, #images img'),
...query.imgs('img.update_thumb', { attribute: 'src0_1x' }),
].filter(Boolean).map((source) => Array.from(new Set([
source.replace(/.jpg$/, '-full.jpg'),
@@ -270,9 +267,7 @@ async function scrapeScene({ html, query }, context) {
release.stars = query.number('.avg_rating');
release.entryId = context.entity.parameters?.entryIdFromTitle
? getEntryIdFromTitle(release)
: getEntryId(html);
release.qualities = query.contents('.res-item .res-lbl').map((resolution) => Number(resolution.match(/\d+$/)?.[0])).filter(Boolean);
return release;
}
@@ -296,7 +291,7 @@ function scrapeMovie({ query }, { url }) {
scene.date = unprint.query.date(sceneEl, '//span[contains(@class, "dvd-scene-description") and span[contains(text(), "Date")]]', 'MM/DD/YYYY');
scene.actors = unprint.query.contents(sceneEl, '.update_models a');
scene.entryId = getEntryIdFromTitle(scene);
scene.entryId = getEntryId(scene.url);
return scene;
});
@@ -339,48 +334,34 @@ function scrapeProfile({ query }, url, entity) {
verifyType: 'image',
}));
profile.scenes = scrapeAll(unprint.initAll(query.all('.grid-item')), entity, true);
profile.scenes = scrapeAll(unprint.initAll(query.all('.mbp-scenes-grid .jj-content-card, .grid-item')), entity, true);
return profile;
}
async function fetchProfile({ name: actorName, url }, entity) {
async function fetchProfile({ name: actorName, url: actorUrl }, entity) {
const actorSlugA = slugify(actorName, '');
const actorSlugB = slugify(actorName, '-');
const urls = [
url,
actorUrl,
`${entity.parameters?.profile || `${entity.url}/trial/models`}/${actorSlugA}.html`,
`${entity.parameters?.profile || `${entity.url}/trial/models`}/${actorSlugB}.html`,
];
return urls.reduce(async (chain, profileUrl) => {
const profile = await chain;
const { res, url } = await tryUrls(urls);
if (profile) {
return profile;
}
if (res.ok) {
return scrapeProfile(res.context, url, entity);
}
if (!profileUrl) {
return null;
}
const res = await unprint.get(profileUrl, {
followRedirects: false,
});
if (res.ok) {
return scrapeProfile(res.context, profileUrl, entity);
}
return null;
}, Promise.resolve());
return res.status;
}
module.exports = {
fetchLatest,
fetchProfile,
fetchUpcoming,
// fetchUpcoming,
scrapeScene,
scrapeMovie,
};

View File

@@ -213,7 +213,7 @@ const actors = [
{ entity: 'naughtyamerica', name: 'Nicole Aniston', fields: ['avatar', 'description'] },
{ entity: 'tonightsgirlfriend', name: 'Abella Danger', fields: ['avatar'] },
// jules jordan scraper
{ entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'description', 'avatar'] },
{ entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'avatar'] },
{ entity: 'amateurallure', name: 'Ava Amira', fields: ['avatar', 'description'] },
{ entity: 'swallowsalon', name: 'Abella Danger', fields: ['avatar'] },
// exploitedx