From f962d71d10ebe21e7c8d567bc9333ba54af936a1 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Thu, 16 Apr 2026 04:01:31 +0200 Subject: [PATCH] Fixed Aylo session acquire, migrated to unprint. Fixed Jules Jordan profile test. --- src/scrapers/aylo.js | 282 +++++++++++++++++++------------------------ tests/profiles.js | 2 +- 2 files changed, 128 insertions(+), 156 deletions(-) diff --git a/src/scrapers/aylo.js b/src/scrapers/aylo.js index 57be7cb9..cbfc33aa 100755 --- a/src/scrapers/aylo.js +++ b/src/scrapers/aylo.js @@ -1,15 +1,10 @@ 'use strict'; /* eslint-disable newline-per-chained-call */ -const Promise = require('bluebird'); -const { CookieJar } = Promise.promisifyAll(require('tough-cookie')); -const cookie = require('cookie'); const moment = require('moment'); -// const unprint = require('unprint'); +const unprint = require('unprint'); -const qu = require('../utils/qu'); const slugify = require('../utils/slugify'); -const http = require('../utils/http'); const { inchesToCm, lbsToKg } = require('../utils/convert'); function getBasePath(parameters, channel, path = '/scene') { @@ -126,6 +121,119 @@ async function scrapeLatest(items, site, filterChannel, options) { }; } +function getUrl(site) { + const { searchParams, pathname } = new URL(site.url); + + // if (search.match(/\?site=\d+/)) { + if (searchParams.has('site') || /\/site\/\d+/.test(pathname)) { + return site.url; + } + + if (site.parameters?.native) { + return `${site.url}/scenes`; + } + + if (site.parameters?.extract) { + return `${site.url}/scenes`; + } + + if (site.parameters?.siteId) { + return `${site.parent.url}/scenes?site=${site.parameters.siteId}`; + } + + throw new Error(`Aylo site '${site.name}' (${site.url}) not supported`); +} + +async function getSession(site, _parameters, url) { + // if (site.slug === 'aylo' || site.parameters?.parentSession === false) { + if (site.slug === 'aylo') { + // most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels + return null; + } + + const sessionUrl = site.parameters?.siteId && !(site.parameters?.native || site.parameters?.childSession || site.parent?.parameters?.childSession || site.parameters?.parentSession === false) + ? site.parent.url + : (url || site.url); + + const res = await unprint.get(sessionUrl, { + headers: { + 'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites + Connection: 'keep-alive', + }, + }); + + if (res.status === 200) { + const instanceToken = res.cookies.instance_token; + + if (instanceToken) { + return { instanceToken }; + } + } + + throw new Error(`Failed to acquire Aylo session (${res.statusCode})`); +} + +async function fetchLatest(site, page = 1, options) { + const url = getUrl(site); + const { searchParams, pathname } = new URL(url); + const siteId = searchParams.get('site') || Number(pathname.match(/\/site\/(\d+)\//)?.[1]); + + if (!siteId && !site.parameters?.native && !site.parameters?.extract) { + return null; + } + + const { instanceToken } = options.beforeNetwork?.instanceToken && !(options.parameters?.native || options.parameters?.childSession || options.parameters?.parentSession === false) + ? options.beforeNetwork + : await getSession(site, options.parameters, url); + + const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD'); + const limit = 24; + const apiUrl = site.parameters?.native || site.parameters?.extract + ? `https://site-api.project1service.com/v2/releases?dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene` + : `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`; + + const res = await unprint.get(apiUrl, { + interval: options.parameters.interval, + concurrency: options.parameters.concurrency, + headers: { + Instance: instanceToken, + Origin: site.url, + Referer: url, + 'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites + }, + }); + + if (res.status === 200 && res.data.result) { + return scrapeLatest(res.data.result, site, false, options); + } + + return res.status; +} + +async function fetchUpcoming(site, _page, options) { + const url = getUrl(site); + const { instanceToken } = await getSession(site, options.parameters); + + const apiUrl = 'https://site-api.project1service.com/v2/upcoming-releases'; + + const res = await unprint.get(apiUrl, { + interval: options.parameters.interval, + concurrency: options.parameters.concurrency, + headers: { + Instance: instanceToken, + Origin: site.url, + Referer: url, + 'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites + }, + }); + + if (res.status === 200 && res.data.result) { + return scrapeLatest(res.data.result, site, true, options); + } + + return res.status; +} + function scrapeRelease(data, url, channel, networkName, options) { if (Array.isArray(data)) { return null; @@ -192,139 +300,6 @@ function scrapeRelease(data, url, channel, networkName, options) { return release; } -function getUrl(site) { - const { searchParams, pathname } = new URL(site.url); - - // if (search.match(/\?site=\d+/)) { - if (searchParams.has('site') || /\/site\/\d+/.test(pathname)) { - return site.url; - } - - if (site.parameters?.native) { - return `${site.url}/scenes`; - } - - if (site.parameters?.extract) { - return `${site.url}/scenes`; - } - - if (site.parameters?.siteId) { - return `${site.parent.url}/scenes?site=${site.parameters.siteId}`; - } - - throw new Error(`Aylo site '${site.name}' (${site.url}) not supported`); -} - -async function getSession(site, parameters, url) { - // if (site.slug === 'aylo' || site.parameters?.parentSession === false) { - if (site.slug === 'aylo') { - // most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels - return null; - } - - const cookieJar = new CookieJar(); - const session = http.session({ cookieJar }); - - const sessionUrl = site.parameters?.siteId && !(site.parameters?.native || site.parameters?.childSession || site.parent?.parameters?.childSession || site.parameters?.parentSession === false) - ? site.parent.url - : (url || site.url); - - /* - await unprint.browserRequest(sessionUrl, { - browser: { - headless: false, - }, - async control() { - await new Promise((resolve) => { setTimeout(() => resolve(), 10000); }); - }, - }); - */ - - const res = await http.get(sessionUrl, { - session, - headers: { - 'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites - Connection: 'keep-alive', - }, - interval: parameters?.interval, - concurrency: parameters?.concurrency, - parse: false, - }); - - if (res.status === 200) { - const cookieString = await cookieJar.getCookieStringAsync(sessionUrl); - const { instance_token: instanceToken } = cookie.parse(cookieString); - - if (instanceToken) { - return { session, instanceToken }; - } - } - - throw new Error(`Failed to acquire Aylo session (${res.statusCode})`); -} - -async function fetchLatest(site, page = 1, options) { - const url = getUrl(site); - const { searchParams, pathname } = new URL(url); - const siteId = searchParams.get('site') || Number(pathname.match(/\/site\/(\d+)\//)?.[1]); - - if (!siteId && !site.parameters?.native && !site.parameters?.extract) { - return null; - } - - const { instanceToken } = options.beforeNetwork?.instanceToken && !(options.parameters?.native || options.parameters?.childSession || options.parameters?.parentSession === false) - ? options.beforeNetwork - : await getSession(site, options.parameters, url); - - const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD'); - const limit = 24; - const apiUrl = site.parameters?.native || site.parameters?.extract - ? `https://site-api.project1service.com/v2/releases?dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene` - : `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`; - - const res = await http.get(apiUrl, { - interval: options.parameters.interval, - concurrency: options.parameters.concurrency, - headers: { - Instance: instanceToken, - Origin: site.url, - Referer: url, - 'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites - }, - }); - - if (res.status === 200 && res.body.result) { - return scrapeLatest(res.body.result, site, false, options); - } - - return res.statusCode; -} - -async function fetchUpcoming(site, page, options) { - const url = getUrl(site); - const { session, instanceToken } = await getSession(site, options.parameters); - - const apiUrl = 'https://site-api.project1service.com/v2/upcoming-releases'; - - const res = await http.get(apiUrl, { - session, - interval: options.parameters.interval, - concurrency: options.parameters.concurrency, - headers: { - Instance: instanceToken, - Origin: site.url, - Referer: url, - 'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites - }, - }); - - if (res.statusCode === 200 && res.body.result) { - return scrapeLatest(res.body.result, site, true, options); - } - - return res.statusCode; -} - async function fetchRelease(url, site, baseScene, options) { if (baseScene?.entryId && !baseScene.shallow && !options.parameters.forceDeep) { // overview and deep data is the same, don't hit server unnecessarily @@ -332,10 +307,9 @@ async function fetchRelease(url, site, baseScene, options) { } const entryId = new URL(url).pathname.match(/\/(\d+)/)?.[1]; - const { session, instanceToken } = options.beforeFetchScenes || await getSession(site, options.parameters); + const { instanceToken } = options.beforeFetchScenes || await getSession(site, options.parameters); - const res = await http.get(`https://site-api.project1service.com/v2/releases/${entryId}`, { - session, + const res = await unprint.get(`https://site-api.project1service.com/v2/releases/${entryId}`, { interval: options.parameters.interval, concurrency: options.parameters.concurrency, headers: { @@ -344,16 +318,16 @@ async function fetchRelease(url, site, baseScene, options) { }, }); - if (res.status === 200 && res.body.result) { + if (res.status === 200 && res.data.result) { return { - scene: scrapeRelease(res.body.result, url, site, null, options), + scene: scrapeRelease(res.data.result, url, site, null, options), }; } return null; } -function scrapeProfile(data, networkName, _releases = []) { +function scrapeProfile(data, _networkName, _releases = []) { const profile = { description: data.bio, aliases: data.aliases.filter(Boolean), @@ -367,7 +341,7 @@ function scrapeProfile(data, networkName, _releases = []) { profile.measurements = data.measurements; } - profile.dateOfBirth = qu.parseDate(data.birthday); + profile.dateOfBirth = unprint.extractDate(data.birthday); profile.birthPlace = data.birthPlace; profile.height = inchesToCm(data.height); profile.weight = lbsToKg(data.weight); @@ -406,10 +380,9 @@ function scrapeProfile(data, networkName, _releases = []) { async function fetchProfile({ name: actorName }, { entity, parameters }, include) { // const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`; - const { session, instanceToken } = await getSession(entity, parameters); + const { instanceToken } = await getSession(entity, parameters); - const res = await http.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, { - session, + const res = await unprint.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, { interval: parameters.interval, concurrency: parameters.concurrency, headers: { @@ -418,14 +391,13 @@ async function fetchProfile({ name: actorName }, { entity, parameters }, include }, }); - if (res.statusCode === 200) { - const actorData = res.body.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase()); + if (res.status === 200) { + const actorData = res.data.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase()); if (actorData) { const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`; - const actorReleasesRes = include.includeActorScenes && await http.get(actorReleasesUrl, { - session, + const actorReleasesRes = include.includeActorScenes && await unprint.get(actorReleasesUrl, { interval: parameters.interval, concurrency: parameters.concurrency, headers: { @@ -433,8 +405,8 @@ async function fetchProfile({ name: actorName }, { entity, parameters }, include }, }); - if (actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) { - return scrapeProfile(actorData, entity.slug, actorReleasesRes.body.result); + if (actorReleasesRes.status === 200 && actorReleasesRes.data.result) { + return scrapeProfile(actorData, entity.slug, actorReleasesRes.data.result); } return scrapeProfile(actorData, entity.slug, []); diff --git a/tests/profiles.js b/tests/profiles.js index 8cfdf933..e9a52e04 100644 --- a/tests/profiles.js +++ b/tests/profiles.js @@ -213,7 +213,7 @@ const actors = [ { entity: 'naughtyamerica', name: 'Nicole Aniston', fields: ['avatar', 'description'] }, { entity: 'tonightsgirlfriend', name: 'Abella Danger', fields: ['avatar'] }, // jules jordan scraper - { entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'description', 'avatar'] }, + { entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'avatar'] }, { entity: 'amateurallure', name: 'Ava Amira', fields: ['avatar', 'description'] }, { entity: 'swallowsalon', name: 'Abella Danger', fields: ['avatar'] }, // exploitedx