From 3392b10182bff969baac4211bcb9ef20d6e2ea4a Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sun, 11 Jan 2026 02:35:18 +0100 Subject: [PATCH] Moved PurgatoryX to Radical API scraper. --- seeds/01_networks.js | 1 - seeds/02_sites.js | 7 +- src/scrapers/purgatoryx.js | 173 ----------------------- src/scrapers/radical.js | 273 ++++++++++++------------------------- src/scrapers/scrapers.js | 46 +++---- tests/profiles.js | 9 +- 6 files changed, 119 insertions(+), 390 deletions(-) delete mode 100755 src/scrapers/purgatoryx.js diff --git a/seeds/01_networks.js b/seeds/01_networks.js index 52202be5..46f86e66 100755 --- a/seeds/01_networks.js +++ b/seeds/01_networks.js @@ -754,7 +754,6 @@ const networks = [ url: 'https://tour.topwebmodels.com', parent: 'radical', parameters: { - layout: 'api', endpoint: '7D_7bQ7peaFQgjlOR42GH', videos: 'scenes', }, diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 7b4952ab..77df63cc 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -10004,6 +10004,9 @@ const sites = [ url: 'https://tour.purgatoryx.com', independent: true, parent: 'radical', + parameters: { + videos: 'episodes', + }, }, { name: 'Inserted', @@ -10012,9 +10015,9 @@ const sites = [ independent: true, parent: 'radical', parameters: { - layout: 'api', // endpoint: 'QrQe_TF3broC5P80XTIbd', endpoint: 'E06HaYWbvDg4UG8fIsSi2', + upcoming: 'upcoming', }, }, { @@ -10024,7 +10027,6 @@ const sites = [ independent: true, parent: 'radical', parameters: { - layout: 'api', // endpoint: 'nOpFJJgD_-c5PrBqecVXA', endpoint: 'GUZuXniB1KVJwF9ZDWiRe', }, @@ -10036,7 +10038,6 @@ const sites = [ independent: true, parent: 'radical', parameters: { - layout: 'api', // endpoint: 'fnkMPhO2Gd-XwWTZHyftg', endpoint: 'K9_0ysd-cpgwtSaZ8_nPT', }, diff --git a/src/scrapers/purgatoryx.js b/src/scrapers/purgatoryx.js deleted file mode 100755 index 46271288..00000000 --- a/src/scrapers/purgatoryx.js +++ /dev/null @@ -1,173 +0,0 @@ -'use strict'; - -const qu = require('../utils/qu'); -const http = require('../utils/http'); -const slugify = require('../utils/slugify'); -const { feetInchesToCm, lbsToKg } = require('../utils/convert'); - -function scrapeAll(scenes) { - return scenes.map(({ query }) => { - const release = {}; - - release.title = query.cnt('.title'); - release.url = query.url('.title a'); - release.entryId = new URL(release.url).pathname.match(/\/view\/(\d+)/)[1]; - - release.date = query.date('.pub-date', 'MMM DD, YYYY'); - release.duration = query.duration('.video-duration'); - - release.actors = query.all('.models a').map((el) => ({ - name: query.cnt(el), - url: query.url(el, null), - })); - - if (query.exists('.thumb-big')) { // updates page - release.poster = query.img('.thumb-big', 'data-image') || JSON.parse(query.el('.thumbnails-wrap a', 'data-images')); - release.photos = [query.img('.thumb-top', 'data-image'), query.img('.thumb-bottom', 'data-image')]; - } - - if (query.exists('.thumbnails-wrap')) { // actor page - try { - const images = JSON.parse(query.el('.thumbnails-wrap a', 'data-images')); - - release.poster = images.slice(0, 1)[0]; - release.photos = images.slice(1); - } catch (error) { - // images probably not available - } - } - - return release; - }); -} - -function scrapeUpcoming({ query }) { - const release = {}; - - release.url = query.url('.bottom-info a'); - release.entryId = new URL(release.url).pathname.match(/\/view\/(\d+)/)?.[1]; - release.title = query.cnt('.title'); - - release.actors = query.all('.model-wrap li').map((el) => ({ - name: query.cnt(el, 'h5'), - url: query.url(el, '.model-thumb a'), - avatar: query.img(el, '.model-thumb img'), - })); - - return release; -} - -function scrapeScene({ query }, url) { - const release = {}; - - release.title = query.cnt('.title'); - release.entryId = new URL(url).pathname.match(/\/view\/(\d+)/)[1]; - release.date = query.date('.date', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/); - - release.description = query.cnt('.description p'); - release.duration = query.duration('.total-time'); - - release.actors = query.all('.model-wrap li').map((el) => ({ - name: query.cnt(el, 'h5'), - url: query.url(el, 'a'), - avatar: query.img(el), - })); - - release.poster = query.poster(); - release.photos = query.urls('.photos-slider a'); - release.trailer = query.video(); - - release.comment = query.cnt('.series'); - - return release; -} - -async function fetchLatest(channel, page) { - const res = await qu.getAll(`${channel.url}/episodes?page=${page}`, '.content-item'); - - if (res.ok) { - return scrapeAll(res.items, channel); - } - - return res.status; -} - -async function fetchUpcoming(channel) { - const res = await qu.get(channel.url, '.upcoming-info-wrap'); - - if (res.ok && res.item) { - return [scrapeUpcoming(res.item, channel)]; - } - - return res.status; -} - -function scrapeProfile({ query }, url) { - const profile = { url }; - - const bio = Object.fromEntries(query.all('.model-desc li').map((el) => [slugify(query.cnt(el, 'span'), '_'), query.text(el)])); - - profile.description = bio.bio; - - profile.dateOfBirth = qu.extractDate(bio.birthdate, 'YYYY-MM-DD'); - profile.birthPlace = bio.birthplace; - - profile.hairColor = bio.hair_color; - profile.eyes = bio.eye_color; - - profile.height = feetInchesToCm(bio.height); - profile.weight = lbsToKg(bio.weight); - profile.measurements = bio.measurements; - - profile.avatar = query.img('.model-pic img'); - - profile.scenes = scrapeAll(qu.initAll(query.all('.content-item'))); - - return profile; -} - -async function searchActor(baseActor, channel) { - const searchRes = await http.post(`${channel.url}/search-preview`, { q: slugify(baseActor.name, ' ') }, { - encodeJSON: false, - headers: { - 'Accept-Language': 'en-US,en;', - }, - }); - - if (searchRes.ok) { - const actorUrl = searchRes.body.find((item) => item.type === 'model' && slugify(item.title) === baseActor.slug)?.url; - - return actorUrl || null; - } - - return null; -} - -async function fetchProfile(baseActor, context, include, retry = false) { - const actorUrl = (!retry && baseActor.url) || await searchActor(baseActor, context.entity); - - if (!actorUrl) { - return null; - } - - const res = await qu.get(actorUrl); - - if (res.ok) { - return scrapeProfile(res.item, actorUrl); - } - - if (baseActor.url) { - return fetchProfile(baseActor, context, include, true); - } - - return res.status; -} - -module.exports = { - fetchLatest, - fetchProfile, - fetchUpcoming, - scrapeAll, - scrapeScene, - deprecated: true, -}; diff --git a/src/scrapers/radical.js b/src/scrapers/radical.js index d759bb0b..f4bcb9ce 100755 --- a/src/scrapers/radical.js +++ b/src/scrapers/radical.js @@ -4,62 +4,28 @@ const unprint = require('unprint'); const mime = require('mime'); const http = require('../utils/http'); -const qu = require('../utils/qu'); const slugify = require('../utils/slugify'); -const { lbsToKg, feetInchesToCm } = require('../utils/convert'); +const { convert } = require('../utils/convert'); const teaserOrder = ['large', 'small', 'mobile', 'mp4', 'jpg']; -function scrapeSceneMetadata(data, channel) { +function getVideoPath(data, parameters) { + if (data.is_published === 0 && parameters.upcoming) { + return parameters.upcoming; + } + + if (parameters.videos) { + return parameters.videos; + } + + return 'videos'; +} + +function scrapeScene(data, channel, parameters) { const release = {}; release.entryId = data.id; - release.url = `${channel.url}/tour/videos/${data.id}/${slugify(data.title, '-', { removePunctuation: true })}`; - - release.title = data.title; - release.description = data.description; - - release.date = new Date(data.release_date); - release.duration = data.seconds_duration || qu.durationToSeconds(data.videos_duration); - - release.actors = data.models.map((model) => ({ - entryId: model.id, - name: model.name, - gender: model.gender, - avatar: model.thumb, - url: `${channel.url}/tour/models/${model.id}/${slugify(model.name, '-', { removePunctuation: true })}`, - })); - - release.poster = data.trailer?.poster || [data.thumb?.replace('mobile.jpg', '.jpg'), data.thumb]; - release.photos = [ - data.extra_thumbs?.find((url) => /portrait1.jpg/.test(url)), - data.extra_thumbs?.find((url) => /scene.jpg/.test(url)), - data.extra_thumbs?.find((url) => /portrait2.jpg/.test(url)), - ]; // ordered by chronology: portrait1.jpg and scene.jpg are usually pre-shoot poses, portrait2.jpg is the cumshot aftermath - - release.trailer = data.trailer && { - src: data.trailer.src, - type: data.trailer.type, - }; - - release.teaser = data.special_thumbs; - - release.tags = [].concat(data.tags?.map((tag) => tag.name)); - release.qualities = data.downloads && Object.values(data.downloads)?.map((download) => download.meta_data.height); - release.stars = data.rating; - - return release; -} - -function scrapeAllMetadata(scenes, channel) { - return scenes.map((data) => scrapeSceneMetadata(data, channel)); -} - -function scrapeSceneApi(data, channel, parameters) { - const release = {}; - - release.entryId = data.id; - release.url = `${channel.url}/${parameters.videos || 'videos'}/${data.slug}`; + release.url = `${channel.url}/${getVideoPath(data, parameters)}/${data.slug}`; release.title = data.title; release.description = data.description; @@ -75,6 +41,7 @@ function scrapeSceneApi(data, channel, parameters) { return { name: actor.name, avatar: actor.thumb, + url: actor.slug && `${channel.url}/models/${actor.slug}`, }; } @@ -119,64 +86,8 @@ function scrapeSceneApi(data, channel, parameters) { return release; } -function scrapeAllApi(scenes, channel, parameters) { - return scenes.map((data) => scrapeSceneApi(data, channel, parameters)); -} - -function scrapeProfileMetadata(data, channel) { - const profile = {}; - - profile.entryId = data.id; - profile.url = `${channel.url}/tour/models/${data.id}/${slugify(data.name, '-', { removePunctuation: true })}`; - - profile.description = data.attributes.bio?.value; - profile.dateOfBirth = qu.parseDate(data.attributes.birthdate?.value, 'YYYY-MM-DD'); - profile.gender = data.gender; - profile.age = data.attributes.age?.value; - profile.birthPlace = data.attributes.born?.value; - - profile.measurements = data.attributes.measurements?.value; - profile.height = feetInchesToCm(data.attributes.height?.value); - profile.weight = lbsToKg(data.attributes.weight?.value); - - profile.eyes = data.attributes.eyes?.value; - profile.hairColor = data.attributes.hair?.value; - - profile.avatar = data.thumb; - profile.date = new Date(data.publish_date); - - return profile; -} - -function scrapeProfileApi(data, channel, scenes, parameters) { - const profile = {}; - const bio = Object.fromEntries(Object.entries(data).map(([key, value]) => [key.toLowerCase(), value])); // keys are mixed upper and lowercase - - profile.entryId = bio.id; - - profile.description = bio.bio; - - profile.gender = bio.gender; - - profile.dateOfBirth = unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'); - profile.birthPlace = bio.born; - profile.age = bio.age; - - profile.measurements = bio.measurements; - - profile.height = feetInchesToCm(bio.height); - profile.weight = lbsToKg(bio.weight); - - profile.eyes = bio.eyes; - profile.hairColor = bio.hair; - - profile.avatar = data.thumb; - - if (scenes) { - profile.scenes = scrapeAllApi(scenes, channel, parameters); - } - - return profile; +function scrapeAll(scenes, channel, parameters) { + return scenes.map((data) => scrapeScene(data, channel, parameters)); } async function fetchEndpoint(channel, parameters) { @@ -194,7 +105,7 @@ async function fetchEndpoint(channel, parameters) { return parameters.endpoint; } -async function fetchLatestApi(channel, page, { parameters }) { +async function fetchLatest(channel, page, { parameters }) { const endpoint = await fetchEndpoint(channel, parameters); if (!endpoint) { @@ -208,86 +119,25 @@ async function fetchLatestApi(channel, page, { parameters }) { const res = await http.get(url); if (res.ok && res.body.pageProps?.contents?.data) { - return scrapeAllApi(res.body.pageProps.contents.data, channel, parameters); + return scrapeAll(res.body.pageProps.contents.data, channel, parameters); } return res.status; } -async function fetchSceneApi(url, channel, baseScene, { parameters }) { - const slug = new URL(url).pathname.split('/').at(-1); - const endpoint = await fetchEndpoint(channel); - const res = await http.get(`${channel.url}/_next/data/${endpoint}/${parameters.videos || 'videos'}/${slug}.json?slug=${slug}`); - - if (res.ok && res.body.pageProps?.content) { - return scrapeSceneApi(res.body.pageProps.content, channel, parameters); - } - - return res.status; -} - -async function fetchProfileApi(actor, { channel, parameters }) { - const endpoint = await fetchEndpoint(channel); - const res = await http.get(`${channel.url}/_next/data/${endpoint}/models/${actor.slug}.json?slug=${actor.slug}`); - - if (res.ok && res.body.pageProps?.model) { - return scrapeProfileApi(res.body.pageProps.model, channel, res.body.pageProps.model_contents, parameters); - } - - return res.status; -} - -async function fetchLatestMetadata(channel, page = 1) { - const url = `${channel.url}/tour/videos?page=${page}`; - const res = await http.get(url, { - parse: true, - extract: { +async function fetchUpcoming(channel, _page, { parameters }) { + const res = await unprint.get(channel.url, { + parser: { runScripts: 'dangerously', }, }); - if (res.ok && res.window.__DATA__) { - return scrapeAllMetadata(res.window.__DATA__.videos.items, channel); - } - if (res.ok) { - return res.window.__DATA__?.error || null; - } + const data = res.context.query.json('#__NEXT_DATA__'); + const scene = data?.props.pageProps.upcoming_scene; - return res.status; -} - -async function fetchSceneMetadata(url, channel) { - const res = await http.get(url, { - parse: true, - extract: { - runScripts: 'dangerously', - }, - }); - - if (res.ok && res.window.__DATA__?.video) { - return scrapeSceneMetadata(res.window.__DATA__.video, channel); - } - - if (res.ok) { - return res.window.__DATA__?.error || null; - } - - return res.status; -} - -async function fetchProfileMetadata(actor, channel) { - const res = await http.get(`${channel.url}/tour/search-preview/${actor.name}`, { - headers: { - 'X-Requested-With': 'XMLHttpRequest', - }, - }); - - if (res.ok) { - const model = res.body.models?.items.find((modelX) => slugify(modelX.name) === actor.slug); - - if (model) { - return scrapeProfileMetadata(model, channel); + if (scene) { + return scrapeScene(scene, channel, parameters); } return null; @@ -296,16 +146,63 @@ async function fetchProfileMetadata(actor, channel) { return res.status; } +async function fetchScene(url, channel, _baseScene, { parameters }) { + const slug = new URL(url).pathname.split('/').at(-1); + const endpoint = await fetchEndpoint(channel); + const res = await http.get(`${channel.url}/_next/data/${endpoint}/${parameters.videos || 'videos'}/${slug}.json?slug=${slug}`); + + if (res.ok && res.body.pageProps?.content) { + return scrapeScene(res.body.pageProps.content, channel, parameters); + } + + return res.status; +} + +function scrapeProfile(data, channel, scenes, parameters) { + const profile = {}; + const bio = Object.fromEntries(Object.entries(data).map(([key, value]) => [slugify(key, '_'), value])); // keys are mixed upper and lowercase + + profile.entryId = bio.id; + + profile.description = bio.bio; + + profile.gender = bio.gender; + + profile.dateOfBirth = unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'); + profile.birthPlace = bio.born || bio.birthplace; + profile.age = bio.age; + + profile.measurements = bio.measurements; + + profile.height = convert(bio.height, 'cm'); + profile.weight = convert(bio.weight, 'lb', 'kg'); + + profile.eyes = bio.eyes || bio.eye_color; + profile.hairColor = bio.hair || bio.hair_color; + + profile.avatar = data.thumb; + + if (scenes) { + profile.scenes = scrapeAll(scenes, channel, parameters); + } + + return profile; +} + +async function fetchProfile(actor, { channel, parameters }) { + const endpoint = await fetchEndpoint(channel); + const res = await http.get(`${channel.url}/_next/data/${endpoint}/models/${actor.slug}.json?slug=${actor.slug}`); + + if (res.ok && res.body.pageProps?.model) { + return scrapeProfile(res.body.pageProps.model, channel, res.body.pageProps.model_contents, parameters); + } + + return res.status; +} + module.exports = { - metadata: { - // probably deprecated - fetchLatest: fetchLatestMetadata, - fetchScene: fetchSceneMetadata, - fetchProfile: fetchProfileMetadata, - }, - api: { - fetchLatest: fetchLatestApi, - fetchScene: fetchSceneApi, - fetchProfile: fetchProfileApi, - }, + fetchLatest, + fetchUpcoming, + fetchScene, + fetchProfile, }; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index cdc2da86..60b42af5 100755 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -61,7 +61,6 @@ const porncz = require('./porncz'); const pornhub = require('./pornhub'); const pornworld = require('./pornworld'); const privateNetwork = require('./private'); // reserved keyword -const purgatoryx = require('./purgatoryx'); const radical = require('./radical'); const rickysroom = require('./rickysroom'); const sexlikereal = require('./sexlikereal'); @@ -169,7 +168,6 @@ const scrapers = { pornpros: whalemember, pornplus: whalemember, private: privateNetwork, - purgatoryx, radical, rickysroom, sayuncle: teamskeet, @@ -226,6 +224,8 @@ const scrapers = { twistys: aylo, // gamma '21sextury': gamma, + '3rddegreefilms': gamma, + addicted2girls: gamma, biphoria: gamma, blakemason: gamma, blowpass: gamma, @@ -239,6 +239,7 @@ const scrapers = { fantasymassage: gamma, filthykings: gamma, gangbangcreampie: gamma, + genderxfilms: gamma, girlsway: gamma, gloryholesecrets: gamma, peternorth: gamma, @@ -253,20 +254,30 @@ const scrapers = { wicked: gamma, xempire: gamma, zerotolerancefilms: gamma, - '3rddegreefilms': gamma, - addicted2girls: gamma, - genderxfilms: gamma, // mike adriano - trueanal: mikeadriano, - swallowed: mikeadriano, - nympho: mikeadriano, - dirtyauditions: mikeadriano, - analonly: mikeadriano, allanal: mikeadriano, - // the flourish - theflourishxxx: theflourish, + analonly: mikeadriano, + dirtyauditions: mikeadriano, + nympho: mikeadriano, + swallowed: mikeadriano, + trueanal: mikeadriano, + // radical + bjraw: radical, + gotfilled: radical, + inserted: radical, + purgatoryx: radical, + topwebmodels: radical, + // hush / hussiepass + eyeontheguy: hush, + hushpass: hush, + hussiepass: hush, + interracialpass: hush, + interracialpovs: hush, + povpornstars: hush, + seehimfuck: hush, // etc '18vr': badoink, + theflourishxxx: theflourish, adultempire, archangel, allherluv: missax, @@ -286,7 +297,6 @@ const scrapers = { badoinkvr: badoink, bamvisions, bang, - bjraw: radical, bluedonkeymedia, delphine: modelmedia, meidenvanholland: bluedonkeymedia, @@ -301,22 +311,15 @@ const scrapers = { doubleviewcasting: firstanalquest, dtfsluts: fullpornnetwork, exploitedx, // only from known URL that will specify site - eyeontheguy: hush, firstanalquest, forbondage: porndoe, freeones, girlfaction: fullpornnetwork, - gotfilled: radical, hergape: fullpornnetwork, hitzefrei, homemadeanalwhores: fullpornnetwork, hookuphotshot, hotcrazymess: nubiles, - hushpass: hush, - hussiepass: hush, - inserted: radical, - interracialpass: hush, - interracialpovs: hush, inthecrack, jamesdeen: fullpornnetwork, jerkaoke: modelmedia, @@ -357,14 +360,11 @@ const scrapers = { pornhub, pornworld, povperverts: fullpornnetwork, - povpornstars: hush, private: privateNetwork, - purgatoryx, realvr: badoink, rickysroom, sayuncle: teamskeet, score, - seehimfuck: hush, sexlikereal, spermmania: snowvalley, handjobjapan: snowvalley, diff --git a/tests/profiles.js b/tests/profiles.js index a8fe320a..e9bb5d25 100644 --- a/tests/profiles.js +++ b/tests/profiles.js @@ -113,11 +113,16 @@ const actors = [ // perv city { entity: 'pervcity', name: 'Brooklyn Gray', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'height', 'weight', 'eyes', 'hairColor'] }, { entity: 'dpdiva', name: 'Liz Jordan', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'height', 'weight', 'eyes', 'hairColor'] }, + // radical + { entity: 'bjraw', name: 'Nikki Knightly', fields: ['avatar', 'description', 'gender', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] }, + { entity: 'gotfilled', name: 'Alexa Chains', fields: ['avatar', 'description', 'gender', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] }, + { entity: 'inserted', name: 'Anissa Kate', fields: ['avatar', 'description', 'gender', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] }, + { entity: 'topwebmodels', name: 'Lexi Belle', fields: ['avatar', 'gender', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] }, + { entity: 'purgatoryx', name: 'Kenzie Reeves', fields: ['avatar', 'description', 'gender', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] }, // etc. { entity: 'archangel', name: 'Summer Brielle', fields: ['avatar', 'description', 'dateOfBirth', 'age', 'measurements', 'height', 'aliases'] }, { entity: 'theflourishxxx', name: 'XWifeKaren', fields: ['avatar', 'description'] }, { entity: 'hookuphotshot', name: 'Kenzie Reeves', fields: ['avatar', 'description'] }, - { entity: 'inserted', name: 'Kenzie Reeves', fields: ['avatar', 'description'] }, ]; const actorScrapers = scrapers.actors; @@ -153,7 +158,7 @@ const validators = { nationality: (value) => typeof value === 'string' && value.length > 3, // height: (value) => !!Number(value) || /\d'\d{1,2}"/.test(value), // ft in needs to be converted height: (value) => !!Number(value) && value > 150, - weight: (value) => !!Number(value) && value > 50, + weight: (value) => !!Number(value) && value > 40, eyes: (value) => typeof value === 'string' && value.length > 3, hairColor: (value) => typeof value === 'string' && value.length > 3, measurements: (value) => /(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i.test(value), // from actors module