From c9ba7bb90ba17f83d9750093fe46b1c27c483932 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sat, 10 Jan 2026 23:07:03 +0100 Subject: [PATCH] Using unprint for Perv City . Updated unprint for date formats. --- package-lock.json | 8 +- package.json | 2 +- src/scrapers/pervcity.js | 192 +++++++++++++++++++++++---------------- src/scrapers/scrapers.js | 1 + src/scrapers/vixen.js | 4 +- src/utils/convert.js | 20 +++- tests/profiles.js | 8 +- 7 files changed, 142 insertions(+), 93 deletions(-) diff --git a/package-lock.json b/package-lock.json index c12924f7..a6f0394c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -93,7 +93,7 @@ "tunnel": "0.0.6", "ua-parser-js": "^1.0.37", "undici": "^5.28.1", - "unprint": "^0.18.5", + "unprint": "^0.18.6", "url-pattern": "^1.0.3", "v-tooltip": "^2.1.3", "video.js": "^8.6.1", @@ -20340,9 +20340,9 @@ } }, "node_modules/unprint": { - "version": "0.18.5", - "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.5.tgz", - "integrity": "sha512-ila82go467kSefN3RqGpGqqR85r+lk9CS/V89y0uuYTC8DA30fqbdKcqsIoThQAF3MlpwNmepj9XRlIecrISLg==", + "version": "0.18.6", + "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.6.tgz", + "integrity": "sha512-kcDpsaTaMrxY0AkoHq1bGPuVz6Cv1umC0kA1U58Th+UhFarmwPB5racAY514eWEpjC9AXGEhOvIa+n2hErQmRg==", "dependencies": { "bottleneck": "^2.19.5", "cookie": "^1.1.1", diff --git a/package.json b/package.json index 9ab8c2af..532d93bb 100755 --- a/package.json +++ b/package.json @@ -152,7 +152,7 @@ "tunnel": "0.0.6", "ua-parser-js": "^1.0.37", "undici": "^5.28.1", - "unprint": "^0.18.5", + "unprint": "^0.18.6", "url-pattern": "^1.0.3", "v-tooltip": "^2.1.3", "video.js": "^8.6.1", diff --git a/src/scrapers/pervcity.js b/src/scrapers/pervcity.js index 4e4836f7..de9330eb 100755 --- a/src/scrapers/pervcity.js +++ b/src/scrapers/pervcity.js @@ -1,8 +1,9 @@ 'use strict'; -const qu = require('../utils/qu'); +const unprint = require('unprint'); + const slugify = require('../utils/slugify'); -const { feetInchesToCm, lbsToKg } = require('../utils/convert'); +const { convert } = require('../utils/convert'); const channelCodes = { ao: 'analoverdose', @@ -21,80 +22,30 @@ const qualities = { const channelRegExp = new RegExp(Object.keys(channelCodes).join('|'), 'i'); -function scrapeAll(scenes, entity) { +function scrapeAll(scenes) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('.videoPic a'); - release.entryId = query.q('.videoPic img', 'id').match(/set-target-(\d+)/)[1]; + release.entryId = query.attribute('.videoPic img', 'id').match(/set-target-(\d+)/)[1]; - release.title = query.cnt('h3 a'); - release.description = query.cnt('.runtime + p'); + release.title = query.content('h3 a'); + release.description = query.content('.runtime + p'); release.date = query.date('.date', 'MM-DD-YYYY'); - release.duration = query.dur('.runtime'); + release.duration = query.duration('.runtime'); - release.actors = query.cnts('.tour_update_models a'); + release.actors = query.all('.tour_update_models a').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null), + })); release.poster = query.img('.videoPic img'); - release.entity = entity; return release; }); } -function scrapeScene({ query }, channel) { - const release = {}; - - release.entryId = query.q('.trailerLeft img', 'id').match(/set-target-(\d+)/)[1]; - - release.title = query.cnt('.infoHeader h1'); - release.description = query.cnt('.description'); - release.duration = query.duration('.tRuntime'); - - release.actors = query.cnts('.infoBox .tour_update_models a'); - release.tags = query.cnts('.tagcats a'); - release.qualities = query.imgs('.avaiFormate img').map((src) => qualities[src.match(/\/(\w+)\.png/)[1]]).filter(Boolean); - - release.poster = query.img('.posterimg'); - release.photos = query.imgs('.trailerSnaps img').slice(1); // first photo is poster in lower quality - - const trailer = query.q('script')?.textContent.match(/\/trailers\/.+\.mp4/)?.[0]; - - if (trailer) { - release.trailer = `${channel.url}${trailer}`; - release.channel = channelCodes[release.trailer.match(channelRegExp)?.[0]]; - } - - return release; -} - -function scrapeProfile({ query }) { - const profile = {}; - - const bio = query.all('.moreInfo li').reduce((acc, el) => ({ - ...acc, - [slugify(query.cnt(el, 'span'), '_')]: query.text(el), - }), {}); - - profile.description = query.cnt('.aboutModel p'); - profile.dateOfBirth = qu.extractDate(bio.date_of_birth, ['MMMM D, YYYY', 'DD-MMM-YY']); - - profile.birthPlace = bio.birth_location; - profile.ethnicity = bio.ethnicity; - - profile.height = feetInchesToCm(bio.height); - profile.weight = lbsToKg(bio.weight); - - profile.eyes = bio.eye_color; - profile.hairColor = bio.hair_color; - - profile.avatar = query.img('.starPic img'); - profile.releases = scrapeAll(qu.initAll(query.all('.aboutScenes .videoBlock'))); - - return profile; -} - function getLatestUrl(channel, page) { if (channel.parameters?.siteId) { return `https://pervcity.com/search.php?site[]=${channel.parameters.siteId}&page=${page}`; @@ -111,42 +62,125 @@ async function fetchLatest(channel, page = 1) { const url = getLatestUrl(channel, page); if (url) { - const res = await qu.getAll(url, '.videoBlock'); + const res = await unprint.get(url, { selectAll: '.videoBlock' }); - return res.ok ? scrapeAll(res.items, channel) : res.status; + if (res.ok) { + return scrapeAll(res.context, channel); + } + + return res.status; } - return []; + return null; } async function fetchUpcoming(channel) { - const res = await qu.getAll(channel.url, '.upcoming .videoBlock'); + const res = await unprint.get(channel.url, { selectAll: '.upcoming .videoBlock' }); - return res.ok ? scrapeAll(res.items, channel.parameters?.native ? channel : channel.parent) : res.status; + if (res.ok) { + return scrapeAll(res.context, channel.parameters?.native ? channel : channel.parent); + } + + return res.status; +} + +function scrapeScene({ query }, channel) { + const release = {}; + + release.entryId = query.attribute('.trailerLeft img', 'id').match(/set-target-(\d+)/)[1]; + + release.title = query.content('.infoHeader h1'); + release.description = query.content('.description'); + release.duration = query.duration('.tRuntime'); + + release.actors = query.all('.infoBox .tour_update_models a').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null), + })); + + release.tags = query.contents('.tagcats a'); + release.qualities = query.imgs('.avaiFormate img').map((src) => qualities[src.match(/\/(\w+)\.png/)[1]]).filter(Boolean); + + release.poster = query.img('.posterimg'); + release.photos = query.imgs('.trailerSnaps img').slice(1); // first photo is poster in lower quality + + const trailer = query.element('script')?.textContent.match(/\/trailers\/.+\.mp4/)?.[0]; + + if (trailer) { + release.trailer = `${channel.url}${trailer}`; + release.channel = channelCodes[release.trailer.match(channelRegExp)?.[0]]; + } + + return release; } async function fetchScene(url, entity) { - const res = await qu.get(url, '.trailerArea'); - - return res.ok ? scrapeScene(res.item, entity) : res.status; -} - -async function fetchProfile({ name: actorName }) { - const url = `https://pervcity.com/models/${slugify(actorName)}.html`; - const res = await qu.get(url); + const res = await unprint.get(url, { select: '.trailerArea' }); if (res.ok) { - return scrapeProfile(res.item); + return scrapeScene(res.context, entity); } + return res.status; +} + +function scrapeProfile({ query }, url) { + const profile = { url }; + + const bio = query.all('.moreInfo li, .information li').reduce((acc, el) => ({ + ...acc, + [slugify(unprint.query.content(el, 'span'), '_')]: unprint.query.text(el), + }), {}); + + profile.description = query.content('.aboutModel p, .modelContent p'); + profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, ['MMMM D, YYYY', 'DD-MMM-YY', 'MM-DD-YYYY']); + + profile.birthPlace = bio.birth_location; + profile.ethnicity = bio.ethnicity; + + profile.height = convert(bio.height, 'cm'); + profile.weight = convert(bio.weight, 'lb', 'kg'); + + profile.eyes = bio.eye_color; + profile.hairColor = bio.hair_color; + + profile.avatar = query.img('.starPic img, .bioBPic img'); + profile.releases = scrapeAll(unprint.initAll(query.all('.aboutScenes .videoBlock, .videosArea .videoBlock'))); + + return profile; +} + +async function fetchProfile({ name: actorName, url: actorUrl }) { + if (actorUrl) { + const res = await unprint.get(actorUrl); + + if (res.ok) { + return scrapeProfile(res.context); + } + } + + const url = `https://pervcity.com/models/${slugify(actorName)}.html`; const url2 = `https://pervcity.com/models/${slugify(actorName, '')}.html`; - const res2 = await qu.get(url2); - if (res2.ok) { - return scrapeProfile(res2.item); + if (url !== actorUrl) { + const res = await unprint.get(url); + + if (res.ok) { + return scrapeProfile(res.context, url); + } } - return res2.status; + if (url2 !== actorUrl) { + const res = await unprint.get(url2); + + if (res.ok) { + return scrapeProfile(res.context, url); + } + + return res.status; + } + + return null; } module.exports = { diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 9a24ce32..62a52ddd 100755 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -342,6 +342,7 @@ const scrapers = { onlyprince: fullpornnetwork, pascalssubsluts, pervcity, + dpdiva: pervcity, pervertgallery: fullpornnetwork, pierrewoodman, pimpxxx: cherrypimps, diff --git a/src/scrapers/vixen.js b/src/scrapers/vixen.js index d13d1400..d468150d 100755 --- a/src/scrapers/vixen.js +++ b/src/scrapers/vixen.js @@ -411,7 +411,7 @@ async function fetchScene(url, channel, baseRelease, options) { return res.status; } -async function scrapeProfile(data, _channel) { +async function scrapeProfile(data, channel) { const model = data.model; const profile = {}; @@ -429,11 +429,9 @@ async function scrapeProfile(data, _channel) { profile.poster = getAvatarFallbacks(model.images.profile); profile.banner = getAvatarFallbacks(model.images.poster); - /* if (model.videos) { profile.scenes = scrapeAll(model.videos.edges.map((edge) => edge.node), channel); } - */ return profile; } diff --git a/src/utils/convert.js b/src/utils/convert.js index 7266dca5..55611f7b 100755 --- a/src/utils/convert.js +++ b/src/utils/convert.js @@ -59,11 +59,23 @@ function kgToLbs(kgs) { return Math.round(Number(kilos) / 0.453592); } +function curateConvertInput(string) { + if (/['’]|(fe*t)/.test(string)) { + const result = string.match(/(\d+).*(\d+)/); + + if (result) { + return `${result[1]}ft ${result[2]}in`; + } + } + + return string; +} + function convertManyApi(input, to) { - const curatedInput = input - .replace(/['’]\s*/, 'ft ') // ensure 1 space - .replace(/["”]|('')/, 'in') // 5’4” - .replace(/\d+ft\s*\d+\s*$/, (match) => `${match}in`); // height without any inch symbol + const curatedInput = curateConvertInput(input); + + console.log('CONVERT', input); + console.log('RESULT', curatedInput); return Math.round(convertMany(curatedInput).to(to)) || null; } diff --git a/tests/profiles.js b/tests/profiles.js index 7833cb09..ea32c569 100644 --- a/tests/profiles.js +++ b/tests/profiles.js @@ -110,6 +110,9 @@ const actors = [ { entity: 'silverstonedvd', name: 'Leanni Lei', fields: ['avatar', 'gender'] }, { entity: 'silviasaint', name: 'Silvia Saint', fields: ['avatar', 'gender', 'description'] }, { entity: 'whiteghetto', name: 'Proxy Paige', fields: ['avatar', 'gender', 'description'] }, + // perv city + { entity: 'pervcity', name: 'Brooklyn Gray', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'height', 'weight', 'eyes', 'hairColor'] }, + { entity: 'dpdiva', name: 'Liz Jordan', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'height', 'weight', 'eyes', 'hairColor'] }, ]; const actorScrapers = scrapers.actors; @@ -143,8 +146,9 @@ const validators = { birthPlace: (value) => typeof value === 'string' && value.length > 1, // may return US or USA birthCountry: (value) => typeof value === 'string' && value.length > 1, nationality: (value) => typeof value === 'string' && value.length > 3, - height: (value) => !!Number(value) || /\d'\d{1,2}"/.test(value), - weight: (value) => !!Number(value), + // height: (value) => !!Number(value) || /\d'\d{1,2}"/.test(value), // ft in needs to be converted + height: (value) => !!Number(value) && value > 150, + weight: (value) => !!Number(value) && value > 50, eyes: (value) => typeof value === 'string' && value.length > 3, hairColor: (value) => typeof value === 'string' && value.length > 3, measurements: (value) => /(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i.test(value), // from actors module