Using unprint for Perv City. Updated unprint for date formats.

This commit is contained in:
DebaucheryLibrarian
2026-01-10 23:07:03 +01:00
parent f04451f746
commit c9ba7bb90b
7 changed files with 142 additions and 93 deletions

View File

@@ -1,8 +1,9 @@
'use strict';
const qu = require('../utils/qu');
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const { feetInchesToCm, lbsToKg } = require('../utils/convert');
const { convert } = require('../utils/convert');
const channelCodes = {
ao: 'analoverdose',
@@ -21,80 +22,30 @@ const qualities = {
const channelRegExp = new RegExp(Object.keys(channelCodes).join('|'), 'i');
function scrapeAll(scenes, entity) {
function scrapeAll(scenes) {
return scenes.map(({ query }) => {
const release = {};
release.url = query.url('.videoPic a');
release.entryId = query.q('.videoPic img', 'id').match(/set-target-(\d+)/)[1];
release.entryId = query.attribute('.videoPic img', 'id').match(/set-target-(\d+)/)[1];
release.title = query.cnt('h3 a');
release.description = query.cnt('.runtime + p');
release.title = query.content('h3 a');
release.description = query.content('.runtime + p');
release.date = query.date('.date', 'MM-DD-YYYY');
release.duration = query.dur('.runtime');
release.duration = query.duration('.runtime');
release.actors = query.cnts('.tour_update_models a');
release.actors = query.all('.tour_update_models a').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: unprint.query.url(actorEl, null),
}));
release.poster = query.img('.videoPic img');
release.entity = entity;
return release;
});
}
function scrapeScene({ query }, channel) {
const release = {};
release.entryId = query.q('.trailerLeft img', 'id').match(/set-target-(\d+)/)[1];
release.title = query.cnt('.infoHeader h1');
release.description = query.cnt('.description');
release.duration = query.duration('.tRuntime');
release.actors = query.cnts('.infoBox .tour_update_models a');
release.tags = query.cnts('.tagcats a');
release.qualities = query.imgs('.avaiFormate img').map((src) => qualities[src.match(/\/(\w+)\.png/)[1]]).filter(Boolean);
release.poster = query.img('.posterimg');
release.photos = query.imgs('.trailerSnaps img').slice(1); // first photo is poster in lower quality
const trailer = query.q('script')?.textContent.match(/\/trailers\/.+\.mp4/)?.[0];
if (trailer) {
release.trailer = `${channel.url}${trailer}`;
release.channel = channelCodes[release.trailer.match(channelRegExp)?.[0]];
}
return release;
}
function scrapeProfile({ query }) {
const profile = {};
const bio = query.all('.moreInfo li').reduce((acc, el) => ({
...acc,
[slugify(query.cnt(el, 'span'), '_')]: query.text(el),
}), {});
profile.description = query.cnt('.aboutModel p');
profile.dateOfBirth = qu.extractDate(bio.date_of_birth, ['MMMM D, YYYY', 'DD-MMM-YY']);
profile.birthPlace = bio.birth_location;
profile.ethnicity = bio.ethnicity;
profile.height = feetInchesToCm(bio.height);
profile.weight = lbsToKg(bio.weight);
profile.eyes = bio.eye_color;
profile.hairColor = bio.hair_color;
profile.avatar = query.img('.starPic img');
profile.releases = scrapeAll(qu.initAll(query.all('.aboutScenes .videoBlock')));
return profile;
}
function getLatestUrl(channel, page) {
if (channel.parameters?.siteId) {
return `https://pervcity.com/search.php?site[]=${channel.parameters.siteId}&page=${page}`;
@@ -111,42 +62,125 @@ async function fetchLatest(channel, page = 1) {
const url = getLatestUrl(channel, page);
if (url) {
const res = await qu.getAll(url, '.videoBlock');
const res = await unprint.get(url, { selectAll: '.videoBlock' });
return res.ok ? scrapeAll(res.items, channel) : res.status;
if (res.ok) {
return scrapeAll(res.context, channel);
}
return res.status;
}
return [];
return null;
}
async function fetchUpcoming(channel) {
const res = await qu.getAll(channel.url, '.upcoming .videoBlock');
const res = await unprint.get(channel.url, { selectAll: '.upcoming .videoBlock' });
return res.ok ? scrapeAll(res.items, channel.parameters?.native ? channel : channel.parent) : res.status;
if (res.ok) {
return scrapeAll(res.context, channel.parameters?.native ? channel : channel.parent);
}
return res.status;
}
/**
 * Build a release object from a scene detail page.
 *
 * @param {{ query: object }} context - scraping context whose `query` helper is used for every lookup
 * @param {{ url: string }} channel - channel whose `url` is prefixed to the trailer path
 * @returns {object} release with entryId, title, description, duration, actors, tags,
 *   qualities, poster and photos; `trailer` and `channel` are only set when a trailer
 *   path is found in the page script
 * @throws {TypeError} when the `.trailerLeft img` id is missing or lacks `set-target-<digits>`
 */
function scrapeScene({ query }, channel) {
	const release = {};

	release.entryId = query.attribute('.trailerLeft img', 'id').match(/set-target-(\d+)/)[1];
	release.title = query.content('.infoHeader h1');
	release.description = query.content('.description');
	release.duration = query.duration('.tRuntime');

	release.actors = query.all('.infoBox .tour_update_models a').map((actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null),
	}));

	release.tags = query.contents('.tagcats a');

	// Format icons that are not `<name>.png` yield undefined instead of throwing,
	// so they are dropped by the filter just like names missing from `qualities`.
	release.qualities = query.imgs('.avaiFormate img')
		.map((src) => qualities[src.match(/\/(\w+)\.png/)?.[1]])
		.filter(Boolean);

	release.poster = query.img('.posterimg');
	release.photos = query.imgs('.trailerSnaps img').slice(1); // first photo is poster in lower quality

	// The trailer path is pulled out of the text of the first matching script element.
	const trailer = query.element('script')?.textContent.match(/\/trailers\/.+\.mp4/)?.[0];

	if (trailer) {
		release.trailer = `${channel.url}${trailer}`;
		// The channel is derived from the first channel code found in the full trailer URL.
		release.channel = channelCodes[release.trailer.match(channelRegExp)?.[0]];
	}

	return release;
}
async function fetchScene(url, entity) {
const res = await qu.get(url, '.trailerArea');
return res.ok ? scrapeScene(res.item, entity) : res.status;
}
async function fetchProfile({ name: actorName }) {
const url = `https://pervcity.com/models/${slugify(actorName)}.html`;
const res = await qu.get(url);
const res = await unprint.get(url, { select: '.trailerArea' });
if (res.ok) {
return scrapeProfile(res.item);
return scrapeScene(res.context, entity);
}
return res.status;
}
/**
 * Build a performer profile from a model page.
 *
 * The bio list items are turned into a lookup keyed by the slugified label
 * (the item's `span` content, `_`-delimited), with the item text as value.
 *
 * @param {{ query: object }} context - scraping context for the profile page
 * @param {string} [url] - stored on the profile as-is
 * @returns {object} profile including bio fields, avatar and the releases listed on the page
 */
function scrapeProfile({ query }, url) {
	const bioItems = query.all('.moreInfo li, .information li');

	// Later items overwrite earlier ones that share a label.
	const bio = Object.fromEntries(bioItems.map((itemEl) => [
		slugify(unprint.query.content(itemEl, 'span'), '_'),
		unprint.query.text(itemEl),
	]));

	const sceneEls = '.aboutScenes .videoBlock, .videosArea .videoBlock';

	return {
		url,
		description: query.content('.aboutModel p, .modelContent p'),
		dateOfBirth: unprint.extractDate(bio.date_of_birth, ['MMMM D, YYYY', 'DD-MMM-YY', 'MM-DD-YYYY']),
		birthPlace: bio.birth_location,
		ethnicity: bio.ethnicity,
		height: convert(bio.height, 'cm'),
		weight: convert(bio.weight, 'lb', 'kg'),
		eyes: bio.eye_color,
		hairColor: bio.hair_color,
		avatar: query.img('.starPic img, .bioBPic img'),
		releases: scrapeAll(unprint.initAll(query.all(sceneEls))),
	};
}
/**
 * Fetch and scrape a performer profile.
 *
 * Tries, in order: the known profile URL (when given), the model page for the
 * default-slugified name, and the model page for the name slugified without a
 * delimiter. Generated URLs equal to the known URL are not requested again.
 *
 * @param {{ name: string, url?: string }} actor - performer name and optional known profile URL
 * @returns {Promise<object|number|null>} the scraped profile for the first page that loads;
 *   otherwise the status of the last generated URL that was tried, or null when that URL
 *   was skipped because it equals the known URL
 */
async function fetchProfile({ name: actorName, url: actorUrl }) {
	if (actorUrl) {
		const res = await unprint.get(actorUrl);

		if (res.ok) {
			// Pass the URL that was actually fetched so the profile records where it came from.
			return scrapeProfile(res.context, actorUrl);
		}
	}

	const url = `https://pervcity.com/models/${slugify(actorName)}.html`;
	const url2 = `https://pervcity.com/models/${slugify(actorName, '')}.html`;

	if (url !== actorUrl) {
		const res = await unprint.get(url);

		if (res.ok) {
			return scrapeProfile(res.context, url);
		}
	}

	if (url2 !== actorUrl) {
		const res = await unprint.get(url2);

		if (res.ok) {
			// This page was loaded from url2, not url.
			return scrapeProfile(res.context, url2);
		}

		return res.status;
	}

	return null;
}
module.exports = {

View File

@@ -342,6 +342,7 @@ const scrapers = {
onlyprince: fullpornnetwork,
pascalssubsluts,
pervcity,
dpdiva: pervcity,
pervertgallery: fullpornnetwork,
pierrewoodman,
pimpxxx: cherrypimps,

View File

@@ -411,7 +411,7 @@ async function fetchScene(url, channel, baseRelease, options) {
return res.status;
}
async function scrapeProfile(data, _channel) {
async function scrapeProfile(data, channel) {
const model = data.model;
const profile = {};
@@ -429,11 +429,9 @@ async function scrapeProfile(data, _channel) {
profile.poster = getAvatarFallbacks(model.images.profile);
profile.banner = getAvatarFallbacks(model.images.poster);
/*
if (model.videos) {
profile.scenes = scrapeAll(model.videos.edges.map((edge) => edge.node), channel);
}
*/
return profile;
}

View File

@@ -59,11 +59,23 @@ function kgToLbs(kgs) {
return Math.round(Number(kilos) / 0.453592);
}
/**
 * Normalize feet-and-inches notation such as `5'11"` or `5 feet 11 inches`
 * into the `<feet>ft <inches>in` form.
 *
 * Input that is not a string, carries no feet marker (`'`, `ft`, `feet`), or
 * does not contain two separate numbers is returned unchanged.
 *
 * @param {*} string - raw measurement text
 * @returns {*} the normalized `Xft Yin` string, or the input as received
 */
function curateConvertInput(string) {
	if (typeof string !== 'string') {
		return string;
	}

	if (/[']|(fe*t)/.test(string)) {
		// \D+ rather than a greedy .* — the latter swallows every inch digit but the
		// last (5'11" became 5ft 1in) and splits a lone number in two (10ft became 1ft 0in).
		const result = string.match(/(\d+)\D+(\d+)/);

		if (result) {
			return `${result[1]}ft ${result[2]}in`;
		}
	}

	return string;
}
/**
 * Convert a free-form measurement string to the requested unit.
 *
 * The input is first normalized by `curateConvertInput`, then handed to
 * `convertMany(...).to(to)`; the result is rounded to the nearest integer.
 *
 * @param {string} input - raw measurement text
 * @param {string} to - target unit passed through to `convertMany(...).to()`
 * @returns {number|null} the rounded value, or null when it rounds to 0 or is NaN
 */
function convertManyApi(input, to) {
	const curatedInput = curateConvertInput(input);

	// No debug output here: this helper is shared, so logging every conversion floods stdout.
	return Math.round(convertMany(curatedInput).to(to)) || null;
}