forked from DebaucheryLibrarian/traxxx
Using unprint for Perv City. Updated unprint for date formats.
This commit is contained in:
parent
f04451f746
commit
c9ba7bb90b
|
|
@ -93,7 +93,7 @@
|
||||||
"tunnel": "0.0.6",
|
"tunnel": "0.0.6",
|
||||||
"ua-parser-js": "^1.0.37",
|
"ua-parser-js": "^1.0.37",
|
||||||
"undici": "^5.28.1",
|
"undici": "^5.28.1",
|
||||||
"unprint": "^0.18.5",
|
"unprint": "^0.18.6",
|
||||||
"url-pattern": "^1.0.3",
|
"url-pattern": "^1.0.3",
|
||||||
"v-tooltip": "^2.1.3",
|
"v-tooltip": "^2.1.3",
|
||||||
"video.js": "^8.6.1",
|
"video.js": "^8.6.1",
|
||||||
|
|
@ -20340,9 +20340,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/unprint": {
|
"node_modules/unprint": {
|
||||||
"version": "0.18.5",
|
"version": "0.18.6",
|
||||||
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.5.tgz",
|
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.6.tgz",
|
||||||
"integrity": "sha512-ila82go467kSefN3RqGpGqqR85r+lk9CS/V89y0uuYTC8DA30fqbdKcqsIoThQAF3MlpwNmepj9XRlIecrISLg==",
|
"integrity": "sha512-kcDpsaTaMrxY0AkoHq1bGPuVz6Cv1umC0kA1U58Th+UhFarmwPB5racAY514eWEpjC9AXGEhOvIa+n2hErQmRg==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"bottleneck": "^2.19.5",
|
"bottleneck": "^2.19.5",
|
||||||
"cookie": "^1.1.1",
|
"cookie": "^1.1.1",
|
||||||
|
|
|
||||||
|
|
@ -152,7 +152,7 @@
|
||||||
"tunnel": "0.0.6",
|
"tunnel": "0.0.6",
|
||||||
"ua-parser-js": "^1.0.37",
|
"ua-parser-js": "^1.0.37",
|
||||||
"undici": "^5.28.1",
|
"undici": "^5.28.1",
|
||||||
"unprint": "^0.18.5",
|
"unprint": "^0.18.6",
|
||||||
"url-pattern": "^1.0.3",
|
"url-pattern": "^1.0.3",
|
||||||
"v-tooltip": "^2.1.3",
|
"v-tooltip": "^2.1.3",
|
||||||
"video.js": "^8.6.1",
|
"video.js": "^8.6.1",
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,9 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const qu = require('../utils/qu');
|
const unprint = require('unprint');
|
||||||
|
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
const { feetInchesToCm, lbsToKg } = require('../utils/convert');
|
const { convert } = require('../utils/convert');
|
||||||
|
|
||||||
const channelCodes = {
|
const channelCodes = {
|
||||||
ao: 'analoverdose',
|
ao: 'analoverdose',
|
||||||
|
|
@ -21,80 +22,30 @@ const qualities = {
|
||||||
|
|
||||||
const channelRegExp = new RegExp(Object.keys(channelCodes).join('|'), 'i');
|
const channelRegExp = new RegExp(Object.keys(channelCodes).join('|'), 'i');
|
||||||
|
|
||||||
/**
 * Build one release object per listing tile.
 *
 * @param {Array<{ query: object }>} scenes - tile contexts; each is destructured for its `query` helper
 * @returns {object[]} releases with url, entryId, title, description, date, duration, actors and poster
 */
function scrapeAll(scenes) {
	return scenes.map(({ query }) => ({
		url: query.url('.videoPic a'),
		// the numeric ID is embedded in the thumbnail's element ID as set-target-<id>
		entryId: query.attribute('.videoPic img', 'id').match(/set-target-(\d+)/)[1],
		title: query.content('h3 a'),
		description: query.content('.runtime + p'),
		date: query.date('.date', 'MM-DD-YYYY'),
		duration: query.duration('.runtime'),
		actors: query.all('.tour_update_models a').map((anchor) => ({
			name: unprint.query.content(anchor),
			url: unprint.query.url(anchor, null),
		})),
		poster: query.img('.videoPic img'),
	}));
}
|
||||||
|
|
||||||
function scrapeScene({ query }, channel) {
|
|
||||||
const release = {};
|
|
||||||
|
|
||||||
release.entryId = query.q('.trailerLeft img', 'id').match(/set-target-(\d+)/)[1];
|
|
||||||
|
|
||||||
release.title = query.cnt('.infoHeader h1');
|
|
||||||
release.description = query.cnt('.description');
|
|
||||||
release.duration = query.duration('.tRuntime');
|
|
||||||
|
|
||||||
release.actors = query.cnts('.infoBox .tour_update_models a');
|
|
||||||
release.tags = query.cnts('.tagcats a');
|
|
||||||
release.qualities = query.imgs('.avaiFormate img').map((src) => qualities[src.match(/\/(\w+)\.png/)[1]]).filter(Boolean);
|
|
||||||
|
|
||||||
release.poster = query.img('.posterimg');
|
|
||||||
release.photos = query.imgs('.trailerSnaps img').slice(1); // first photo is poster in lower quality
|
|
||||||
|
|
||||||
const trailer = query.q('script')?.textContent.match(/\/trailers\/.+\.mp4/)?.[0];
|
|
||||||
|
|
||||||
if (trailer) {
|
|
||||||
release.trailer = `${channel.url}${trailer}`;
|
|
||||||
release.channel = channelCodes[release.trailer.match(channelRegExp)?.[0]];
|
|
||||||
}
|
|
||||||
|
|
||||||
return release;
|
|
||||||
}
|
|
||||||
|
|
||||||
function scrapeProfile({ query }) {
|
|
||||||
const profile = {};
|
|
||||||
|
|
||||||
const bio = query.all('.moreInfo li').reduce((acc, el) => ({
|
|
||||||
...acc,
|
|
||||||
[slugify(query.cnt(el, 'span'), '_')]: query.text(el),
|
|
||||||
}), {});
|
|
||||||
|
|
||||||
profile.description = query.cnt('.aboutModel p');
|
|
||||||
profile.dateOfBirth = qu.extractDate(bio.date_of_birth, ['MMMM D, YYYY', 'DD-MMM-YY']);
|
|
||||||
|
|
||||||
profile.birthPlace = bio.birth_location;
|
|
||||||
profile.ethnicity = bio.ethnicity;
|
|
||||||
|
|
||||||
profile.height = feetInchesToCm(bio.height);
|
|
||||||
profile.weight = lbsToKg(bio.weight);
|
|
||||||
|
|
||||||
profile.eyes = bio.eye_color;
|
|
||||||
profile.hairColor = bio.hair_color;
|
|
||||||
|
|
||||||
profile.avatar = query.img('.starPic img');
|
|
||||||
profile.releases = scrapeAll(qu.initAll(query.all('.aboutScenes .videoBlock')));
|
|
||||||
|
|
||||||
return profile;
|
|
||||||
}
|
|
||||||
|
|
||||||
function getLatestUrl(channel, page) {
|
function getLatestUrl(channel, page) {
|
||||||
if (channel.parameters?.siteId) {
|
if (channel.parameters?.siteId) {
|
||||||
return `https://pervcity.com/search.php?site[]=${channel.parameters.siteId}&page=${page}`;
|
return `https://pervcity.com/search.php?site[]=${channel.parameters.siteId}&page=${page}`;
|
||||||
|
|
@ -111,42 +62,125 @@ async function fetchLatest(channel, page = 1) {
|
||||||
const url = getLatestUrl(channel, page);
|
const url = getLatestUrl(channel, page);
|
||||||
|
|
||||||
if (url) {
|
if (url) {
|
||||||
const res = await qu.getAll(url, '.videoBlock');
|
const res = await unprint.get(url, { selectAll: '.videoBlock' });
|
||||||
|
|
||||||
return res.ok ? scrapeAll(res.items, channel) : res.status;
|
if (res.ok) {
|
||||||
|
return scrapeAll(res.context, channel);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
return [];
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Fetch the channel front page and scrape the tiles inside its `.upcoming` section.
 *
 * @param {object} channel - channel entity; `url`, `parameters` and `parent` are read
 * @returns {Promise<object[]|number>} scraped releases, or the response status when the request failed
 */
async function fetchUpcoming(channel) {
	const res = await unprint.get(channel.url, { selectAll: '.upcoming .videoBlock' });

	if (!res.ok) {
		return res.status;
	}

	// native channels keep their own entity, others defer to the parent network
	const entity = channel.parameters?.native ? channel : channel.parent;

	return scrapeAll(res.context, entity);
}
|
||||||
|
|
||||||
|
/**
 * Scrape a single scene page.
 *
 * @param {{ query: object }} context - page context; only its `query` helper is used
 * @param {object} channel - channel entity; `channel.url` is prefixed to the trailer path
 * @returns {object} release with entryId, title, description, duration, actors, tags,
 *   qualities, poster, photos and, when a trailer path is found, trailer and channel
 */
function scrapeScene({ query }, channel) {
	const release = {};

	// the numeric ID is embedded in the image's element ID as set-target-<id>
	release.entryId = query.attribute('.trailerLeft img', 'id').match(/set-target-(\d+)/)[1];

	release.title = query.content('.infoHeader h1');
	release.description = query.content('.description');
	release.duration = query.duration('.tRuntime');

	release.actors = query.all('.infoBox .tour_update_models a').map((actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null),
	}));

	release.tags = query.contents('.tagcats a');

	// format icons are mapped by file name; a source that is not a .png now yields
	// undefined and is dropped by the filter instead of throwing on [1] of null
	release.qualities = query.imgs('.avaiFormate img')
		.map((src) => qualities[src.match(/\/(\w+)\.png/)?.[1]])
		.filter(Boolean);

	release.poster = query.img('.posterimg');
	release.photos = query.imgs('.trailerSnaps img').slice(1); // first photo is poster in lower quality

	const trailer = query.element('script')?.textContent.match(/\/trailers\/.+\.mp4/)?.[0];

	if (trailer) {
		release.trailer = `${channel.url}${trailer}`;
		// channelRegExp is case-insensitive while channelCodes keys are lower case,
		// so normalise the matched code before the lookup
		release.channel = channelCodes[release.trailer.match(channelRegExp)?.[0].toLowerCase()];
	}

	return release;
}
|
||||||
|
|
||||||
/**
 * Fetch a scene page and scrape its `.trailerArea` section.
 *
 * @param {string} url - scene page URL
 * @param {object} entity - channel entity forwarded to scrapeScene
 * @returns {Promise<object|number>} scraped release, or the response status when the request failed
 */
async function fetchScene(url, entity) {
	const res = await unprint.get(url, { select: '.trailerArea' });

	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.context, entity);
}
|
||||||
|
|
||||||
|
/**
 * Scrape a model page into a profile object.
 *
 * @param {{ query: object }} context - page context; only its `query` helper is used
 * @param {string} [url] - page URL, stored on the profile as-is
 * @returns {object} profile with url, description, bio fields, avatar and releases
 */
function scrapeProfile({ query }, url) {
	// bio list items become a map keyed by the slugified <span> label, e.g. date_of_birth
	const bio = Object.fromEntries(query.all('.moreInfo li, .information li').map((item) => [
		slugify(unprint.query.content(item, 'span'), '_'),
		unprint.query.text(item),
	]));

	// selector pairs cover two page layouts (presumably the main site and sister sites; verify)
	return {
		url,
		description: query.content('.aboutModel p, .modelContent p'),
		dateOfBirth: unprint.extractDate(bio.date_of_birth, ['MMMM D, YYYY', 'DD-MMM-YY', 'MM-DD-YYYY']),
		birthPlace: bio.birth_location,
		ethnicity: bio.ethnicity,
		height: convert(bio.height, 'cm'),
		weight: convert(bio.weight, 'lb', 'kg'),
		eyes: bio.eye_color,
		hairColor: bio.hair_color,
		avatar: query.img('.starPic img, .bioBPic img'),
		releases: scrapeAll(unprint.initAll(query.all('.aboutScenes .videoBlock, .videosArea .videoBlock'))),
	};
}
|
||||||
|
|
||||||
|
/**
 * Fetch an actor profile, trying the known profile URL first and then two
 * URLs guessed from the actor name.
 *
 * @param {{ name: string, url?: string }} actor - actor name and, when known, profile URL
 * @returns {Promise<object|number|null>} scraped profile; the response status of the last
 *   guessed URL when it failed; or null when that URL was already tried as `actorUrl`
 */
async function fetchProfile({ name: actorName, url: actorUrl }) {
	if (actorUrl) {
		const res = await unprint.get(actorUrl);

		if (res.ok) {
			// pass the URL along so profile.url is populated for this path too
			return scrapeProfile(res.context, actorUrl);
		}
	}

	// model pages use either the default slug or a separator-less slug
	const url = `https://pervcity.com/models/${slugify(actorName)}.html`;
	const url2 = `https://pervcity.com/models/${slugify(actorName, '')}.html`;

	if (url !== actorUrl) {
		const res = await unprint.get(url);

		if (res.ok) {
			return scrapeProfile(res.context, url);
		}
	}

	if (url2 !== actorUrl) {
		const res = await unprint.get(url2);

		if (res.ok) {
			// record the URL that was actually fetched, not the first guess
			return scrapeProfile(res.context, url2);
		}

		return res.status;
	}

	return null;
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|
|
||||||
|
|
@ -342,6 +342,7 @@ const scrapers = {
|
||||||
onlyprince: fullpornnetwork,
|
onlyprince: fullpornnetwork,
|
||||||
pascalssubsluts,
|
pascalssubsluts,
|
||||||
pervcity,
|
pervcity,
|
||||||
|
dpdiva: pervcity,
|
||||||
pervertgallery: fullpornnetwork,
|
pervertgallery: fullpornnetwork,
|
||||||
pierrewoodman,
|
pierrewoodman,
|
||||||
pimpxxx: cherrypimps,
|
pimpxxx: cherrypimps,
|
||||||
|
|
|
||||||
|
|
@ -411,7 +411,7 @@ async function fetchScene(url, channel, baseRelease, options) {
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeProfile(data, _channel) {
|
async function scrapeProfile(data, channel) {
|
||||||
const model = data.model;
|
const model = data.model;
|
||||||
const profile = {};
|
const profile = {};
|
||||||
|
|
||||||
|
|
@ -429,11 +429,9 @@ async function scrapeProfile(data, _channel) {
|
||||||
profile.poster = getAvatarFallbacks(model.images.profile);
|
profile.poster = getAvatarFallbacks(model.images.profile);
|
||||||
profile.banner = getAvatarFallbacks(model.images.poster);
|
profile.banner = getAvatarFallbacks(model.images.poster);
|
||||||
|
|
||||||
/*
|
|
||||||
if (model.videos) {
|
if (model.videos) {
|
||||||
profile.scenes = scrapeAll(model.videos.edges.map((edge) => edge.node), channel);
|
profile.scenes = scrapeAll(model.videos.edges.map((edge) => edge.node), channel);
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -59,11 +59,23 @@ function kgToLbs(kgs) {
|
||||||
return Math.round(Number(kilos) / 0.453592);
|
return Math.round(Number(kilos) / 0.453592);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Normalise a feet/inches height into the `<feet>ft <inches>in` form before conversion.
 *
 * Input that has no foot marker (', ’, ft, feet), or has one but not two separate
 * numbers, is returned unchanged.
 *
 * @param {*} string - raw measurement, e.g. `5'11"`, `5’4”`, `5 feet 10 inches`, `170cm`
 * @returns {*} the normalised height string, or the input as-is
 */
function curateConvertInput(string) {
	if (/['’]|(fe*t)/.test(string)) {
		// \D+ between the groups keeps each number whole; the previous greedy `.*`
		// left only the last digit for inches (5'11" -> 5ft 1in) and split a lone
		// multi-digit number in two (10ft -> 1ft 0in)
		const result = String(string).match(/(\d+)\D+(\d+)/);

		if (result) {
			return `${result[1]}ft ${result[2]}in`;
		}
	}

	return string;
}
|
||||||
|
|
||||||
/**
 * Convert a free-form measurement to the target unit, rounded to an integer.
 *
 * @param {*} input - raw measurement; feet/inches notation is normalised by curateConvertInput first
 * @param {string} to - target unit passed to `convertMany(...).to()`
 * @returns {number|null} rounded value, or null when the result is 0 or NaN
 */
function convertManyApi(input, to) {
	// debug console.log calls removed; this helper runs for every converted profile field
	const curatedInput = curateConvertInput(input);

	return Math.round(convertMany(curatedInput).to(to)) || null;
}
|
||||||
|
|
|
||||||
|
|
@ -110,6 +110,9 @@ const actors = [
|
||||||
{ entity: 'silverstonedvd', name: 'Leanni Lei', fields: ['avatar', 'gender'] },
|
{ entity: 'silverstonedvd', name: 'Leanni Lei', fields: ['avatar', 'gender'] },
|
||||||
{ entity: 'silviasaint', name: 'Silvia Saint', fields: ['avatar', 'gender', 'description'] },
|
{ entity: 'silviasaint', name: 'Silvia Saint', fields: ['avatar', 'gender', 'description'] },
|
||||||
{ entity: 'whiteghetto', name: 'Proxy Paige', fields: ['avatar', 'gender', 'description'] },
|
{ entity: 'whiteghetto', name: 'Proxy Paige', fields: ['avatar', 'gender', 'description'] },
|
||||||
|
// perv city
|
||||||
|
{ entity: 'pervcity', name: 'Brooklyn Gray', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'height', 'weight', 'eyes', 'hairColor'] },
|
||||||
|
{ entity: 'dpdiva', name: 'Liz Jordan', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'height', 'weight', 'eyes', 'hairColor'] },
|
||||||
];
|
];
|
||||||
|
|
||||||
const actorScrapers = scrapers.actors;
|
const actorScrapers = scrapers.actors;
|
||||||
|
|
@ -143,8 +146,9 @@ const validators = {
|
||||||
birthPlace: (value) => typeof value === 'string' && value.length > 1, // may return US or USA
|
birthPlace: (value) => typeof value === 'string' && value.length > 1, // may return US or USA
|
||||||
birthCountry: (value) => typeof value === 'string' && value.length > 1,
|
birthCountry: (value) => typeof value === 'string' && value.length > 1,
|
||||||
nationality: (value) => typeof value === 'string' && value.length > 3,
|
nationality: (value) => typeof value === 'string' && value.length > 3,
|
||||||
height: (value) => !!Number(value) || /\d'\d{1,2}"/.test(value),
|
// height: (value) => !!Number(value) || /\d'\d{1,2}"/.test(value), // ft in needs to be converted
|
||||||
weight: (value) => !!Number(value),
|
height: (value) => !!Number(value) && value > 150,
|
||||||
|
weight: (value) => !!Number(value) && value > 50,
|
||||||
eyes: (value) => typeof value === 'string' && value.length > 3,
|
eyes: (value) => typeof value === 'string' && value.length > 3,
|
||||||
hairColor: (value) => typeof value === 'string' && value.length > 3,
|
hairColor: (value) => typeof value === 'string' && value.length > 3,
|
||||||
measurements: (value) => /(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i.test(value), // from actors module
|
measurements: (value) => /(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i.test(value), // from actors module
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue