Rewrote the broken Perv City scraper and added profile scraping.

This commit is contained in:
ThePendulum 2020-07-12 05:10:23 +02:00
parent 051556936e
commit 9c8cfe3bdb
6 changed files with 270 additions and 125 deletions

View File

@ -4347,7 +4347,10 @@ const sites = [
url: 'http://www.analoverdose.com', url: 'http://www.analoverdose.com',
description: 'Before proceeding, use caution: the stunning pornstars of Anal Overdose are so fiery that they cause heavy breathing, throbbing cocks and volcanic loads of cum. If you think you can handle the heat of smoking tits, sweltering pussy and red hot ass.', description: 'Before proceeding, use caution: the stunning pornstars of Anal Overdose are so fiery that they cause heavy breathing, throbbing cocks and volcanic loads of cum. If you think you can handle the heat of smoking tits, sweltering pussy and red hot ass.',
parent: 'pervcity', parent: 'pervcity',
parameters: { tourId: 3 }, parameters: {
siteId: 2,
tourId: 3,
},
}, },
{ {
slug: 'bangingbeauties', slug: 'bangingbeauties',
@ -4355,7 +4358,10 @@ const sites = [
description: "Banging Beauties isn't just a porn site; it's the gateway to all your pussy-obsessed fantasies! Our members' area is flowing with beautiful pornstars anticipating big dick throbbing in their syrupy pink slits. These experienced babes love brutal vaginal pounding! Similarly, they're eager for anal switch-hitting to shake things up. However, it's not only about gorgeous sexperts filling their hungry holes. Sometimes, it's all about innocent rookies earning their pornstar status in first time threesomes and premier interracial scenes.", description: "Banging Beauties isn't just a porn site; it's the gateway to all your pussy-obsessed fantasies! Our members' area is flowing with beautiful pornstars anticipating big dick throbbing in their syrupy pink slits. These experienced babes love brutal vaginal pounding! Similarly, they're eager for anal switch-hitting to shake things up. However, it's not only about gorgeous sexperts filling their hungry holes. Sometimes, it's all about innocent rookies earning their pornstar status in first time threesomes and premier interracial scenes.",
url: 'http://www.bangingbeauties.com', url: 'http://www.bangingbeauties.com',
parent: 'pervcity', parent: 'pervcity',
parameters: { tourId: 7 }, parameters: {
siteId: 3,
tourId: 7,
},
}, },
{ {
slug: 'oraloverdose', slug: 'oraloverdose',
@ -4363,7 +4369,10 @@ const sites = [
description: "Oral Overdose is the only site you need to live out every saliva soaked blowjob of your dreams in HD POV! We've got the most stunning cocksuckers in the world going to town on big dick. These babes not only love cock, they can't get enough of it! In fact, there is no prick too huge for our hungry girls' throats. You'll find gorgeous, big tits pornstars exercising their gag reflex in intense balls deep facefuck scenes. We also feature fresh, young newbies taking on the gagging deepthroat challenge.", description: "Oral Overdose is the only site you need to live out every saliva soaked blowjob of your dreams in HD POV! We've got the most stunning cocksuckers in the world going to town on big dick. These babes not only love cock, they can't get enough of it! In fact, there is no prick too huge for our hungry girls' throats. You'll find gorgeous, big tits pornstars exercising their gag reflex in intense balls deep facefuck scenes. We also feature fresh, young newbies taking on the gagging deepthroat challenge.",
url: 'http://www.oraloverdose.com', url: 'http://www.oraloverdose.com',
parent: 'pervcity', parent: 'pervcity',
parameters: { tourId: 4 }, parameters: {
siteId: 4,
tourId: 4,
},
}, },
{ {
slug: 'chocolatebjs', slug: 'chocolatebjs',
@ -4371,15 +4380,21 @@ const sites = [
description: "You've just won the golden ticket to the best Chocolate BJs on the planet! We've sought far and wide to bring you the most beautiful black and ethnic pornstars. And they're in our members' area now! They can't wait to suck your white lollipop and lick the thick cream shooting from your big dick. Of course, no matter how sweet the booty or juicy the big tits, these brown foxes aren't all sugar and spice. In fact, when it comes to giving head, these big ass ebony babes know what they want: huge white cocks filling their throats!", description: "You've just won the golden ticket to the best Chocolate BJs on the planet! We've sought far and wide to bring you the most beautiful black and ethnic pornstars. And they're in our members' area now! They can't wait to suck your white lollipop and lick the thick cream shooting from your big dick. Of course, no matter how sweet the booty or juicy the big tits, these brown foxes aren't all sugar and spice. In fact, when it comes to giving head, these big ass ebony babes know what they want: huge white cocks filling their throats!",
url: 'http://www.chocolatebjs.com', url: 'http://www.chocolatebjs.com',
parent: 'pervcity', parent: 'pervcity',
parameters: { tourId: 6 }, parameters: {
siteId: 5,
tourId: 6,
},
}, },
{ {
slug: 'upherasshole', slug: 'upherasshole',
name: 'Up Her Asshole', name: 'Up Her Asshole',
description: "You don't need to travel the globe in search of the anal wonders of the world, because you get your own private tour right here on Up Her Asshole! Our stunning pornstars and rookie starlets welcome all ass fetish and anal sex fans, with their twerking bubble butts and winking assholes. However, big booty worship is just a slice of the fun. Combined with juicy tits (big and small, wet pussy (hairy and bald, these girls deliver a spectacular sensory experience in HD POV. Not only are you in danger of busting a nut before the going gets good, but also when the good turns remarkable with rimming, fingering and butt toys!", description: "You don't need to travel the globe in search of the anal wonders of the world, because you get your own private tour right here on Up Her Asshole! Our stunning pornstars and rookie starlets welcome all ass fetish and anal sex fans, with their twerking bubble butts and winking assholes. However, big booty worship is just a slice of the fun. Combined with juicy tits (big and small, wet pussy (hairy and bald, these girls deliver a spectacular sensory experience in HD POV. Not only are you in danger of busting a nut before the going gets good, but also when the good turns remarkable with rimming, fingering and butt toys!",
url: 'http://www.upherasshole.com', url: 'http://upherasshole.com',
parent: 'pervcity', parent: 'pervcity',
parameters: { tourId: 9 }, parameters: {
siteId: 6,
tourId: 9,
},
}, },
// PIMP XXX // PIMP XXX
{ {

View File

@ -302,12 +302,12 @@ async function curateProfile(profile) {
curatedProfile.dateOfDeath = Number.isNaN(Number(profile.dateOfDeath)) ? null : profile.dateOfDeath; curatedProfile.dateOfDeath = Number.isNaN(Number(profile.dateOfDeath)) ? null : profile.dateOfDeath;
curatedProfile.cup = profile.cup || (typeof profile.bust === 'string' && profile.bust?.match(/[a-zA-Z]+/)?.[0]) || null; curatedProfile.cup = profile.cup || (typeof profile.bust === 'string' && profile.bust?.match?.(/[a-zA-Z]+/)?.[0]) || null;
curatedProfile.bust = Number(profile.bust) || profile.bust?.match(/\d+/)?.[0] || null; curatedProfile.bust = Number(profile.bust) || profile.bust?.match?.(/\d+/)?.[0] || null;
curatedProfile.waist = Number(profile.waist) || profile.waist?.match(/\d+/)?.[0] || null; curatedProfile.waist = Number(profile.waist) || profile.waist?.match?.(/\d+/)?.[0] || null;
curatedProfile.hip = Number(profile.hip) || profile.hip?.match(/\d+/)?.[0] || null; curatedProfile.hip = Number(profile.hip) || profile.hip?.match?.(/\d+/)?.[0] || null;
curatedProfile.height = Number(profile.height) || profile.height?.match(/\d+/)?.[0] || null; curatedProfile.height = Number(profile.height) || profile.height?.match?.(/\d+/)?.[0] || null;
curatedProfile.weight = Number(profile.weight) || profile.weight?.match(/\d+/)?.[0] || null; curatedProfile.weight = Number(profile.weight) || profile.weight?.match?.(/\d+/)?.[0] || null;
curatedProfile.naturalBoobs = typeof profile.naturalBoobs === 'boolean' ? profile.naturalBoobs : null; curatedProfile.naturalBoobs = typeof profile.naturalBoobs === 'boolean' ? profile.naturalBoobs : null;
curatedProfile.hasTattoos = typeof profile.hasTattoos === 'boolean' ? profile.hasTattoos : null; curatedProfile.hasTattoos = typeof profile.hasTattoos === 'boolean' ? profile.hasTattoos : null;

View File

@ -1,140 +1,113 @@
'use strict'; 'use strict';
const bhttp = require('bhttp'); const qu = require('../utils/qu');
const cheerio = require('cheerio'); const slugify = require('../utils/slugify');
const { JSDOM } = require('jsdom'); const { feetInchesToCm, lbsToKg } = require('../utils/convert');
const moment = require('moment');
async function getTrailer(entryId) { function scrapeAll(scenes) {
const trailerRes = await bhttp.post('https://www.pervcity.com/gettoken.php', { return scenes.map(({ query }) => {
setId: entryId, const release = {};
release.url = query.url('.videoPic a');
release.entryId = query.q('.videoPic img', 'id').match(/set-target-(\d+)/)[1];
release.title = query.q('h3 a', true);
release.description = query.q('.runtime + p', true);
release.date = query.date('.date', 'MM-DD-YYYY');
release.duration = query.dur('.runtime');
release.actors = query.all('.tour_update_models a', true);
release.poster = query.img('.videoPic img');
return release;
}); });
}
if (trailerRes.statusCode === 200) { function scrapeScene({ query }) {
return { const release = {};
poster: trailerRes.body.TrailerImg,
trailer: trailerRes.body.TrailerPath || trailerRes.body.Trailerfallback, release.entryId = query.q('.trailerLeft img', 'id').match(/set-target-(\d+)/)[1];
release.title = query.q('.infoHeader h1', true);
release.description = query.q('.infoBox p', true);
release.actors = query.all('.tour_update_models a', true);
release.poster = query.img('.posterimg');
release.photos = query.imgs('.trailerSnaps img').slice(1); // first photo is poster in lower quality
const trailer = query.q('script')?.textContent.match(/\/trailers\/.+\.mp4/)?.[0];
if (trailer) {
release.trailer = {
src: `https://pervcity.com${trailer}`,
}; };
} }
return null;
}
function scrapeLatestScene(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const entryId = $('li').attr('id');
const sceneLinkElement = $('#scene_title_border a');
const url = `${site.url}/${sceneLinkElement.attr('href')}`;
const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes
const actors = $('.home_model_name a').toArray().map(element => $(element).text().replace(/,[\u0020\u00A0\u202F]/, '')); // replace weird commas
const date = moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate();
const poster = $('a:nth-child(2) > img').attr('src');
const photos = $('.sample-picker img').map((index, element) => $(element).attr('src').replace('tourpics', 'trailer')).toArray();
const stars = $('img[src*="/star.png"]')
.toArray()
.map(element => $(element).attr('src'))
.length || 0;
return {
url,
entryId,
title,
actors,
date,
poster,
photos,
rating: {
stars,
},
site,
};
}
async function scrapeScene(html, url, site) {
const { document } = new JSDOM(html).window;
const release = { url, site };
release.entryId = document.querySelector('input#set_ID').value;
release.title = document.querySelector('title').textContent;
release.description = document.querySelector('.player_data').textContent.trim();
const durationString = document.querySelector('.tag_lineR div:nth-child(2) span').textContent;
const [minutes, seconds] = durationString.match(/\d+/g);
release.duration = Number(minutes) * 60 + Number(seconds);
release.tags = document.querySelector('meta[name="keywords"]').content.split(',');
const { poster, trailer } = await getTrailer(release.entryId);
release.poster = poster;
release.trailer = { src: trailer };
return release; return release;
} }
function scrapeFallbackLanding(html) { function scrapeProfile({ query }) {
const { document } = new JSDOM(html).window; const profile = {};
return document.querySelector('input#set_ID').value; const bio = query.all('.moreInfo li').reduce((acc, el) => ({
...acc,
[slugify(query.q(el, 'span', true), '_')]: query.text(el),
}), {});
profile.description = query.q('.aboutModel p', true);
profile.dateOfBirth = qu.extractDate(bio.date_of_birth, ['MMMM D, YYYY', 'DD-MMM-YY']);
profile.birthPlace = bio.birth_location;
profile.ethnicity = bio.ethnicity;
profile.height = feetInchesToCm(bio.height);
profile.weight = lbsToKg(bio.weight);
profile.eyes = bio.eye_color;
profile.hairColor = bio.hair_color;
profile.avatar = query.img('.starPic img');
profile.releases = scrapeAll(qu.initAll(query.all('.aboutScenes .videoBlock')));
return profile;
} }
async function scrapeFallbackScene(html, entryId, url, site) { async function fetchLatest(channel, page = 1) {
const { document } = new JSDOM(html).window; const url = `https://pervcity.com/search.php?site[]=${channel.parameters.siteId}&page=${page}`;
const release = { url, entryId, site }; const res = await qu.getAll(url, '.videoBlock');
release.title = document.querySelector('.popup_data_set_head label').textContent; return res.ok ? scrapeAll(res.items, channel) : res.status;
release.description = document.querySelector('.popup_data_set_des p').textContent.trim();
release.date = moment.utc(document.querySelector('.popup_left_top div span').textContent, 'MM-DD-YYYY').toDate();
release.actors = Array.from(document.querySelectorAll('.popup_data_set_models a'), el => el.textContent);
const { poster, trailer } = await getTrailer(release.entryId);
release.poster = poster;
release.trailer = { src: trailer };
release.channel = document.querySelector('.popup_left_top div img').alt;
return release;
} }
async function fetchLatest(site, page = 1) { async function fetchScene(url, entity) {
const res = page === 1 const res = await qu.get(url, '.trailerArea');
? await bhttp.get(`${site.url}/final_latestupdateview.php?limitstart=${(page - 1) * 9}&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`)
: await bhttp.get(`${site.url}/final_load_latestupdate_grid_view.php?limitstart=0&limitend=${(page - 1) * 8 + 1}&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`);
const elements = JSON.parse(res.body.toString());
const latest = Object.values(elements.total_arr).map(html => scrapeLatestScene(html, site)); // total_arr is a key-value object for final_load_latestupdate_grid_view.php return res.ok ? scrapeScene(res.item, entity) : res.status;
return latest;
} }
async function fetchScene(url, site) { async function fetchProfile(actorName) {
const res = await bhttp.get(url); const url = `https://pervcity.com/models/${slugify(actorName)}.html`;
const res = await qu.get(url);
if (res.statusCode === 200) { if (res.ok) {
if (site.isNetwork) { return scrapeProfile(res.item);
const entryId = scrapeFallbackLanding(res.body.toString(), url);
const fallbackRes = await bhttp.post('https://www.pervcity.com/set_popupvideo.php', {
setId: entryId,
});
return scrapeFallbackScene(fallbackRes.body.toString(), entryId, url, site);
}
return scrapeScene(res.body.toString(), url, site);
} }
return null; const url2 = `https://pervcity.com/models/${slugify(actorName, '')}.html`;
const res2 = await qu.get(url2);
if (res2.ok) {
return scrapeProfile(res2.item);
}
return res2.status;
} }
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchScene, fetchScene,
fetchProfile,
}; };

View File

@ -0,0 +1,144 @@
'use strict';
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Request the poster and trailer for a set from the Perv City token endpoint.
 *
 * @param {string|number} entryId - Set identifier, sent as `setId` in the POST payload.
 * @returns {Promise<?{poster: *, trailer: *}>} `TrailerImg` as poster and `TrailerPath`
 *   (or `Trailerfallback` when the former is falsy) as trailer; null when the
 *   response status code is not 200.
 */
async function getTrailer(entryId) {
  const response = await bhttp.post('https://www.pervcity.com/gettoken.php', { setId: entryId });

  if (response.statusCode !== 200) {
    return null;
  }

  const { TrailerImg, TrailerPath, Trailerfallback } = response.body;

  return {
    poster: TrailerImg,
    trailer: TrailerPath || Trailerfallback,
  };
}
/**
 * Parse a single scene out of one HTML fragment from the latest-updates listing.
 *
 * @param {string} html - HTML fragment for one scene.
 * @param {Object} site - Site entity; `site.url` is used as the base of the scene URL
 *   and the object itself is attached to the result.
 * @returns {Object} Release with url, entryId, title, actors, date, poster, photos,
 *   rating.stars (number of star images found) and site.
 */
function scrapeLatestScene(html, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });
  const titleLink = $('#scene_title_border a');

  // The pattern matches the three characters U+00E2 U+0080 U+0099, which look like a
  // mis-decoded right single quote; they are replaced by a plain apostrophe.
  const title = titleLink.attr('title').replace(/\u00E2\u0080\u0099/g, '\'');

  // Strip a comma followed by a regular, non-breaking or narrow non-breaking space.
  const actors = $('.home_model_name a')
    .toArray()
    .map(actorEl => $(actorEl).text().replace(/,[\u0020\u00A0\u202F]/, ''));

  // Sample thumbnails live under 'tourpics'; the same path under 'trailer' is used instead.
  const photos = $('.sample-picker img')
    .map((index, photoEl) => $(photoEl).attr('src').replace('tourpics', 'trailer'))
    .toArray();

  return {
    url: `${site.url}/${titleLink.attr('href')}`,
    entryId: $('li').attr('id'),
    title,
    actors,
    // NOTE(review): parsed as day-first here, while scrapeFallbackScene parses
    // month-first ('MM-DD-YYYY') — confirm which format this listing really uses.
    date: moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate(),
    poster: $('a:nth-child(2) > img').attr('src'),
    photos,
    rating: {
      stars: $('img[src*="/star.png"]').toArray().length,
    },
    site,
  };
}
/**
 * Scrape a scene page of an individual site and enrich it with poster and trailer.
 *
 * @param {string} html - Full HTML of the scene page.
 * @param {string} url - URL the page was fetched from; copied onto the release.
 * @param {Object} site - Site entity; copied onto the release.
 * @returns {Promise<Object>} Release with url, site, entryId, title, description and tags;
 *   duration (in seconds) when the page yields digits for it; poster and trailer
 *   when the trailer lookup succeeds.
 */
async function scrapeScene(html, url, site) {
  const { document } = new JSDOM(html).window;
  const release = { url, site };

  release.entryId = document.querySelector('input#set_ID').value;
  release.title = document.querySelector('title').textContent;
  release.description = document.querySelector('.player_data').textContent.trim();

  const durationString = document.querySelector('.tag_lineR div:nth-child(2) span').textContent;
  const durationParts = durationString.match(/\d+/g);

  // match() yields null when the text holds no digits; skip the duration instead of
  // crashing. A lone number is read as minutes, with seconds defaulting to 0.
  if (durationParts) {
    const [minutes, seconds = 0] = durationParts;
    release.duration = Number(minutes) * 60 + Number(seconds);
  }

  release.tags = document.querySelector('meta[name="keywords"]').content.split(',');

  // getTrailer resolves to null on a non-200 response; destructuring that result
  // directly would throw, so the media is only attached when it is available.
  const trailerData = await getTrailer(release.entryId);

  if (trailerData) {
    release.poster = trailerData.poster;
    release.trailer = { src: trailerData.trailer };
  }

  return release;
}
/**
 * Read the set ID from a scene landing page.
 *
 * @param {string} html - HTML of the landing page.
 * @returns {string} Value of the `input#set_ID` element.
 */
function scrapeFallbackLanding(html) {
  const dom = new JSDOM(html);
  const setIdInput = dom.window.document.querySelector('input#set_ID');

  return setIdInput.value;
}
/**
 * Scrape the scene popup markup, used as a fallback for scenes on the network site.
 *
 * @param {string} html - HTML returned by the popup endpoint.
 * @param {string} entryId - Set ID of the scene; copied onto the release and used for
 *   the trailer lookup.
 * @param {string} url - URL of the scene; copied onto the release.
 * @param {Object} site - Site entity; copied onto the release.
 * @returns {Promise<Object>} Release with url, entryId, site, title, description, date,
 *   actors and channel; poster and trailer when the trailer lookup succeeds.
 */
async function scrapeFallbackScene(html, entryId, url, site) {
  const { document } = new JSDOM(html).window;
  const release = { url, entryId, site };

  release.title = document.querySelector('.popup_data_set_head label').textContent;
  release.description = document.querySelector('.popup_data_set_des p').textContent.trim();
  release.date = moment.utc(document.querySelector('.popup_left_top div span').textContent, 'MM-DD-YYYY').toDate();
  release.actors = Array.from(document.querySelectorAll('.popup_data_set_models a'), el => el.textContent);

  // getTrailer resolves to null on a non-200 response; destructuring that result
  // directly would throw, so the media is only attached when it is available.
  const trailerData = await getTrailer(release.entryId);

  if (trailerData) {
    release.poster = trailerData.poster;
    release.trailer = { src: trailerData.trailer };
  }

  // The alt text of the image next to the date is stored as the channel.
  release.channel = document.querySelector('.popup_left_top div img').alt;

  return release;
}
/**
 * Fetch and scrape one page of latest scenes for a channel.
 *
 * @param {Object} channel - Channel entity; `channel.url` is the base of the request
 *   and `channel.parameters.tourId` is passed as `tourId`.
 * @param {number} [page=1] - Page number; page 1 uses the list view endpoint, later
 *   pages use the grid view endpoint.
 * @returns {Promise<Object[]>} One scraped release per entry of `total_arr` in the response.
 */
async function fetchLatest(channel, page = 1) {
  const { tourId } = channel.parameters;

  // The endpoint's query parameter is `websiteid`; it is part of the remote API and
  // must not follow the local site -> channel naming.
  const url = page === 1
    ? `${channel.url}/final_latestupdateview.php?limitstart=${(page - 1) * 9}&limitend=9&websiteid=0&deviceview=browser&tourId=${tourId}`
    : `${channel.url}/final_load_latestupdate_grid_view.php?limitstart=0&limitend=${(page - 1) * 8 + 1}&websiteid=0&deviceview=browser&tourId=${tourId}`;

  const res = await bhttp.get(url);
  const elements = JSON.parse(res.body.toString());

  // total_arr is a key-value object for final_load_latestupdate_grid_view.php,
  // so iterate its values rather than treating it as an array.
  return Object.values(elements.total_arr).map(html => scrapeLatestScene(html, channel));
}
/**
 * Fetch a scene page and scrape it, going through the popup endpoint for network scenes.
 *
 * @param {string} url - URL of the scene page.
 * @param {Object} site - Site entity; when `site.isNetwork` is truthy the set ID is read
 *   from the landing page and the scene is scraped from the popup endpoint instead.
 * @returns {Promise<?Object>} The scraped release, or null when the page request or the
 *   popup request does not return status 200.
 */
async function fetchScene(url, site) {
  const res = await bhttp.get(url);

  if (res.statusCode !== 200) {
    return null;
  }

  if (!site.isNetwork) {
    return scrapeScene(res.body.toString(), url, site);
  }

  const entryId = scrapeFallbackLanding(res.body.toString());
  const fallbackRes = await bhttp.post('https://www.pervcity.com/set_popupvideo.php', {
    setId: entryId,
  });

  // Treat a failed popup request like a failed page request instead of trying to
  // scrape an error body.
  if (fallbackRes.statusCode !== 200) {
    return null;
  }

  return scrapeFallbackScene(fallbackRes.body.toString(), entryId, url, site);
}
// Functions exposed by this scraper module: latest-scene listing and single-scene fetching.
module.exports = {
fetchLatest,
fetchScene,
};

View File

@ -193,6 +193,7 @@ module.exports = {
nubilesporn: nubiles, nubilesporn: nubiles,
nympho: mikeadriano, nympho: mikeadriano,
onlyprince: fullpornnetwork, onlyprince: fullpornnetwork,
pervcity,
pervertgallery: fullpornnetwork, pervertgallery: fullpornnetwork,
peternorth: famedigital, peternorth: famedigital,
pimpxxx: cherrypimps, pimpxxx: cherrypimps,

View File

@ -1,19 +1,25 @@
'use strict'; 'use strict';
function inchesToCm(inches) { function inchesToCm(inches) {
if (!inches) return null;
return Math.round(Number(inches) * 2.54); return Math.round(Number(inches) * 2.54);
} }
function feetInchesToCm(feet, inches) { function feetInchesToCm(feet, inches) {
if (!feet && !inches) return null;
if (typeof feet === 'string' && !inches) { if (typeof feet === 'string' && !inches) {
const [feetPart, inchesPart] = feet.match(/\d+/g); const [feetPart, inchesPart] = feet.match(/\d+/g);
return feetInchesToCm(feetPart, inchesPart); return feetInchesToCm(Number(feetPart), Number(inchesPart));
} }
return Math.round((Number(feet) * 30.48) + (Number(inches) * 2.54)); return Math.round((Number(feet) * 30.48) + ((Number(inches) || 0) * 2.54));
} }
function cmToFeetInches(centimeters) { function cmToFeetInches(centimeters) {
if (!centimeters) return null;
const feet = Math.floor(centimeters / 30.48); const feet = Math.floor(centimeters / 30.48);
const inches = Math.round((centimeters / 2.54) % (feet * 12)); const inches = Math.round((centimeters / 2.54) % (feet * 12));
@ -21,18 +27,24 @@ function cmToFeetInches(centimeters) {
} }
function heightToCm(height) { function heightToCm(height) {
if (!height) return null;
const [feet, inches] = height.match(/\d+/g); const [feet, inches] = height.match(/\d+/g);
return feetInchesToCm(feet, inches); return feetInchesToCm(feet, inches);
} }
function lbsToKg(lbs) { function lbsToKg(lbs) {
if (!lbs) return null;
const pounds = lbs.toString().match(/\d+/)[0]; const pounds = lbs.toString().match(/\d+/)[0];
return Math.round(Number(pounds) * 0.453592); return Math.round(Number(pounds) * 0.453592);
} }
function kgToLbs(kgs) { function kgToLbs(kgs) {
if (!kgs) return null;
const kilos = kgs.toString().match(/\d+/)[0]; const kilos = kgs.toString().match(/\d+/)[0];
return Math.round(Number(kilos) / 0.453592); return Math.round(Number(kilos) / 0.453592);