// File listing metadata: 86 lines, 3.3 KiB, JavaScript
'use strict';
|
|
|
|
const bhttp = require('@thependulum/bhttp');
|
|
const { JSDOM } = require('jsdom');
|
|
const moment = require('moment');
|
|
|
|
// Translates ethnicity labels found in the scraped bio into the labels stored on the profile.
// scrapeProfile falls back to the raw bio label when a label has no entry here.
const ethnicityMap = {
    White: 'Caucasian',
};
|
|
|
|
// Translates hair colour labels found in the scraped bio into the values stored on the profile.
// scrapeProfile falls back to the lowercased bio label when a label has no entry here.
const hairMap = {
    Brunette: 'brown',
};
|
|
|
|
/**
 * Split the text of one `.infoPiece` element into a trimmed `[key, value]` pair.
 *
 * Only the first colon separates the key from the value, so values that contain
 * colons themselves are kept whole.
 *
 * @param {string} text - Raw text content of the element.
 * @returns {?Array<string>} `[key, value]`, or `null` when the text has no colon or an empty key.
 */
function parseInfoPiece(text) {
    const flattened = text.replace(/\n|\t/g, '');
    const separatorIndex = flattened.indexOf(':');

    if (separatorIndex === -1) return null;

    const key = flattened.slice(0, separatorIndex).trim();

    return key ? [key, flattened.slice(separatorIndex + 1).trim()] : null;
}

/**
 * Read the integer that directly follows the first opening parenthesis;
 * for example `'(168 cm)'` yields `168`.
 *
 * @param {string} text - Bio value to inspect.
 * @returns {?number} The parsed number, or `null` when no `(digits` sequence is present.
 */
function parseParenthesizedNumber(text) {
    const match = text.match(/\((\d+)/);

    return match ? Number(match[1]) : null;
}

/**
 * Build an actor profile from the HTML of a profile page.
 *
 * Bio fields are read from the `.infoPiece` elements, each expected to hold a
 * `Key: value` pair; pieces without a colon are ignored. Optional fields are only
 * set on the profile when the matching bio entry is present and parseable, while
 * `birthPlace`, `residencePlace` and `social` are always assigned.
 *
 * @param {string} html - Markup of the profile page.
 * @param {string} _url - URL of the page; currently unused.
 * @param {string} actorName - Name stored on the resulting profile.
 * @returns {Promise<Object>} The scraped profile.
 */
async function scrapeProfile(html, _url, actorName) {
    const { document } = new JSDOM(html).window;

    const bio = {};
    const pieces = Array.from(document.querySelectorAll('.infoPiece'), (el) => parseInfoPiece(el.textContent));

    for (const piece of pieces) {
        if (piece) {
            const [key, value] = piece;
            bio[key] = value;
        }
    }

    const profile = {
        name: actorName,
    };

    const descriptionEl = document.querySelector('div[itemprop="description"]') || document.querySelector('.longBio');
    const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img');

    if (bio.Gender) profile.gender = bio.Gender.toLowerCase();
    // Bio keys keep the capitalisation used on the page, so the guard must test `Ethnicity`, not `ethnicity`.
    if (bio.Ethnicity) profile.ethnicity = ethnicityMap[bio.Ethnicity] || bio.Ethnicity;

    if (descriptionEl) profile.description = descriptionEl.textContent;

    // birthyear sometimes -0001, see Spencer Bradley as of january 2020
    if (bio.Birthday && !/-0001/.test(bio.Birthday)) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate();
    // `Born` is applied last, so it wins when both fields are present.
    if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();

    profile.birthPlace = bio['Birth Place'] || bio.Birthplace;
    profile.residencePlace = bio['City and Country'];

    // '--' is what an empty measurements field looks like; skip it rather than storing empty strings.
    if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
    if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No';

    // Height and weight are taken from the parenthesised number; values without one are left unset.
    if (bio.Height) {
        const height = parseParenthesizedNumber(bio.Height);

        if (height !== null) profile.height = height;
    }

    if (bio.Weight) {
        const weight = parseParenthesizedNumber(bio.Weight);

        if (weight !== null) profile.weight = weight;
    }

    if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
    if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes';
    if (bio.Tattoos) profile.hasTattoos = bio.Tattoos === 'Yes';

    // Avatars served from a `default/` path are placeholders, not actual photos.
    if (avatarEl && !/default\//.test(avatarEl.src)) profile.avatar = avatarEl.src;

    // PH links to Twitter itself for some reason
    profile.social = Array.from(document.querySelectorAll('.socialList a'), (el) => el.href)
        .filter((link) => link !== 'https://www.twitter.com/');

    return profile;
}
|
|
|
|
/**
 * Fetch and scrape the pornstar page for an actor.
 *
 * Only the `/pornstar/` page is consulted; `/model/` pages are deliberately skipped
 * because they are not reliably associated with actual porn stars.
 *
 * @param {Object} actor
 * @param {string} actor.name - Actor name; trimmed, lowercased and hyphenated to form the page slug.
 * @returns {Promise<?Object>} The scraped profile, or `null` when the page does not respond with HTTP 200.
 */
async function fetchProfile({ name: actorName }) {
    // Trim first so padded names do not produce slugs with leading or trailing hyphens.
    const actorSlug = actorName.trim().toLowerCase().replace(/\s+/g, '-');
    const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;

    const pornstarRes = await bhttp.get(pornstarUrl);

    // Do not scrape error responses: scrapeProfile always returns an object carrying the name,
    // so a missing page would otherwise be reported as a found profile.
    if (pornstarRes.statusCode !== 200) return null;

    return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
}
|
|
|
|
module.exports = {
|
|
fetchProfile,
|
|
};
|