'use strict'; const bhttp = require('bhttp'); const { JSDOM } = require('jsdom'); const moment = require('moment'); const knex = require('../knex'); const ethnicityMap = { White: 'Caucasian', }; const hairMap = { Brunette: 'brown', }; const countryMap = { 'United States of America': 'United States', }; async function scrapeProfile(html, _url, actorName) { const { document } = new JSDOM(html).window; const entries = Array.from(document.querySelectorAll('.infoPiece'), el => el.textContent.replace(/\n|\t/g, '').split(':')); const bio = entries.reduce((acc, [key, value]) => ({ ...acc, [key.trim()]: value.trim() }), {}); const profile = { name: actorName, boobs: {}, }; const descriptionString = document.querySelector('div[itemprop="description"]'); const birthPlaceString = bio['Birth Place'] || bio.Birthplace; const residencePlaceString = bio['City and Country']; const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img'); if (bio.Gender) profile.gender = bio.Gender.toLowerCase(); if (bio.ethnicity) profile.ethnicity = ethnicityMap[bio.Ethnicity] || bio.Ethnicity; if (descriptionString) profile.description = descriptionString.textContent; if (bio.Birthday) bio.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate(); if (bio.Born) bio.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate(); if (birthPlaceString) { const birthPlaceSegments = birthPlaceString.split(','); const birthCountryName = birthPlaceSegments.slice(-1)[0].trim(); const birthCountryEntry = await knex('countries').where('name', countryMap[birthCountryName] || birthCountryName).first(); profile.birthPlace = birthPlaceSegments.slice(0, -1).join(',').trim(); profile.birthCountry = birthCountryEntry ? birthCountryEntry.alpha2 : null; } if (residencePlaceString) { const residencePlaceSegments = residencePlaceString.split(','); const residenceCountryAlpha2 = residencePlaceSegments.slice(-1)[0].trim(); const residenceCountryEntry = await knex('countries').where('alpha2', residenceCountryAlpha2).first(); profile.residencePlace = residencePlaceSegments.slice(0, -1).join(',').trim(); profile.residenceCountry = residenceCountryEntry ? residenceCountryEntry.alpha2 : null; } if (bio.Measurements && bio.Measurements !== '--') profile.boobs.size = bio.Measurements; if (bio['Fake Boobs']) profile.boobs.natural = bio['Fake Boobs'] === 'No'; if (bio.Height) profile.height = Number(bio.Height.match(/\(\d+/)[0].slice(1)); if (bio.Weight) profile.weight = Number(bio.Weight.match(/\(\d+/)[0].slice(1)); if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase(); if (bio.Piercings) profile.piercings = bio.Piercings === 'Yes'; if (bio.Tattoos) profile.tattoos = bio.tattoos === 'Yes'; if (avatarEl) profile.avatar = avatarEl.src; profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href).filter(link => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason return profile; } async function fetchProfile(actorName) { const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-'); /* Model pages are not reliably associated with actual porn stars const modelUrl = `https://pornhub.com/model/${actorSlug}`; const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`; const [modelRes, pornstarRes] = await Promise.all([ bhttp.get(modelUrl), bhttp.get(pornstarUrl), ]); const model = modelRes.statusCode === 200 && await scrapeProfile(modelRes.body.toString(), modelUrl, actorName); const pornstar = pornstarRes.statusCode === 200 && await scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName); if (model && pornstar) { return { ...model, ...pornstar, }; } */ const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`; const pornstarRes = await bhttp.get(pornstarUrl); return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName); } module.exports = { fetchProfile, };