// 2019-11-20 03:53:36 +00:00
'use strict' ;
const bhttp = require ( 'bhttp' ) ;
const { JSDOM } = require ( 'jsdom' ) ;
const moment = require ( 'moment' ) ;
/**
 * Translates ethnicity labels as they appear on the profile page into the labels stored on a profile.
 * scrapeProfile falls back to the raw page label when a label has no entry here.
 * Frozen because it is a shared lookup table that must never be modified at runtime.
 */
const ethnicityMap = Object.freeze({
  White: 'Caucasian',
});
/**
 * Translates hair colour labels as they appear on the profile page into the values stored on a profile.
 * scrapeProfile falls back to the lower-cased page label when a label has no entry here.
 * Frozen because it is a shared lookup table that must never be modified at runtime.
 */
const hairMap = Object.freeze({
  Brunette: 'brown',
});
/**
 * Collects the "label: value" pairs shown in the `.infoPiece` elements of a profile page.
 *
 * @param {Document} document - DOM document of the profile page.
 * @returns {Object<string, string>} Trimmed values keyed by their trimmed label.
 */
function parseBioEntries(document) {
  // Prototype-less dictionary: labels come from the page and must not collide with Object.prototype members.
  const bio = Object.create(null);

  Array.from(document.querySelectorAll('.infoPiece')).forEach((el) => {
    const text = el.textContent.replace(/\n|\t/g, '');
    const separatorIndex = text.indexOf(':');

    // A piece without a colon has no value to read; skip it instead of failing the whole scrape.
    if (separatorIndex === -1) return;

    // Split on the first colon only, so values that themselves contain a colon stay intact.
    bio[text.slice(0, separatorIndex).trim()] = text.slice(separatorIndex + 1).trim();
  });

  return bio;
}

/**
 * Reads the first run of digits that directly follows an opening parenthesis, e.g. "(170" in "5 ft 7 in (170 cm)".
 *
 * @param {string} text - Raw bio value.
 * @returns {number|null} The parsed number, or null when the text has no parenthesised number.
 */
function parseParenthesizedNumber(text) {
  const match = text.match(/\((\d+)/);

  return match ? Number(match[1]) : null;
}

/**
 * Builds an actor profile from the HTML of a profile page.
 *
 * @param {string} html - Markup of the profile page.
 * @param {string} _url - URL the markup was fetched from; not used by the scraper.
 * @param {string} actorName - Name stored on the resulting profile.
 * @returns {Promise<Object>} Profile containing `name`, `birthPlace`, `residencePlace`, `social` and every
 *   optional field for which the page provided a value.
 */
async function scrapeProfile(html, _url, actorName) {
  const { document } = new JSDOM(html).window;
  const bio = parseBioEntries(document);

  const profile = {
    name: actorName,
  };

  const descriptionEl = document.querySelector('div[itemprop="description"]') || document.querySelector('.longBio');
  const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img');

  if (bio.Gender) profile.gender = bio.Gender.toLowerCase();
  // The label is capitalised like every other key; checking the lower-case key never matched.
  if (bio.Ethnicity) profile.ethnicity = ethnicityMap[bio.Ethnicity] || bio.Ethnicity;
  if (descriptionEl) profile.description = descriptionEl.textContent;

  // Two date labels with different formats are supported; "Born" wins when both are present.
  if (bio.Birthday) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate();
  if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();

  profile.birthPlace = bio['Birth Place'] || bio.Birthplace;
  profile.residencePlace = bio['City and Country'];

  // '--' is what remains of the measurements when none of the three values is filled in.
  if (bio.Measurements && bio.Measurements !== '--') {
    [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
  }

  if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No';

  if (bio.Height) {
    const height = parseParenthesizedNumber(bio.Height);

    if (height !== null) profile.height = height;
  }

  if (bio.Weight) {
    const weight = parseParenthesizedNumber(bio.Weight);

    if (weight !== null) profile.weight = weight;
  }

  if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();

  if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes';
  if (bio.Tattoos) profile.hasTattoos = bio.Tattoos === 'Yes';

  if (avatarEl) profile.avatar = avatarEl.src;

  // PH links to Twitter itself for some reason
  profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href)
    .filter(link => link !== 'https://www.twitter.com/');

  return profile;
}
/**
 * Fetches the Pornhub pornstar page for an actor and scrapes it into a profile.
 *
 * Only the `/pornstar/` page is requested: `/model/` pages are not reliably associated with actual
 * pornstars, so they are deliberately not queried or merged in.
 *
 * @param {string} actorName - Actor name; trimmed, lower-cased and hyphenated to build the page slug.
 * @returns {Promise<Object>} The profile produced by `scrapeProfile` from the response body.
 */
async function fetchProfile(actorName) {
  // Percent-encode the slug so names with non-ASCII or reserved characters still form a valid URL path.
  const actorSlug = encodeURIComponent(actorName.trim().toLowerCase().replace(/\s+/g, '-'));
  const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
  const pornstarRes = await bhttp.get(pornstarUrl);

  // NOTE(review): the response status is not checked, so an error page is scraped like any other page.
  // Confirm with callers before returning something else for non-200 responses.
  return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
}
// Public API of this module: only fetchProfile is exported.
module . exports = {
fetchProfile ,
} ;