2019-11-20 03:53:36 +00:00
'use strict' ;
const { JSDOM } = require ( 'jsdom' ) ;
const moment = require ( 'moment' ) ;
2020-11-22 23:05:02 +00:00
const http = require ( '../utils/http' ) ;
2019-11-20 03:53:36 +00:00
// Translates ethnicity labels found in the scraped bio to the value stored on
// the profile; scrapeProfile falls back to the raw label when no entry exists.
const ethnicityMap = {
  White: 'Caucasian',
};
// Translates hair colour labels found in the scraped bio to the value stored on
// the profile; scrapeProfile falls back to the lowercased label when no entry exists.
const hairMap = {
  Brunette: 'brown',
};
/**
 * Build an actor profile from the HTML of a profile page.
 *
 * The "Label: value" pairs of the page's `.infoPiece` elements are collected
 * into a lookup, and the recognised labels are mapped onto profile fields.
 * Labels that are missing from the page leave the matching field unset.
 *
 * @param {string} html - Markup of the profile page.
 * @param {string} _url - URL of the page; accepted but not used.
 * @param {string} actorName - Name stored as `profile.name`.
 * @returns {Promise<Object>} The scraped profile.
 */
async function scrapeProfile(html, _url, actorName) {
  const { document } = new JSDOM(html).window;

  const bio = {};

  Array.from(document.querySelectorAll('.infoPiece')).forEach((el) => {
    const text = el.textContent.replace(/\n|\t/g, '');
    const separatorIndex = text.indexOf(':');

    // a piece without a colon carries no value; skip it rather than crash on undefined
    if (separatorIndex === -1) return;

    const key = text.slice(0, separatorIndex).trim();

    // split on the first colon only, so values that themselves contain a colon stay whole
    if (key) bio[key] = text.slice(separatorIndex + 1).trim();
  });

  const profile = {
    name: actorName,
  };

  const descriptionEl = document.querySelector('div[itemprop="description"]') || document.querySelector('.longBio');
  const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img');

  if (bio.Gender) profile.gender = bio.Gender.toLowerCase();

  // bio labels are capitalised; the unmapped label is kept when ethnicityMap has no entry
  if (bio.Ethnicity) profile.ethnicity = ethnicityMap[bio.Ethnicity] || bio.Ethnicity;

  if (descriptionEl) profile.description = descriptionEl.textContent;

  // birth year is sometimes -0001, see Spencer Bradley as of January 2020
  if (bio.Birthday && !/-0001/.test(bio.Birthday)) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate();

  // a "Born" entry takes precedence over "Birthday" when both are present
  if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();

  profile.birthPlace = bio['Birth Place'] || bio.Birthplace;
  profile.residencePlace = bio['City and Country'];

  // '--' is what remains of the measurements when all three parts are empty
  if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
  if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No';

  // height and weight use the digits that follow the opening parenthesis;
  // a value without a parenthesised number leaves the field unset
  const heightMatch = bio.Height && bio.Height.match(/\((\d+)/);
  const weightMatch = bio.Weight && bio.Weight.match(/\((\d+)/);

  if (heightMatch) profile.height = Number(heightMatch[1]);
  if (weightMatch) profile.weight = Number(weightMatch[1]);

  if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
  if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes';
  if (bio.Tattoos) profile.hasTattoos = bio.Tattoos === 'Yes';

  // avatars whose path contains "default/" are skipped
  if (avatarEl && !/default\//.test(avatarEl.src)) profile.avatar = avatarEl.src;

  // PH links to Twitter itself for some reason
  profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href).filter(link => link !== 'https://www.twitter.com/');

  return profile;
}
2020-07-20 23:44:51 +00:00
/**
 * Fetch the pornstar page for an actor and scrape a profile from it.
 *
 * @param {Object} actor
 * @param {string} actor.name - Actor name; lowercased, with each run of
 *   whitespace replaced by a hyphen, to form the URL slug.
 * @returns {Promise<Object>} The profile returned by scrapeProfile.
 */
async function fetchProfile({ name: actorName }) {
  const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');

  // Model pages (/model/<slug>) are not reliably associated with actual
  // pornstars, so only the pornstar page is requested.
  const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
  const pornstarRes = await http.get(pornstarUrl);

  // NOTE(review): the response status is not checked before scraping; confirm
  // whether callers expect non-200 responses to be skipped.
  return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
}
module . exports = {
2020-05-14 02:26:05 +00:00
fetchProfile ,
2019-11-20 03:53:36 +00:00
} ;