2020-02-02 04:14:58 +00:00
'use strict' ;
2020-02-05 22:57:55 +00:00
const { ex , exa , get } = require ( '../utils/q' ) ;
2020-02-02 04:14:58 +00:00
const slugify = require ( '../utils/slugify' ) ;
2020-11-22 23:05:02 +00:00
const http = require ( '../utils/http' ) ;
2020-02-02 04:14:58 +00:00
const { heightToCm , lbsToKg } = require ( '../utils/convert' ) ;
/**
 * Extracts the images found inside `#photos-page`.
 *
 * @param {string} html - Markup of a photo album page.
 * @returns {Array<Array<string>>} One pair per image: first the source with
 *   `x_800` swapped for `x_xl` and the first `_tn` removed, then the source
 *   exactly as found on the page.
 */
function scrapePhotos(html) {
  const { qis: queryImageSources } = ex(html, '#photos-page');
  const sources = queryImageSources('img');

  return sources.map((source) => {
    const enlarged = source.replace('x_800', 'x_xl').replace('_tn', '');

    return [enlarged, source];
  });
}
/**
 * Requests a photo album page and hands its markup to `scrapePhotos`.
 *
 * @param {string} url - Address of the album page.
 * @returns {Promise<Array>} The scraped photos on a 200 response, otherwise an
 *   empty array.
 */
async function fetchPhotos(url) {
  const response = await http.get(url);

  if (response.statusCode !== 200) {
    return [];
  }

  const html = response.body.toString();

  return scrapePhotos(html, url);
}
2020-02-03 23:18:53 +00:00
/**
 * Scrapes every `.video` tile of a listing page into a release summary.
 *
 * Tiles that link to a photo album, and tiles that carry no link at all, are
 * left out of the result.
 *
 * @param {string} html - Markup of the listing page.
 * @param {Object} [site] - Site entity; when `site.parameters.actors` is set it
 *   is used instead of the actors found on the tile.
 * @returns {Array<Object>} Releases with `title`, `url`, `entryId`, `date`,
 *   `actors`, `duration` and, when available, `poster` and `teaser`.
 */
function scrapeAll(html, site) {
  const tiles = exa(html, '.container .video, .container-fluid .video');

  return tiles.map(({ q, qa, qd, ql }) => {
    const release = {};

    release.title = q('.title, .i-title', true);

    const linkEl = q('a');

    // a tile without a link cannot be identified; skip it rather than
    // letting one malformed tile abort the whole page
    if (!linkEl) return null;

    const url = new URL(linkEl.href);
    release.url = `${url.origin}${url.pathname}`;

    // this is a photo album, not a scene (used for profiles)
    if (/photos\//.test(url)) return null;

    // the first of the last two path segments is the entry ID
    [release.entryId] = url.pathname.split('/').slice(-2);

    release.date = qd('.i-date', 'MMM DD', /\w+ \d{1,2}$/)
      || qd('.dt-box', 'MMM.DD YYYY');

    release.actors = site?.parameters?.actors || qa('.model, .i-model', true);
    release.duration = ql('.i-amount, .amount');

    const posterEl = q('.item-img img');

    if (posterEl) {
      // the source is prefixed with the scheme, so it is expected to be protocol-relative
      release.poster = `https:${posterEl.src}`;
    }

    if (posterEl?.dataset.gifPreview) {
      release.teaser = {
        src: `https:${posterEl.dataset.gifPreview}`,
      };
    }

    return release;
  }).filter(Boolean);
}
2020-02-03 23:18:53 +00:00
/**
 * Scrapes a scene page into a release.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Address of the scene page; the entry ID is taken from
 *   its path.
 * @param {Object} [site] - Site entity; `site.parameters.actors` is the last
 *   resort when no actors are found on the page.
 * @returns {Promise<Object>} Release with `entryId`, `title`, `description`,
 *   `actors`, `tags`, `date`, `duration`, `poster`, `photos`, `trailer` and,
 *   when a score is present, `rating`.
 */
async function scrapeScene(html, url, site) {
  const { qu } = ex(html, '#videos-page, #content section');
  const release = {};

  // the first of the last two path segments is the entry ID
  [release.entryId] = new URL(url).pathname.split('/').slice(-2);

  release.title = qu.q('h2.text-uppercase, h2.title, #breadcrumb-top + h1', true)
    || qu.q('h1.m-title', true)?.split(/»|\//).slice(-1)[0].trim();

  release.description = qu.text('.p-desc, .desc');

  release.actors = qu.all('.value a[href*=models], .value a[href*=performer], .value a[href*=teen-babes]', true);

  if (release.actors.length === 0) {
    // no linked actors, fall back to the plain-text 'Featuring' stat
    const actorEl = qu.all('.stat').find((stat) => /Featuring/.test(stat.textContent));
    const actorString = qu.text(actorEl);

    release.actors = actorString?.split(/,\band\b|,/g).map((actor) => actor.trim()) || [];
  }

  // site may be omitted by the caller, same guard as in scrapeAll
  if (release.actors.length === 0 && site?.parameters?.actors) release.actors = site.parameters.actors;

  release.tags = qu.all('a[href*=tag]', true);

  const dateEl = qu.all('.value').find((el) => /\w+ \d+\w+, \d{4}/.test(el.textContent));

  release.date = qu.date(dateEl, null, 'MMMM Do, YYYY')
    || qu.date('.date', 'MMMM Do, YYYY', /\w+ \d{1,2}\w+, \d{4}/)
    || qu.date('.info .holder', 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);

  // class selector: the bare tag selector 'value' never matched anything
  const durationEl = qu.all('.value').find((el) => /\d{1,3}:\d{2}/.test(el.textContent));
  release.duration = qu.dur(durationEl);

  release.poster = qu.poster('video') || qu.img('.flowplayer img') || html.match(/posterImage: '(.*\.jpg)'/)?.[1] || null; // _800.jpg is larger than _xl.jpg in landscape

  const photosUrl = qu.url('.stat a[href*=photos]');

  if (photosUrl) {
    release.photos = await fetchPhotos(photosUrl);
  } else {
    release.photos = qu.imgs('img[src*=ThumbNails], .p-photos .tn img').map((photo) => [
      photo.replace('_tn', ''),
      photo,
    ]);
  }

  const trailers = qu.all('a[href*=Trailers]');

  if (trailers) {
    release.trailer = trailers.map((trailer) => {
      const src = `https:${trailer.href}`;
      const label = trailer.textContent.trim();

      // links without a recognizable label are dropped instead of throwing
      const format = label.match(/^\w+/)?.[0].toLowerCase();
      const quality = Number.parseInt(label.match(/\d+([a-zA-Z]+)?$/)?.[0], 10);

      if (format !== 'mp4') return null;

      return Number.isNaN(quality) ? { src } : { src, quality };
    }).filter(Boolean);
  }

  // not every page has a rating box
  const stars = qu.q('.rate-box')?.dataset.score;
  if (stars) release.rating = { stars };

  return release;
}
/**
 * Looks up an actor on a model index page.
 *
 * @param {string} html - Markup of the model index page.
 * @param {string} actorName - Name compared strictly against each link's `title`.
 * @returns {?string} The `href` of the first matching `.model a` link, or
 *   `null` when no link matches.
 */
function scrapeModels(html, actorName) {
  const { qa: queryAll } = ex(html);
  const modelLinks = queryAll('.model a');
  const match = modelLinks.find(({ title }) => title === actorName);

  return match?.href || null;
}
2020-02-05 22:57:55 +00:00
/**
 * Walks an actor's paginated scene listing and gathers every release.
 *
 * @param {string} url - Address of the first listing page to request.
 * @param {Array<Object>} [accReleases=[]] - Releases gathered so far.
 * @returns {Promise<?Array<Object>>} All gathered releases, or `null` as soon
 *   as any page request is not OK.
 */
async function fetchActorReleases(url, accReleases = []) {
  let collected = accReleases;
  let pageUrl = url;

  for (;;) {
    // pages depend on each other, so they are requested one at a time
    const res = await get(pageUrl); // eslint-disable-line no-await-in-loop

    if (!res.ok) {
      return null;
    }

    collected = collected.concat(scrapeAll(res.item.document.body.outerHTML));

    const nextPage = res.item.qu.url('.next-pg');
    const hasNextPage = nextPage && new URL(nextPage).searchParams.has('page'); // last page has 'next' button linking to join page

    if (!hasNextPage) {
      return collected;
    }

    pageUrl = nextPage;
  }
}
2020-02-25 21:32:13 +00:00
/**
 * Scrapes an actor page into a profile.
 *
 * @param {string} html - Markup of the actor page.
 * @param {string} actorUrl - Address of the actor page, used to build the
 *   address of the scene listing.
 * @param {boolean} withReleases - Whether to also fetch the actor's releases.
 * @returns {Promise<Object>} Profile with `gender` and whichever of
 *   `residencePlace`, `birthday`, `ethnicity`, `hair`, `height`, `weight`,
 *   `bust`, `waist`, `hip`, `occupation`, `avatar` and `releases` could be
 *   determined.
 */
async function scrapeProfile(html, actorUrl, withReleases) {
  const { q, qa, qi } = ex(html, '#model-page');
  const profile = { gender: 'female' };

  const bio = {};

  qa('.stat').forEach((el) => {
    const label = q(el, '.label', true);

    // a stat without a label has no usable key
    if (!label) return;

    // drop the label's last character before slugifying
    const key = slugify(label.slice(0, -1), '_');

    bio[key] = q(el, '.value', true);
  });

  if (bio.location) profile.residencePlace = bio.location.replace('Czech Repulic', 'Czech Republic'); // see Laura Lion

  if (bio.birthday) {
    const birthMonth = bio.birthday.match(/^\w+/)?.[0].toLowerCase();
    const birthDay = bio.birthday.match(/\d+/)?.[0];

    // only keep the birthday when both parts could be read
    if (birthMonth && birthDay) {
      profile.birthday = [birthMonth, birthDay]; // currently unused, not to be confused with birthdate
    }
  }

  if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
  if (bio.hair_color) profile.hair = bio.hair_color;

  if (bio.height) profile.height = heightToCm(bio.height);
  if (bio.weight) profile.weight = lbsToKg(bio.weight);

  if (bio.bra_size) profile.bust = bio.bra_size;
  // the first dash-separated part is skipped, bust comes from the bra size
  if (bio.measurements) [, profile.waist, profile.hip] = bio.measurements.split('-');

  if (bio.occupation) profile.occupation = bio.occupation;

  const avatar = qi('img');
  if (avatar) profile.avatar = avatar;

  if (withReleases) {
    const { origin, pathname } = new URL(actorUrl);
    profile.releases = await fetchActorReleases(`${origin}${pathname}/scenes?page=1`);
  }

  return profile;
}
/**
 * Requests one page of a site's latest releases.
 *
 * @param {Object} site - Site entity; `site.url` is the base address and
 *   `site.parameters.path` overrides the default listing path.
 * @param {number} [page=1] - Page number put in the `page` query parameter.
 * @returns {Promise<Array<Object>|number>} The scraped releases on a 200
 *   response, otherwise the response's status code.
 */
async function fetchLatest(site, page = 1) {
  const listingPath = site.parameters?.path || '/big-boob-videos';
  const response = await http.get(`${site.url}${listingPath}?page=${page}`);

  return response.statusCode === 200
    ? scrapeAll(response.body.toString(), site)
    : response.statusCode;
}
/**
 * Requests a scene page and scrapes it.
 *
 * @param {string} url - Address of the scene page.
 * @param {Object} site - Site entity passed through to `scrapeScene`.
 * @returns {Promise<?Object>} The scraped release on a 200 response, otherwise
 *   `null`.
 */
async function fetchScene(url, site) {
  const response = await http.get(url);

  if (response.statusCode !== 200) {
    return null;
  }

  const html = response.body.toString();

  return scrapeScene(html, url, site);
}
2020-07-20 23:44:51 +00:00
/**
 * Searches the model indexes for an actor and scrapes their profile.
 *
 * The index is browsed by the upper-cased first letter of the name. While a
 * page responds with 200 but does not list the actor, the next page of the
 * same index is tried; once a page does not respond with 200, the search moves
 * on to the first page of the next index.
 *
 * @param {Object} actor - Actor to look up; only `name` is read.
 * @param {Object} context - Passed along unchanged to recursive calls.
 * @param {Object} include - `include.scenes` decides whether releases are
 *   fetched along with the profile.
 * @param {number} [page=1] - Index page to request.
 * @param {number} [source=0] - Position of the index in the list of sources.
 * @returns {Promise<?Object>} The scraped profile, or `null` when the actor
 *   page does not respond with 200 or no index lists the actor.
 */
async function fetchProfile({ name: actorName }, context, include, page = 1, source = 0) {
  const letter = actorName.charAt(0).toUpperCase();

  const sources = [
    `https://www.scoreland.com/big-boob-models/browse/${letter}/?page=${page}`,
    `https://www.50plusmilfs.com/xxx-milf-models/browse/${letter}/?page=${page}`,
  ];

  const indexRes = await http.get(sources[source], {
    followRedirects: false,
  });

  if (indexRes.statusCode !== 200) {
    // this index is exhausted or unavailable, start over on the next one
    return sources[source + 1]
      ? fetchProfile({ name: actorName }, context, include, 1, source + 1)
      : null;
  }

  const actorUrl = scrapeModels(indexRes.body.toString(), actorName);

  if (!actorUrl) {
    // not listed on this page, keep browsing the same index
    return fetchProfile({ name: actorName }, context, include, page + 1, source);
  }

  const actorRes = await http.get(actorUrl);

  if (actorRes.statusCode !== 200) {
    return null;
  }

  return scrapeProfile(actorRes.body.toString(), actorUrl, include.scenes);
}
// Public entry points of this scraper module.
module.exports = { fetchLatest, fetchScene, fetchProfile };