// 2023-07-31 21:41:32 +00:00
'use strict' ;
const unprint = require ( 'unprint' ) ;
/**
 * Turns the comma-separated `models_name` query value into actor entries.
 * Each item is read as "Han/English"; entries whose English part contains
 * "amateur" are dropped.
 *
 * @param {string|null} modelsName - raw `models_name` query parameter
 * @returns {Array<{name: string, alias: (string|undefined)}>|undefined} undefined when the parameter is absent
 */
function parseActors(modelsName) {
	return modelsName?.split(',').map((actor) => {
		const [han, english] = actor.split('/').map((name) => name.trim());

		if (/amateur/i.test(english)) {
			// not a name
			return null;
		}

		return {
			name: english || han,
			alias: english && han,
		};
	}).filter(Boolean);
}

/**
 * Splits a combined title into the part before the Han text and the Han part.
 *
 * @param {string} rawTitle - title text with the shoot ID already removed
 * @returns {{title: string, altTitles?: string[]}} altTitles is only present when a split was made
 */
function splitTitle(rawTitle) {
	const hanIndex = rawTitle.match(/\p{Script_Extensions=Han}/u)?.index;

	// no Han text at all, or the title starts with it: nothing to split off
	if (!hanIndex) {
		return { title: rawTitle.trim() };
	}

	// find / closest to Han in case there are multiple
	const slashIndex = rawTitle.slice(0, hanIndex).lastIndexOf('/');
	const hasSlash = slashIndex > 0;

	// lastIndexOf yields -1 (truthy) when there is no slash, so the fallback must be an explicit
	// comparison; without a slash, split right at the first Han character and keep that character
	const title = rawTitle.slice(0, hasSlash ? slashIndex : hanIndex).trim();
	const altTitle = rawTitle.slice(hasSlash ? slashIndex + 1 : hanIndex).trim();

	if (!title) {
		// only whitespace precedes the split point, keep the title in one piece
		return { title: rawTitle.trim() };
	}

	return {
		title,
		altTitles: [altTitle],
	};
}

/**
 * Builds one release object per scene element.
 *
 * @param {Array<{query: object}>} scenes - scene contexts exposing the query helpers used below
 *   (attribute, url, content, duration, img, video)
 * @returns {object[]} releases with entryId, duration and teaser always assigned, and url, shootId,
 *   actors, title, altTitles and poster assigned when the corresponding data is found
 */
function scrapeAll(scenes) {
	return scenes.map(({ query }) => {
		const release = {};

		release.entryId = query.attribute(null, 'video-id');

		const url = query.url(null);

		if (url) {
			const { origin, pathname, searchParams } = new URL(url);

			// drop the query string, it is only needed for the actor names
			release.url = `${origin}${pathname}`;
			release.shootId = pathname.match(/((LA)|(LT)|(MA)|(MD)|(MM)|(MS)|(MT)|(RR))[\w-]+/)?.[0]; // pathname sometimes contains other text, match at least two letters to prevent false positives
			release.actors = parseActors(searchParams.get('models_name'));
		}

		const titleContent = query.content('.video-title div');

		// replace(undefined, '') would strip the literal text "undefined", so only strip a shoot ID that was found
		const rawTitle = titleContent && release.shootId
			? titleContent.replace(release.shootId, '')
			: titleContent;

		if (rawTitle) {
			Object.assign(release, splitTitle(rawTitle));
		}

		release.duration = query.duration('.timestamp');

		const poster = query.img('img', { attribute: 'data-src' });

		if (poster) {
			// first entry rewrites the w= and h= values to 1920x1080, the untouched source follows it
			release.poster = [
				poster.replace(/w=\d+/, 'w=1920').replace(/h=\d+/, 'h=1080'),
				poster,
			];
		}

		release.teaser = query.video(null, { attribute: 'data-video-src' });

		return release;
	});
}
/**
 * Builds a profile object from a performer page.
 *
 * @param {{query: object}} context - page context exposing the query helpers used below
 *   (img, imgs, content, number)
 * @returns {object} profile with description, height, weight, measurements, birthPlace, banner and
 *   photos always assigned (possibly empty), and avatar only when an avatar image is found
 */
function scrapeProfile({ query }) {
	const profile = {};

	const avatar = query.img('div[class*="prof-pic"] > img');

	if (avatar) {
		// first entry rewrites the w= and h= values to 720x1080, the untouched source follows it
		profile.avatar = [
			avatar.replace(/w=\d+/, 'w=720').replace(/h=\d+/, 'h=1080'),
			avatar,
		];
	}

	profile.description = query.content('h2') || null;

	profile.height = query.number('//span[text()="Height"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
	profile.weight = query.number('//span[text()="Weight"]/following-sibling::span', { match: /(\d+) kg/, matchIndex: 1 });

	// can't find a single profile with this information available, but add for good measure
	profile.measurements = query.content('//span[text()="Measurements"]/following-sibling::span');

	// a birth place is text, so read it as content rather than as a number
	profile.birthPlace = query.content('//span[text()="Birth Place"]/following-sibling::span');

	profile.banner = query.img('div[class*="banner"] > img');
	profile.photos = query.imgs('#MusModelSwiper img');

	return profile;
}
/**
 * Obtains a session cookie for a channel and submits it, together with the page's CSRF token,
 * to the `/adult_confirmation_and_accept_cookie` endpoint.
 *
 * @param {{url: string}} channel - entity whose `url` is the site base URL
 * @returns {Promise<string|number|null>} the `set-cookie` values joined with `;` on success,
 *   the numeric HTTP status of whichever request was not ok, or null when the page yields no
 *   CSRF token or no cookie. Callers must check the type before using the value as a header.
 */
async function getCookie(channel) {
	const tokenRes = await unprint.get(channel.url);

	if (!tokenRes.ok) {
		return tokenRes.status;
	}

	const csrfToken = tokenRes.context?.query.attribute('meta[name="csrf-token"]', 'content');
	const cookie = tokenRes.response.headers['set-cookie']?.join(';');

	if (!csrfToken || !cookie) {
		return null;
	}

	// no whitespace may end up inside the URL, the path is appended directly to the base URL
	const confirmAdultRes = await unprint.post(`${channel.url}/adult_confirmation_and_accept_cookie`, null, {
		headers: {
			cookie,
			'x-csrf-token': csrfToken,
		},
	});

	if (!confirmAdultRes.ok) {
		return confirmAdultRes.status;
	}

	return cookie;
}
/**
 * Fetches one page of the channel's video listing, sorted by publish date, and scrapes it.
 *
 * @param {{url: string}} channel - entity whose `url` is the site base URL
 * @param {number} [page=1] - listing page number
 * @returns {Promise<object[]|number>} scraped releases, or the numeric HTTP status of the request
 *   that failed
 */
async function fetchLatest(channel, page = 1) {
	const cookie = await getCookie(channel);

	// getCookie reports a failed request as a numeric HTTP status; pass that on as this function's
	// own failure value instead of sending it as a Cookie header
	if (typeof cookie === 'number') {
		return cookie;
	}

	const res = await unprint.get(`${channel.url}/videos?sort=published_at&page=${page}`, {
		selectAll: '.row a[video-id]',
		headers: {
			cookie,
		},
	});

	if (res.ok) {
		return scrapeAll(res.context, channel);
	}

	return res.status;
}
// deep pages are paywalled
/**
 * Looks up an actor through the site's `/livesearch` endpoint and returns the URL of the link
 * whose `title` attribute equals the actor's name.
 *
 * @param {{name: string}} actor - actor to look up
 * @param {{channel: {url: string}}} context - scrape context; `channel.url` is the site base URL
 * @param {string|null} cookie - value sent as the Cookie header
 * @returns {Promise<string|number|null|undefined>} the matching URL, whatever `query.url` yields
 *   when nothing matches, or the numeric HTTP status when the search request is not ok
 */
async function searchProfile(actor, context, cookie) {
	// encode the name so spaces, & and non-ASCII characters survive as a single query value
	const keyword = encodeURIComponent(actor.name);

	const searchRes = await unprint.get(`${context.channel.url}/livesearch?keyword=${keyword}`, {
		headers: {
			cookie,
		},
	});

	if (!searchRes.ok) {
		return searchRes.status;
	}

	// escape backslashes and double quotes so the name cannot terminate the attribute selector
	const quotedName = String(actor.name).replace(/["\\]/g, '\\$&');

	return searchRes.context.query.url(`a[title="${quotedName}"]`);
}
/**
 * Fetches and scrapes an actor's profile page, searching for the page first when the actor
 * carries no `url`.
 *
 * @param {{name: string, url?: string}} actor - actor to fetch
 * @param {{entity: {url: string}}} context - scrape context; `entity` is handed to getCookie and
 *   the whole context is handed to searchProfile
 * @returns {Promise<object|null>} the scraped profile, or null when the cookie request, the
 *   search or the profile request fails
 */
async function fetchProfile(actor, context) {
	const cookie = await getCookie(context.entity);

	// getCookie reports a failed request as a numeric HTTP status, which must not be sent as a cookie
	if (typeof cookie === 'number') {
		return null;
	}

	const actorUrl = actor.url || await searchProfile(actor, context, cookie);

	// searchProfile reports a failed search as a numeric HTTP status: truthy, but not a URL
	if (!actorUrl || typeof actorUrl !== 'string') {
		return null;
	}

	const res = await unprint.get(actorUrl, {
		headers: {
			cookie,
		},
	});

	if (res.ok) {
		return scrapeProfile(res.context, actorUrl);
	}

	return null;
}
// Public scraper interface: the latest-releases fetcher and the actor profile fetcher.
module.exports = { fetchLatest, fetchProfile };