// 2020-11-23 03:32:56 +00:00
'use strict' ;
const qu = require ( '../utils/qu' ) ;
// 2020-11-24 03:29:44 +00:00
const slugify = require ( '../utils/slugify' ) ;
// 2020-11-23 03:32:56 +00:00
// 2020-11-24 03:29:44 +00:00
/**
 * Detects which series a release belongs to by searching the release title
 * for one of the series' names or slugs, and strips that series prefix
 * (plus the separator following it) from the title.
 *
 * The series list is taken from `channel.children`, or from
 * `channel.parent.children` when the channel has no children of its own.
 *
 * @param {object} release - release with a `title` string (may be null)
 * @param {object} channel - entity with `children`, or a `parent` with `children`
 * @returns {{channel: string, title: string}|null} slug of the matched series and the
 *   cleaned title, or null when there are no series, no title, or no match
 */
function matchChannel(release, channel) {
	const series = channel.children || channel.parent?.children;

	// without a title there is nothing to match; avoids calling .match on null
	if (!series || !release.title) {
		return null;
	}

	// names and slugs are data, not patterns; escape them before building a RegExp
	const escapeRegExp = (text) => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

	const serieNames = {};

	series.forEach((serie) => {
		// skip missing values so we never register a literal "undefined" key
		if (serie.name) {
			serieNames[serie.name] = serie;
		}

		if (serie.slug) {
			serieNames[serie.slug] = serie;
		}
	});

	// alternative spellings mapped to the slug they stand for
	// (presumably truncated or variant spellings seen in titles; verify against the site)
	const aliases = {
		vr: 'littlecapricevr',
		superprivat: 'superprivatex',
		superprivate: 'superprivatex',
		nasst: 'nassty',
		sexlesson: 'sexlessons',
	};

	Object.entries(aliases).forEach(([alias, slug]) => {
		// only register an alias when its target series exists; a key without a series
		// could win the leftmost match and hide a real series further along the title
		if (serieNames[slug]) {
			serieNames[alias] = serieNames[slug];
		}
	});

	// ensure longest key matches first
	const serieKeys = Object.keys(serieNames).sort((nameA, nameB) => nameB.length - nameA.length);

	if (serieKeys.length === 0) {
		return null;
	}

	const serieName = release.title.match(new RegExp(serieKeys.map(escapeRegExp).join('|'), 'i'))?.[0];
	const serie = serieName && serieNames[slugify(serieName, '')];

	if (!serie) {
		return null;
	}

	const prefixes = [serieName, serie.name, serie.slug]
		.filter(Boolean)
		.map(escapeRegExp)
		.join('|');

	return {
		channel: serie.slug,
		title: release.title.replace(new RegExp(`(${prefixes})\\s*[-– :/]+\\s*`, 'ig'), ''),
	};
}
/**
 * Builds a base release from every scene tile and merges in the series
 * channel and cleaned title returned by matchChannel, when there is one.
 *
 * @param {Array<{query: object, el: object}>} scenes - one query context per tile
 * @param {object} channel - entity being scraped; its `url` is sent as poster referer
 * @returns {object[]} one release per scene
 */
function scrapeAll(scenes, channel) {
	return scenes.map(({ query, el }) => {
		const release = {};

		release.url = query.url('a');
		// the numeric ID is embedded in the tile's own id attribute as "post-<digits>"
		release.entryId = query.q(el, null, 'id')?.match(/post-(\d+)/)?.[1];

		release.title = query.cnt('.meta h3');
		release.date = query.date('.meta .post-meta', 'MMMM D, YYYY');

		release.poster = {
			src: query.img('img'),
			referer: channel.url,
		};

		return {
			...release,
			// spreading null is a no-op, so unmatched releases keep their scraped title
			...matchChannel(release, channel),
		};
	});
}
// 2020-11-24 03:29:44 +00:00
/**
 * Fetches a gallery page and returns its photo links as media sources.
 *
 * @param {string|null|undefined} url - gallery page URL
 * @returns {Promise<Array<{src: string, referer: string}>|null>} photo sources with the
 *   gallery URL as referer, or null when no URL was given or the request failed
 */
async function fetchPhotos(url) {
	if (!url) {
		return null;
	}

	const res = await qu.get(url, '.et_post_gallery');

	if (!res.ok) {
		return null;
	}

	return res.item.query.urls('a').map((imgUrl) => ({
		src: imgUrl,
		referer: url,
	}));
}
/**
 * Scrapes a single scene page into a release and merges in the series
 * channel and cleaned title returned by matchChannel, when there is one.
 *
 * @param {{query: object}} context - query context for the scene page
 * @param {string} url - scene page URL, sent as referer for poster and trailer
 * @param {object} channel - entity the scene was requested for
 * @param {object} [include] - scrape options; photos are fetched only when `include.photos` is truthy
 * @returns {Promise<object>} the scraped release
 */
async function scrapeScene({ query }, url, channel, include) {
	const release = {};

	const script = query.cnt('script.yoast-schema-graph');
	let data = null;

	if (script) {
		try {
			data = JSON.parse(script);
		} catch {
			// the schema graph is only the preferred poster source; on malformed markup
			// we continue and rely on the meta tag fallbacks below
			data = null;
		}
	}

	release.entryId = query.q('article.project', 'id')?.match(/post-(\d+)/)?.[1];

	release.title = query.cnt('.vid_title');
	release.description = query.cnt('.vid_desc p');

	release.date = query.date('.vid_date', 'MMMM D, YYYY');
	release.duration = query.dur('.vid_length');

	release.actors = query.all('.vid_infos a[href*="author/"]').map((actorEl) => ({
		name: query.cnt(actorEl),
		url: query.url(actorEl, null),
	}));

	release.tags = query.cnts('.vid_infos a[rel="tag"]');

	// the page may have no schema script at all, or one without an @graph list
	const graph = data?.['@graph'];
	const posterData = Array.isArray(graph)
		? graph.find((item) => item?.['@type'] === 'ImageObject')
		: null;

	const poster = posterData?.url
		|| query.q('meta[property="og:image"]', 'content')
		|| query.q('meta[name="twitter:image"]', 'content');

	release.poster = {
		src: poster,
		referer: url,
	};

	release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null; // rating out of 5, yet sometimes 5.07?

	if (include?.photos) {
		release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
	}

	release.trailer = {
		src: query.video(),
		type: query.video('source', 'type'),
		quality: query.video('source', 'data-res'),
		referer: url,
	};

	return {
		...release,
		...matchChannel(release, channel),
	};
}
// 2020-11-27 02:23:12 +00:00
// 2020-11-29 02:59:47 +00:00
/**
 * Scrapes an actor page into a profile, including the scenes listed on it.
 *
 * @param {{query: object, el: object}} context - query context for the actor page
 * @param {{url: string, gender: string}} actorUrl - resolved page URL and gender, copied onto the profile
 * @param {object} baseActor - unused here; kept for the existing call signature
 * @param {object} entity - passed to scrapeAll as the channel for the actor's scenes
 * @returns {object} the scraped profile
 */
function scrapeProfile({ query, el }, { url, gender }, baseActor, entity) {
	const profile = { url, gender };

	profile.age = query.number('div:nth-child(2) > p');
	// only the first word following "nationality" is captured
	profile.birthPlace = query.cnt('div:nth-child(3) > p')?.match(/nationality[\s:]+(\w+)/i)?.[1];

	profile.description = query.cnt('div:nth-child(4) > p');

	profile.avatar = {
		src: query.img('.model-page'),
		referer: url,
	};

	profile.scenes = scrapeAll(qu.initAll(el, '.project_category-videos'), entity);

	return profile;
}
/**
 * Fetches the channel page and scrapes every scene tile on it.
 *
 * @param {object} channel - entity with the `url` to fetch
 * @returns {Promise<object[]|number>} scraped releases, or the response status when the request failed
 */
async function fetchLatest(channel) {
	// no apparent pagination, all updates on one page
	// using channels in part because main overview contains indistinguishable photo albums
	// however, some serie pages contain videos from other series
	const res = await qu.getAll(channel.url, '.project');

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.items, channel);
}
2020-11-24 03:29:44 +00:00
/**
 * Fetches a scene page and scrapes it into a release.
 *
 * @param {string} url - scene page URL
 * @param {object} channel - entity the scene was requested for
 * @param {object} baseRelease - unused here; kept for the existing call signature
 * @param {object} include - scrape options forwarded to scrapeScene
 * @returns {Promise<object|number>} the scraped release, or the response status when the request failed
 */
async function fetchScene(url, channel, baseRelease, include) {
	const res = await qu.get(url);

	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.item, url, channel, include);
}
// 2020-11-29 02:59:47 +00:00
/**
 * Resolves an actor's profile page. Uses `baseActor.url` when it is set;
 * otherwise looks the actor up by slug on the female overview page and,
 * when not found there, on the male overview page.
 *
 * @param {object} baseActor - actor with a `slug`, and optionally a known `url`
 * @param {string} [gender='female'] - which overview page to search first
 * @returns {Promise<{url: string, gender: (string|undefined)}|number|null>} page URL and gender,
 *   the response status when an overview page could not be fetched, or null when
 *   the actor was not found
 */
async function getActorUrl(baseActor, gender = 'female') {
	if (baseActor.url) {
		// same shape as the lookup result below, so callers can always read `.url`;
		// gender is whatever baseActor carries (possibly undefined), as no overview was consulted
		return {
			url: baseActor.url,
			gender: baseActor.gender,
		};
	}

	const overviewUrl = gender === 'female'
		? 'https://www.littlecaprice-dreams.com/pornstars/'
		: 'https://www.littlecaprice-dreams.com/male-models-pornstars/';

	const overviewRes = await qu.getAll(overviewUrl, '.models');

	if (!overviewRes.ok) {
		return overviewRes.status;
	}

	// actors are identified by the slugified title attribute of their overview image
	const actorItem = overviewRes.items.find(({ query }) => slugify(query.q('img', 'title')) === baseActor.slug);

	if (!actorItem) {
		if (gender === 'female') {
			return getActorUrl(baseActor, 'male');
		}

		return null;
	}

	const actorUrl = actorItem.query.url('a');

	if (!actorUrl) {
		return null;
	}

	return {
		url: actorUrl,
		gender,
	};
}
// 2020-11-29 02:59:47 +00:00
/**
 * Resolves an actor's page, fetches it and scrapes it into a profile.
 *
 * @param {object} baseActor - actor to look up
 * @param {{entity: object}} context - `entity` is forwarded to scrapeProfile
 * @returns {Promise<object|number|null>} the scraped profile, a response status when the
 *   overview or the actor page could not be fetched, or null when the actor was not found
 */
async function fetchProfile(baseActor, { entity }) {
	const actorUrl = await getActorUrl(baseActor);

	if (!actorUrl) {
		return null;
	}

	// getActorUrl reports a failed overview request as a bare status; pass it on
	// instead of requesting an undefined URL
	if (typeof actorUrl === 'number') {
		return actorUrl;
	}

	// a known actor URL may come back as a plain string rather than { url, gender }
	const actor = typeof actorUrl === 'string'
		? { url: actorUrl, gender: baseActor.gender }
		: actorUrl;

	const actorRes = await qu.get(actor.url, '#main-content');

	if (!actorRes.ok) {
		return actorRes.status;
	}

	return scrapeProfile(actorRes.item, actor, baseActor, entity);
}
// 2020-11-23 03:32:56 +00:00
module . exports = {
fetchLatest ,
fetchScene ,
2020-11-27 23:46:30 +00:00
fetchProfile ,
2020-11-23 03:32:56 +00:00
} ;