@@ -1,7 +1,19 @@
'use strict' ;
const q u = require ( '../utils/qu ' ) ;
const unprint = require ( 'unprint ' ) ;
const slugify = require ( '../utils/slugify' ) ;
const { stripQuery } = require ( '../utils/url' ) ;
const { convert } = require ( '../utils/convert' ) ;
// Lookup from a slug variant to the channel slug it should resolve to.
// Written as target slug -> variants and then inverted, so each target is spelled once.
// Consumers fall back to the variant itself when it has no entry here.
const channelMap = Object.fromEntries(
  Object.entries({
    littlecapricevr: ['vr', 'vrporn'],
    superprivatex: ['superprivat', 'superprivate'],
    nassty: ['nasst'],
    sexlessons: ['sexlesson'],
  }).flatMap(([targetSlug, variants]) => variants.map((variant) => [variant, targetSlug])),
);
function matchChannel ( release , channel ) {
const series = channel . children || channel . parent ? . children ;
@@ -16,188 +28,176 @@ function matchChannel(release, channel) {
[ serie . slug ] : serie ,
} ) , { } ) ;
serieNames . vr = serieNames . littlecapricevr ;
serieNames . superprivat = serieNames . superprivatex ;
serieNames . superprivate = serieNames . superprivatex ;
serieNames . nasst = serieNames . nassty ;
serieNames . sexlesson = serieNames . sexlessons ;
// ensure longest key matches first
const serieKeys = Object . keys ( serieNames ) . sort ( ( nameA , nameB ) => nameB . length - nameA . length ) ;
const serieName = release . title . match ( new RegExp ( serieKeys . join ( '|' ) , 'i' ) ) ? . [ 0 ] ;
const serie = serieName && serieNames [ slugify( serieName , '' ) ] ;
const serieName = release . title ? . match ( new RegExp ( serieKeys . join ( '|' ) , 'i' ) ) ? . [ 0 ] ;
const serieSlug = slugify ( serieName , '' ) ;
const serie = serieName && serieNames [ channelMap [ serieSlug ] || serieSlug ] ;
if ( serie ) {
return {
channel : serie . slug ,
title : release . title . replace ( new RegExp ( ` ( ${ serieName } | ${ serie . name } | ${ serie . slug } ) \\ s*[-– :/]+ \\ s* ` , 'ig' ) , '' ) ,
} ;
return serie . slug ;
}
return null ;
}
function scrapeAll ( scenes , channel ) {
return scenes . map ( ( { query , el } ) => {
return scenes . map ( ( { query } ) => {
const release = { } ;
release . url = query . url ( 'a' ) ;
release . entryId = query . q ( el , null , 'id ' ) ? . match ( /pos t-(\d+ )/ ) ? . [ 1 ] ;
release . url = query . url ( null ) ;
release . entryId = query . attribute ( null , 'class ' ) . match ( /projec t-(\d{3,} )/ ) ? . [ 1 ] ;
release . title = query . cnt ( '.meta h3 ' ) ;
release . date = query . date ( '.meta .post-meta' , 'MMMM D, YYYY' ) ;
release . title = query . conte nt ( 'h2' ) ? . trim ( ) . replace ( /\.\.\.$/ , ' ' ) ;
release . poster = {
src : query . img ( 'img' ) ,
referer : channel . url ,
} ;
const poster = query . img ( 'img' ) ;
return {
... release ,
... matchChannel ( release , channel ) ,
} ;
} ) ;
}
async function fetchPhotos ( url ) {
if ( url ) {
const res = await qu . get ( url , '.et_post_gallery' ) ;
if ( res . ok ) {
return res . item . query . urls ( 'a' ) . map ( ( imgUrl ) => ( {
src : imgUrl ,
referer : url ,
if ( poster ) {
release . poster = [
stripQuery ( poster ) ,
poster ,
] . map ( ( src ) => ( {
src ,
referer : channel . url ,
} ) ) ;
}
}
return null ;
}
release . channel = matchChannel ( release , channel ) ;
async function scrapeScene ( { query } , url , channel , include ) {
const release = { } ;
const script = query . cnt ( 'script.yoast-schema-graph' ) ;
const data = script && JSON . parse ( script ) ;
release . entryId = query . q ( 'article.project' , 'id' ) ? . match ( /post-(\d+)/ ) ? . [ 1 ] ;
release . title = query . cnt ( '.vid_title' ) ;
release . description = query . cnt ( '.vid_desc p' ) ;
release . date = query . date ( '.vid_date' , 'MMMM D, YYYY' ) ;
release . duration = query . dur ( '.vid_length' ) ;
release . actors = query . all ( '.vid_infos a[href*="author/"]' ) . map ( ( actorEl ) => ( {
name : query . cnt ( actorEl ) ,
url : query . url ( actorEl , null ) ,
} ) ) ;
release . tags = query . cnts ( '.vid_infos a[rel="tag"]' ) ;
const posterData = data [ '@graph' ] ? . find ( ( item ) => item [ '@type' ] === 'ImageObject' ) ;
const poster = posterData ? . url
|| query . q ( 'meta[property="og:image"]' , 'content' )
|| query . q ( 'meta[name="twitter:image"]' , 'content' ) ;
release . poster = {
src : poster ,
referer : url ,
} ;
release . stars = Math . min ( Number ( query . q ( '.post-ratings-image' , 'title' ) ? . match ( /average:\s*(\d\.\d+)/ ) ? . [ 1 ] ) , 5 ) || null ; // rating out of 5, yet sometimes 5.07?
if ( include . photos ) {
release . photos = await fetchPhotos ( query . url ( '.vid_buttons a[href*="project/"]' ) ) ;
}
release . trailer = {
src : query . video ( ) ,
type : query . video ( 'source' , 'type' ) ,
quality : query . video ( 'source' , 'data-res' ) ,
referer : url ,
} ;
return {
... release ,
... matchChannel ( release , channel ) ,
} ;
}
function scrapeProfile ( { query , el } , { url , gender } , baseActor , entity ) {
const profile = { url , gender } ;
profile . age = query . number ( 'div:nth-child(2) > p' ) ;
profile . birthPlace = query . cnt ( 'div:nth-child(3) > p' ) ? . match ( /nationality[\s:]+(\w+)/i ) ? . [ 1 ] ;
profile . description = query . cnt ( 'div:nth-child(4) > p' ) ;
profile . avatar = {
src : query . img ( '.model-page' ) ,
referer : url ,
} ;
profile . scenes = scrapeAll ( qu . initAll ( el , '.project_category-videos' ) , entity ) ;
return profile ;
return release ;
} ) ;
}
/**
 * Fetch the channel page and scrape every video entry listed on it.
 *
 * @param {object} channel - channel entity; `channel.url` is the page that gets requested.
 * @returns {Promise<Array|number>} the result of scrapeAll, or `res.status` when the request is not ok.
 */
async function fetchLatest(channel) {
  // no apparent pagination, all updates on one page
  // using channels in part because main overview contains indistinguishable photo albums
  // however, some serie pages contain videos from other series
  const res = await unprint.get(channel.url, { selectAll: '.project-type-video' });

  if (res.ok) {
    // with selectAll, res.context holds one context per matched element
    return scrapeAll(res.context, channel);
  }

  return res.status;
}
async function fetchScene ( url , channel , baseRelease , includ e) {
const res = await qu . get ( url ) ;
async function attachPhotos ( url , releas e) {
if ( url ) {
const res = await unprint . get ( url ) ;
if ( res . ok ) {
return scrapeScene ( res . item , url , channel , include ) ;
if ( res . ok ) {
release . photos = res . context . query . imgs ( '.gallery img' ) . map ( ( imgUrl ) => ( { // eslint-disable-line no-param-reassign
src : imgUrl ,
referer : url ,
} ) ) ;
release . photoCount = res . context . query . number ( '.image-amount' ) ; // eslint-disable-line no-param-reassign
}
}
return res . status ;
return null ;
}
async function getActorUrl ( baseActor , gender = 'female' ) {
if ( baseActor . url ) {
return baseActor . url ;
async function scrapeScene ( { query } , { url , include } ) {
const release = { } ;
release . entryId = query . attribute ( '#main-project-content' , 'class' ) . match ( /project-(\d{3,})/ ) ? . [ 1 ] ;
release . title = query . content ( '.project-header h1' ) ;
release . description = query . content ( '.desc-text' ) ;
release . date = query . date ( '.relese-date' , 'D. MMM YYYY' , { match : /\d{1,2}\. \w{3} \d{4}/ } ) ; // sic
release . duration = query . duration ( '.video-duration' ) ;
release . actors = query . all ( '.project-models .list a' ) . map ( ( actorEl ) => ( {
name : unprint . query . content ( actorEl ) ,
url : unprint . query . url ( actorEl , null ) ,
} ) ) ;
release . tags = query . contents ( '.project-tags a[href*="videos/#"]' ) ;
const poster = query . attribute ( 'meta[property="og:image"]' , 'content' )
|| query . attribute ( 'meta[name="twitter:image"]' , 'content' ) ;
release . poster = {
src : poster ,
referer : url ,
} ;
if ( include . photos ) {
await attachPhotos ( url . replace ( /(\/)?$/ , '-2$1' ) , release ) ;
}
const overviewUrl = gender === 'female'
? 'https://www.littlecaprice-dreams.com/pornstars/'
: 'https://www.littlecaprice-dreams.com/male-models-pornstars/' ;
const trailerFrame = query . url ( '.video iframe' , { attribute : 'src' } ) ;
const trailerId = trailerFrame ? . match ( /\/embed\/\d+\/([a-z0-9-]+)/ ) ? . [ 1 ] ;
const overviewRes = await qu . getAll ( overviewUrl , '.models' ) ;
if ( trailerId ) {
release . trailer = {
stream : ` https://trailer.littlecaprice-dreams.com/ ${ trailerId } /1920x1080/video.m3u8 ` ,
quality : 1080 ,
referer : url ,
} ;
}
const channelSlug = slugify ( query . content ( '.project-tags a[href*="collection/"]' ) , '' ) ;
release . channel = channelMap [ channelSlug ] || channelSlug ;
return release ;
}
/**
 * Build an actor profile from a model page.
 *
 * @param {object} context - page context; only its `query` helper is used.
 * @param {object} actor - lookup result carrying the profile `url` and an optional `avatar` image URL.
 * @param {object} entity - passed through to scrapeAll for the scenes listed on the page.
 * @returns {object} profile with url, nationality, cup, measurements, height, photos and scenes,
 *   plus description and avatar when available.
 */
function scrapeProfile({ query }, { url, avatar }, entity) {
  const profile = { url };

  // Every bio field is matched against the same heading, so read it once.
  // The heading may be absent, hence the null-safe matches below.
  const info = query.content('.info h2');

  profile.nationality = info?.match(/nationality: (\w+)/i)?.[1];
  profile.cup = info?.match(/cu[pb]-size: (\w{1,2})/i)?.[1]; // pattern also accepts 'cub-size' (sic)
  profile.measurements = info?.match(/\d{2}-\d{2}-\d{2}/i)?.[0];
  profile.height = convert(info?.match(/\d′ \d{1,2}″/)?.[0], 'cm');

  const description = query.content('.info div:last-child');

  // Skip short 'coming soon' texts; anything over 50 characters is kept even if it contains the phrase.
  const isPlaceholder = /coming soon/i.test(description) && description.length <= 50;

  if (!isPlaceholder) {
    profile.description = description;
  }

  if (avatar) {
    // Two candidates: the URL passed through stripQuery first, then the URL as found.
    profile.avatar = [
      stripQuery(avatar),
      avatar,
    ].map((src) => ({
      src,
      referer: url,
    }));
  }

  profile.photos = query.imgs('.img-poster');
  profile.scenes = scrapeAll(unprint.initAll(query.all('.project-type-video')), entity);

  return profile;
}
async function getActorUrl ( baseActor ) {
// male performers are listed, but hidden
const overviewRes = await unprint . get ( 'https://www.littlecaprice-dreams.com/models/' , { selectAll : '.model-preview' } ) ;
if ( ! overviewRes . ok ) {
return overviewRes . status ;
}
const actorItem = overviewRes . items . find ( ( { query } ) => slugify ( query . q ( 'img' , 'title ' ) ) === baseActor . slug ) ;
const actorItem = overviewRes . context . find ( ( { query } ) => slugify ( query . text ( 'h2 ' ) ) === baseActor . slug ) ;
if ( ! actorItem ) {
if ( gender === 'female' ) {
return getActorUrl ( baseActor , 'male' ) ;
}
return null ;
}
const actorUrl = actorItem . query . url ( 'a' ) ;
const actorUrl = actorItem . query . url ( null ) ;
const actorAvatar = actorItem . query . img ( ) ;
if ( actorUrl ) {
return {
url : actorUrl ,
gende r,
avatar : actorAvata r,
} ;
}
@@ -205,16 +205,17 @@ async function getActorUrl(baseActor, gender = 'female') {
}
async function fetchProfile ( baseActor , { entity } ) {
const actorUrl = await getActorUrl ( baseActor ) ;
// using search for avatar, not on model page
const actorResult = await getActorUrl ( baseActor ) ;
if ( ! actorUrl ) {
if ( ! actorResult ) {
return null ;
}
const actorRes = await q u. get ( actorUrl . url , '#main-content' ) ;
const actorRes = await unprint . get ( actorResult . url , { select : '.model-page' } ) ;
if ( actorRes . ok ) {
return scrapeProfile ( actorRes . item , actorUrl , baseActor , entity ) ;
return scrapeProfile ( actorRes . context , actorResult , entity ) ;
}
return actorRes . status ;
@@ -222,6 +223,6 @@ async function fetchProfile(baseActor, { entity }) {
module . exports = {
fetchLatest ,
fetchScene ,
fetchProfile ,
scrapeScene ,
} ;