2020-09-16 02:55:30 +00:00
'use strict' ;
2020-11-19 01:01:13 +00:00
const qu = require ( '../utils/qu' ) ;
2020-09-16 02:55:30 +00:00
const slugify = require ( '../utils/slugify' ) ;
2020-11-19 01:01:13 +00:00
/**
 * Scrape a list of scene tiles into release objects.
 *
 * @param {Array<{query: object}>} scenes - Wrapped tiles; only the `query` helper of each item is used.
 * @param {{url: string}} channel - Channel whose `url` is passed as `origin` when resolving links.
 * @returns {object[]} One release per tile with `url`, `entryId`, `title`, `actors`, `poster` and `teaser`.
 * @throws {TypeError} When a tile has no resolvable `.title` link (`new URL()` rejects it).
 */
function scrapeAll(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('.title', 'href', { origin: channel.url });
    // the numeric ID is the path segment following /scene/
    release.entryId = new URL(release.url).pathname.match(/\/scene\/(\d+)/)?.[1];

    release.title = query.cnt('.title');

    release.actors = query.all('.actors a').map((actorEl) => ({
      name: query.cnt(actorEl),
      url: query.url(actorEl, null, 'href', { origin: channel.url }),
    }));

    // Prefer the source set; otherwise try the image without the '_crop' marker first, then the image as-is.
    // The optional call keeps a tile without any thumbnail from aborting the whole page.
    const fallbackPoster = query.img('.thumb img');
    release.poster = query.sourceSet('.thumb img', 'data-srcset') || [fallbackPoster?.replace('_crop', ''), fallbackPoster];

    // high quality preview first, regular preview second
    release.teaser = [
      query.video('.thumb-ratio', 'data-hq-preview'),
      query.video('.thumb-ratio', 'data-preview'),
    ];

    return release;
  });
}
2020-11-19 01:01:13 +00:00
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{query: object}} item - Wrapped page; only its `query` helper is used.
 * @param {string} url - Absolute URL of the scene page; the entry ID is taken from its path.
 * @param {{url: string}} channel - Channel whose `url` is passed as `origin` when resolving links.
 * @returns {object} Release with `entryId`, `title`, `description`, `date`, `duration`, `actors`,
 *   `director`, `poster` and, when the page links to a movie, `movie`.
 * @throws {TypeError} When `url` is not a valid absolute URL.
 */
function scrapeScene({ query }, url, channel) {
  const release = {};

  release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)/)?.[1];

  release.title = query.cnt('h1.title');
  release.description = query.cnt('.content-description .full p');

  release.date = query.date('.publish_date', 'MMMM DD, YYYY');
  release.duration = query.dur('.duration');

  release.actors = query.all('.actress a').map((actorEl) => ({
    name: query.cnt(actorEl),
    url: query.url(actorEl, null, 'href', { origin: channel.url }),
  }));

  // the director text is split on the first colon and the part after it is kept
  release.director = query.cnt('.director')?.split(/\s*:\s*/)[1];

  // prefer the source set; otherwise try the image without the '_crop' marker first, then the image as-is
  const fallbackPoster = query.img('.player img');
  release.poster = query.sourceSet('.player img', 'data-srcset') || [fallbackPoster?.replace('_crop', ''), fallbackPoster];

  const movieUrl = query.url('.movie a', 'href', { origin: channel.url });

  if (movieUrl) {
    release.movie = {
      entryId: new URL(movieUrl).pathname.match(/\/porn-movie\/([\w-]+)/)?.[1],
      title: query.cnt('.movie a'),
      url: movieUrl, // already resolved above, no need to query the link a second time
    };
  }

  return release;
}
/**
 * Scrape a list of movie tiles into release objects.
 *
 * @param {Array<{query: object}>} movies - Wrapped tiles; only the `query` helper of each item is used.
 * @param {{url: string}} channel - Channel whose `url` is passed as `origin` when resolving links.
 * @returns {object[]} One release per tile with `url`, `entryId`, `title` and `covers`.
 * @throws {TypeError} When a tile has no resolvable link (`new URL()` rejects it).
 */
function scrapeMovies(movies, channel) {
  return movies.map(({ query }) => {
    const release = {};

    // a null selector reads the href from the tile element itself
    release.url = query.url(null, 'href', { origin: channel.url });
    release.entryId = new URL(release.url).pathname.match(/\/porn-movie\/([\w-]+)/)?.[1];

    release.title = query.cnt('h2');

    // the source set is wrapped so it counts as a single cover
    release.covers = [query.sourceSet('img', 'data-srcset')];

    return release;
  });
}
/**
 * Scrape a movie page into a release object, including the scenes listed on it.
 *
 * @param {{query: object, el: object}} item - Wrapped page; `query` reads values, `el` is handed to `qu.initAll`.
 * @param {string} url - Absolute URL of the movie page; the entry ID is taken from its path.
 * @param {{url: string}} channel - Channel whose `url` is passed as `origin` when resolving links.
 * @returns {object} Release with `title`, `description`, `entryId`, `date`, `datePrecision`, `duration`,
 *   `actors`, `poster`, `covers` and `scenes`.
 * @throws {TypeError} When `url` is not a valid absolute URL.
 */
function scrapeMovie({ query, el }, url, channel) {
  const release = {};

  release.title = query.cnt('.header h1');
  release.description = query.cnt('.content-text p');

  release.entryId = new URL(url).pathname.match(/\/porn-movie\/([\w-]+)/)?.[1];

  // only the year is parsed, so the precision is recorded alongside the date
  release.date = query.date('.out_date', 'YYYY');
  release.datePrecision = 'year';

  release.duration = query.dur('.duration');

  release.actors = query.all('.actors .actor').map((actorEl) => ({
    name: query.cnt(actorEl, '.name'),
    url: query.url(actorEl, 'a', 'href', { origin: channel.url }),
    avatar: query.sourceSet(actorEl, '.thumbnail img', 'data-srcset'),
  }));

  release.poster = query.sourceSet('.banner', 'data-src')?.[0];

  // the <source> elements sit next to the cover; a page without a cover yields no sources instead of throwing
  const coverParent = query.el('.cover')?.parentElement;
  const coverSources = (coverParent && query.all(coverParent, 'source')) || [];

  // the resolution is the first run of 3+ digits wrapped in underscores; NaN when the URL has none
  const coverResolution = (cover) => Number(cover.match(/_(\d{3,})_/)?.[1]);

  const sortedCovers = coverSources
    .map((coverEl) => query.sourceSet(coverEl, null, 'data-srcset'))
    .flat()
    .filter(Boolean) // drop sources without a source set so the comparator only sees strings
    .sort((coverA, coverB) => {
      const resA = coverResolution(coverA);
      const resB = coverResolution(coverB);

      // highest resolution first
      if (resA < resB) return 1;
      if (resA > resB) return -1;

      return 0;
    });

  // all candidates are wrapped in one array so they count as a single cover, with the plain cover image last
  release.covers = [sortedCovers.concat(query.sourceSet('.cover', 'data-src')?.[0])];

  release.scenes = scrapeAll(qu.initAll(el, '.scene'), channel);

  return release;
}
2020-11-19 01:01:13 +00:00
/**
 * Scrape an actor page into a profile object, including the scenes listed on it.
 *
 * @param {{query: object, el: object}} item - Wrapped page; `query` reads values, `el` is handed to `qu.initAll`.
 * @param {{url: string}} entity - Passed to `scrapeAll` as the channel for resolving scene links.
 * @param {string} [avatar] - Avatar URL found by the caller; omitted from the profile when falsy.
 * @returns {Promise<object>} Profile with `description`, `nationality`, `banner`, `releases` and optionally `avatar`.
 */
async function scrapeProfile({ query, el }, entity, avatar) {
  const profile = {};

  profile.description = query.cnt('.content-description .content-text > p, .content-description .full p'); // different structure for overflowing vs short text
  profile.nationality = query.cnt('.nationality');

  profile.banner = query.img('.header img:not([src*="actor/banner"])'); // ignore stock banner

  if (avatar) {
    // try the URL without the 'crop_' marker first, then the URL as found
    profile.avatar = [
      avatar.replace('crop_', ''),
      avatar,
    ];
  }

  profile.releases = scrapeAll(qu.initAll(el, '.scene'), entity);

  return profile;
}
2021-06-02 01:27:32 +00:00
/**
 * Create a session and request the English news page with it before the latest scenes are fetched.
 *
 * @param {{url: string}} channel - Channel whose `url` is the base of the requested page.
 * @returns {Promise<object>} The session created by `qu.session()`.
 */
async function beforeFetchLatest(channel) {
  // scene page only seems to accept language preferences from session
  const session = qu.session();

  // the response itself is not used, the request is only made with the session attached
  await qu.getAll(`${channel.url}/en/news-videos-x-marc-dorcel`, '.scene', {
    'X-Requested-With': 'XMLHttpRequest',
    'Accept-Language': 'en-US,en', // fetch English rather than French titles
  }, { session });

  return session;
}
/**
 * Fetch one page of the newest scenes and scrape them.
 *
 * @param {{url: string}} channel - Channel whose `url` is the base of the listing endpoint.
 * @param {number} [page=1] - Page number placed in the query string.
 * @param {object} [options] - Unused by this function.
 * @param {{beforeFetchLatest?: object}} [preData={}] - Its `beforeFetchLatest` value is used as the request session
 *   (presumably the session returned by `beforeFetchLatest` — confirm against the caller).
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request failed.
 */
async function fetchLatest(channel, page = 1, options, { beforeFetchLatest: session } = {}) {
  const url = `${channel.url}/scene/list/more/?lang=en&page=${page}&sorting=new`;

  const res = await qu.getAll(url, '.scene', {
    'X-Requested-With': 'XMLHttpRequest',
    'Accept-Language': 'en-US,en', // fetch English rather than French titles
  }, { session });

  if (res.ok) {
    return scrapeAll(res.items, channel);
  }

  return res.status;
}
2020-11-19 01:01:13 +00:00
/**
 * Fetch one page of the newest movies and scrape them.
 *
 * @param {{url: string}} channel - Channel whose `url` is the base of the listing endpoint.
 * @param {number} [page=1] - Page number placed in the query string.
 * @returns {Promise<object[]|number>} Scraped movies, or the response status when the request
 *   failed or returned no items.
 */
async function fetchMovies(channel, page = 1) {
  const url = `${channel.url}/movies/more?lang=en&page=${page}&sorting=new`;

  const res = await qu.getAll(url, '.movie', {
    'X-Requested-With': 'XMLHttpRequest',
    'Accept-Language': 'en-US,en', // fetch English rather than French titles
    Referer: 'https://www.dorcelclub.com/en/porn-movie?sorting=new', // might be used to derive sorting
  });

  if (res.ok && res.items) {
    return scrapeMovies(res.items, channel);
  }

  return res.status;
}
2020-11-19 01:01:13 +00:00
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - URL of the scene page.
 * @param {{url: string}} channel - Channel whose `url` is the base of the Referer value.
 * @returns {Promise<object|number>} Scraped release, or the response status when the request failed.
 */
async function fetchScene(url, channel) {
  const res = await qu.get(url, null, {
    'Accept-Language': 'en-US,en', // fetch English rather than French titles
    Referer: `${channel.url}/en/news-videos-x-marc-dorcel`,
  });

  if (res.ok) {
    return scrapeScene(res.item, url, channel);
  }

  return res.status;
}
2020-09-16 02:55:30 +00:00
2020-11-19 01:01:13 +00:00
/**
 * Fetch a movie page and scrape its `.content` element.
 *
 * @param {string} url - URL of the movie page.
 * @param {{url: string}} channel - Channel whose `url` is the base of the Referer value.
 * @returns {Promise<object|number>} Scraped movie, or the response status when the request
 *   failed or returned no item.
 */
async function fetchMovie(url, channel) {
  const res = await qu.get(url, '.content', {
    'Accept-Language': 'en-US,en', // fetch English rather than French titles
    Referer: `${channel.url}/en/porn-movie`,
  });

  if (res.ok && res.item) {
    return scrapeMovie(res.item, url, channel);
  }

  return res.status;
}
2020-11-19 01:01:13 +00:00
/**
 * Find an actor through the site search and scrape their profile page.
 *
 * @param {{name: string, slug: string}} baseActor - `name` is the search term; `slug` is compared
 *   against the slugified name of each search result.
 * @param {{entity: {url: string}}} context - Its `entity.url` is the base of the search URL and link origin.
 * @returns {Promise<object|number|null>} Scraped profile; the search response status when the search
 *   failed; `null` when no result matches, the match has no profile link, or the profile request failed.
 */
async function fetchProfile(baseActor, { entity }) {
  // URL slugs are unpredictable: /jessie-volt, /aleska_diamond, /liza-del_sierra
  const searchRes = await qu.postAll(`${entity.url}/en/search`, { s: baseActor.name }, '.actors .actor', { 'Accept-Language': 'en-US,en' });

  if (!searchRes.ok) {
    return searchRes.status;
  }

  const actorItem = searchRes.items.find(({ query }) => slugify(query.cnt('.name')) === baseActor.slug);

  if (!actorItem) {
    return null;
  }

  const actorUrl = actorItem.query.url('a', 'href', { origin: entity.url });
  const actorAvatar = actorItem.query.img();

  if (!actorUrl) {
    // a result without a profile link gives nothing to request
    return null;
  }

  const actorRes = await qu.get(actorUrl, null, { 'Accept-Language': 'en-US,en' });

  if (actorRes.ok) {
    return scrapeProfile(actorRes.item, entity, actorAvatar);
  }

  return null;
}
2020-09-16 02:55:30 +00:00
module . exports = {
2021-06-02 01:27:32 +00:00
beforeFetchLatest ,
2020-09-16 02:55:30 +00:00
fetchLatest ,
fetchScene ,
2020-11-19 01:01:13 +00:00
fetchMovie ,
fetchMovies ,
2020-09-16 02:55:30 +00:00
fetchProfile ,
} ;