'use strict';

// Project-local helpers: `slugify` turns a name into a URL slug, `qu` fetches
// pages and exposes the `query` helpers used by the scrapers below.
const slugify = require('../utils/slugify');
const qu = require('../utils/qu');
/**
 * Build the photo sources for a scene's photo album.
 *
 * Fetches `albumUrl`, takes the last preview link on the page, reads the photo
 * number from its `<digits>.jpg` file name and generates one entry for every
 * photo from 1 up to that number (zero-padded to three digits).
 *
 * @param {string} albumUrl - URL of the album page to fetch.
 * @param {{ url: string }} channel - Channel whose `url` is prefixed to each photo page path.
 * @returns {Promise<Array<Array<{ url: string, extract: Function }>>>} One pair of
 *   sources per photo: the channel's own photo page first, the matching
 *   dogfartnetwork.com page second. Empty when the request fails or when no
 *   numbered preview link is found.
 */
async function getPhotos(albumUrl, channel) {
  const res = await qu.get(albumUrl);

  if (!res.ok) {
    return [];
  }

  const photoPattern = /\d+\.jpg/;
  const lastPhotoPage = res.item.query.urls('.pics-container .preview-image-container a').at(-1);
  const lastPhotoFile = lastPhotoPage?.match(photoPattern)?.[0];

  if (!lastPhotoFile) {
    // album without (numbered) previews, nothing to enumerate
    return [];
  }

  // parseInt stops at the first non-digit, so '012.jpg' yields 12
  const lastPhotoIndex = Number.parseInt(lastPhotoFile, 10);
  const networkPhotoPage = lastPhotoPage.replace('tourx', 'tour');
  const extract = ({ query }) => query.img('.scenes-module img');

  return Array.from({ length: lastPhotoIndex }, (value, index) => {
    const photoFile = `${String(index + 1).padStart(3, '0')}.jpg`;

    // NOTE(review): the pair is presumably consumed as primary source + fallback; confirm against the media pipeline
    return [
      {
        url: `${channel.url}${lastPhotoPage.replace(photoPattern, photoFile)}`,
        extract,
      },
      {
        url: `https://dogfartnetwork.com${networkPhotoPage.replace(photoPattern, photoFile)}`,
        extract,
      },
    ];
  });
}
/**
 * Turn the scene tiles of a listing page into release objects.
 *
 * @param {Array<{ query: object }>} scenes - One item per scene tile, each exposing a `query` helper.
 * @param {{ url: string, slug: string }} site - Site being scraped; `url` is the origin for
 *   relative links and `slug` prefixes the entry ID.
 * @param {boolean} [filter=true] - When true, tiles labelled with a site whose
 *   `www.<label>` host differs from the host of `site.url` are set aside.
 * @returns {{ scenes: object[], unextracted: object[] }} Releases for this site in `scenes`,
 *   releases set aside by the filter in `unextracted`.
 * @throws {TypeError} When a tile's URL cannot be parsed (e.g. the link is missing).
 */
function scrapeLatest(scenes, site, filter = true) {
  const result = {
    scenes: [],
    unextracted: [],
  };

  for (const { query } of scenes) {
    const release = {};
    const siteUrl = query.cnt('.recent-details-title .help-block, .model-details-title .site-name');

    release.url = query.url('.thumbnail, .preview-image-container > a', 'href', { origin: site.url });
    release.entryId = `${site.slug}_${new URL(release.url).pathname.split('/')[4]}`;
    release.title = query.cnt('.scene-title');

    // actors are not derived from the title: the titles don't always list the actors, e.g. BarbCummings.com
    release.poster = query.img();
    release.teaser = query.video('.thumbnail, .preview-thumbnail', 'data-preview_clip_url');

    release.channel = siteUrl?.match(/(.*)\.com/)?.[1].toLowerCase();

    // a tile without a site label is always kept
    const isOtherSite = filter && siteUrl && `www.${siteUrl.toLowerCase()}` !== new URL(site.url).host;

    if (isOtherSite) {
      // different dogfart site
      result.unextracted.push(release);
    } else {
      result.scenes.push(release);
    }
  }

  return result;
}
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ query: object }} item - Fetched scene page; only its `query` helper is used.
 * @param {string} url - Absolute URL of the scene page; its second-to-last path segment
 *   becomes the ID part of `entryId`.
 * @param {{ type: string, slug: string, url: string }} channel - Entity being scraped. When its
 *   `type` is not 'channel', the channel slug is read from the page's `.site-name` instead.
 * @param {object} baseScene - Not used by this scraper.
 * @param {{ includePhotos?: boolean }} parameters - Photos are only fetched when `includePhotos` is truthy.
 * @returns {Promise<object>} The release; `trailer` and `photos` are only set when available,
 *   `stars` is null when the page has no rating.
 */
async function scrapeScene({ query }, url, channel, baseScene, parameters) {
  const release = {};
  const { origin, pathname } = new URL(url);

  release.channel = channel.type === 'channel'
    ? channel.slug
    : query.cnt('.site-name').split('.')[0].toLowerCase();

  release.entryId = `${release.channel}_${pathname.split('/').at(-2)}`;

  release.title = query.cnt('.description-title') || query.text('.scene-title');

  release.actors = query.all('.more-scenes a, .starring-list a').map((actorEl) => ({
    name: query.cnt(actorEl),
    url: query.url(actorEl, null, 'href', { origin: channel.url }),
  }));

  // the clean-up only applies to the on-page fallback, the meta description is used as-is
  release.description = query.meta('meta[itemprop="description"]')
    || query.cnt('.description, [itemprop="description"]')?.replace(/[ \t\n]{2,}/g, ' ').replace('...read more', '').trim();

  release.date = query.date('meta[itemprop="uploadDate"]', null, null, 'content');
  release.duration = query.duration('.extra-info p:nth-child(2), .run-time-container');

  release.tags = query.exists('.scene-details .categories a')
    ? query.cnts('.scene-details .categories a')
    : query.text('.categories')?.split(/,\s+/);

  const trailer = query.video('.html5-video', 'data-trailer');
  const lastPhotosUrl = query.urls('.pagination a').at(-1);

  release.poster = query.poster('.html5-video', 'data-poster') || query.img('.trailer-image');

  // NOTE(review): a trailer URL containing 'join' presumably points at the sign-up page rather than a video; confirm
  if (trailer && !trailer.includes('join')) {
    release.trailer = trailer;
  }

  if (lastPhotosUrl && parameters.includePhotos) {
    release.photos = await getPhotos(`${origin}${pathname}${lastPhotosUrl}`, channel);
  }

  const rating = query.number('span[itemprop="average"], span[itemprop="ratingValue"]')
    || query.number('canvas[data-score]', null, 'data-score');

  // the page rating is halved for the release; a missing rating stays null instead of becoming 0 or NaN
  release.stars = rating == null || Number.isNaN(Number(rating))
    ? null
    : Number((rating / 2).toFixed(2));

  return release;
}
/**
 * Fetch one page of the latest scenes for a channel.
 *
 * @param {{ url: string }} channel - Channel to scrape; also passed on to `scrapeLatest` as the site.
 * @param {number} [page=1] - Page number, sent as the `p` query parameter.
 * @param {{ parameters: { latest?: string } }} options - `parameters.latest`, when set, replaces
 *   the default `<channel.url>/tour/scenes` listing URL.
 * @returns {Promise<object|number>} The result of `scrapeLatest` on success, otherwise the
 *   status reported by the request.
 */
async function fetchLatest(channel, page = 1, { parameters }) {
  const listingUrl = parameters.latest
    ? `${parameters.latest}/?p=${page}`
    : `${channel.url}/tour/scenes/?p=${page}`;

  const res = await qu.getAll(listingUrl, '.recent-updates, .preview-image-container');

  if (res.ok) {
    return scrapeLatest(res.items, channel);
  }

  return res.status;
}
/**
 * Fetch an actor's profile page on dogfartnetwork.com and collect the scenes listed on it.
 *
 * @param {{ name: string }} baseActor - Actor to look up; the name is slugified with '+' for the profile URL.
 * @param {object} entity - Passed to `scrapeLatest` as the site the scenes belong to.
 * @returns {Promise<{ scenes: object[] }|number>} The actor's scenes on success, otherwise the
 *   status reported by the request.
 */
async function fetchProfile(baseActor, entity) {
  const slug = slugify(baseActor.name, '+');
  const url = `https://www.dogfartnetwork.com/tour/girls/${slug}/`;

  const res = await qu.getAll(url, '.recent-updates');

  if (!res.ok) {
    return res.status;
  }

  // filtering is disabled so scenes from every site on the profile are kept
  const { scenes } = scrapeLatest(res.items, entity, false);

  // no bio available
  return { scenes };
}
module . exports = {
2020-05-14 02:26:05 +00:00
fetchLatest ,
2021-01-11 15:20:01 +00:00
fetchProfile ,
2022-04-02 22:49:39 +00:00
scrapeScene ,
2019-11-04 04:47:37 +00:00
} ;