// 2019-03-04 01:46:33 +00:00
'use strict' ;
const bhttp = require ( 'bhttp' ) ;
const cheerio = require ( 'cheerio' ) ;
// 2019-03-04 03:19:03 +00:00
const moment = require ( 'moment' ) ;
/**
 * Extract release details from the HTML fragment of a single scene entry.
 *
 * @param {string} html - markup of one entry, loaded with cheerio.
 * @param {Object} site - site record; `site.url` is used as the base of the scene URL
 *   and the object itself is passed through on the result.
 * @returns {{url: string, entryId: (string|undefined), title: string, actors: string[],
 *   date: Date, rating: {stars: number}, site: Object}} the scraped release.
 * @throws {TypeError} if the scene link has no `title` attribute.
 */
function scrape(html, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });

  // id attribute of the first <li> in the fragment
  const entryId = $('li').attr('id');

  const sceneLinkElement = $('#scene_title_border a');
  const url = `${site.url}/${sceneLinkElement.attr('href')}`;

  // U+00E2 U+0080 U+0099 is what the UTF-8 bytes of a right single quote look like
  // when decoded one byte per character; normalise it to a plain apostrophe.
  const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\'');

  // Each actor link may carry a separator: strip the first comma that is followed
  // by a space, a no-break space or a narrow no-break space.
  const actors = $('.home_model_name a')
    .toArray()
    .map(element => $(element).text().replace(/,[\u0020\u00A0\u202F]/, ''));

  // The listing prints dates as day-month-year; parse as UTC.
  const date = moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate();

  // One star image per rating point; the selection length is the count (0 when none match).
  const stars = $('img[src*="/star.png"]').length;

  return {
    url,
    entryId,
    title,
    actors,
    date,
    rating: {
      stars,
    },
    site,
  };
}
// 2019-04-05 01:45:40 +00:00
/**
 * Fetch one page of the site's latest updates and scrape every entry in the response.
 *
 * Page 1 is requested from final_latestupdateview.php with a fixed window of 9;
 * any other page is requested from final_load_latestupdate_grid_view.php, always
 * starting at 0 with `limitend` set to `(page - 1) * 8 + 1`.
 *
 * @param {Object} site - site record; `site.url` and `site.parameters.tourId` are read.
 * @param {number} [page=1] - page to fetch.
 * @returns {Promise<Object[]>} one scraped release per value in the response's `total_arr`.
 * @throws {SyntaxError} if the response body is not valid JSON.
 */
async function fetchLatest(site, page = 1) {
  const { tourId } = site.parameters;

  const endpoint = page === 1
    ? `${site.url}/final_latestupdateview.php?limitstart=${(page - 1) * 9}&limitend=9&websiteid=0&deviceview=browser&tourId=${tourId}`
    : `${site.url}/final_load_latestupdate_grid_view.php?limitstart=0&limitend=${(page - 1) * 8 + 1}&websiteid=0&deviceview=browser&tourId=${tourId}`;

  const res = await bhttp.get(endpoint);
  const elements = JSON.parse(res.body.toString());

  // total_arr is a key-value object for final_load_latestupdate_grid_view.php;
  // Object.values yields the HTML fragments whether it is an object or an array.
  return Object.values(elements.total_arr).map(html => scrape(html, site));
}
// 2019-03-23 21:48:39 +00:00
// Module interface: fetchLatest is the only function exposed to callers.
module.exports = { fetchLatest };