2019-03-04 01:46:33 +00:00
'use strict' ;
const bhttp = require ( 'bhttp' ) ;
const cheerio = require ( 'cheerio' ) ;
2019-12-05 00:26:22 +00:00
const { JSDOM } = require ( 'jsdom' ) ;
2019-03-04 03:19:03 +00:00
const moment = require ( 'moment' ) ;
2019-12-05 00:26:22 +00:00
/**
 * Asks the token endpoint for the poster and trailer of a scene.
 *
 * @param {string} entryId - Scene identifier, posted as `setId`.
 * @returns {Promise<?{poster: *, trailer: *}>} Poster and trailer taken from
 *   the response body, or `null` when the endpoint does not answer with 200.
 */
async function getTrailer(entryId) {
  const payload = { setId: entryId };
  const response = await bhttp.post('https://www.pervcity.com/gettoken.php', payload);

  if (response.statusCode !== 200) {
    return null;
  }

  const media = response.body;

  return {
    poster: media.TrailerImg,
    // Trailerfallback is only consulted when TrailerPath is empty or missing.
    trailer: media.TrailerPath || media.Trailerfallback,
  };
}
/**
 * Parses one scene card from the latest-updates feed into a release object.
 *
 * @param {string} html - HTML fragment of a single scene card.
 * @param {Object} site - Site record; `site.url` is the base of the scene link
 *   and the whole object is attached to the result.
 * @returns {Object} Release with `url`, `entryId`, `title`, `actors`, `date`,
 *   `poster`, `photos`, `rating.stars` and `site`.
 */
function scrapeLatestScene(html, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });

  const entryId = $('li').attr('id');

  const sceneLinkElement = $('#scene_title_border a');
  // Keep the template free of padding: any space here ends up inside the URL.
  const url = `${site.url}/${sceneLinkElement.attr('href')}`;
  const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes

  const actors = $('.home_model_name a')
    .toArray()
    .map(element => $(element).text().replace(/,[\u0020\u00A0\u202F]/, '')); // replace weird commas

  const date = moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate();

  const poster = $('a:nth-child(2) > img').attr('src');

  // Images without a src attribute are skipped instead of throwing on .replace().
  const photos = $('.sample-picker img')
    .toArray()
    .map(element => $(element).attr('src'))
    .filter(Boolean)
    .map(src => src.replace('tourpics', 'trailer'));

  // The rating is the number of star images present in the card.
  const stars = $('img[src*="/star.png"]').length;

  return {
    url,
    entryId,
    title,
    actors,
    date,
    poster,
    photos,
    rating: {
      stars,
    },
    site,
  };
}
2019-12-05 00:26:22 +00:00
/**
 * Builds a release object from a scene page and the trailer endpoint.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - URL the page was fetched from; copied onto the release.
 * @param {Object} site - Site record; copied onto the release.
 * @returns {Promise<Object>} Release with `entryId`, `title`, `description`,
 *   `tags`, plus `duration` when the page shows one and `poster`/`trailer`
 *   when the trailer lookup succeeds.
 */
async function scrapeScene(html, url, site) {
  const { document } = new JSDOM(html).window;
  const release = { url, site };

  release.entryId = document.querySelector('input#set_ID').value;
  release.title = document.querySelector('title').textContent;
  release.description = document.querySelector('.player_data').textContent.trim();

  const durationString = document.querySelector('.tag_lineR div:nth-child(2) span').textContent;
  // match() returns null when there are no digits; require both numbers so a
  // partial match does not produce a NaN duration.
  const durationParts = durationString.match(/\d+/g);
  if (durationParts && durationParts.length >= 2) {
    const [minutes, seconds] = durationParts;
    release.duration = Number(minutes) * 60 + Number(seconds);
  }

  release.tags = document.querySelector('meta[name="keywords"]').content.split(',');

  // getTrailer resolves to null on a non-200 response; keep the rest of the
  // release instead of throwing while destructuring null.
  const media = await getTrailer(release.entryId);
  if (media) {
    release.poster = media.poster;
    release.trailer = { src: media.trailer };
  }

  return release;
}
/**
 * Reads the scene identifier out of a fallback landing page.
 *
 * @param {string} html - Markup of the landing page.
 * @returns {string} Value of the `input#set_ID` element.
 */
function scrapeFallbackLanding(html) {
  const dom = new JSDOM(html);
  const setIdInput = dom.window.document.querySelector('input#set_ID');

  return setIdInput.value;
}
/**
 * Builds a release object from the popup markup used by fallback sites.
 *
 * @param {string} html - Markup of the scene popup.
 * @param {string} entryId - Scene identifier; copied onto the release and used
 *   for the trailer lookup.
 * @param {string} url - URL of the scene; copied onto the release.
 * @param {Object} site - Site record; copied onto the release.
 * @returns {Promise<Object>} Release with `title`, `description`, `date`,
 *   `actors` and `channel`, plus `poster`/`trailer` when the trailer lookup
 *   succeeds.
 */
async function scrapeFallbackScene(html, entryId, url, site) {
  const { document } = new JSDOM(html).window;
  const release = { url, entryId, site };

  release.title = document.querySelector('.popup_data_set_head label').textContent;
  release.description = document.querySelector('.popup_data_set_des p').textContent.trim();
  release.date = moment.utc(document.querySelector('.popup_left_top div span').textContent, 'MM-DD-YYYY').toDate();
  release.actors = Array.from(document.querySelectorAll('.popup_data_set_models a'), el => el.textContent);

  // getTrailer resolves to null on a non-200 response; keep the rest of the
  // release instead of throwing while destructuring null.
  const media = await getTrailer(release.entryId);
  if (media) {
    release.poster = media.poster;
    release.trailer = { src: media.trailer };
  }

  release.channel = document.querySelector('.popup_left_top div img').alt;

  return release;
}
2019-04-05 01:45:40 +00:00
/**
 * Fetches one page of the latest-updates feed and scrapes every scene card.
 *
 * Page 1 is requested from `final_latestupdateview.php`; later pages are
 * requested from `final_load_latestupdate_grid_view.php` with `limitstart=0`
 * and a `limitend` that grows with the page number.
 *
 * @param {Object} site - Site record; `site.url` and `site.parameters.tourId`
 *   are used to build the request.
 * @param {number} [page=1] - 1-based page number.
 * @returns {Promise<Object[]>} One release per entry of the response's
 *   `total_arr`, in `Object.values` order; empty when `total_arr` is missing.
 */
async function fetchLatest(site, page = 1) {
  const { tourId } = site.parameters;

  // The templates must not contain padding: any space would be sent as part of the URL.
  const res = page === 1
    ? await bhttp.get(`${site.url}/final_latestupdateview.php?limitstart=${(page - 1) * 9}&limitend=9&websiteid=0&deviceview=browser&tourId=${tourId}`)
    : await bhttp.get(`${site.url}/final_load_latestupdate_grid_view.php?limitstart=0&limitend=${(page - 1) * 8 + 1}&websiteid=0&deviceview=browser&tourId=${tourId}`);

  const elements = JSON.parse(res.body.toString());

  // total_arr is a key-value object for final_load_latestupdate_grid_view.php
  const latest = Object.values(elements.total_arr || {}).map(html => scrapeLatestScene(html, site));

  return latest;
}
2019-12-05 00:26:22 +00:00
/**
 * Downloads a scene page and hands it to the matching scraper.
 *
 * Fallback sites (`site.isFallback`) need a second request: the landing page
 * only yields the scene id, which is then posted to the popup endpoint to get
 * the markup that is actually scraped.
 *
 * @param {string} url - Scene URL to fetch.
 * @param {Object} site - Site record; `isFallback` selects the scraping path.
 * @returns {Promise<?Object>} The scraped release, or `null` when the scene
 *   page does not answer with 200.
 */
async function fetchScene(url, site) {
  const res = await bhttp.get(url);

  if (res.statusCode !== 200) {
    return null;
  }

  const pageHtml = res.body.toString();

  if (!site.isFallback) {
    return scrapeScene(pageHtml, url, site);
  }

  const entryId = scrapeFallbackLanding(pageHtml, url);
  const popupRes = await bhttp.post('https://www.pervcity.com/set_popupvideo.php', { setId: entryId });

  return scrapeFallbackScene(popupRes.body.toString(), entryId, url, site);
}
2019-03-23 21:48:39 +00:00
module . exports = {
fetchLatest ,
2019-12-05 00:26:22 +00:00
fetchScene ,
2019-03-23 21:48:39 +00:00
} ;