2019-03-04 01:46:33 +00:00
'use strict' ;
const bhttp = require ( 'bhttp' ) ;
const cheerio = require ( 'cheerio' ) ;
2019-03-04 03:19:03 +00:00
const moment = require ( 'moment' ) ;
2019-03-04 01:46:33 +00:00
2020-01-22 21:25:58 +00:00
const { getPhotos , fetchProfile } = require ( './gamma' ) ;
2019-10-29 02:13:56 +00:00
2019-03-04 01:46:33 +00:00
/**
 * Extract scene summaries from a video listing page.
 *
 * @param {string} html - Markup of the listing page.
 * @param {Object} site - Site record; `site.url` is prefixed to every scene link
 *   and the object itself is attached to each result.
 * @returns {Object[]} One release object per `li[data-itemtype=scene]` element.
 */
function scrape(html, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });
  const scenesElements = $('li[data-itemtype=scene]').toArray();

  return scenesElements.map((element) => {
    const sceneLinkElement = $(element).find('.sceneTitle a');

    // No padding inside the template literals: stray spaces would end up in the URLs.
    const url = `${site.url}${sceneLinkElement.attr('href')}`;
    const title = sceneLinkElement.attr('title');
    const entryId = $(element).attr('data-itemid');

    const date = moment
      .utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY')
      .toDate();

    const actors = $(element).find('.sceneActors a')
      .map((actorIndex, actorElement) => $(actorElement).attr('title'))
      .toArray();

    // The first two `.value` elements are read as likes and dislikes, in that order.
    const [likes, dislikes] = $(element).find('.value')
      .toArray()
      .map(value => Number($(value).text()));

    const poster = $(element).find('.imgLink img').attr('data-original');
    // The preview clip URL is derived from the scene's item id.
    const trailer = `https://videothumb.gammacdn.com/307x224/${entryId}.mp4`;

    return {
      url,
      entryId,
      title,
      actors,
      director: 'Mason',
      date,
      poster,
      trailer: {
        src: trailer,
        quality: 224,
      },
      rating: {
        likes,
        dislikes,
      },
      site,
    };
  });
}
2019-03-25 02:57:33 +00:00
/**
 * Extract the details of a single scene from its page.
 *
 * Three embedded scripts are parsed: the `application/ld+json` block, the
 * `dataLayer = ` assignment and the `window.ScenePlayerOptions` assignment.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - URL the page was fetched from; its last two path
 *   segments are reused to build the returned scene URL.
 * @param {Object} site - Site record, passed to `getPhotos` and attached to the result.
 * @returns {Promise<Object>} The scraped release.
 * @throws {SyntaxError} If one of the embedded scripts does not contain valid JSON.
 * @throws {TypeError} If one of the expected scripts or JSON fields is missing.
 */
async function scrapeScene(html, url, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });

  const json = $('script[type="application/ld+json"]').html();
  const json2 = $('script:contains("dataLayer = ")').html();
  const videoJson = $('script:contains("window.ScenePlayerOptions")').html();

  const data = JSON.parse(json)[0];
  // The dataLayer script is an assignment: parse from the opening `[{` and drop the final character.
  const data2 = JSON.parse(json2.slice(json2.indexOf('[{'), -1))[0];
  // The player options object runs from `{"id":` up to and including the brace of the first `};`.
  const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{"id":'), videoJson.indexOf('};') + 1));

  const pathSegments = new URL(url).pathname.split('/');
  const entryId = data2.sceneDetails.sceneId || pathSegments.slice(-1)[0];

  const title = data2.sceneDetails.sceneTitle || $('meta[name="twitter:title"]').attr('content');
  const description = data2.sceneDetails.sceneDescription || data.description || $('meta[name="twitter:description"]').attr('content');

  // date in data object is not the release date of the scene, but the date the entry was added
  const date = moment.utc($('.updatedDate').first().text(), 'MM-DD-YYYY').toDate();

  const actors = (data2.sceneDetails.sceneActors || data.actor).map(actor => actor.actorName || actor.name);

  // Rescale the rating to a five-star scale.
  const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5;
  // The first two characters of the duration string are skipped before splitting on ':'.
  const duration = moment.duration(data.duration.slice(2).split(':')).asSeconds();

  // only AllBlackX has no twitter domain, no other useful hints available
  const siteDomain = $('meta[name="twitter:domain"]').attr('content') || 'allblackx.com';
  const siteSlug = siteDomain.split('.')[0].toLowerCase();
  const siteUrl = `https://www.${siteDomain}`;

  const poster = videoData.picPreview;
  const trailer = `${videoData.playerOptions.host}${videoData.url}`;

  const photos = await getPhotos($('.picturesItem a').attr('href'), siteDomain, site);

  const tags = data.keywords.split(', ');

  return {
    url: `${siteUrl}/en/video/${pathSegments.slice(-2).join('/')}`,
    entryId,
    title,
    date,
    actors,
    director: 'Mason',
    description,
    duration,
    poster,
    photos,
    trailer: [
      {
        // A 1080p source is derived by substituting the first 'hd' in the trailer URL.
        src: trailer.replace('hd', '1080p'),
        quality: 1080,
      },
      {
        src: trailer,
        quality: parseInt(videoData.sizeOnLoad, 10),
      },
    ],
    tags,
    rating: {
      stars,
    },
    site,
    channel: siteSlug,
  };
}
2019-04-05 01:45:40 +00:00
/**
 * Fetch one page of the site's video listing and scrape the scenes on it.
 *
 * @param {Object} site - Site record; `site.url` is the base of the listing URL.
 * @param {number} [page=1] - Page number placed in the listing URL path.
 * @returns {Promise<Object[]>} Whatever `scrape` returns for the fetched page.
 */
async function fetchLatest(site, page = 1) {
  // The template literal must not contain padding: spaces would become part of the request URL.
  const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/${page}`);

  return scrape(res.body.toString(), site);
}
/**
 * Fetch the first page of the site's upcoming-videos listing and scrape it.
 *
 * @param {Object} site - Site record; `site.url` is the base of the listing URL.
 * @returns {Promise<Object[]>} Whatever `scrape` returns for the fetched page.
 */
async function fetchUpcoming(site) {
  // The template literal must not contain padding: spaces would become part of the request URL.
  const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`);

  return scrape(res.body.toString(), site);
}
/**
 * Fetch a scene page and scrape its details.
 *
 * @param {string} url - Address of the scene page; also forwarded to `scrapeScene`.
 * @param {Object} site - Site record forwarded to `scrapeScene`.
 * @returns {Promise<Object>} Whatever `scrapeScene` resolves to.
 */
async function fetchScene(url, site) {
  const res = await bhttp.get(url);

  return scrapeScene(res.body.toString(), url, site);
}
2020-01-22 21:25:58 +00:00
/**
 * Look up an actor profile through the shared `fetchProfile` helper,
 * always passing 'xempire' as its second argument.
 *
 * @param {string} actorName - Name forwarded unchanged to `fetchProfile`.
 * @returns {Promise<*>} Whatever `fetchProfile` resolves to.
 */
async function xEmpireFetchProfile(actorName) {
  return fetchProfile(actorName, 'xempire');
}
2019-03-23 21:48:39 +00:00
module . exports = {
fetchLatest ,
2020-01-22 21:25:58 +00:00
fetchProfile : xEmpireFetchProfile ,
2019-03-23 21:48:39 +00:00
fetchUpcoming ,
fetchScene ,
} ;