2020-01-22 21:25:58 +00:00
'use strict' ;
const Promise = require ( 'bluebird' ) ;
const bhttp = require ( 'bhttp' ) ;
const { JSDOM } = require ( 'jsdom' ) ;
const cheerio = require ( 'cheerio' ) ;
2020-02-01 00:15:40 +00:00
const moment = require ( 'moment' ) ;
2020-01-22 21:25:58 +00:00
2020-02-07 00:48:21 +00:00
const argv = require ( '../argv' ) ;
2020-02-08 01:49:39 +00:00
const logger = require ( '../logger' ) ( _ _filename ) ;
2020-02-07 00:48:21 +00:00
const { ex , get } = require ( '../utils/q' ) ;
2020-02-06 22:59:32 +00:00
const slugify = require ( '../utils/slugify' ) ;
2020-02-06 22:51:13 +00:00
2020-02-08 03:52:32 +00:00
/**
 * Build the absolute URL of a photo album.
 *
 * When the site defines `parameters.photos`, only the last two components of
 * `albumPath` are appended to that base; a base that does not start with
 * "http" is treated as a path relative to `site.url`.
 *
 * @param {string} albumPath - Album path as linked from the scene page.
 * @param {object} site - Site record; reads `url` and `parameters.photos`.
 * @returns {string|null} The album URL, or null when the site has neither a
 *   photos base nor a URL.
 */
function getAlbumUrl(albumPath, site) {
  const photosBase = site.parameters?.photos;

  if (photosBase) {
    const albumTail = albumPath.split('/').slice(-2).join('/');
    const prefix = /^http/.test(photosBase) ? '' : site.url;

    return `${prefix}${photosBase}/${albumTail}`;
  }

  if (site.url) {
    return `${site.url}${albumPath}`;
  }

  return null;
}
2020-01-22 21:25:58 +00:00
/**
 * Download an album page and return its body as a string.
 *
 * @param {string} url - Album page URL.
 * @returns {Promise<string>} The response body converted with `toString()`.
 */
async function fetchPhotos(url) {
  const { body } = await bhttp.get(url);

  return body.toString();
}
2020-03-06 03:28:01 +00:00
/**
 * Extract photo sources from one album page.
 *
 * Links pointing at a join / create-account page do not expose the full
 * photo; for those the thumbnail is used instead (unless disabled).
 *
 * @param {string} html - Album page markup.
 * @param {boolean} [includeThumbnails=true] - When false, links that only
 *   offer a thumbnail are dropped.
 * @returns {Array<string|string[]>} One entry per photo: a URL, or a
 *   `[preferred, fallback]` pair.
 */
function scrapePhotos(html, includeThumbnails = true) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });

  return $('.preview .imgLink, .pgFooterThumb a').toArray().map((linkEl) => {
    const link = $(linkEl);
    const url = link.attr('href');

    if (!/\/join|\/createaccount/.test(url)) {
      // URL links to full photo
      return url;
    }

    if (!includeThumbnails) return null;

    // URL links to join page instead of full photo, extract thumbnail
    // /createaccount is used by e.g. Tricky Spa native site
    const src = link.find('img').attr('src');

    // a link without an image has nothing to offer; dropped by the filter below
    if (!src) return null;

    if (src.includes('previews/')) {
      // resource often serves full photo at a modifier URL anyway, add as primary source
      const highRes = src
        .replace('previews/', '')
        .replace('_tb.jpg', '.jpg');

      // keep original thumbnail as fallback in case full photo is not available
      return [highRes, src];
    }

    return src;
  }).filter(Boolean);
}
2020-03-06 03:28:01 +00:00
/**
 * Fetch every page of a photo album and collect the photo sources.
 *
 * The first page is fetched to read the paginator; remaining pages are
 * fetched two at a time. Any failure (including on a later page) is logged
 * and results in an empty list.
 *
 * @param {string} albumPath - Album path as linked from the scene page.
 * @param {object} site - Site record, passed to `getAlbumUrl`.
 * @param {boolean} [includeThumbnails=true] - Forwarded to `scrapePhotos`.
 * @returns {Promise<Array<string|string[]>>} Photo sources, possibly empty.
 */
async function getPhotos(albumPath, site, includeThumbnails = true) {
  const albumUrl = getAlbumUrl(albumPath, site);

  if (!albumUrl) {
    return [];
  }

  try {
    const html = await fetchPhotos(albumUrl);
    const $ = cheerio.load(html, { normalizeWhitespace: true });
    const photos = scrapePhotos(html, includeThumbnails);

    // a paginator link without a trailing page number must not throw, or the
    // photos already scraped from the first page would be lost to the catch
    const lastPage = Number($('.Gamma_Paginator a.last').attr('href')?.match(/\d+$/)?.[0]);

    if (!lastPage) {
      return photos;
    }

    // pages 2..lastPage; page 1 was scraped above
    const otherPages = Array.from({ length: lastPage - 1 }, (_value, index) => index + 2);

    const otherPhotos = await Promise.map(otherPages, async (page) => {
      const pageHtml = await fetchPhotos(`${albumUrl}/${page}`);

      return scrapePhotos(pageHtml, includeThumbnails);
    }, {
      concurrency: 2,
    });

    return photos.concat(otherPhotos.flat());
  } catch (error) {
    logger.warn(`Failed to fetch ${site.name} photos from ${albumUrl}: ${error.message}`);

    return [];
  }
}
2020-02-01 00:15:40 +00:00
/**
 * Convert scene records from the search API into release objects.
 *
 * Scenes whose `sitename` differs from `site.parameters.extract` (when set)
 * are skipped.
 *
 * @param {object[]} json - Scene hits as returned by the API.
 * @param {object} site - Site record; reads `url` and `parameters`.
 * @returns {Promise<object[]>} Releases built from the accepted scenes.
 */
async function scrapeApiReleases(json, site) {
  return json.map((scene) => {
    if (site.parameters?.extract && scene.sitename !== site.parameters.extract) {
      return null;
    }

    const release = {
      entryId: scene.clip_id,
      title: scene.title,
      description: scene.description,
      duration: scene.length,
      likes: scene.ratings_up,
      dislikes: scene.ratings_down,
    };

    release.path = `/${scene.url_title}/${release.entryId}`;

    if (site.parameters?.scene) release.url = `${site.parameters.scene}${release.path}`;
    else if (site.url && site.parameters?.scene !== false) release.url = `${site.url}/en/video${release.path}`;

    release.date = moment.utc(scene.release_date, 'YYYY-MM-DD').toDate();
    release.actors = scene.actors.map((actor) => ({ name: actor.name, gender: actor.gender }));
    release.director = scene.directors?.[0]?.name || null;

    release.tags = [
      ...(scene.master_categories || []),
      ...(scene.categories || []).map((category) => category.name),
    ].filter(Boolean); // some categories don't have a name

    const pictures = scene.pictures || {};
    const posterPath = pictures.resized || (pictures.nsfw?.top && Object.values(pictures.nsfw.top)[0]);

    if (posterPath) {
      release.poster = [
        `https://images-evilangel.gammacdn.com/movies${posterPath}`,
        `https://transform.gammacdn.com/movies${posterPath}`,
      ];
    }

    release.movie = `${site.url}/en/movie/${scene.url_movie_title}/${scene.movie_id}`;

    return release;
  }).filter(Boolean);
}
2020-02-07 02:40:11 +00:00
/**
 * Scrape the scene tiles of an overview page into release stubs.
 *
 * @param {string} html - Overview page markup.
 * @param {object|null} site - Site record; when null, `networkUrl` is used
 *   as the base for scene links.
 * @param {string|null} [networkUrl] - With a site: any truthy value selects
 *   `site.network.url` as link base. Without a site: the base URL itself.
 * @param {boolean} [hasTeaser=true] - Whether to add teaser video sources
 *   derived from the entry ID.
 * @returns {object[]} One release stub per scene element found.
 */
function scrapeAll(html, site, networkUrl, hasTeaser = true) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });
  const scenesElements = $('li[data-itemtype=scene], div[data-itemtype=scenes]').toArray();

  return scenesElements.map((element) => {
    const release = {};
    const sceneEl = $(element);
    const sceneLinkElement = sceneEl.find('.sceneTitle a, .tlcTitle a');

    let baseUrl = networkUrl;

    if (site) {
      baseUrl = networkUrl ? site.network.url : site.url;
    }

    release.url = `${baseUrl}${sceneLinkElement.attr('href')}`;
    release.title = sceneLinkElement.attr('title');
    release.entryId = sceneEl.attr('data-itemid');

    const dateEl = sceneEl.find('.sceneDate, .tlcSpecsDate .tlcDetailsValue').text() || null;

    if (dateEl) {
      release.date = moment
        .utc(dateEl, ['MM-DD-YYYY', 'YYYY-MM-DD'])
        .toDate();
    }

    release.actors = sceneEl.find('.sceneActors a, .tlcActors a')
      .map((actorIndex, actorElement) => $(actorElement).attr('title'))
      .toArray();

    // the first two `.value` elements are read as likes and dislikes, in that order
    [release.likes, release.dislikes] = sceneEl.find('.value')
      .toArray()
      .map((value) => Number($(value).text()));

    const posterEl = sceneEl.find('.imgLink img, .tlcImageItem');

    release.poster = posterEl.attr('data-original') || posterEl.attr('src');

    if (hasTeaser) {
      release.teaser = [
        { src: `https://videothumb.gammacdn.com/600x339/${release.entryId}.mp4` },
        { src: `https://videothumb.gammacdn.com/307x224/${release.entryId}.mp4` },
      ];
    }

    return release;
  });
}
2020-03-06 03:28:01 +00:00
/**
 * Scrape a scene page into a release object.
 *
 * Data comes from three places on the page: the `application/ld+json`
 * script (parsed as an array, first two entries used), the
 * `window.ScenePlayerOptions` script, and the markup itself. Both scripts
 * are optional; fields that depend on a missing one are left unset.
 *
 * @param {string} html - Scene page markup.
 * @param {string} url - Scene URL; stored on the release and used as the
 *   entry ID source when `scrapedRelease.path` is absent.
 * @param {object} site - Site record, forwarded to `getPhotos`.
 * @param {object} [scrapedRelease] - Previously scraped stub; `path` is read.
 * @param {string|null} [mobileHtml] - Markup of the mobile scene page, if any.
 * @returns {Promise<object>} The release; also carries the cheerio handle as `$`.
 */
async function scrapeScene(html, url, site, scrapedRelease, mobileHtml) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });
  const m$ = mobileHtml && cheerio.load(mobileHtml, { normalizeWhitespace: true });
  const release = { $, url };

  const json = $('script[type="application/ld+json"]').html();
  const videoJson = $('script:contains("window.ScenePlayerOptions")').html();

  const [data, data2] = json ? JSON.parse(json) : [];
  const videoData = videoJson && JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1));
  const sceneInfos = videoData?.playerOptions?.sceneInfos;

  [release.entryId] = (scrapedRelease?.path || new URL(url).pathname).split('/').slice(-1);
  release.title = sceneInfos?.sceneTitle || data?.name;

  // date in data object is not the release date of the scene, but the date the entry was added; only use as fallback
  const dateString = $('.updatedDate').first().text().trim();
  const dateMatch = dateString.match(/\d{2,4}[-/]\d{2}[-/]\d{2,4}/)?.[0];

  if (dateMatch) release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate();
  else if (data?.dateCreated) release.date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();
  // NOTE(review): unlike the branches above, this stores the raw player value rather than a Date — confirm the format before parsing
  else if (sceneInfos?.sceneReleaseDate) release.date = sceneInfos.sceneReleaseDate;

  if (data) {
    release.description = data.description;

    if (data.director?.[0]?.name) release.director = data.director[0].name;
    else if (data2?.director?.[0]?.name) release.director = data2.director[0].name;

    // a missing rating yields NaN, which is falsy and therefore skipped
    const { ratingValue, bestRating } = data.aggregateRating || {};
    const stars = (ratingValue / bestRating) * 5;
    if (stars) release.rating = { stars };

    // the first two characters of the duration string are dropped before parsing
    if (data.duration) release.duration = moment.duration(data.duration.slice(2)).asSeconds();
  }

  const actors = data?.actor || data2?.actor;

  if (actors) {
    release.actors = actors.map((actor) => ({
      name: actor.name,
      gender: actor.gender,
    }));
  }

  const hasTrans = release.actors?.some((actor) => actor.gender === 'shemale');
  const rawTags = data?.keywords?.split(', ') || data2?.keywords?.split(', ') || [];
  release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags;

  const channel = data?.productionCompany?.name || $('.studioLink a, .siteLink a').attr('title')?.trim();
  if (channel) release.channel = slugify(channel, { delimiter: '' });

  // sometimes links to just https://images02-fame.gammacdn.com/
  if (videoData?.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview;

  const photoLink = $('.picturesItem a').attr('href');
  const mobilePhotos = m$
    ? m$('.preview-displayer a img').map((photoIndex, photoEl) => m$(photoEl).attr('src')).toArray()
    : [];

  if (photoLink) {
    // only get thumbnails when less than 3 mobile photos are available
    const photos = await getPhotos(photoLink, site, mobilePhotos.length < 3);
    release.photos = [...photos, ...mobilePhotos];
  } else {
    release.photos = mobilePhotos;
  }

  // without player data there is no trailer URL to derive the variants from
  if (videoData?.url) {
    const trailer = `${videoData.playerOptions?.host ?? ''}${videoData.url}`;

    // each variant swaps the first 'hd' in the trailer URL for its own label;
    // the 'hd' entry is the unmodified URL (probably 540p)
    const variants = [
      ['sm', 240],
      ['med', 360],
      ['big', 480],
      ['hd', parseInt(videoData.sizeOnLoad, 10)],
      ['720p', 720],
      ['1080p', 1080],
      ['4k', 2160],
    ];

    release.trailer = variants.map(([label, quality]) => ({
      src: trailer.replace('hd', label),
      quality,
    }));
  }

  return release;
}
2020-01-22 21:25:58 +00:00
/**
 * Find the profile link for an actor in a search results page.
 *
 * The link is matched on its `title` attribute, case-insensitively.
 *
 * @param {string} html - Search results markup.
 * @param {string} url - Search URL (unused here).
 * @param {string} actorName - Name to look for.
 * @returns {string|null} The `href` of the matching link, or null.
 */
function scrapeActorSearch(html, url, actorName) {
  const { document } = new JSDOM(html).window;

  // quotes and backslashes in the name would otherwise terminate or corrupt the attribute selector
  const escapedName = actorName.replace(/["\\]/g, '\\$&');
  const actorLink = document.querySelector(`a[title="${escapedName}" i]`);

  return actorLink ? actorLink.href : null;
}
2020-02-07 00:48:21 +00:00
/**
 * Collect an actor's releases across all paginated release pages.
 *
 * Pages are fetched one after another until a page has no
 * `.Gamma_Paginator a.next` element.
 *
 * @param {string} profileUrl - Actor profile URL; its origin is the base for
 *   scene links and its last two path components identify the actor.
 * @param {Function} getActorReleasesUrl - `(profilePath, page) => url`.
 * @param {number} [page=1] - Page to start from.
 * @param {object[]} [accReleases=[]] - Releases to prepend to the result.
 * @returns {Promise<object[]>} All releases found.
 */
async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, accReleases = []) {
  const { origin, pathname } = new URL(profileUrl);
  const profilePath = `/${pathname.split('/').slice(-2).join('/')}`;

  let releases = accReleases;

  // each page tells us whether another one follows, so pages are necessarily fetched in sequence
  for (let currentPage = page; ; currentPage += 1) {
    const { html, qu } = await get(getActorReleasesUrl(profilePath, currentPage));

    releases = releases.concat(scrapeAll(html, null, origin));

    if (!qu('.Gamma_Paginator a.next')) {
      return releases;
    }
  }
}
/**
 * Scrape an actor profile page.
 *
 * Attribute texts are expected in "Label: value" form; an attribute that
 * does not match its expected form is skipped instead of aborting the
 * whole profile.
 *
 * @param {string} html - Profile page markup.
 * @param {string} url - Profile URL, used when fetching releases.
 * @param {string} actorName - Name stored on the profile.
 * @param {string} _siteSlug - Unused.
 * @param {Function} [getActorReleasesUrl] - When given and
 *   `argv.withReleases` is set, the actor's releases are fetched as well.
 * @returns {Promise<object>} The profile.
 */
async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl) {
  const { q } = ex(html);

  // "Label: value" -> "value"; undefined when there is nothing after a colon
  const attributeValue = (selector) => q(selector, true)?.split(':')[1]?.trim();
  // first run of digits in the attribute text, as a number
  const attributeNumber = (selector) => {
    const digits = q(selector, true)?.match(/\d+/)?.[0];
    return digits === undefined ? undefined : Number(digits);
  };

  const profile = {
    name: actorName,
  };

  const avatar = q('img.actorPicture');

  if (avatar) {
    // larger sizes usually available, provide fallbacks
    profile.avatar = [
      ...['500x750', '240x360', '200x300'].map((size) => avatar.src.replace(/\d+x\d+/, size)),
      avatar.src,
    ];
  }

  profile.description = q('.actorBio p:not(.bioTitle)', true);

  const hair = attributeValue('.actorProfile .attribute_hair_color');
  const height = attributeNumber('.actorProfile .attribute_height');
  const weight = attributeNumber('.actorProfile .attribute_weight');
  const alias = attributeValue('.actorProfile .attribute_alternate_names');
  const nationality = attributeValue('.actorProfile .attribute_home');

  if (hair) profile.hair = hair;
  if (height !== undefined) profile.height = height;
  if (weight !== undefined) profile.weight = weight;
  if (alias) profile.aliases = alias.split(', ');
  if (nationality) profile.nationality = nationality;

  if (getActorReleasesUrl && argv.withReleases) {
    profile.releases = await fetchActorReleases(url, getActorReleasesUrl);
  }

  return profile;
}
2020-02-01 00:15:40 +00:00
/**
 * Build an actor profile from a search API actor record.
 *
 * @param {object} data - Actor record; reads the `male` / `female` /
 *   `shemale` / `trans` flags, `description`, `attributes` and `pictures`.
 * @param {object[]} releases - Scene records for the actor; `url_title` and
 *   `clip_id` are used to build scene URLs.
 * @param {string} siteSlug - Used as the domain name in scene URLs.
 * @returns {object} The profile.
 */
function scrapeApiProfile(data, releases, siteSlug) {
  const profile = {};

  // later flags win when several are set
  if (data.male === 1) profile.gender = 'male';
  if (data.female === 1) profile.gender = 'female';
  if (data.shemale === 1 || data.trans === 1) profile.gender = 'transsexual';

  if (data.description) profile.description = data.description.trim();

  const attributes = data.attributes || {};

  if (attributes.ethnicity) profile.ethnicity = attributes.ethnicity;
  if (attributes.eye_color) profile.eyes = attributes.eye_color;
  if (attributes.hair_color) profile.hair = attributes.hair_color;

  // the last listed picture is used as avatar
  const avatarPath = Object.values(data.pictures || {}).pop();
  if (avatarPath) profile.avatar = `https://images01-evilangel.gammacdn.com/actors${avatarPath}`;

  profile.releases = releases.map((release) => `https://${siteSlug}.com/en/video/${release.url_title}/${release.clip_id}`);

  return profile;
}
2020-02-07 18:53:16 +00:00
/**
 * Assemble the Algolia multi-query endpoint URL for a set of credentials.
 *
 * @param {string} appId - Algolia application ID; lowercased for the host name.
 * @param {string} apiKey - Algolia API key.
 * @returns {{appId: string, apiKey: string, userAgent: string, apiUrl: string}}
 *   The credentials, the (unencoded) agent string, and the endpoint URL.
 */
function getApiUrl(appId, apiKey) {
  const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0';

  // the agent string contains spaces and semicolons, which must be escaped inside a query string
  const query = [
    `x-algolia-agent=${encodeURIComponent(userAgent)}`,
    `x-algolia-application-id=${encodeURIComponent(appId)}`,
    `x-algolia-api-key=${encodeURIComponent(apiKey)}`,
  ].join('&');

  const apiUrl = `https://${appId.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries?${query}`;

  return {
    appId,
    apiKey,
    userAgent,
    apiUrl,
  };
}
/**
 * Resolve the Algolia credentials for a site.
 *
 * Credentials configured on the site take precedence; otherwise the referer
 * page is downloaded and the credentials are read from the first line that
 * mentions `apiKey`, which is expected to hold a JSON object terminated by `};`.
 *
 * @param {string} referer - Page to extract the credentials from.
 * @param {object} [site] - Site record; reads `parameters.appId` and `parameters.apiKey`.
 * @returns {Promise<object>} The result of `getApiUrl`.
 * @throws {Error} When the page holds no API key line or no Algolia
 *   configuration; JSON parse errors propagate as thrown by `JSON.parse`.
 */
async function fetchApiCredentials(referer, site) {
  if (site?.parameters?.appId && site?.parameters?.apiKey) {
    return getApiUrl(site.parameters.appId, site.parameters.apiKey);
  }

  const res = await bhttp.get(referer);
  const body = res.body.toString();
  const apiLine = body.split('\n').find((bodyLine) => bodyLine.includes('apiKey'));

  if (!apiLine) {
    throw new Error(`No Gamma API key found for ${referer}`);
  }

  const apiSerial = apiLine.slice(apiLine.indexOf('{'), apiLine.indexOf('};') + 1);
  const apiData = JSON.parse(apiSerial);
  const algolia = apiData.api?.algolia;

  if (!algolia) {
    // report the offending page instead of an anonymous TypeError
    throw new Error(`No Gamma API key found for ${referer}`);
  }

  const { applicationID: appId, apiKey } = algolia;

  return getApiUrl(appId, apiKey);
}
2020-02-01 00:15:40 +00:00
/**
 * Fetch a page of a site's scenes from the search API.
 *
 * @param {object} site - Site record; reads `slug`, `url`, `network.url` and
 *   `parameters` (`referer`, `networkReferer`, credentials).
 * @param {number} [page=1] - 1-based page number; the API is queried 0-based.
 * @param {boolean} [upcoming=false] - Query upcoming instead of released scenes.
 * @returns {Promise<object[]>} Scraped releases, or an empty list when the
 *   API does not answer with status 200 and a hit list.
 */
async function fetchApiLatest(site, page = 1, upcoming = false) {
  const refererBase = site.parameters?.networkReferer ? site.network.url : site.url;
  const referer = site.parameters?.referer || `${refererBase}/en/videos`;
  const { apiUrl } = await fetchApiCredentials(referer, site);

  const res = await bhttp.post(apiUrl, {
    requests: [
      {
        indexName: 'all_scenes',
        params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]&filters=sitename:${site.slug} OR channels.id:${site.slug}`,
      },
    ],
  }, {
    headers: {
      Referer: referer,
    },
    encodeJSON: true,
  });

  if (res.statusCode === 200 && res.body.results?.[0]?.hits) {
    return scrapeApiReleases(res.body.results[0].hits, site);
  }

  return [];
}
/**
 * Fetch the first page of a site's upcoming scenes from the search API.
 *
 * @param {object} site - Site record, forwarded to `fetchApiLatest`.
 * @returns {Promise<object[]>} Whatever `fetchApiLatest` resolves to.
 */
async function fetchApiUpcoming(site) {
  const firstPage = 1;
  const upcomingOnly = true;

  return fetchApiLatest(site, firstPage, upcomingOnly);
}
2020-02-08 03:52:32 +00:00
/**
 * Build the URL of a page of a site's latest scenes.
 *
 * A `parameters.latest` value that does not start with "http" is treated as
 * a path relative to `site.url`; the page number is appended directly.
 *
 * @param {object} site - Site record; reads `url` and `parameters.latest`.
 * @param {number} [page=1] - Page number.
 * @returns {string} The overview page URL.
 */
function getLatestUrl(site, page = 1) {
  const latest = site.parameters?.latest;

  if (latest) {
    const prefix = /^http/.test(latest) ? '' : site.url;

    return `${prefix}${latest}${page}`;
  }

  return `${site.url}/en/videos/AllCategories/0/${page}`;
}
/**
 * Build the URL of a site's upcoming scenes page.
 *
 * A `parameters.upcoming` value that does not start with "http" is treated
 * as a path relative to `site.url`.
 *
 * @param {object} site - Site record; reads `url` and `parameters.upcoming`.
 * @returns {string} The upcoming page URL.
 */
function getUpcomingUrl(site) {
  const upcoming = site.parameters?.upcoming;

  if (upcoming) {
    const prefix = /^http/.test(upcoming) ? '' : site.url;

    return `${prefix}${upcoming}`;
  }

  return `${site.url}/en/videos/AllCategories/0/1/upcoming`;
}
2020-02-01 00:15:40 +00:00
/**
 * Fetch and scrape a page of a site's latest scenes.
 *
 * @param {object} site - Site record, used for the URL and as link base.
 * @param {number} [page=1] - Page number.
 * @returns {Promise<object[]>} Release stubs from `scrapeAll`.
 */
async function fetchLatest(site, page = 1) {
  const url = getLatestUrl(site, page);
  const res = await bhttp.get(url);

  return scrapeAll(res.body.toString(), site);
}
/**
 * Fetch and scrape a site's upcoming scenes page.
 *
 * Teaser sources are not generated for these stubs (`hasTeaser` is passed
 * as false).
 *
 * @param {object} site - Site record, used for the URL and as link base.
 * @returns {Promise<object[]>} Release stubs from `scrapeAll`.
 */
async function fetchUpcoming(site) {
  const url = getUpcomingUrl(site);
  const res = await bhttp.get(url);

  return scrapeAll(res.body.toString(), site, null, false);
}
2020-03-06 18:26:23 +00:00
/**
 * Resolve the URL to scrape a scene from.
 *
 * The scene path is `release.path` when available; otherwise it is the path
 * of `url` with the language, section and site/network slug components
 * removed. That path is appended to the mobile base (if given) or to
 * `site.parameters.deep` (if set); with neither, `url` is returned unchanged.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Site record; reads `slug`, `network.slug` and `parameters.deep`.
 * @param {object} [release] - Release stub; `path` is read.
 * @param {string} [mobile] - Base URL of the mobile site.
 * @returns {string} The URL to fetch.
 */
function getDeepUrl(url, site, release, mobile) {
  // sites without a network only contribute their own slug
  const filter = new Set(['en', 'video', 'scene', site.slug, site.network?.slug]);

  const pathname = release?.path || new URL(url).pathname
    .split('/')
    .filter((component) => !filter.has(component))
    .join('/'); // reduce to scene ID and title slug

  if (mobile) {
    return `${mobile}${pathname}`;
  }

  if (site.parameters?.deep) {
    return `${site.parameters.deep}${pathname}`;
  }

  return url;
}
2020-02-07 18:53:16 +00:00
/**
 * Fetch and scrape a scene page, together with its mobile counterpart.
 *
 * The mobile page only contributes extra photos, so a failed mobile request
 * is logged and ignored instead of failing the scene.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Site record; `parameters.deep === false` disables
 *   deep scraping, `parameters.mobile` (or the network's) is the mobile base.
 * @param {object} [release] - Previously scraped stub.
 * @returns {Promise<object|null>} `release` unchanged when deep scraping is
 *   disabled; the scraped scene with `deepUrl` added; or null when the scene
 *   page does not answer with status 200.
 */
async function fetchScene(url, site, release) {
  if (site.parameters?.deep === false) {
    return release;
  }

  const deepUrl = getDeepUrl(url, site, release);
  // NOTE(review): without a mobile base, getDeepUrl returns the same URL as deepUrl,
  // so the page is requested a second time with the mobile user agent — confirm this is intended
  const mobileUrl = getDeepUrl(url, site, release, site.parameters?.mobile || site.network?.parameters?.mobile);

  if (!deepUrl) {
    return null;
  }

  const mobileRequest = mobileUrl
    ? bhttp.get(mobileUrl, {
      headers: {
        // don't redirect to main site
        'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Mobile Safari/537.36',
      },
    }).catch((error) => {
      logger.warn(`Failed to fetch mobile page ${mobileUrl}: ${error.message}`);

      return null;
    })
    : null;

  const [res, mobileRes] = await Promise.all([
    bhttp.get(deepUrl),
    mobileRequest,
  ]);

  if (res.statusCode !== 200) {
    return null;
  }

  const mobileBody = mobileRes?.statusCode === 200 ? mobileRes.body.toString() : null;
  const scene = await scrapeScene(res.body.toString(), url, site, release, mobileBody);

  return { ...scene, deepUrl };
}
/**
 * Fetch the first page of an actor's scenes on a site from the search API.
 *
 * @param {string} actorName - Actor name as known to the API.
 * @param {string} apiUrl - Search endpoint including credentials.
 * @param {string} siteSlug - Site slug; used as filter and in the referer.
 * @returns {Promise<object[]>} Scene hits, or an empty list when the API
 *   does not answer with status 200 and at least one hit.
 */
async function fetchActorScenes(actorName, apiUrl, siteSlug) {
  const res = await bhttp.post(apiUrl, {
    requests: [
      {
        indexName: 'all_scenes',
        params: `query=&filters=sitename:${siteSlug}&hitsPerPage=36&maxValuesPerFacet=100&page=0&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["actors.name:${actorName}"]]`,
      },
    ],
  }, {
    headers: {
      Referer: `https://www.${siteSlug}.com/en/videos`,
    },
    encodeJSON: true,
  });

  // a response without a result list is treated like one without hits
  const hits = res.body?.results?.[0]?.hits;

  if (res.statusCode === 200 && hits?.length > 0) {
    return hits;
  }

  return [];
}
2020-02-07 00:48:21 +00:00
/**
 * Look an actor up through the site search and scrape their profile page.
 *
 * @param {string} actorName - Actor name.
 * @param {string} siteSlug - Site slug, used as the domain name.
 * @param {boolean} [altSearchUrl] - Use the alternative search URL layout.
 * @param {Function} [getActorReleasesUrl] - Forwarded to `scrapeProfile`.
 * @returns {Promise<object|null>} The profile, or null when the search or
 *   profile request fails or the actor is not among the search results.
 */
async function fetchProfile(actorName, siteSlug, altSearchUrl, getActorReleasesUrl) {
  // every run of whitespace becomes a '+', not just the first one
  const actorSlug = actorName.toLowerCase().replace(/\s+/g, '+');
  const searchUrl = altSearchUrl
    ? `https://www.${siteSlug}.com/en/search/${actorSlug}/1/actor`
    : `https://www.${siteSlug}.com/en/search/${siteSlug}/actor/${actorSlug}`;

  const searchRes = await bhttp.get(searchUrl);

  if (searchRes.statusCode !== 200) {
    return null;
  }

  const actorUrl = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName);

  if (!actorUrl) {
    return null;
  }

  const url = `https://${siteSlug}.com${actorUrl}`;
  const actorRes = await bhttp.get(url);

  if (actorRes.statusCode !== 200) {
    return null;
  }

  return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl);
}
/**
 * Look an actor up through the search API and build their profile.
 *
 * A hit is accepted only when its slugified name equals the slugified
 * `actorName`.
 *
 * @param {string} actorName - Actor name.
 * @param {string} siteSlug - Site slug; used for the referer and scene URLs.
 * @returns {Promise<object|null>} The profile, or null when the API returns
 *   no matching actor.
 */
async function fetchApiProfile(actorName, siteSlug) {
  // the name is embedded in a query string, so '&', '=', '+' and '#' must be escaped too
  const actorSlug = encodeURIComponent(actorName);
  const referer = `https://www.${siteSlug}.com/en/search`;

  const { apiUrl } = await fetchApiCredentials(referer);

  const res = await bhttp.post(apiUrl, {
    requests: [
      {
        indexName: 'all_actors',
        params: `query=${actorSlug}`,
      },
    ],
  }, {
    headers: {
      Referer: referer,
    },
    encodeJSON: true,
  });

  const hits = res.body?.results?.[0]?.hits;

  if (res.statusCode !== 200 || !(hits?.length > 0)) {
    return null;
  }

  const actorData = hits.find((actor) => slugify(actor.name) === slugify(actorName));

  if (!actorData) {
    return null;
  }

  const actorScenes = await fetchActorScenes(actorData.name, apiUrl, siteSlug);

  return scrapeApiProfile(actorData, actorScenes, siteSlug);
}
2020-01-22 21:25:58 +00:00
module . exports = {
2020-02-01 00:15:40 +00:00
fetchApiLatest ,
fetchApiProfile ,
fetchApiUpcoming ,
fetchLatest ,
2020-01-22 21:25:58 +00:00
fetchProfile ,
2020-02-01 00:15:40 +00:00
fetchScene ,
fetchUpcoming ,
getPhotos ,
scrapeApiProfile ,
scrapeApiReleases ,
2020-01-22 21:25:58 +00:00
scrapeProfile ,
2020-02-01 00:15:40 +00:00
scrapeAll ,
scrapeScene ,
2020-01-22 21:25:58 +00:00
} ;