2020-01-22 21:25:58 +00:00
'use strict' ;
const Promise = require ( 'bluebird' ) ;
2020-03-07 01:35:13 +00:00
const util = require ( 'util' ) ;
2020-01-22 21:25:58 +00:00
const { JSDOM } = require ( 'jsdom' ) ;
2020-02-01 00:15:40 +00:00
const moment = require ( 'moment' ) ;
2021-01-30 00:12:42 +00:00
const format = require ( 'template-format' ) ;
2020-01-22 21:25:58 +00:00
2020-02-08 01:49:39 +00:00
const logger = require ( '../logger' ) ( _ _filename ) ;
2020-11-22 03:07:09 +00:00
const qu = require ( '../utils/qu' ) ;
2020-09-04 01:07:28 +00:00
const http = require ( '../utils/http' ) ;
2020-02-06 22:59:32 +00:00
const slugify = require ( '../utils/slugify' ) ;
2020-02-06 22:51:13 +00:00
2021-08-09 08:31:12 +00:00
/**
 * Build the Algolia multi-query endpoint for a given application.
 *
 * @param {string} appId - Algolia application ID; lower-cased for the host name.
 * @param {string} apiKey - Algolia API key.
 * @returns {{ appId: string, apiKey: string, userAgent: string, apiUrl: string }}
 *   The credentials together with the assembled endpoint URL.
 */
function getApiUrl(appId, apiKey) {
	const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0';
	const host = `${appId.toLowerCase()}-dsn.algolia.net`;
	const apiUrl = `https://${host}/1/indexes/*/queries?x-algolia-agent=${userAgent}&x-algolia-application-id=${appId}&x-algolia-api-key=${apiKey}`;

	return {
		appId,
		apiKey,
		userAgent,
		apiUrl,
	};
}
/**
 * Resolve Algolia credentials for a site.
 *
 * Credentials configured on `site.parameters` (`appId` and `apiKey`) are used
 * directly; otherwise the referer page is fetched and the first line that
 * mentions `apiKey` is parsed as an embedded JSON object.
 *
 * @param {string} referer - Page URL to fetch when no credentials are configured.
 * @param {object} [site] - Site entity; only `parameters.appId` / `parameters.apiKey` are read.
 * @returns {Promise<object>} Result of `getApiUrl()`.
 * @throws {Error} When the page has no `apiKey` line or the line holds no `{ ... };` object.
 */
async function fetchApiCredentials(referer, site) {
	const presetAppId = site?.parameters?.appId;
	const presetApiKey = site?.parameters?.apiKey;

	if (presetAppId && presetApiKey) {
		return getApiUrl(presetAppId, presetApiKey);
	}

	const res = await http.get(referer);
	const body = res.body.toString();
	const apiLine = body.split('\n').find((bodyLine) => bodyLine.includes('apiKey'));

	if (!apiLine) {
		throw new Error(`No Gamma API key found for ${referer}`);
	}

	const start = apiLine.indexOf('{');
	const end = apiLine.indexOf('};');

	// without both delimiters the slice below would be empty or truncated and JSON.parse would fail opaquely
	if (start === -1 || end < start) {
		throw new Error(`Unable to parse Gamma API configuration for ${referer}`);
	}

	const apiData = JSON.parse(apiLine.slice(start, end + 1));
	const { applicationID: appId, apiKey } = apiData.api.algolia;

	return getApiUrl(appId, apiKey);
}
2020-02-08 03:52:32 +00:00
/**
 * Resolve the URL of a photo album.
 *
 * - With `site.parameters.photos` set, the last two segments of `albumPath`
 *   are appended to it; a value not starting with `http` is prefixed with `site.url`.
 * - Otherwise `albumPath` is appended to `site.url`, unless photos are
 *   explicitly disabled with `parameters.photos === false`.
 *
 * @param {string} albumPath - Album path as found on the scene page.
 * @param {object} site - Site entity (`url`, `parameters.photos`).
 * @returns {string|null} Album URL, or null when none can be derived.
 */
function getAlbumUrl(albumPath, site) {
	const photosParam = site.parameters?.photos;

	if (photosParam) {
		const albumSlug = albumPath.split('/').slice(-2).join('/');
		const base = /^http/.test(photosParam) ? photosParam : `${site.url}${photosParam}`;

		return `${base}/${albumSlug}`;
	}

	if (site.url && photosParam !== false) {
		return `${site.url}${albumPath}`;
	}

	return null;
}
2020-01-22 21:25:58 +00:00
/**
 * Fetch an album page and return its `item` as provided by `qu.get()`.
 *
 * @param {string} url - Album page URL.
 * @returns {Promise<object|undefined>} The `item` property of the `qu.get()` result.
 */
async function fetchPhotos(url) {
	const { item } = await qu.get(url);

	return item;
}
2021-11-29 01:49:07 +00:00
/**
 * Extract photo sources from an album page.
 *
 * Links pointing at the photo itself are returned as-is. Links pointing at a
 * join/createaccount page are replaced by the thumbnail they wrap; thumbnails
 * under `previews/` additionally get a guessed full-size URL as primary
 * source, with the thumbnail kept as fallback.
 *
 * @param {{ query: object }} item - Scoped query helper for the album page.
 * @param {boolean} [includeThumbnails=true] - When false, plain thumbnails
 *   without a guessable full-size URL are dropped.
 * @returns {Array<string|string[]>} Photo URLs; array entries are fallback lists.
 */
function scrapePhotos({ query }, includeThumbnails = true) {
	const joinPattern = /\/join|\/createaccount/;

	return query.all('.preview .imgLink, .pgFooterThumb a').map((linkEl) => {
		const url = linkEl.href;

		if (!joinPattern.test(url)) {
			// URL links to full photo
			return url;
		}

		// URL links to join page instead of full photo, extract thumbnail
		// /createaccount is used by e.g. Tricky Spa native site
		const src = query.img(linkEl);

		if (/previews\//.test(src)) {
			// resource often serves full photo at a modifier URL anyway, add as primary source
			const highRes = src
				.replace('previews/', '')
				.replace('_tb.jpg', '.jpg');

			// keep original thumbnail as fallback in case full photo is not available
			return [highRes, src];
		}

		return includeThumbnails ? src : null;
	}).filter(Boolean);
}
2020-03-06 03:28:01 +00:00
/**
 * Collect the photos of an album, following its paginator when present.
 *
 * Any failure while fetching or scraping is logged as a warning and results
 * in an empty list, so photo retrieval never fails the calling scrape.
 *
 * @param {string} albumPath - Album path as found on the scene page.
 * @param {object} site - Site entity, passed on to `getAlbumUrl()`; `name` is used for logging.
 * @param {boolean} [includeThumbnails=true] - Passed on to `scrapePhotos()`.
 * @returns {Promise<Array<string|string[]>>} Photos of all pages, or [] when no album URL exists or an error occurs.
 */
async function getPhotos(albumPath, site, includeThumbnails = true) {
	const albumUrl = getAlbumUrl(albumPath, site);

	if (!albumUrl) {
		return [];
	}

	try {
		const item = await fetchPhotos(albumUrl);
		const photos = scrapePhotos(item, includeThumbnails);

		// a paginator link without a trailing page number means there is nothing more to fetch;
		// indexing the failed match directly would throw and discard the photos scraped above
		const lastPage = item.query.url('.Gamma_Paginator a.last')?.match(/\d+$/)?.[0];

		if (!lastPage) {
			return photos;
		}

		// page 1 has already been scraped, fetch pages 2..lastPage
		const otherPages = Array.from({ length: Number(lastPage) }, (_value, index) => index + 1).slice(1);

		const otherPhotos = await Promise.map(otherPages, async (page) => {
			const pageItem = await fetchPhotos(`${albumUrl}/${page}`);

			return scrapePhotos(pageItem, includeThumbnails);
		}, {
			concurrency: 2,
		});

		return photos.concat(otherPhotos.flat());
	} catch (error) {
		logger.warn(`Failed to fetch ${site.name} photos from ${albumUrl}: ${error.message}`);

		return [];
	}
}
2021-08-09 08:31:12 +00:00
/**
 * Request the signed photoset for an entry from the site's
 * `/media/signPhotoset/` endpoint.
 *
 * @param {string|number} entryId - Photoset ID.
 * @param {object} site - Site entity; `url` is used as origin.
 * @returns {Promise<Array>} Values of the response body, or [] when the
 *   request was not OK or returned no body.
 */
async function getFullPhotos(entryId, site) {
	const res = await http.get(`${site.url}/media/signPhotoset/${entryId}`, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	});

	// Object.values() throws on null/undefined, so require a body as well as an OK status
	if (res.ok && res.body) {
		return Object.values(res.body);
	}

	return [];
}
/**
 * Query the `all_photosets` index for the thumbnails of a photoset.
 *
 * @param {string|number} entryId - Photoset ID, matched against `set_id`.
 * @param {object} site - Site entity; supplies the referer origin and credentials.
 * @param {object} [parameters] - Optional `referer` / `networkReferer` overrides.
 * @returns {Promise<string[][]>} Per picture a list of two candidate URLs, or
 *   [] when the request failed or no photoset was found.
 */
async function getThumbs(entryId, site, parameters) {
	const referer = parameters?.referer || `${parameters?.networkReferer ? site.parent.url : site.url}/en/videos`;
	const { apiUrl } = await fetchApiCredentials(referer, site);

	const res = await http.post(apiUrl, {
		requests: [
			{
				indexName: 'all_photosets',
				params: `query=&page=0&facets=[]&tagFilters=&facetFilters=[["set_id:${entryId}"]]`,
			},
		],
	}, {
		headers: {
			Referer: referer,
		},
	}, {
		encodeJSON: true,
	});

	// guard every level: a result without hits must yield [] rather than a TypeError
	const pictures = res.ok && res.body?.results?.[0]?.hits?.[0]?.set_pictures;

	if (!pictures) {
		return [];
	}

	return pictures.map((img) => ([
		`https://transform.gammacdn.com/photo_set${img.thumb_path}`,
		`https://images-evilangel.gammacdn.com/photo_set${img.thumb_path}`,
	]));
}
/**
 * Fetch full photos and thumbnails concurrently, and pad the full photos
 * with thumbnails for every position the full set does not cover.
 *
 * @param {string|number} entryId - Photoset ID.
 * @param {object} site - Site entity.
 * @param {object} [parameters] - Passed on to both fetchers.
 * @returns {Promise<Array>} Full photos followed by the remaining thumbnails.
 */
async function getPhotosApi(entryId, site, parameters) {
	const pending = [
		getFullPhotos(entryId, site, parameters),
		getThumbs(entryId, site, parameters),
	];

	const results = await Promise.all(pending);
	const fullPhotos = results[0];
	const remainingThumbs = results[1].slice(fullPhotos.length);

	return fullPhotos.concat(remainingThumbs);
}
2020-02-01 00:15:40 +00:00
/**
 * Convert Algolia scene hits into release objects.
 *
 * Hits are skipped when `site.parameters.extract` is set and differs from the
 * hit's `sitename`, or when `site.parameters.filterExclusive` is set and the
 * hit is available on more than one site.
 *
 * @param {object[]} json - Scene hits.
 * @param {object} site - Site entity (`url`, `parameters`).
 * @returns {Promise<object[]>} Releases; declared async, although nothing is awaited.
 */
async function scrapeApiReleases(json, site) {
	return json.map((scene) => {
		if (site.parameters?.extract && scene.sitename !== site.parameters.extract) {
			return null;
		}

		if (site.parameters?.filterExclusive && scene.availableOnSite?.length > 1) {
			return null;
		}

		const release = {
			entryId: scene.clip_id,
			title: scene.title,
			description: scene.description,
			duration: scene.length,
			likes: scene.ratings_up,
			dislikes: scene.ratings_down,
		};

		release.path = `/${scene.url_title}/${release.entryId}`;

		if (site.parameters?.scene) release.url = `${site.parameters.scene}${release.path}`;
		else if (site.url && site.parameters?.scene !== false) release.url = `${site.url}/en/video${release.path}`;

		release.date = moment.utc(scene.release_date, 'YYYY-MM-DD').toDate();
		release.director = scene.directors?.[0]?.name || null;

		// hits do not necessarily carry every collection; treat a missing one as empty
		release.actors = (scene.actors || []).map((actor) => ({
			entryId: actor.actor_id,
			name: actor.name,
			gender: actor.gender,
			avatar: [
				`https://images03-openlife.gammacdn.com/actors/${actor.actor_id}/${actor.actor_id}_500x750.jpg`,
				`https://images03-openlife.gammacdn.com/actors/${actor.actor_id}/${actor.actor_id}_240x360.jpg`,
				`https://images03-openlife.gammacdn.com/actors/${actor.actor_id}/${actor.actor_id}_200x300.jpg`,
			],
		}));

		release.tags = (scene.master_categories || [])
			.concat(scene.categories?.map((category) => category.name))
			.filter(Boolean); // some categories don't have a name

		const posterPath = scene.pictures?.resized
			|| (scene.pictures?.nsfw?.top && Object.values(scene.pictures.nsfw.top)[0]);

		if (posterPath) {
			release.poster = [
				`https://images-evilangel.gammacdn.com/movies${posterPath}`,
				`https://transform.gammacdn.com/movies${posterPath}`,
			];
		}

		return release;
	}).filter(Boolean);
}
2021-11-29 01:49:07 +00:00
/**
 * Scrape a list of scene tiles into base releases.
 *
 * @param {Array<{ query: object, el: object }>} scenes - One scoped query
 *   helper and root element per scene tile.
 * @param {object|null} site - Site entity supplying the URL origin.
 * @param {*} [networkUrl] - When truthy, scene URLs are resolved against the
 *   parent's URL instead of the site's own; when the parent URL is
 *   unavailable the value itself is used as origin.
 * @param {boolean} [hasTeaser=true] - Whether to add teaser video sources.
 * @returns {object[]} Base releases.
 */
function scrapeAll(scenes, site, networkUrl, hasTeaser = true) {
	// some callers pass no site at all, so never dereference it unguarded
	const origin = networkUrl
		? site?.parent?.url || networkUrl
		: site?.url;

	return scenes.map(({ query, el }) => {
		const release = {};

		release.url = query.url('.sceneTitle a, .tlcTitle a', 'href', { origin });

		// NOTE(review): the second argument looks like it was meant to be part of the selector
		// ('.sceneTitle a, .tlcTitle a') — confirm against the query helper's signature before changing
		release.title = query.cnt('.sceneTitle a', 'tlcTitle a', 'title');
		release.entryId = el.dataset.itemid;

		release.date = query.date('.sceneDate, .tlcSpecsDate .tlcDetailsValue', ['MM-DD-YYYY', 'YYYY-MM-DD']);
		release.actors = query.cnts('.sceneActors a, .tlcActors a', ' title');

		[release.likes, release.dislikes] = query.all('.value').map((likeEl) => query.number(likeEl));

		release.poster = query.img('.imgLink img, .tlcImageItem', 'data-original') || query.img('.imgLink img, .tlcImageItem');

		if (hasTeaser) {
			release.teaser = [
				{ src: `https://videothumb.gammacdn.com/600x339/${release.entryId}.mp4` },
				{ src: `https://videothumb.gammacdn.com/307x224/${release.entryId}.mp4` },
			];
		}

		release.channel = query.el('.fromSite a', 'title')?.replace('.com', '');

		return release;
	});
}
2021-11-29 01:49:07 +00:00
/**
 * Scrape a scene page into a release.
 *
 * Data is combined from the page's ld+json script, the inline script that
 * mentions ScenePlayerOptions, and the page markup. All of these are
 * optional: a page without structured data yields a sparser release rather
 * than an error.
 *
 * @param {{ query: object }} item - Scoped query helper for the scene page.
 * @param {string} url - Scene URL; the entry ID is taken from its path unless `baseRelease.path` is set.
 * @param {object} channel - Channel entity; `url` is the origin for the movie
 *   link, and the entity is passed on to `getPhotos()`.
 * @param {object} [baseRelease] - Previously scraped base release.
 * @param {{ query: object }} [mobileItem] - Query helper for the mobile page, if fetched.
 * @param {object} [options] - Scrape options; `includePhotos` enables album fetching.
 * @returns {Promise<object>} The release.
 */
async function scrapeScene({ query }, url, channel, baseRelease, mobileItem, options) {
	const release = { query }; // used by XEmpire scraper to resolve channel-specific details

	const json = query.html('script[type="application/ld+json"]');
	const videoJson = query.htmls('script').find((script) => /ScenePlayerOptions/i.test(script));

	// ld+json may hold a single object or a list; normalise to a list before unpacking
	const [data, data2] = json ? [].concat(JSON.parse(json)) : [];
	const videoData = videoJson && JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1));
	const sceneInfos = videoData?.playerOptions?.sceneInfos;

	release.entryId = (baseRelease?.path || new URL(url).pathname).match(/\/(\d{2,})(\/|$)/)?.[1];
	release.title = sceneInfos?.sceneTitle || data?.name;
	release.description = data?.description;

	release.date = query.date('.updatedDate', ['MM-DD-YYYY', 'YYYY-MM-DD'])
		|| qu.extractDate(data?.dateCreated, 'YYYY-MM-DD')
		|| sceneInfos?.sceneReleaseDate;

	release.actors = (data?.actor || data2?.actor)?.map((actor) => ({
		name: actor.name,
		gender: actor.gender,
	})) || [];

	// without ld+json there is nothing to derive duration or rating from
	release.duration = data ? qu.durationToSeconds(data.duration) : null;
	release.director = data?.director?.[0]?.name || data2?.director?.[0]?.name;

	release.tags = data?.keywords?.split(', ') || data2?.keywords?.split(', ') || [];

	const rating = data?.aggregateRating;
	release.stars = (rating && (rating.ratingValue / rating.bestRating) * 5) || null;

	release.channel = slugify(data?.productionCompany?.name
		|| query.el('.studioLink a, .siteLink a', 'title')
		|| query.cnt('.siteNameSpan')?.toLowerCase().replace('.com', '')
		|| query.meta('meta[name="twitter:domain"]')?.replace('.com', ''), '');

	if (videoData?.picPreview && new URL(videoData.picPreview).pathname.length > 1) {
		// sometimes links to just https://images02-fame.gammacdn.com/
		const poster = new URL(videoData.picPreview);

		release.poster = [
			`${poster.origin}${poster.pathname}`,
			videoData.picPreview,
		];
	}

	const photoLink = query.url('.picturesItem a');
	const mobilePhotos = mobileItem?.query.imgs('.preview-displayer a img') || [];

	if (photoLink && options?.includePhotos) {
		const photos = await getPhotos(photoLink, channel, mobilePhotos.length < 3); // only get thumbnails when less than 3 mobile photos are available

		release.photos = photos.length < 7
			? [...photos, ...mobilePhotos] // probably only teaser photos available, supplement with mobile album
			: photos;
	} else {
		release.photos = mobilePhotos;
	}

	const trailer = videoData?.playerOptions?.host && videoData.url
		? `${videoData.playerOptions.host}${videoData.url}`
		: null;

	if (trailer) {
		const variant = (suffix, quality) => ({ src: trailer.replace('hd', suffix), quality });

		release.trailer = [
			variant('sm', 240),
			variant('med', 360),
			variant('big', 480),
			{
				// probably 540p
				src: trailer,
				quality: parseInt(videoData.sizeOnLoad, 10),
			},
			variant('720p', 720),
			variant('1080p', 1080),
			variant('4k', 2160),
		];
	}

	const movieUrl = query.url('.dvdLink', 'href', { origin: channel.url });

	if (movieUrl) {
		release.movie = {
			url: movieUrl,
			title: query.el('.dvdLink', 'title'),
			entryId: movieUrl.match(/\/(\d+)(\/|$)/)?.[1],
			// use the page-scoped helper; the module-level qu.imgs is not bound to this document
			covers: [query.imgs('.dvdLink img')],
		};
	}

	return release;
}
2021-08-09 08:31:12 +00:00
/**
 * Convert a single Algolia scene hit into a release.
 *
 * @param {object} data - Scene hit.
 * @param {object} site - Site entity; `url` is the origin for actor and movie URLs.
 * @param {object} [options] - Scrape options (`includePhotos`, `parameters.actors` URL template).
 * @returns {Promise<object>} The release.
 */
async function scrapeSceneApi(data, site, options) {
	const release = {};

	release.entryId = data.clip_id;
	release.title = data.title;
	release.duration = data.length;

	// a Date object is always truthy, so decide on the raw timestamp whether to fall back to release_date
	const timestamp = Number(data.date);

	release.date = timestamp > 0
		? new Date(timestamp * 1000)
		: qu.parseDate(data.release_date, 'YYYY-MM-DD');

	release.actors = (data.actors || []).map((actor) => ({
		entryId: actor.actor_id,
		name: actor.name,
		gender: actor.gender,
		url: options?.parameters?.actors
			? format(options.parameters.actors, { id: actor.actor_id, slug: actor.url_name })
			// the ID belongs to the actor, not to the scene
			: qu.prefixUrl(`/en/pornstar/${actor.url_name}/${actor.actor_id}`, site.url),
	}));

	release.tags = (data.categories || []).map((category) => category.name);

	// only build poster URLs for sizes that are actually present
	const posterPaths = [data.pictures?.['1920x1080'], data.pictures?.resized].filter(Boolean);

	if (posterPaths.length > 0) {
		release.poster = posterPaths.flatMap((posterPath) => [
			`https://transform.gammacdn.com/movies${posterPath}`,
			`https://images-evilangel.gammacdn.com/movies${posterPath}`,
		]);
	}

	if (data.photoset_id && options?.includePhotos) {
		release.photos = await getPhotosApi(data.photoset_id, site, options.parameters);
	}

	if (data.trailers) {
		release.trailer = Object.entries(data.trailers).map(([quality, source]) => ({ src: source, quality }));
	}

	if (data.movie_id) {
		release.movie = {
			entryId: data.movie_id,
			title: data.movie_title,
			url: qu.prefixUrl(`/en/movie/${data.url_movie_title}/${data.movie_id}`, site.url),
		};
	}

	release.channel = data.sitename;
	release.qualities = data.download_sizes;

	return release;
}
2021-01-25 22:01:07 +00:00
/**
 * Fetch the trailer URL for a movie from the dvdtrailer page.
 *
 * The host and path are read from JSON-like `"host": "..."` and
 * `"url": "..."` pairs in the page source, with escaped slashes unescaped.
 *
 * @param {object} release - Movie release; `entryId` identifies the trailer page.
 * @returns {Promise<string|null>} Trailer URL, or null when there is no entry
 *   ID, the request fails, or host or path cannot be found.
 */
async function fetchMovieTrailer(release) {
	if (!release.entryId) {
		return null;
	}

	const url = `https://www.evilangel.com/en/dvdtrailer/${release.entryId}`;
	const res = await qu.get(url);

	if (!res.ok) {
		return null;
	}

	const unescapeSlashes = (value) => value?.replace(/\\\//g, '/');

	// stay inside the quoted value; a greedy match would swallow subsequent fields on the same line
	const trailerHost = unescapeSlashes(res.html.match(/"host":\s*"([^"]*\.com)"/)?.[1]);
	const trailerPath = unescapeSlashes(res.html.match(/"url":\s*"([^"]*\.mp4)"/)?.[1]);

	if (trailerHost && trailerPath) {
		return qu.prefixUrl(trailerPath, trailerHost);
	}

	return null;
}
/**
 * Scrape a movie page into a release.
 *
 * Title, date and actors come from `window.dataLayer[0].dvdDetails`; covers,
 * description and tags come from the markup. A page without `dvdDetails`
 * yields a release without those fields rather than an error.
 *
 * @param {{ query: object, html: string }} item - Query helper and raw HTML of the movie page.
 * @param {object} window - Window of the page; `dataLayer` is read from it.
 * @param {string} url - Movie URL; the entry ID is taken from its path.
 * @param {object} entity - Channel or network entity, passed on to `scrapeAll()`.
 * @param {object} options - Scrape options; `includeTrailers` enables the trailer request.
 * @returns {Promise<object>} The movie release.
 */
async function scrapeMovie({ query, html }, window, url, entity, options) {
	const release = {};
	const data = window.dataLayer?.[0]?.dvdDetails;

	release.entryId = new URL(url).pathname.match(/\/(\d+)(\/|$)/)?.[1];

	release.covers = [
		query.img('.frontCoverImg', 'href'),
		query.img('.backCoverImg', 'href'),
	];

	release.description = query.cnt('.descriptionText');

	release.date = qu.extractDate(data?.dvdReleaseDate);
	release.title = data?.dvdName;

	release.actors = data?.dvdActors?.map((actor) => ({ name: actor.actorName, entryId: actor.actorId })) || [];
	release.tags = query.cnts('.dvdCol a');

	// NOTE(review): scrapeAll() maps over a list of { query, el } items, but receives the raw
	// HTML here — this looks stale and needs the scene tiles initialised first; confirm and fix
	release.scenes = scrapeAll(html, entity, entity.url);

	if (options.includeTrailers) {
		release.trailer = await fetchMovieTrailer(release);
	}

	return release;
}
2020-01-22 21:25:58 +00:00
/**
 * Find the profile link for an actor on a search results page.
 *
 * @param {string} html - Search results page source.
 * @param {string} url - Search URL (unused; kept for interface compatibility).
 * @param {string} actorName - Actor name, matched case-insensitively against link titles.
 * @returns {string|null} The `href` of the matching link, or null when there is none.
 */
function scrapeActorSearch(html, url, actorName) {
	const { document } = new JSDOM(html).window;

	// quotes or backslashes in the name would otherwise terminate the attribute value and invalidate the selector
	const escapedName = actorName.replace(/["\\]/g, '\\$&');
	const actorLink = document.querySelector(`a[title="${escapedName}" i]`);

	return actorLink ? actorLink.href : null;
}
2021-01-30 00:12:42 +00:00
/**
 * Collect an actor's releases by following the scene list paginator.
 *
 * The page URL comes from the `context.parameters.actorScenes` template when
 * set, otherwise from `getActorReleasesUrl(profilePath, page)`. When a page
 * cannot be resolved or fetched, the releases accumulated so far are returned
 * instead of being discarded.
 *
 * @param {string} profileUrl - Actor profile URL; its last two path segments form the profile path.
 * @param {Function} [getActorReleasesUrl] - Builds a page URL from `(profilePath, page)`.
 * @param {number} [page=1] - Page to fetch.
 * @param {object[]} [accReleases=[]] - Releases accumulated from previous pages.
 * @param {object} [context] - Scrape context; `parameters.actorScenes` is read from it.
 * @returns {Promise<object[]>} All releases found.
 */
async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, accReleases = [], context) {
	const { origin, pathname } = new URL(profileUrl);
	const profilePath = `/${pathname.split('/').slice(-2).join('/')}`;
	const urlTemplate = context?.parameters?.actorScenes;

	const url = (urlTemplate && format(urlTemplate, { path: profilePath, page }))
		|| getActorReleasesUrl?.(profilePath, page);

	if (!url) {
		return accReleases;
	}

	const res = await qu.get(url);

	if (!res.ok) {
		return accReleases;
	}

	// NOTE(review): scrapeAll() maps over { query, el } items and reads site.url, but receives raw
	// HTML and a null site here — this call looks stale; confirm how scene tiles should be initialised
	const releases = scrapeAll(res.item.html, null, origin);
	const nextPage = res.item.query.url('.Gamma_Paginator a.next');

	if (nextPage) {
		return fetchActorReleases(profileUrl, getActorReleasesUrl, page + 1, accReleases.concat(releases), context);
	}

	return accReleases.concat(releases);
}
2021-11-29 01:49:07 +00:00
/**
 * Scrape an actor profile page.
 *
 * Attributes are read from elements whose text has the shape "Label: value";
 * attributes that are absent or do not have that shape are omitted from the
 * profile instead of raising.
 *
 * @param {{ query: object }} item - Scoped query helper for the profile page.
 * @param {string} url - Profile URL, passed on to `fetchActorReleases()`.
 * @param {string} actorName - Name to store on the profile.
 * @param {string} _siteSlug - Unused.
 * @param {Function} [getActorReleasesUrl] - Passed on to `fetchActorReleases()`.
 * @param {boolean} [withReleases] - Whether to fetch the actor's releases.
 * @param {object} [context] - Scrape context; `parameters.actorScenes` is read from it.
 * @returns {Promise<object>} The profile.
 */
async function scrapeProfile({ query }, url, actorName, _siteSlug, getActorReleasesUrl, withReleases, context) {
	// text after the first colon, or undefined when the element or the colon is missing
	const attributeValue = (selector) => query.cnt(selector)?.split(':')[1]?.trim();
	// first run of digits, or undefined when the element has none
	const attributeNumber = (selector) => query.cnt(selector)?.match(/\d+/)?.[0];

	const avatar = query.el('img.actorPicture');

	const profile = {
		name: actorName,
	};

	if (avatar) {
		// larger sizes usually available, provide fallbacks
		profile.avatar = [
			avatar.src.replace(/\d+x\d+/, '500x750'),
			avatar.src.replace(/\d+x\d+/, '240x360'),
			avatar.src.replace(/\d+x\d+/, '200x300'),
			avatar.src,
		];
	}

	profile.description = query.cnt('.actorBio p:not(.bioTitle)');

	const hair = attributeValue('.actorProfile .attribute_hair_color');
	const height = attributeNumber('.actorProfile .attribute_height');
	const weight = attributeNumber('.actorProfile .attribute_weight');
	const alias = attributeValue('.actorProfile .attribute_alternate_names');
	const nationality = attributeValue('.actorProfile .attribute_home');

	if (hair) profile.hair = hair;
	if (height) profile.height = Number(height);
	if (weight) profile.weight = Number(weight);
	if (alias) profile.aliases = alias.split(', ');
	if (nationality) profile.nationality = nationality;

	if ((getActorReleasesUrl || context?.parameters?.actorScenes) && withReleases) {
		profile.releases = await fetchActorReleases(url, getActorReleasesUrl, 1, [], context);
	}

	return profile;
}
2020-02-01 00:15:40 +00:00
/**
 * Convert an Algolia actor hit into a profile.
 *
 * @param {object} data - Actor hit; gender flags, `description`, `attributes` and `pictures` are read.
 * @param {object[]} [releases] - Scene hits of the actor, turned into scene URLs.
 * @param {string} siteSlug - Site slug, used as the domain name of the scene URLs.
 * @returns {object} The profile; only fields present on the hit are set.
 */
function scrapeApiProfile(data, releases, siteSlug) {
	const profile = {};

	if (data.male === 1) profile.gender = 'male';
	if (data.female === 1) profile.gender = 'female';
	if (data.shemale === 1 || data.trans === 1) profile.gender = 'transsexual';

	if (data.description) profile.description = data.description.trim();

	// attributes and pictures are optional on a hit
	const attributes = data.attributes || {};

	if (attributes.ethnicity) profile.ethnicity = attributes.ethnicity;
	if (attributes.eye_color) profile.eyes = attributes.eye_color;
	if (attributes.hair_color) profile.hair = attributes.hair_color;

	// reversed so the last listed size becomes the primary source
	const avatarPaths = Object.values(data.pictures || {}).reverse();

	if (avatarPaths.length > 0) {
		profile.avatar = avatarPaths.map((avatarPath) => `https://images01-evilangel.gammacdn.com/actors${avatarPath}`);
	}

	if (releases) {
		profile.releases = releases.map((release) => `https://${siteSlug}.com/en/video/${release.url_title}/${release.clip_id}`);
	}

	return profile;
}
2021-08-09 08:31:12 +00:00
/**
 * Fetch a page of scenes for a site from the `all_scenes` index.
 *
 * @param {object} site - Site entity; `slug` filters on sitename or channel ID.
 * @param {number} [page=1] - One-based page number (the API is zero-based).
 * @param {*} [preData] - Unused.
 * @param {*} [include] - Unused.
 * @param {boolean} [upcoming=false] - Query upcoming scenes instead of released ones.
 * @returns {Promise<object[]|number>} Scraped releases, or the HTTP status when the request did not return hits.
 */
async function fetchLatestApi(site, page = 1, preData, include, upcoming = false) {
	const referer = site.parameters?.referer || `${site.parameters?.networkReferer ? site.parent.url : site.url}/en/videos`;
	const { apiUrl } = await fetchApiCredentials(referer, site);

	const params = [
		'query=',
		'hitsPerPage=36',
		'maxValuesPerFacet=100',
		`page=${page - 1}`,
		`facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]`,
		`filters=sitename:${site.slug} OR channels.id:${site.slug}`,
	].join('&');

	const res = await http.post(apiUrl, {
		requests: [
			{
				indexName: 'all_scenes',
				params,
			},
		],
	}, {
		headers: {
			Referer: referer,
		},
	}, {
		encodeJSON: true,
	});

	if (res.status === 200 && res.body.results?.[0]?.hits) {
		return scrapeApiReleases(res.body.results[0].hits, site);
	}

	return res.status;
}
2020-01-30 00:14:31 +00:00
2021-08-09 08:31:12 +00:00
/**
 * Fetch a page of upcoming scenes; delegates to `fetchLatestApi()` with the
 * upcoming flag set.
 *
 * @param {object} site - Site entity.
 * @param {number} [page=1] - One-based page number.
 * @param {*} [preData] - Passed through.
 * @param {*} [include] - Passed through.
 * @returns {Promise<object[]|number>} See `fetchLatestApi()`.
 */
async function fetchUpcomingApi(site, page = 1, preData, include) {
	const upcoming = true;

	return fetchLatestApi(site, page, preData, include, upcoming);
}
2021-08-09 08:31:12 +00:00
/**
 * Fetch a single scene from the `all_scenes` index by clip ID.
 *
 * @param {string} url - Scene URL; the clip ID is taken from its path unless `baseRelease.path` is set.
 * @param {object} site - Site entity; supplies the referer origin and credentials.
 * @param {object} [baseRelease] - Previously scraped base release.
 * @param {object} [options] - Scrape options, passed on to `scrapeSceneApi()`.
 * @returns {Promise<object|number|null>} The scraped release, null when the
 *   index has no matching scene, or the HTTP status when the request failed.
 */
async function fetchSceneApi(url, site, baseRelease, options) {
	const referer = options?.parameters?.referer || `${site.parameters?.networkReferer ? site.parent.url : site.url}/en/videos`;
	const { apiUrl } = await fetchApiCredentials(referer, site);

	const entryId = (baseRelease?.path || new URL(url).pathname).match(/\/(\d{2,})(\/|$)/)?.[1];

	const res = await http.post(apiUrl, {
		requests: [
			{
				indexName: 'all_scenes',
				params: `query=&page=0&facets=[]&tagFilters=&facetFilters=[["clip_id:${entryId}"]]`,
			},
			{
				indexName: 'all_scenes',
				params: 'query=&page=0&hitsPerPage=1&attributesToRetrieve=[]&attributesToHighlight=[]&attributesToSnippet=[]&tagFilters=&analytics=false&clickAnalytics=false&facets=clip_id',
			},
		],
	}, {
		headers: {
			Referer: referer,
		},
	}, {
		encodeJSON: true,
	});

	const hits = res.status === 200 && res.body.results?.[0]?.hits;

	if (!hits) {
		return res.status;
	}

	// a successful query can still come back empty; there is no scene to scrape then
	if (!hits[0]) {
		return null;
	}

	return scrapeSceneApi(hits[0], site, options);
}
2020-02-08 03:52:32 +00:00
/**
 * Build the URL of a page of latest scenes.
 *
 * `site.parameters.latest` may be an absolute URL or a path relative to
 * `site.url`. If it contains `%d` the page number is substituted there,
 * otherwise it is appended. Without the parameter the default listing path
 * is used.
 *
 * @param {object} site - Site entity (`url`, `parameters.latest`).
 * @param {number} page - Page number.
 * @returns {string} Listing URL.
 */
function getLatestUrl(site, page) {
	const latest = site.parameters?.latest;

	if (!latest) {
		return `${site.url}/en/videos/AllCategories/0/${page}`;
	}

	const template = /^http/.test(latest) ? latest : `${site.url}${latest}`;

	return /%d/.test(latest)
		? util.format(template, page)
		: `${template}${page}`;
}
/**
 * Build the URL of the upcoming scenes listing.
 *
 * `site.parameters.upcoming` may be an absolute URL or a path relative to
 * `site.url`; without it the default upcoming path is used.
 *
 * @param {object} site - Site entity (`url`, `parameters.upcoming`).
 * @returns {string} Listing URL.
 */
function getUpcomingUrl(site) {
	const upcoming = site.parameters?.upcoming;

	if (!upcoming) {
		return `${site.url}/en/videos/AllCategories/0/1/upcoming`;
	}

	return /^http/.test(upcoming)
		? `${upcoming}`
		: `${site.url}${upcoming}`;
}
2020-02-01 00:15:40 +00:00
/**
 * Fetch and scrape a page of latest scenes from the site's listing page.
 *
 * @param {object} site - Site entity.
 * @param {number} [page=1] - Page number.
 * @returns {Promise<object[]|number>} Base releases, or the HTTP status when the request was not OK.
 */
async function fetchLatest(site, page = 1) {
	const url = getLatestUrl(site, page);
	const res = await qu.getAll(url, 'li[data-itemtype=scene], div[data-itemtype=scenes]');

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.items, site);
}
/**
 * Fetch and scrape the upcoming scenes listing. Teasers are not added to
 * upcoming releases.
 *
 * @param {object} site - Site entity.
 * @returns {Promise<object[]|number>} Base releases, or the HTTP status when the request was not OK.
 */
async function fetchUpcoming(site) {
	const url = getUpcomingUrl(site);
	const res = await qu.getAll(url, 'li[data-itemtype=scene], div[data-itemtype=scenes]');

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.items, site, null, false);
}
2020-03-07 01:35:13 +00:00
/**
 * Resolve the URL to fetch a scene from.
 *
 * The scene path is `baseRelease.path` when available, otherwise the path of
 * `url` with the language, type and site segments removed, leaving the title
 * slug and scene ID.
 *
 * - With a `mobile` template containing `%d`, the scene ID is substituted into it.
 * - With any other `mobile` value and a known scene ID, the scene path is appended to it.
 * - Otherwise `site.parameters.deep` is used as prefix when set, and the
 *   original URL when not.
 *
 * @param {string} url - Original scene URL.
 * @param {object} site - Site entity (`slug`, optional `parent.slug`, `parameters.deep`).
 * @param {object} [baseRelease] - Previously scraped base release (`path`, `entryId`).
 * @param {string} [mobile] - Mobile site URL or template.
 * @returns {string} URL to fetch.
 */
function getDeepUrl(url, site, baseRelease, mobile) {
	// not every site has a parent; an undefined entry in the set is harmless
	const filter = new Set(['en', 'video', 'scene', site.slug, site.parent?.slug]);

	const pathname = baseRelease?.path || new URL(url).pathname
		.split('/')
		.filter((component) => !filter.has(component))
		.join('/'); // reduce to scene ID and title slug

	// the ID is usually the final segment, so accept end-of-path as well as a trailing slash
	const sceneId = baseRelease?.entryId || pathname.match(/\/(\d+)(\/|$)/)?.[1];

	if (mobile && /%d/.test(mobile)) {
		return util.format(mobile, sceneId);
	}

	if (mobile && sceneId) {
		return `${mobile}${pathname}`;
	}

	if (site.parameters?.deep) {
		return `${site.parameters.deep}${pathname}`;
	}

	return url;
}
2021-01-26 23:21:58 +00:00
/**
 * Fetch and scrape a scene page, together with its mobile counterpart.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Site entity; `parameters.deep === false` disables deep scraping.
 * @param {object} [baseRelease] - Previously scraped base release; returned as-is when deep scraping is disabled.
 * @param {object} [options] - Scrape options, passed on to `scrapeScene()`.
 * @returns {Promise<object|null>} The scraped scene including `deepUrl`, the
 *   base release when deep scraping is disabled, or null when the page could not be fetched.
 */
async function fetchScene(url, site, baseRelease, options) {
	if (site.parameters?.deep === false) {
		return baseRelease;
	}

	const deepUrl = getDeepUrl(url, site, baseRelease);
	const mobileUrl = getDeepUrl(url, site, baseRelease, site.parameters?.mobile || site.parent?.parameters?.mobile);

	if (!deepUrl) {
		return null;
	}

	const [res, mobileRes] = await Promise.all([
		qu.get(deepUrl),
		mobileUrl && qu.get(mobileUrl, null, {
			headers: {
				// don't redirect to main site
				'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Mobile Safari/537.36',
			},
		}),
	]);

	if (res.status !== 200) {
		return null;
	}

	// the mobile page only supplements the scene, so a failed mobile request is not fatal
	const mobileItem = mobileRes?.status === 200 ? mobileRes.item : null;
	const scene = await scrapeScene(res.item, url, site, baseRelease, mobileItem, options);

	return { ...scene, deepUrl };
}
2021-01-25 22:01:07 +00:00
/**
 * Fetch a movie page with its scripts executed, and scrape it.
 *
 * @param {string} url - Movie URL.
 * @param {object} channel - Channel entity, passed on to `scrapeMovie()`.
 * @param {object} [baseRelease] - Unused.
 * @param {object} [options] - Scrape options, passed on to `scrapeMovie()`.
 * @returns {Promise<object|number>} The movie release, or the HTTP status when the request was not OK.
 */
async function fetchMovie(url, channel, baseRelease, options) {
	// scripts must run for the page's data layer to be populated on the window
	const extract = { runScripts: 'dangerously' };
	const response = await qu.get(url, null, null, { extract });

	if (!response.ok) {
		return response.status;
	}

	const { item, window } = response;

	return scrapeMovie(item, window, url, channel, options);
}
2020-02-01 00:15:40 +00:00
/**
 * Fetch the first page of an actor's scenes on a site from the `all_scenes` index.
 *
 * @param {string} actorName - Actor name, matched against `actors.name`.
 * @param {string} apiUrl - Algolia endpoint, as built by `getApiUrl()`.
 * @param {string} siteSlug - Site slug; filters on sitename and forms the referer domain.
 * @returns {Promise<object[]>} Scene hits, or [] when the request failed or found nothing.
 */
async function fetchActorScenes(actorName, apiUrl, siteSlug) {
	const res = await http.post(apiUrl, {
		requests: [
			{
				indexName: 'all_scenes',
				params: `query=&filters=sitename:${siteSlug}&hitsPerPage=36&maxValuesPerFacet=100&page=0&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["actors.name:${actorName}"]]`,
			},
		],
	}, {
		headers: {
			Referer: `https://www.${siteSlug}.com/en/videos`,
		},
	}, {
		encodeJSON: true,
	});

	// a 200 response is not guaranteed to carry results or hits
	const hits = res.status === 200 ? res.body?.results?.[0]?.hits : null;

	return hits?.length > 0 ? hits : [];
}
2021-01-30 00:12:42 +00:00
/**
 * Find an actor through the site's search page and scrape their profile page.
 *
 * @param {Object} actor - Actor to look up; only `name` is read.
 * @param {Object} context - Scraper context; the site slug is taken from
 *   `entity.slug`, falling back to `site.slug` and `network.slug`.
 * @param {Object} include - `include.scenes` is forwarded to scrapeProfile.
 * @param {boolean} [altSearchUrl] - Use the `/en/search/<actor>/1/actor` URL
 *   layout instead of `/en/search/<site>/actor/<actor>`.
 * @param {*} [getActorReleasesUrl] - Forwarded to scrapeProfile.
 * @returns {Promise<*>} The result of scrapeProfile, or null when the search
 *   or profile request does not return status 200 or no actor URL is found.
 */
async function fetchProfile({ name: actorName }, context, include, altSearchUrl, getActorReleasesUrl) {
	const siteSlug = context.entity.slug || context.site?.slug || context.network?.slug;

	// replace every whitespace run, not just the first, so names of three or more words stay URL-safe
	const actorSlug = actorName.toLowerCase().replace(/\s+/g, '+');
	const searchUrl = altSearchUrl
		? `https://www.${siteSlug}.com/en/search/${actorSlug}/1/actor`
		: `https://www.${siteSlug}.com/en/search/${siteSlug}/actor/${actorSlug}`;

	const searchRes = await http.get(searchUrl);

	if (searchRes.status !== 200) {
		return null;
	}

	const actorUrl = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName);

	if (!actorUrl) {
		return null;
	}

	// the search result is concatenated directly onto the bare domain
	const url = `https://${siteSlug}.com${actorUrl}`;
	const actorRes = await qu.get(url);

	if (actorRes.status !== 200) {
		return null;
	}

	return scrapeProfile(actorRes.item, url, actorName, siteSlug, getActorReleasesUrl, include.scenes, context);
}
2020-09-08 01:26:34 +00:00
/**
 * Look an actor up in the `all_actors` index and scrape their profile from the API data.
 *
 * @param {Object} actor - Actor to look up; only `name` is read.
 * @param {Object} context - Scraper context; the site slug is taken from
 *   `entity.slug`, falling back to `site.slug` and `network.slug`.
 * @param {Object} include - When `include.releases` is truthy, the actor's
 *   scenes are fetched as well and passed to scrapeApiProfile.
 * @returns {Promise<*>} The result of scrapeApiProfile, or null when the
 *   request does not return status 200 or no hit matches the actor's slugified name.
 */
async function fetchApiProfile({ name: actorName }, context, include) {
	const siteSlug = context.entity.slug || context.site?.slug || context.network?.slug;

	// encodeURIComponent also escapes `&`, `=`, `+` and `#`, which would otherwise break out of the query value
	const actorQuery = encodeURIComponent(actorName);
	const referer = `https://www.${siteSlug}.com/en/search`;

	const { apiUrl } = await fetchApiCredentials(referer);

	const res = await http.post(apiUrl, {
		requests: [
			{
				indexName: 'all_actors',
				params: `query=${actorQuery}`,
			},
		],
	}, {
		headers: {
			Referer: referer,
		},
	}, {
		encodeJSON: true,
	});

	if (res.status !== 200) {
		return null;
	}

	// a 200 response without a first result set must not throw
	const hits = res.body?.results?.[0]?.hits ?? [];
	const targetSlug = slugify(actorName);
	const actorData = hits.find((actor) => slugify(actor.name) === targetSlug);

	if (!actorData) {
		return null;
	}

	// NOTE(review): fetchProfile gates on include.scenes while this reads include.releases; confirm which flag callers set
	const actorScenes = include.releases && await fetchActorScenes(actorData.name, apiUrl, siteSlug);

	return scrapeApiProfile(actorData, actorScenes, siteSlug);
}
2020-01-22 21:25:58 +00:00
module . exports = {
2021-08-09 08:31:12 +00:00
fetchApiLatest : fetchLatestApi ,
2020-05-14 02:26:05 +00:00
fetchApiProfile ,
2021-08-09 08:31:12 +00:00
fetchApiUpcoming : fetchUpcomingApi ,
2020-05-14 02:26:05 +00:00
fetchLatest ,
2021-08-09 08:31:12 +00:00
fetchLatestApi ,
2021-01-25 22:01:07 +00:00
fetchMovie ,
2020-05-14 02:26:05 +00:00
fetchProfile ,
fetchScene ,
2021-08-09 08:31:12 +00:00
fetchSceneApi ,
2020-05-14 02:26:05 +00:00
fetchUpcoming ,
2021-08-09 08:31:12 +00:00
fetchUpcomingApi ,
2021-01-29 23:01:40 +00:00
api : {
2021-08-09 08:31:12 +00:00
fetchLatest : fetchLatestApi ,
fetchUpcoming : fetchUpcomingApi ,
2021-01-29 23:01:40 +00:00
fetchProfile : fetchApiProfile ,
2021-08-09 08:31:12 +00:00
// fetchScene,
fetchScene : fetchSceneApi ,
2021-01-29 23:01:40 +00:00
fetchMovie ,
} ,
2020-05-14 02:26:05 +00:00
getPhotos ,
scrapeApiProfile ,
scrapeApiReleases ,
scrapeProfile ,
scrapeAll ,
scrapeScene ,
2020-01-22 21:25:58 +00:00
} ;