2020-01-22 21:25:58 +00:00
'use strict' ;
const Promise = require ( 'bluebird' ) ;
2020-03-07 01:35:13 +00:00
const util = require ( 'util' ) ;
2020-01-22 21:25:58 +00:00
const { JSDOM } = require ( 'jsdom' ) ;
const cheerio = require ( 'cheerio' ) ;
2020-02-01 00:15:40 +00:00
const moment = require ( 'moment' ) ;
2021-01-30 00:12:42 +00:00
const format = require ( 'template-format' ) ;
2020-01-22 21:25:58 +00:00
2020-02-08 01:49:39 +00:00
const logger = require ( '../logger' ) ( _ _filename ) ;
2020-11-22 03:07:09 +00:00
const qu = require ( '../utils/qu' ) ;
2020-09-04 01:07:28 +00:00
const http = require ( '../utils/http' ) ;
2020-02-06 22:59:32 +00:00
const slugify = require ( '../utils/slugify' ) ;
2020-02-06 22:51:13 +00:00
2021-08-09 08:31:12 +00:00
/**
 * Build the Algolia multi-query endpoint for a Gamma site.
 *
 * @param {string} appId - Algolia application ID; lowercased to form the `-dsn.algolia.net` host.
 * @param {string} apiKey - Algolia API key, sent as a query parameter.
 * @returns {{ appId: string, apiKey: string, userAgent: string, apiUrl: string }}
 *   The credentials that were passed in, the agent string and the assembled URL.
 */
function getApiUrl(appId, apiKey) {
  const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0';
  const host = `${appId.toLowerCase()}-dsn.algolia.net`;
  const queryString = `x-algolia-agent=${userAgent}&x-algolia-application-id=${appId}&x-algolia-api-key=${apiKey}`;
  const apiUrl = `https://${host}/1/indexes/*/queries?${queryString}`;

  return { appId, apiKey, userAgent, apiUrl };
}
/**
 * Resolve the Algolia credentials for a site.
 *
 * Credentials configured on the site (`parameters.appId` and `parameters.apiKey`)
 * are used directly. Otherwise the referer page is downloaded and the first line
 * mentioning `apiKey` is parsed as an embedded JSON object.
 *
 * @param {string} referer - Page to download when no credentials are configured.
 * @param {object} [site] - Site entity; may carry `parameters.appId` / `parameters.apiKey`.
 * @returns {Promise<object>} The result of `getApiUrl`.
 * @throws {Error} When the page has no `apiKey` line, or that line cannot be parsed
 *   into an object exposing `api.algolia.applicationID` and `api.algolia.apiKey`.
 */
async function fetchApiCredentials(referer, site) {
  const { appId: configuredAppId, apiKey: configuredApiKey } = site?.parameters || {};

  if (configuredAppId && configuredApiKey) {
    return getApiUrl(configuredAppId, configuredApiKey);
  }

  const res = await http.get(referer);
  const body = res.body.toString();
  const apiLine = body.split('\n').find((bodyLine) => bodyLine.match('apiKey'));

  if (!apiLine) {
    throw new Error(`No Gamma API key found for ${referer}`);
  }

  const start = apiLine.indexOf('{');
  const end = apiLine.indexOf('};');

  // without both delimiters the slice would be empty or garbage and JSON.parse would fail opaquely
  if (start === -1 || end === -1) {
    throw new Error(`Could not locate Gamma API configuration object for ${referer}`);
  }

  let apiData;

  try {
    apiData = JSON.parse(apiLine.slice(start, end + 1));
  } catch (error) {
    throw new Error(`Failed to parse Gamma API configuration for ${referer}: ${error.message}`);
  }

  const { applicationID: appId, apiKey } = apiData?.api?.algolia || {};

  if (!appId || !apiKey) {
    throw new Error(`Gamma API configuration for ${referer} lacks Algolia credentials`);
  }

  return getApiUrl(appId, apiKey);
}
2020-02-08 03:52:32 +00:00
/**
 * Resolve the URL of a photo album page.
 *
 * @param {string} albumPath - Album path as linked from the scene page.
 * @param {object} site - Site entity (`url`, optional `parameters.photos`).
 * @returns {string|null} The album URL. With a `photos` parameter, the last two path
 *   segments are appended to it (prefixed by the site URL unless it starts with `http`).
 *   Returns null when the site has no URL or `photos` is explicitly `false`.
 */
function getAlbumUrl(albumPath, site) {
  const photos = site.parameters?.photos;

  if (photos) {
    const albumTail = albumPath.split('/').slice(-2).join('/');
    const base = /^http/.test(photos) ? photos : `${site.url}${photos}`;

    return `${base}/${albumTail}`;
  }

  if (!site.url || photos === false) {
    return null;
  }

  return `${site.url}${albumPath}`;
}
2020-01-22 21:25:58 +00:00
/**
 * Download an album page and return its body as a string.
 *
 * @param {string} url - Album page URL.
 * @returns {Promise<string>} The response body converted with `toString()`.
 */
async function fetchPhotos(url) {
  const { body } = await http.get(url);

  return body.toString();
}
2020-03-06 03:28:01 +00:00
/**
 * Extract photo sources from an album page.
 *
 * @param {string} html - Album page markup.
 * @param {boolean} [includeThumbnails=true] - Whether a plain thumbnail may be returned
 *   when no preview-derived full-size candidate exists.
 * @returns {Array<string|string[]>} One entry per photo; a nested array lists fallback
 *   sources in order of preference.
 */
function scrapePhotos(html, includeThumbnails = true) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });
  const photos = [];

  for (const linkEl of $('.preview .imgLink, .pgFooterThumb a').toArray()) {
    const href = $(linkEl).attr('href');

    // /createaccount is used by e.g. Tricky Spa native site
    const linksToJoinPage = /\/join|\/createaccount/.test(href);

    if (!linksToJoinPage) {
      // URL links to full photo
      if (href) photos.push(href);
      continue;
    }

    // URL links to join page instead of full photo, extract thumbnail
    const thumbnail = $(linkEl).find('img').attr('src');

    if (/previews\//.test(thumbnail)) {
      // resource often serves full photo at a modifier URL anyway, add as primary source
      const fullSize = thumbnail.replace('previews/', '').replace('_tb.jpg', '.jpg');

      // keep original thumbnail as fallback in case full photo is not available
      photos.push([fullSize, thumbnail]);
    } else if (includeThumbnails && thumbnail) {
      photos.push(thumbnail);
    }
  }

  return photos;
}
2020-03-06 03:28:01 +00:00
/**
 * Collect the photos of an album, following its pagination.
 *
 * @param {string} albumPath - Album path as linked from the scene page.
 * @param {object} site - Site entity, used to resolve the album URL and for logging.
 * @param {boolean} [includeThumbnails=true] - Forwarded to `scrapePhotos`.
 * @returns {Promise<Array<string|string[]>>} Photos of all pages; an empty array when
 *   the album URL cannot be resolved or any request/scrape throws (a warning is logged).
 */
async function getPhotos(albumPath, site, includeThumbnails = true) {
  const albumUrl = getAlbumUrl(albumPath, site);

  if (!albumUrl) {
    return [];
  }

  try {
    const html = await fetchPhotos(albumUrl);
    const $ = cheerio.load(html, { normalizeWhitespace: true });
    const photos = scrapePhotos(html, includeThumbnails);

    // a paginator link that does not end in a page number must not throw and discard the first page
    const lastPage = Number($('.Gamma_Paginator a.last').attr('href')?.match(/\d+$/)?.[0]);

    if (!lastPage) {
      return photos;
    }

    // the first page is already scraped, continue from page 2
    const otherPages = Array.from({ length: lastPage - 1 }, (_value, index) => index + 2);

    const otherPhotos = await Promise.map(otherPages, async (page) => {
      const pageHtml = await fetchPhotos(`${albumUrl}/${page}`);

      return scrapePhotos(pageHtml, includeThumbnails);
    }, {
      concurrency: 2,
    });

    return photos.concat(otherPhotos.flat());
  } catch (error) {
    logger.warn(`Failed to fetch ${site.name} photos from ${albumUrl}: ${error.message}`);

    return [];
  }
}
2021-08-09 08:31:12 +00:00
/**
 * Request the signed photo set of an entry from the site's `/media/signPhotoset` endpoint.
 *
 * @param {string|number} entryId - Photo set ID.
 * @param {object} site - Site entity providing `url`.
 * @returns {Promise<Array>} The values of the response body, or an empty array when the
 *   response is not OK.
 */
async function getFullPhotos(entryId, site) {
  const endpoint = `${site.url}/media/signPhotoset/${entryId}`;
  const res = await http.get(endpoint, {
    headers: {
      'X-Requested-With': 'XMLHttpRequest',
    },
  });

  return res.ok ? Object.values(res.body) : [];
}
/**
 * Query the `all_photosets` index for the thumbnails of a photo set.
 *
 * @param {string|number} entryId - Photo set ID used in the `set_id` facet filter.
 * @param {object} site - Site entity (`url`, `parent.url`, optional credentials).
 * @param {object} [parameters] - May carry `referer` or `networkReferer`.
 * @returns {Promise<string[][]>} Per picture, a list of fallback thumbnail URLs; empty
 *   when the request fails or the response holds no matching set.
 */
async function getThumbs(entryId, site, parameters) {
  const referer = parameters?.referer || `${parameters?.networkReferer ? site.parent.url : site.url}/en/videos`;
  const { apiUrl } = await fetchApiCredentials(referer, site);

  const res = await http.post(apiUrl, {
    requests: [
      {
        indexName: 'all_photosets',
        params: `query=&page=0&facets=[]&tagFilters=&facetFilters=[["set_id:${entryId}"]]`,
      },
    ],
  }, {
    headers: {
      Referer: referer,
    },
  }, {
    encodeJSON: true,
  });

  // guard every level, including `hits`, so an unexpected response shape yields no thumbs instead of throwing
  const pictures = res.ok && res.body.results?.[0]?.hits?.[0]?.set_pictures;

  if (!pictures) {
    return [];
  }

  return pictures.map((img) => ([
    `https://transform.gammacdn.com/photo_set${img.thumb_path}`,
    `https://images-evilangel.gammacdn.com/photo_set${img.thumb_path}`,
  ]));
}
/**
 * Combine signed full-size photos with API thumbnails.
 *
 * Both lookups run concurrently. Thumbnails only fill the positions beyond the
 * number of full-size photos obtained.
 *
 * @param {string|number} entryId - Photo set ID.
 * @param {object} site - Site entity.
 * @param {object} [parameters] - Forwarded to both lookups.
 * @returns {Promise<Array>} Full photos followed by the remaining thumbnails.
 */
async function getPhotosApi(entryId, site, parameters) {
  const fullPhotosLookup = getFullPhotos(entryId, site, parameters);
  const thumbsLookup = getThumbs(entryId, site, parameters);

  const [photos, thumbs] = await Promise.all([fullPhotosLookup, thumbsLookup]);
  const remainingThumbs = thumbs.slice(photos.length);

  return photos.concat(remainingThumbs);
}
2020-02-01 00:15:40 +00:00
/**
 * Convert scene hits from the search API into release objects.
 *
 * @param {object[]} json - Scene hits.
 * @param {object} site - Site entity; `parameters.extract` restricts hits to one
 *   `sitename`, `parameters.filterExclusive` drops scenes listed on more than one site.
 * @returns {Promise<object[]>} The releases that passed the filters.
 */
async function scrapeApiReleases(json, site) {
  const releases = [];

  for (const scene of json) {
    const isOtherSite = site.parameters?.extract && scene.sitename !== site.parameters.extract;

    if (isOtherSite) continue;
    if (site.parameters?.filterExclusive && scene.availableOnSite.length > 1) continue;

    const release = {
      entryId: scene.clip_id,
      title: scene.title,
      description: scene.description,
      duration: scene.length,
      likes: scene.ratings_up,
      dislikes: scene.ratings_down,
    };

    release.path = `/${scene.url_title}/${release.entryId}`;

    if (site.parameters?.scene) {
      release.url = `${site.parameters.scene}${release.path}`;
    } else if (site.url && site.parameters?.scene !== false) {
      release.url = `${site.url}/en/video${release.path}`;
    }

    release.date = moment.utc(scene.release_date, 'YYYY-MM-DD').toDate();
    release.director = scene.directors[0]?.name || null;

    release.actors = scene.actors.map((actor) => {
      const avatarBase = `https://images03-openlife.gammacdn.com/actors/${actor.actor_id}/${actor.actor_id}`;

      return {
        entryId: actor.actor_id,
        name: actor.name,
        gender: actor.gender,
        avatar: [
          `${avatarBase}_500x750.jpg`,
          `${avatarBase}_240x360.jpg`,
          `${avatarBase}_200x300.jpg`,
        ],
      };
    });

    const categoryNames = scene.categories?.map((category) => category.name);

    release.tags = scene.master_categories
      .concat(categoryNames)
      .filter(Boolean); // some categories don't have a name

    const { pictures } = scene;
    const posterPath = pictures.resized || (pictures.nsfw?.top && Object.values(pictures.nsfw.top)[0]);

    if (posterPath) {
      release.poster = [
        `https://images-evilangel.gammacdn.com/movies${posterPath}`,
        `https://transform.gammacdn.com/movies${posterPath}`,
      ];
    }

    releases.push(release);
  }

  return releases;
}
2020-02-07 02:40:11 +00:00
/**
 * Scrape the scene tiles of a listing page.
 *
 * @param {string} html - Listing page markup.
 * @param {object|null} site - Site entity. When given, links are prefixed with the
 *   parent's URL if `networkUrl` is truthy, otherwise with the site's own URL.
 * @param {string} [networkUrl] - URL prefix used directly when no site is given.
 * @param {boolean} [hasTeaser=true] - Whether to attach teaser video sources.
 * @returns {object[]} One release per scene element.
 */
function scrapeAll(html, site, networkUrl, hasTeaser = true) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });

  return $('li[data-itemtype=scene], div[data-itemtype=scenes]').toArray().map((element) => {
    const $scene = $(element);
    const $sceneLink = $scene.find('.sceneTitle a, .tlcTitle a');

    let baseUrl = networkUrl;

    if (site) {
      baseUrl = networkUrl ? site.parent.url : site.url;
    }

    const release = {
      url: `${baseUrl}${$sceneLink.attr('href')}`,
      title: $sceneLink.attr('title'),
      entryId: $scene.attr('data-itemid'),
    };

    const dateText = $scene.find('.sceneDate, .tlcSpecsDate .tlcDetailsValue').text() || null;

    if (dateText) {
      release.date = moment.utc(dateText, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate();
    }

    release.actors = $scene
      .find('.sceneActors a, .tlcActors a')
      .map((actorIndex, actorElement) => $(actorElement).attr('title'))
      .toArray();

    const [likes, dislikes] = $scene
      .find('.value')
      .toArray()
      .map((value) => Number($(value).text()));

    release.likes = likes;
    release.dislikes = dislikes;

    // a selection object is always truthy, so the poster is assigned even when nothing matched
    const $poster = $scene.find('.imgLink img, .tlcImageItem');
    release.poster = $poster.attr('data-original') || $poster.attr('src');

    const channelTitle = $scene.find('.fromSite a').attr('title');

    if (channelTitle) {
      release.channel = channelTitle.replace('.com', '');
    }

    if (hasTeaser) {
      release.teaser = ['600x339', '307x224'].map((size) => ({
        src: `https://videothumb.gammacdn.com/${size}/${release.entryId}.mp4`,
      }));
    }

    return release;
  });
}
2021-01-26 23:21:58 +00:00
/**
 * Scrape a scene page, optionally supplemented by its mobile variant.
 *
 * Metadata comes from the page's LD+JSON script, the `window.ScenePlayerOptions`
 * script and the markup itself.
 *
 * @param {string} html - Scene page markup.
 * @param {string} url - Scene URL; used for the entry ID when `baseRelease.path` is absent.
 * @param {object} site - Site entity, used for photo albums and for prefixing the movie URL.
 * @param {object} [baseRelease] - Previously scraped release; `path` takes priority for the entry ID.
 * @param {string|null} [mobileHtml] - Markup of the mobile scene page, if fetched.
 * @param {object} [options] - `includePhotos` enables fetching of the photo album.
 * @returns {Promise<object>} The scraped release.
 */
async function scrapeScene(html, url, site, baseRelease, mobileHtml, options) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });
  const m$ = mobileHtml && cheerio.load(mobileHtml, { normalizeWhitespace: true });
  const release = { $, url };

  const json = $('script[type="application/ld+json"]').html();
  const videoJson = $('script:contains("window.ScenePlayerOptions")').html();

  // the payload is consumed as a list of up to two entries; wrap a lone object so destructuring cannot throw
  const [data, data2] = json ? [].concat(JSON.parse(json)) : [];
  const videoData = videoJson && JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1));
  const sceneInfos = videoData?.playerOptions?.sceneInfos;

  release.entryId = (baseRelease?.path || new URL(url).pathname).match(/\/(\d{2,})(\/|$)/)?.[1];
  release.title = sceneInfos?.sceneTitle || data?.name;

  // date in data object is not the release date of the scene, but the date the entry was added; only use as fallback
  const dateString = $('.updatedDate').first().text().trim();
  const dateMatch = dateString.match(/\d{2,4}[-/]\d{2}[-/]\d{2,4}/)?.[0];

  if (dateMatch) release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate();
  else if (data?.dateCreated) release.date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();
  else release.date = sceneInfos?.sceneReleaseDate;

  if (data) {
    release.description = data.description;

    if (data.director?.[0]?.name) release.director = data.director[0].name;
    else if (data2?.director?.[0]?.name) release.director = data2.director[0].name;

    // rating and duration are optional in the payload; skip them rather than abort the whole scrape
    const rating = data.aggregateRating;
    const stars = rating ? (rating.ratingValue / rating.bestRating) * 5 : null;

    if (stars) release.rating = { stars };
    if (data.duration) release.duration = moment.duration(data.duration.slice(2)).asSeconds();
  }

  const actors = data?.actor || data2?.actor;

  if (actors) {
    release.actors = actors.map((actor) => ({
      name: actor.name,
      gender: actor.gender,
    }));
  }

  const hasTrans = release.actors?.some((actor) => actor.gender === 'shemale');
  const rawTags = data?.keywords?.split(', ') || data2?.keywords?.split(', ') || [];

  release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags;

  const channel = data?.productionCompany?.name
    || $('.studioLink a, .siteLink a').attr('title')?.trim()
    || $('.siteNameSpan').text()
      ?.trim()
      .toLowerCase()
      .replace('.com', '')
    || $('meta[name="twitter:domain"]').attr('content')?.replace('.com', '');

  if (channel) release.channel = slugify(channel, '');

  // sometimes links to just https://images02-fame.gammacdn.com/
  if (videoData?.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview;

  const photoLink = $('.picturesItem a').attr('href');

  // wrap mobile elements with the mobile document's own loader
  const mobilePhotos = m$
    ? m$('.preview-displayer a img').map((photoIndex, photoEl) => m$(photoEl).attr('src')).toArray()
    : [];

  if (photoLink && options?.includePhotos) {
    const photos = await getPhotos(photoLink, site, mobilePhotos.length < 3); // only get thumbnails when less than 3 mobile photos are available

    if (photos.length < 7) release.photos = [...photos, ...mobilePhotos]; // probably only teaser photos available, supplement with mobile album
    else release.photos = photos;
  } else {
    release.photos = mobilePhotos;
  }

  // only build trailer sources when the player data actually carries a host and a path
  const trailerHost = videoData?.playerOptions?.host;
  const trailer = trailerHost && videoData.url ? `${trailerHost}${videoData.url}` : null;

  if (trailer) {
    release.trailer = [
      { src: trailer.replace('hd', 'sm'), quality: 240 },
      { src: trailer.replace('hd', 'med'), quality: 360 },
      { src: trailer.replace('hd', 'big'), quality: 480 },
      // probably 540p
      { src: trailer, quality: parseInt(videoData.sizeOnLoad, 10) },
      { src: trailer.replace('hd', '720p'), quality: 720 },
      { src: trailer.replace('hd', '1080p'), quality: 1080 },
      { src: trailer.replace('hd', '4k'), quality: 2160 },
    ];
  }

  const movie = $('.dvdLink');
  const movieUrl = qu.prefixUrl(movie.attr('href'), site.url);

  if (movieUrl) {
    release.movie = {
      url: movieUrl,
      title: movie.attr('title'),
      entryId: movieUrl.match(/\/(\d+)(\/|$)/)?.[1],
      covers: [movie.find('img').attr('src')],
    };
  }

  return release;
}
2021-08-09 08:31:12 +00:00
/**
 * Convert a single scene hit from the search API into a release.
 *
 * @param {object} data - Scene hit.
 * @param {object} site - Site entity; `url` prefixes the actor and movie links.
 * @param {object} options - `parameters.actors` is an optional actor URL template with
 *   `id` and `slug` fields; `includePhotos` enables fetching of the photo set.
 * @returns {Promise<object>} The scraped release.
 */
async function scrapeSceneApi(data, site, options) {
  const release = {};

  release.entryId = data.clip_id;
  release.title = data.title;
  release.duration = data.length;

  // a Date object is always truthy, so the timestamp itself has to be validated for the fallback to apply
  const timestamp = Number(data.date);

  release.date = Number.isFinite(timestamp) && timestamp > 0
    ? new Date(timestamp * 1000)
    : qu.parseDate(data.release_date, 'YYYY-MM-DD');

  release.actors = data.actors.map((actor) => ({
    entryId: actor.actor_id,
    name: actor.name,
    gender: actor.gender,
    url: options.parameters?.actors
      ? format(options.parameters.actors, { id: actor.actor_id, slug: actor.url_name })
      // the ID belongs to the actor, not to the scene
      : qu.prefixUrl(`/en/pornstar/${actor.url_name}/${actor.actor_id}`, site.url),
  }));

  release.tags = data.categories.map((category) => category.name);

  if (data.pictures) {
    // skip sizes that are missing instead of emitting URLs ending in "undefined"
    const posters = [data.pictures['1920x1080'], data.pictures.resized]
      .filter(Boolean)
      .flatMap((picturePath) => [
        `https://transform.gammacdn.com/movies${picturePath}`,
        `https://images-evilangel.gammacdn.com/movies${picturePath}`,
      ]);

    if (posters.length > 0) {
      release.poster = posters;
    }
  }

  if (data.photoset_id && options.includePhotos) {
    release.photos = await getPhotosApi(data.photoset_id, site, options.parameters);
  }

  if (data.trailers) {
    release.trailer = Object.entries(data.trailers).map(([quality, source]) => ({ src: source, quality }));
  }

  if (data.movie_id) {
    release.movie = {
      entryId: data.movie_id,
      title: data.movie_title,
      url: qu.prefixUrl(`/en/movie/${data.url_movie_title}/${data.movie_id}`, site.url),
    };
  }

  release.channel = data.sitename;
  release.qualities = data.download_sizes;

  return release;
}
2021-01-25 22:01:07 +00:00
/**
 * Look up the trailer of a movie through the evilangel.com DVD trailer page.
 *
 * @param {object} release - Movie release; requires `entryId`.
 * @returns {Promise<string|null>} The trailer URL, or null when the entry ID is missing,
 *   the request fails, or host/path cannot be found in the page.
 */
async function fetchMovieTrailer(release) {
  if (!release.entryId) return null;

  const res = await qu.get(`https://www.evilangel.com/en/dvdtrailer/${release.entryId}`);

  if (!res.ok) return null;

  // values sit in embedded JSON where forward slashes are escaped
  const unescapeSlashes = (value) => value?.replace(/\\\//g, '/');

  const trailerHost = unescapeSlashes(res.html.match(/"host":\s*"(.*\.com)"/)?.[1]);
  const trailerPath = unescapeSlashes(res.html.match(/"url":\s*"(.*\.mp4)"/)?.[1]);

  return trailerHost && trailerPath
    ? qu.prefixUrl(trailerPath, trailerHost)
    : null;
}
/**
 * Scrape a movie (DVD) page.
 *
 * @param {{ query: object, html: string }} item - Query helper and raw markup of the page.
 * @param {object} window - Page window; `dataLayer[0].dvdDetails` supplies title, date and actors.
 * @param {string} url - Movie URL; its path supplies the entry ID.
 * @param {object} entity - Channel or network entity, passed on to `scrapeAll`.
 * @param {object} options - `includeTrailers` enables the trailer lookup.
 * @returns {Promise<object>} The scraped movie release, including its scenes.
 */
async function scrapeMovie({ query, html }, window, url, entity, options) {
  const details = window.dataLayer[0]?.dvdDetails;

  const release = {
    entryId: new URL(url).pathname.match(/\/(\d+)(\/|$)/)?.[1],
    covers: [
      query.img('.frontCoverImg', 'href'),
      query.img('.backCoverImg', 'href'),
    ],
    description: query.cnt('.descriptionText'),
    date: qu.extractDate(details.dvdReleaseDate),
    title: details.dvdName,
    actors: details.dvdActors.map(({ actorName, actorId }) => ({ name: actorName, entryId: actorId })),
    tags: query.cnts('.dvdCol a'),
    scenes: scrapeAll(html, entity, entity.url),
  };

  if (options.includeTrailers) {
    release.trailer = await fetchMovieTrailer(release);
  }

  return release;
}
2020-01-22 21:25:58 +00:00
/**
 * Find the profile link of an actor in a search results page.
 *
 * @param {string} html - Search results markup.
 * @param {string} url - Search URL (unused).
 * @param {string} actorName - Name matched case-insensitively against link titles.
 * @returns {string|null} The `href` of the first matching link, or null.
 */
function scrapeActorSearch(html, url, actorName) {
  const dom = new JSDOM(html);
  const actorLink = dom.window.document.querySelector(`a[title="${actorName}" i]`);

  if (!actorLink) {
    return null;
  }

  return actorLink.href;
}
2021-01-30 00:12:42 +00:00
/**
 * Collect an actor's releases by walking the paginated scene listing.
 *
 * @param {string} profileUrl - Actor profile URL; its last two path segments identify the actor.
 * @param {Function} [getActorReleasesUrl] - Builds a listing URL from `(profilePath, page)`.
 * @param {number} [page=1] - Page to fetch.
 * @param {object[]} [accReleases=[]] - Releases gathered from previous pages.
 * @param {object} [context] - `parameters.actorScenes` is an optional URL template with
 *   `path` and `page` fields; it takes priority over `getActorReleasesUrl`.
 * @returns {Promise<object[]>} All releases gathered up to the last page that could be
 *   fetched. A page that has no URL or fails to load ends the walk without discarding
 *   earlier pages.
 */
async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, accReleases = [], context) {
  const { origin, pathname } = new URL(profileUrl);
  const profilePath = `/${pathname.split('/').slice(-2).join('/')}`;
  const actorScenesTemplate = context?.parameters?.actorScenes;

  const url = (actorScenesTemplate && format(actorScenesTemplate, { path: profilePath, page }))
    || getActorReleasesUrl?.(profilePath, page);

  if (!url) {
    return accReleases;
  }

  const res = await qu.get(url);

  if (!res.ok) {
    // keep what earlier pages yielded instead of dropping everything
    return accReleases;
  }

  const releases = accReleases.concat(scrapeAll(res.item.html, null, origin));
  const nextPage = res.item.query.url('.Gamma_Paginator a.next');

  if (nextPage) {
    return fetchActorReleases(profileUrl, getActorReleasesUrl, page + 1, releases, context);
  }

  return releases;
}
2021-01-30 00:12:42 +00:00
/**
 * Scrape an actor profile page.
 *
 * @param {string} html - Profile page markup.
 * @param {string} url - Profile URL, forwarded to the release lookup.
 * @param {string} actorName - Name stored on the profile.
 * @param {string} _siteSlug - Unused.
 * @param {Function} [getActorReleasesUrl] - Listing URL builder for the release lookup.
 * @param {boolean} [withReleases] - Whether to fetch the actor's releases.
 * @param {object} [context] - `parameters.actorScenes` may supply a listing URL template.
 * @returns {Promise<object>} The scraped profile.
 */
async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases, context) {
  const { query } = qu.extract(html);

  // attributes are read as "Label: value"; tolerate text without a colon or without digits
  const attributeValue = (selector) => query.cnt(selector)?.split(':')[1]?.trim();
  const attributeNumber = (selector) => {
    const digits = query.cnt(selector)?.match(/\d+/)?.[0];

    return digits ? Number(digits) : null;
  };

  const avatar = query.el('img.actorPicture');
  const hair = attributeValue('.actorProfile .attribute_hair_color');
  const height = attributeNumber('.actorProfile .attribute_height');
  const weight = attributeNumber('.actorProfile .attribute_weight');
  const alias = attributeValue('.actorProfile .attribute_alternate_names');
  const nationality = attributeValue('.actorProfile .attribute_home');

  const profile = {
    name: actorName,
  };

  if (avatar) {
    // larger sizes usually available, provide fallbacks
    profile.avatar = [
      avatar.src.replace(/\d+x\d+/, '500x750'),
      avatar.src.replace(/\d+x\d+/, '240x360'),
      avatar.src.replace(/\d+x\d+/, '200x300'),
      avatar.src,
    ];
  }

  profile.description = query.cnt('.actorBio p:not(.bioTitle)');

  if (hair) profile.hair = hair;
  if (height !== null) profile.height = height;
  if (weight !== null) profile.weight = weight;
  if (alias) profile.aliases = alias.split(', ');
  if (nationality) profile.nationality = nationality;

  if ((getActorReleasesUrl || context?.parameters?.actorScenes) && withReleases) {
    profile.releases = await fetchActorReleases(url, getActorReleasesUrl, 1, [], context);
  }

  return profile;
}
2020-02-01 00:15:40 +00:00
/**
 * Convert an actor record from the search API into a profile.
 *
 * @param {object} data - Actor record; `male` / `female` / `shemale` / `trans` flags equal
 *   to 1 select the gender, later checks overriding earlier ones.
 * @param {object[]|null} releases - Scene hits used to build release URLs, if any.
 * @param {string} siteSlug - Slug used as the `.com` host name of the release URLs.
 * @returns {object} The profile; missing `attributes` or `pictures` are treated as empty.
 */
function scrapeApiProfile(data, releases, siteSlug) {
  const profile = {};

  if (data.male === 1) profile.gender = 'male';
  if (data.female === 1) profile.gender = 'female';
  if (data.shemale === 1 || data.trans === 1) profile.gender = 'transsexual';

  if (data.description) profile.description = data.description.trim();

  // records without attributes or pictures should still yield a partial profile
  const attributes = data.attributes ?? {};

  if (attributes.ethnicity) profile.ethnicity = attributes.ethnicity;
  if (attributes.eye_color) profile.eyes = attributes.eye_color;
  if (attributes.hair_color) profile.hair = attributes.hair_color;

  // reversed so the last listed picture becomes the first candidate
  const avatarPaths = Object.values(data.pictures ?? {}).reverse();

  if (avatarPaths.length > 0) {
    profile.avatar = avatarPaths.map((avatarPath) => `https://images01-evilangel.gammacdn.com/actors${avatarPath}`);
  }

  if (releases) {
    profile.releases = releases.map((release) => `https://${siteSlug}.com/en/video/${release.url_title}/${release.clip_id}`);
  }

  return profile;
}
2021-08-09 08:31:12 +00:00
/**
 * Fetch a page of scenes for a site from the `all_scenes` search index.
 *
 * @param {object} site - Site entity (`slug`, `url`, `parent.url`, optional `parameters`).
 * @param {number} [page=1] - 1-based page; the API is queried with `page - 1`.
 * @param {*} [preData] - Unused.
 * @param {*} [include] - Unused.
 * @param {boolean} [upcoming=false] - Selects the `upcoming:1` facet instead of `upcoming:0`.
 * @returns {Promise<object[]|number>} Scraped releases, or the HTTP status when the
 *   response is not a 200 with hits.
 */
async function fetchLatestApi(site, page = 1, preData, include, upcoming = false) {
  const refererBase = site.parameters?.networkReferer ? site.parent.url : site.url;
  const referer = site.parameters?.referer || `${refererBase}/en/videos`;
  const { apiUrl } = await fetchApiCredentials(referer, site);

  const request = {
    indexName: 'all_scenes',
    params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]&filters=sitename:${site.slug} OR channels.id:${site.slug}`,
  };

  const res = await http.post(apiUrl, { requests: [request] }, {
    headers: {
      Referer: referer,
    },
  }, {
    encodeJSON: true,
  });

  const hasHits = res.status === 200 && res.body.results?.[0]?.hits;

  if (!hasHits) {
    return res.status;
  }

  return scrapeApiReleases(res.body.results[0].hits, site);
}
2020-01-30 00:14:31 +00:00
2021-08-09 08:31:12 +00:00
/**
 * Fetch a page of upcoming scenes; delegates to `fetchLatestApi` with the upcoming flag set.
 *
 * @param {object} site - Site entity.
 * @param {number} [page=1] - 1-based page.
 * @param {*} [preData] - Forwarded unchanged.
 * @param {*} [include] - Forwarded unchanged.
 * @returns {Promise<object[]|number>} Whatever `fetchLatestApi` resolves to.
 */
async function fetchUpcomingApi(site, page = 1, preData, include) {
  const upcoming = true;

  return fetchLatestApi(site, page, preData, include, upcoming);
}
2021-08-09 08:31:12 +00:00
/**
 * Fetch a single scene from the `all_scenes` search index.
 *
 * @param {string} url - Scene URL; supplies the clip ID when `baseRelease.path` is absent.
 * @param {object} site - Site entity.
 * @param {object} [baseRelease] - Previously scraped release; `path` takes priority for the clip ID.
 * @param {object} options - Scrape options; `parameters.referer` overrides the default referer.
 * @returns {Promise<object|number|null>} The scraped release; null when the API answers
 *   200 with an empty hit list; otherwise the HTTP status.
 */
async function fetchSceneApi(url, site, baseRelease, options) {
  const referer = options.parameters?.referer || `${site.parameters?.networkReferer ? site.parent.url : site.url}/en/videos`;
  const { apiUrl } = await fetchApiCredentials(referer, site);

  const entryId = (baseRelease?.path || new URL(url).pathname).match(/\/(\d{2,})(\/|$)/)?.[1];

  const res = await http.post(apiUrl, {
    requests: [
      {
        indexName: 'all_scenes',
        params: `query=&page=0&facets=[]&tagFilters=&facetFilters=[["clip_id:${entryId}"]]`,
      },
      {
        indexName: 'all_scenes',
        params: 'query=&page=0&hitsPerPage=1&attributesToRetrieve=[]&attributesToHighlight=[]&attributesToSnippet=[]&tagFilters=&analytics=false&clickAnalytics=false&facets=clip_id',
      },
    ],
  }, {
    headers: {
      Referer: referer,
    },
  }, {
    encodeJSON: true,
  });

  const hits = res.status === 200 && res.body.results?.[0]?.hits;

  if (!hits) {
    return res.status;
  }

  // an empty hit list is truthy; without this guard the scraper would be handed undefined and throw
  if (!hits[0]) {
    return null;
  }

  return scrapeSceneApi(hits[0], site, options);
}
2020-02-08 03:52:32 +00:00
/**
 * Build the URL of a page of the latest-scenes listing.
 *
 * @param {object} site - Site entity; `parameters.latest` may hold an absolute or
 *   site-relative listing URL, optionally containing a `%d` page placeholder.
 * @param {number} page - Page number, substituted for `%d` or appended to the URL.
 * @returns {string} The listing URL; the default `/en/videos/AllCategories/0/<page>`
 *   path is used when no `latest` parameter is set.
 */
function getLatestUrl(site, page) {
  const latest = site.parameters?.latest;

  if (!latest) {
    return `${site.url}/en/videos/AllCategories/0/${page}`;
  }

  const isAbsolute = /^http/.test(latest);
  const template = isAbsolute ? latest : `${site.url}${latest}`;

  if (/%d/.test(latest)) {
    return util.format(template, page);
  }

  return `${template}${page}`;
}
/**
 * Build the URL of the upcoming-scenes listing.
 *
 * @param {object} site - Site entity; `parameters.upcoming` may hold an absolute or
 *   site-relative listing URL.
 * @returns {string} The listing URL; the default `/en/videos/AllCategories/0/1/upcoming`
 *   path is used when no `upcoming` parameter is set.
 */
function getUpcomingUrl(site) {
  const upcoming = site.parameters?.upcoming;

  if (!upcoming) {
    return `${site.url}/en/videos/AllCategories/0/1/upcoming`;
  }

  if (/^http/.test(upcoming)) {
    return `${upcoming}`;
  }

  return `${site.url}${upcoming}`;
}
2020-02-01 00:15:40 +00:00
/**
 * Fetch and scrape a page of the latest-scenes listing.
 *
 * @param {object} site - Site entity.
 * @param {number} [page=1] - Listing page.
 * @returns {Promise<object[]|number>} Scraped releases, or the HTTP status when the
 *   response is not OK.
 */
async function fetchLatest(site, page = 1) {
  const res = await http.get(getLatestUrl(site, page));

  if (!res.ok) {
    return res.status;
  }

  return scrapeAll(res.body.toString(), site);
}
/**
 * Fetch and scrape the upcoming-scenes listing; the releases get no teaser sources.
 *
 * @param {object} site - Site entity.
 * @returns {Promise<object[]|number>} Scraped releases, or the HTTP status when the
 *   response is not OK.
 */
async function fetchUpcoming(site) {
  const res = await http.get(getUpcomingUrl(site));

  if (!res.ok) {
    return res.status;
  }

  const withTeaser = false;

  return scrapeAll(res.body.toString(), site, null, withTeaser);
}
2020-03-07 01:35:13 +00:00
/**
 * Resolve the URL from which to scrape a scene in depth.
 *
 * @param {string} url - Original scene URL.
 * @param {object} site - Site entity; `slug` and the optional `parent.slug` are stripped
 *   from the path, `parameters.deep` may hold an alternative URL prefix.
 * @param {object} [baseRelease] - Previously scraped release; its `path` and `entryId`
 *   take priority over values derived from `url`.
 * @param {string} [mobile] - Mobile URL prefix, or a template with a `%d` scene ID placeholder.
 * @returns {string} The mobile URL, the `deep`-prefixed URL, or the original URL.
 */
function getDeepUrl(url, site, baseRelease, mobile) {
  // the parent is optional (other callers access it with ?.), so don't assume it exists
  const filter = new Set(['en', 'video', 'scene', site.slug, site.parent?.slug]);

  const pathname = baseRelease?.path || new URL(url).pathname
    .split('/')
    .filter((component) => !filter.has(component))
    .join('/'); // reduce to scene ID and title slug

  // NOTE(review): this pattern only matches an ID followed by a slash, unlike the
  // /(\d{2,})(\/|$)/ pattern used elsewhere in this file — confirm that is intended
  const sceneId = baseRelease?.entryId || pathname.match(/\/(\d+)\//)?.[1];

  if (mobile && /%d/.test(mobile)) {
    return util.format(mobile, sceneId);
  }

  if (mobile && sceneId) {
    return `${mobile}${pathname}`;
  }

  if (site.parameters?.deep) {
    return `${site.parameters.deep}${pathname}`;
  }

  return url;
}
2021-01-26 23:21:58 +00:00
/**
 * Fetch and scrape a scene page, together with its mobile variant when one is configured.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Site entity; `parameters.deep === false` disables deep scraping.
 * @param {object} [baseRelease] - Previously scraped release.
 * @param {object} [options] - Scrape options, forwarded to `scrapeScene`.
 * @returns {Promise<object|null>} `baseRelease` when deep scraping is disabled; the scraped
 *   scene extended with `deepUrl` on a 200 response; otherwise null.
 */
async function fetchScene(url, site, baseRelease, options) {
  if (site.parameters?.deep === false) {
    return baseRelease;
  }

  const deepUrl = getDeepUrl(url, site, baseRelease);
  const mobileTemplate = site.parameters?.mobile || site.parent?.parameters?.mobile;
  const mobileUrl = getDeepUrl(url, site, baseRelease, mobileTemplate);

  if (!deepUrl) {
    return null;
  }

  const mobileRequestOptions = {
    headers: {
      // don't redirect to main site
      'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Mobile Safari/537.36',
    },
  };

  const [res, mobileRes] = await Promise.all([
    http.get(deepUrl),
    mobileUrl && http.get(mobileUrl, mobileRequestOptions),
  ]);

  if (res.status !== 200) {
    return null;
  }

  const mobileBody = mobileRes?.status === 200 ? mobileRes.body.toString() : null;
  const scene = await scrapeScene(res.body.toString(), url, site, baseRelease, mobileBody, options);

  return { ...scene, deepUrl };
}
2021-01-25 22:01:07 +00:00
/**
 * Fetch a movie page through `qu.get` with script execution enabled and
 * scrape it.
 *
 * @param {string} url - Movie page URL.
 * @param {object} channel - Channel entity passed to `scrapeMovie`.
 * @param {object} [baseRelease] - Accepted for signature parity; not used here.
 * @param {object} [options] - Passed through to `scrapeMovie`.
 * @returns {Promise<*>} Result of `scrapeMovie` on success, otherwise the
 *   status of the failed request.
 */
async function fetchMovie(url, channel, baseRelease, options) {
	const extractOptions = { runScripts: 'dangerously' };
	const res = await qu.get(url, null, null, { extract: extractOptions });

	return res.ok
		? scrapeMovie(res.item, res.window, url, channel, options)
		: res.status;
}
2020-02-01 00:15:40 +00:00
/**
 * Query the `all_scenes` search index for scenes featuring an actor on one site.
 *
 * @param {string} actorName - Actor name as it appears in the index.
 * @param {string} apiUrl - Search API endpoint including credentials.
 * @param {string} siteSlug - Site slug, used for the site filter and the Referer header.
 * @returns {Promise<Array>} The hits of the first result set, or an empty
 *   array when the request fails or the response carries no hits.
 */
async function fetchActorScenes(actorName, apiUrl, siteSlug) {
	const res = await http.post(apiUrl, {
		requests: [
			{
				indexName: 'all_scenes',
				params: `query=&filters=sitename:${siteSlug}&hitsPerPage=36&maxValuesPerFacet=100&page=0&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["actors.name:${actorName}"]]`,
			},
		],
	}, {
		headers: {
			Referer: `https://www.${siteSlug}.com/en/videos`,
		},
	}, {
		encodeJSON: true,
	});

	// A 200 response may still lack a result set; treat that as "no scenes"
	// instead of throwing on results[0].
	const hits = res.status === 200 ? res.body?.results?.[0]?.hits : null;

	return Array.isArray(hits) ? hits : [];
}
2021-01-30 00:12:42 +00:00
/**
 * Look an actor up through the site's HTML search page and scrape their profile.
 *
 * @param {object} actor - Actor descriptor; only `name` is read.
 * @param {object} context - Scrape context; the site slug is taken from
 *   `entity.slug`, falling back to `site.slug` and then `network.slug`.
 * @param {object} include - Inclusion flags; `include.scenes` is forwarded to `scrapeProfile`.
 * @param {boolean} [altSearchUrl] - Use the alternative `/search/<actor>/1/actor` URL layout.
 * @param {Function} [getActorReleasesUrl] - Forwarded to `scrapeProfile`.
 * @returns {Promise<*>} Result of `scrapeProfile`, or `null` when the search
 *   or profile request fails or the search yields no actor URL.
 */
async function fetchProfile({ name: actorName }, context, include, altSearchUrl, getActorReleasesUrl) {
	const siteSlug = context.entity.slug || context.site?.slug || context.network?.slug;

	// The global flag is required: without it only the first whitespace run is
	// replaced and names with three or more parts keep literal spaces.
	const actorSlug = actorName.toLowerCase().replace(/\s+/g, '+');
	const searchUrl = altSearchUrl
		? `https://www.${siteSlug}.com/en/search/${actorSlug}/1/actor`
		: `https://www.${siteSlug}.com/en/search/${siteSlug}/actor/${actorSlug}`;

	const searchRes = await http.get(searchUrl);

	if (searchRes.status !== 200) {
		return null;
	}

	const actorUrl = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName);

	if (!actorUrl) {
		return null;
	}

	const url = `https://${siteSlug}.com${actorUrl}`;
	const actorRes = await http.get(url);

	if (actorRes.status !== 200) {
		return null;
	}

	return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl, include.scenes, context);
}
2020-09-08 01:26:34 +00:00
/**
 * Look an actor up through the search API's `all_actors` index and build a profile.
 *
 * @param {object} actor - Actor descriptor; only `name` is read.
 * @param {object} context - Scrape context; the site slug is taken from
 *   `entity.slug`, falling back to `site.slug` and then `network.slug`.
 * @param {object} include - Inclusion flags; scenes are fetched only when
 *   `include.releases` is truthy.
 * @returns {Promise<*>} Result of `scrapeApiProfile`, or `null` when the
 *   request fails or no hit matches the actor's slugified name.
 */
async function fetchApiProfile({ name: actorName }, context, include) {
	const siteSlug = context.entity.slug || context.site?.slug || context.network?.slug;

	// The name is embedded as a query-string value, so reserved characters
	// such as & = + # must be escaped too; encodeURI leaves them untouched.
	const actorQuery = encodeURIComponent(actorName);
	const referer = `https://www.${siteSlug}.com/en/search`;

	const { apiUrl } = await fetchApiCredentials(referer);

	const res = await http.post(apiUrl, {
		requests: [
			{
				indexName: 'all_actors',
				params: `query=${actorQuery}`,
			},
		],
	}, {
		headers: {
			Referer: referer,
		},
	}, {
		encodeJSON: true,
	});

	// A 200 response may still lack a result set; treat that as "not found"
	// instead of throwing on results[0].
	const hits = res.status === 200 ? res.body?.results?.[0]?.hits : null;

	if (!Array.isArray(hits) || hits.length === 0) {
		return null;
	}

	const targetSlug = slugify(actorName);
	const actorData = hits.find((actor) => slugify(actor.name) === targetSlug);

	if (!actorData) {
		return null;
	}

	const actorScenes = include.releases && await fetchActorScenes(actorData.name, apiUrl, siteSlug);

	return scrapeApiProfile(actorData, actorScenes, siteSlug);
}
2020-01-22 21:25:58 +00:00
module . exports = {
2021-08-09 08:31:12 +00:00
fetchApiLatest : fetchLatestApi ,
2020-05-14 02:26:05 +00:00
fetchApiProfile ,
2021-08-09 08:31:12 +00:00
fetchApiUpcoming : fetchUpcomingApi ,
2020-05-14 02:26:05 +00:00
fetchLatest ,
2021-08-09 08:31:12 +00:00
fetchLatestApi ,
2021-01-25 22:01:07 +00:00
fetchMovie ,
2020-05-14 02:26:05 +00:00
fetchProfile ,
fetchScene ,
2021-08-09 08:31:12 +00:00
fetchSceneApi ,
2020-05-14 02:26:05 +00:00
fetchUpcoming ,
2021-08-09 08:31:12 +00:00
fetchUpcomingApi ,
2021-01-29 23:01:40 +00:00
api : {
2021-08-09 08:31:12 +00:00
fetchLatest : fetchLatestApi ,
fetchUpcoming : fetchUpcomingApi ,
2021-01-29 23:01:40 +00:00
fetchProfile : fetchApiProfile ,
2021-08-09 08:31:12 +00:00
// fetchScene,
fetchScene : fetchSceneApi ,
2021-01-29 23:01:40 +00:00
fetchMovie ,
} ,
2020-05-14 02:26:05 +00:00
getPhotos ,
scrapeApiProfile ,
scrapeApiReleases ,
scrapeProfile ,
scrapeAll ,
scrapeScene ,
2020-01-22 21:25:58 +00:00
} ;