'use strict'; const Promise = require('bluebird'); const util = require('util'); const bhttp = require('bhttp'); const { JSDOM } = require('jsdom'); const cheerio = require('cheerio'); const moment = require('moment'); const logger = require('../logger')(__filename); const { ex, get } = require('../utils/q'); const http = require('../utils/http'); const slugify = require('../utils/slugify'); function getAlbumUrl(albumPath, site) { if (site.parameters?.photos) { return /^http/.test(site.parameters.photos) ? `${site.parameters.photos}/${albumPath.split('/').slice(-2).join('/')}` : `${site.url}${site.parameters.photos}/${albumPath.split('/').slice(-2).join('/')}`; } if (site.url && site.parameters?.photos !== false) { return `${site.url}${albumPath}`; } return null; } async function fetchPhotos(url) { const res = await bhttp.get(url); return res.body.toString(); } function scrapePhotos(html, includeThumbnails = true) { const $ = cheerio.load(html, { normalizeWhitespace: true }); return $('.preview .imgLink, .pgFooterThumb a').toArray().map((linkEl) => { const url = $(linkEl).attr('href'); if (/\/join|\/createaccount/.test(url)) { // URL links to join page instead of full photo, extract thumbnail // /createaccount is used by e.g. Tricky Spa native site const src = $(linkEl).find('img').attr('src'); if (/previews\//.test(src)) { // resource often serves full photo at a modifier URL anyway, add as primary source const highRes = src .replace('previews/', '') .replace('_tb.jpg', '.jpg'); // keep original thumbnail as fallback in case full photo is not available return [highRes, src]; } if (!includeThumbnails) return null; return src; } // URL links to full photo return url; }).filter(Boolean); } async function getPhotos(albumPath, site, includeThumbnails = true) { const albumUrl = getAlbumUrl(albumPath, site); if (!albumUrl) { return []; } try { const html = await fetchPhotos(albumUrl); const $ = cheerio.load(html, { normalizeWhitespace: true }); const photos = scrapePhotos(html, includeThumbnails); const lastPage = $('.Gamma_Paginator a.last').attr('href')?.match(/\d+$/)[0]; if (lastPage) { const otherPages = Array.from({ length: Number(lastPage) }, (_value, index) => index + 1).slice(1); const otherPhotos = await Promise.map(otherPages, async (page) => { const pageUrl = `${albumUrl}/${page}`; const pageHtml = await fetchPhotos(pageUrl); return scrapePhotos(pageHtml, includeThumbnails); }, { concurrency: 2, }); return photos.concat(otherPhotos.flat()); } return photos; } catch (error) { logger.warn(`Failed to fetch ${site.name} photos from ${albumUrl}: ${error.message}`); return []; } } async function scrapeApiReleases(json, site) { return json.map((scene) => { if (site.parameters?.extract && scene.sitename !== site.parameters.extract) { return null; } const release = { entryId: scene.clip_id, title: scene.title, description: scene.description, duration: scene.length, likes: scene.ratings_up, dislikes: scene.ratings_down, }; release.path = `/${scene.url_title}/${release.entryId}`; if (site.parameters?.scene) release.url = `${site.parameters.scene}${release.path}`; else if (site.url && site.parameters?.scene !== false) release.url = `${site.url}/en/video${release.path}`; release.date = moment.utc(scene.release_date, 'YYYY-MM-DD').toDate(); release.actors = scene.actors.map(actor => ({ name: actor.name, gender: actor.gender })); release.director = scene.directors[0]?.name || null; release.tags = scene.master_categories .concat(scene.categories?.map(category => category.name)) .filter(Boolean); // some categories don't have a name const posterPath = scene.pictures.resized || (scene.pictures.nsfw?.top && Object.values(scene.pictures.nsfw.top)[0]); if (posterPath) { release.poster = [ `https://images-evilangel.gammacdn.com/movies${posterPath}`, `https://transform.gammacdn.com/movies${posterPath}`, ]; } // release.movie = `${site.url}/en/movie/${scene.url_movie_title}/${scene.movie_id}`; return release; }).filter(Boolean); } function scrapeAll(html, site, networkUrl, hasTeaser = true) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const scenesElements = $('li[data-itemtype=scene], div[data-itemtype=scenes]').toArray(); return scenesElements.map((element) => { const release = {}; const sceneLinkElement = $(element).find('.sceneTitle a, .tlcTitle a'); if (site) release.url = `${networkUrl ? site.parent.url : site.url}${sceneLinkElement.attr('href')}`; else release.url = `${networkUrl}${sceneLinkElement.attr('href')}`; release.title = sceneLinkElement.attr('title'); release.entryId = $(element).attr('data-itemid'); const dateEl = $(element).find('.sceneDate, .tlcSpecsDate .tlcDetailsValue').text() || null; if (dateEl) { release.date = moment .utc(dateEl, ['MM-DD-YYYY', 'YYYY-MM-DD']) .toDate(); } release.actors = $(element).find('.sceneActors a, .tlcActors a') .map((actorIndex, actorElement) => $(actorElement).attr('title')) .toArray(); [release.likes, release.dislikes] = $(element).find('.value') .toArray() .map(value => Number($(value).text())); const posterEl = $(element).find('.imgLink img, .tlcImageItem'); if (posterEl) release.poster = posterEl.attr('data-original') || posterEl.attr('src'); const channelEl = $(element).find('.fromSite a'); if (channelEl.attr('title')) release.channel = channelEl.attr('title').replace('.com', ''); if (hasTeaser) { release.teaser = [ { src: `https://videothumb.gammacdn.com/600x339/${release.entryId}.mp4` }, { src: `https://videothumb.gammacdn.com/307x224/${release.entryId}.mp4` }, ]; } return release; }); } async function scrapeScene(html, url, site, baseRelease, mobileHtml) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const m$ = mobileHtml && cheerio.load(mobileHtml, { normalizeWhitespace: true }); const release = { $, url }; const json = $('script[type="application/ld+json"]').html(); const videoJson = $('script:contains("window.ScenePlayerOptions")').html(); const [data, data2] = json ? JSON.parse(json) : []; const videoData = videoJson && JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1)); release.entryId = (baseRelease?.path || new URL(url).pathname).match(/\/(\d{2,})(\/|$)/)?.[1]; release.title = videoData?.playerOptions?.sceneInfos.sceneTitle || data?.name; // date in data object is not the release date of the scene, but the date the entry was added; only use as fallback const dateString = $('.updatedDate').first().text().trim(); const dateMatch = dateString.match(/\d{2,4}[-/]\d{2}[-/]\d{2,4}/)?.[0]; if (dateMatch) release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate(); else if (data?.dateCreated) release.date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate(); else release.date = videoData.playerOptions.sceneInfos.sceneReleaseDate; if (data) { release.description = data.description; if (data.director?.[0]?.name) release.director = data.director[0].name; else if (data2?.director?.[0]?.name) release.director = data2.director[0].name; const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5; if (stars) release.rating = { stars }; release.duration = moment.duration(data.duration.slice(2)).asSeconds(); } const actors = data?.actor || data2?.actor; if (actors) { release.actors = actors.map(actor => ({ name: actor.name, gender: actor.gender, })); } const hasTrans = release.actors?.some(actor => actor.gender === 'shemale'); const rawTags = data?.keywords?.split(', ') || data2?.keywords?.split(', ') || []; release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags; const channel = data?.productionCompany?.name || $('.studioLink a, .siteLink a').attr('title')?.trim() || $('.siteNameSpan').text()?.trim().toLowerCase().replace('.com', '') || $('meta[name="twitter:domain"]').attr('content')?.replace('.com', ''); if (channel) release.channel = slugify(channel, ''); if (videoData.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview; // sometimes links to just https://images02-fame.gammacdn.com/ const photoLink = $('.picturesItem a').attr('href'); const mobilePhotos = m$ ? m$('.preview-displayer a img').map((photoIndex, photoEl) => $(photoEl).attr('src')).toArray() : []; if (photoLink) { const photos = await getPhotos(photoLink, site, mobilePhotos.length < 3); // only get thumbnails when less than 3 mobile photos are available if (photos.length < 7) release.photos = [...photos, ...mobilePhotos]; // probably only teaser photos available, supplement with mobile album else release.photos = photos; } else { release.photos = mobilePhotos; } const trailer = `${videoData.playerOptions.host}${videoData.url}`; release.trailer = [ { src: trailer.replace('hd', 'sm'), quality: 240, }, { src: trailer.replace('hd', 'med'), quality: 360, }, { src: trailer.replace('hd', 'big'), quality: 480, }, { // probably 540p src: trailer, quality: parseInt(videoData.sizeOnLoad, 10), }, { src: trailer.replace('hd', '720p'), quality: 720, }, { src: trailer.replace('hd', '1080p'), quality: 1080, }, { src: trailer.replace('hd', '4k'), quality: 2160, }, ]; return release; } function scrapeActorSearch(html, url, actorName) { const { document } = new JSDOM(html).window; const actorLink = document.querySelector(`a[title="${actorName}" i]`); return actorLink ? actorLink.href : null; } async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, accReleases = []) { const { origin, pathname } = new URL(profileUrl); const profilePath = `/${pathname.split('/').slice(-2).join('/')}`; const url = getActorReleasesUrl(profilePath, page); const res = await get(url); if (!res.ok) return []; const releases = scrapeAll(res.item.html, null, origin); const nextPage = res.item.query.url('.Gamma_Paginator a.next'); if (nextPage) { return fetchActorReleases(profileUrl, getActorReleasesUrl, page + 1, accReleases.concat(releases)); } return accReleases.concat(releases); } async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases) { const { q } = ex(html); const avatar = q('img.actorPicture'); const hair = q('.actorProfile .attribute_hair_color', true); const height = q('.actorProfile .attribute_height', true); const weight = q('.actorProfile .attribute_weight', true); const alias = q('.actorProfile .attribute_alternate_names', true); const nationality = q('.actorProfile .attribute_home', true); const profile = { name: actorName, }; if (avatar) { // larger sizes usually available, provide fallbacks const avatars = [ avatar.src.replace(/\d+x\d+/, '500x750'), avatar.src.replace(/\d+x\d+/, '240x360'), avatar.src.replace(/\d+x\d+/, '200x300'), avatar.src, ]; profile.avatar = avatars; } profile.description = q('.actorBio p:not(.bioTitle)', true); if (hair) profile.hair = hair.split(':')[1].trim(); if (height) profile.height = Number(height.match(/\d+/)[0]); if (weight) profile.weight = Number(weight.match(/\d+/)[0]); if (alias) profile.aliases = alias.split(':')[1].trim().split(', '); if (nationality) profile.nationality = nationality.split(':')[1].trim(); if (getActorReleasesUrl && withReleases) { profile.releases = await fetchActorReleases(url, getActorReleasesUrl); } return profile; } function scrapeApiProfile(data, releases, siteSlug) { const profile = {}; if (data.male === 1) profile.gender = 'male'; if (data.female === 1) profile.gender = 'female'; if (data.shemale === 1 || data.trans === 1) profile.gender = 'transsexual'; if (data.description) profile.description = data.description.trim(); if (data.attributes.ethnicity) profile.ethnicity = data.attributes.ethnicity; if (data.attributes.eye_color) profile.eyes = data.attributes.eye_color; if (data.attributes.hair_color) profile.hair = data.attributes.hair_color; const avatarPaths = Object.values(data.pictures).reverse(); if (avatarPaths.length > 0) profile.avatar = avatarPaths.map(avatarPath => `https://images01-evilangel.gammacdn.com/actors${avatarPath}`); if (releases) profile.releases = releases.map(release => `https://${siteSlug}.com/en/video/${release.url_title}/${release.clip_id}`); return profile; } function getApiUrl(appId, apiKey) { const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0'; const apiUrl = `https://${appId.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=${userAgent}&x-algolia-application-id=${appId}&x-algolia-api-key=${apiKey}`; return { appId, apiKey, userAgent, apiUrl, }; } async function fetchApiCredentials(referer, site) { if (site?.parameters?.appId && site?.parameters?.apiKey) { return getApiUrl(site.parameters.appId, site.parameters.apiKey); } const res = await http.get(referer); const body = res.body.toString(); const apiLine = body.split('\n').find(bodyLine => bodyLine.match('apiKey')); if (!apiLine) { throw new Error(`No Gamma API key found for ${referer}`); } const apiSerial = apiLine.slice(apiLine.indexOf('{'), apiLine.indexOf('};') + 1); const apiData = JSON.parse(apiSerial); const { applicationID: appId, apiKey } = apiData.api.algolia; return getApiUrl(appId, apiKey); } async function fetchApiLatest(site, page = 1, preData, include, upcoming = false) { const referer = site.parameters?.referer || `${site.parameters?.networkReferer ? site.parent.url : site.url}/en/videos`; const { apiUrl } = await fetchApiCredentials(referer, site); const res = await bhttp.post(apiUrl, { requests: [ { indexName: 'all_scenes', params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]&filters=sitename:${site.slug} OR channels.id:${site.slug}`, }, ], }, { headers: { Referer: referer, }, encodeJSON: true, }); if (res.statusCode === 200 && res.body.results?.[0]?.hits) { return scrapeApiReleases(res.body.results[0].hits, site); } return []; } async function fetchApiUpcoming(site, page = 1, preData, include) { return fetchApiLatest(site, page, preData, include, true); } function getLatestUrl(site, page) { if (site.parameters?.latest) { if (/^http/.test(site.parameters.latest)) { return /%d/.test(site.parameters.latest) ? util.format(site.parameters.latest, page) : `${site.parameters.latest}${page}`; } return /%d/.test(site.parameters.latest) ? util.format(`${site.url}${site.parameters.latest}`, page) : `${site.url}${site.parameters.latest}${page}`; } return `${site.url}/en/videos/AllCategories/0/${page}`; } function getUpcomingUrl(site) { if (site.parameters?.upcoming) { return /^http/.test(site.parameters.upcoming) ? `${site.parameters.upcoming}` : `${site.url}${site.parameters.upcoming}`; } return `${site.url}/en/videos/AllCategories/0/1/upcoming`; } async function fetchLatest(site, page = 1) { const url = getLatestUrl(site, page); const res = await bhttp.get(url); return scrapeAll(res.body.toString(), site); } async function fetchUpcoming(site) { const url = getUpcomingUrl(site); const res = await bhttp.get(url); return scrapeAll(res.body.toString(), site, null, false); } function getDeepUrl(url, site, baseRelease, mobile) { const filter = new Set(['en', 'video', 'scene', site.slug, site.parent.slug]); const pathname = baseRelease?.path || new URL(url).pathname .split('/') .filter(component => !filter.has(component)) .join('/'); // reduce to scene ID and title slug const sceneId = baseRelease?.entryId || pathname.match(/\/(\d+)\//)?.[1]; if (mobile && /%d/.test(mobile)) { return util.format(mobile, sceneId); } if (mobile && sceneId) { return `${mobile}${pathname}`; } if (site.parameters?.deep) { return `${site.parameters.deep}${pathname}`; } return url; } async function fetchScene(url, site, baseRelease) { if (site.parameters?.deep === false) { return baseRelease; } const deepUrl = getDeepUrl(url, site, baseRelease); const mobileUrl = getDeepUrl(url, site, baseRelease, site.parameters?.mobile || site.parent?.parameters?.mobile); if (deepUrl) { const [res, mobileRes] = await Promise.all([ bhttp.get(deepUrl), mobileUrl && bhttp.get(mobileUrl, { headers: { // don't redirect to main site 'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Mobile Safari/537.36', }, }), ]); if (res.statusCode === 200) { const mobileBody = mobileRes?.statusCode === 200 ? mobileRes.body.toString() : null; const scene = await scrapeScene(res.body.toString(), url, site, baseRelease, mobileBody); return { ...scene, deepUrl }; } } return null; } async function fetchActorScenes(actorName, apiUrl, siteSlug) { const res = await bhttp.post(apiUrl, { requests: [ { indexName: 'all_scenes', params: `query=&filters=sitename:${siteSlug}&hitsPerPage=36&maxValuesPerFacet=100&page=0&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["actors.name:${actorName}"]]`, }, ], }, { headers: { Referer: `https://www.${siteSlug}.com/en/videos`, }, encodeJSON: true, }); if (res.statusCode === 200 && res.body.results[0].hits.length > 0) { return res.body.results[0].hits; } return []; } async function fetchProfile({ name: actorName }, context, altSearchUrl, getActorReleasesUrl, include) { const siteSlug = context.entity.slug || context.site?.slug || context.network?.slug; const actorSlug = actorName.toLowerCase().replace(/\s+/, '+'); const searchUrl = altSearchUrl ? `https://www.${siteSlug}.com/en/search/${actorSlug}/1/actor` : `https://www.${siteSlug}.com/en/search/${siteSlug}/actor/${actorSlug}`; const searchRes = await bhttp.get(searchUrl); if (searchRes.statusCode !== 200) { return null; } const actorUrl = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName); if (actorUrl) { const url = `https://${siteSlug}.com${actorUrl}`; const actorRes = await bhttp.get(url); if (actorRes.statusCode !== 200) { return null; } return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl, include.scenes); } return null; } async function fetchApiProfile({ name: actorName }, context, include) { const siteSlug = context.entity.slug || context.site?.slug || context.network?.slug; const actorSlug = encodeURI(actorName); const referer = `https://www.${siteSlug}.com/en/search`; const { apiUrl } = await fetchApiCredentials(referer); const res = await http.post(apiUrl, { requests: [ { indexName: 'all_actors', params: `query=${actorSlug}`, }, ], }, { Referer: referer, }, { encodeJSON: true, }); if (res.status === 200 && res.body.results[0].hits.length > 0) { const actorData = res.body.results[0].hits.find(actor => slugify(actor.name) === slugify(actorName)); if (actorData) { const actorScenes = include.releases && await fetchActorScenes(actorData.name, apiUrl, siteSlug); return scrapeApiProfile(actorData, actorScenes, siteSlug); } } return null; } module.exports = { fetchApiLatest, fetchApiProfile, fetchApiUpcoming, fetchLatest, fetchProfile, fetchScene, fetchUpcoming, getPhotos, scrapeApiProfile, scrapeApiReleases, scrapeProfile, scrapeAll, scrapeScene, };