'use strict'; const Promise = require('bluebird'); const bhttp = require('bhttp'); const { JSDOM } = require('jsdom'); const cheerio = require('cheerio'); const moment = require('moment'); const argv = require('../argv'); const logger = require('../logger')(__filename); const { ex, get } = require('../utils/q'); const slugify = require('../utils/slugify'); async function fetchPhotos(url) { const res = await bhttp.get(url); return res.body.toString(); } function scrapePhotos(html) { const $ = cheerio.load(html, { normalizeWhitespace: true }); return $('.preview .imgLink, .pgFooterThumb a').toArray().map((linkEl) => { const url = $(linkEl).attr('href'); if (/\/join|\/createaccount/.test(url)) { // URL links to join page instead of full photo, extract thumbnail // /createaccount is used by e.g. Tricky Spa const src = $(linkEl).find('img').attr('src'); if (src.match('previews/')) { // resource often serves full photo at a modifier URL anyway, add as primary source const highRes = src .replace('previews/', '') .replace('_tb.jpg', '.jpg'); // keep original thumbnail as fallback in case full photo is not available return [highRes, src]; } return src; } // URL links to full photo return url; }); } async function getPhotos(albumPath, site) { const albumUrl = site.parameters?.photos ? `${site.url}${site.parameters.photos}${albumPath.split('/').slice(-2).join('/')}` : `${site.url}${albumPath}`; try { const html = await fetchPhotos(albumUrl); const $ = cheerio.load(html, { normalizeWhitespace: true }); const photos = scrapePhotos(html, site); const lastPage = $('.Gamma_Paginator a.last').attr('href')?.match(/\d+$/)[0]; if (lastPage) { const otherPages = Array.from({ length: Number(lastPage) }, (_value, index) => index + 1).slice(1); const otherPhotos = await Promise.map(otherPages, async (page) => { const pageUrl = `${site.url}/${albumPath}/${page}`; const pageHtml = await fetchPhotos(pageUrl); return scrapePhotos(pageHtml, site); }, { concurrency: 2, }); return photos.concat(otherPhotos.flat()); } return photos; } catch (error) { logger.warn(`Failed to fetch ${site.name} photos from ${albumUrl}: ${error.message}`); return []; } } async function scrapeApiReleases(json, site) { return json.map((scene) => { const release = { entryId: scene.clip_id, title: scene.title, description: scene.description, duration: scene.length, likes: scene.ratings_up, dislikes: scene.ratings_down, }; release.url = site.parameters?.scene ? `${site.parameters.scene}/${scene.url_title}/${release.entryId}` : `${site.url}/en/video/${scene.url_title}/${release.entryId}`; release.date = moment.utc(scene.release_date, 'YYYY-MM-DD').toDate(); release.actors = scene.actors.map(({ name }) => name); release.director = scene.directors[0].name; release.tags = scene.master_categories .concat(scene.categories?.map(category => category.name)) .filter(Boolean); // some categories don't have a name const posterPath = scene.pictures.resized || (scene.pictures.nsfw?.top && Object.values(scene.pictures.nsfw.top)[0]); if (posterPath) { release.poster = [ `https://images-evilangel.gammacdn.com/movies${posterPath}`, `https://transform.gammacdn.com/movies${posterPath}`, ]; } release.movie = `${site.url}/en/movie/${scene.url_movie_title}/${scene.movie_id}`; return release; }); } function scrapeAll(html, site, networkUrl, hasTeaser = true) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const scenesElements = $('li[data-itemtype=scene], div[data-itemtype=scenes]').toArray(); return scenesElements.map((element) => { const release = {}; const sceneLinkElement = $(element).find('.sceneTitle a, .tlcTitle a'); if (site) release.url = `${networkUrl ? site.network.url : site.url}${sceneLinkElement.attr('href')}`; else release.url = `${networkUrl}${sceneLinkElement.attr('href')}`; release.title = sceneLinkElement.attr('title'); release.entryId = $(element).attr('data-itemid'); const dateEl = $(element).find('.sceneDate, .tlcSpecsDate .tlcDetailsValue').text() || null; if (dateEl) { release.date = moment .utc(dateEl, ['MM-DD-YYYY', 'YYYY-MM-DD']) .toDate(); } release.actors = $(element).find('.sceneActors a, .tlcActors a') .map((actorIndex, actorElement) => $(actorElement).attr('title')) .toArray(); [release.likes, release.dislikes] = $(element).find('.value') .toArray() .map(value => Number($(value).text())); const posterEl = $(element).find('.imgLink img, .tlcImageItem'); if (posterEl) release.poster = posterEl.attr('data-original') || posterEl.attr('src'); if (hasTeaser) { release.teaser = { src: `https://videothumb.gammacdn.com/307x224/${release.entryId}.mp4`, quality: 224, }; } return release; }); } async function scrapeScene(html, url, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const release = { $, url }; const json = $('script[type="application/ld+json"]').html(); const videoJson = $('script:contains("window.ScenePlayerOptions")').html(); const [data, data2] = json ? JSON.parse(json) : []; const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1)); [release.entryId] = new URL(url).pathname.split('/').slice(-1); release.title = videoData?.playerOptions?.sceneInfos.sceneTitle || data?.name; // date in data object is not the release date of the scene, but the date the entry was added; only use as fallback const dateString = $('.updatedDate').first().text().trim(); const dateMatch = dateString.match(/\d{2,4}-\d{2}-\d{2,4}/)?.[0]; if (dateMatch) release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate(); else if (data?.dateCreated) release.date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate(); else release.date = videoData.playerOptions.sceneInfos.sceneReleaseDate; if (data) { release.description = data.description; release.director = data.director?.[0].name || data2?.director?.[0].name; const actors = data?.actor || data2?.actor || []; release.actors = actors.map(actor => actor.name); const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5; if (stars) release.rating = { stars }; release.duration = moment.duration(data.duration.slice(2)).asSeconds(); } const hasTrans = release.actors.some(actor => actor.gender === 'shemale'); const rawTags = data?.keywords?.split(', ') || data2?.keywords?.split(', '); release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags; const channel = data?.productionCompany?.name || $('.studioLink a').attr('title')?.trim(); if (channel) release.channel = slugify(channel, { delimiter: '' }); release.poster = videoData.picPreview; const photoLink = $('.picturesItem a').attr('href'); if (photoLink) release.photos = await getPhotos(photoLink, site); const trailer = `${videoData.playerOptions.host}${videoData.url}`; release.trailer = [ { src: trailer.replace('hd', 'sm'), quality: 240, }, { src: trailer.replace('hd', 'med'), quality: 360, }, { src: trailer.replace('hd', 'big'), quality: 480, }, { // probably 540p src: trailer, quality: parseInt(videoData.sizeOnLoad, 10), }, { src: trailer.replace('hd', '720p'), quality: 720, }, { src: trailer.replace('hd', '1080p'), quality: 1080, }, { src: trailer.replace('hd', '4k'), quality: 2160, }, ]; return release; } function scrapeActorSearch(html, url, actorName) { const { document } = new JSDOM(html).window; const actorLink = document.querySelector(`a[title="${actorName}" i]`); return actorLink ? actorLink.href : null; } async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, accReleases = []) { const { origin, pathname } = new URL(profileUrl); const profilePath = `/${pathname.split('/').slice(-2).join('/')}`; const url = getActorReleasesUrl(profilePath, page); const { html, qu } = await get(url); const releases = scrapeAll(html, null, origin); const nextPage = qu('.Gamma_Paginator a.next'); if (nextPage) { return fetchActorReleases(profileUrl, getActorReleasesUrl, page + 1, accReleases.concat(releases)); } return accReleases.concat(releases); } async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl) { const { q } = ex(html); const avatar = q('img.actorPicture'); const hair = q('.actorProfile .attribute_hair_color', true); const height = q('.actorProfile .attribute_height', true); const weight = q('.actorProfile .attribute_weight', true); const alias = q('.actorProfile .attribute_alternate_names', true); const nationality = q('.actorProfile .attribute_home', true); const profile = { name: actorName, }; if (avatar) { // larger sizes usually available, provide fallbacks const avatars = [ avatar.src.replace(/\d+x\d+/, '500x750'), avatar.src.replace(/\d+x\d+/, '240x360'), avatar.src.replace(/\d+x\d+/, '200x300'), avatar.src, ]; profile.avatar = avatars; } profile.description = q('.actorBio p:not(.bioTitle)', true); if (hair) profile.hair = hair.split(':')[1].trim(); if (height) profile.height = Number(height.match(/\d+/)[0]); if (weight) profile.weight = Number(weight.match(/\d+/)[0]); if (alias) profile.aliases = alias.split(':')[1].trim().split(', '); if (nationality) profile.nationality = nationality.split(':')[1].trim(); if (getActorReleasesUrl && argv.withReleases) { profile.releases = await fetchActorReleases(url, getActorReleasesUrl); } return profile; } function scrapeApiProfile(data, releases, siteSlug) { const profile = {}; if (data.male === 1) profile.gender = 'male'; if (data.female === 1) profile.gender = 'female'; if (data.shemale === 1 || data.trans === 1) profile.gender = 'transsexual'; if (data.description) profile.description = data.description.trim(); if (data.attributes.ethnicity) profile.ethnicity = data.attributes.ethnicity; if (data.attributes.eye_color) profile.eyes = data.attributes.eye_color; if (data.attributes.hair_color) profile.hair = data.attributes.hair_color; const avatarPath = Object.values(data.pictures).reverse()[0]; if (avatarPath) profile.avatar = `https://images01-evilangel.gammacdn.com/actors${avatarPath}`; profile.releases = releases.map(release => `https://${siteSlug}.com/en/video/${release.url_title}/${release.clip_id}`); return profile; } function getApiUrl(appId, apiKey) { const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0'; const apiUrl = `https://${appId.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=${userAgent}&x-algolia-application-id=${appId}&x-algolia-api-key=${apiKey}`; return { appId, apiKey, userAgent, apiUrl, }; } async function fetchApiCredentials(referer, site) { if (site?.parameters?.appId && site?.parameters?.apiKey) { return getApiUrl(site.parameters.appId, site.parameters.apiKey); } const res = await bhttp.get(referer); const body = res.body.toString(); const apiLine = body.split('\n').find(bodyLine => bodyLine.match('apiKey')); if (!apiLine) { throw new Error(`No Gamma API key found for ${referer}`); } const apiSerial = apiLine.slice(apiLine.indexOf('{'), apiLine.indexOf('};') + 1); const apiData = JSON.parse(apiSerial); const { applicationID: appId, apiKey } = apiData.api.algolia; return getApiUrl(appId, apiKey); } async function fetchApiLatest(site, page = 1, upcoming = false) { const referer = site.parameters?.referer || `${site.parameters?.networkReferer ? site.network.url : site.url}/en/videos`; const { apiUrl } = await fetchApiCredentials(referer, site); const res = await bhttp.post(apiUrl, { requests: [ { indexName: 'all_scenes', params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]&filters=sitename:${site.slug} OR channels.id:${site.slug}`, }, ], }, { headers: { Referer: referer, }, encodeJSON: true, }); console.log(res.body); if (res.statusCode === 200 && res.body.results?.[0]?.hits) { return scrapeApiReleases(res.body.results[0].hits, site); } return []; } async function fetchApiUpcoming(site) { return fetchApiLatest(site, 1, true); } async function fetchLatest(site, page = 1) { const url = `${site.url}${site.parameters?.latest || '/en/videos/AllCategories/0/'}${page}`; const res = await bhttp.get(url); return scrapeAll(res.body.toString(), site); } async function fetchUpcoming(site) { const url = `${site.url}${site.parameters?.upcoming || '/en/videos/AllCategories/0/1/upcoming'}`; const res = await bhttp.get(url); return scrapeAll(res.body.toString(), site); } async function fetchScene(url, site, release) { if (site.parameters?.deep === false) { return release; } const res = await bhttp.get(url); return scrapeScene(res.body.toString(), url, site); } async function fetchActorScenes(actorName, apiUrl, siteSlug) { const res = await bhttp.post(apiUrl, { requests: [ { indexName: 'all_scenes', params: `query=&filters=sitename:${siteSlug}&hitsPerPage=36&maxValuesPerFacet=100&page=0&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["actors.name:${actorName}"]]`, }, ], }, { headers: { Referer: `https://www.${siteSlug}.com/en/videos`, }, encodeJSON: true, }); if (res.statusCode === 200 && res.body.results[0].hits.length > 0) { return res.body.results[0].hits; } return []; } async function fetchProfile(actorName, siteSlug, altSearchUrl, getActorReleasesUrl) { const actorSlug = actorName.toLowerCase().replace(/\s+/, '+'); const searchUrl = altSearchUrl ? `https://www.${siteSlug}.com/en/search/${actorSlug}/1/actor` : `https://www.${siteSlug}.com/en/search/${siteSlug}/actor/${actorSlug}`; const searchRes = await bhttp.get(searchUrl); if (searchRes.statusCode !== 200) { return null; } const actorUrl = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName); if (actorUrl) { const url = `https://${siteSlug}.com${actorUrl}`; const actorRes = await bhttp.get(url); if (actorRes.statusCode !== 200) { return null; } return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl); } return null; } async function fetchApiProfile(actorName, siteSlug) { const actorSlug = encodeURI(actorName); const referer = `https://www.${siteSlug}.com/en/search`; const { apiUrl } = await fetchApiCredentials(referer); const res = await bhttp.post(apiUrl, { requests: [ { indexName: 'all_actors', params: `query=${actorSlug}`, }, ], }, { headers: { Referer: referer, }, encodeJSON: true, }); if (res.statusCode === 200 && res.body.results[0].hits.length > 0) { const actorData = res.body.results[0].hits.find(actor => slugify(actor.name) === slugify(actorName)); if (actorData) { const actorScenes = await fetchActorScenes(actorData.name, apiUrl, siteSlug); return scrapeApiProfile(actorData, actorScenes, siteSlug); } } return null; } module.exports = { fetchApiLatest, fetchApiProfile, fetchApiUpcoming, fetchLatest, fetchProfile, fetchScene, fetchUpcoming, getPhotos, scrapeApiProfile, scrapeApiReleases, scrapeProfile, scrapeAll, scrapeScene, };