'use strict'; const Promise = require('bluebird'); const bhttp = require('bhttp'); const { JSDOM } = require('jsdom'); const cheerio = require('cheerio'); const moment = require('moment'); async function fetchPhotos(url) { const res = await bhttp.get(url); return res.body.toString(); } function scrapePhotos(html) { const $ = cheerio.load(html, { normalizeWhitespace: true }); return $('.preview .imgLink').toArray().map((linkEl) => { const url = $(linkEl).attr('href'); if (url.match('/join')) { // URL links to join page instead of full photo, extract thumbnail const src = $(linkEl).find('img').attr('src'); if (src.match('previews/')) { // resource often serves full photo at a modifier URL anyway, add as primary source const highRes = src .replace('previews/', '') .replace('_tb.jpg', '.jpg'); // keep original thumbnail as fallback in case full photo is not available return [highRes, src]; } return src; } // URL links to full photo return url; }); } async function getPhotos(albumPath, site) { const albumUrl = `${site.url}${albumPath}`; try { const html = await fetchPhotos(albumUrl); const $ = cheerio.load(html, { normalizeWhitespace: true }); const photos = scrapePhotos(html); const lastPage = $('.Gamma_Paginator a.last').attr('href')?.match(/\d+$/)[0]; if (lastPage) { const otherPages = Array.from({ length: Number(lastPage) }, (_value, index) => index + 1).slice(1); const otherPhotos = await Promise.map(otherPages, async (page) => { const pageUrl = `${site.url}/${albumPath}/${page}`; const pageHtml = await fetchPhotos(pageUrl); return scrapePhotos(pageHtml); }, { concurrency: 2, }); return photos.concat(otherPhotos.flat()); } return photos; } catch (error) { console.error(`Failed to fetch ${site.name} photos from ${albumUrl}: ${error.message}`); return []; } } async function scrapeApiReleases(json, site) { return json.map((scene) => { const release = { entryId: scene.clip_id, title: scene.title, description: scene.description, duration: scene.length, likes: scene.ratings_up, dislikes: scene.ratings_down, }; release.url = `${site.url}/en/video/${scene.url_title}/${release.entryId}`; release.date = moment.utc(scene.release_date, 'YYYY-MM-DD').toDate(); release.actors = scene.actors.map(({ name }) => name); release.director = scene.directors[0].name; release.tags = scene.master_categories.concat(scene.categories?.map(category => category.name)); const posterPath = scene.pictures.resized || (scene.pictures.nsfw?.top && Object.values(scene.pictures.nsfw.top)[0]); if (posterPath) { release.poster = [ `https://images-evilangel.gammacdn.com/movies${posterPath}`, `https://transform.gammacdn.com/movies${posterPath}`, ]; } release.movie = `${site.url}/en/movie/${scene.url_movie_title}/${scene.movie_id}`; return release; }); } function scrapeAll(html, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const scenesElements = $('li[data-itemtype=scene]').toArray(); return scenesElements.map((element) => { const sceneLinkElement = $(element).find('.sceneTitle a'); const url = `${site.url}${sceneLinkElement.attr('href')}`; const title = sceneLinkElement.attr('title'); const entryId = $(element).attr('data-itemid'); const date = moment .utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY') .toDate(); const actors = $(element).find('.sceneActors a') .map((actorIndex, actorElement) => $(actorElement).attr('title')) .toArray(); const [likes, dislikes] = $(element).find('.value') .toArray() .map(value => Number($(value).text())); const poster = $(element).find('.imgLink img').attr('data-original'); const trailer = `https://videothumb.gammacdn.com/307x224/${entryId}.mp4`; return { url, entryId, title, actors, director: 'Mason', date, poster, trailer: { src: trailer, quality: 224, }, rating: { likes, dislikes, }, site, }; }); } async function scrapeScene(html, url, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const release = { $ }; const json = $('script[type="application/ld+json"]').html(); const videoJson = $('script:contains("window.ScenePlayerOptions")').html(); const [data, data2] = JSON.parse(json); const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1)); [release.entryId] = new URL(url).pathname.split('/').slice(-1); release.title = data.name; release.description = data.description; // date in data object is not the release date of the scene, but the date the entry was added const dateString = $('.updatedDate').first().text().trim(); const dateMatch = dateString.match(/\d{2,4}-\d{2}-\d{2,4}/)?.[0]; release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate(); release.director = data.director?.[0].name || data2?.director?.[0].name; release.actors = data.actor.map(actor => actor.name); const hasTrans = data.actor.some(actor => actor.gender === 'shemale'); const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5; if (stars) release.rating = { stars }; release.duration = moment.duration(data.duration.slice(2).split(':')).asSeconds(); const rawTags = data.keywords?.split(', '); release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags; release.poster = videoData.picPreview; release.photos = await getPhotos($('.picturesItem a').attr('href'), site); const trailer = `${videoData.playerOptions.host}${videoData.url}`; release.trailer = [ { src: trailer.replace('hd', 'sm'), quality: 240, }, { src: trailer.replace('hd', 'med'), quality: 360, }, { src: trailer.replace('hd', 'big'), quality: 480, }, { // probably 540p src: trailer, quality: parseInt(videoData.sizeOnLoad, 10), }, { src: trailer.replace('hd', '720p'), quality: 720, }, { src: trailer.replace('hd', '1080p'), quality: 1080, }, { src: trailer.replace('hd', '4k'), quality: 2160, }, ]; return release; } function scrapeActorSearch(html, url, actorName) { const { document } = new JSDOM(html).window; const actorLink = document.querySelector(`a[title="${actorName}" i]`); return actorLink ? actorLink.href : null; } function scrapeProfile(html, url, actorName, siteSlug) { const { document } = new JSDOM(html).window; const avatarEl = document.querySelector('img.actorPicture'); const descriptionEl = document.querySelector('.actorBio p:not(.bioTitle)'); const hairEl = document.querySelector('.actorProfile .attribute_hair_color'); const heightEl = document.querySelector('.actorProfile .attribute_height'); const weightEl = document.querySelector('.actorProfile .attribute_weight'); const aliasEl = document.querySelector('.actorProfile .attribute_alternate_names'); const nationalityEl = document.querySelector('.actorProfile .attribute_home'); const profile = { name: actorName, }; if (avatarEl) { // larger sizes usually available, provide fallbacks const avatars = [ avatarEl.src.replace(/\d+x\d+/, '500x750'), avatarEl.src.replace(/\d+x\d+/, '240x360'), avatarEl.src.replace(/\d+x\d+/, '200x300'), avatarEl.src, ]; profile.avatar = avatars; } if (descriptionEl) profile.description = descriptionEl.textContent.trim(); if (hairEl) profile.hair = hairEl.textContent.split(':')[1].trim(); if (heightEl) profile.height = Number(heightEl.textContent.match(/\d+/)[0]); if (weightEl) profile.weight = Number(weightEl.textContent.match(/\d+/)[0]); if (aliasEl) profile.aliases = aliasEl.textContent.split(':')[1].trim().split(', '); if (nationalityEl) profile.nationality = nationalityEl.textContent.split(':')[1].trim(); profile.releases = Array.from(document.querySelectorAll('.sceneList .scene a.imgLink'), el => `https://${siteSlug}.com${el.href}`); return profile; } function scrapeApiProfile(data, releases, siteSlug) { const profile = {}; if (data.male === 1) profile.gender = 'male'; if (data.female === 1) profile.gender = 'female'; if (data.shemale === 1 || data.trans === 1) profile.gender = 'transsexual'; if (data.description) profile.description = data.description.trim(); if (data.attributes.ethnicity) profile.ethnicity = data.attributes.ethnicity; if (data.attributes.eye_color) profile.eyes = data.attributes.eye_color; if (data.attributes.hair_color) profile.hair = data.attributes.hair_color; const avatarPath = Object.values(data.pictures).reverse()[0]; if (avatarPath) profile.avatar = `https://images01-evilangel.gammacdn.com/actors${avatarPath}`; profile.releases = releases.map(release => `https://${siteSlug}.com/en/video/${release.url_title}/${release.clip_id}`); return profile; } async function fetchApiCredentials(referer) { const res = await bhttp.get(referer); const body = res.body.toString(); const apiLine = body.split('\n').find(bodyLine => bodyLine.match('apiKey')); const apiSerial = apiLine.slice(apiLine.indexOf('{'), apiLine.indexOf('};') + 1); const apiData = JSON.parse(apiSerial); const { applicationID: appId, apiKey } = apiData.api.algolia; const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0'; const apiUrl = `https://${appId.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=${userAgent}&x-algolia-application-id=${appId}&x-algolia-api-key=${apiKey}`; return { appId, apiKey, userAgent, apiUrl, }; } async function fetchApiLatest(site, page = 1, upcoming = false) { const referer = `${site.url}/en/videos`; const { apiUrl } = await fetchApiCredentials(referer); const res = await bhttp.post(apiUrl, { requests: [ { indexName: 'all_scenes', params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]`, }, ], }, { headers: { Referer: referer, }, encodeJSON: true, }); return scrapeApiReleases(res.body.results[0].hits, site); } async function fetchApiUpcoming(site) { return fetchApiLatest(site, 1, true); } async function fetchLatest(site, page = 1) { const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/${page}`); return scrapeAll(res.body.toString(), site); } async function fetchUpcoming(site) { const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`); return scrapeAll(res.body.toString(), site); } async function fetchScene(url, site) { const res = await bhttp.get(url); return scrapeScene(res.body.toString(), url, site); } async function fetchActorScenes(actorName, apiUrl, siteSlug) { const res = await bhttp.post(apiUrl, { requests: [ { indexName: 'all_scenes', params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=0&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["actors.name:${actorName}"]]`, }, ], }, { headers: { Referer: `https://www.${siteSlug}.com/en/videos`, }, encodeJSON: true, }); if (res.statusCode === 200 && res.body.results[0].hits.length > 0) { return res.body.results[0].hits; } return []; } async function fetchProfile(actorName, siteSlug, altSearchUrl) { const actorSlug = actorName.toLowerCase().replace(/\s+/, '+'); const searchUrl = altSearchUrl ? `https://www.${siteSlug}.com/en/search/${actorSlug}/1/actor` : `https://www.${siteSlug}.com/en/search/${siteSlug}/actor/${actorSlug}`; const searchRes = await bhttp.get(searchUrl); if (searchRes.statusCode !== 200) { return null; } const actorUrl = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName); if (actorUrl) { const url = `https://${siteSlug}.com${actorUrl}`; const actorRes = await bhttp.get(url); if (actorRes.statusCode !== 200) { return null; } return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug); } return null; } async function fetchApiProfile(actorName, siteSlug) { const actorSlug = encodeURI(actorName); const referer = `https://www.${siteSlug}.com/en/search?query=${actorSlug}&tab=actors`; const { apiUrl } = await fetchApiCredentials(referer); const res = await bhttp.post(apiUrl, { requests: [ { indexName: 'all_actors', params: `query=${actorSlug}`, }, ], }, { headers: { Referer: referer, }, encodeJSON: true, }); if (res.statusCode === 200 && res.body.results[0].hits.length > 0) { const actorData = res.body.results[0].hits.find(actor => actor.name === actorName); if (actorData) { const actorScenes = await fetchActorScenes(actorName, apiUrl, siteSlug); return scrapeApiProfile(actorData, actorScenes, siteSlug); } } return null; } module.exports = { fetchApiLatest, fetchApiProfile, fetchApiUpcoming, fetchLatest, fetchProfile, fetchScene, fetchUpcoming, getPhotos, scrapeApiProfile, scrapeApiReleases, scrapeProfile, scrapeAll, scrapeScene, };