'use strict'; /* eslint-disable newline-per-chained-call */ const Promise = require('bluebird'); const { CookieJar } = Promise.promisifyAll(require('tough-cookie')); const cookie = require('cookie'); const moment = require('moment'); const qu = require('../utils/qu'); const slugify = require('../utils/slugify'); const http = require('../utils/http'); const { inchesToCm, lbsToKg } = require('../utils/convert'); function getBasePath(channel, path = '/scene') { return channel.parameters?.scene || ((channel.parameters?.native || channel.type === 'network') && `${channel.url}${path}`) || `${channel.parent.url}${path}`; } function getThumbs(scene) { if (scene.images.poster) { return Object.values(scene.images.poster) // can be { 0: {}, 1: {}, ... } instead of array .filter((img) => typeof img === 'object') // remove alternateText property .map((image) => image.xl.url); } if (Array.isArray(scene.images.card_main_rect)) { return scene.images.card_main_rect .concat(scene.images.card_secondary_rect || []) .map((image) => image.xl.url.replace('.thumb', '')); } return []; } function getCovers(images, target = 'cover') { if (!images[target]) { return []; } const covers = [ images[target][0].md?.url, images[target][0].sm?.url, images[target][0].xs?.url, // bigger but usually upscaled images[target][0].xx?.url, images[target][0].xl?.url, images[target][0].lg?.url, ]; if (target === 'poster') { return covers; } return [covers]; } function getVideos(data) { const teaserSources = data.videos.mediabook?.files; const trailerSources = data.children.find((child) => child.type === 'trailer')?.videos.full?.files; const teaser = teaserSources && Object.values(teaserSources).map((source) => ({ src: source.urls.view, quality: parseInt(source.format, 10), })); const trailer = trailerSources && Object.values(trailerSources).map((source) => ({ src: source.urls.view, quality: parseInt(source.format, 10), })); return { teaser, trailer }; } function scrapeLatestX(data, site, filterChannel) { const release = { entryId: data.id, title: data.title, description: data.description, }; const basepath = getBasePath(site); release.url = `${basepath}/${release.entryId}/${slugify(release.title)}`; release.date = new Date(data.dateReleased); release.duration = data.videos.mediabook?.length > 1 ? data.videos.mediabook.length : null; release.actors = data.actors.map((actor) => ({ name: actor.name, gender: actor.gender })); release.tags = data.tags.map((tag) => tag.name); [release.poster, ...release.photos] = getThumbs(data); const { teaser, trailer } = getVideos(data); if (teaser) release.teaser = teaser; if (trailer) release.trailer = trailer; release.chapters = data.timeTags?.map((chapter) => ({ time: chapter.startTime, duration: chapter.endTime - chapter.startTime, tags: [chapter.name], })); if ((site.parameters?.extract === true && data.collections.length > 0) // release should not belong to any channel || (typeof site.parameters?.extract === 'string' && !data.collections.some((collection) => collection.shortName === site.parameters.extract)) // release should belong to specific channel || (filterChannel && !data.collections?.some((collection) => collection.id === site.parameters?.siteId))) { // used to separate upcoming Brazzers scenes return { ...release, exclude: true, }; } const siteName = data.collections[0]?.name || data.brand; release.channel = slugify(siteName, ''); return release; } async function scrapeLatest(items, site, filterChannel) { const latestReleases = items.map((data) => scrapeLatestX(data, site, filterChannel)); return { scenes: latestReleases.filter((scene) => !scene.exclude), unextracted: latestReleases.filter((scene) => scene.exclude), }; } function scrapeRelease(data, url, channel, networkName) { const release = {}; const { id: entryId, title, description } = data; release.entryId = data.id; release.title = title; release.description = description; release.date = new Date(data.dateReleased); release.duration = data.videos.mediabook?.length > 1 ? data.videos.mediabook.length : null; release.actors = data.actors.map((actor) => ({ name: actor.name, gender: actor.gender })); release.tags = data.tags.map((tag) => tag.name); [release.poster, ...release.photos] = getThumbs(data); const { teaser, trailer } = getVideos(data); if (teaser) release.teaser = teaser; if (trailer) release.trailer = trailer; release.chapters = data.timeTags?.map((chapter) => ({ time: chapter.startTime, duration: chapter.endTime - chapter.startTime, tags: [chapter.name], })); const siteName = data.collections[0]?.name || data.brand; release.channel = slugify(siteName, ''); release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`; if (data.parent?.type === 'movie' || data.parent?.type === 'serie') { release[data.parent.type] = { entryId: data.parent.id, url: `${getBasePath(channel, data.parent.type === 'movie' ? '/movie' : '/series')}/${data.parent.id}/${slugify(data.parent.title, '-', { removePunctuation: true })}`, title: data.parent.title, description: data.parent.description, date: new Date(data.parent.dateReleased), channel: slugify(data.parent.collections?.name || data.parent.brand), poster: getCovers(data.parent.images, 'poster'), shallow: true, }; } if (data.type === 'movie') { release.covers = getCovers(data.images); release.scenes = data.children?.map((scene) => ({ entryId: scene.id, url: `${getBasePath(channel)}/${scene.id}/${slugify(scene.title)}`, title: scene.title, shallow: true, })); } return release; } function getUrl(site) { const { searchParams, pathname } = new URL(site.url); // if (search.match(/\?site=\d+/)) { if (searchParams.has('site') || /\/site\/\d+/.test(pathname)) { return site.url; } if (site.parameters?.native) { return `${site.url}/scenes`; } if (site.parameters?.extract) { return `${site.url}/scenes`; } if (site.parameters?.siteId) { return `${site.parent.url}/scenes?site=${site.parameters.siteId}`; } throw new Error(`Mind Geek site '${site.name}' (${site.url}) not supported`); } async function getSession(site, parameters, url) { if (site.slug === 'mindgeek' || site.parameters?.parentSession === false) { // most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels return null; } const cookieJar = new CookieJar(); const session = http.session({ cookieJar }); const sessionUrl = site.parameters?.siteId && !(site.parameters?.native || site.parameters?.childSession || site.parent?.parameters?.childSession) ? site.parent.url : (url || site.url); const res = await http.get(sessionUrl, { session, headers: { 'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites }, interval: parameters?.interval, concurrency: parameters?.concurrency, parse: false, }); if (res.statusCode === 200) { const cookieString = await cookieJar.getCookieStringAsync(sessionUrl); const { instance_token: instanceToken } = cookie.parse(cookieString); if (instanceToken) { return { session, instanceToken }; } } throw new Error(`Failed to acquire MindGeek session (${res.statusCode})`); } function scrapeProfile(data, releases = [], networkName) { const profile = { description: data.bio, aliases: data.aliases.filter(Boolean), }; profile.gender = data.gender === 'other' ? 'transsexual' : data.gender; profile.measurements = data.measurements; profile.dateOfBirth = qu.parseDate(data.birthday); profile.birthPlace = data.birthPlace; profile.height = inchesToCm(data.height); profile.weight = lbsToKg(data.weight); profile.hairColor = data.tags.find((tag) => /hair color/i.test(tag.category))?.name; profile.ethnicity = data.tags.find((tag) => /ethnicity/i.test(tag.category))?.name; if (data.images.card_main_rect?.[0]) { profile.avatar = data.images.card_main_rect[0].xl?.url || data.images.card_main_rect[0].lg?.url || data.images.card_main_rect[0].md?.url || data.images.card_main_rect[0].sm?.url || data.images.card_main_rect[0].xs?.url; } if (data.tags.some((tag) => /boob type/i.test(tag.category) && /natural tits/i.test(tag.name))) { profile.naturalBoobs = true; } if (data.tags.some((tag) => /boob type/i.test(tag.category) && /enhanced/i.test(tag.name))) { profile.naturalBoobs = false; } if (data.tags.some((tag) => /body art/i.test(tag.category) && /tattoo/i.test(tag.name))) { profile.hasTattoos = true; } if (data.tags.some((tag) => /body art/i.test(tag.category) && /piercing/i.test(tag.name))) { profile.hasPiercings = true; } profile.releases = releases.map((release) => scrapeRelease(release, null, null, networkName)); return profile; } async function fetchLatest(site, page = 1, options) { const url = getUrl(site); const { searchParams, pathname } = new URL(url); const siteId = searchParams.get('site') || Number(pathname.match(/\/site\/(\d+)\//)?.[1]); if (!siteId && !site.parameters?.native && !site.parameters?.extract) { return null; } const { session, instanceToken } = options.beforeNetwork?.headers?.Instance ? options.beforeNetwork : await getSession(site, options.parameters, url); const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD'); const limit = 24; const apiUrl = site.parameters?.native || site.parameters?.extract ? `https://site-api.project1service.com/v2/releases?dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene` : `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`; const res = await http.get(apiUrl, { session, interval: options.parameters.interval, concurrency: options.parameters.concurrency, headers: { Instance: instanceToken, Origin: site.url, Referer: url, 'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites }, }); if (res.status === 200 && res.body.result) { return scrapeLatest(res.body.result, site); } return res.statusCode; } async function fetchUpcoming(site, page, options) { const url = getUrl(site); const { session, instanceToken } = await getSession(site, options.parameters); const apiUrl = 'https://site-api.project1service.com/v2/upcoming-releases'; const res = await http.get(apiUrl, { session, interval: options.parameters.interval, concurrency: options.parameters.concurrency, headers: { Instance: instanceToken, Origin: site.url, Referer: url, 'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites }, }); if (res.statusCode === 200 && res.body.result) { return scrapeLatest(res.body.result, site, true); } return res.statusCode; } async function fetchRelease(url, site, baseScene, options) { if (baseScene?.entryId && !baseScene.shallow && !options.parameters.forceDeep) { // overview and deep data is the same, don't hit server unnecessarily return baseScene; } const entryId = new URL(url).pathname.match(/\/(\d+)/)?.[1]; const { session, instanceToken } = options.beforeFetchScenes || await getSession(site, options.parameters); const res = await http.get(`https://site-api.project1service.com/v2/releases/${entryId}`, { session, interval: options.parameters.interval, concurrency: options.parameters.concurrency, headers: { Instance: instanceToken, 'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites }, }); if (res.status === 200 && res.body.result) { return { scene: scrapeRelease(res.body.result, url, site), }; } return null; } async function fetchProfile({ name: actorName }, { entity, parameters }, include) { // const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`; const { session, instanceToken } = await getSession(entity, parameters); const res = await http.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, { session, interval: parameters.interval, concurrency: parameters.concurrency, headers: { Instance: instanceToken, 'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites }, }); if (res.statusCode === 200) { const actorData = res.body.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase()); if (actorData) { const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`; const actorReleasesRes = include.includeActorScenes && await http.get(actorReleasesUrl, { session, interval: parameters.interval, concurrency: parameters.concurrency, headers: { Instance: instanceToken, }, }); if (actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) { return scrapeProfile(actorData, actorReleasesRes.body.result, entity.slug); } return scrapeProfile(actorData, [], entity.slug); } } return null; } module.exports = { beforeNetwork: getSession, beforeFetchScenes: getSession, requireBeforeNetwork: false, scrapeLatestX, fetchLatest, fetchUpcoming, fetchScene: fetchRelease, fetchMovie: fetchRelease, fetchProfile, };