Updated AnalVids.
|  | @ -375,7 +375,7 @@ module.exports = { | ||||||
| 		thumbnailQuality: 100, | 		thumbnailQuality: 100, | ||||||
| 		lazySize: 90, | 		lazySize: 90, | ||||||
| 		lazyQuality: 90, | 		lazyQuality: 90, | ||||||
| 		trailerQuality: [480, 540, 360, 720, 960, 1080, 320, 1440, 1600, 1920, 2160, 270, 240, 180], | 		trailerQuality: [540, 720, 480, 360, 960, 1080, 320, 1440, 1600, 1920, 2160, 270, 240, 180], | ||||||
| 		limit: 25, // max number of photos per release
 | 		limit: 25, // max number of photos per release
 | ||||||
| 		attempts: 2, | 		attempts: 2, | ||||||
| 		fetchStreams: true, | 		fetchStreams: true, | ||||||
|  |  | ||||||
|  | @ -81,7 +81,7 @@ | ||||||
|                 "tunnel": "0.0.6", |                 "tunnel": "0.0.6", | ||||||
|                 "ua-parser-js": "^1.0.32", |                 "ua-parser-js": "^1.0.32", | ||||||
|                 "undici": "^4.13.0", |                 "undici": "^4.13.0", | ||||||
|                 "unprint": "^0.10.3", |                 "unprint": "^0.10.7", | ||||||
|                 "url-pattern": "^1.0.3", |                 "url-pattern": "^1.0.3", | ||||||
|                 "v-tooltip": "^2.0.3", |                 "v-tooltip": "^2.0.3", | ||||||
|                 "video.js": "^7.11.4", |                 "video.js": "^7.11.4", | ||||||
|  | @ -17626,9 +17626,9 @@ | ||||||
|             } |             } | ||||||
|         }, |         }, | ||||||
|         "node_modules/unprint": { |         "node_modules/unprint": { | ||||||
|             "version": "0.10.3", |             "version": "0.10.7", | ||||||
|             "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.3.tgz", |             "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.7.tgz", | ||||||
|             "integrity": "sha512-ui8BbBo4JmKR++w50rSUFyg8X6l9EAbLRpATxdjxyS7yYevjcGMEt3HT0nrBG2JXDMkLwWZ+WoOaz3qC5stSxQ==", |             "integrity": "sha512-AuWdWvxkNrFSa8jGE3HIUJ7Pwvp4GR5kJ6eYu5zO8Devizc2D7qifvQv1ksLa9ZenKRjmsgoFVVHOAroGFNE3g==", | ||||||
|             "dependencies": { |             "dependencies": { | ||||||
|                 "axios": "^0.27.2", |                 "axios": "^0.27.2", | ||||||
|                 "bottleneck": "^2.19.5", |                 "bottleneck": "^2.19.5", | ||||||
|  | @ -32536,9 +32536,9 @@ | ||||||
|             "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=" |             "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=" | ||||||
|         }, |         }, | ||||||
|         "unprint": { |         "unprint": { | ||||||
|             "version": "0.10.3", |             "version": "0.10.7", | ||||||
|             "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.3.tgz", |             "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.7.tgz", | ||||||
|             "integrity": "sha512-ui8BbBo4JmKR++w50rSUFyg8X6l9EAbLRpATxdjxyS7yYevjcGMEt3HT0nrBG2JXDMkLwWZ+WoOaz3qC5stSxQ==", |             "integrity": "sha512-AuWdWvxkNrFSa8jGE3HIUJ7Pwvp4GR5kJ6eYu5zO8Devizc2D7qifvQv1ksLa9ZenKRjmsgoFVVHOAroGFNE3g==", | ||||||
|             "requires": { |             "requires": { | ||||||
|                 "axios": "^0.27.2", |                 "axios": "^0.27.2", | ||||||
|                 "bottleneck": "^2.19.5", |                 "bottleneck": "^2.19.5", | ||||||
|  |  | ||||||
|  | @ -140,7 +140,7 @@ | ||||||
|         "tunnel": "0.0.6", |         "tunnel": "0.0.6", | ||||||
|         "ua-parser-js": "^1.0.32", |         "ua-parser-js": "^1.0.32", | ||||||
|         "undici": "^4.13.0", |         "undici": "^4.13.0", | ||||||
|         "unprint": "^0.10.3", |         "unprint": "^0.10.7", | ||||||
|         "url-pattern": "^1.0.3", |         "url-pattern": "^1.0.3", | ||||||
|         "v-tooltip": "^2.0.3", |         "v-tooltip": "^2.0.3", | ||||||
|         "video.js": "^7.11.4", |         "video.js": "^7.11.4", | ||||||
|  |  | ||||||
| Before Width: | Height: | Size: 19 KiB After Width: | Height: | Size: 29 KiB | 
| Before Width: | Height: | Size: 1.6 KiB After Width: | Height: | Size: 1.3 KiB | 
| Before Width: | Height: | Size: 7.4 KiB After Width: | Height: | Size: 1.3 KiB | 
| Before Width: | Height: | Size: 1.6 KiB After Width: | Height: | Size: 1.3 KiB | 
| Before Width: | Height: | Size: 6.8 KiB After Width: | Height: | Size: 4.5 KiB | 
| Before Width: | Height: | Size: 2.4 KiB After Width: | Height: | Size: 2.4 KiB | 
| Before Width: | Height: | Size: 7.2 KiB After Width: | Height: | Size: 7.2 KiB | 
| Before Width: | Height: | Size: 2.4 KiB After Width: | Height: | Size: 2.4 KiB | 
| Before Width: | Height: | Size: 2.5 KiB After Width: | Height: | Size: 2.6 KiB | 
| Before Width: | Height: | Size: 6.8 KiB After Width: | Height: | Size: 4.5 KiB | 
| After Width: | Height: | Size: 29 KiB | 
| After Width: | Height: | Size: 19 KiB | 
| After Width: | Height: | Size: 7.4 KiB | 
| After Width: | Height: | Size: 1.6 KiB | 
| Before Width: | Height: | Size: 19 KiB After Width: | Height: | Size: 29 KiB | 
| Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 8.8 KiB | 
| Before Width: | Height: | Size: 2.4 KiB After Width: | Height: | Size: 2.4 KiB | 
| Before Width: | Height: | Size: 7.2 KiB After Width: | Height: | Size: 7.2 KiB | 
| Before Width: | Height: | Size: 2.4 KiB After Width: | Height: | Size: 2.4 KiB | 
| Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB | 
| Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 8.8 KiB | 
|  | @ -1,8 +1,6 @@ | ||||||
| 'use strict'; | 'use strict'; | ||||||
| 
 | 
 | ||||||
| const { JSDOM } = require('jsdom'); | const unprint = require('unprint'); | ||||||
| const cheerio = require('cheerio'); |  | ||||||
| const moment = require('moment'); |  | ||||||
| 
 | 
 | ||||||
| const http = require('../utils/http'); | const http = require('../utils/http'); | ||||||
| const slugify = require('../utils/slugify'); | const slugify = require('../utils/slugify'); | ||||||
|  | @ -17,187 +15,132 @@ function extractTitle(originalTitle) { | ||||||
| 	return { shootId, title }; | 	return { shootId, title }; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| function getPoster(posterElement, sceneId) { | function scrapeAll(scenes, channel) { | ||||||
| 	const posterStyle = posterElement.attr('style'); | 	return scenes.map(({ query }) => { | ||||||
|  | 		const release = {}; | ||||||
| 
 | 
 | ||||||
| 	if (posterStyle) { | 		release.url = query.url('.card-scene__view > a'); | ||||||
| 		return posterStyle.slice(posterStyle.indexOf('(') + 1, -1); | 		release.entryId = query.dataset(null, 'content') || new URL(release.url).pathname.match(/watch\/(\d+)/)?.[1]; | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	const posterRange = posterElement.attr('data-casting'); | 		release.title = query.content('.card-scene__text'); | ||||||
| 	const posterRangeData = posterRange ? JSON.parse(posterRange) : null; | 		release.shootId = extractTitle(release.title).shootId; | ||||||
| 	const posterTimeRange = posterRangeData[Math.floor(Math.random() * posterRangeData.length)]; |  | ||||||
| 
 | 
 | ||||||
| 	if (!posterTimeRange) { | 		release.date = query.date('.label--time:nth-child(2)', 'YYYY-MM-DD'); | ||||||
| 		return null; | 		release.duration = query.duration('.label--time:first-child'); | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	if (typeof posterTimeRange === 'number') { | 		release.poster = query.img('.card-scene__view img', { attribute: 'data-src' }); | ||||||
| 		// poster time is already a single time value
 | 		const caps = query.json('.card-scene__view > a', { attribute: 'data-casting' })?.map((timestamp) => `${channel.url}/casting/${release.entryId}/${timestamp}`); | ||||||
| 		return `https://analvids.com/casting/${sceneId}/${posterTimeRange}`; |  | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	const [max, min] = posterTimeRange.split('-'); | 		if (caps && release.poster) { | ||||||
| 	const posterTime = Math.floor(Math.random() * (Number(max) - Number(min) + 1) + Number(min)); | 			release.caps = caps; | ||||||
|  | 		} else if (caps) { | ||||||
|  | 			[release.poster, ...release.caps] = caps; | ||||||
|  | 		} | ||||||
| 
 | 
 | ||||||
| 	return `https://analvids.com/casting/${sceneId}/${posterTime}`; | 		release.teaser = query.video('.card-scene__view > a', { attribute: 'data-preview' }); | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| function scrapeAll(html) { | 		return release; | ||||||
| 	const $ = cheerio.load(html, { normalizeWhitespace: true }); |  | ||||||
| 	const scenesElements = $('.thumbnails > div').toArray(); |  | ||||||
| 
 |  | ||||||
| 	return scenesElements.map((element) => { |  | ||||||
| 		const sceneLinkElement = $(element).find('.thumbnail-title a'); |  | ||||||
| 		const url = sceneLinkElement.attr('href'); |  | ||||||
| 
 |  | ||||||
| 		const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
 |  | ||||||
| 		const { shootId, title } = extractTitle(originalTitle); |  | ||||||
| 		const entryId = new URL(url).pathname.split('/')[2]; |  | ||||||
| 
 |  | ||||||
| 		const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate(); |  | ||||||
| 
 |  | ||||||
| 		const sceneId = $(element).attr('data-content'); |  | ||||||
| 		const posterElement = $(element).find('.thumbnail-avatar'); |  | ||||||
| 
 |  | ||||||
| 		const poster = getPoster(posterElement, sceneId); |  | ||||||
| 
 |  | ||||||
| 		return { |  | ||||||
| 			url, |  | ||||||
| 			shootId, |  | ||||||
| 			entryId, |  | ||||||
| 			title, |  | ||||||
| 			date, |  | ||||||
| 			poster, |  | ||||||
| 		}; |  | ||||||
| 	}); | 	}); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function scrapeScene(html, url, site, useGallery) { | function scrapeScene({ query }, { url }) { | ||||||
| 	const $ = cheerio.load(html, { normalizeWhitespace: true }); | 	const release = {}; | ||||||
| 	const playerObject = $('script:contains("new WatchPage")').html(); |  | ||||||
| 	const playerData = playerObject && playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.lastIndexOf('},') + 1); |  | ||||||
| 	const data = playerData && JSON.parse(playerData); |  | ||||||
| 
 | 
 | ||||||
| 	const release = { url }; | 	release.entryId = new URL(url).pathname.match(/watch\/(\d+)/)?.[1]; | ||||||
| 
 | 
 | ||||||
| 	const originalTitle = $('h1.watchpage-title').text().trim(); | 	const featuring = query.content('.watch__title .watch__featuring_models'); | ||||||
| 	const { shootId, title } = extractTitle(originalTitle); |  | ||||||
| 
 | 
 | ||||||
| 	release.shootId = shootId; | 	release.title = query.content('.watch__title').replace(featuring, ''); | ||||||
| 	release.entryId = new URL(url).pathname.split('/')[2]; | 	release.description = query.content('.text-mob-more'); | ||||||
|  | 	release.shootId = extractTitle(release.title).shootId; | ||||||
| 
 | 
 | ||||||
| 	release.title = title; | 	release.date = query.date('.bi-calendar3', 'YYYY-MM-DD'); | ||||||
| 	release.date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate(); | 	release.duration = query.duration('.bi-clock'); | ||||||
| 
 | 
 | ||||||
| 	const [actorsElement, tagsElement, descriptionElement] = $('.scene-description__row').toArray(); | 	release.actors = query.all('.watch__title a[href*="/model"]').map((el) => ({ | ||||||
|  | 		name: unprint.query.content(el), | ||||||
|  | 		url: unprint.query.url(el, null), | ||||||
|  | 	})); | ||||||
| 
 | 
 | ||||||
| 	release.description = $('meta[name="description"]')?.attr('content')?.trim() | 	release.tags = query.contents('.genres-list a[href*="/genre"]'); | ||||||
|         || (descriptionElement && $(descriptionElement).find('dd').text().trim()); |  | ||||||
| 
 | 
 | ||||||
| 	release.actors = $(actorsElement) | 	release.studio = slugify(query.content('.genres-list a[href*="/studios"]'), '', { removePunctuation: true }); | ||||||
| 		.find('a[href*="com/model"]') |  | ||||||
| 		.map((actorIndex, actorElement) => $(actorElement).text()).toArray(); |  | ||||||
| 
 | 
 | ||||||
| 	release.duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds(); | 	release.poster = query.img('.watch__video video', { attribute: 'data-poster' }); | ||||||
| 	release.tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); | 	release.photoCount = query.number('.bi-eye'); | ||||||
| 
 | 
 | ||||||
| 	const photos = useGallery | 	release.trailer = query.all('.watch__video source').map((el) => ({ | ||||||
| 		? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray() | 		src: unprint.query.video(el, null, { attribute: 'src' }), | ||||||
| 		: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray(); | 		quality: unprint.query.number(el, null, { attribute: 'size' }), | ||||||
| 
 | 	})); | ||||||
| 	release.photos = photos.map((source) => { |  | ||||||
| 		// source without parameters sometimes serves larger preview photo
 |  | ||||||
| 		const { origin, pathname } = new URL(source); |  | ||||||
| 
 |  | ||||||
| 		return `${origin}${pathname}`; |  | ||||||
| 	}); |  | ||||||
| 
 |  | ||||||
| 	const posterStyle = $('#player').attr('style'); |  | ||||||
| 	const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1); |  | ||||||
| 
 |  | ||||||
| 	release.poster = poster || release.photos.slice(Math.floor(release.photos.length / 3) * -1); // poster unavailable, try last 1/3rd of high res photos as fallback
 |  | ||||||
| 
 |  | ||||||
| 	if (data) { |  | ||||||
| 		const qualityMap = { |  | ||||||
| 			web: 240, |  | ||||||
| 			vga: 480, |  | ||||||
| 			hd: 720, |  | ||||||
| 			'1080p': 1080, |  | ||||||
| 		}; |  | ||||||
| 
 |  | ||||||
| 		release.trailer = data.clip.qualities.map((trailer) => ({ |  | ||||||
| 			src: trailer.src, |  | ||||||
| 			type: trailer.type, |  | ||||||
| 			quality: qualityMap[trailer.quality] || trailer.quality, |  | ||||||
| 		})); |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	const studioName = $('.watchpage-studioname').first().text().trim(); |  | ||||||
| 	release.studio = slugify(studioName, '', { removePunctuation: true }); |  | ||||||
| 
 | 
 | ||||||
| 	return release; | 	return release; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function scrapeProfile(html, _url, actorName) { | function scrapeProfile({ query }, url, channel) { | ||||||
| 	const { document } = new JSDOM(html).window; | 	const profile = { url }; | ||||||
| 
 | 
 | ||||||
| 	const profile = { | 	profile.nationality = query.content('.model__info a[href*="/nationality"]'); | ||||||
| 		name: actorName, | 	profile.age = query.number('//td[contains(text(), "Age")]/following-sibling::td'); | ||||||
| 	}; |  | ||||||
| 
 | 
 | ||||||
| 	const avatarEl = document.querySelector('.model--avatar img[src^="http"]'); | 	profile.avatar = query.img('.model__left img'); | ||||||
| 	const entries = Array.from(document.querySelectorAll('.model--description tr'), (el) => el.textContent.replace(/\n/g, '').split(':')); |  | ||||||
| 
 | 
 | ||||||
| 	const bio = entries | 	profile.scenes = scrapeAll(unprint.initAll(query.all('.card-scene')), channel); | ||||||
| 		.filter((entry) => entry.length === 2) // ignore entries without ':' (About section, see Blanche Bradburry)
 |  | ||||||
| 		.reduce((acc, [key, value]) => ({ ...acc, [key.trim()]: value.trim() }), {}); |  | ||||||
| 
 |  | ||||||
| 	profile.birthPlace = bio.Nationality; |  | ||||||
| 
 |  | ||||||
| 	if (bio.Age) profile.age = bio.Age; |  | ||||||
| 	if (avatarEl) profile.avatar = avatarEl.src; |  | ||||||
| 
 |  | ||||||
| 	profile.releases = scrapeAll(html); |  | ||||||
| 
 | 
 | ||||||
| 	return profile; | 	return profile; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function fetchLatest(site, page = 1) { | async function fetchLatest(channel, page) { | ||||||
| 	const res = await http.get(`${site.url}/new-videos/${page}`); | 	const res = await unprint.get(`${channel.url}/new-videos/${page}`, { selectAll: '.card-scene' }); | ||||||
| 
 | 
 | ||||||
| 	return scrapeAll(res.body.toString(), site); | 	if (res.ok) { | ||||||
|  | 		return scrapeAll(res.context, channel); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return res.status; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function fetchScene(url, site) { | async function getActorUrl(actor, channel) { | ||||||
| 	const useGallery = true; | 	if (actor.url) { | ||||||
|  | 		return actor.url; | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	// TODO: fall back on screenshots when gallery is not available
 | 	const searchRes = await http.get(`${channel.url}/api/autocomplete/search?q=${slugify(actor.name, '+')}`); | ||||||
| 	const res = useGallery |  | ||||||
| 		? await http.get(`${url}/gallery#gallery`) |  | ||||||
| 		: await http.get(`${url}/screenshots#screenshots`); |  | ||||||
| 
 | 
 | ||||||
| 	return scrapeScene(res.body.toString(), url, site, useGallery); | 	if (!searchRes.ok) { | ||||||
| } | 		return searchRes.status; | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| async function fetchProfile({ name: actorName }) { | 	const result = searchRes.body.terms.find((item) => item.type === 'model'); | ||||||
| 	const res = await http.get(`https://www.analvids.com/api/autocomplete/search?q=${actorName.replace(' ', '+')}`); |  | ||||||
| 	const data = res.body; |  | ||||||
| 
 |  | ||||||
| 	const result = data.terms.find((item) => item.type === 'model'); |  | ||||||
| 
 | 
 | ||||||
| 	if (result) { | 	if (result) { | ||||||
| 		const bioRes = await http.get(result.url); | 		return result.url; | ||||||
| 		const html = bioRes.body.toString(); |  | ||||||
| 
 |  | ||||||
| 		return scrapeProfile(html, result.url, actorName); |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	return null; | 	return null; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | async function fetchProfile(actor, { channel }) { | ||||||
|  | 	const actorUrl = await getActorUrl(actor, channel); | ||||||
|  | 
 | ||||||
|  | 	if (typeof actorUrl !== 'string') { | ||||||
|  | 		return actorUrl; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	const bioRes = await unprint.get(actorUrl); | ||||||
|  | 
 | ||||||
|  | 	if (bioRes.ok) { | ||||||
|  | 		return scrapeProfile(bioRes.context, actorUrl, channel); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return bioRes.status; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| module.exports = { | module.exports = { | ||||||
| 	fetchLatest, | 	fetchLatest, | ||||||
|  | 	scrapeScene: { | ||||||
|  | 		scraper: scrapeScene, | ||||||
|  | 		unprint: true, | ||||||
|  | 	}, | ||||||
| 	fetchProfile, | 	fetchProfile, | ||||||
| 	fetchScene, |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -0,0 +1,203 @@ | ||||||
|  | 'use strict'; | ||||||
|  | 
 | ||||||
|  | const { JSDOM } = require('jsdom'); | ||||||
|  | const cheerio = require('cheerio'); | ||||||
|  | const moment = require('moment'); | ||||||
|  | 
 | ||||||
|  | const http = require('../utils/http'); | ||||||
|  | const slugify = require('../utils/slugify'); | ||||||
|  | 
 | ||||||
|  | function extractTitle(originalTitle) { | ||||||
|  | 	const titleComponents = originalTitle.split(' '); | ||||||
|  | 	// const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OTS|NF|NT|AX|RV|CM|BTG)\d+/); // detect studio prefixes
 | ||||||
|  | 	const sceneIdMatch = titleComponents.slice(-1)[0].match(/\w+\d+\s*$/); // detect studio prefixes
 | ||||||
|  | 	const shootId = sceneIdMatch ? sceneIdMatch[0] : null; | ||||||
|  | 	const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle; | ||||||
|  | 
 | ||||||
|  | 	return { shootId, title }; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | function getPoster(posterElement, sceneId) { | ||||||
|  | 	const posterStyle = posterElement.attr('style'); | ||||||
|  | 
 | ||||||
|  | 	if (posterStyle) { | ||||||
|  | 		return posterStyle.slice(posterStyle.indexOf('(') + 1, -1); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	const posterRange = posterElement.attr('data-casting'); | ||||||
|  | 	const posterRangeData = posterRange ? JSON.parse(posterRange) : null; | ||||||
|  | 	const posterTimeRange = posterRangeData[Math.floor(Math.random() * posterRangeData.length)]; | ||||||
|  | 
 | ||||||
|  | 	if (!posterTimeRange) { | ||||||
|  | 		return null; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (typeof posterTimeRange === 'number') { | ||||||
|  | 		// poster time is already a single time value
 | ||||||
|  | 		return `https://analvids.com/casting/${sceneId}/${posterTimeRange}`; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	const [max, min] = posterTimeRange.split('-'); | ||||||
|  | 	const posterTime = Math.floor(Math.random() * (Number(max) - Number(min) + 1) + Number(min)); | ||||||
|  | 
 | ||||||
|  | 	return `https://analvids.com/casting/${sceneId}/${posterTime}`; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | function scrapeAll(html) { | ||||||
|  | 	const $ = cheerio.load(html, { normalizeWhitespace: true }); | ||||||
|  | 	const scenesElements = $('.thumbnails > div').toArray(); | ||||||
|  | 
 | ||||||
|  | 	return scenesElements.map((element) => { | ||||||
|  | 		const sceneLinkElement = $(element).find('.thumbnail-title a'); | ||||||
|  | 		const url = sceneLinkElement.attr('href'); | ||||||
|  | 
 | ||||||
|  | 		const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
 | ||||||
|  | 		const { shootId, title } = extractTitle(originalTitle); | ||||||
|  | 		const entryId = new URL(url).pathname.split('/')[2]; | ||||||
|  | 
 | ||||||
|  | 		const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate(); | ||||||
|  | 
 | ||||||
|  | 		const sceneId = $(element).attr('data-content'); | ||||||
|  | 		const posterElement = $(element).find('.thumbnail-avatar'); | ||||||
|  | 
 | ||||||
|  | 		const poster = getPoster(posterElement, sceneId); | ||||||
|  | 
 | ||||||
|  | 		return { | ||||||
|  | 			url, | ||||||
|  | 			shootId, | ||||||
|  | 			entryId, | ||||||
|  | 			title, | ||||||
|  | 			date, | ||||||
|  | 			poster, | ||||||
|  | 		}; | ||||||
|  | 	}); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | async function scrapeScene(html, url, site, useGallery) { | ||||||
|  | 	const $ = cheerio.load(html, { normalizeWhitespace: true }); | ||||||
|  | 	const playerObject = $('script:contains("new WatchPage")').html(); | ||||||
|  | 	const playerData = playerObject && playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.lastIndexOf('},') + 1); | ||||||
|  | 	const data = playerData && JSON.parse(playerData); | ||||||
|  | 
 | ||||||
|  | 	const release = { url }; | ||||||
|  | 
 | ||||||
|  | 	const originalTitle = $('h1.watchpage-title').text().trim(); | ||||||
|  | 	const { shootId, title } = extractTitle(originalTitle); | ||||||
|  | 
 | ||||||
|  | 	release.shootId = shootId; | ||||||
|  | 	release.entryId = new URL(url).pathname.split('/')[2]; | ||||||
|  | 
 | ||||||
|  | 	release.title = title; | ||||||
|  | 	release.date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate(); | ||||||
|  | 
 | ||||||
|  | 	const [actorsElement, tagsElement, descriptionElement] = $('.scene-description__row').toArray(); | ||||||
|  | 
 | ||||||
|  | 	release.description = $('meta[name="description"]')?.attr('content')?.trim() | ||||||
|  |         || (descriptionElement && $(descriptionElement).find('dd').text().trim()); | ||||||
|  | 
 | ||||||
|  | 	release.actors = $(actorsElement) | ||||||
|  | 		.find('a[href*="com/model"]') | ||||||
|  | 		.map((actorIndex, actorElement) => $(actorElement).text()).toArray(); | ||||||
|  | 
 | ||||||
|  | 	release.duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds(); | ||||||
|  | 	release.tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); | ||||||
|  | 
 | ||||||
|  | 	const photos = useGallery | ||||||
|  | 		? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray() | ||||||
|  | 		: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray(); | ||||||
|  | 
 | ||||||
|  | 	release.photos = photos.map((source) => { | ||||||
|  | 		// source without parameters sometimes serves larger preview photo
 | ||||||
|  | 		const { origin, pathname } = new URL(source); | ||||||
|  | 
 | ||||||
|  | 		return `${origin}${pathname}`; | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	const posterStyle = $('#player').attr('style'); | ||||||
|  | 	const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1); | ||||||
|  | 
 | ||||||
|  | 	release.poster = poster || release.photos.slice(Math.floor(release.photos.length / 3) * -1); // poster unavailable, try last 1/3rd of high res photos as fallback
 | ||||||
|  | 
 | ||||||
|  | 	if (data) { | ||||||
|  | 		const qualityMap = { | ||||||
|  | 			web: 240, | ||||||
|  | 			vga: 480, | ||||||
|  | 			hd: 720, | ||||||
|  | 			'1080p': 1080, | ||||||
|  | 		}; | ||||||
|  | 
 | ||||||
|  | 		release.trailer = data.clip.qualities.map((trailer) => ({ | ||||||
|  | 			src: trailer.src, | ||||||
|  | 			type: trailer.type, | ||||||
|  | 			quality: qualityMap[trailer.quality] || trailer.quality, | ||||||
|  | 		})); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	const studioName = $('.watchpage-studioname').first().text().trim(); | ||||||
|  | 	release.studio = slugify(studioName, '', { removePunctuation: true }); | ||||||
|  | 
 | ||||||
|  | 	return release; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | async function scrapeProfile(html, _url, actorName) { | ||||||
|  | 	const { document } = new JSDOM(html).window; | ||||||
|  | 
 | ||||||
|  | 	const profile = { | ||||||
|  | 		name: actorName, | ||||||
|  | 	}; | ||||||
|  | 
 | ||||||
|  | 	const avatarEl = document.querySelector('.model--avatar img[src^="http"]'); | ||||||
|  | 	const entries = Array.from(document.querySelectorAll('.model--description tr'), (el) => el.textContent.replace(/\n/g, '').split(':')); | ||||||
|  | 
 | ||||||
|  | 	const bio = entries | ||||||
|  | 		.filter((entry) => entry.length === 2) // ignore entries without ':' (About section, see Blanche Bradburry)
 | ||||||
|  | 		.reduce((acc, [key, value]) => ({ ...acc, [key.trim()]: value.trim() }), {}); | ||||||
|  | 
 | ||||||
|  | 	profile.birthPlace = bio.Nationality; | ||||||
|  | 
 | ||||||
|  | 	if (bio.Age) profile.age = bio.Age; | ||||||
|  | 	if (avatarEl) profile.avatar = avatarEl.src; | ||||||
|  | 
 | ||||||
|  | 	profile.releases = scrapeAll(html); | ||||||
|  | 
 | ||||||
|  | 	return profile; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | async function fetchLatest(site, page = 1) { | ||||||
|  | 	const res = await http.get(`${site.url}/new-videos/${page}`); | ||||||
|  | 
 | ||||||
|  | 	return scrapeAll(res.body.toString(), site); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | async function fetchScene(url, site) { | ||||||
|  | 	const useGallery = true; | ||||||
|  | 
 | ||||||
|  | 	// TODO: fall back on screenshots when gallery is not available
 | ||||||
|  | 	const res = useGallery | ||||||
|  | 		? await http.get(`${url}/gallery#gallery`) | ||||||
|  | 		: await http.get(`${url}/screenshots#screenshots`); | ||||||
|  | 
 | ||||||
|  | 	return scrapeScene(res.body.toString(), url, site, useGallery); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | async function fetchProfile({ name: actorName }) { | ||||||
|  | 	const res = await http.get(`https://www.analvids.com/api/autocomplete/search?q=${actorName.replace(' ', '+')}`); | ||||||
|  | 	const data = res.body; | ||||||
|  | 
 | ||||||
|  | 	const result = data.terms.find((item) => item.type === 'model'); | ||||||
|  | 
 | ||||||
|  | 	if (result) { | ||||||
|  | 		const bioRes = await http.get(result.url); | ||||||
|  | 		const html = bioRes.body.toString(); | ||||||
|  | 
 | ||||||
|  | 		return scrapeProfile(html, result.url, actorName); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return null; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | module.exports = { | ||||||
|  | 	fetchLatest, | ||||||
|  | 	fetchProfile, | ||||||
|  | 	fetchScene, | ||||||
|  | }; | ||||||