forked from DebaucheryLibrarian/traxxx
				
			Updated Insex scraper.
This commit is contained in:
		
							parent
							
								
									829a285a2d
								
							
						
					
					
						commit
						39813d4461
					
				|  | @ -89,6 +89,10 @@ module.exports = { | |||
| 			'uksinners', | ||||
| 			// mindgeek
 | ||||
| 			'pornhub', | ||||
| 			// insex
 | ||||
| 			'paintoy', | ||||
| 			'aganmedon', | ||||
| 			'sensualpain', | ||||
| 		], | ||||
| 		networks: [ | ||||
| 			// dummy network for testing
 | ||||
|  |  | |||
|  | @ -4219,7 +4219,6 @@ const sites = [ | |||
| 		tags: ['bdsm'], | ||||
| 		parent: 'insex', | ||||
| 		parameters: { | ||||
| 			scraper: 'alt', | ||||
| 			latest: 'https://www.sexuallybroken.com/sb', | ||||
| 		}, | ||||
| 	}, | ||||
|  | @ -4230,13 +4229,20 @@ const sites = [ | |||
| 		url: 'https://www.infernalrestraints.com', | ||||
| 		tags: ['bdsm'], | ||||
| 		parent: 'insex', | ||||
| 		parameters: { | ||||
| 			latest: 'https://www.infernalrestraints.com/ir', | ||||
| 		}, | ||||
| 	}, | ||||
| 	{ | ||||
| 		slug: 'hardtied', | ||||
| 		name: 'Hardtied', | ||||
| 		alias: ['ht'], | ||||
| 		url: 'https://www.hardtied.com', | ||||
| 		tags: ['bdsm'], | ||||
| 		parent: 'insex', | ||||
| 		parameters: { | ||||
| 			latest: 'https://www.hardtied.com/ht', | ||||
| 		}, | ||||
| 	}, | ||||
| 	{ | ||||
| 		slug: 'realtimebondage', | ||||
|  | @ -4245,6 +4251,9 @@ const sites = [ | |||
| 		url: 'https://www.realtimebondage.com', | ||||
| 		tags: ['bdsm', 'live'], | ||||
| 		parent: 'insex', | ||||
| 		parameters: { | ||||
| 			latest: 'https://www.realtimebondage.com/rtb', | ||||
| 		}, | ||||
| 	}, | ||||
| 	{ | ||||
| 		slug: 'topgrl', | ||||
|  | @ -4254,7 +4263,6 @@ const sites = [ | |||
| 		tags: ['bdsm', 'femdom'], | ||||
| 		parent: 'insex', | ||||
| 		parameters: { | ||||
| 			scraper: 'alt', | ||||
| 			latest: 'https://www.topgrl.com/tg', | ||||
| 		}, | ||||
| 	}, | ||||
|  |  | |||
|  | @ -5,6 +5,27 @@ const http = require('../utils/http'); | |||
| const slugify = require('../utils/slugify'); | ||||
| 
 | ||||
/**
 * Build release summaries from the scene tiles of a "latest" overview page.
 *
 * For every tile this reads the scene URL, title, date, actor names and cover
 * image through the tile's `query` helper, derives the poster from the cover
 * file name, and composes an entry ID from the date plus the first five words
 * of the title.
 *
 * NOTE(review): the guards below assume the `query` helpers return a falsy
 * value (null/undefined/empty string) when nothing matches — confirm against
 * the query utility.
 *
 * @param {Array<{ query: object }>} scenes - extracted tiles, each exposing a `query` helper
 * @param {{ parameters: { latest: string } }} site - channel whose `parameters.latest` is used as URL origin
 * @returns {object[]} one release object per tile, in input order
 */
function scrapeLatest(scenes, site) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('figure a', 'href', { origin: site.parameters.latest });

		release.title = query.cnt('.has-text-weight-bold, .is-size-6');
		release.date = query.date('span.tag', 'YYYY-MM-DD');
		release.actors = query.cnts('a.tag');

		const cover = query.img('.image img');

		// A tile without an image must not throw and take the whole page down with it.
		if (cover) {
			release.poster = cover.replace('poster_noplay', 'trailer_noplay');
			release.covers = [cover];
		}

		// The entry ID needs both parts; without them it is left unset instead of
		// throwing on a missing title or embedding an invalid date string.
		if (release.title && release.date) {
			const shortTitle = release.title.split(/\s+/).slice(0, 5).join(' ');

			release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(shortTitle)}`;
		}

		return release;
	});
}
| 
 | ||||
| function scrapeLatestLegacy(scenes, site) { | ||||
| 	return scenes.map(({ query }) => { | ||||
| 		// if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site);
 | ||||
| 		const release = {}; | ||||
|  | @ -43,35 +64,39 @@ function scrapeLatest(scenes, site) { | |||
| 			cover, | ||||
| 		]]; | ||||
| 
 | ||||
| 		console.log(release); | ||||
| 
 | ||||
| 		return release; | ||||
| 	}); | ||||
| } | ||||
| 
 | ||||
| function scrapeLatestAlt(scenes, site) { | ||||
| 	return scenes.map(({ query }) => { | ||||
| 		const release = {}; | ||||
| async function scrapeScene({ query }, url, channel, session) { | ||||
| 	const release = {}; | ||||
| 
 | ||||
| 		release.url = query.url('figure a', 'href', { origin: site.parameters.latest }); | ||||
| 	release.title = query.cnt('.columns div.is-size-5.has-text-weight-bold'); | ||||
| 	release.description = query.cnt('.has-background-black-ter > div:nth-child(4)'); | ||||
| 	release.date = query.date('.has-text-white-ter span.tag', 'YYYY-MM-DD'); | ||||
| 
 | ||||
| 		release.title = query.cnt('.has-text-weight-bold, .is-size-6'); | ||||
| 		release.date = query.date('span.tag', 'YYYY-MM-DD'); | ||||
| 		release.actors = query.cnts('a.tag'); | ||||
| 	release.actors = query.cnts('.has-text-white-ter a.tag[href*="home.php"]'); | ||||
| 	release.tags = query.cnts('.has-background-black-ter > div:nth-child(6) > span'); | ||||
| 
 | ||||
| 		const cover = query.img('.image img'); | ||||
| 	release.poster = query.img('#videoPlayer, #iodvideo', 'poster'); | ||||
| 	release.photos = Array.from(query.html('body > div:nth-child(6)').matchAll(/src="(http.*jpg)"/g), (match) => match[1]); | ||||
| 
 | ||||
| 		release.poster = cover.replace('poster_noplay', 'trailer_noplay'); | ||||
| 		release.covers = [cover]; | ||||
| 	release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`; | ||||
| 
 | ||||
| 		release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title.split(/\s+/).slice(0, 5).join(' '))}`; | ||||
| 		console.log('alt', release); | ||||
| 	release.trailer = query.video(); | ||||
| 
 | ||||
| 		return release; | ||||
| 	}); | ||||
| 	if (!release.trailer) { | ||||
| 		const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session }); | ||||
| 
 | ||||
| 		if (trailerRes.ok) { | ||||
| 			release.trailer = trailerRes.body; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return release; | ||||
| } | ||||
| 
 | ||||
| function scrapeScene({ query }, site) { | ||||
| function scrapeSceneLegacy({ query }, site) { | ||||
| 	const release = {}; | ||||
| 
 | ||||
| 	const titleEl = query.q('.articleTitleText'); | ||||
|  | @ -97,60 +122,26 @@ function scrapeScene({ query }, site) { | |||
| 	const trailer = query.trailer(); | ||||
| 	if (trailer) release.trailer = { src: trailer }; | ||||
| 
 | ||||
| 	console.log(release); | ||||
| 
 | ||||
| 	return release; | ||||
| } | ||||
| 
 | ||||
| async function scrapeSceneAlt({ query }, url, channel, session) { | ||||
| 	const release = {}; | ||||
| 
 | ||||
| 	release.title = query.cnt('.columns div.is-size-5'); | ||||
| 	release.description = query.cnt('.has-background-black-ter > div:nth-child(4)'); | ||||
| 	release.date = query.date('.has-text-white-ter span.tag', 'YYYY-MM-DD'); | ||||
| 
 | ||||
| 	release.actors = query.cnts('.has-text-white-ter a.tag[href*="home.php"]'); | ||||
| 	release.tags = query.cnts('.has-background-black-ter > div:nth-child(6) > span'); | ||||
| 
 | ||||
| 	release.poster = query.img('#videoPlayer, #iodvideo', 'poster'); | ||||
| 	release.photos = query.imgs('body > div:nth-child(6) img'); | ||||
| 
 | ||||
| 	release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`; | ||||
| 
 | ||||
| 	release.trailer = query.video(); | ||||
| 
 | ||||
| 	if (!release.trailer) { | ||||
| 		const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session }); | ||||
| 
 | ||||
| 		if (trailerRes.ok) { | ||||
| 			release.trailer = trailerRes.body; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return release; | ||||
| } | ||||
| 
 | ||||
| async function fetchLatest(site, page = 1) { | ||||
| 	const url = (site.parameters?.scraper === 'alt' && `${site.parameters.latest}/home.php?o=latest&p=${page}`) | ||||
| 		// || (site.slug === 'paintoy' && `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`) // paintoy's site is (was?) partially broken, use front page
 | ||||
| 		|| `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`; | ||||
| 
 | ||||
| 	const res = await ((site.parameters?.scraper === 'alt' && qu.getAll(url, 'body > .columns .column')) | ||||
| 		// || (site.slug === 'paintoy' && qu.getAll(url, '#articleTable table[cellspacing="2"]'))
 | ||||
| 		|| qu.get(url)); // JSON containing html as a property
 | ||||
| 	const url = `${site.parameters.latest}/home.php?o=latest&p=${page}`; | ||||
| 	const res = await qu.getAll(url, 'body > .columns .column', { cookie: 'consent=yes' }); | ||||
| 
 | ||||
| 	if (res.ok) { | ||||
| 		if (site.parameters?.scraper === 'alt') { | ||||
| 			return scrapeLatestAlt(res.items, site); | ||||
| 		} | ||||
| 		return scrapeLatest(res.items, site); | ||||
| 	} | ||||
| 
 | ||||
| 		/* | ||||
| 		if (site.slug === 'paintoy') { | ||||
| 			return scrapeLatest(res.items, site); | ||||
| 		} | ||||
| 		*/ | ||||
| 	return res.status; | ||||
| } | ||||
| 
 | ||||
| 		return scrapeLatest(qu.extractAll(res.body.html, '#articleTable > tbody > tr:nth-child(2) > td > table'), site); | ||||
| async function fetchLatestLegacy(site, page = 1) { | ||||
| 	const url = `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`; | ||||
| 	const res = await qu.get(url); // JSON containing html as a property
 | ||||
| 
 | ||||
| 	if (res.ok) { | ||||
| 		return scrapeLatestLegacy(qu.extractAll(res.body.html, '#articleTable > tbody > tr:nth-child(2) > td > table'), site); | ||||
| 	} | ||||
| 
 | ||||
| 	return res.status; | ||||
|  | @ -158,14 +149,10 @@ async function fetchLatest(site, page = 1) { | |||
| 
 | ||||
| async function fetchScene(url, site) { | ||||
| 	const session = http.session(); | ||||
| 	const res = await qu.get(url, null, null, { session }); | ||||
| 	const res = await qu.get(url, null, { cookie: 'consent=yes' }, { session }); | ||||
| 
 | ||||
| 	if (res.ok) { | ||||
| 		if (site.parameters?.scraper === 'alt') { | ||||
| 			return scrapeSceneAlt(res.item, url, site, session); | ||||
| 		} | ||||
| 
 | ||||
| 		return scrapeScene(res.item, site); | ||||
| 		return scrapeScene(res.item, url, site, session); | ||||
| 	} | ||||
| 
 | ||||
| 	return res.status; | ||||
|  | @ -174,4 +161,8 @@ async function fetchScene(url, site) { | |||
| module.exports = { | ||||
| 	fetchLatest, | ||||
| 	fetchScene, | ||||
| 	legacy: { | ||||
| 		fetchLatest: fetchLatestLegacy, | ||||
| 		scrapeScene: scrapeSceneLegacy, | ||||
| 	}, | ||||
| }; | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue