Added virtual entity spawning for sites with multiple update pages (e.g. Elegant Angel). Fixed ffmpeg errors freezing the process. Refactored the Adult Empire/Elegant Angel scraper.
This commit is contained in:
		
							parent
							
								
									958c6d83fa
								
							
						
					
					
						commit
						bca677b0a8
					
				|  | @ -189,12 +189,8 @@ module.exports = { | |||
| 			'hotcrazymess', | ||||
| 			'thatsitcomshow', | ||||
| 		], | ||||
| 		[ | ||||
| 			// Adult DVD Empire
 | ||||
| 			'elegantangel', | ||||
| 			'westcoastproductions', | ||||
| 		], | ||||
| 		'21sextury', | ||||
| 		'adultempire', | ||||
| 		'julesjordan', | ||||
| 		'dorcelclub', | ||||
| 		'bang', | ||||
|  |  | |||
|  | @ -0,0 +1,65 @@ | |||
| const config = require('config'); | ||||
| 
 | ||||
| exports.up = async (knex) => { | ||||
| 	await knex.schema.alterTable('entities', (table) => { | ||||
| 		// internal options, as opposed to parameters for scraper options
 | ||||
| 		table.json('options'); | ||||
| 	}); | ||||
| 
 | ||||
| 	await knex.schema.alterTable('releases', (table) => { | ||||
| 		table.dropForeign('entity_id'); | ||||
| 
 | ||||
| 		table.foreign('entity_id') | ||||
| 			.references('id') | ||||
| 			.inTable('entities') | ||||
| 			.onDelete('cascade'); | ||||
| 	}); | ||||
| 
 | ||||
| 	await knex.schema.alterTable('releases_caps', (table) => { | ||||
| 		table.unique(['release_id', 'media_id']); | ||||
| 	}); | ||||
| 
 | ||||
| 	await knex.schema.createTable('movies_tags', (table) => { | ||||
| 		table.integer('tag_id') | ||||
| 			.references('id') | ||||
| 			.inTable('tags'); | ||||
| 
 | ||||
| 		table.integer('movie_id') | ||||
| 			.notNullable() | ||||
| 			.references('id') | ||||
| 			.inTable('movies') | ||||
| 			.onDelete('cascade'); | ||||
| 
 | ||||
| 		table.text('original_tag'); | ||||
| 
 | ||||
| 		table.text('source') | ||||
| 			.defaultTo('scraper'); | ||||
| 
 | ||||
| 		table.unique(['tag_id', 'movie_id']); | ||||
| 	}); | ||||
| 
 | ||||
| 	await knex.raw('GRANT ALL ON ALL TABLES IN SCHEMA public TO :visitor;', { | ||||
| 		visitor: knex.raw(config.database.query.user), | ||||
| 	}); | ||||
| }; | ||||
| 
 | ||||
| exports.down = async (knex) => { | ||||
| 	await knex.schema.alterTable('entities', (table) => { | ||||
| 		table.dropColumn('options'); | ||||
| 	}); | ||||
| 
 | ||||
| 	await knex.schema.alterTable('releases', (table) => { | ||||
| 		table.dropForeign('entity_id'); | ||||
| 
 | ||||
| 		table.foreign('entity_id') | ||||
| 			.references('id') | ||||
| 			.inTable('entities') | ||||
| 			.onDelete('no action'); | ||||
| 	}); | ||||
| 
 | ||||
| 	await knex.schema.alterTable('releases_caps', (table) => { | ||||
| 		table.dropUnique(['release_id', 'media_id']); | ||||
| 	}); | ||||
| 
 | ||||
| 	await knex.schema.dropTable('movies_tags'); | ||||
| }; | ||||
|  | @ -47,7 +47,7 @@ | |||
|                 "express-session": "^1.17.3", | ||||
|                 "face-api.js": "^0.22.2", | ||||
|                 "file-type": "^18.7.0", | ||||
|                 "fluent-ffmpeg": "^2.1.2", | ||||
|                 "fluent-ffmpeg": "^2.1.3", | ||||
|                 "fs-extra": "^11.1.1", | ||||
|                 "graphile-build": "^4.14.0", | ||||
|                 "graphile-utils": "^4.14.0", | ||||
|  | @ -88,7 +88,7 @@ | |||
|                 "tunnel": "0.0.6", | ||||
|                 "ua-parser-js": "^1.0.37", | ||||
|                 "undici": "^5.28.1", | ||||
|                 "unprint": "^0.11.5", | ||||
|                 "unprint": "^0.11.8", | ||||
|                 "url-pattern": "^1.0.3", | ||||
|                 "v-tooltip": "^2.1.3", | ||||
|                 "video.js": "^8.6.1", | ||||
|  | @ -9851,17 +9851,22 @@ | |||
|             "integrity": "sha512-36yxDn5H7OFZQla0/jFJmbIKTdZAQHngCedGxiMmpNfEZM0sdEeT+WczLQrjK6D7o2aiyLYDnkw0R3JK0Qv1RQ==" | ||||
|         }, | ||||
|         "node_modules/fluent-ffmpeg": { | ||||
|             "version": "2.1.2", | ||||
|             "resolved": "https://registry.npmjs.org/fluent-ffmpeg/-/fluent-ffmpeg-2.1.2.tgz", | ||||
|             "integrity": "sha512-IZTB4kq5GK0DPp7sGQ0q/BWurGHffRtQQwVkiqDgeO6wYJLLV5ZhgNOQ65loZxxuPMKZKZcICCUnaGtlxBiR0Q==", | ||||
|             "version": "2.1.3", | ||||
|             "resolved": "https://registry.npmjs.org/fluent-ffmpeg/-/fluent-ffmpeg-2.1.3.tgz", | ||||
|             "integrity": "sha512-Be3narBNt2s6bsaqP6Jzq91heDgOEaDCJAXcE3qcma/EJBSy5FB4cvO31XBInuAuKBx8Kptf8dkhjK0IOru39Q==", | ||||
|             "dependencies": { | ||||
|                 "async": ">=0.2.9", | ||||
|                 "async": "^0.2.9", | ||||
|                 "which": "^1.1.1" | ||||
|             }, | ||||
|             "engines": { | ||||
|                 "node": ">=0.8.0" | ||||
|                 "node": ">=18" | ||||
|             } | ||||
|         }, | ||||
|         "node_modules/fluent-ffmpeg/node_modules/async": { | ||||
|             "version": "0.2.10", | ||||
|             "resolved": "https://registry.npmjs.org/async/-/async-0.2.10.tgz", | ||||
|             "integrity": "sha512-eAkdoKxU6/LkKDBzLpT+t6Ff5EtfSF4wx1WfJiPEEV7WNLnDaRXk0oVysiEPm262roaachGexwUv94WhSgN5TQ==" | ||||
|         }, | ||||
|         "node_modules/fluent-ffmpeg/node_modules/which": { | ||||
|             "version": "1.3.1", | ||||
|             "resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz", | ||||
|  | @ -18293,9 +18298,9 @@ | |||
|             } | ||||
|         }, | ||||
|         "node_modules/unprint": { | ||||
|             "version": "0.11.5", | ||||
|             "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.11.5.tgz", | ||||
|             "integrity": "sha512-tLhiFGeSU40GN12625+9oqmNGDFSToMPME60pB+DSGT9wd9fJM0L/lyZMQeNFmWMSThwa/id/FHAOnN7cE1aOw==", | ||||
|             "version": "0.11.8", | ||||
|             "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.11.8.tgz", | ||||
|             "integrity": "sha512-UCtfdbbHSNS/F0hlFwMa+ZmUqkVdp7V3SZVJjcMNnb0GUKm/7VWjhdvzHe+dIejhRdJykHfXWkI/BCbKwl51Vg==", | ||||
|             "dependencies": { | ||||
|                 "axios": "^0.27.2", | ||||
|                 "bottleneck": "^2.19.5", | ||||
|  |  | |||
|  | @ -106,7 +106,7 @@ | |||
|         "express-session": "^1.17.3", | ||||
|         "face-api.js": "^0.22.2", | ||||
|         "file-type": "^18.7.0", | ||||
|         "fluent-ffmpeg": "^2.1.2", | ||||
|         "fluent-ffmpeg": "^2.1.3", | ||||
|         "fs-extra": "^11.1.1", | ||||
|         "graphile-build": "^4.14.0", | ||||
|         "graphile-utils": "^4.14.0", | ||||
|  | @ -147,7 +147,7 @@ | |||
|         "tunnel": "0.0.6", | ||||
|         "ua-parser-js": "^1.0.37", | ||||
|         "undici": "^5.28.1", | ||||
|         "unprint": "^0.11.5", | ||||
|         "unprint": "^0.11.8", | ||||
|         "url-pattern": "^1.0.3", | ||||
|         "v-tooltip": "^2.1.3", | ||||
|         "video.js": "^8.6.1", | ||||
|  |  | |||
|  | @ -1251,6 +1251,10 @@ const tags = [ | |||
| 		name: 'voodoo', | ||||
| 		slug: 'voodoo', | ||||
| 	}, | ||||
| 	{ | ||||
| 		name: 'bikini', | ||||
| 		slug: 'bikini', | ||||
| 	}, | ||||
| ]; | ||||
| 
 | ||||
| const aliases = [ | ||||
|  | @ -2545,6 +2549,30 @@ const aliases = [ | |||
| 		name: 'parasites', | ||||
| 		for: 'parasite', | ||||
| 	}, | ||||
| 	{ | ||||
| 		name: 'threesome - fmm', | ||||
| 		for: 'mfm', | ||||
| 	}, | ||||
| 	{ | ||||
| 		name: '4k ultra hd', | ||||
| 		for: '4k', | ||||
| 	}, | ||||
| 	{ | ||||
| 		name: 'sex toy play', | ||||
| 		for: 'toys', | ||||
| 	}, | ||||
| 	{ | ||||
| 		name: 'cumshots', | ||||
| 		for: 'cumshot', | ||||
| 	}, | ||||
| 	{ | ||||
| 		name: 'bikini babes', | ||||
| 		for: 'bikini', | ||||
| 	}, | ||||
| 	{ | ||||
| 		name: 'threesomes', | ||||
| 		for: 'threesome', | ||||
| 	}, | ||||
| ]; | ||||
| 
 | ||||
| const priorities = [ // higher index is higher priority
 | ||||
|  |  | |||
|  | @ -104,6 +104,12 @@ const networks = [ | |||
| 		}, | ||||
| 		parent: '21sextury', | ||||
| 	}, | ||||
| 	{ | ||||
| 		slug: 'adultempire', | ||||
| 		name: 'Adult Empire', | ||||
| 		url: 'https://www.adultempire.com', | ||||
| 		type: 'info', | ||||
| 	}, | ||||
| 	{ | ||||
| 		slug: 'adulttime', | ||||
| 		name: 'Adult Time', | ||||
|  |  | |||
|  | @ -3270,6 +3270,15 @@ const sites = [ | |||
| 		slug: 'elegantangel', | ||||
| 		name: 'Elegant Angel', | ||||
| 		url: 'https://www.elegantangel.com', | ||||
| 		options: { | ||||
| 			spawn: [ | ||||
| 				{ | ||||
| 					parameters: { | ||||
| 						latest: 'https://www.elegantangel.com/watch-exclusive-elegant-angel-scenes.html', | ||||
| 					}, | ||||
| 				}, | ||||
| 			], | ||||
| 		}, | ||||
| 	}, | ||||
| 	// EVIL ANGEL
 | ||||
| 	{ | ||||
|  | @ -13478,7 +13487,6 @@ const sites = [ | |||
| 		tags: ['black-cock'], | ||||
| 		parameters: { | ||||
| 			studio: false, | ||||
| 			layout: 'grid', | ||||
| 		}, | ||||
| 	}, | ||||
| 	// WHALE MEMBER
 | ||||
|  | @ -13713,27 +13721,36 @@ exports.seed = (knex) => Promise.resolve() | |||
| 	.then(async () => { | ||||
| 		await Promise.all(sites.map(async (channel) => { | ||||
| 			if (channel.rename) { | ||||
| 				return knex('entities') | ||||
| 				await knex('entities') | ||||
| 					.where({ | ||||
| 						type: channel.type || 'channel', | ||||
| 						slug: channel.rename, | ||||
| 					}) | ||||
| 					.update('slug', channel.slug); | ||||
| 
 | ||||
| 				return; | ||||
| 			} | ||||
| 
 | ||||
| 			return null; | ||||
| 			if (channel.delete) { | ||||
| 				await knex('entities') | ||||
| 					.where({ | ||||
| 						type: channel.type || 'channel', | ||||
| 						slug: channel.slug, | ||||
| 					}) | ||||
| 					.delete(); | ||||
| 			} | ||||
| 		}).filter(Boolean)); | ||||
| 
 | ||||
| 		const networks = await knex('entities') | ||||
| 			.where('type', 'network') | ||||
| 			.orWhereNull('parent_id'); | ||||
| 
 | ||||
| 		const networksMap = networks.reduce((acc, { id, slug }) => ({ ...acc, [slug]: id }), {}); | ||||
| 		const networksMap = networks.filter((network) => !network.delete).reduce((acc, { id, slug }) => ({ ...acc, [slug]: id }), {}); | ||||
| 
 | ||||
| 		const tags = await knex('tags').select('*').whereNull('alias_for'); | ||||
| 		const tagsMap = tags.reduce((acc, { id, slug }) => ({ ...acc, [slug]: id }), {}); | ||||
| 
 | ||||
| 		const sitesWithNetworks = sites.map((site) => ({ | ||||
| 		const sitesWithNetworks = sites.filter((site) => !site.delete).map((site) => ({ | ||||
| 			slug: site.slug, | ||||
| 			name: site.name, | ||||
| 			type: site.type || 'channel', | ||||
|  | @ -13741,6 +13758,7 @@ exports.seed = (knex) => Promise.resolve() | |||
| 			description: site.description, | ||||
| 			url: site.url, | ||||
| 			parameters: site.parameters, | ||||
| 			options: site.options, | ||||
| 			parent_id: networksMap[site.parent], | ||||
| 			priority: site.priority || 0, | ||||
| 			independent: !!site.independent, | ||||
|  |  | |||
|  | @ -410,7 +410,7 @@ async function curateProfile(profile, actor) { | |||
| 		curatedProfile.ethnicity = ethnicities[profile.ethnicity?.trim().toLowerCase()] || null; | ||||
| 		curatedProfile.hairType = profile.hairType?.trim() || null; | ||||
| 		curatedProfile.hairColor = hairColors[(profile.hairColor || profile.hair)?.toLowerCase().replace('hair', '').trim()] || null; | ||||
| 		curatedProfile.eyes = eyeColors[profile.eyes?.trim().toLowerCase()] || null; | ||||
| 		curatedProfile.eyes = eyeColors[profile.eyes?.replace(/eyes?/i).trim().toLowerCase()] || null; | ||||
| 
 | ||||
| 		curatedProfile.tattoos = profile.tattoos?.trim() || null; | ||||
| 		curatedProfile.piercings = profile.piercings?.trim() || null; | ||||
|  | @ -878,7 +878,7 @@ async function scrapeActors(argNames) { | |||
| 	const entitySlugs = sources.flat(); | ||||
| 
 | ||||
| 	const [entitiesBySlug, existingActorEntries] = await Promise.all([ | ||||
| 		fetchEntitiesBySlug(entitySlugs, 'desc'), | ||||
| 		fetchEntitiesBySlug(entitySlugs, { types: ['channel', 'network', 'info'] }), | ||||
| 		knex('actors') | ||||
| 			.select(knex.raw('actors.id, actors.name, actors.slug, actors.entry_id, actors.entity_id, row_to_json(entities) as entity')) | ||||
| 			.whereIn('actors.slug', baseActors.map((baseActor) => baseActor.slug)) | ||||
|  |  | |||
|  | @ -84,7 +84,7 @@ async function fetchScene(scraper, url, entity, baseRelease, options, type = 'sc | |||
| 	} | ||||
| 
 | ||||
| 	if ((type === 'scene' && scraper.scrapeScene) || (type === 'movie' && scraper.scrapeMovie)) { | ||||
| 		if (scraper.useUnprint || scraper.scrapeScene?.unprint || scraper.scrapeMovie?.unprint) { | ||||
| 		if (scraper.useUnprint || (type === 'scene' && scraper.scrapeScene?.unprint) || (type === 'movie' && scraper.scrapeMovie?.unprint)) { | ||||
| 			return fetchUnprintScene(scraper, url, entity, baseRelease, options, type); | ||||
| 		} | ||||
| 
 | ||||
|  |  | |||
|  | @ -55,7 +55,8 @@ function curateEntity(entity, includeParameters = false) { | |||
| 	} | ||||
| 
 | ||||
| 	if (includeParameters) { | ||||
| 		curatedEntity.parameters = entity.parameters; | ||||
| 		curatedEntity.options = entity.options; // global internal options
 | ||||
| 		curatedEntity.parameters = entity.parameters; // scraper-specific parameters
 | ||||
| 	} | ||||
| 
 | ||||
| 	if (entity.children) { | ||||
|  | @ -66,10 +67,25 @@ function curateEntity(entity, includeParameters = false) { | |||
| 	} | ||||
| 
 | ||||
| 	if (entity.included_children) { | ||||
| 		curatedEntity.includedChildren = entity.included_children.map((child) => curateEntity({ | ||||
| 			...child, | ||||
| 			parent: curatedEntity.id ? curatedEntity : null, | ||||
| 		}, includeParameters)); | ||||
| 		curatedEntity.includedChildren = entity.included_children.flatMap((child) => { | ||||
| 			const curatedChild = curateEntity({ | ||||
| 				...child, | ||||
| 				parent: curatedEntity.id ? curatedEntity : null, | ||||
| 			}, includeParameters); | ||||
| 
 | ||||
| 			// allow entities to 'spawn' virtual copies of themselves, this is useful for sites that use two separate update pages (i.e. Elegant Angel)
 | ||||
| 			if (child.options?.spawn) { | ||||
| 				return [ | ||||
| 					curatedChild, | ||||
| 					...child.options.spawn.map((spawnEntity) => ({ | ||||
| 						...curatedChild, | ||||
| 						...spawnEntity, | ||||
| 					})), | ||||
| 				]; | ||||
| 			} | ||||
| 
 | ||||
| 			return curatedChild; | ||||
| 		}); | ||||
| 	} | ||||
| 
 | ||||
| 	const scraper = resolveScraper(curatedEntity); | ||||
|  | @ -199,7 +215,7 @@ async function fetchIncludedEntities() { | |||
| 	return curatedNetworks; | ||||
| } | ||||
| 
 | ||||
| async function fetchEntitiesBySlug(entitySlugs, prefer = 'channel') { | ||||
| async function fetchEntitiesBySlug(entitySlugs, options = { prefer: 'channel' }) { | ||||
| 	const entities = await knex.raw(` | ||||
| 		WITH RECURSIVE entity_tree as ( | ||||
| 			SELECT to_jsonb(entities) as entity, | ||||
|  | @ -208,7 +224,7 @@ async function fetchEntitiesBySlug(entitySlugs, prefer = 'channel') { | |||
| 			FROM entities | ||||
| 			WHERE (slug = ANY(:entitySlugs) | ||||
| 			OR url ILIKE ANY(:entityHosts)) | ||||
| 			AND type IN ('channel', 'network') | ||||
| 			AND type = ANY(:entityTypes) | ||||
| 
 | ||||
| 			UNION ALL | ||||
| 
 | ||||
|  | @ -236,7 +252,8 @@ async function fetchEntitiesBySlug(entitySlugs, prefer = 'channel') { | |||
| 	`, {
 | ||||
| 		entitySlugs: entitySlugs.filter((slug) => !slug.includes('.')), | ||||
| 		entityHosts: entitySlugs.filter((slug) => slug.includes('.')).map((hostname) => `%${hostname}`), | ||||
| 		sort: knex.raw(prefer === 'channel' ? 'asc' : 'desc'), | ||||
| 		entityTypes: options.types || ['channel', 'network'], | ||||
| 		sort: knex.raw(options.prefer === 'channel' ? 'asc' : 'desc'), | ||||
| 	}); | ||||
| 
 | ||||
| 	// channel entity will overwrite network entity
 | ||||
|  | @ -263,7 +280,7 @@ async function fetchReleaseEntities(baseReleases) { | |||
| 			.filter(Boolean), | ||||
| 	)); | ||||
| 
 | ||||
| 	return fetchEntitiesBySlug(entitySlugs, argv.prefer || 'network'); | ||||
| 	return fetchEntitiesBySlug(entitySlugs, { prefer: argv.prefer || 'network' }); | ||||
| } | ||||
| 
 | ||||
| async function fetchEntity(entityId, type) { | ||||
|  |  | |||
|  | @ -648,7 +648,12 @@ streamQueue.define('fetchStreamSource', async ({ source, tempFileTarget, hashStr | |||
| 		.format('mp4') | ||||
| 		.outputOptions(['-movflags frag_keyframe+empty_moov']) | ||||
| 		.on('start', (cmd) => logger.verbose(`Fetching stream from ${source.stream} with "${cmd}"`)) | ||||
| 		.on('error', (error) => logger.error(`Failed to fetch stream from ${source.stream}: ${error.message}`)) | ||||
| 		.on('error', (error) => { | ||||
| 			logger.error(`Failed to fetch stream from ${source.stream}: ${error.message}`); | ||||
| 
 | ||||
| 			hashStream.end(); | ||||
| 			tempFileTarget.end(); | ||||
| 		}) | ||||
| 		.pipe(); | ||||
| 
 | ||||
| 	// await pipeline(video, hashStream, tempFileTarget);
 | ||||
|  |  | |||
|  | @ -1,97 +1,61 @@ | |||
| 'use strict'; | ||||
| 
 | ||||
| const qu = require('../utils/qu'); | ||||
| const unprint = require('unprint'); | ||||
| 
 | ||||
| const http = require('../utils/http'); | ||||
| const slugify = require('../utils/slugify'); | ||||
| const { feetInchesToCm, lbsToKg } = require('../utils/convert'); | ||||
| 
 | ||||
| async function getPhotos(entryId, channel) { | ||||
| 	const res = await http.get(`${channel.url}/Membership/GetScreenshots?sceneID=scene_${entryId}`); | ||||
| 
 | ||||
| 	if (res.ok) { | ||||
| 		return res.body.split(/[\s,]+/).filter(Boolean); | ||||
| 	} | ||||
| 
 | ||||
| 	return []; | ||||
| } | ||||
| 
 | ||||
| function scrapeAllTour(scenes, channel) { | ||||
| function scrapeAll(scenes, channel, _options) { | ||||
| 	return scenes.map(({ query }) => { | ||||
| 		const release = {}; | ||||
| 
 | ||||
| 		release.url = query.url('.scene-update-details, .feature-update-details', 'href', { origin: channel.url }); | ||||
| 		release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1]; | ||||
| 		release.url = query.url('a.scene-title, a.scene-img', { origin: channel.url }); | ||||
| 		release.entryId = query.attribute('article[data-scene-id]', 'data-scene-id') || new URL(release.url).pathname.match(/^\/(\d+)/)?.[1]; | ||||
| 
 | ||||
| 		release.title = query.q('.scene-img-wrapper img', 'alt').replace(/\s*image$/i, ''); | ||||
| 		release.title = query.content('.scene-title')?.trim(); | ||||
| 		release.duration = query.duration('.scene-length'); | ||||
| 
 | ||||
| 		release.date = query.date('.scene-update-stats span, .feature-update-details span', 'MMM DD, YYYY'); | ||||
| 		release.actors = query.cnt('.scene-update-details h3, .feature-update-details h2')?.split(/\s*\|\s*/).map((actor) => actor.trim()); | ||||
| 		release.actors = query.content('.scene-performer-names')?.split(/[,&]/).map((actor) => actor.trim()); | ||||
| 
 | ||||
| 		const poster = query.img('.scene-img-wrapper img'); | ||||
| 		release.poster = [ | ||||
| 			poster.replace(/\/res\/\d+/, '/res/1920'), | ||||
| 			poster.replace(/\/res\/\d+/, '/res/1600'), | ||||
| 			poster, | ||||
| 		]; | ||||
| 		release.poster = query.sourceSet('.screenshot', 'data-srcset'); | ||||
| 
 | ||||
| 		release.trailer = { src: query.video('.scene-img-wrapper source') }; | ||||
| 		const sceneId = query.attribute('article[data-scene-id]', 'data-scene-id'); | ||||
| 		const masterId = query.attribute('article[data-master-id]', 'data-master-id'); | ||||
| 
 | ||||
| 		return release; | ||||
| 	}); | ||||
| } | ||||
| 
 | ||||
| async function scrapeAllGrid(scenes, channel, options) { | ||||
| 	return Promise.all(scenes.map(async ({ query, el }) => { | ||||
| 		const release = {}; | ||||
| 		const uri = query.url('.grid-item-title') || query.url('a.animated-screen'); | ||||
| 
 | ||||
| 		release.entryId = el.id.match(/\d+/)?.[0] || uri.match(/^(\d+)\//)?.[1]; | ||||
| 
 | ||||
| 		release.title = query.cnt('.grid-item-title'); | ||||
| 		release.url = qu.prefixUrl(uri, channel.url); | ||||
| 
 | ||||
| 		release.poster = query.img('.screenshot'); | ||||
| 
 | ||||
| 		if (options.includePhotos) { | ||||
| 			release.photos = await getPhotos(release.entryId, channel); | ||||
| 		if (sceneId && masterId) { | ||||
| 			release.teaser = `https://video.adultempire.com/hls/previewscene/${masterId}/${sceneId}/index-f1-v1.m3u8`; | ||||
| 		} | ||||
| 
 | ||||
| 		return release; | ||||
| 	})); | ||||
| } | ||||
| 
 | ||||
| function scrapeMovieScenes(scenes) { | ||||
| 	return scenes.map(({ query }) => { | ||||
| 		const release = {}; | ||||
| 
 | ||||
| 		release.title = query.cnt('.scene-title a'); | ||||
| 		release.url = query.url('.scene-title a', 'href', { origin: 'https://www.elegantangel.com' }); | ||||
| 		release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1]; | ||||
| 
 | ||||
| 		release.duration = query.number('.scene-length') * 60; | ||||
| 		release.actors = query.cnts('.scene-cast-list a'); | ||||
| 
 | ||||
| 		release.poster = query.img('a img'); | ||||
| 
 | ||||
| 		return release; | ||||
| 	}); | ||||
| } | ||||
| 
 | ||||
| async function scrapeRelease({ query, html }, url, channel, baseRelease, options) { | ||||
| const photoRegex = /(\/\w\/\d+\/)\d+/; | ||||
| 
 | ||||
| async function scrapeRelease({ query, html, element }, { url, entity, baseRelease, parameters }) { | ||||
| 	const release = {}; | ||||
| 	const type = query.exists('.scene-list-header') ? 'movie' : 'scene'; | ||||
| 
 | ||||
| 	release.entryId = new URL(url).pathname.match(/\/(\d+)/)[1]; | ||||
| 
 | ||||
| 	release.title = query.cnt('.scene-page .description, .video-page .description'); | ||||
| 	const title = query.content('.scene-page .description, .video-page .description'); | ||||
| 
 | ||||
| 	if (/^scene \d+$/i.test(title)) { | ||||
| 		release.sceneIndex = unprint.extractNumber(title); | ||||
| 	} else { | ||||
| 		release.title = title; | ||||
| 	} | ||||
| 
 | ||||
| 	release.date = query.date('.release-date:first-child', 'MMM DD, YYYY', /\w{3} \d{2}, \d{4}/); | ||||
| 	release.duration = query.duration('.release-date:last-child'); | ||||
| 
 | ||||
| 	release.actors = query.all('.video-performer').map((el) => { | ||||
| 		const avatar = qu.query.img(el, 'img', 'data-bgsrc'); | ||||
| 		const avatar = unprint.query.img(el, 'img', 'data-bgsrc'); | ||||
| 
 | ||||
| 		return { | ||||
| 			name: qu.query.cnt(el, 'span'), | ||||
| 			url: qu.query.url(el, 'a', 'href', { origin: channel.url }), | ||||
| 			name: unprint.query.content(el, 'span').trim(), | ||||
| 			url: unprint.query.url(el, 'a', { origin: entity.url }), | ||||
| 			avatar: [ | ||||
| 				avatar.replace(/\/actor\/\d+/, '/actor/1600'), | ||||
| 				avatar, | ||||
|  | @ -99,8 +63,8 @@ async function scrapeRelease({ query, html }, url, channel, baseRelease, options | |||
| 		}; | ||||
| 	}); | ||||
| 
 | ||||
| 	release.tags = query.cnts('.tags a, .categories a'); | ||||
| 	release.studio = options?.parameters.studio === false ? null : slugify(query.cnt('.studio span:last-child'), ''); | ||||
| 	release.tags = query.contents('.tags a, .categories a'); | ||||
| 	release.studio = parameters?.studio === false ? null : slugify(query.content('.studio span:last-child, .studio a'), ''); | ||||
| 
 | ||||
| 	if (type === 'scene') { | ||||
| 		release.director = query.text('.director'); | ||||
|  | @ -109,87 +73,44 @@ async function scrapeRelease({ query, html }, url, channel, baseRelease, options | |||
| 	} | ||||
| 
 | ||||
| 	if (type === 'movie') { | ||||
| 		release.director = query.cnt('.director a'); | ||||
| 		release.covers = query.imgs('.carousel-item > img'); | ||||
| 		release.director = query.content('.director a'); | ||||
| 		release.covers = [query.sourceSet('.carousel-item .boxcover-image', 'data-srcset')]; | ||||
| 
 | ||||
| 		release.scenes = scrapeMovieScenes(qu.initAll(query.all('#scenes .grid-item')), channel); | ||||
| 		release.scenes = scrapeAll(unprint.initAll(element, '#scenes .grid-item'), entity); | ||||
| 	} | ||||
| 
 | ||||
| 	if (query.exists('.video-title .movie-title')) { | ||||
| 		release.movie = { | ||||
| 			title: query.cnt('#viewLargeBoxcover .modal-title a'), | ||||
| 			url: query.url('#viewLargeBoxcover .modal-title a', 'href', { origin: channel.url }), | ||||
| 			title: query.content('#viewLargeBoxcover .modal-title a'), | ||||
| 			url: query.url('#viewLargeBoxcover .modal-title a', 'href', { origin: entity.url }), | ||||
| 			entryId: query.url('#viewLargeBoxcover .modal-title a')?.match(/(\d+)\//)[1], | ||||
| 			covers: query.imgs('#viewLargeBoxcover #viewLargeBoxcoverCarousel .carousel-item > img'), | ||||
| 		}; | ||||
| 	} | ||||
| 
 | ||||
| 	release.photos = query.imgs('#dv_frames a > img').map((photo) => [ | ||||
| 		photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1920`), | ||||
| 		photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1600`), | ||||
| 	release.caps = query.imgs('#dv_frames a > img', { attribute: 'data-src' }).map((photo) => [ | ||||
| 		photo.replace(photoRegex, (match, path) => `${path}1920`), | ||||
| 		photo.replace(photoRegex, (match, path) => `${path}1280`), | ||||
| 		photo, | ||||
| 	]); | ||||
| 
 | ||||
| 	const trailerId = html.match(/item: (\d+),/)?.[1]; | ||||
| 
 | ||||
| 	if (trailerId) { | ||||
| 		const trailerUrl = `https://www.adultempire.com/videoEmbed/${trailerId}?type=preview`; | ||||
| 		const trailerRes = await qu.get(trailerUrl); | ||||
| 		release.trailer = `https://trailer.adultempire.com/hls/trailer/${trailerId}/master.m3u8`; | ||||
| 	} | ||||
| 
 | ||||
| 		if (trailerRes.ok) { | ||||
| 			const stream = trailerRes.item.query.video(); | ||||
| 
 | ||||
| 			release.trailer = { stream }; | ||||
| 		} | ||||
| 	if (query.exists('.user-actions .btn-4k')) { | ||||
| 		release.qualities = [2160]; | ||||
| 	} | ||||
| 
 | ||||
| 	return release; | ||||
| } | ||||
| 
 | ||||
| function scrapeMovies(movies, channel) { | ||||
| 	return movies.map(({ query }) => { | ||||
| 		const release = {}; | ||||
| 
 | ||||
| 		release.url = query.url('.boxcover', 'href', { origin: channel.url }); | ||||
| 		release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1]; | ||||
| 
 | ||||
| 		release.title = query.cnt('span'); | ||||
| 
 | ||||
| 		const cover = query.img('picture img'); | ||||
| 
 | ||||
| 		release.covers = [ | ||||
| 			// filename is ignored, back-cover has suffix after media ID
 | ||||
| 			cover.replace('_sq.jpg', '/front.jpg').replace(/\/product\/\d+/, '/product/500'), | ||||
| 			cover.replace('_sq.jpg', 'b/back.jpg').replace(/\/product\/\d+/, '/product/500'), | ||||
| 		]; | ||||
| 
 | ||||
| 		return release; | ||||
| 	}); | ||||
| } | ||||
| 
 | ||||
| function scrapeActorScenes(scenes, channel) { | ||||
| 	return scenes.map(({ query }) => { | ||||
| 		const release = {}; | ||||
| 
 | ||||
| 		release.url = query.url('a', 'href', { origin: channel.url }); | ||||
| 		release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1]; | ||||
| 
 | ||||
| 		release.title = query.cnt('.grid-item-title'); | ||||
| 
 | ||||
| 		const poster = query.img('a img'); | ||||
| 		release.poster = [ | ||||
| 			poster.replace(/\/\d+\//, '/1600/'), | ||||
| 			poster, | ||||
| 		]; | ||||
| 
 | ||||
| 		return release; | ||||
| 	}); | ||||
| } | ||||
| 
 | ||||
| async function scrapeProfile({ query }, url, channel, include) { | ||||
| async function scrapeProfile({ query }) { | ||||
| 	const profile = {}; | ||||
| 
 | ||||
| 	const bio = query.cnts('.performer-page-header li').reduce((acc, info) => { | ||||
| 	const bio = query.contents('#profileModal .well li').reduce((acc, info) => { | ||||
| 		const [key, value] = info.split(':'); | ||||
| 
 | ||||
| 		return { | ||||
|  | @ -198,11 +119,14 @@ async function scrapeProfile({ query }, url, channel, include) { | |||
| 		}; | ||||
| 	}, {}); | ||||
| 
 | ||||
| 	const measurements = bio.meas?.match(/(\d+)(\w+)-(\d+)-(\d+)/); | ||||
| 	const bioText = query.content('#profileModal .well'); | ||||
| 
 | ||||
| 	if (measurements) { | ||||
| 		[profile.bust, profile.cup, profile.waist, profile.hip] = measurements.slice(1); | ||||
| 	} | ||||
| 	profile.description = query.content('#profileModal .modal-body') | ||||
| 		.slice(bioText.length) | ||||
| 		.replace(/Biography Text ©Adult DVD Empire/i, '') | ||||
| 		.trim(); | ||||
| 
 | ||||
| 	profile.measurements = bio.measurements?.replace(/["\s]+/g, ''); | ||||
| 
 | ||||
| 	profile.hair = bio.hair; | ||||
| 	profile.eyes = bio.eyes; | ||||
|  | @ -211,79 +135,41 @@ async function scrapeProfile({ query }, url, channel, include) { | |||
| 	profile.height = feetInchesToCm(bio.height); | ||||
| 	profile.weight = lbsToKg(bio.weight); | ||||
| 
 | ||||
| 	profile.avatar = query.img('picture img'); | ||||
| 	const avatar = query.img('picture img, .performer-image-container img'); | ||||
| 
 | ||||
| 	if (include) { | ||||
| 		const actorId = new URL(url).pathname.match(/\/(\d+)/)[1]; | ||||
| 		const res = await qu.getAll(`${channel.url}/www.elegantangel.com/streaming-video-by-scene.html?cast=${actorId}`, '.grid-item', null, { | ||||
| 			rejectUnauthorized: false, | ||||
| 		}); | ||||
| 
 | ||||
| 		if (res.ok) { | ||||
| 			profile.releases = scrapeActorScenes(res.items, channel); | ||||
| 		} | ||||
| 	if (avatar) { | ||||
| 		profile.avatar = [ | ||||
| 			avatar | ||||
| 				.replace('_bust', '_body') | ||||
| 				.replace(/\/actor\/\d+\//i, '/actor/1000/'), | ||||
| 			avatar, | ||||
| 		]; | ||||
| 	} | ||||
| 
 | ||||
| 	return profile; | ||||
| } | ||||
| 
 | ||||
| async function fetchLatestTour(channel, page = 1) { | ||||
| 	const url = `${channel.url}/tour?page=${page}`; | ||||
| 	const res = await qu.getAll(url, '.scene-update', null, { | ||||
| 		// invalid certificate
 | ||||
| 		rejectUnauthorized: false, | ||||
| 	}); | ||||
| async function fetchLatest(channel, page, options) { | ||||
| 	// const res = await qu.getAll(`${channel.url}/watch-newest-clips-and-scenes.html?page=${page}&hybridview=member`, '.item-grid-scene .grid-item');
 | ||||
| 	const res = await unprint.get(options.parameters?.latest | ||||
| 		? `${options.parameters.latest}?page=${page}&view=grid` | ||||
| 		: `${channel.url}/watch-newest-clips-and-scenes.html?page=${page}&view=grid`, { selectAll: '.item-grid-scene .grid-item' }); | ||||
| 
 | ||||
| 	if (res.ok) { | ||||
| 		return scrapeAllTour(res.items, channel); | ||||
| 		return scrapeAll(res.context, channel, options); | ||||
| 	} | ||||
| 
 | ||||
| 	return res.status; | ||||
| } | ||||
| 
 | ||||
| async function fetchLatestGrid(channel, page, options) { | ||||
| 	const res = await qu.getAll(`${channel.url}/watch-newest-clips-and-scenes.html?page=${page}&hybridview=member`, '.item-grid-scene .grid-item'); | ||||
| 
 | ||||
| 	if (res.ok) { | ||||
| 		return scrapeAllGrid(res.items, channel, options); | ||||
| 	} | ||||
| 
 | ||||
| 	return res.status; | ||||
| } | ||||
| 
 | ||||
| async function fetchMovie(url, channel, baseRelease, options) { | ||||
| 	const res = await qu.get(url, null, null, { | ||||
| 		// invalid certificate
 | ||||
| async function fetchProfilePage(actorUrl) { | ||||
| 	const res = await unprint.get(actorUrl, { | ||||
| 		select: '#content', | ||||
| 		rejectUnauthorized: false, | ||||
| 	}); | ||||
| 
 | ||||
| 	if (res.ok) { | ||||
| 		return scrapeRelease(res.item, url, channel, baseRelease, options); | ||||
| 	} | ||||
| 
 | ||||
| 	return res.status; | ||||
| } | ||||
| 
 | ||||
| async function fetchMovies(channel, page = 1) { | ||||
| 	const res = await qu.getAll(`https://www.elegantangel.com/streaming-elegant-angel-dvds-on-video.html?page=${page}`, '.grid-item', null, { | ||||
| 		// invalid certificate
 | ||||
| 		rejectUnauthorized: false, | ||||
| 	}); | ||||
| 
 | ||||
| 	if (res.ok) { | ||||
| 		return scrapeMovies(res.items, channel); | ||||
| 	} | ||||
| 
 | ||||
| 	return res.status; | ||||
| } | ||||
| 
 | ||||
| async function fetchProfilePage(actorUrl, channel, include) { | ||||
| 	const res = await qu.get(actorUrl, '.performer-page', null, { | ||||
| 		rejectUnauthorized: false, | ||||
| 	}); | ||||
| 
 | ||||
| 	if (res.ok) { | ||||
| 		return scrapeProfile(res.item, actorUrl, channel, include); | ||||
| 		return scrapeProfile(res.context); | ||||
| 	} | ||||
| 
 | ||||
| 	return res.status; | ||||
|  | @ -298,13 +184,15 @@ async function fetchProfile(baseActor, channel, include) { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	const searchRes = await http.get(`${channel.url}/search/SearchAutoComplete_Agg_ByMedia?rows=9&name_startsWith=${slugify(baseActor.name, '+')}`); | ||||
| 	const searchRes = await http.get(`https://www.adultempire.com/search/SearchAutoComplete_Agg_EmpireDTRank?search_type=Pornstars&rows=9&name_startsWith=${slugify(baseActor.name, '+')}`); | ||||
| 
 | ||||
| 	if (searchRes.ok) { | ||||
| 	if (searchRes.ok && searchRes.body.Results) { | ||||
| 		const actorResult = searchRes.body.Results.find((result) => /performer/i.test(result.BasicResponseGroup?.displaytype) && new RegExp(baseActor.name, 'i').test(result.BasicResponseGroup?.description)); | ||||
| 
 | ||||
| 		if (actorResult) { | ||||
| 			return fetchProfilePage(`${channel.url}${actorResult.BasicResponseGroup.id}`, channel, include); | ||||
| 			const url = `https://www.adultempire.com/${actorResult.BasicResponseGroup.id}`; | ||||
| 
 | ||||
| 			return fetchProfilePage(url); | ||||
| 		} | ||||
| 
 | ||||
| 		return null; | ||||
|  | @ -314,16 +202,15 @@ async function fetchProfile(baseActor, channel, include) { | |||
| } | ||||
| 
 | ||||
| module.exports = { | ||||
| 	fetchLatest: fetchLatestTour, | ||||
| 	fetchMovies, | ||||
| 	fetchMovie, | ||||
| 	fetchLatest, | ||||
| 	// fetchMovies,
 | ||||
| 	fetchProfile, | ||||
| 	scrapeScene: scrapeRelease, | ||||
| 	scrapeMovie: scrapeRelease, | ||||
| 	grid: { | ||||
| 		fetchLatest: fetchLatestGrid, | ||||
| 		scrapeScene: scrapeRelease, | ||||
| 		fetchMovie, | ||||
| 		fetchProfile, | ||||
| 	scrapeScene: { | ||||
| 		scraper: scrapeRelease, | ||||
| 		unprint: true, | ||||
| 	}, | ||||
| 	scrapeMovie: { | ||||
| 		scraper: scrapeRelease, | ||||
| 		unprint: true, | ||||
| 	}, | ||||
| }; | ||||
|  |  | |||
|  | @ -3,6 +3,10 @@ | |||
| const scrapers = require('./scrapers'); | ||||
| 
 | ||||
| function resolveScraper(entity) { | ||||
| 	if (entity.parameters?.useScraper && scrapers.releases[entity.parameters.useScraper]) { | ||||
| 		return scrapers.releases[entity.parameters.useScraper]; | ||||
| 	} | ||||
| 
 | ||||
| 	if (scrapers.releases[entity.slug]) { | ||||
| 		return scrapers.releases[entity.slug]; | ||||
| 	} | ||||
|  |  | |||
|  | @ -177,6 +177,7 @@ const scrapers = { | |||
| 	actors: { | ||||
| 		'18vr': badoink, | ||||
| 		'21sextury': gamma, | ||||
| 		adultempire, | ||||
| 		allanal: mikeadriano, | ||||
| 		amateureuro: porndoe, | ||||
| 		americanpornstar, | ||||
|  | @ -217,7 +218,6 @@ const scrapers = { | |||
| 		dorcelclub: dorcel, | ||||
| 		doubleviewcasting: firstanalquest, | ||||
| 		dtfsluts: fullpornnetwork, | ||||
| 		elegantangel: adultempire, | ||||
| 		evilangel: gamma, | ||||
| 		exploitedcollegegirls: elevatedx, | ||||
| 		eyeontheguy: hush, | ||||
|  | @ -323,7 +323,6 @@ const scrapers = { | |||
| 		vixen, | ||||
| 		vrcosplayx: badoink, | ||||
| 		wankzvr, | ||||
| 		westcoastproductions: adultempire, | ||||
| 		wicked: gamma, | ||||
| 		wildoncam: cherrypimps, | ||||
| 		xempire: gamma, | ||||
|  |  | |||
|  | @ -288,23 +288,28 @@ async function associateMovieScenes(movies, movieScenes) { | |||
| 		}, | ||||
| 	}), {}); | ||||
| 
 | ||||
| 	const associations = movieScenes.map((scene) => { | ||||
| 		if (!scene.movie) { | ||||
| 	const associations = movieScenes | ||||
| 		.toSorted((sceneA, sceneB) => { | ||||
| 			return (sceneA.sceneIndex || 1) - (sceneB.sceneIndex || 1); | ||||
| 		}) | ||||
| 		.map((scene) => { | ||||
| 			if (!scene.movie) { | ||||
| 				return null; | ||||
| 			} | ||||
| 
 | ||||
| 			const sceneMovie = moviesByEntityIdAndEntryId[scene.entity.id]?.[scene.movie.entryId] | ||||
| 				|| moviesByEntityIdAndEntryId[scene.entity.parent?.id]?.[scene.movie.entryId]; | ||||
| 
 | ||||
| 			if (sceneMovie?.id) { | ||||
| 				return { | ||||
| 					movie_id: sceneMovie.id, | ||||
| 					scene_id: scene.id, | ||||
| 				}; | ||||
| 			} | ||||
| 
 | ||||
| 			return null; | ||||
| 		} | ||||
| 
 | ||||
| 		const sceneMovie = moviesByEntityIdAndEntryId[scene.entity.id]?.[scene.movie.entryId] | ||||
| 			|| moviesByEntityIdAndEntryId[scene.entity.parent?.id]?.[scene.movie.entryId]; | ||||
| 
 | ||||
| 		if (sceneMovie?.id) { | ||||
| 			return { | ||||
| 				movie_id: sceneMovie.id, | ||||
| 				scene_id: scene.id, | ||||
| 			}; | ||||
| 		} | ||||
| 
 | ||||
| 		return null; | ||||
| 	}).filter(Boolean); | ||||
| 		}) | ||||
| 		.filter(Boolean); | ||||
| 
 | ||||
| 	await bulkInsert('movies_scenes', associations, false); | ||||
| } | ||||
|  | @ -354,6 +359,7 @@ async function storeMovies(movies, useBatchId) { | |||
| 
 | ||||
| 	await updateMovieSearch(moviesWithId.map((movie) => movie.id)); | ||||
| 	await associateReleaseMedia(moviesWithId, 'movie'); | ||||
| 	await associateReleaseTags(moviesWithId, 'movie'); | ||||
| 
 | ||||
| 	return moviesWithId; | ||||
| } | ||||
|  |  | |||
|  | @ -298,6 +298,8 @@ async function scrapeNetworkParallel(networkEntity) { | |||
| async function fetchUpdates() { | ||||
| 	const includedNetworks = await fetchIncludedEntities(); | ||||
| 
 | ||||
| 	// console.log(includedNetworks[0]);
 | ||||
| 
 | ||||
| 	const scrapedNetworks = await Promise.map( | ||||
| 		includedNetworks, | ||||
| 		async (networkEntity) => (networkEntity.parameters?.sequential | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue