forked from DebaucheryLibrarian/traxxx
				
			Photo plucker will use discarded photos as fallback. Returning high res photo sources from LegalPorno.
This commit is contained in:
		
							parent
							
								
									6bfc5e4378
								
							
						
					
					
						commit
						db63be8f92
					
				
							
								
								
									
										78
									
								
								src/media.js
								
								
								
								
							
							
						
						
									
										78
									
								
								src/media.js
								
								
								
								
							|  | @ -42,11 +42,50 @@ async function createThumbnail(buffer) { | |||
|     return null; | ||||
| } | ||||
| 
 | ||||
| function pluckItems(items, specifiedLimit) { | ||||
| function groupFallbacksByPriority(chunks) { | ||||
|     /* | ||||
|         Reorders the sources inside each chunk by fallback priority. | ||||
|         Chunks naturally give priority to all of the first item's fallbacks, generally lower quality images. | ||||
|         This function ensures every item's first source is tried, before trying every item's second source, etc. | ||||
|         chunks: array of groups; each group entry is either a single source or an array of fallback sources, | ||||
|         where the position in that array is treated as the priority. | ||||
|         An entry that is not an array has no fallbacks and is placed in the first priority. | ||||
|         Returns one flat array per chunk: all first-priority sources in item order, then all second-priority sources, etc. | ||||
|         Example for a single chunk: | ||||
|         IN:  [[1, 2, 3], 10, [1, 2, 3, 4, 5], [1, 2, 3]] | ||||
|         grouped by priority: [[1, 10, 1, 1], [2, 2, 2], [3, 3, 3], [4], [5]] | ||||
|         OUT (flattened): [1, 10, 1, 1, 2, 2, 2, 3, 3, 3, 4, 5] | ||||
|     */ | ||||
|     return chunks.map(group => group.reduce((acc, item) => { | ||||
|         if (Array.isArray(item)) { | ||||
|             // place provided fallbacks at same index (priority) in parent array
 | ||||
|             item.forEach((fallback, fallbackIndex) => { | ||||
|                 if (!acc[fallbackIndex]) { | ||||
|                     acc[fallbackIndex] = []; | ||||
|                 } | ||||
| 
 | ||||
|                 acc[fallbackIndex].push(fallback); | ||||
|             }); | ||||
| 
 | ||||
|             return acc; | ||||
|         } | ||||
| 
 | ||||
|         // no fallbacks provided, first priority
 | ||||
|         if (!acc[0]) { | ||||
|             acc[0] = []; | ||||
|         } | ||||
| 
 | ||||
|         acc[0].push(item); | ||||
| 
 | ||||
|         return acc; | ||||
|     }, []).flat()); | ||||
| } | ||||
| 
 | ||||
| function pluckItems(items, specifiedLimit, asFallbacks = true) { | ||||
|     const limit = specifiedLimit || config.media.limit; | ||||
| 
 | ||||
|     if (!items || items.length <= limit) return items; | ||||
| 
 | ||||
|     if (asFallbacks) { | ||||
|         const chunks = chunk(items, Math.ceil(items.length / limit)); | ||||
|         const fallbacks = groupFallbacksByPriority(chunks); | ||||
| 
 | ||||
|         return fallbacks; | ||||
|     } | ||||
| 
 | ||||
|     const plucked = [1] | ||||
|         .concat( | ||||
|             Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (items.length / (limit - 1)))), | ||||
|  | @ -93,8 +132,8 @@ async function extractItem(source) { | |||
|     return null; | ||||
| } | ||||
| 
 | ||||
| async function fetchSource(source, domain, role, originalSource) { | ||||
|     logger.verbose(`Fetching ${domain} ${role} from ${source.src || source}`); | ||||
| async function fetchSource(source, domain, role) { | ||||
|     logger.silly(`Fetching ${domain} ${role} from ${source.src || source}`); | ||||
| 
 | ||||
|     // const res = await bhttp.get(source.src || source);
 | ||||
|     const res = await get(source.src || source, { | ||||
|  | @ -111,7 +150,7 @@ async function fetchSource(source, domain, role, originalSource) { | |||
|         const hash = getHash(res.body); | ||||
|         const { entropy, size, width, height } = /image/.test(mimetype) ? await getMeta(res.body) : {}; | ||||
| 
 | ||||
|         logger.verbose(`Fetched media item from ${source.src || source}`); | ||||
|         logger.silly(`Fetched media item from ${source.src || source}`); | ||||
| 
 | ||||
|         return { | ||||
|             file: res.body, | ||||
|  | @ -123,7 +162,7 @@ async function fetchSource(source, domain, role, originalSource) { | |||
|             width: width || null, | ||||
|             height: height || null, | ||||
|             quality: source.quality || null, | ||||
|             source: originalSource?.src || originalSource || source.src || source, | ||||
|             source: source.src || source, | ||||
|             scraper: source.scraper, | ||||
|             copyright: source.copyright, | ||||
|         }; | ||||
|  | @ -133,9 +172,11 @@ async function fetchSource(source, domain, role, originalSource) { | |||
| } | ||||
| 
 | ||||
| async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null, sourceIndex = 0) { | ||||
|     if (!source) return null; | ||||
| 
 | ||||
|     try { | ||||
|         if (!source) { | ||||
|             throw new Error(`Empty ${domain} ${role} source in ${originalSource}`); | ||||
|         } | ||||
| 
 | ||||
|         if (Array.isArray(source)) { | ||||
|             if (source.every(sourceX => sourceX.quality)) { | ||||
|                 // various video qualities provided
 | ||||
|  | @ -160,19 +201,18 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att | |||
|             return null; | ||||
|         } | ||||
| 
 | ||||
|         return fetchSource(source, domain, role, originalSource); | ||||
|         return await fetchSource(source, domain, role, originalSource); | ||||
|     } catch (error) { | ||||
|         logger.warn(`Failed attempt ${attempt}/3 to fetch ${domain} ${role} ${index + 1} (${source.src || source}): ${error}`); | ||||
| 
 | ||||
|         /* | ||||
|         if (attempt < 3) { | ||||
|         if (source && attempt < 3) { | ||||
|             // only retry if source is provided at all
 | ||||
|             await Promise.delay(5000); | ||||
|             return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1, originalSource, sourceIndex); | ||||
|         } | ||||
|         */ | ||||
| 
 | ||||
|         if (originalSource && sourceIndex < originalSource.length) { | ||||
|             throw error; | ||||
|         if (originalSource && sourceIndex < originalSource.length - 1) { | ||||
|             throw error; // gets caught to try next source
 | ||||
|         } | ||||
| 
 | ||||
|         return null; | ||||
|  | @ -285,6 +325,8 @@ async function storeMedia(sources, domain, role, { entropyFilter = 2.5 } = {}) { | |||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     console.log(presentSources, presentSources.length); | ||||
| 
 | ||||
|     // split up source list to prevent excessive RAM usage
 | ||||
|     const itemChunksBySource = await Promise.all(chunk(presentSources, 50).map(async (sourceChunk, index) => { | ||||
|         try { | ||||
|  | @ -354,12 +396,12 @@ function associateTargetMedia(targetId, sources, mediaBySource, domain, role, pr | |||
|         .map((source) => { | ||||
|             if (!source) return null; | ||||
| 
 | ||||
|             const mediaItem = Array.isArray(source) | ||||
|                 ? mediaBySource[source.map(sourceX => sourceX.src || sourceX).toString()] | ||||
|                 : mediaBySource[source.src || source]; | ||||
|             if (Array.isArray(source)) { | ||||
|                 const availableSource = source.find(fallbackSource => mediaBySource[fallbackSource.src || fallbackSource]); | ||||
|                 return mediaBySource[availableSource]; | ||||
|             } | ||||
| 
 | ||||
|             // return mediaItem && { [`${domain}_id`]: targetId, media_id: mediaItem.id };
 | ||||
|             return mediaItem; | ||||
|             return mediaBySource[source.src || source]; | ||||
|         }) | ||||
|         .filter(Boolean) | ||||
|         // .sort((mediaItemA, mediaItemB) => mediaItemB.height - mediaItemA.height) // prefer high res images for primary item
 | ||||
|  |  | |||
|  | @ -76,53 +76,63 @@ async function scrapeScene(html, url, site, useGallery) { | |||
|     const playerObject = $('script:contains("new VideoPlayer")').html(); | ||||
|     const data = JSON.parse(playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.indexOf('} );') + 1)); | ||||
| 
 | ||||
|     const release = { url }; | ||||
| 
 | ||||
|     const originalTitle = $('h1.watchpage-title').text().trim(); | ||||
|     const { shootId, title } = extractTitle(originalTitle); | ||||
|     const entryId = new URL(url).pathname.split('/')[2]; | ||||
| 
 | ||||
|     const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate(); | ||||
|     release.shootId = shootId; | ||||
|     release.entryId = new URL(url).pathname.split('/')[2]; | ||||
| 
 | ||||
|     release.title = title; | ||||
|     release.date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate(); | ||||
| 
 | ||||
|     const [actorsElement, tagsElement, descriptionElement] = $('.scene-description__row').toArray(); | ||||
|     const actors = $(actorsElement) | ||||
| 
 | ||||
|     release.description = $('meta[name="description"]')?.attr('content')?.trim() | ||||
|         || (descriptionElement && $(descriptionElement).find('dd').text().trim()); | ||||
| 
 | ||||
|     release.actors = $(actorsElement) | ||||
|         .find('a[href*="com/model"]') | ||||
|         .map((actorIndex, actorElement) => $(actorElement).text()).toArray(); | ||||
| 
 | ||||
|     const description = $('meta[name="description"]')?.attr('content')?.trim() || (descriptionElement && $(descriptionElement).find('dd').text().trim()); | ||||
|     const duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds(); | ||||
| 
 | ||||
|     const posterStyle = $('#player').attr('style'); | ||||
|     const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1); | ||||
|     release.duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds(); | ||||
|     release.tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); | ||||
| 
 | ||||
|     const photos = useGallery | ||||
|         ? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray() | ||||
|         : $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray(); | ||||
| 
 | ||||
|     release.photos = photos.map((source) => { | ||||
|         // source without parameters sometimes serves larger preview photo
 | ||||
|         const { origin, pathname } = new URL(source); | ||||
| 
 | ||||
|         return `${origin}${pathname}`; | ||||
| 
 | ||||
|         /* disable thumbnail as fallback, usually enough high res photos available | ||||
|         return [ | ||||
|             `${origin}${pathname}`, | ||||
|             source, | ||||
|         ]; | ||||
|         */ | ||||
|     }); | ||||
| 
 | ||||
|     const posterStyle = $('#player').attr('style'); | ||||
|     const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1); | ||||
| 
 | ||||
|     release.poster = poster || release.photos.slice(Math.floor(release.photos.length / 3) * -1); // poster unavailable, try last 1/3rd of high res photos as fallback
 | ||||
| 
 | ||||
|     const trailer = data.clip.qualities.find(clip => clip.quality === 'vga' || clip.quality === 'hd'); | ||||
|     release.trailer = { | ||||
|         src: trailer.src, | ||||
|         type: trailer.type, | ||||
|         quality: trailer.quality === 'vga' ? 480 : 720, | ||||
|     }; | ||||
| 
 | ||||
|     const studioName = $('.watchpage-studioname').first().text().trim(); | ||||
|     const studio = studioName.replace(/[\s.']+/g, '').toLowerCase(); | ||||
|     const tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); | ||||
|     release.studio = studioName.replace(/[\s.']+/g, '').toLowerCase(); | ||||
| 
 | ||||
|     return { | ||||
|         url, | ||||
|         shootId, | ||||
|         entryId, | ||||
|         title, | ||||
|         description, | ||||
|         date, | ||||
|         actors, | ||||
|         duration, | ||||
|         poster, | ||||
|         photos, | ||||
|         trailer: { | ||||
|             src: trailer.src, | ||||
|             type: trailer.type, | ||||
|             quality: trailer.quality === 'vga' ? 480 : 720, | ||||
|         }, | ||||
|         tags, | ||||
|         site, | ||||
|         studio, | ||||
|     }; | ||||
|     return release; | ||||
| } | ||||
| 
 | ||||
| async function scrapeProfile(html, _url, actorName) { | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue