Refactored media module for better duplicate handling, improved readability, and less repeated code (DRY).
This commit is contained in:
		
							parent
							
								
									b9617c76a1
								
							
						
					
					
						commit
						c4d620c6ff
					
				
							
								
								
									
										260
									
								
								src/media.js
								
								
								
								
							
							
						
						
									
										260
									
								
								src/media.js
								
								
								
								
							|  | @ -46,58 +46,54 @@ async function createActorMediaDirectory(profile, actor) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function storePoster(release, releaseId) { | function curatePhotoEntries(files, domain = 'releases', role = 'photo', targetId, setAvatar = false) { | ||||||
|     if (!release.poster) { |     return files.map((file, index) => ({ | ||||||
|         console.warn(`No poster available for (${release.site.name}, ${releaseId}}) "${release.title}"`); |         path: file.filepath, | ||||||
|         return; |         thumbnail: file.thumbpath, | ||||||
|     } |         mime: file.mimetype, | ||||||
| 
 |         hash: file.hash, | ||||||
|     console.log(`Storing poster for (${release.site.name}, ${releaseId}) "${release.title}"`); |         source: file.source, | ||||||
| 
 |         index, | ||||||
|     const res = await bhttp.get(release.poster); |         domain, | ||||||
| 
 |         target_id: targetId, | ||||||
|     if (res.statusCode === 200) { |         role: setAvatar && index === 0 ? 'avatar' : role, | ||||||
|         const thumbnail = await getThumbnail(res.body); |     })); | ||||||
| 
 |  | ||||||
|         const { pathname } = new URL(release.poster); |  | ||||||
|         const mimetype = res.headers['content-type'] || mime.getType(pathname) || 'image/jpeg'; |  | ||||||
|         const extension = mime.getExtension(mimetype); |  | ||||||
| 
 |  | ||||||
|         const filepath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `poster.${extension}`); |  | ||||||
|         const thumbpath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `poster_thumb.${extension}`); |  | ||||||
|         const hash = getHash(res.body); |  | ||||||
| 
 |  | ||||||
|         await Promise.all([ |  | ||||||
|             fs.writeFile(path.join(config.media.path, filepath), res.body), |  | ||||||
|             fs.writeFile(path.join(config.media.path, thumbpath), thumbnail), |  | ||||||
|         ]); |  | ||||||
| 
 |  | ||||||
|         await knex('media').insert({ |  | ||||||
|             path: filepath, |  | ||||||
|             thumbnail: thumbpath, |  | ||||||
|             mime: mimetype, |  | ||||||
|             hash, |  | ||||||
|             source: release.poster, |  | ||||||
|             domain: 'releases', |  | ||||||
|             target_id: releaseId, |  | ||||||
|             role: 'poster', |  | ||||||
|         }); |  | ||||||
| 
 |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     console.warn(`Failed to store poster for (${release.site.name}, ${releaseId}) "${release.title}": ${res.statusCode}`); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function storePhotos(release, releaseId) { | // before fetching
 | ||||||
|     if (!release.photos || release.photos.length === 0) { | async function filterSourceDuplicates(photos, domains = ['releases'], roles = ['photo'], identifier) { | ||||||
|         console.warn(`No photos available for (${release.site.name}, ${releaseId}}) "${release.title}"`); |     const photoSourceEntries = await knex('media') | ||||||
|         return; |         .whereIn('source', photos) | ||||||
|  |         .whereIn('domain', [].concat(domains)) | ||||||
|  |         .whereIn('role', [].concat(roles)); // accept string argument
 | ||||||
|  | 
 | ||||||
|  |     const photoSources = new Set(photoSourceEntries.map(photo => photo.source)); | ||||||
|  |     const newPhotos = photos.filter(source => !photoSources.has(source)); | ||||||
|  | 
 | ||||||
|  |     if (photoSourceEntries.length > 0) { | ||||||
|  |         console.log(`Ignoring ${photoSourceEntries.length} ${roles} items already present by source for ${identifier}`); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     console.log(`Storing ${release.photos.length} photos for (${release.site.name}, ${releaseId}) "${release.title}"`); |     return newPhotos; | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
|     const files = await Promise.map(release.photos, async (photoUrl, index) => { | // after fetching
 | ||||||
|  | async function filterHashDuplicates(files, domains = ['releases'], roles = ['photo'], identifier) { | ||||||
|  |     const photoHashEntries = await knex('media') | ||||||
|  |         .whereIn('hash', files.map(file => file.hash)) | ||||||
|  |         .whereIn('domain', [].concat(domains)) | ||||||
|  |         .whereIn('role', [].concat(roles)); // accept string argument
 | ||||||
|  | 
 | ||||||
|  |     const photoHashes = new Set(photoHashEntries.map(entry => entry.hash)); | ||||||
|  | 
 | ||||||
|  |     if (photoHashEntries.length > 0) { | ||||||
|  |         console.log(`Ignoring ${photoHashEntries.length} ${roles} items already present by hash for ${identifier}`); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return files.filter(file => file && !photoHashes.has(file.hash)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | async function fetchPhoto(photoUrl, index, identifier) { | ||||||
|     const { pathname } = new URL(photoUrl); |     const { pathname } = new URL(photoUrl); | ||||||
|     const mimetype = mime.getType(pathname); |     const mimetype = mime.getType(pathname); | ||||||
| 
 | 
 | ||||||
|  | @ -105,22 +101,13 @@ async function storePhotos(release, releaseId) { | ||||||
|         const res = await bhttp.get(photoUrl); |         const res = await bhttp.get(photoUrl); | ||||||
| 
 | 
 | ||||||
|         if (res.statusCode === 200) { |         if (res.statusCode === 200) { | ||||||
|                 const thumbnail = await getThumbnail(res.body); |  | ||||||
|             const extension = mime.getExtension(mimetype); |             const extension = mime.getExtension(mimetype); | ||||||
| 
 |  | ||||||
|                 const filepath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `${index + 1}.${extension}`); |  | ||||||
|                 const thumbpath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `${index + 1}_thumb.${extension}`); |  | ||||||
|             const hash = getHash(res.body); |             const hash = getHash(res.body); | ||||||
| 
 | 
 | ||||||
|                 await Promise.all([ |  | ||||||
|                     fs.writeFile(path.join(config.media.path, filepath), res.body), |  | ||||||
|                     fs.writeFile(path.join(config.media.path, thumbpath), thumbnail), |  | ||||||
|                 ]); |  | ||||||
| 
 |  | ||||||
|             return { |             return { | ||||||
|                     filepath, |                 photo: res.body, | ||||||
|                     thumbpath, |  | ||||||
|                 mimetype, |                 mimetype, | ||||||
|  |                 extension, | ||||||
|                 hash, |                 hash, | ||||||
|                 source: photoUrl, |                 source: photoUrl, | ||||||
|             }; |             }; | ||||||
|  | @ -128,27 +115,83 @@ async function storePhotos(release, releaseId) { | ||||||
| 
 | 
 | ||||||
|         throw new Error(`Response ${res.statusCode} not OK`); |         throw new Error(`Response ${res.statusCode} not OK`); | ||||||
|     } catch (error) { |     } catch (error) { | ||||||
|             console.warn(`Failed to store photo ${index + 1} for "${release.title}" (${photoUrl}, ${release.url}, ${release.site.name}, ${releaseId}): ${error}`); |         console.warn(`Failed to store photo ${index + 1} (${photoUrl}) for ${identifier}: ${error}`); | ||||||
| 
 | 
 | ||||||
|         return null; |         return null; | ||||||
|     } |     } | ||||||
|     }, { | } | ||||||
|  | 
 | ||||||
|  | async function savePhotos(files, release, releaseId, actorSlug, isPoster = false) { | ||||||
|  |     return Promise.map(files, async (file, index) => { | ||||||
|  |         const timestamp = new Date().getTime(); | ||||||
|  |         const thumbnail = await getThumbnail(file.photo); | ||||||
|  | 
 | ||||||
|  |         const filepath = actorSlug | ||||||
|  |             ? path.join('actors', actorSlug, `${timestamp + index}.${file.extension}`) | ||||||
|  |             : path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `${isPoster ? 'poster' : index + 1}.${file.extension}`); | ||||||
|  | 
 | ||||||
|  |         const thumbpath = actorSlug | ||||||
|  |             ? path.join('actors', actorSlug, `${timestamp + index}_thumb.${file.extension}`) | ||||||
|  |             : path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `${isPoster ? 'poster' : index + 1}_thumb.${file.extension}`); | ||||||
|  | 
 | ||||||
|  |         await Promise.all([ | ||||||
|  |             fs.writeFile(path.join(config.media.path, filepath), file.photo), | ||||||
|  |             fs.writeFile(path.join(config.media.path, thumbpath), thumbnail), | ||||||
|  |         ]); | ||||||
|  | 
 | ||||||
|  |         return { | ||||||
|  |             ...file, | ||||||
|  |             thumbnail, | ||||||
|  |             filepath, | ||||||
|  |             thumbpath, | ||||||
|  |         }; | ||||||
|  |     }); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | async function storePoster(release, releaseId) { | ||||||
|  |     if (!release.poster) { | ||||||
|  |         console.warn(`No poster available for (${release.site.name}, ${releaseId}}) "${release.title}"`); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     const [newPoster] = await filterSourceDuplicates([release.poster], 'releases', 'poster', `(${release.site.name}, ${releaseId}) "${release.title}"`); | ||||||
|  | 
 | ||||||
|  |     if (!newPoster) return; | ||||||
|  | 
 | ||||||
|  |     console.log(`Fetching poster for (${release.site.name}, ${releaseId}) "${release.title}"`); | ||||||
|  | 
 | ||||||
|  |     const metaFile = await fetchPhoto(release.poster, null, `(${release.site.name}, ${releaseId}) "${release.title}"`); | ||||||
|  |     const [uniquePoster] = await filterHashDuplicates([metaFile], 'releases', 'poster', `(${release.site.name}, ${releaseId}) "${release.title}"`); | ||||||
|  | 
 | ||||||
|  |     if (!uniquePoster) return; | ||||||
|  | 
 | ||||||
|  |     const savedPosters = await savePhotos([uniquePoster], release, releaseId, null, true); | ||||||
|  | 
 | ||||||
|  |     await knex('media').insert(curatePhotoEntries(savedPosters, 'releases', 'poster', releaseId)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | async function storePhotos(release, releaseId) { | ||||||
|  |     if (!release.photos || release.photos.length === 0) { | ||||||
|  |         console.warn(`No photos available for (${release.site.name}, ${releaseId}) "${release.title}"`); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const newPhotos = await filterSourceDuplicates(release.photos, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`); | ||||||
|  | 
 | ||||||
|  |     if (newPhotos.length === 0) return; | ||||||
|  | 
 | ||||||
|  |     console.log(`Fetching ${newPhotos.length} photos for (${release.site.name}, ${releaseId}) "${release.title}"`); | ||||||
|  | 
 | ||||||
|  |     const metaFiles = await Promise.map(newPhotos, async (photoUrl, index) => fetchPhoto(photoUrl, index, `(${release.site.name}, ${releaseId}) "${release.title}"`), { | ||||||
|         concurrency: 10, |         concurrency: 10, | ||||||
|     }); |     }); | ||||||
| 
 | 
 | ||||||
|     await knex('media') |     const uniquePhotos = await filterHashDuplicates(metaFiles, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`); | ||||||
|         .insert(files.filter(file => file) |     const savedPhotos = await savePhotos(uniquePhotos, release, releaseId); | ||||||
|             .map((file, index) => ({ | 
 | ||||||
|                 path: file.filepath, |     await knex('media').insert(curatePhotoEntries(savedPhotos, 'releases', 'photo', releaseId)); | ||||||
|                 thumbnail: file.thumbpath, | 
 | ||||||
|                 mime: file.mimetype, |     console.log(`Stored ${newPhotos.length} photos for (${release.site.name}, ${releaseId}) "${release.title}"`); | ||||||
|                 hash: file.hash, |  | ||||||
|                 source: file.source, |  | ||||||
|                 index, |  | ||||||
|                 domain: 'releases', |  | ||||||
|                 target_id: releaseId, |  | ||||||
|                 role: 'photo', |  | ||||||
|             }))); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function storeTrailer(release, releaseId) { | async function storeTrailer(release, releaseId) { | ||||||
|  | @ -185,69 +228,28 @@ async function storeAvatars(profile, actor) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     console.log(`Storing ${profile.avatars.length} avatars for '${profile.name}'`); |     const newPhotos = await filterSourceDuplicates(profile.avatars, 'actors', ['avatar', 'photo'], actor.name); | ||||||
| 
 | 
 | ||||||
|     const files = await Promise.map(profile.avatars, async (avatarUrl, index) => { |     if (newPhotos.length === 0) return; | ||||||
|         try { |  | ||||||
|             const { pathname } = new URL(avatarUrl); |  | ||||||
|             const mimetype = mime.getType(pathname); |  | ||||||
| 
 | 
 | ||||||
|             const res = await bhttp.get(avatarUrl); |     console.log(`Fetching ${newPhotos.length} avatars for '${actor.name}'`); | ||||||
| 
 | 
 | ||||||
|             if (res.statusCode === 200) { |     const metaFiles = await Promise.map(newPhotos, async (photoUrl, index) => fetchPhoto(photoUrl, index, actor.name), { | ||||||
|                 const thumbnail = await getThumbnail(res.body); |  | ||||||
|                 const extension = mime.getExtension(mimetype); |  | ||||||
| 
 |  | ||||||
|                 const timestamp = new Date().getTime(); |  | ||||||
| 
 |  | ||||||
|                 const filepath = path.join('actors', actor.slug, `${timestamp + index}.${extension}`); |  | ||||||
|                 const thumbpath = path.join('actors', actor.slug, `${timestamp + index}_thumb.${extension}`); |  | ||||||
|                 const hash = getHash(res.body); |  | ||||||
| 
 |  | ||||||
|                 await Promise.all([ |  | ||||||
|                     fs.writeFile(path.join(config.media.path, filepath), res.body), |  | ||||||
|                     fs.writeFile(path.join(config.media.path, thumbpath), thumbnail), |  | ||||||
|                 ]); |  | ||||||
| 
 |  | ||||||
|                 return { |  | ||||||
|                     filepath, |  | ||||||
|                     thumbpath, |  | ||||||
|                     mimetype, |  | ||||||
|                     hash, |  | ||||||
|                     source: avatarUrl, |  | ||||||
|                 }; |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             throw new Error(`Response ${res.statusCode} not OK`); |  | ||||||
|         } catch (error) { |  | ||||||
|             console.warn(`Failed to store avatar ${index + 1} for '${profile.name}': ${avatarUrl}`); |  | ||||||
| 
 |  | ||||||
|             return null; |  | ||||||
|         } |  | ||||||
|     }, { |  | ||||||
|         concurrency: 10, |         concurrency: 10, | ||||||
|     }); |     }); | ||||||
| 
 | 
 | ||||||
|     const avatars = files.filter(file => file); |     const uniquePhotos = await filterHashDuplicates(metaFiles, 'actors', ['avatar', 'photo'], actor.name); | ||||||
| 
 |     const [savedPhotos, avatarEntry] = await Promise.all([ | ||||||
|     const existingAvatars = await knex('media') |         savePhotos(uniquePhotos, null, null, actor.slug), | ||||||
|         .whereIn('hash', avatars.map(file => file.hash)); |         knex('media').where({ | ||||||
| 
 |  | ||||||
|     const newAvatars = avatars.filter(file => !existingAvatars.some(avatar => file.hash === avatar.hash)); |  | ||||||
|     const hasAvatar = existingAvatars.some(avatar => avatar.role === 'avatar'); |  | ||||||
| 
 |  | ||||||
|     await knex('media') |  | ||||||
|         .insert(newAvatars.map((file, index) => ({ |  | ||||||
|             path: file.filepath, |  | ||||||
|             thumbnail: file.thumbpath, |  | ||||||
|             mime: file.mimetype, |  | ||||||
|             hash: file.hash, |  | ||||||
|             source: file.source, |  | ||||||
|             index, |  | ||||||
|             domain: 'actors', |  | ||||||
|             target_id: actor.id, |             target_id: actor.id, | ||||||
|             role: index === 0 && !hasAvatar ? 'avatar' : 'photo', |             domain: 'actors', | ||||||
|         }))); |             role: 'avatar', | ||||||
|  |         }).first(), | ||||||
|  |     ]); | ||||||
|  | 
 | ||||||
|  |     // if no avatar entry is present, curatePhotoEntries will store the first photo as avatar
 | ||||||
|  |     await knex('media').insert(curatePhotoEntries(savedPhotos, 'actors', 'photo', actor.id, !avatarEntry)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| module.exports = { | module.exports = { | ||||||
|  |  | ||||||
|  | @ -225,13 +225,13 @@ async function storeReleaseAssets(release, releaseId) { | ||||||
| 
 | 
 | ||||||
|     try { |     try { | ||||||
|         await Promise.all([ |         await Promise.all([ | ||||||
|             associateTags(release, releaseId), |             // associateTags(release, releaseId),
 | ||||||
|             storePhotos(release, releaseId), |             storePhotos(release, releaseId), | ||||||
|             storePoster(release, releaseId), |             storePoster(release, releaseId), | ||||||
|             storeTrailer(release, releaseId), |             storeTrailer(release, releaseId), | ||||||
|         ]); |         ]); | ||||||
|     } catch (error) { |     } catch (error) { | ||||||
|         console.log(release, error); |         console.log(release.url, error); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -31,8 +31,6 @@ async function findSite(url, release) { | ||||||
| async function scrapeRelease(url, release, deep = false) { | async function scrapeRelease(url, release, deep = false) { | ||||||
|     const site = await findSite(url, release); |     const site = await findSite(url, release); | ||||||
| 
 | 
 | ||||||
|     console.log(url, site); |  | ||||||
| 
 |  | ||||||
|     if (!site) { |     if (!site) { | ||||||
|         throw new Error('Could not find site in database'); |         throw new Error('Could not find site in database'); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -66,14 +66,10 @@ async function associateTags(release, releaseId) { | ||||||
|         ? await matchTags(release.tags) // scraper returned raw tags
 |         ? await matchTags(release.tags) // scraper returned raw tags
 | ||||||
|         : release.tags; // tags already matched by scraper
 |         : release.tags; // tags already matched by scraper
 | ||||||
| 
 | 
 | ||||||
|     try { |  | ||||||
|     await knex('tags_associated').insert(tags.map(tagId => ({ |     await knex('tags_associated').insert(tags.map(tagId => ({ | ||||||
|         tag_id: tagId, |         tag_id: tagId, | ||||||
|         release_id: releaseId, |         release_id: releaseId, | ||||||
|     }))); |     }))); | ||||||
|     } catch (error) { |  | ||||||
|         console.log(release, error); |  | ||||||
|     } |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function fetchTags(queryObject, groupsQueryObject, limit = 100) { | async function fetchTags(queryObject, groupsQueryObject, limit = 100) { | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue