Refactored media module. Returning 320p and 720p videos from MindGeek as teasers instead of trailers.
This commit is contained in:
parent b9e617edfc
commit 97f5e49187
				|  | @ -116,7 +116,7 @@ export default { | |||
| 
 | ||||
| .tiles { | ||||
|     display: grid; | ||||
|     grid-template-columns: repeat(auto-fit, minmax(10rem, .5fr)); | ||||
|     grid-template-columns: repeat(auto-fit, 10rem); | ||||
|     grid-gap: 0 .5rem; | ||||
|     padding: 1rem; | ||||
|     flex-grow: 1; | ||||
|  | @ -127,10 +127,11 @@ export default { | |||
|     justify-content: center; | ||||
|     align-items: center; | ||||
|     padding: 0 1rem; | ||||
|     margin: 1rem 0 0 0; | ||||
|     margin: 1rem 0; | ||||
| } | ||||
| 
 | ||||
| .genders { | ||||
|     flex-shrink: 0; | ||||
|     padding: 0 .5rem 0 0; | ||||
|     border-right: solid 1px $shadow-hint; | ||||
|     margin: 0 1rem 0 0; | ||||
|  | @ -186,7 +187,7 @@ export default { | |||
| } | ||||
| 
 | ||||
| @media(max-width: $breakpoint) { | ||||
|     .actors { | ||||
|     .tiles { | ||||
|         grid-template-columns: repeat(auto-fit, minmax(8rem, 1fr)); | ||||
|     } | ||||
| } | ||||
|  |  | |||
|  | @ -147,7 +147,19 @@ export default { | |||
| 
 | ||||
| .tiles { | ||||
|     display: grid; | ||||
|     grid-template-columns: repeat(auto-fit, minmax(20rem, 1fr)); | ||||
|     grid-template-columns: repeat(auto-fit, minmax(20rem, .25fr)); | ||||
|     grid-gap: .5rem; | ||||
| } | ||||
| 
 | ||||
| @media(max-width: $breakpoint3) { | ||||
|     .tiles { | ||||
|         grid-template-columns: repeat(auto-fit, minmax(20rem, .5fr)); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| @media(max-width: $breakpoint) { | ||||
|     .tiles { | ||||
|         grid-template-columns: repeat(auto-fit, minmax(20rem, 1fr)); | ||||
|     } | ||||
| } | ||||
| </style> | ||||
|  |  | |||
|  | @ -48,8 +48,10 @@ | |||
|                 >{{ `(${formatDate(release.dateAdded, 'MMM D, YYYY')})` }}</a> | ||||
|             </span> | ||||
| 
 | ||||
|             <router-link | ||||
|                 :to="{ name: release.type || 'scene', params: { releaseId: release.id, releaseSlug: release.slug }, ...(referer && { hash: `#${referer}` }) }" | ||||
|             <a | ||||
|                 :href="`/${release.type || 'scene'}/${release.id}/${release.slug}`" | ||||
|                 target="_blank" | ||||
|                 rel="noopener noreferrer" | ||||
|                 class="link" | ||||
|             > | ||||
|                 <img | ||||
|  | @ -71,12 +73,14 @@ | |||
|                     :title="release.title" | ||||
|                     class="thumbnail" | ||||
|                 >No thumbnail available</div> | ||||
|             </router-link> | ||||
|             </a> | ||||
|         </span> | ||||
| 
 | ||||
|         <div class="info"> | ||||
|             <router-link | ||||
|                 :to="{ name: release.type || 'scene', params: { releaseId: release.id, releaseSlug: release.slug }, ...(referer && { hash: `#${referer}` }) }" | ||||
|             <a | ||||
|                 :href="`/${release.type || 'scene'}/${release.id}/${release.slug}`" | ||||
|                 target="_blank" | ||||
|                 rel="noopener noreferrer" | ||||
|                 class="row link" | ||||
|             > | ||||
|                 <h3 | ||||
|  | @ -89,7 +93,7 @@ | |||
|                         icon="film" | ||||
|                     />{{ release.title }} | ||||
|                 </h3> | ||||
|             </router-link> | ||||
|             </a> | ||||
| 
 | ||||
|             <span class="row"> | ||||
|                 <ul class="actors nolist"> | ||||
|  |  | |||
|  | @ -98,7 +98,7 @@ module.exports = { | |||
|         path: './media', | ||||
|         thumbnailSize: 320, // width for 16:9 will be exactly 576px
 | ||||
|         thumbnailQuality: 100, | ||||
|         trailerQuality: [480, 540], | ||||
|         videoQuality: [480, 360, 320, 540, 720, 1080, 2160], | ||||
|         limit: 25, // max number of photos per release
 | ||||
|     }, | ||||
|     titleSlugLength: 50, | ||||
|  |  | |||
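The trailerQuality pair above gives way to a single videoQuality preference list covering every encoding the scrapers may encounter. A minimal sketch of how that list resolves a multi-quality teaser, mirroring the pickQuality() helper added in src/media.js further down; the sample sources and URLs are placeholders, not real scrape output.

// Preference order from the config hunk above: the first quality found in this
// list wins, so a 320p file beats a 720p one when both are offered.
const videoQuality = [480, 360, 320, 540, 720, 1080, 2160];

// Illustrative teaser sources, e.g. what a MindGeek mediabook now yields (placeholder URLs).
const teaserSources = [
    { src: 'https://cdn.example.com/teaser_320p.mp4', quality: 320 },
    { src: 'https://cdn.example.com/teaser_720p.mp4', quality: 720 },
];

// Same approach as the pickQuality() helper added in src/media.js below.
function pickQuality(items) {
    const itemsByQuality = items.reduce((acc, item) => ({ ...acc, [item.quality]: item }), {});
    return videoQuality.reduce((acc, quality) => acc || itemsByQuality[quality], null) || items[0];
}

console.log(pickQuality(teaserSources).quality); // 320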
										
Binary file not shown. | After Width: | Height: | Size: 998 KiB
Binary file not shown. | After Width: | Height: | Size: 95 KiB
|  | @ -1,39 +1,39 @@ | |||
| const upsert = require('../src/utils/upsert'); | ||||
| 
 | ||||
| const tagPosters = Object.entries({ | ||||
|     'anal-creampie': [0, 'Gina Valentina and Jane Wilde in "A Very Special Anniversary" for Tushy'], | ||||
|     'ass-to-mouth': ['poster', 'Alysa Gap and Logan in "Anal Buffet 4" for Evil Angel'], | ||||
|     'da-tp': [0, 'Natasha Teen in LegalPorno SZ2164'], | ||||
|     'double-anal': [5, 'Riley Reid in "The Gangbang of Riley Reid" for Jules Jordan'], | ||||
|     'double-penetration': ['poster', 'Mia Malkova in "DP!" for HardX'], | ||||
|     'double-vaginal': ['poster', 'Riley Reid in "Pizza That Ass" for Reid My Lips'], | ||||
|     'dv-tp': ['poster', 'Juelz Ventura in "Gangbanged 5" for Elegant Angel'], | ||||
|     'oral-creampie': [1, 'Keisha Grey in Brazzers House'], | ||||
|     'triple-anal': ['poster', 'Kristy Black in SZ1986 for LegalPorno'], | ||||
|     airtight: [1, 'Jynx Maze in "Pump My Ass Full of Cum 3" for Jules Jordan'], | ||||
|     anal: ['poster', 'Jynx Maze in "Anal Buffet 6" for Evil Angel'], | ||||
|     asian: ['poster', 'Vina Sky in "Young and Glamorous 10" for Jules Jordan'], | ||||
|     blowjob: [0, 'Adriana Chechik in "The Dinner Party" for Real Wife Stories (Brazzers)'], | ||||
|     blowbang: ['poster'], | ||||
|     bukkake: ['poster'], | ||||
|     caucasian: ['poster'], | ||||
|     creampie: ['poster'], | ||||
|     ebony: [1, 'Sarah Banks for Brazzers'], | ||||
|     facial: ['poster'], | ||||
|     facefucking: ['1', 'Carrie for Young Throats'], | ||||
|     gangbang: ['poster', 'Kristen Scott in "Interracial Gangbang!" for Jules Jordan'], | ||||
|     gaping: [0, 'McKenzee Miles in "Anal Buffet 4" for Evil Angel'], | ||||
|     interracial: ['poster'], | ||||
|     latina: ['poster'], | ||||
|     mff: ['poster'], | ||||
|     mfm: ['poster'], | ||||
|     orgy: ['poster'], | ||||
|     schoolgirl: [1, 'Eliza Ibarra for Brazzers'], | ||||
|     swallowing: ['poster'], | ||||
|     tattoo: ['poster', 'Kali Roses in "Goes All In For Anal" for Hussie Pass'], | ||||
|     trainbang: ['poster', 'Kali Roses in "Passing Me Around" for Blacked'], | ||||
| }) | ||||
|     .map(([slug, [filename, comment]], index) => ({ | ||||
| const tagPosters = [ | ||||
|     ['anal-creampie', 0, 'Gina Valentina and Jane Wilde in "A Very Special Anniversary" for Tushy'], | ||||
|     ['ass-to-mouth', 'poster', 'Alysa Gap and Logan in "Anal Buffet 4" for Evil Angel'], | ||||
|     ['da-tp', 0, 'Natasha Teen in LegalPorno SZ2164'], | ||||
|     ['double-anal', 5, 'Riley Reid in "The Gangbang of Riley Reid" for Jules Jordan'], | ||||
|     ['double-penetration', 'poster', 'Mia Malkova in "DP!" for HardX'], | ||||
|     ['double-vaginal', 'poster', 'Riley Reid in "Pizza That Ass" for Reid My Lips'], | ||||
|     ['dv-tp', 'poster', 'Juelz Ventura in "Gangbanged 5" for Elegant Angel'], | ||||
|     ['oral-creampie', 1, 'Keisha Grey in Brazzers House'], | ||||
|     ['triple-anal', 'poster', 'Kristy Black in SZ1986 for LegalPorno'], | ||||
|     ['airtight', 1, 'Jynx Maze in "Pump My Ass Full of Cum 3" for Jules Jordan'], | ||||
|     ['anal', 'poster', 'Jynx Maze in "Anal Buffet 6" for Evil Angel'], | ||||
|     ['asian', 'poster', 'Vina Sky in "Young and Glamorous 10" for Jules Jordan'], | ||||
|     ['blowjob', 0, 'Adriana Chechik in "The Dinner Party" for Real Wife Stories (Brazzers)'], | ||||
|     ['blowbang', 'poster'], | ||||
|     ['bukkake', 'poster'], | ||||
|     ['caucasian', 'poster'], | ||||
|     ['creampie', 'poster'], | ||||
|     ['ebony', 1, 'Sarah Banks for Brazzers'], | ||||
|     ['facial', 'poster'], | ||||
|     ['facefucking', '1', 'Carrie for Young Throats'], | ||||
|     ['gangbang', 'poster', 'Kristen Scott in "Interracial Gangbang!" for Jules Jordan'], | ||||
|     ['gaping', 1, 'Vina Sky in "Vina Sky Does Anal" for HardX'], | ||||
|     ['interracial', 'poster'], | ||||
|     ['latina', 'poster'], | ||||
|     ['mff', 'poster'], | ||||
|     ['mfm', 'poster'], | ||||
|     ['orgy', 'poster'], | ||||
|     ['schoolgirl', 1, 'Eliza Ibarra for Brazzers'], | ||||
|     ['swallowing', 'poster'], | ||||
|     ['tattoo', 'poster', 'Kali Roses in "Goes All In For Anal" for Hussie Pass'], | ||||
|     ['trainbang', 'poster', 'Kali Roses in "Passing Me Around" for Blacked'], | ||||
| ] | ||||
|     .map(([slug, filename, comment], index) => ({ | ||||
|         tagSlug: slug, | ||||
|         path: `tags/${slug}/${filename}.jpeg`, | ||||
|         thumbnail: `tags/${slug}/${filename}_thumb.jpeg`, | ||||
|  | @ -63,6 +63,7 @@ const tagPhotos = [ | |||
|     ['gangbang', 1, 'Ginger Lynn in "Gangbang Mystique", a photoset shot by Suze Randall for Puritan No. 10, 1984. This photo pushed the boundaries of pornography at the time, as depicting a woman \'fully occupied\' was unheard of.'], | ||||
|     ['gangbang', 2, 'Riley Reid\'s double anal in "The Gangbang of Riley Reid" for Jules Jordan'], | ||||
|     ['gaping', 'poster', 'Paulina in "Anal Buffet 4" for Evil Angel'], | ||||
|     ['gaping', 0, 'McKenzee Miles in "Anal Buffet 4" for Evil Angel'], | ||||
|     ['trainbang', 0, 'Nicole Black in GIO971 for LegalPorno'], | ||||
|     ['triple-anal', 1, 'Natasha Teen in SZ2098 for LegalPorno'], | ||||
|     ['triple-anal', 2, 'Kira Thorn in GIO1018 for LegalPorno'], | ||||
|  |  | |||
|  | @ -12,7 +12,8 @@ const scrapers = require('./scrapers/scrapers'); | |||
| const whereOr = require('./utils/where-or'); | ||||
| const resolvePlace = require('./utils/resolve-place'); | ||||
| const slugify = require('./utils/slugify'); | ||||
| const { createMediaDirectory, storePhotos } = require('./media_legacy'); | ||||
| // const { createMediaDirectory, storePhotos } = require('./media_legacy');
 | ||||
| const { storeMedia, associateMedia } = require('./media'); | ||||
| 
 | ||||
| async function curateActor(actor) { | ||||
|     const [aliases, avatar, photos, social] = await Promise.all([ | ||||
|  | @ -250,6 +251,17 @@ async function storeSocialLinks(urls, actorId) { | |||
|     await knex('actors_social').insert(curatedSocialEntries); | ||||
| } | ||||
| 
 | ||||
| async function storeAvatars(avatars, actorId) { | ||||
|     if (!avatars || avatars.length === 0) { | ||||
|         return []; | ||||
|     } | ||||
| 
 | ||||
|     const avatarsBySource = await storeMedia(avatars, 'actor', 'avatar'); | ||||
|     await associateMedia({ [actorId]: avatars }, avatarsBySource, 'actor', 'photo', 'avatar'); | ||||
| 
 | ||||
|     return avatarsBySource; | ||||
| } | ||||
| 
 | ||||
| async function storeActor(actor, scraped = false, scrapeSuccess = false) { | ||||
|     const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess); | ||||
| 
 | ||||
|  | @ -260,15 +272,7 @@ async function storeActor(actor, scraped = false, scrapeSuccess = false) { | |||
|     await storeSocialLinks(actor.social, actorEntry.id); | ||||
| 
 | ||||
|     if (actor.avatars) { | ||||
|         await createMediaDirectory('actors', `${actorEntry.slug}/`); | ||||
|         await storePhotos(actor.avatars, { | ||||
|             domain: 'actor', | ||||
|             role: 'photo', | ||||
|             primaryRole: 'avatar', | ||||
|             targetId: actorEntry.id, | ||||
|             subpath: `${actorEntry.slug}/`, | ||||
|             naming: 'timestamp', | ||||
|         }, actorEntry.name); | ||||
|         await storeAvatars(actor.avatars, actorEntry.id); | ||||
|     } | ||||
| 
 | ||||
|     logger.info(`Added new entry for actor '${actor.name}'`); | ||||
|  | @ -421,19 +425,9 @@ async function scrapeActors(actorNames) { | |||
| 
 | ||||
|             if (argv.save) { | ||||
|                 if (actorEntry && profile) { | ||||
|                     await createMediaDirectory('actors', `${actorEntry.slug}/`); | ||||
| 
 | ||||
|                     await Promise.all([ | ||||
|                         updateActor(profile, true, true), | ||||
|                         // storeAvatars(profile, actorEntry),
 | ||||
|                         storePhotos(profile.avatars, { | ||||
|                             domain: 'actor', | ||||
|                             role: 'photo', | ||||
|                             primaryRole: 'avatar', | ||||
|                             targetId: actorEntry.id, | ||||
|                             subpath: `${actorEntry.slug}/`, | ||||
|                             naming: 'timestamp', | ||||
|                         }, actorEntry.name), | ||||
|                         storeAvatars(profile.avatars, actorEntry.id), | ||||
|                     ]); | ||||
| 
 | ||||
|                     return profile; | ||||
|  |  | |||
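With createMediaDirectory and storePhotos gone from actors.js, avatars no longer live in per-actor directories; saveItems() in src/media.js below shards every file by its content hash instead. A rough sketch of the resulting layout relative to config.media.path, using a made-up hash:

const path = require('path');

// hypothetical BLAKE2b digest (digestLength 24, i.e. 48 hex characters) of a downloaded avatar
const hash = 'ab12cd34ef56a788b99c0dd1e2f3a4b5c6d7e8f9a0b1c2d3';
const extension = 'jpeg';

// mirrors saveItems(): shard by the first two hex pairs of the hash, thumbnails in a parallel tree
const filedir = path.join('actors', 'avatars', hash.slice(0, 2), hash.slice(2, 4));
const filepath = path.join(filedir, `${hash.slice(4)}.${extension}`);
const thumbpath = path.join('actors', 'avatars', 'thumbs', hash.slice(0, 2), hash.slice(2, 4), `${hash.slice(4)}.${extension}`);

console.log(filepath);  // actors/avatars/ab/12/cd34ef56....jpeg
console.log(thumbpath); // actors/avatars/thumbs/ab/12/cd34ef56....jpeg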
							
								
								
									
274 src/media.js
							|  | @ -4,11 +4,14 @@ const config = require('config'); | |||
| const Promise = require('bluebird'); | ||||
| const bhttp = require('bhttp'); | ||||
| const mime = require('mime'); | ||||
| const fs = require('fs-extra'); | ||||
| const sharp = require('sharp'); | ||||
| const path = require('path'); | ||||
| const blake2 = require('blake2'); | ||||
| 
 | ||||
| const logger = require('./logger'); | ||||
| const logger = require('./logger')(__filename); | ||||
| const knex = require('./knex'); | ||||
| const { ex } = require('./utils/q'); | ||||
| 
 | ||||
| function getHash(buffer) { | ||||
|     const hash = blake2.createHash('blake2b', { digestLength: 24 }); | ||||
|  | @ -17,6 +20,26 @@ function getHash(buffer) { | |||
|     return hash.digest('hex'); | ||||
| } | ||||
| 
 | ||||
| async function createThumbnail(buffer) { | ||||
|     try { | ||||
|         const thumbnail = sharp(buffer) | ||||
|             .resize({ | ||||
|                 height: config.media.thumbnailSize, | ||||
|                 withoutEnlargement: true, | ||||
|             }) | ||||
|             .jpeg({ | ||||
|                 quality: config.media.thumbnailQuality, | ||||
|             }) | ||||
|             .toBuffer(); | ||||
| 
 | ||||
|         return thumbnail; | ||||
|     } catch (error) { | ||||
|         logger.error(`Failed to create thumbnail: ${error.message}`); | ||||
|     } | ||||
| 
 | ||||
|     return null; | ||||
| } | ||||
| 
 | ||||
| function pluckItems(items, specifiedLimit) { | ||||
|     const limit = specifiedLimit || config.media.limit; | ||||
| 
 | ||||
|  | @ -30,6 +53,13 @@ function pluckItems(items, specifiedLimit) { | |||
|     return Array.from(new Set(plucked)).map(itemIndex => items[itemIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
 | ||||
| } | ||||
| 
 | ||||
| function pickQuality(items) { | ||||
|     const itemsByQuality = items.reduce((acc, item) => ({ ...acc, [item.quality]: item }), {}); | ||||
|     const item = config.media.videoQuality.reduce((acc, quality) => acc || itemsByQuality[quality], null); | ||||
| 
 | ||||
|     return item || items[0]; | ||||
| } | ||||
| 
 | ||||
| async function getEntropy(buffer) { | ||||
|     try { | ||||
|         const { entropy } = await sharp(buffer).stats(); | ||||
|  | @ -42,33 +72,58 @@ async function getEntropy(buffer) { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| async function fetchItem(source, index, existingItemsBySource, attempt = 1) { | ||||
| async function extractItem(source) { | ||||
|     const res = await bhttp.get(source.src); | ||||
| 
 | ||||
|     if (res.statusCode === 200) { | ||||
|         const { q } = ex(res.body.toString()); | ||||
| 
 | ||||
|         return source.extract(q); | ||||
|     } | ||||
| 
 | ||||
|     return null; | ||||
| } | ||||
| 
 | ||||
| async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null) { | ||||
|     try { | ||||
|         if (Array.isArray(source)) { | ||||
|             if (source.every(sourceX => !!sourceX.quality)) { | ||||
|                 // various video qualities provided
 | ||||
|                 const selectedSource = pickQuality(source); | ||||
|                 return fetchItem(selectedSource, index, existingItemsBySource, domain, role, attempt, originalSource); | ||||
|             } | ||||
| 
 | ||||
|             // fallbacks provided
 | ||||
|             return source.reduce((outcome, sourceX) => outcome.catch(async () => { | ||||
|                 const item = await fetchItem(sourceX, index, existingItemsBySource); | ||||
| 
 | ||||
|                 if (item) { | ||||
|                     return item; | ||||
|                 } | ||||
| 
 | ||||
|                 throw new Error(`Item not available: ${source}`); | ||||
|             }), Promise.reject(new Error())); | ||||
|             return source.reduce( | ||||
|                 (outcome, sourceX) => outcome.catch(async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, originalSource)), | ||||
|                 Promise.reject(new Error()), | ||||
|             ); | ||||
|         } | ||||
| 
 | ||||
|         if (source.src && source.extract) { | ||||
|             // source links to page containing a (presumably) tokenized photo
 | ||||
|             const itemSource = await extractItem(source); | ||||
| 
 | ||||
|             return fetchItem(itemSource, index, existingItemsBySource, domain, role, attempt, source); | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
|         if (existingItemsBySource[source]) { | ||||
|             return existingItemsBySource[source]; | ||||
|             return null; | ||||
|         } | ||||
| 
 | ||||
|         const res = await bhttp.get(source); | ||||
|         logger.verbose(`Fetching media item from ${source.src || source}`); | ||||
| 
 | ||||
|         const res = await bhttp.get(source.src || source); | ||||
| 
 | ||||
|         if (res.statusCode === 200) { | ||||
|             const { pathname } = new URL(source); | ||||
|             const { pathname } = new URL(source.src || source); | ||||
|             const mimetype = mime.getType(pathname); | ||||
|             const extension = mime.getExtension(mimetype); | ||||
|             const hash = getHash(res.body); | ||||
|             const entropy = await getEntropy(res.body); | ||||
|             const entropy = /image/.test(mimetype) ? await getEntropy(res.body) : null; | ||||
| 
 | ||||
|             logger.verbose(`Fetched media item from ${source.src || source}`); | ||||
| 
 | ||||
|             return { | ||||
|                 file: res.body, | ||||
|  | @ -76,40 +131,193 @@ async function fetchItem(source, index, existingItemsBySource, attempt = 1) { | |||
|                 extension, | ||||
|                 hash, | ||||
|                 entropy, | ||||
|                 source, | ||||
|                 quality: source.quality || null, | ||||
|                 source: originalSource?.src || originalSource || source.src || source, | ||||
|             }; | ||||
|         } | ||||
| 
 | ||||
|         throw new Error(`Response ${res.statusCode} not OK`); | ||||
|     } catch (error) { | ||||
|         if (attempt <= 3) { | ||||
|             return fetchItem(source, index, existingItemsBySource, attempt + 1); | ||||
|         logger.warn(`Failed attempt ${attempt}/3 to fetch ${domain} ${role} ${index + 1} (${source.src || source}): ${error}`); | ||||
| 
 | ||||
|         if (attempt < 3) { | ||||
|             await Promise.delay(5000); | ||||
|             return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1); | ||||
|         } | ||||
| 
 | ||||
|         throw new Error(`Failed to fetch media from ${source}: ${error}`); | ||||
|         return null; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| async function fetchItems(itemSources, existingItemsBySource) { | ||||
|     return Promise.map(itemSources, async (source, index) => fetchItem(source, index, existingItemsBySource)); | ||||
| async function fetchItems(itemSources, existingItemsBySource, domain, role) { | ||||
|     return Promise.map(itemSources, async (source, index) => fetchItem(source, index, existingItemsBySource, domain, role), { | ||||
|         concurrency: 10, | ||||
|     }).filter(Boolean); | ||||
| } | ||||
| 
 | ||||
| async function storeReleaseMedia(releases, { | ||||
|     type = 'poster', | ||||
| } = {}) { | ||||
|     const pluckedSources = releases.map(release => pluckItems(release[type])); | ||||
|     const existingSourceItems = await knex('media').whereIn('source', pluckedSources.flat()); | ||||
|     const existingItemsBySource = existingSourceItems.reduce((acc, item) => ({ ...acc, [item.source]: item }), {}); | ||||
| async function saveItems(items, domain, role) { | ||||
|     return Promise.map(items, async (item) => { | ||||
|         try { | ||||
|             const dir = item.hash.slice(0, 2); | ||||
|             const subdir = item.hash.slice(2, 4); | ||||
|             const filename = item.quality | ||||
|                 ? `${item.hash.slice(4)}_${item.quality}.${item.extension}` | ||||
|                 : `${item.hash.slice(4)}.${item.extension}`; | ||||
| 
 | ||||
|     const fetchedItems = await fetchItems(pluckedSources, existingItemsBySource); | ||||
|     const existingHashItems = await knex('media').whereIn('hash', fetchedItems.map(item => item.hash)); | ||||
|     const existingItemsByHash = existingHashItems.reduce((acc, item) => ({ ...acc, [item.hash]: item }), {}); | ||||
|             const filedir = path.join(`${domain}s`, `${role}s`, dir, subdir); | ||||
|             const filepath = path.join(filedir, filename); | ||||
| 
 | ||||
|     const newItems = fetchedItems.filter(item => !existingItemsByHash[item.hash]); | ||||
|             await fs.mkdir(path.join(config.media.path, filedir), { recursive: true }); | ||||
|             await fs.writeFile(path.join(config.media.path, filepath), item.file); | ||||
| 
 | ||||
|     console.log(fetchedItems, existingHashItems, existingItemsByHash, newItems); | ||||
|             if (/image/.test(item.mimetype)) { | ||||
|                const thumbnail = await createThumbnail(item.file); | ||||
| 
 | ||||
|                 const thumbdir = path.join(`${domain}s`, `${role}s`, 'thumbs', dir, subdir); | ||||
|                 const thumbpath = path.join(thumbdir, filename); | ||||
| 
 | ||||
|                 await fs.mkdir(path.join(config.media.path, thumbdir), { recursive: true }); | ||||
|                 await fs.writeFile(path.join(config.media.path, thumbpath), thumbnail); | ||||
| 
 | ||||
|                 logger.verbose(`Saved ${domain} ${role} with thumbnail to ${filepath}`); | ||||
| 
 | ||||
|                 return { | ||||
|                     ...item, | ||||
|                     thumbnail, | ||||
|                     filepath, | ||||
|                     thumbpath, | ||||
|                 }; | ||||
|             } | ||||
| 
 | ||||
|             logger.verbose(`Saved ${domain} ${role} to ${filepath}`); | ||||
| 
 | ||||
|             return { | ||||
|                 ...item, | ||||
|                 filepath, | ||||
|             }; | ||||
|         } catch (error) { | ||||
|             logger.error(`Failed to store ${domain} ${role} from ${item.source}: ${error.message}`); | ||||
|             return null; | ||||
|         } | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
| function curateItemEntries(items) { | ||||
|     return items.filter(Boolean).map((item, index) => ({ | ||||
|         path: item.filepath, | ||||
|         thumbnail: item.thumbpath, | ||||
|         mime: item.mimetype, | ||||
|         hash: item.hash, | ||||
|         source: item.source, | ||||
|         entropy: item.entropy, | ||||
|         index, | ||||
|     })); | ||||
| } | ||||
| 
 | ||||
| function groupItems(items) { | ||||
|     return items.reduce((acc, item) => ({ | ||||
|         source: { ...acc.source, [item.source]: item }, | ||||
|         hash: { ...acc.hash, [item.hash]: item }, | ||||
|     }), { | ||||
|         source: {}, | ||||
|         hash: {}, | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
| async function storeMedia(sources, domain, role) { | ||||
|     const presentSources = sources.filter(Boolean); | ||||
| 
 | ||||
|     if (presentSources.length === 0) { | ||||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     // find source duplicates that don't need to be re-downloaded or re-saved
 | ||||
|     const existingSourceItems = await knex('media').whereIn('source', presentSources.flat().map(source => source.src || source)); | ||||
|     const { source: existingSourceItemsBySource, hash: existingSourceItemsByHash } = groupItems(existingSourceItems); | ||||
| 
 | ||||
|     // download media items from new sources
 | ||||
|     const fetchedItems = await fetchItems(presentSources, existingSourceItemsBySource, domain, role); | ||||
|     const { hash: fetchedItemsByHash } = groupItems(fetchedItems); | ||||
| 
 | ||||
|     // find hash duplicates that don't need to be re-saved
 | ||||
|     const uniqueFetchedItems = Object.values(fetchedItemsByHash); | ||||
|     const existingHashItems = await knex('media').whereIn('hash', uniqueFetchedItems.map(item => item.hash)); | ||||
|     const { hash: existingHashItemsByHash } = groupItems(existingHashItems); | ||||
| 
 | ||||
|     // save new items to disk
 | ||||
|     const newItems = uniqueFetchedItems.filter(item => !existingHashItemsByHash[item.hash]); | ||||
|     const savedItems = await saveItems(newItems, domain, role); | ||||
| 
 | ||||
|     // store new items in database
 | ||||
|     const curatedItemEntries = curateItemEntries(savedItems); | ||||
|     const storedItems = await knex('media').insert(curatedItemEntries).returning('*'); | ||||
|     const { hash: storedItemsByHash } = groupItems(Array.isArray(storedItems) ? storedItems : []); | ||||
| 
 | ||||
|     // accumulate existing and new items by source to be mapped onto releases
 | ||||
|     const itemsByHash = { ...existingSourceItemsByHash, ...existingHashItemsByHash, ...storedItemsByHash }; | ||||
|     const itemsBySource = { | ||||
|         ...existingSourceItemsBySource, | ||||
|         ...fetchedItems.reduce((acc, item) => ({ ...acc, [item.source]: itemsByHash[item.hash] }), {}), | ||||
|     }; | ||||
| 
 | ||||
|     logger.info(`Stored ${fetchedItems.length} new ${domain} ${role}s`); | ||||
| 
 | ||||
|     return itemsBySource; | ||||
| } | ||||
| 
 | ||||
| function extractPrimaryItem(associations, targetId, role, primaryRole, primaryItemsByTargetId) { | ||||
|     if (!primaryRole) { | ||||
|         return { [role]: associations, [primaryRole]: null }; | ||||
|     } | ||||
| 
 | ||||
|     if (primaryItemsByTargetId[targetId]) { | ||||
|         const remainingAssociations = associations.filter(association => association.media_id !== primaryItemsByTargetId[targetId].media_id); | ||||
| 
 | ||||
|         return { [role]: remainingAssociations, [primaryRole]: null }; | ||||
|     } | ||||
| 
 | ||||
|     return { | ||||
|         [role]: associations.slice(1), | ||||
|         [primaryRole]: associations.slice(0, 1)[0], | ||||
|     }; | ||||
| } | ||||
| 
 | ||||
| function associateTargetMedia(targetId, sources, mediaBySource, domain, role, primaryRole, primaryItemsByTargetId) { | ||||
|     if (!sources) return { [role]: null, [primaryRole]: null }; | ||||
| 
 | ||||
|     const associations = sources | ||||
|         .filter(Boolean) | ||||
|         .map((source) => { | ||||
|             const mediaItem = Array.isArray(source) | ||||
|                 ? source.reduce((acc, sourceX) => acc || mediaBySource[sourceX.src || sourceX], null) | ||||
|                 : mediaBySource[source.src || source]; | ||||
| 
 | ||||
|             return mediaItem && { [`${domain}_id`]: targetId, media_id: mediaItem.id }; | ||||
|         }) | ||||
|         .filter(Boolean); | ||||
| 
 | ||||
|     logger.info(`Associating ${associations.length} ${role}s to ${domain} ${targetId}`); | ||||
| 
 | ||||
|     return extractPrimaryItem(associations, targetId, role, primaryRole, primaryItemsByTargetId); | ||||
| } | ||||
| 
 | ||||
| async function associateMedia(sourcesByTargetId, mediaBySource, domain, role, primaryRole) { | ||||
|     const primaryItems = primaryRole ? await knex(`${domain}s_${primaryRole}s`).whereIn(`${domain}_id`, Object.keys(sourcesByTargetId)) : []; | ||||
|     const primaryItemsByTargetId = primaryItems.reduce((acc, item) => ({ ...acc, [item[`${domain}_id`]]: item }), {}); | ||||
| 
 | ||||
|     const associationsPerTarget = await Promise.map(Object.entries(sourcesByTargetId), ([targetId, sources]) => associateTargetMedia(targetId, sources, mediaBySource, domain, role, primaryRole, primaryItemsByTargetId)); | ||||
| 
 | ||||
|     const associations = associationsPerTarget.map(association => association[role]).flat().filter(Boolean); | ||||
|     const primaryAssociations = associationsPerTarget.map(association => association[primaryRole]).filter(Boolean); | ||||
| 
 | ||||
|     return Promise.all([ | ||||
|         (associations.length > 0 && knex.raw(`${knex(`${domain}s_${role}s`).insert(associations).toString()} ON CONFLICT DO NOTHING`)), | ||||
|         (primaryAssociations.length > 0 && knex.raw(`${knex(`${domain}s_${primaryRole}s`).insert(primaryAssociations).toString()} ON CONFLICT DO NOTHING`)), | ||||
|     ]); | ||||
| } | ||||
| 
 | ||||
| module.exports = { | ||||
|     storeReleaseMedia, | ||||
|     pluckItems, | ||||
|     storeMedia, | ||||
|     associateMedia, | ||||
| }; | ||||
|  |  | |||
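For reference, the refactored storeMedia()/fetchItem() accept three source shapes from callers; a short sketch of each (the URLs are placeholders), matching the branches of fetchItem() above:

// 1. A plain URL string: fetched directly.
const poster = 'https://cdn.example.com/poster.jpg';

// 2. An array in which every entry carries a quality: alternative encodings of the same
//    video; pickQuality() picks one according to config.media.videoQuality.
const teaser = [
    { src: 'https://cdn.example.com/teaser_320p.mp4', quality: 320 },
    { src: 'https://cdn.example.com/teaser_720p.mp4', quality: 720 },
];

// 3. An array without qualities: fallbacks, tried in order until one of them succeeds.
const photo = [
    'https://cdn.example.com/01.jpg',
    'https://mirror.example.com/01.jpg',
];

// A single source may also be { src, extract }, in which case the page at src is fetched
// first and extract(q) pulls the actual (presumably tokenized) media URL out of it.
console.log({ poster, teaser, photo });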
|  | @ -10,20 +10,11 @@ const argv = require('./argv'); | |||
| const whereOr = require('./utils/where-or'); | ||||
| const { associateTags } = require('./tags'); | ||||
| const { associateActors, scrapeBasicActors } = require('./actors'); | ||||
| /* | ||||
| const { | ||||
|     createMediaDirectory, | ||||
|     storePhotos, | ||||
|     // storeReleasePhotos,
 | ||||
|     storeTrailer, | ||||
|     storeReleaseMedia, | ||||
|     pluckItems, | ||||
|     storeMedia, | ||||
|     associateMedia, | ||||
| } = require('./media'); | ||||
| */ | ||||
| const { | ||||
|     createMediaDirectory, | ||||
|     storePhotos, | ||||
|     storeTrailer, | ||||
| } = require('./media_legacy'); | ||||
| const { fetchSites, findSiteByUrl } = require('./sites'); | ||||
| const slugify = require('./utils/slugify'); | ||||
| 
 | ||||
|  | @ -337,55 +328,39 @@ function accumulateMovies(releases) { | |||
| } | ||||
| 
 | ||||
| async function storeReleaseAssets(releases) { | ||||
|     // await storeReleasePhotos(releases);
 | ||||
|     const releasePostersById = releases.reduce((acc, release) => ({ ...acc, [release.id]: [release.poster] }), {}); | ||||
|     const releaseCoversById = releases.reduce((acc, release) => ({ ...acc, [release.id]: release.covers }), {}); | ||||
|     const releaseTrailersById = releases.reduce((acc, release) => ({ ...acc, [release.id]: [release.trailer] }), {}); | ||||
|     const releaseTeasersById = releases.reduce((acc, release) => ({ ...acc, [release.id]: [release.teaser] }), {}); | ||||
|     const releasePhotosById = releases.reduce((acc, release) => ({ | ||||
|         ...acc, | ||||
|         [release.id]: pluckItems(release.photos), | ||||
|     }), {}); | ||||
| 
 | ||||
|     // return storeReleaseMedia(releases);
 | ||||
|     const [posters, covers] = await Promise.all([ | ||||
|         storeMedia(Object.values(releasePostersById).flat(), 'release', 'poster'), | ||||
|         storeMedia(Object.values(releaseCoversById).flat(), 'release', 'cover'), | ||||
|     ]); | ||||
| 
 | ||||
|     await Promise.map(releases, async (release) => { | ||||
|         const subpath = `${release.site.network.slug}/${release.site.slug}/${release.id}/`; | ||||
|         const identifier = `"${release.title}" (${release.id})`; | ||||
|     // ensure posters are available before fetching supplementary media
 | ||||
|     await Promise.all([ | ||||
|         associateMedia(releasePostersById, posters, 'release', 'poster'), | ||||
|         associateMedia(releaseCoversById, covers, 'release', 'cover'), | ||||
|     ]); | ||||
| 
 | ||||
|         try { | ||||
|             await createMediaDirectory('releases', subpath); | ||||
|     const photos = await storeMedia(Object.values(releasePhotosById).flat(), 'release', 'photo'); | ||||
|     await associateMedia(releasePhotosById, photos, 'release', 'photo'); | ||||
| 
 | ||||
|             // don't use Promise.all to prevent concurrency issues with duplicate detection
 | ||||
|             if (release.poster) { | ||||
|                 await storePhotos([release.poster], { | ||||
|                     role: 'poster', | ||||
|                     targetId: release.id, | ||||
|                     subpath, | ||||
|                 }, identifier); | ||||
|             } | ||||
|     // videos take a long time, fetch last
 | ||||
|     const [trailers, teasers] = await Promise.all([ | ||||
|         storeMedia(Object.values(releaseTrailersById).flat(), 'release', 'trailer'), | ||||
|         storeMedia(Object.values(releaseTeasersById).flat(), 'release', 'teaser'), | ||||
|     ]); | ||||
| 
 | ||||
|             await storePhotos(release.photos, { | ||||
|                 targetId: release.id, | ||||
|                 subpath, | ||||
|                 primaryRole: release.poster ? null : 'poster', | ||||
|             }, identifier); | ||||
| 
 | ||||
|             await storePhotos(release.covers, { | ||||
|                 role: 'cover', | ||||
|                 targetId: release.id, | ||||
|                 subpath, | ||||
|             }, identifier); | ||||
| 
 | ||||
|             await storeTrailer(release.trailer, { | ||||
|                 targetId: release.id, | ||||
|                 subpath, | ||||
|                 role: 'trailer', | ||||
|             }, identifier); | ||||
| 
 | ||||
|             await storeTrailer(release.teaser, { | ||||
|                 targetId: release.id, | ||||
|                 subpath, | ||||
|                 role: 'teaser', | ||||
|             }, identifier); | ||||
|         } catch (error) { | ||||
|             logger.error(error.message); | ||||
|         } | ||||
|     }, { | ||||
|         concurrency: 10, | ||||
|     }); | ||||
|     await Promise.all([ | ||||
|         associateMedia(releaseTrailersById, trailers, 'release', 'trailer'), | ||||
|         associateMedia(releaseTeasersById, teasers, 'release', 'teaser'), | ||||
|     ]); | ||||
| } | ||||
| 
 | ||||
| async function storeRelease(release) { | ||||
|  | @ -453,6 +428,8 @@ async function storeReleases(releases) { | |||
|         concurrency: 10, | ||||
|     }).filter(release => release); | ||||
| 
 | ||||
|     logger.info(`Stored ${storedReleases.length} new releases`); | ||||
| 
 | ||||
|     const actors = accumulateActors(storedReleases); | ||||
|     const movies = accumulateMovies(storedReleases); | ||||
| 
 | ||||
|  |  | |||
|  | @ -47,7 +47,7 @@ function scrapeLatest(html, site) { | |||
|         const actors = title.split(/[,&]|\band\b/).map(actor => actor.trim()); | ||||
| 
 | ||||
|         const poster = `https:${element.querySelector('img').src}`; | ||||
|         const trailer = sceneLinkElement.dataset.preview_clip_url; | ||||
|         const teaser = sceneLinkElement.dataset.preview_clip_url; | ||||
| 
 | ||||
|         return [ | ||||
|             ...acc, | ||||
|  | @ -57,8 +57,8 @@ function scrapeLatest(html, site) { | |||
|                 title, | ||||
|                 actors, | ||||
|                 poster, | ||||
|                 trailer: { | ||||
|                     src: trailer, | ||||
|                 teaser: { | ||||
|                     src: teaser, | ||||
|                 }, | ||||
|                 site, | ||||
|             }, | ||||
|  | @ -99,7 +99,7 @@ async function scrapeScene(html, url, site) { | |||
|     const lastPhotosUrl = Array.from(document.querySelectorAll('.pagination a')).slice(-1)[0].href; | ||||
|     const photos = await getPhotos(`${origin}${pathname}${lastPhotosUrl}`, site, url); | ||||
| 
 | ||||
|     const stars = Math.floor(Number(document.querySelector('span[itemprop="average"]').textContent) / 2); | ||||
|     const stars = Math.floor(Number(document.querySelector('span[itemprop="average"]')?.textContent || document.querySelector('span[itemprop="ratingValue"]')?.textContent) / 2); | ||||
|     const tags = Array.from(document.querySelectorAll('.scene-details .categories a')).map(({ textContent }) => textContent); | ||||
| 
 | ||||
|     return { | ||||
|  |  | |||
|  | @ -36,35 +36,33 @@ function scrapeLatestX(data, site) { | |||
|         return null; | ||||
|     } | ||||
| 
 | ||||
|     const { id: entryId, title, description } = data; | ||||
|     const hostname = site.parameters?.native ? site.url : site.network.url; | ||||
|     const url = `${hostname}/scene/${entryId}/`; | ||||
|     const date = new Date(data.dateReleased); | ||||
|     const actors = data.actors.map(actor => ({ name: actor.name, gender: actor.gender })); | ||||
| 
 | ||||
|     const tags = data.tags.map(tag => tag.name); | ||||
| 
 | ||||
|     const [poster, ...photos] = getThumbs(data); | ||||
|     const trailer = data.videos.mediabook && (data.videos.mediabook.files['720p'] || data.videos.mediabook.files['320p']); | ||||
|     const duration = data.videos.mediabook && data.videos.mediabook.length; | ||||
| 
 | ||||
|     return { | ||||
|         url, | ||||
|         entryId, | ||||
|         title, | ||||
|         description, | ||||
|         actors, | ||||
|         tags, | ||||
|         duration, | ||||
|         poster, | ||||
|         photos, | ||||
|         trailer: trailer && { | ||||
|             src: trailer.urls.view, | ||||
|             quality: parseInt(trailer.format, 10), | ||||
|         }, | ||||
|         date, | ||||
|         site, | ||||
|     const release = { | ||||
|         entryId: data.id, | ||||
|         title: data.title, | ||||
|         description: data.description, | ||||
|     }; | ||||
| 
 | ||||
|     const hostname = site.parameters?.native ? site.url : site.network.url; | ||||
| 
 | ||||
|     release.url = `${hostname}/scene/${release.entryId}/`; | ||||
|     release.date = new Date(data.dateReleased); | ||||
|     release.actors = data.actors.map(actor => ({ name: actor.name, gender: actor.gender })); | ||||
| 
 | ||||
|     release.tags = data.tags.map(tag => tag.name); | ||||
| 
 | ||||
|     release.duration = data.videos.mediabook?.length; | ||||
|     [release.poster, ...release.photos] = getThumbs(data); | ||||
| 
 | ||||
|     const teaserSources = data.videos.mediabook?.files; | ||||
| 
 | ||||
|     if (teaserSources) { | ||||
|         release.teaser = Object.values(teaserSources).map(teaser => ({ | ||||
|             src: teaser.urls.view, | ||||
|             quality: parseInt(teaser.format, 10), | ||||
|         })); | ||||
|     } | ||||
| 
 | ||||
|     return release; | ||||
| } | ||||
| 
 | ||||
| async function scrapeLatest(items, site) { | ||||
|  | @ -89,12 +87,13 @@ function scrapeScene(data, url, _site, networkName) { | |||
| 
 | ||||
|     [release.poster, ...release.photos] = getThumbs(data); | ||||
| 
 | ||||
|     const trailer = data.videos.mediabook && (data.videos.mediabook.files['720p'] || data.videos.mediabook.files['320p']); | ||||
|     if (trailer) { | ||||
|         release.trailer = { | ||||
|             src: trailer.urls.view, | ||||
|             quality: parseInt(trailer.format, 10), | ||||
|         }; | ||||
|     const teaserSources = data.videos.mediabook?.files; | ||||
| 
 | ||||
|     if (teaserSources) { | ||||
|         release.teaser = Object.values(teaserSources).map(teaser => ({ | ||||
|             src: teaser.urls.view, | ||||
|             quality: parseInt(teaser.format, 10), | ||||
|         })); | ||||
|     } | ||||
| 
 | ||||
|     const siteName = data.collections[0]?.name || data.brand; | ||||
|  |  | |||
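On the scraper side, the MindGeek mediabook no longer yields a single 720p-or-320p trailer; every available file becomes a quality-tagged teaser source, and the media module reduces that list via the videoQuality preference. A sketch of the mapping with an illustrative response fragment (field names follow the old files['720p'] lookup; values are placeholders):

// illustrative shape of data.videos.mediabook.files in the MindGeek API response
// (keys as in the old files['720p'] / files['320p'] lookup; URLs are placeholders)
const files = {
    '320p': { format: '320p', urls: { view: 'https://cdn.example.com/teaser_320p.mp4' } },
    '720p': { format: '720p', urls: { view: 'https://cdn.example.com/teaser_720p.mp4' } },
};

// as in the updated scrapeLatestX/scrapeScene: every encoding becomes a teaser source
const teaser = Object.values(files).map(file => ({
    src: file.urls.view,
    quality: parseInt(file.format, 10), // '320p' -> 320
}));

console.log(teaser);
// [ { src: '.../teaser_320p.mp4', quality: 320 }, { src: '.../teaser_720p.mp4', quality: 720 } ]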