2019-11-11 02:20:00 +00:00
'use strict' ;
const config = require ( 'config' ) ;
const Promise = require ( 'bluebird' ) ;
const path = require ( 'path' ) ;
const fs = require ( 'fs-extra' ) ;
const bhttp = require ( 'bhttp' ) ;
const mime = require ( 'mime' ) ;
const sharp = require ( 'sharp' ) ;
const blake2 = require ( 'blake2' ) ;
2020-01-08 22:33:24 +00:00
const logger = require ( './logger' ) ;
2019-11-11 02:20:00 +00:00
const knex = require ( './knex' ) ;
2019-12-31 02:12:52 +00:00
const upsert = require ( './utils/upsert' ) ;
// Compute a hex-encoded 24-byte BLAKE2b digest of a buffer, used to
// deduplicate media files by content.
function getHash(buffer) {
	const hasher = blake2.createHash('blake2b', { digestLength: 24 });

	hasher.update(buffer);

	return hasher.digest('hex');
}
// Select an evenly spread subset of photos capped at the specified limit
// (or the configured default). The first photo is always kept; returns the
// input untouched when it already fits within the limit.
function pluckPhotos(photos, specifiedLimit) {
	const limit = specifiedLimit || config.media.limit;

	if (photos.length <= limit) {
		return photos;
	}

	const indexes = [1];

	for (let step = 1; step < limit; step += 1) {
		indexes.push(Math.round(step * (photos.length / (limit - 1))));
	}

	// a Set removes duplicate indexes, which may occur when the photo total and photo limit are close
	return [...new Set(indexes)].map(photoIndex => photos[photoIndex - 1]);
}
// Render a JPEG thumbnail of the given image buffer, scaled down to the
// configured height (never enlarged) at the configured JPEG quality.
async function createThumbnail(buffer) {
	const resized = sharp(buffer).resize({
		height: config.media.thumbnailSize,
		withoutEnlargement: true,
	});

	return resized
		.jpeg({ quality: config.media.thumbnailQuality })
		.toBuffer();
}
// Ensure the media directory for a domain/subpath exists under the configured
// media root, creating intermediate directories as needed; returns the full path.
async function createMediaDirectory(domain, subpath) {
	const targetPath = path.join(config.media.path, domain, subpath);

	await fs.mkdir(targetPath, { recursive: true });

	return targetPath;
}
// Map saved photo files to row objects for the media table, preserving order
// via the index property.
function curatePhotoEntries(files) {
	return files.map(({
		filepath,
		thumbpath,
		mimetype,
		hash,
		source,
	}, index) => ({
		path: filepath,
		thumbnail: thumbpath,
		mime: mimetype,
		hash,
		source,
		index,
	}));
}
// Split photo sources into items already present in the media table and new
// ones. `identifier` is the media column matched against; `prop`, when set,
// names the property to read off each photo object (photos may also be plain
// values). An array entry represents fallback sources and counts as a
// duplicate when any of its fallbacks is already known.
async function findDuplicates(photos, identifier, prop = null, label) {
	const getKey = item => (prop ? item[prop] : item);

	const duplicates = await knex('media').whereIn(identifier, photos.flat().map(getKey));
	const knownKeys = new Set(duplicates.map(row => row[prop || identifier]));

	const originals = photos.filter(source => (Array.isArray(source)
		? source.every(sourceX => !knownKeys.has(getKey(sourceX))) // keep only when no fallback matches
		: !knownKeys.has(getKey(source))));

	if (duplicates.length > 0) {
		logger.info(`${duplicates.length} media items already present by ${identifier} for ${label}`);
	}

	if (originals.length > 0) {
		logger.info(`Fetching ${originals.length} new media items for ${label}`);
	}

	return [duplicates, originals];
}
/**
 * Fetch a photo with up to three attempts, returning its buffer and metadata.
 *
 * Fix: when `photoUrl` is an array of fallbacks and every fallback fails, the
 * previous implementation rejected (with an empty-message Error, and an empty
 * array leaked the raw seed rejection), whereas the single-URL path resolves
 * null. Callers filter falsy results (`.filter(photo => photo)`), so the
 * fallback path now also resolves null on total failure.
 *
 * @param {string|string[]} photoUrl - Photo URL, or an array of fallback URLs tried in order.
 * @param {number} index - Photo position within its set; used for log messages only.
 * @param {string} label - Human-readable context for log messages.
 * @param {number} [attempt=1] - Current attempt number, capped at 3.
 * @returns {Promise<Object|null>} { photo, mimetype, extension, hash, source }, or null when unavailable.
 */
async function fetchPhoto(photoUrl, index, label, attempt = 1) {
	if (Array.isArray(photoUrl)) {
		// try each fallback URL in order until one yields a photo; resolve null
		// (matching the single-URL contract) when all of them fail
		return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
			const photo = await fetchPhoto(url, index, label);

			if (photo) {
				return photo;
			}

			throw new Error('Photo not available');
		}), Promise.reject(new Error('No fallback URLs provided')))
			.catch(() => null);
	}

	try {
		// derive the MIME type from the URL path rather than the response headers
		const { pathname } = new URL(photoUrl);
		const mimetype = mime.getType(pathname);

		const res = await bhttp.get(photoUrl);

		if (res.statusCode === 200) {
			const extension = mime.getExtension(mimetype);
			const hash = getHash(res.body);

			return {
				photo: res.body,
				mimetype,
				extension,
				hash,
				source: photoUrl,
			};
		}

		throw new Error(`Response ${res.statusCode} not OK`);
	} catch (error) {
		logger.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${label} (${photoUrl}): ${error}`);

		if (attempt < 3) {
			await Promise.delay(1000); // bluebird; back off briefly before retrying
			return fetchPhoto(photoUrl, index, label, attempt + 1);
		}

		return null;
	}
}
// Write each photo and a freshly generated thumbnail to disk under the
// pluralized domain directory; resolves with the input files extended with
// the thumbnail buffer and both relative paths.
async function savePhotos(files, {
	domain = 'release',
	subpath,
	role = 'photo',
	naming = 'index',
}) {
	return Promise.map(files, async (file, index) => {
		const now = new Date().getTime();
		const thumbnail = await createThumbnail(file.photo);

		// 'index' naming yields e.g. photo1, photo2; any other naming uses a
		// timestamp offset by the file's index to keep names unique
		const filename = naming === 'index'
			? `${file.role || role}${index + 1}`
			: `${now + index}`;

		const mediaDir = `${domain}s`;
		const filepath = path.join(mediaDir, subpath, `${filename}.${file.extension}`);
		const thumbpath = path.join(mediaDir, subpath, `${filename}_thumb.${file.extension}`);

		await Promise.all([
			fs.writeFile(path.join(config.media.path, filepath), file.photo),
			fs.writeFile(path.join(config.media.path, thumbpath), thumbnail),
		]);

		return {
			...file,
			thumbnail,
			filepath,
			thumbpath,
		};
	});
}
/**
 * Fetch, deduplicate, persist and register a set of photos for a target entity.
 *
 * Pipeline: dedupe by source URL against the media table, fetch the remaining
 * photos, dedupe again by content hash, write files and thumbnails to disk,
 * insert new media rows, then upsert the domain association rows.
 *
 * @param {Array} photos - Photo source URLs; an entry may be an array of fallback URLs.
 * @param {Object} options
 * @param {string} [options.domain='release'] - Entity type; pluralized for table and path names.
 * @param {string} [options.role='photo'] - Media role, used in filenames and association table names.
 * @param {string} [options.naming='index'] - Filename scheme passed through to savePhotos.
 * @param {number} options.targetId - ID of the entity the photos belong to.
 * @param {string} options.subpath - Directory below the domain directory to store files in.
 * @param {string} [options.primaryRole] - Role to assign to first photo if not already in database, used mainly for avatars.
 * @param {string} label - Human-readable context for log messages.
 * @returns {Promise<void>}
 */
async function storePhotos(photos, {
	domain = 'release',
	role = 'photo',
	naming = 'index',
	targetId,
	subpath,
	primaryRole, // role to assign to first photo if not already in database, used mainly for avatars
}, label) {
	if (!photos || photos.length === 0) {
		logger.info(`No ${role}s available for ${label}`);
		return;
	}

	const pluckedPhotos = pluckPhotos(Array.from(new Set(photos))); // pre-filter link duplicates, limit total per configuration
	const [sourceDuplicates, sourceOriginals] = await findDuplicates(pluckedPhotos, 'source', null, label);

	// fetch in parallel with bounded concurrency; drop failed fetches (null results)
	const metaFiles = await Promise.map(sourceOriginals, async (photoUrl, index) => fetchPhoto(photoUrl, index, label), {
		concurrency: 10,
	}).filter(photo => photo);

	const metaFilesByHash = metaFiles.reduce((acc, photo) => ({ ...acc, [photo.hash]: photo }), {}); // pre-filter hash duplicates within set; may occur through fallbacks
	const [hashDuplicates, hashOriginals] = await findDuplicates(Object.values(metaFilesByHash), 'hash', 'hash', label);

	const savedPhotos = await savePhotos(hashOriginals, {
		domain,
		role,
		targetId,
		subpath,
		naming,
	});

	// NOTE(review): curatePhotoEntries accepts only (files); the extra
	// arguments here are ignored by its current signature
	const curatedPhotoEntries = curatePhotoEntries(savedPhotos, domain, role, targetId);

	const newPhotos = await knex('media').insert(curatedPhotoEntries).returning('*');

	// returning('*') may not yield an array on every database driver; fall back
	// to associating only the already-known duplicates in that case
	const photoEntries = Array.isArray(newPhotos)
		? [...sourceDuplicates, ...hashDuplicates, ...newPhotos]
		: [...sourceDuplicates, ...hashDuplicates];

	const photoAssociations = photoEntries
		.map(photoEntry => ({
			[`${domain}_id`]: targetId,
			media_id: photoEntry.id,
		}));

	if (primaryRole) {
		// store one photo as a 'primary' photo, such as an avatar or cover
		const primaryPhoto = await knex(`${domain}s_${primaryRole}s`)
			.where(`${domain}_id`, targetId)
			.first();

		if (primaryPhoto) {
			// a primary photo already exists; associate everything else under the
			// regular role, excluding the primary photo itself
			const remainingAssociations = photoAssociations.filter(association => association.media_id !== primaryPhoto.media_id);

			await upsert(`${domain}s_${role}s`, remainingAssociations, [`${domain}_id`, 'media_id']);

			return;
		}

		// no primary photo yet: promote the first association, store the rest normally
		await Promise.all([
			upsert(`${domain}s_${primaryRole}s`, photoAssociations.slice(0, 1), [`${domain}_id`, 'media_id']),
			upsert(`${domain}s_${role}s`, photoAssociations.slice(1), [`${domain}_id`, 'media_id']),
		]);

		return;
	}

	await upsert(`${domain}s_${role}s`, photoAssociations, [`${domain}_id`, 'media_id']);
}
2020-01-08 22:33:24 +00:00
/ *
async function storeReleasePhotos ( releases , label ) {
const sources = releases . map ( release => pluckPhotos ( release . photos ) ) . flat ( ) ;
const uniqueSources = Array . from ( new Set ( sources ) ) ;
const [ sourceDuplicates , sourceOriginals ] = await findDuplicates ( uniqueSources , 'source' , null , label ) ;
const metaFiles = await Promise . map (
sourceOriginals ,
async ( photoUrl , index ) => fetchPhoto ( photoUrl , index , label ) ,
{ concurrency : 10 } ,
)
. filter ( photo => photo ) ;
const hashUniques = Object . values ( metaFiles . reduce ( ( acc , file ) => {
if ( ! acc [ file . hash ] ) acc [ file . hash ] = file ;
return acc ;
} , { } ) ) ;
const [ hashDuplicates , hashOriginals ] = await findDuplicates ( hashUniques , 'hash' , 'hash' , label ) ;
const sourceHashes = metaFiles . concat ( sourceDuplicates ) . reduce ( ( acc , file ) => {
acc [ file . source ] = file . hash ;
return acc ;
} , { } ) ;
const associations = releases . map ( release => release . photos . map ( source => [ release . id , sourceHashes [ source ] ] ) ) . flat ( ) ;
console . log ( associations ) ;
}
* /
/**
 * Download a release trailer, store it on disk and register it in the database.
 *
 * Fix: the response status code was never checked, so a 404/error payload
 * would previously be hashed, inserted into the media table and written to
 * disk as the trailer file. Non-200 responses are now logged and skipped,
 * mirroring the status handling in fetchPhoto.
 *
 * @param {Object|Object[]} trailers - Trailer descriptor ({ src, type?, quality? }) or an array of qualities.
 * @param {Object} options
 * @param {string} [options.domain='releases'] - Directory (already pluralized) below the media root.
 * @param {number} options.targetId - Release ID to associate the trailer with.
 * @param {string} options.subpath - Directory below the domain directory to store the file in.
 * @param {string} label - Human-readable context for log messages.
 * @returns {Promise<void>}
 */
async function storeTrailer(trailers, {
	domain = 'releases',
	targetId,
	subpath,
}, label) {
	// support scrapers supplying multiple qualities; prefer 1080p/720p
	const trailer = Array.isArray(trailers)
		? trailers.find(trailerX => [1080, 720].includes(trailerX.quality)) || trailers[0]
		: trailers;

	if (!trailer || !trailer.src) {
		logger.info(`No trailer available for ${label}`);
		return;
	}

	const [sourceDuplicates, sourceOriginals] = await findDuplicates([trailer], 'source', 'src', label);

	const metaFiles = await Promise.map(sourceOriginals, async (trailerX) => {
		const { pathname } = new URL(trailerX.src);
		const mimetype = trailerX.type || mime.getType(pathname);

		const res = await bhttp.get(trailerX.src);

		// don't store an error page (or other non-OK payload) as a trailer file
		if (res.statusCode !== 200) {
			logger.warn(`Failed to fetch trailer for ${label} (${trailerX.src}): response ${res.statusCode} not OK`);
			return null;
		}

		const hash = getHash(res.body);
		const filepath = path.join(domain, subpath, `trailer${trailerX.quality ? `_${trailerX.quality}` : ''}.${mime.getExtension(mimetype)}`);

		return {
			trailer: res.body,
			path: filepath,
			mime: mimetype,
			source: trailerX.src,
			quality: trailerX.quality || null,
			hash,
		};
	}).filter(trailerX => trailerX); // bluebird .filter, as used in storePhotos

	const [hashDuplicates, hashOriginals] = await findDuplicates(metaFiles, 'hash', 'hash', label);

	const newTrailers = await knex('media')
		.insert(hashOriginals.map(trailerX => ({
			path: trailerX.path,
			mime: trailerX.mime,
			source: trailerX.source,
			quality: trailerX.quality,
			hash: trailerX.hash,
		})))
		.returning('*');

	await Promise.all(hashOriginals.map(trailerX => fs.writeFile(path.join(config.media.path, trailerX.path), trailerX.trailer)));

	// returning('*') may not yield an array on every database driver
	const trailerEntries = Array.isArray(newTrailers)
		? [...sourceDuplicates, ...hashDuplicates, ...newTrailers]
		: [...sourceDuplicates, ...hashDuplicates];

	await upsert('releases_trailers', trailerEntries.map(trailerEntry => ({
		release_id: targetId,
		media_id: trailerEntry.id,
	})), ['release_id', 'media_id']);
}
module . exports = {
2019-12-13 02:28:52 +00:00
createMediaDirectory ,
2019-11-11 02:20:00 +00:00
storePhotos ,
2020-01-08 22:33:24 +00:00
// storeReleasePhotos,
2019-11-11 02:20:00 +00:00
storeTrailer ,
} ;