diff --git a/src/media.js b/src/media.js index 63cf2a5c..73bea226 100644 --- a/src/media.js +++ b/src/media.js @@ -231,7 +231,7 @@ function groupItems(items) { }); } -async function storeMedia(sources, domain, role) { +async function storeMedia(sources, domain, role, { entropyFilter = 2.5 } = {}) { const presentSources = sources.filter(Boolean); if (presentSources.length === 0) { @@ -250,7 +250,7 @@ async function storeMedia(sources, domain, role) { const { hash: fetchedItemsByHash } = groupItems(fetchedItems); // find hash duplicates that don't need to be re-saved - const uniqueFetchedItems = Object.values(fetchedItemsByHash); + const uniqueFetchedItems = Object.values(fetchedItemsByHash).filter(item => !entropyFilter || item.entropy >= entropyFilter); const existingHashItems = await knex('media').whereIn('hash', uniqueFetchedItems.map(item => item.hash)); const { hash: existingHashItemsByHash } = groupItems(existingHashItems);