diff --git a/public/img/tags/asian/0.jpeg b/public/img/tags/asian/0.jpeg new file mode 100644 index 00000000..ff0e115f Binary files /dev/null and b/public/img/tags/asian/0.jpeg differ diff --git a/public/img/tags/asian/0_thumb.jpeg b/public/img/tags/asian/0_thumb.jpeg new file mode 100644 index 00000000..098ee366 Binary files /dev/null and b/public/img/tags/asian/0_thumb.jpeg differ diff --git a/public/img/tags/asian/1.jpeg b/public/img/tags/asian/1.jpeg new file mode 100644 index 00000000..24f21588 Binary files /dev/null and b/public/img/tags/asian/1.jpeg differ diff --git a/public/img/tags/asian/1_thumb.jpeg b/public/img/tags/asian/1_thumb.jpeg new file mode 100644 index 00000000..d827d0b2 Binary files /dev/null and b/public/img/tags/asian/1_thumb.jpeg differ diff --git a/public/img/tags/asian/poster.jpeg b/public/img/tags/asian/poster.jpeg index 7109cb27..57d5cbfe 100755 Binary files a/public/img/tags/asian/poster.jpeg and b/public/img/tags/asian/poster.jpeg differ diff --git a/public/img/tags/asian/poster_thumb.jpeg b/public/img/tags/asian/poster_thumb.jpeg index c620f1e1..edc3def5 100755 Binary files a/public/img/tags/asian/poster_thumb.jpeg and b/public/img/tags/asian/poster_thumb.jpeg differ diff --git a/public/img/tags/double-anal/7.jpeg b/public/img/tags/double-anal/7.jpeg new file mode 100644 index 00000000..b41208bd Binary files /dev/null and b/public/img/tags/double-anal/7.jpeg differ diff --git a/public/img/tags/double-anal/7_thumb.jpeg b/public/img/tags/double-anal/7_thumb.jpeg new file mode 100644 index 00000000..a940a173 Binary files /dev/null and b/public/img/tags/double-anal/7_thumb.jpeg differ diff --git a/public/img/tags/double-penetration/2.jpeg b/public/img/tags/double-penetration/2.jpeg new file mode 100644 index 00000000..26084c1a Binary files /dev/null and b/public/img/tags/double-penetration/2.jpeg differ diff --git a/public/img/tags/double-penetration/2_thumb.jpeg b/public/img/tags/double-penetration/2_thumb.jpeg new file mode 100644 index 00000000..ca867e3f Binary files /dev/null and b/public/img/tags/double-penetration/2_thumb.jpeg differ diff --git a/public/img/tags/ebony/1.jpeg b/public/img/tags/ebony/1.jpeg index 90691e76..4dc63595 100644 Binary files a/public/img/tags/ebony/1.jpeg and b/public/img/tags/ebony/1.jpeg differ diff --git a/public/img/tags/ebony/1_thumb.jpeg b/public/img/tags/ebony/1_thumb.jpeg index 9d75211a..d34a40ce 100644 Binary files a/public/img/tags/ebony/1_thumb.jpeg and b/public/img/tags/ebony/1_thumb.jpeg differ diff --git a/public/img/tags/latina/poster.jpeg b/public/img/tags/latina/poster.jpeg index 6251f85f..a92f8f22 100755 Binary files a/public/img/tags/latina/poster.jpeg and b/public/img/tags/latina/poster.jpeg differ diff --git a/public/img/tags/latina/poster_thumb.jpeg b/public/img/tags/latina/poster_thumb.jpeg index 53828a06..690f9414 100755 Binary files a/public/img/tags/latina/poster_thumb.jpeg and b/public/img/tags/latina/poster_thumb.jpeg differ diff --git a/public/img/tags/mfm/0.jpeg b/public/img/tags/mfm/0.jpeg new file mode 100755 index 00000000..ac2cc48e Binary files /dev/null and b/public/img/tags/mfm/0.jpeg differ diff --git a/public/img/tags/mfm/0.jpg b/public/img/tags/mfm/0.jpg deleted file mode 100755 index 7b41e11c..00000000 Binary files a/public/img/tags/mfm/0.jpg and /dev/null differ diff --git a/public/img/tags/mfm/0_thumb.jpeg b/public/img/tags/mfm/0_thumb.jpeg new file mode 100755 index 00000000..e924c976 Binary files /dev/null and b/public/img/tags/mfm/0_thumb.jpeg differ diff --git 
a/public/img/tags/mfm/0_thumb.jpg b/public/img/tags/mfm/0_thumb.jpg deleted file mode 100755 index b58d71e5..00000000 Binary files a/public/img/tags/mfm/0_thumb.jpg and /dev/null differ diff --git a/public/img/tags/mfm/2.jpeg b/public/img/tags/mfm/2.jpeg new file mode 100644 index 00000000..3150da86 Binary files /dev/null and b/public/img/tags/mfm/2.jpeg differ diff --git a/public/img/tags/mfm/2_thumb.jpeg b/public/img/tags/mfm/2_thumb.jpeg new file mode 100644 index 00000000..5c574c34 Binary files /dev/null and b/public/img/tags/mfm/2_thumb.jpeg differ diff --git a/public/img/tags/mfm/3.jpeg b/public/img/tags/mfm/3.jpeg new file mode 100644 index 00000000..7f7a2b98 Binary files /dev/null and b/public/img/tags/mfm/3.jpeg differ diff --git a/public/img/tags/mfm/3_thumb.jpeg b/public/img/tags/mfm/3_thumb.jpeg new file mode 100644 index 00000000..6314d325 Binary files /dev/null and b/public/img/tags/mfm/3_thumb.jpeg differ diff --git a/public/img/tags/mfm/4.jpeg b/public/img/tags/mfm/4.jpeg new file mode 100644 index 00000000..d70a3093 Binary files /dev/null and b/public/img/tags/mfm/4.jpeg differ diff --git a/public/img/tags/mfm/4_thumb.jpeg b/public/img/tags/mfm/4_thumb.jpeg new file mode 100644 index 00000000..ba878c9c Binary files /dev/null and b/public/img/tags/mfm/4_thumb.jpeg differ diff --git a/public/img/tags/mfm/5.jpeg b/public/img/tags/mfm/5.jpeg new file mode 100644 index 00000000..ae457269 Binary files /dev/null and b/public/img/tags/mfm/5.jpeg differ diff --git a/public/img/tags/mfm/5_thumb.jpeg b/public/img/tags/mfm/5_thumb.jpeg new file mode 100644 index 00000000..b976c614 Binary files /dev/null and b/public/img/tags/mfm/5_thumb.jpeg differ diff --git a/public/img/tags/mfm/poster.jpeg b/public/img/tags/mfm/poster.jpeg deleted file mode 100755 index e5fa5557..00000000 Binary files a/public/img/tags/mfm/poster.jpeg and /dev/null differ diff --git a/public/img/tags/mfm/poster_thumb.jpeg b/public/img/tags/mfm/poster_thumb.jpeg deleted file mode 100755 index 19e28253..00000000 Binary files a/public/img/tags/mfm/poster_thumb.jpeg and /dev/null differ diff --git a/public/img/tags/redhead/0.jpeg b/public/img/tags/redhead/0.jpeg new file mode 100644 index 00000000..b25d801c Binary files /dev/null and b/public/img/tags/redhead/0.jpeg differ diff --git a/public/img/tags/redhead/0_thumb.jpeg b/public/img/tags/redhead/0_thumb.jpeg new file mode 100644 index 00000000..17258815 Binary files /dev/null and b/public/img/tags/redhead/0_thumb.jpeg differ diff --git a/seeds/04_media.js b/seeds/04_media.js index 75216233..e3088f3e 100644 --- a/seeds/04_media.js +++ b/seeds/04_media.js @@ -5,7 +5,7 @@ const tagPosters = [ ['anal', 3, 'Dakota Skye for Brazzers'], ['anal-creampie', 0, 'Gina Valentina and Jane Wilde in "A Very Special Anniversary" for Tushy'], ['ass-eating', 0, 'Kendra Sunderland and Ana Foxxx in "Kendra\'s Obsession, Part 3" for Blacked'], - ['asian', 'poster', 'Vina Sky in "Young and Glamorous 10" for Jules Jordan'], + ['asian', 0, 'Alina Li in "Slut Puppies 8" for Jules Jordan'], ['ass-to-mouth', 'poster', 'Alysa Gap and Logan in "Anal Buffet 4" for Evil Angel'], ['bdsm', 0, 'Dani Daniels in "The Traning of Dani Daniels, Day 2" for The Training of O at Kink'], ['behind-the-scenes', 0, 'Janice Griffith in "Day With A Pornstar: Janice" for Brazzers'], @@ -18,23 +18,24 @@ const tagPosters = [ ['creampie', 'poster'], ['da-tp', 0, 'Natasha Teen in LegalPorno SZ2164'], ['deepthroat', 0, 'Chanel Grey in "Deepthroating Is Fun" for Throated'], - ['double-anal', 2, 'Lana Rhoades in "Lana 
Rhoades Unleashed" for HardX'], + ['double-anal', 7, 'Adriana Chechik in "DP Masters 6" for Jules Jordan'], ['double-blowjob', 0, 'Kira Noir and Kali Roses for Brazzers'], - ['double-penetration', 'poster', 'Mia Malkova in "DP Me 8" for HardX'], + ['double-penetration', 2, 'Adriana Chechik in "DP Masters 6" for Jules Jordan'], ['double-vaginal', 'poster', 'Riley Reid in "Pizza That Ass" for Reid My Lips'], ['dv-tp', 'poster', 'Juelz Ventura in "Gangbanged 5" for Elegant Angel'], - ['ebony', 1, 'Ana Foxxx in "Gangbang Me 3" for HardX'], + ['ebony', 1, 'Ana Foxxx in "DP Me 4" for HardX'], ['facefucking', 1, 'Carrie for Young Throats'], ['facial', 'poster'], ['gangbang', 'poster', 'Kristen Scott in "Interracial Gangbang!" for Jules Jordan'], ['gaping', 1, 'Vina Sky in "Vina Sky Does Anal" for HardX'], ['interracial', 'poster'], - ['latina', 0, 'Abby Lee Brazil for Bang Bros'], + ['latina', 'poster', 'Alexis Love for Penthouse'], ['lesbian', 0, 'Reena Sky and Sarah Banks for Brazzers'], ['mff', 0, 'Madison Ivy and Adriana Chechik in "Day With A Pornstar" for Brazzers'], - ['mfm', 1, 'Jynx Maze in "Don\'t Make Me Beg 4" for Evil Angel'], + ['mfm', 5, 'Vina Sky in "Slut Puppies 15" for Jules Jordan'], ['oral-creampie', 1, 'Keisha Grey in Brazzers House'], ['orgy', 'poster'], + ['redhead', 0, 'Penny Pax in "The Submission of Emma Marx: Evolved" for New Sensations'], ['schoolgirl', 1, 'Eliza Ibarra for Brazzers'], ['swallowing', 'poster'], ['tattoo', 'poster', 'Kali Roses in "Goes All In For Anal" for Hussie Pass'], @@ -53,6 +54,8 @@ const tagPosters = [ const tagPhotos = [ ['airtight', 2, 'Dakota Skye in "Dakota Goes Nuts" for ArchAngel'], ['airtight', 3, 'Anita Bellini in "Triple Dick Gangbang" for Hands On Hardcore (DDF Network)'], + ['asian', 'poster', 'Vina Sky in "Slut Puppies 15" for Jules Jordan'], + // ['asian', 1, 'Alina Li in "Oil Overload 11" for Jules Jordan'], ['anal', 2, 'Gabbie Carter for Tushy Raw'], ['anal', 'poster', 'Jynx Maze in "Anal Buffet 6" for Evil Angel'], ['anal', 1, 'Veronica Leal and Tina Kay in "Agents On Anal Mission" for Asshole Fever'], @@ -63,23 +66,27 @@ const tagPhotos = [ ['da-tp', 2, 'Angel Smalls in GIO408 for LegalPorno'], ['da-tp', 3, 'Evelina Darling in GIO294'], ['da-tp', 4, 'Ninel Mojado aka Mira Cuckold in GIO063 for LegalPorno'], + ['double-anal', 2, 'Lana Rhoades in "Lana Rhoades Unleashed" for HardX'], ['double-anal', 6, 'Sheena Shaw in "Ass Worship 14" for Jules Jordan'], ['double-anal', 5, 'Riley Reid in "The Gangbang of Riley Reid" for Jules Jordan'], ['double-anal', 'poster', 'Haley Reed in "Young Hot Ass" for Evil Angel'], ['double-anal', 0, 'Nicole Black doing double anal during a gangbang in GIO971 for LegalPorno'], ['double-anal', 1, 'Ria Sunn in SZ1801 for LegalPorno'], + ['double-penetration', 'poster', 'Mia Malkova in "DP Me 8" for HardX'], ['double-penetration', 0, 'Zoey Monroe in "Slut Puppies 7" for Jules Jordan'], ['double-penetration', 1, 'Jynx Maze in "Don\'t Make Me Beg 4" for Evil Angel'], ['double-vaginal', 0, 'Aaliyah Hadid in "Squirting From Double Penetration With Anal" for Bang Bros'], ['dv-tp', 1, 'Adriana Chechik in "Adriana\'s Triple Anal Penetration!"'], ['dv-tp', 0, 'Luna Rival in LegalPorno SZ1490'], ['facefucking', 2, 'Jynx Maze for Throated'], + ['latina', 0, 'Abby Lee Brazil for Bang Bros'], ['gangbang', 0, '"4 On 1 Gangbangs" for Doghouse Digital'], ['gangbang', 1, 'Ginger Lynn in "Gangbang Mystique", a photoset shot by Suze Randall for Puritan No. 10, 1984. 
This photo pushed the boundaries of pornography at the time, as depicting a woman \'fully occupied\' was unheard of.'], ['gangbang', 2, 'Riley Reid\'s double anal in "The Gangbang of Riley Reid" for Jules Jordan'], ['gaping', 'poster', 'Paulina in "Anal Buffet 4" for Evil Angel'], ['gaping', 0, 'McKenzee Miles in "Anal Buffet 4" for Evil Angel'], - ['mfm', 'poster', 'Vina Sky for Jules Jordan'], + // ['mfm', 0, 'Vina Sky in "Jules Jordan\'s Three Ways" for Jules Jordan'], + ['mfm', 1, 'Jynx Maze in "Don\'t Make Me Beg 4" for Evil Angel'], ['trainbang', 0, 'Nicole Black in GIO971 for LegalPorno'], ['triple-anal', 1, 'Natasha Teen in SZ2098 for LegalPorno'], ['triple-anal', 2, 'Kira Thorn in GIO1018 for LegalPorno'], diff --git a/src/actors.js b/src/actors.js index 61d09176..028fa9f5 100644 --- a/src/actors.js +++ b/src/actors.js @@ -2,25 +2,29 @@ const slugify = require('./utils/slugify'); -async function storeReleaseActors(releases) { - const releaseIdsByActor = releases.reduce( - (acc, release) => release.actors.reduce((actorAcc, actor) => { - const releaseActor = actor.name ? actor : { name: actor }; - const actorSlug = slugify(releaseActor.name); - +function toBaseActors(actorsOrNames) { + return actorsOrNames.map((actorOrName) => { + if (actorOrName.name) { return { - ...actorAcc, - [actorSlug]: actorAcc[actorSlug] - ? actorAcc[actorSlug].concat(release.id) - : [release.id], + ...actorOrName, + slug: slugify(actorOrName.name), }; - }, acc), - {}, - ); + } - console.log(releaseIdsByActor); + return { + name: actorOrName, + slug: slugify(actorOrName), + }; + }); +} + +async function associateActors(releases) { + const rawActors = releases.map(release => release.actors).flat().filter(Boolean); + const baseActors = toBaseActors(rawActors); + + console.log(baseActors); } module.exports = { - storeReleaseActors, + associateActors, }; diff --git a/src/argv.js b/src/argv.js index e01ac93d..995f2ad1 100644 --- a/src/argv.js +++ b/src/argv.js @@ -117,6 +117,11 @@ const { argv } = yargs type: 'boolean', default: true, }) + .option('media-limit', { + describe: 'Maximum amount of assets of each type per release', + type: 'number', + default: config.media.limit, + }) .option('images', { describe: 'Include any photos, posters or covers', type: 'boolean', diff --git a/src/media-legacy.js b/src/media-legacy.js new file mode 100644 index 00000000..3ace2b8a --- /dev/null +++ b/src/media-legacy.js @@ -0,0 +1,447 @@ +'use strict'; + +const config = require('config'); +const Promise = require('bluebird'); +// const bhttp = require('bhttp'); +const mime = require('mime'); +const fs = require('fs-extra'); +const sharp = require('sharp'); +const path = require('path'); +const blake2 = require('blake2'); + +const argv = require('./argv'); +const logger = require('./logger')(__filename); +const knex = require('./knex'); +const { get } = require('./utils/http'); +const { ex } = require('./utils/q'); +const chunk = require('./utils/chunk'); + +function getHash(buffer) { + const hash = blake2.createHash('blake2b', { digestLength: 24 }); + hash.update(buffer); + + return hash.digest('hex'); +} + +async function getMeta(buffer, withHash = false) { + try { + const { entropy } = await sharp(buffer).stats(); + const { width, height, size } = await sharp(buffer).metadata(); + + const hash = withHash && getHash(buffer); + + return { + width, + height, + size, + entropy, + hash, + }; + } catch (error) { + logger.warn(`Failed to retrieve image entropy, using 7.5: ${error.message}`); + + return 7.5; + } +} + +async
function createThumbnail(buffer, height = config.media.thumbnailSize) { + try { + const thumbnail = sharp(buffer) + .resize({ + height, + withoutEnlargement: true, + }) + .jpeg({ + quality: config.media.thumbnailQuality, + }) + .toBuffer(); + + return thumbnail; + } catch (error) { + logger.error(`Failed to create thumbnail: ${error.message}`); + } + + return null; +} + +function groupFallbacksByPriority(chunks) { + /* + Chunks naturally give priority to all of the first item's fallbacks, generally lower quality images. + This function ensures every item's first source is tried, before trying every item's second source, etc., example: + IN: [[1, 2, 3,], 10, [1, 2, 3, 4, 5], [1, 2, 3]] + OUT [[1, 1, 1], [2, 2, 2], [3, 3, 3], [4], [5]] + */ + return chunks.map(group => group.reduce((acc, item) => { + if (Array.isArray(item)) { + // place provided fallbacks at same index (priority) in parent array + item.forEach((fallback, fallbackIndex) => { + if (!acc[fallbackIndex]) { + acc[fallbackIndex] = []; + } + + acc[fallbackIndex].push(fallback); + }); + + return acc; + } + + // no fallbacks provided, first priority + if (!acc[0]) { + acc[0] = []; + } + + acc[0].push(item); + + return acc; + }, []).flat()); +} + +function pluckItems(items, specifiedLimit, asFallbacks = true) { + const limit = specifiedLimit || argv.mediaLimit; + + if (!items || items.length <= limit) return items; + + if (asFallbacks) { + const chunks = chunk(items, Math.ceil(items.length / limit)); + const fallbacks = groupFallbacksByPriority(chunks); + + return fallbacks; + } + + const plucked = [1] + .concat( + Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (items.length / (limit - 1)))), + ); + + return Array.from(new Set(plucked)).map(itemIndex => items[itemIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close +} + +function pickQuality(items) { + const itemsByQuality = items.reduce((acc, item) => ({ ...acc, [item.quality]: item }), {}); + const item = config.media.videoQuality.reduce((acc, quality) => acc || itemsByQuality[quality], null); + + return item || items[0]; +} + +async function extractItem(source) { + // const res = await bhttp.get(source.src); + const res = await get(source.src); + + if (res.statusCode === 200) { + const { qu } = ex(res.body.toString()); + + return source.extract(qu); + } + + return null; +} + +async function fetchSource(source, domain, role) { + logger.silly(`Fetching ${domain} ${role} from ${source.src || source}`); + + // const res = await bhttp.get(source.src || source); + const res = await get(source.src || source, { + headers: { + ...(source.referer && { referer: source.referer }), + ...(source.host && { host: source.host }), + }, + }); + + if (res.statusCode === 200) { + const { pathname } = new URL(source.src || source); + const mimetype = mime.getType(pathname); + const extension = mime.getExtension(mimetype); + const hash = getHash(res.body); + const { entropy, size, width, height } = /image/.test(mimetype) ? 
await getMeta(res.body) : {}; + + logger.silly(`Fetched media item from ${source.src || source}`); + + return { + file: res.body, + mimetype, + extension, + hash, + entropy: entropy || null, + size: size || null, + width: width || null, + height: height || null, + quality: source.quality || null, + source: source.src || source, + scraper: source.scraper, + copyright: source.copyright, + }; + } + + throw new Error(`Response ${res.statusCode} not OK`); +} + +async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null, sourceIndex = 0) { + try { + if (!source) { + throw new Error(`Empty ${domain} ${role} source in ${originalSource}`); + } + + if (Array.isArray(source)) { + if (source.every(sourceX => sourceX.quality)) { + // various video qualities provided + const selectedSource = pickQuality(source); + return fetchItem(selectedSource, index, existingItemsBySource, domain, role, attempt, originalSource); + } + + // fallbacks provided + return source.reduce((outcome, sourceX, sourceIndexX) => outcome.catch( + async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, source, sourceIndexX), + ), Promise.reject(new Error())); + } + + if (source.src && source.extract) { + // source links to page containing a (presumably) tokenized photo + const itemSource = await extractItem(source); + + return fetchItem(itemSource, index, existingItemsBySource, domain, role, attempt, source, sourceIndex); + } + + if (existingItemsBySource[source]) { + return null; + } + + return await fetchSource(source, domain, role, originalSource); + } catch (error) { + logger.warn(`Failed attempt ${attempt}/3 to fetch ${domain} ${role} ${index + 1} (${source.src || source}): ${error}`); + + if (source && attempt < 3) { + // only retry if source is provided at all + await Promise.delay(5000); + return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1, originalSource, sourceIndex); + } + + if (originalSource && sourceIndex < originalSource.length - 1) { + throw error; // gets caught to try next source + } + + return null; + } +} + +async function fetchItems(itemSources, existingItemsBySource, domain, role) { + return Promise.map(itemSources, async (source, index) => fetchItem(source, index, existingItemsBySource, domain, role)).filter(Boolean); +} + +async function saveItems(items, domain, role) { + return Promise.map(items, async (item) => { + try { + const dir = item.hash.slice(0, 2); + const subdir = item.hash.slice(2, 4); + const filename = item.quality + ? 
`${item.hash.slice(4)}_${item.quality}.${item.extension}` + : `${item.hash.slice(4)}.${item.extension}`; + + const filedir = path.join(`${role}s`, dir, subdir); + const filepath = path.join(filedir, filename); + + await fs.mkdir(path.join(config.media.path, filedir), { recursive: true }); + await fs.writeFile(path.join(config.media.path, filepath), item.file); + + if (/image/.test(item.mimetype)) { + const thumbnail = await createThumbnail(item.file); + + const thumbdir = path.join(`${role}s`, 'thumbs', dir, subdir); + const thumbpath = path.join(thumbdir, filename); + + await fs.mkdir(path.join(config.media.path, thumbdir), { recursive: true }); + await fs.writeFile(path.join(config.media.path, thumbpath), thumbnail); + + logger.verbose(`Saved ${domain} ${role} with thumbnail to ${filepath}`); + + return { + thumbnail, + filepath, + thumbpath, + mimetype: item.mimetype, + extension: item.extension, + hash: item.hash, + size: item.size, + width: item.width, + height: item.height, + quality: item.quality, + entropy: item.entropy, + scraper: item.scraper, + copyright: item.copyright, + source: item.source, + }; + } + + logger.verbose(`Saved ${domain} ${role} to ${filepath}`); + + return { + filepath, + mimetype: item.mimetype, + extension: item.extension, + hash: item.hash, + size: item.size, + width: item.width, + height: item.height, + quality: item.quality, + entropy: item.entropy, + scraper: item.scraper, + copyright: item.copyright, + source: item.source, + }; + } catch (error) { + logger.error(`Failed to store ${domain} ${role} from ${item.source}: ${error.message}`); + return null; + } + }); +} + +function curateItemEntries(items) { + return items.filter(Boolean).map((item, index) => ({ + path: item.filepath, + thumbnail: item.thumbpath, + mime: item.mimetype, + hash: item.hash, + size: item.size, + width: item.width, + height: item.height, + quality: item.quality, + entropy: item.entropy, + source: item.source, + scraper: item.scraper, + copyright: item.copyright, + index, + })); +} + +function groupItems(items) { + return items.reduce((acc, item) => ({ + source: { ...acc.source, [item.source]: item }, + hash: { ...acc.hash, [item.hash]: item }, + }), { + source: {}, + hash: {}, + }); +} + +async function storeMedia(sources, domain, role, { entropyFilter = 2.5 } = {}) { + const presentSources = sources.filter(source => typeof source === 'string' || Array.isArray(source) || (source && source.src)); + + if (presentSources.length === 0) { + return {}; + } + + console.log(presentSources, presentSources.length); + + // split up source list to prevent excessive RAM usage + const itemChunksBySource = await Promise.all(chunk(presentSources, 50).map(async (sourceChunk, index) => { + try { + // find source duplicates that don't need to be re-downloaded or re-saved + const existingSourceItems = await knex('media').whereIn('source', sourceChunk.flat().map(source => source.src || source)); + const { source: existingSourceItemsBySource, hash: existingSourceItemsByHash } = groupItems(existingSourceItems); + + // download media items from new sources + const fetchedItems = await fetchItems(sourceChunk, existingSourceItemsBySource, domain, role); + const { hash: fetchedItemsByHash } = groupItems(fetchedItems); + + // find hash duplicates that don't need to be re-saved + const uniqueFetchedItems = Object.values(fetchedItemsByHash).filter(item => !entropyFilter || item.entropy === null || item.entropy >= entropyFilter); + const existingHashItems = await knex('media').whereIn('hash', 
uniqueFetchedItems.map(item => item.hash)); + const { hash: existingHashItemsByHash } = groupItems(existingHashItems); + + // save new items to disk + const newItems = uniqueFetchedItems.filter(item => !existingHashItemsByHash[item.hash]); + const savedItems = await saveItems(newItems, domain, role); + + // store new items in database + const curatedItemEntries = curateItemEntries(savedItems); + const storedItems = await knex('media').insert(curatedItemEntries).returning('*'); + const { hash: storedItemsByHash } = groupItems(Array.isArray(storedItems) ? storedItems : []); + + // accumulate existing and new items by source to be mapped onto releases + const itemsByHash = { ...existingSourceItemsByHash, ...existingHashItemsByHash, ...storedItemsByHash }; + const itemsBySource = { + ...existingSourceItemsBySource, + ...fetchedItems.reduce((acc, item) => ({ ...acc, [item.source]: itemsByHash[item.hash] }), {}), + }; + + logger.info(`Stored batch ${index + 1} with ${fetchedItems.length} of new ${domain} ${role}s`); + + return itemsBySource; + } catch (error) { + logger.error(`Failed to store ${domain} ${role} batch ${index + 1}: ${error.message}`); + + return null; + } + })); + + return itemChunksBySource.reduce((acc, itemChunk) => ({ ...acc, ...itemChunk }), {}); +} + +function extractPrimaryItem(associations, targetId, role, primaryRole, primaryItemsByTargetId) { + if (!primaryRole) { + return { [role]: associations, [primaryRole]: null }; + } + + if (primaryItemsByTargetId[targetId]) { + const remainingAssociations = associations.filter(association => association.media_id !== primaryItemsByTargetId[targetId].media_id); + + return { [role]: remainingAssociations, [primaryRole]: null }; + } + + return { + [role]: associations.slice(1), + [primaryRole]: associations.slice(0, 1)[0], + }; +} + +function associateTargetMedia(targetId, sources, mediaBySource, domain, role, primaryRole, primaryItemsByTargetId) { + if (!sources) return { [role]: null, [primaryRole]: null }; + + const mediaIds = sources + .map((source) => { + if (!source) return null; + + if (Array.isArray(source)) { + const availableSource = source.find(fallbackSource => mediaBySource[fallbackSource.src || fallbackSource]); + return mediaBySource[availableSource]; + } + + return mediaBySource[source.src || source]; + }) + .filter(Boolean) + // .sort((mediaItemA, mediaItemB) => mediaItemB.height - mediaItemA.height) // prefer high res images for primary item + .map(mediaItem => mediaItem.id); + + const uniqueMediaIds = Array.from(new Set(mediaIds)); + const associations = uniqueMediaIds.map(mediaId => ({ [`${domain}_id`]: targetId, media_id: mediaId })); + + logger.silly(`Associating ${associations.length} ${role}s to ${domain} ${targetId}`); + + return extractPrimaryItem(associations, targetId, role, primaryRole, primaryItemsByTargetId); +} + +async function associateMedia(sourcesByTargetId, mediaBySource, domain, role, primaryRole) { + const primaryItems = primaryRole ? 
await knex(`${domain}s_${primaryRole}s`).whereIn(`${domain}_id`, Object.keys(sourcesByTargetId)) : []; + const primaryItemsByTargetId = primaryItems.reduce((acc, item) => ({ ...acc, [item[`${domain}_id`]]: item }), {}); + + const associationsPerTarget = await Promise.map(Object.entries(sourcesByTargetId), ([targetId, sources]) => associateTargetMedia(targetId, sources, mediaBySource, domain, role, primaryRole, primaryItemsByTargetId)); + + const associations = associationsPerTarget.map(association => association[role]).flat().filter(Boolean); + const primaryAssociations = associationsPerTarget.map(association => association[primaryRole]).filter(Boolean); + + logger.info(`Associated ${associations.length} ${role}s to ${domain}s`); + if (primaryRole) logger.info(`Associated ${primaryAssociations.length} extracted ${primaryRole}s to ${domain}s`); + + return Promise.all([ + (associations.length > 0 && knex.raw(`${knex(`${domain}s_${role}s`).insert(associations).toString()} ON CONFLICT DO NOTHING`)), + (primaryAssociations.length > 0 && knex.raw(`${knex(`${domain}s_${primaryRole}s`).insert(primaryAssociations).toString()} ON CONFLICT DO NOTHING`)), + ]); +} + +module.exports = { + associateMedia, + createThumbnail, + getHash, + getMeta, + pluckItems, + storeMedia, +}; diff --git a/src/media.js b/src/media.js index c6a9e814..83af21e5 100644 --- a/src/media.js +++ b/src/media.js @@ -1,446 +1,9 @@ 'use strict'; -const config = require('config'); -const Promise = require('bluebird'); -// const bhttp = require('bhttp'); -const mime = require('mime'); -const fs = require('fs-extra'); -const sharp = require('sharp'); -const path = require('path'); -const blake2 = require('blake2'); +function toBaseAvatars() { -const logger = require('./logger')(__filename); -const knex = require('./knex'); -const { get } = require('./utils/http'); -const { ex } = require('./utils/q'); -const chunk = require('./utils/chunk'); - -function getHash(buffer) { - const hash = blake2.createHash('blake2b', { digestLength: 24 }); - hash.update(buffer); - - return hash.digest('hex'); -} - -async function getMeta(buffer, withHash = false) { - try { - const { entropy } = await sharp(buffer).stats(); - const { width, height, size } = await sharp(buffer).metadata(); - - const hash = withHash && getHash(buffer); - - return { - width, - height, - size, - entropy, - hash, - }; - } catch (error) { - logger.warn(`Failed to retrieve image entropy, using 7.5: ${error.message}`); - - return 7.5; - } -} - -async function createThumbnail(buffer, height = config.media.thumbnailSize) { - try { - const thumbnail = sharp(buffer) - .resize({ - height, - withoutEnlargement: true, - }) - .jpeg({ - quality: config.media.thumbnailQuality, - }) - .toBuffer(); - - return thumbnail; - } catch (error) { - logger.error(`Failed to create thumbnail: ${error.message}`); - } - - return null; -} - -function groupFallbacksByPriority(chunks) { - /* - Chunks naturally give priority to all of the first item's fallbacks, generally lower quality images. 
- This function ensures every item's first source is tried, before trying every item's second source, etc., example: - IN: [[1, 2, 3,], 10, [1, 2, 3, 4, 5], [1, 2, 3]] - OUT [[1, 1, 1], [2, 2, 2], [3, 3, 3], [4], [5]] - */ - return chunks.map(group => group.reduce((acc, item) => { - if (Array.isArray(item)) { - // place provided fallbacks at same index (priority) in parent array - item.forEach((fallback, fallbackIndex) => { - if (!acc[fallbackIndex]) { - acc[fallbackIndex] = []; - } - - acc[fallbackIndex].push(fallback); - }); - - return acc; - } - - // no fallbacks provided, first priority - if (!acc[0]) { - acc[0] = []; - } - - acc[0].push(item); - - return acc; - }, []).flat()); -} - -function pluckItems(items, specifiedLimit, asFallbacks = true) { - const limit = specifiedLimit || config.media.limit; - - if (!items || items.length <= limit) return items; - - if (asFallbacks) { - const chunks = chunk(items, Math.ceil(items.length / limit)); - const fallbacks = groupFallbacksByPriority(chunks); - - return fallbacks; - } - - const plucked = [1] - .concat( - Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (items.length / (limit - 1)))), - ); - - return Array.from(new Set(plucked)).map(itemIndex => items[itemIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close -} - -function pickQuality(items) { - const itemsByQuality = items.reduce((acc, item) => ({ ...acc, [item.quality]: item }), {}); - const item = config.media.videoQuality.reduce((acc, quality) => acc || itemsByQuality[quality], null); - - return item || items[0]; -} - -async function extractItem(source) { - // const res = await bhttp.get(source.src); - const res = await get(source.src); - - if (res.statusCode === 200) { - const { qu } = ex(res.body.toString()); - - return source.extract(qu); - } - - return null; -} - -async function fetchSource(source, domain, role) { - logger.silly(`Fetching ${domain} ${role} from ${source.src || source}`); - - // const res = await bhttp.get(source.src || source); - const res = await get(source.src || source, { - headers: { - ...(source.referer && { referer: source.referer }), - ...(source.host && { host: source.host }), - }, - }); - - if (res.statusCode === 200) { - const { pathname } = new URL(source.src || source); - const mimetype = mime.getType(pathname); - const extension = mime.getExtension(mimetype); - const hash = getHash(res.body); - const { entropy, size, width, height } = /image/.test(mimetype) ? 
await getMeta(res.body) : {}; - - logger.silly(`Fetched media item from ${source.src || source}`); - - return { - file: res.body, - mimetype, - extension, - hash, - entropy: entropy || null, - size: size || null, - width: width || null, - height: height || null, - quality: source.quality || null, - source: source.src || source, - scraper: source.scraper, - copyright: source.copyright, - }; - } - - throw new Error(`Response ${res.statusCode} not OK`); -} - -async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null, sourceIndex = 0) { - try { - if (!source) { - throw new Error(`Empty ${domain} ${role} source in ${originalSource}`); - } - - if (Array.isArray(source)) { - if (source.every(sourceX => sourceX.quality)) { - // various video qualities provided - const selectedSource = pickQuality(source); - return fetchItem(selectedSource, index, existingItemsBySource, domain, role, attempt, originalSource); - } - - // fallbacks provided - return source.reduce((outcome, sourceX, sourceIndexX) => outcome.catch( - async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, source, sourceIndexX), - ), Promise.reject(new Error())); - } - - if (source.src && source.extract) { - // source links to page containing a (presumably) tokenized photo - const itemSource = await extractItem(source); - - return fetchItem(itemSource, index, existingItemsBySource, domain, role, attempt, source, sourceIndex); - } - - if (existingItemsBySource[source]) { - return null; - } - - return await fetchSource(source, domain, role, originalSource); - } catch (error) { - logger.warn(`Failed attempt ${attempt}/3 to fetch ${domain} ${role} ${index + 1} (${source.src || source}): ${error}`); - - if (source && attempt < 3) { - // only retry if source is provided at all - await Promise.delay(5000); - return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1, originalSource, sourceIndex); - } - - if (originalSource && sourceIndex < originalSource.length - 1) { - throw error; // gets caught to try next source - } - - return null; - } -} - -async function fetchItems(itemSources, existingItemsBySource, domain, role) { - return Promise.map(itemSources, async (source, index) => fetchItem(source, index, existingItemsBySource, domain, role)).filter(Boolean); -} - -async function saveItems(items, domain, role) { - return Promise.map(items, async (item) => { - try { - const dir = item.hash.slice(0, 2); - const subdir = item.hash.slice(2, 4); - const filename = item.quality - ? 
`${item.hash.slice(4)}_${item.quality}.${item.extension}` - : `${item.hash.slice(4)}.${item.extension}`; - - const filedir = path.join(`${role}s`, dir, subdir); - const filepath = path.join(filedir, filename); - - await fs.mkdir(path.join(config.media.path, filedir), { recursive: true }); - await fs.writeFile(path.join(config.media.path, filepath), item.file); - - if (/image/.test(item.mimetype)) { - const thumbnail = await createThumbnail(item.file); - - const thumbdir = path.join(`${role}s`, 'thumbs', dir, subdir); - const thumbpath = path.join(thumbdir, filename); - - await fs.mkdir(path.join(config.media.path, thumbdir), { recursive: true }); - await fs.writeFile(path.join(config.media.path, thumbpath), thumbnail); - - logger.verbose(`Saved ${domain} ${role} with thumbnail to ${filepath}`); - - return { - thumbnail, - filepath, - thumbpath, - mimetype: item.mimetype, - extension: item.extension, - hash: item.hash, - size: item.size, - width: item.width, - height: item.height, - quality: item.quality, - entropy: item.entropy, - scraper: item.scraper, - copyright: item.copyright, - source: item.source, - }; - } - - logger.verbose(`Saved ${domain} ${role} to ${filepath}`); - - return { - filepath, - mimetype: item.mimetype, - extension: item.extension, - hash: item.hash, - size: item.size, - width: item.width, - height: item.height, - quality: item.quality, - entropy: item.entropy, - scraper: item.scraper, - copyright: item.copyright, - source: item.source, - }; - } catch (error) { - logger.error(`Failed to store ${domain} ${role} from ${item.source}: ${error.message}`); - return null; - } - }); -} - -function curateItemEntries(items) { - return items.filter(Boolean).map((item, index) => ({ - path: item.filepath, - thumbnail: item.thumbpath, - mime: item.mimetype, - hash: item.hash, - size: item.size, - width: item.width, - height: item.height, - quality: item.quality, - entropy: item.entropy, - source: item.source, - scraper: item.scraper, - copyright: item.copyright, - index, - })); -} - -function groupItems(items) { - return items.reduce((acc, item) => ({ - source: { ...acc.source, [item.source]: item }, - hash: { ...acc.hash, [item.hash]: item }, - }), { - source: {}, - hash: {}, - }); -} - -async function storeMedia(sources, domain, role, { entropyFilter = 2.5 } = {}) { - const presentSources = sources.filter(source => typeof source === 'string' || Array.isArray(source) || (source && source.src)); - - if (presentSources.length === 0) { - return {}; - } - - console.log(presentSources, presentSources.length); - - // split up source list to prevent excessive RAM usage - const itemChunksBySource = await Promise.all(chunk(presentSources, 50).map(async (sourceChunk, index) => { - try { - // find source duplicates that don't need to be re-downloaded or re-saved - const existingSourceItems = await knex('media').whereIn('source', sourceChunk.flat().map(source => source.src || source)); - const { source: existingSourceItemsBySource, hash: existingSourceItemsByHash } = groupItems(existingSourceItems); - - // download media items from new sources - const fetchedItems = await fetchItems(sourceChunk, existingSourceItemsBySource, domain, role); - const { hash: fetchedItemsByHash } = groupItems(fetchedItems); - - // find hash duplicates that don't need to be re-saved - const uniqueFetchedItems = Object.values(fetchedItemsByHash).filter(item => !entropyFilter || item.entropy === null || item.entropy >= entropyFilter); - const existingHashItems = await knex('media').whereIn('hash', 
uniqueFetchedItems.map(item => item.hash)); - const { hash: existingHashItemsByHash } = groupItems(existingHashItems); - - // save new items to disk - const newItems = uniqueFetchedItems.filter(item => !existingHashItemsByHash[item.hash]); - const savedItems = await saveItems(newItems, domain, role); - - // store new items in database - const curatedItemEntries = curateItemEntries(savedItems); - const storedItems = await knex('media').insert(curatedItemEntries).returning('*'); - const { hash: storedItemsByHash } = groupItems(Array.isArray(storedItems) ? storedItems : []); - - // accumulate existing and new items by source to be mapped onto releases - const itemsByHash = { ...existingSourceItemsByHash, ...existingHashItemsByHash, ...storedItemsByHash }; - const itemsBySource = { - ...existingSourceItemsBySource, - ...fetchedItems.reduce((acc, item) => ({ ...acc, [item.source]: itemsByHash[item.hash] }), {}), - }; - - logger.info(`Stored batch ${index + 1} with ${fetchedItems.length} of new ${domain} ${role}s`); - - return itemsBySource; - } catch (error) { - logger.error(`Failed to store ${domain} ${role} batch ${index + 1}: ${error.message}`); - - return null; - } - })); - - return itemChunksBySource.reduce((acc, itemChunk) => ({ ...acc, ...itemChunk }), {}); -} - -function extractPrimaryItem(associations, targetId, role, primaryRole, primaryItemsByTargetId) { - if (!primaryRole) { - return { [role]: associations, [primaryRole]: null }; - } - - if (primaryItemsByTargetId[targetId]) { - const remainingAssociations = associations.filter(association => association.media_id !== primaryItemsByTargetId[targetId].media_id); - - return { [role]: remainingAssociations, [primaryRole]: null }; - } - - return { - [role]: associations.slice(1), - [primaryRole]: associations.slice(0, 1)[0], - }; -} - -function associateTargetMedia(targetId, sources, mediaBySource, domain, role, primaryRole, primaryItemsByTargetId) { - if (!sources) return { [role]: null, [primaryRole]: null }; - - const mediaIds = sources - .map((source) => { - if (!source) return null; - - if (Array.isArray(source)) { - const availableSource = source.find(fallbackSource => mediaBySource[fallbackSource.src || fallbackSource]); - return mediaBySource[availableSource]; - } - - return mediaBySource[source.src || source]; - }) - .filter(Boolean) - // .sort((mediaItemA, mediaItemB) => mediaItemB.height - mediaItemA.height) // prefer high res images for primary item - .map(mediaItem => mediaItem.id); - - const uniqueMediaIds = Array.from(new Set(mediaIds)); - const associations = uniqueMediaIds.map(mediaId => ({ [`${domain}_id`]: targetId, media_id: mediaId })); - - logger.silly(`Associating ${associations.length} ${role}s to ${domain} ${targetId}`); - - return extractPrimaryItem(associations, targetId, role, primaryRole, primaryItemsByTargetId); -} - -async function associateMedia(sourcesByTargetId, mediaBySource, domain, role, primaryRole) { - const primaryItems = primaryRole ? 
await knex(`${domain}s_${primaryRole}s`).whereIn(`${domain}_id`, Object.keys(sourcesByTargetId)) : []; - const primaryItemsByTargetId = primaryItems.reduce((acc, item) => ({ ...acc, [item[`${domain}_id`]]: item }), {}); - - const associationsPerTarget = await Promise.map(Object.entries(sourcesByTargetId), ([targetId, sources]) => associateTargetMedia(targetId, sources, mediaBySource, domain, role, primaryRole, primaryItemsByTargetId)); - - const associations = associationsPerTarget.map(association => association[role]).flat().filter(Boolean); - const primaryAssociations = associationsPerTarget.map(association => association[primaryRole]).filter(Boolean); - - logger.info(`Associated ${associations.length} ${role}s to ${domain}s`); - if (primaryRole) logger.info(`Associated ${primaryAssociations.length} extracted ${primaryRole}s to ${domain}s`); - - return Promise.all([ - (associations.length > 0 && knex.raw(`${knex(`${domain}s_${role}s`).insert(associations).toString()} ON CONFLICT DO NOTHING`)), - (primaryAssociations.length > 0 && knex.raw(`${knex(`${domain}s_${primaryRole}s`).insert(primaryAssociations).toString()} ON CONFLICT DO NOTHING`)), - ]); } module.exports = { - associateMedia, - createThumbnail, - getHash, - getMeta, - pluckItems, - storeMedia, + }; diff --git a/src/media_legacy.js b/src/media_legacy.js deleted file mode 100644 index a630efd7..00000000 --- a/src/media_legacy.js +++ /dev/null @@ -1,385 +0,0 @@ -'use strict'; - -const config = require('config'); -const Promise = require('bluebird'); -const path = require('path'); -const fs = require('fs-extra'); -const bhttp = require('bhttp'); -const mime = require('mime'); -const sharp = require('sharp'); -const blake2 = require('blake2'); - -const logger = require('./logger')(__filename); -const knex = require('./knex'); -const upsert = require('./utils/upsert'); -const { ex } = require('./utils/q'); - -function getHash(buffer) { - const hash = blake2.createHash('blake2b', { digestLength: 24 }); - - hash.update(buffer); - - return hash.digest('hex'); -} - -function pluckPhotos(photos, specifiedLimit) { - const limit = specifiedLimit || config.media.limit; - - if (photos.length <= limit) { - return photos; - } - - const plucked = [1] - .concat( - Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))), - ); - - return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close -} - -async function getEntropy(buffer) { - try { - const { entropy } = await sharp(buffer).stats(); - - return entropy; - } catch (error) { - logger.warn(`Failed to retrieve image entropy, using 7.5: ${error.message}`); - - return 7.5; - } -} - -async function createThumbnail(buffer) { - try { - const thumbnail = sharp(buffer) - .resize({ - height: config.media.thumbnailSize, - withoutEnlargement: true, - }) - .jpeg({ - quality: config.media.thumbnailQuality, - }) - .toBuffer(); - - return thumbnail; - } catch (error) { - logger.error(`Failed to create thumbnail: ${error.message}`); - } - - return null; -} - -async function createMediaDirectory(domain, subpath) { - const filepath = path.join(config.media.path, domain, subpath); - - await fs.mkdir(filepath, { recursive: true }); - return filepath; -} - -function curatePhotoEntries(files) { - return files.map((file, index) => ({ - path: file.filepath, - thumbnail: file.thumbpath, - mime: file.mimetype, - hash: file.hash, - source: file.source, - index, - })); -} - 
-async function findDuplicates(photos, identifier, prop = null) { - const duplicates = await knex('media') - .whereIn(identifier, photos.flat().map((photo) => { - if (prop) return photo[prop]; - if (photo.src) return photo.src; - - return photo; - })); - - const duplicateLookup = new Set(duplicates.map(photo => photo[prop || identifier])); - - const originals = photos.filter((source) => { - if (Array.isArray(source)) { - return !source.some(sourceX => duplicateLookup.has((prop && sourceX[prop]) || (sourceX.src && sourceX))); - } - - return !duplicateLookup.has((prop && source[prop]) || (source.src && source)); - }); - - return [duplicates, originals]; -} - -async function extractPhoto(source) { - const res = await bhttp.get(source.src); - - if (res.statusCode === 200) { - const { q } = ex(res.body.toString()); - - return source.extract(q); - } - - return null; -} - -async function fetchPhoto(photoUrl, index, label, attempt = 1) { - if (photoUrl.src && photoUrl.extract) { - // source links to page containing a (presumably) tokenized photo - const photo = await extractPhoto(photoUrl); - - return fetchPhoto(photo, index, label); - } - - if (Array.isArray(photoUrl)) { - return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => { - const photo = await fetchPhoto(url, index, label); - - if (photo) { - return photo; - } - - throw new Error('Photo not available'); - }), Promise.reject(new Error())); - } - - try { - const { pathname } = new URL(photoUrl); - const res = await bhttp.get(photoUrl); - - if (res.statusCode === 200) { - const mimetype = mime.getType(pathname); - const extension = mime.getExtension(mimetype); - const hash = getHash(res.body); - const entropy = await getEntropy(res.body); - - return { - photo: res.body, - mimetype, - extension, - hash, - entropy, - source: photoUrl, - }; - } - - throw new Error(`Response ${res.statusCode} not OK`); - } catch (error) { - logger.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${label} (${photoUrl}): ${error}`); - - if (attempt < 3) { - await Promise.delay(5000); - return fetchPhoto(photoUrl, index, label, attempt + 1); - } - - return null; - } -} - -async function savePhotos(files, { - domain = 'release', - subpath, - role = 'photo', - naming = 'index', -}) { - return Promise.map(files, async (file, index) => { - try { - const timestamp = new Date().getTime(); - const thumbnail = await createThumbnail(file.photo); - - const filename = naming === 'index' - ? 
`${file.role || role}${index + 1}` - : `${timestamp + index}`; - - const filepath = path.join(`${domain}s`, subpath, `${filename}.${file.extension}`); - const thumbpath = path.join(`${domain}s`, subpath, `${filename}_thumb.${file.extension}`); - - await Promise.all([ - fs.writeFile(path.join(config.media.path, filepath), file.photo), - fs.writeFile(path.join(config.media.path, thumbpath), thumbnail), - ]); - - return { - ...file, - thumbnail, - filepath, - thumbpath, - }; - } catch (error) { - logger.error(`Failed to store ${domain} ${role} to ${subpath}: ${error.message}`); - return null; - } - }); -} - -async function storePhotos(photos, { - domain = 'release', - role = 'photo', - naming = 'index', - targetId, - subpath, - primaryRole, // role to assign to first photo if not already in database, used mainly for avatars - entropyFilter = 2.5, // filter out fallback avatars and other generic clipart -}, label) { - if (!photos || photos.length === 0) { - logger.info(`No ${role}s available for ${label}`); - return; - } - - const pluckedPhotos = pluckPhotos(Array.from(new Set(photos))); // pre-filter link duplicates, limit total per configuration - const [sourceDuplicates, sourceOriginals] = await findDuplicates(pluckedPhotos, 'source', null, label); - - logger.info(`Fetching ${sourceOriginals.length} new ${role}s, ${sourceDuplicates.length} already present by source for ${label}`); - - const metaFiles = await Promise.map(sourceOriginals, async (photoUrl, index) => fetchPhoto(photoUrl, index, label), { - concurrency: 10, - }).filter(photo => photo && photo.entropy > entropyFilter); - - const metaFilesByHash = metaFiles.reduce((acc, photo) => ({ ...acc, [photo.hash]: photo }), {}); // pre-filter hash duplicates within set; may occur through fallbacks - const [hashDuplicates, hashOriginals] = await findDuplicates(Object.values(metaFilesByHash), 'hash', 'hash', label); - - logger.info(`Saving ${hashOriginals.length} new ${role}s, ${hashDuplicates.length} already present by hash for ${label}`); - - const savedPhotos = await savePhotos(hashOriginals, { - domain, - role, - targetId, - subpath, - naming, - }); - - const curatedPhotoEntries = curatePhotoEntries(savedPhotos.filter(Boolean), domain, role, targetId); - - const newPhotos = await knex('media').insert(curatedPhotoEntries).returning('*'); - const photoEntries = Array.isArray(newPhotos) - ? 
[...sourceDuplicates, ...hashDuplicates, ...newPhotos] - : [...sourceDuplicates, ...hashDuplicates]; - - const photoAssociations = photoEntries - .map(photoEntry => ({ - [`${domain}_id`]: targetId, - media_id: photoEntry.id, - })); - - if (primaryRole) { - // store one photo as a 'primary' photo, such as an avatar or cover - const primaryPhoto = await knex(`${domain}s_${primaryRole}s`) - .where(`${domain}_id`, targetId) - .first(); - - if (primaryPhoto) { - const remainingAssociations = photoAssociations.filter(association => association.media_id !== primaryPhoto.media_id); - - await upsert(`${domain}s_${role}s`, remainingAssociations, [`${domain}_id`, 'media_id']); - return; - } - - await Promise.all([ - upsert(`${domain}s_${primaryRole}s`, photoAssociations.slice(0, 1), [`${domain}_id`, 'media_id']), - upsert(`${domain}s_${role}s`, photoAssociations.slice(1), [`${domain}_id`, 'media_id']), - ]); - - return; - } - - await upsert(`${domain}s_${role}s`, photoAssociations, [`${domain}_id`, 'media_id']); -} - -/* -async function storeReleasePhotos(releases, label) { - const sources = releases.map(release => pluckPhotos(release.photos)).flat(); - const uniqueSources = Array.from(new Set(sources)); - - const [sourceDuplicates, sourceOriginals] = await findDuplicates(uniqueSources, 'source', null, label); - - const metaFiles = await Promise.map( - sourceOriginals, - async (photoUrl, index) => fetchPhoto(photoUrl, index, label), - { concurrency: 10 }, - ) - .filter(photo => photo); - - const hashUniques = Object.values(metaFiles.reduce((acc, file) => { - if (!acc[file.hash]) acc[file.hash] = file; - - return acc; - }, {})); - - const [hashDuplicates, hashOriginals] = await findDuplicates(hashUniques, 'hash', 'hash', label); - - const sourceHashes = metaFiles.concat(sourceDuplicates).reduce((acc, file) => { - acc[file.source] = file.hash; - return acc; - }, {}); - - const associations = releases.map(release => release.photos.map(source => [release.id, sourceHashes[source]])).flat(); - - console.log(associations); -} -*/ - -async function storeTrailer(trailers, { - domain = 'releases', - role = 'trailer', - targetId, - subpath, -}, label) { - // support scrapers supplying multiple qualities - const trailer = Array.isArray(trailers) - ? trailers.find(trailerX => config.media.trailerQuality.includes(trailerX.quality)) || trailers[0] - : trailers; - - if (!trailer || !trailer.src) { - logger.info(`No ${role} available for ${label}`); - return; - } - - const [sourceDuplicates, sourceOriginals] = await findDuplicates([trailer], 'source', 'src', label); - - const metaFiles = await Promise.map(sourceOriginals, async (trailerX) => { - const { pathname } = new URL(trailerX.src); - const mimetype = trailerX.type || mime.getType(pathname); - - const res = await bhttp.get(trailerX.src); - const hash = getHash(res.body); - const filepath = path.join(domain, subpath, `${role}${trailerX.quality ? 
`_${trailerX.quality}` : ''}.${mime.getExtension(mimetype)}`); - - return { - trailer: res.body, - path: filepath, - mime: mimetype, - source: trailerX.src, - quality: trailerX.quality || null, - hash, - }; - }); - - const [hashDuplicates, hashOriginals] = await findDuplicates(metaFiles, 'hash', 'hash', label); - - const newTrailers = await knex('media') - .insert(hashOriginals.map(trailerX => ({ - path: trailerX.path, - mime: trailerX.mime, - source: trailerX.source, - quality: trailerX.quality, - hash: trailerX.hash, - type: role, - }))) - .returning('*'); - - await Promise.all(hashOriginals.map(trailerX => fs.writeFile(path.join(config.media.path, trailerX.path), trailerX.trailer))); - - const trailerEntries = Array.isArray(newTrailers) - ? [...sourceDuplicates, ...hashDuplicates, ...newTrailers] - : [...sourceDuplicates, ...hashDuplicates]; - - await upsert(`releases_${role}s`, trailerEntries.map(trailerEntry => ({ - release_id: targetId, - media_id: trailerEntry.id, - })), ['release_id', 'media_id']); -} - -module.exports = { - createMediaDirectory, - storePhotos, - // storeReleasePhotos, - storeTrailer, -}; diff --git a/src/store-releases.js b/src/store-releases.js index 2afd2dfa..1601088d 100644 --- a/src/store-releases.js +++ b/src/store-releases.js @@ -6,6 +6,7 @@ const logger = require('./logger')(__filename); const knex = require('./knex'); const slugify = require('./utils/slugify'); const { associateTags } = require('./tags'); +const { associateActors } = require('./actors'); function curateReleaseEntry(release, batchId, existingRelease) { const slug = slugify(release.title, '-', { @@ -147,7 +148,10 @@ async function storeReleases(releases) { const storedReleaseEntries = Array.isArray(storedReleases) ? storedReleases : []; const releasesWithId = attachReleaseIds(releases, [].concat(storedReleaseEntries, duplicateReleaseEntries)); - await associateTags(releasesWithId); + await Promise.all([ + associateTags(releasesWithId), + associateActors(releasesWithId), + ]); return releasesWithId; }
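The associateActors stub wired into storeReleases normalizes whatever shape the scrapers hand over before any database work exists. A minimal sketch of the intended behavior, using made-up inputs (slugify is assumed to lowercase and hyphenate; it is not shown in this diff):

const { associateActors } = require('./src/actors');

// Scrapers may supply actors as plain name strings, as objects carrying
// extra fields, or not at all; toBaseActors attaches a slug in either
// case, and missing actor lists vanish in the filter(Boolean) pass.
const releases = [
  { id: 1, actors: ['Jane Doe', { name: 'John Roe', gender: 'male' }] },
  { id: 2, actors: null },
];

associateActors(releases);
// expected log output:
// [
//   { name: 'Jane Doe', slug: 'jane-doe' },
//   { name: 'John Roe', gender: 'male', slug: 'john-roe' },
// ]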
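The fallback regrouping in the new media-legacy.js is easiest to verify by running its doc comment's example end to end. A behaviorally equivalent, self-contained sketch (the chunk helper is inlined here purely for illustration; the module itself imports ./utils/chunk):

// chunk: split an array into groups of at most `size` items.
const chunk = (items, size) => Array.from(
  { length: Math.ceil(items.length / size) },
  (value, index) => items.slice(index * size, (index + 1) * size),
);

// Mirrors groupFallbacksByPriority: within each chunk, gather every item's
// first source before any item's second source, and so on, then flatten
// the priority groups into a single fallback list per chunk.
const groupFallbacksByPriority = chunks => chunks.map(group => group.reduce((acc, item) => {
  const fallbacks = Array.isArray(item) ? item : [item];

  fallbacks.forEach((fallback, priority) => {
    if (!acc[priority]) acc[priority] = [];
    acc[priority].push(fallback);
  });

  return acc;
}, []).flat());

const sources = [[1, 2, 3], 10, [1, 2, 3, 4, 5], [1, 2, 3]];
console.log(groupFallbacksByPriority(chunk(sources, 4)));
// [ [ 1, 10, 1, 1, 2, 2, 2, 3, 3, 3, 4, 5 ] ]

Note the result is flat per chunk, with the bare 10 slotted in at first priority; each flattened list then feeds fetchItem's array branch, which tries entries left to right until one resolves.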
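On how the two exported halves of media-legacy.js are meant to compose: storeMedia resolves a flat source list into stored media rows keyed by source, and associateMedia maps those rows back onto their targets. A hypothetical wiring for release photos (the function below does not exist in this diff; table names follow the module's ${domain}s_${role}s pattern):

const { storeMedia, associateMedia } = require('./src/media-legacy');

async function storeReleasePhotos(releases) {
  const sourcesByReleaseId = releases.reduce(
    (acc, release) => ({ ...acc, [release.id]: release.photos }),
    {},
  );

  // download once, deduplicated by source URL and by content hash, write
  // files and thumbnails to disk, and insert rows into the media table
  const mediaBySource = await storeMedia(
    Object.values(sourcesByReleaseId).flat(),
    'release',
    'photo',
  );

  // link media ids through releases_photos; with 'cover' passed as the
  // primary role, the first photo of a release that has no cover row yet
  // is extracted into releases_covers instead
  await associateMedia(sourcesByReleaseId, mediaBySource, 'release', 'photo', 'cover');
}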