Added new actors module boilerplate. Added tag posters.
[binary image diff: 20 tag poster/photo files added, 6 replaced, 4 removed]
@@ -5,7 +5,7 @@ const tagPosters = [
   ['anal', 3, 'Dakota Skye for Brazzers'],
   ['anal-creampie', 0, 'Gina Valentina and Jane Wilde in "A Very Special Anniversary" for Tushy'],
   ['ass-eating', 0, 'Kendra Sunderland and Ana Foxxx in "Kendra\'s Obsession, Part 3" for Blacked'],
-  ['asian', 'poster', 'Vina Sky in "Young and Glamorous 10" for Jules Jordan'],
+  ['asian', 0, 'Alina Li in "Slut Puppies 8" for Jules Jordan'],
   ['ass-to-mouth', 'poster', 'Alysa Gap and Logan in "Anal Buffet 4" for Evil Angel'],
   ['bdsm', 0, 'Dani Daniels in "The Traning of Dani Daniels, Day 2" for The Training of O at Kink'],
   ['behind-the-scenes', 0, 'Janice Griffith in "Day With A Pornstar: Janice" for Brazzers'],
@@ -18,23 +18,24 @@ const tagPosters = [
   ['creampie', 'poster'],
   ['da-tp', 0, 'Natasha Teen in LegalPorno SZ2164'],
   ['deepthroat', 0, 'Chanel Grey in "Deepthroating Is Fun" for Throated'],
-  ['double-anal', 2, 'Lana Rhoades in "Lana Rhoades Unleashed" for HardX'],
+  ['double-anal', 7, 'Adriana Chechik in "DP Masters 6" for Jules Jordan'],
   ['double-blowjob', 0, 'Kira Noir and Kali Roses for Brazzers'],
-  ['double-penetration', 'poster', 'Mia Malkova in "DP Me 8" for HardX'],
+  ['double-penetration', 2, 'Adriana Chechik in "DP Masters 6" for Jules Jordan'],
   ['double-vaginal', 'poster', 'Riley Reid in "Pizza That Ass" for Reid My Lips'],
   ['dv-tp', 'poster', 'Juelz Ventura in "Gangbanged 5" for Elegant Angel'],
-  ['ebony', 1, 'Ana Foxxx in "Gangbang Me 3" for HardX'],
+  ['ebony', 1, 'Ana Foxxx in "DP Me 4" for HardX'],
   ['facefucking', 1, 'Carrie for Young Throats'],
   ['facial', 'poster'],
   ['gangbang', 'poster', 'Kristen Scott in "Interracial Gangbang!" for Jules Jordan'],
   ['gaping', 1, 'Vina Sky in "Vina Sky Does Anal" for HardX'],
   ['interracial', 'poster'],
-  ['latina', 0, 'Abby Lee Brazil for Bang Bros'],
+  ['latina', 'poster', 'Alexis Love for Penthouse'],
   ['lesbian', 0, 'Reena Sky and Sarah Banks for Brazzers'],
   ['mff', 0, 'Madison Ivy and Adriana Chechik in "Day With A Pornstar" for Brazzers'],
-  ['mfm', 1, 'Jynx Maze in "Don\'t Make Me Beg 4" for Evil Angel'],
+  ['mfm', 5, 'Vina Sky in "Slut Puppies 15" for Jules Jordan'],
   ['oral-creampie', 1, 'Keisha Grey in Brazzers House'],
   ['orgy', 'poster'],
+  ['redhead', 0, 'Penny Pax in "The Submission of Emma Marx: Evolved" for New Sensations'],
   ['schoolgirl', 1, 'Eliza Ibarra for Brazzers'],
   ['swallowing', 'poster'],
   ['tattoo', 'poster', 'Kali Roses in "Goes All In For Anal" for Hussie Pass'],
@@ -53,6 +54,8 @@ const tagPosters = [
 const tagPhotos = [
   ['airtight', 2, 'Dakota Skye in "Dakota Goes Nuts" for ArchAngel'],
   ['airtight', 3, 'Anita Bellini in "Triple Dick Gangbang" for Hands On Hardcore (DDF Network)'],
+  ['asian', 'poster', 'Vina Sky in "Slut Puppies 15" for Jules Jordan'],
+  // ['asian', 1, 'Alina Li in "Oil Overload 11" for Jules Jordan'],
   ['anal', 2, 'Gabbie Carter for Tushy Raw'],
   ['anal', 'poster', 'Jynx Maze in "Anal Buffet 6" for Evil Angel'],
   ['anal', 1, 'Veronica Leal and Tina Kay in "Agents On Anal Mission" for Asshole Fever'],
@@ -63,23 +66,27 @@ const tagPhotos = [
   ['da-tp', 2, 'Angel Smalls in GIO408 for LegalPorno'],
   ['da-tp', 3, 'Evelina Darling in GIO294'],
   ['da-tp', 4, 'Ninel Mojado aka Mira Cuckold in GIO063 for LegalPorno'],
+  ['double-anal', 2, 'Lana Rhoades in "Lana Rhoades Unleashed" for HardX'],
   ['double-anal', 6, 'Sheena Shaw in "Ass Worship 14" for Jules Jordan'],
   ['double-anal', 5, 'Riley Reid in "The Gangbang of Riley Reid" for Jules Jordan'],
   ['double-anal', 'poster', 'Haley Reed in "Young Hot Ass" for Evil Angel'],
   ['double-anal', 0, 'Nicole Black doing double anal during a gangbang in GIO971 for LegalPorno'],
   ['double-anal', 1, 'Ria Sunn in SZ1801 for LegalPorno'],
+  ['double-penetration', 'poster', 'Mia Malkova in "DP Me 8" for HardX'],
   ['double-penetration', 0, 'Zoey Monroe in "Slut Puppies 7" for Jules Jordan'],
   ['double-penetration', 1, 'Jynx Maze in "Don\'t Make Me Beg 4" for Evil Angel'],
   ['double-vaginal', 0, 'Aaliyah Hadid in "Squirting From Double Penetration With Anal" for Bang Bros'],
   ['dv-tp', 1, 'Adriana Chechik in "Adriana\'s Triple Anal Penetration!"'],
   ['dv-tp', 0, 'Luna Rival in LegalPorno SZ1490'],
   ['facefucking', 2, 'Jynx Maze for Throated'],
+  ['latina', 0, 'Abby Lee Brazil for Bang Bros'],
   ['gangbang', 0, '"4 On 1 Gangbangs" for Doghouse Digital'],
   ['gangbang', 1, 'Ginger Lynn in "Gangbang Mystique", a photoset shot by Suze Randall for Puritan No. 10, 1984. This photo pushed the boundaries of pornography at the time, as depicting a woman \'fully occupied\' was unheard of.'],
   ['gangbang', 2, 'Riley Reid\'s double anal in "The Gangbang of Riley Reid" for Jules Jordan'],
   ['gaping', 'poster', 'Paulina in "Anal Buffet 4" for Evil Angel'],
   ['gaping', 0, 'McKenzee Miles in "Anal Buffet 4" for Evil Angel'],
-  ['mfm', 'poster', 'Vina Sky for Jules Jordan'],
+  // ['mfm', 0, 'Vina Sky in "Jules Jordan\'s Three Ways" for Jules Jordan'],
+  ['mfm', 1, 'Jynx Maze in "Don\'t Make Me Beg 4" for Evil Angel'],
   ['trainbang', 0, 'Nicole Black in GIO971 for LegalPorno'],
   ['triple-anal', 1, 'Natasha Teen in SZ2098 for LegalPorno'],
   ['triple-anal', 2, 'Kira Thorn in GIO1018 for LegalPorno'],

@@ -2,25 +2,29 @@
 const slugify = require('./utils/slugify');

-async function storeReleaseActors(releases) {
-  const releaseIdsByActor = releases.reduce(
-    (acc, release) => release.actors.reduce((actorAcc, actor) => {
-      const releaseActor = actor.name ? actor : { name: actor };
-      const actorSlug = slugify(releaseActor.name);
-
-      return {
-        ...actorAcc,
-        [actorSlug]: actorAcc[actorSlug]
-          ? actorAcc[actorSlug].concat(release.id)
-          : [release.id],
-      };
-    }, acc),
-    {},
-  );
-
-  console.log(releaseIdsByActor);
+function toBaseActors(actorsOrNames) {
+  return actorsOrNames.map((actorOrName) => {
+    if (actorOrName.name) {
+      return {
+        ...actorOrName,
+        slug: slugify(actorOrName.name),
+      };
+    }
+
+    return {
+      name: actorOrName,
+      slug: slugify(actorOrName),
+    };
+  });
+}
+
+async function associateActors(releases) {
+  const rawActors = releases.map(release => release.actors).flat().filter(Boolean);
+  const baseActors = toBaseActors(rawActors);
+
+  console.log(baseActors);
 }

 module.exports = {
-  storeReleaseActors,
+  associateActors,
 };

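As a quick illustration of the new boilerplate (hypothetical call; the slugs assume ./utils/slugify lowercases and hyphenates names):

    // toBaseActors normalizes plain name strings and scraped actor objects alike:
    toBaseActors(['Riley Reid', { name: 'Vina Sky', gender: 'female' }]);
    // => [
    //   { name: 'Riley Reid', slug: 'riley-reid' },
    //   { name: 'Vina Sky', gender: 'female', slug: 'vina-sky' },
    // ]
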
@@ -117,6 +117,11 @@ const { argv } = yargs
     type: 'boolean',
     default: true,
   })
+  .option('media-limit', {
+    describe: 'Maximum amount of assets of each type per release',
+    type: 'number',
+    default: config.media.limit,
+  })
   .option('images', {
     describe: 'Include any photos, posters or covers',
     type: 'boolean',

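yargs camel-cases hyphenated flags, so the new option surfaces as argv.mediaLimit; a minimal sketch (the entry point name is an assumption):

    // $ node src/app.js --media-limit 5
    const argv = require('./argv');

    console.log(argv.mediaLimit); // 5 when the flag is passed, config.media.limit otherwise
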
@@ -0,0 +1,447 @@
'use strict';

const config = require('config');
const Promise = require('bluebird');
// const bhttp = require('bhttp');
const mime = require('mime');
const fs = require('fs-extra');
const sharp = require('sharp');
const path = require('path');
const blake2 = require('blake2');

const argv = require('./argv');
const logger = require('./logger')(__filename);
const knex = require('./knex');
const { get } = require('./utils/http');
const { ex } = require('./utils/q');
const chunk = require('./utils/chunk');

function getHash(buffer) {
  const hash = blake2.createHash('blake2b', { digestLength: 24 });
  hash.update(buffer);

  return hash.digest('hex');
}

async function getMeta(buffer, withHash = false) {
  try {
    const { entropy } = await sharp(buffer).stats();
    const { width, height, size } = await sharp(buffer).metadata();

    const hash = withHash && getHash(buffer);

    return {
      width,
      height,
      size,
      entropy,
      hash,
    };
  } catch (error) {
    logger.warn(`Failed to retrieve image entropy, using 7.5: ${error.message}`);

    // return an object so callers can still destructure { entropy, size, width, height }
    return { entropy: 7.5 };
  }
}

async function createThumbnail(buffer, height = config.media.thumbnailSize) {
  try {
    const thumbnail = sharp(buffer)
      .resize({
        height,
        withoutEnlargement: true,
      })
      .jpeg({
        quality: config.media.thumbnailQuality,
      })
      .toBuffer();

    return thumbnail;
  } catch (error) {
    logger.error(`Failed to create thumbnail: ${error.message}`);
  }

  return null;
}

function groupFallbacksByPriority(chunks) {
  /*
  Chunks naturally give priority to all of the first item's fallbacks, generally lower quality images.
  This function ensures every item's first source is tried, before trying every item's second source, etc., example:
  IN: [[1, 2, 3], 10, [1, 2, 3, 4, 5], [1, 2, 3]]
  OUT: [[1, 1, 1], [2, 2, 2], [3, 3, 3], [4], [5]]
  */
  return chunks.map(group => group.reduce((acc, item) => {
    if (Array.isArray(item)) {
      // place provided fallbacks at same index (priority) in parent array
      item.forEach((fallback, fallbackIndex) => {
        if (!acc[fallbackIndex]) {
          acc[fallbackIndex] = [];
        }

        acc[fallbackIndex].push(fallback);
      });

      return acc;
    }

    // no fallbacks provided, first priority
    if (!acc[0]) {
      acc[0] = [];
    }

    acc[0].push(item);

    return acc;
  }, []).flat());
}

function pluckItems(items, specifiedLimit, asFallbacks = true) {
  const limit = specifiedLimit || argv.mediaLimit;

  if (!items || items.length <= limit) return items;

  if (asFallbacks) {
    const chunks = chunk(items, Math.ceil(items.length / limit));
    const fallbacks = groupFallbacksByPriority(chunks);

    return fallbacks;
  }

  const plucked = [1]
    .concat(
      Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (items.length / (limit - 1)))),
    );

  return Array.from(new Set(plucked)).map(itemIndex => items[itemIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
}

function pickQuality(items) {
  const itemsByQuality = items.reduce((acc, item) => ({ ...acc, [item.quality]: item }), {});
  const item = config.media.videoQuality.reduce((acc, quality) => acc || itemsByQuality[quality], null);

  return item || items[0];
}

async function extractItem(source) {
  // const res = await bhttp.get(source.src);
  const res = await get(source.src);

  if (res.statusCode === 200) {
    const { qu } = ex(res.body.toString());

    return source.extract(qu);
  }

  return null;
}

async function fetchSource(source, domain, role) {
  logger.silly(`Fetching ${domain} ${role} from ${source.src || source}`);

  // const res = await bhttp.get(source.src || source);
  const res = await get(source.src || source, {
    headers: {
      ...(source.referer && { referer: source.referer }),
      ...(source.host && { host: source.host }),
    },
  });

  if (res.statusCode === 200) {
    const { pathname } = new URL(source.src || source);
    const mimetype = mime.getType(pathname);
    const extension = mime.getExtension(mimetype);
    const hash = getHash(res.body);
    const { entropy, size, width, height } = /image/.test(mimetype) ? await getMeta(res.body) : {};

    logger.silly(`Fetched media item from ${source.src || source}`);

    return {
      file: res.body,
      mimetype,
      extension,
      hash,
      entropy: entropy || null,
      size: size || null,
      width: width || null,
      height: height || null,
      quality: source.quality || null,
      source: source.src || source,
      scraper: source.scraper,
      copyright: source.copyright,
    };
  }

  throw new Error(`Response ${res.statusCode} not OK`);
}

async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null, sourceIndex = 0) {
  try {
    if (!source) {
      throw new Error(`Empty ${domain} ${role} source in ${originalSource}`);
    }

    if (Array.isArray(source)) {
      if (source.every(sourceX => sourceX.quality)) {
        // various video qualities provided
        const selectedSource = pickQuality(source);
        return fetchItem(selectedSource, index, existingItemsBySource, domain, role, attempt, originalSource);
      }

      // fallbacks provided
      return source.reduce((outcome, sourceX, sourceIndexX) => outcome.catch(
        async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, source, sourceIndexX),
      ), Promise.reject(new Error()));
    }

    if (source.src && source.extract) {
      // source links to page containing a (presumably) tokenized photo
      const itemSource = await extractItem(source);

      return fetchItem(itemSource, index, existingItemsBySource, domain, role, attempt, source, sourceIndex);
    }

    if (existingItemsBySource[source]) {
      return null;
    }

    return await fetchSource(source, domain, role, originalSource);
  } catch (error) {
    logger.warn(`Failed attempt ${attempt}/3 to fetch ${domain} ${role} ${index + 1} (${source.src || source}): ${error}`);

    if (source && attempt < 3) {
      // only retry if source is provided at all
      await Promise.delay(5000);
      return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1, originalSource, sourceIndex);
    }

    if (originalSource && sourceIndex < originalSource.length - 1) {
      throw error; // gets caught to try next source
    }

    return null;
  }
}

async function fetchItems(itemSources, existingItemsBySource, domain, role) {
  return Promise.map(itemSources, async (source, index) => fetchItem(source, index, existingItemsBySource, domain, role)).filter(Boolean);
}

async function saveItems(items, domain, role) {
  return Promise.map(items, async (item) => {
    try {
      const dir = item.hash.slice(0, 2);
      const subdir = item.hash.slice(2, 4);
      const filename = item.quality
        ? `${item.hash.slice(4)}_${item.quality}.${item.extension}`
        : `${item.hash.slice(4)}.${item.extension}`;

      const filedir = path.join(`${role}s`, dir, subdir);
      const filepath = path.join(filedir, filename);

      await fs.mkdir(path.join(config.media.path, filedir), { recursive: true });
      await fs.writeFile(path.join(config.media.path, filepath), item.file);

      if (/image/.test(item.mimetype)) {
        const thumbnail = await createThumbnail(item.file);

        const thumbdir = path.join(`${role}s`, 'thumbs', dir, subdir);
        const thumbpath = path.join(thumbdir, filename);

        await fs.mkdir(path.join(config.media.path, thumbdir), { recursive: true });
        await fs.writeFile(path.join(config.media.path, thumbpath), thumbnail);

        logger.verbose(`Saved ${domain} ${role} with thumbnail to ${filepath}`);

        return {
          thumbnail,
          filepath,
          thumbpath,
          mimetype: item.mimetype,
          extension: item.extension,
          hash: item.hash,
          size: item.size,
          width: item.width,
          height: item.height,
          quality: item.quality,
          entropy: item.entropy,
          scraper: item.scraper,
          copyright: item.copyright,
          source: item.source,
        };
      }

      logger.verbose(`Saved ${domain} ${role} to ${filepath}`);

      return {
        filepath,
        mimetype: item.mimetype,
        extension: item.extension,
        hash: item.hash,
        size: item.size,
        width: item.width,
        height: item.height,
        quality: item.quality,
        entropy: item.entropy,
        scraper: item.scraper,
        copyright: item.copyright,
        source: item.source,
      };
    } catch (error) {
      logger.error(`Failed to store ${domain} ${role} from ${item.source}: ${error.message}`);
      return null;
    }
  });
}

function curateItemEntries(items) {
  return items.filter(Boolean).map((item, index) => ({
    path: item.filepath,
    thumbnail: item.thumbpath,
    mime: item.mimetype,
    hash: item.hash,
    size: item.size,
    width: item.width,
    height: item.height,
    quality: item.quality,
    entropy: item.entropy,
    source: item.source,
    scraper: item.scraper,
    copyright: item.copyright,
    index,
  }));
}

function groupItems(items) {
  return items.reduce((acc, item) => ({
    source: { ...acc.source, [item.source]: item },
    hash: { ...acc.hash, [item.hash]: item },
  }), {
    source: {},
    hash: {},
  });
}

async function storeMedia(sources, domain, role, { entropyFilter = 2.5 } = {}) {
  const presentSources = sources.filter(source => typeof source === 'string' || Array.isArray(source) || (source && source.src));

  if (presentSources.length === 0) {
    return {};
  }

  console.log(presentSources, presentSources.length);

  // split up source list to prevent excessive RAM usage
  const itemChunksBySource = await Promise.all(chunk(presentSources, 50).map(async (sourceChunk, index) => {
    try {
      // find source duplicates that don't need to be re-downloaded or re-saved
      const existingSourceItems = await knex('media').whereIn('source', sourceChunk.flat().map(source => source.src || source));
      const { source: existingSourceItemsBySource, hash: existingSourceItemsByHash } = groupItems(existingSourceItems);

      // download media items from new sources
      const fetchedItems = await fetchItems(sourceChunk, existingSourceItemsBySource, domain, role);
      const { hash: fetchedItemsByHash } = groupItems(fetchedItems);

      // find hash duplicates that don't need to be re-saved
      const uniqueFetchedItems = Object.values(fetchedItemsByHash).filter(item => !entropyFilter || item.entropy === null || item.entropy >= entropyFilter);
      const existingHashItems = await knex('media').whereIn('hash', uniqueFetchedItems.map(item => item.hash));
      const { hash: existingHashItemsByHash } = groupItems(existingHashItems);

      // save new items to disk
      const newItems = uniqueFetchedItems.filter(item => !existingHashItemsByHash[item.hash]);
      const savedItems = await saveItems(newItems, domain, role);

      // store new items in database
      const curatedItemEntries = curateItemEntries(savedItems);
      const storedItems = await knex('media').insert(curatedItemEntries).returning('*');
      const { hash: storedItemsByHash } = groupItems(Array.isArray(storedItems) ? storedItems : []);

      // accumulate existing and new items by source to be mapped onto releases
      const itemsByHash = { ...existingSourceItemsByHash, ...existingHashItemsByHash, ...storedItemsByHash };
      const itemsBySource = {
        ...existingSourceItemsBySource,
        ...fetchedItems.reduce((acc, item) => ({ ...acc, [item.source]: itemsByHash[item.hash] }), {}),
      };

      logger.info(`Stored batch ${index + 1} with ${fetchedItems.length} of new ${domain} ${role}s`);

      return itemsBySource;
    } catch (error) {
      logger.error(`Failed to store ${domain} ${role} batch ${index + 1}: ${error.message}`);

      return null;
    }
  }));

  return itemChunksBySource.reduce((acc, itemChunk) => ({ ...acc, ...itemChunk }), {});
}

function extractPrimaryItem(associations, targetId, role, primaryRole, primaryItemsByTargetId) {
  if (!primaryRole) {
    return { [role]: associations, [primaryRole]: null };
  }

  if (primaryItemsByTargetId[targetId]) {
    const remainingAssociations = associations.filter(association => association.media_id !== primaryItemsByTargetId[targetId].media_id);

    return { [role]: remainingAssociations, [primaryRole]: null };
  }

  return {
    [role]: associations.slice(1),
    [primaryRole]: associations.slice(0, 1)[0],
  };
}

function associateTargetMedia(targetId, sources, mediaBySource, domain, role, primaryRole, primaryItemsByTargetId) {
  if (!sources) return { [role]: null, [primaryRole]: null };

  const mediaIds = sources
    .map((source) => {
      if (!source) return null;

      if (Array.isArray(source)) {
        const availableSource = source.find(fallbackSource => mediaBySource[fallbackSource.src || fallbackSource]);
        return mediaBySource[availableSource];
      }

      return mediaBySource[source.src || source];
    })
    .filter(Boolean)
    // .sort((mediaItemA, mediaItemB) => mediaItemB.height - mediaItemA.height) // prefer high res images for primary item
    .map(mediaItem => mediaItem.id);

  const uniqueMediaIds = Array.from(new Set(mediaIds));
  const associations = uniqueMediaIds.map(mediaId => ({ [`${domain}_id`]: targetId, media_id: mediaId }));

  logger.silly(`Associating ${associations.length} ${role}s to ${domain} ${targetId}`);

  return extractPrimaryItem(associations, targetId, role, primaryRole, primaryItemsByTargetId);
}

async function associateMedia(sourcesByTargetId, mediaBySource, domain, role, primaryRole) {
  const primaryItems = primaryRole ? await knex(`${domain}s_${primaryRole}s`).whereIn(`${domain}_id`, Object.keys(sourcesByTargetId)) : [];
  const primaryItemsByTargetId = primaryItems.reduce((acc, item) => ({ ...acc, [item[`${domain}_id`]]: item }), {});

  const associationsPerTarget = await Promise.map(Object.entries(sourcesByTargetId), ([targetId, sources]) => associateTargetMedia(targetId, sources, mediaBySource, domain, role, primaryRole, primaryItemsByTargetId));

  const associations = associationsPerTarget.map(association => association[role]).flat().filter(Boolean);
  const primaryAssociations = associationsPerTarget.map(association => association[primaryRole]).filter(Boolean);

  logger.info(`Associated ${associations.length} ${role}s to ${domain}s`);
  if (primaryRole) logger.info(`Associated ${primaryAssociations.length} extracted ${primaryRole}s to ${domain}s`);

  return Promise.all([
    (associations.length > 0 && knex.raw(`${knex(`${domain}s_${role}s`).insert(associations).toString()} ON CONFLICT DO NOTHING`)),
    (primaryAssociations.length > 0 && knex.raw(`${knex(`${domain}s_${primaryRole}s`).insert(primaryAssociations).toString()} ON CONFLICT DO NOTHING`)),
  ]);
}

module.exports = {
  associateMedia,
  createThumbnail,
  getHash,
  getMeta,
  pluckItems,
  storeMedia,
};

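A minimal sketch of pluckItems' fallback mode (made-up sources; assumes ./utils/chunk splits an array into slices of the given size):

    const { pluckItems } = require('./media');

    // Five candidate sources with a limit of 2 become two fallback groups:
    // one representative per group is downloaded, its alternates are only tried on failure.
    pluckItems(['a.jpg', 'b.jpg', 'c.jpg', 'd.jpg', 'e.jpg'], 2);
    // => [['a.jpg', 'b.jpg', 'c.jpg'], ['d.jpg', 'e.jpg']]
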
src/media.js
@@ -1,446 +1,9 @@
 'use strict';

-[~430 deleted lines: the previous media implementation, near-identical to the new module above, except that it did not import ./argv and pluckItems defaulted to config.media.limit instead of argv.mediaLimit]
+function toBaseAvatars() {
+}

 module.exports = {
-  associateMedia,
-  createThumbnail,
-  getHash,
-  getMeta,
-  pluckItems,
-  storeMedia,
 };

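Both the removed module and its replacement above write associations via knex's query builder plus a raw ON CONFLICT suffix, so re-running a scrape cannot insert duplicate rows; a minimal sketch of that pattern with made-up table and ids:

    const knex = require('./knex');

    async function associateExample() {
      // knex(...).toString() renders the INSERT with its bindings inlined;
      // appending the conflict clause makes duplicate associations a no-op.
      const sql = `${knex('releases_photos').insert({ release_id: 1, media_id: 2 }).toString()} ON CONFLICT DO NOTHING`;

      // => INSERT INTO "releases_photos" (...) VALUES (...) ON CONFLICT DO NOTHING
      return knex.raw(sql);
    }
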
|
|
|
@ -1,385 +0,0 @@
|
||||||
'use strict';
|
|
||||||
|
|
||||||
const config = require('config');
|
|
||||||
const Promise = require('bluebird');
|
|
||||||
const path = require('path');
|
|
||||||
const fs = require('fs-extra');
|
|
||||||
const bhttp = require('bhttp');
|
|
||||||
const mime = require('mime');
|
|
||||||
const sharp = require('sharp');
|
|
||||||
const blake2 = require('blake2');
|
|
||||||
|
|
||||||
const logger = require('./logger')(__filename);
|
|
||||||
const knex = require('./knex');
|
|
||||||
const upsert = require('./utils/upsert');
|
|
||||||
const { ex } = require('./utils/q');
|
|
||||||
|
|
||||||
function getHash(buffer) {
|
|
||||||
const hash = blake2.createHash('blake2b', { digestLength: 24 });
|
|
||||||
|
|
||||||
hash.update(buffer);
|
|
||||||
|
|
||||||
return hash.digest('hex');
|
|
||||||
}
|
|
||||||
|
|
||||||
function pluckPhotos(photos, specifiedLimit) {
|
|
||||||
const limit = specifiedLimit || config.media.limit;
|
|
||||||
|
|
||||||
if (photos.length <= limit) {
|
|
||||||
return photos;
|
|
||||||
}
|
|
||||||
|
|
||||||
const plucked = [1]
|
|
||||||
.concat(
|
|
||||||
Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))),
|
|
||||||
);
|
|
||||||
|
|
||||||
return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
|
|
||||||
}
|
|
||||||
|
|
||||||
async function getEntropy(buffer) {
|
|
||||||
try {
|
|
||||||
const { entropy } = await sharp(buffer).stats();
|
|
||||||
|
|
||||||
return entropy;
|
|
||||||
} catch (error) {
|
|
||||||
logger.warn(`Failed to retrieve image entropy, using 7.5: ${error.message}`);
|
|
||||||
|
|
||||||
return 7.5;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function createThumbnail(buffer) {
|
|
||||||
try {
|
|
||||||
const thumbnail = sharp(buffer)
|
|
||||||
.resize({
|
|
||||||
height: config.media.thumbnailSize,
|
|
||||||
withoutEnlargement: true,
|
|
||||||
})
|
|
||||||
.jpeg({
|
|
||||||
quality: config.media.thumbnailQuality,
|
|
||||||
})
|
|
||||||
.toBuffer();
|
|
||||||
|
|
||||||
return thumbnail;
|
|
||||||
} catch (error) {
|
|
||||||
logger.error(`Failed to create thumbnail: ${error.message}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function createMediaDirectory(domain, subpath) {
|
|
||||||
const filepath = path.join(config.media.path, domain, subpath);
|
|
||||||
|
|
||||||
await fs.mkdir(filepath, { recursive: true });
|
|
||||||
return filepath;
|
|
||||||
}
|
|
||||||
|
|
||||||
function curatePhotoEntries(files) {
|
|
||||||
return files.map((file, index) => ({
|
|
||||||
path: file.filepath,
|
|
||||||
thumbnail: file.thumbpath,
|
|
||||||
mime: file.mimetype,
|
|
||||||
hash: file.hash,
|
|
||||||
source: file.source,
|
|
||||||
index,
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
|
|
||||||
async function findDuplicates(photos, identifier, prop = null) {
|
|
||||||
const duplicates = await knex('media')
|
|
||||||
.whereIn(identifier, photos.flat().map((photo) => {
|
|
||||||
if (prop) return photo[prop];
|
|
||||||
if (photo.src) return photo.src;
|
|
||||||
|
|
||||||
return photo;
|
|
||||||
}));
|
|
||||||
|
|
||||||
const duplicateLookup = new Set(duplicates.map(photo => photo[prop || identifier]));
|
|
||||||
|
|
||||||
const originals = photos.filter((source) => {
|
|
||||||
if (Array.isArray(source)) {
|
|
||||||
return !source.some(sourceX => duplicateLookup.has((prop && sourceX[prop]) || (sourceX.src && sourceX)));
|
|
||||||
}
|
|
||||||
|
|
||||||
return !duplicateLookup.has((prop && source[prop]) || (source.src && source));
|
|
||||||
});
|
|
||||||
|
|
||||||
return [duplicates, originals];
|
|
||||||
}
|
|
||||||
|
|
||||||
async function extractPhoto(source) {
|
|
||||||
const res = await bhttp.get(source.src);
|
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
|
||||||
const { q } = ex(res.body.toString());
|
|
||||||
|
|
||||||
return source.extract(q);
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function fetchPhoto(photoUrl, index, label, attempt = 1) {
|
|
||||||
if (photoUrl.src && photoUrl.extract) {
|
|
||||||
// source links to page containing a (presumably) tokenized photo
|
|
||||||
const photo = await extractPhoto(photoUrl);
|
|
||||||
|
|
||||||
return fetchPhoto(photo, index, label);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Array.isArray(photoUrl)) {
|
|
||||||
return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
|
|
||||||
const photo = await fetchPhoto(url, index, label);
|
|
||||||
|
|
||||||
if (photo) {
|
|
||||||
return photo;
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new Error('Photo not available');
|
|
||||||
}), Promise.reject(new Error()));
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const { pathname } = new URL(photoUrl);
|
|
||||||
const res = await bhttp.get(photoUrl);
|
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
|
||||||
const mimetype = mime.getType(pathname);
|
|
||||||
const extension = mime.getExtension(mimetype);
|
|
||||||
const hash = getHash(res.body);
|
|
||||||
const entropy = await getEntropy(res.body);
|
|
||||||
|
|
||||||
return {
|
|
||||||
photo: res.body,
|
|
||||||
mimetype,
|
|
||||||
extension,
|
|
||||||
hash,
|
|
||||||
entropy,
|
|
||||||
source: photoUrl,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new Error(`Response ${res.statusCode} not OK`);
|
|
||||||
} catch (error) {
|
|
||||||
logger.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${label} (${photoUrl}): ${error}`);
|
|
||||||
|
|
||||||
if (attempt < 3) {
|
|
||||||
await Promise.delay(5000);
|
|
||||||
return fetchPhoto(photoUrl, index, label, attempt + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||

async function savePhotos(files, {
  domain = 'release',
  subpath,
  role = 'photo',
  naming = 'index',
}) {
  return Promise.map(files, async (file, index) => {
    try {
      const timestamp = new Date().getTime();
      const thumbnail = await createThumbnail(file.photo);

      // name files by their position in the set, or by a unique timestamp
      const filename = naming === 'index'
        ? `${file.role || role}${index + 1}`
        : `${timestamp + index}`;

      const filepath = path.join(`${domain}s`, subpath, `${filename}.${file.extension}`);
      const thumbpath = path.join(`${domain}s`, subpath, `${filename}_thumb.${file.extension}`);

      await Promise.all([
        fs.writeFile(path.join(config.media.path, filepath), file.photo),
        fs.writeFile(path.join(config.media.path, thumbpath), thumbnail),
      ]);

      return {
        ...file,
        thumbnail,
        filepath,
        thumbpath,
      };
    } catch (error) {
      logger.error(`Failed to store ${domain} ${role} to ${subpath}: ${error.message}`);
      return null;
    }
  });
}
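
/*
 * Usage sketch with hypothetical values: write fetched photo buffers and their
 * thumbnails to media/releases/<subpath>/, named photo1, photo2, ... by index.
 *
 * const saved = await savePhotos(metaFiles, {
 *   domain: 'release',
 *   subpath: '12345',
 * });
 */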

async function storePhotos(photos, {
  domain = 'release',
  role = 'photo',
  naming = 'index',
  targetId,
  subpath,
  primaryRole, // role to assign to the first photo if not already in the database, used mainly for avatars
  entropyFilter = 2.5, // filter out fallback avatars and other generic clipart
}, label) {
  if (!photos || photos.length === 0) {
    logger.info(`No ${role}s available for ${label}`);
    return;
  }

  const pluckedPhotos = pluckPhotos(Array.from(new Set(photos))); // pre-filter link duplicates, limit total per configuration
  const [sourceDuplicates, sourceOriginals] = await findDuplicates(pluckedPhotos, 'source', null, label);

  logger.info(`Fetching ${sourceOriginals.length} new ${role}s, ${sourceDuplicates.length} already present by source for ${label}`);

  const metaFiles = await Promise.map(sourceOriginals, async (photoUrl, index) => fetchPhoto(photoUrl, index, label), {
    concurrency: 10,
  }).filter(photo => photo && photo.entropy > entropyFilter);

  const metaFilesByHash = metaFiles.reduce((acc, photo) => ({ ...acc, [photo.hash]: photo }), {}); // pre-filter hash duplicates within the set; these may occur through fallbacks
  const [hashDuplicates, hashOriginals] = await findDuplicates(Object.values(metaFilesByHash), 'hash', 'hash', label);

  logger.info(`Saving ${hashOriginals.length} new ${role}s, ${hashDuplicates.length} already present by hash for ${label}`);

  const savedPhotos = await savePhotos(hashOriginals, {
    domain,
    role,
    targetId,
    subpath,
    naming,
  });

  const curatedPhotoEntries = curatePhotoEntries(savedPhotos.filter(Boolean), domain, role, targetId);

  const newPhotos = await knex('media').insert(curatedPhotoEntries).returning('*');
  const photoEntries = Array.isArray(newPhotos)
    ? [...sourceDuplicates, ...hashDuplicates, ...newPhotos]
    : [...sourceDuplicates, ...hashDuplicates];

  const photoAssociations = photoEntries
    .map(photoEntry => ({
      [`${domain}_id`]: targetId,
      media_id: photoEntry.id,
    }));

  if (primaryRole) {
    // store one photo as a 'primary' photo, such as an avatar or cover
    const primaryPhoto = await knex(`${domain}s_${primaryRole}s`)
      .where(`${domain}_id`, targetId)
      .first();

    if (primaryPhoto) {
      const remainingAssociations = photoAssociations.filter(association => association.media_id !== primaryPhoto.media_id);

      await upsert(`${domain}s_${role}s`, remainingAssociations, [`${domain}_id`, 'media_id']);
      return;
    }

    await Promise.all([
      upsert(`${domain}s_${primaryRole}s`, photoAssociations.slice(0, 1), [`${domain}_id`, 'media_id']),
      upsert(`${domain}s_${role}s`, photoAssociations.slice(1), [`${domain}_id`, 'media_id']),
    ]);

    return;
  }

  await upsert(`${domain}s_${role}s`, photoAssociations, [`${domain}_id`, 'media_id']);
}
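
/*
 * Usage sketch with hypothetical values: store actor avatars, promoting the
 * first new photo to the actors_avatars table while the rest become regular
 * actors_photos associations; low-entropy placeholder images are dropped.
 *
 * await storePhotos(profile.avatars, {
 *   domain: 'actor',
 *   primaryRole: 'avatar',
 *   targetId: actor.id,
 *   subpath: actor.slug,
 * }, actor.name);
 */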

/*
async function storeReleasePhotos(releases, label) {
  const sources = releases.map(release => pluckPhotos(release.photos)).flat();
  const uniqueSources = Array.from(new Set(sources));

  const [sourceDuplicates, sourceOriginals] = await findDuplicates(uniqueSources, 'source', null, label);

  const metaFiles = await Promise.map(
    sourceOriginals,
    async (photoUrl, index) => fetchPhoto(photoUrl, index, label),
    { concurrency: 10 },
  )
    .filter(photo => photo);

  const hashUniques = Object.values(metaFiles.reduce((acc, file) => {
    if (!acc[file.hash]) acc[file.hash] = file;

    return acc;
  }, {}));

  const [hashDuplicates, hashOriginals] = await findDuplicates(hashUniques, 'hash', 'hash', label);

  const sourceHashes = metaFiles.concat(sourceDuplicates).reduce((acc, file) => {
    acc[file.source] = file.hash;
    return acc;
  }, {});

  const associations = releases.map(release => release.photos.map(source => [release.id, sourceHashes[source]])).flat();

  console.log(associations);
}
*/

async function storeTrailer(trailers, {
  domain = 'releases',
  role = 'trailer',
  targetId,
  subpath,
}, label) {
  // support scrapers supplying multiple qualities
  const trailer = Array.isArray(trailers)
    ? trailers.find(trailerX => config.media.trailerQuality.includes(trailerX.quality)) || trailers[0]
    : trailers;

  if (!trailer || !trailer.src) {
    logger.info(`No ${role} available for ${label}`);
    return;
  }

  const [sourceDuplicates, sourceOriginals] = await findDuplicates([trailer], 'source', 'src', label);

  const metaFiles = await Promise.map(sourceOriginals, async (trailerX) => {
    const { pathname } = new URL(trailerX.src);
    const mimetype = trailerX.type || mime.getType(pathname);

    const res = await bhttp.get(trailerX.src);
    const hash = getHash(res.body);
    const filepath = path.join(domain, subpath, `${role}${trailerX.quality ? `_${trailerX.quality}` : ''}.${mime.getExtension(mimetype)}`);

    return {
      trailer: res.body,
      path: filepath,
      mime: mimetype,
      source: trailerX.src,
      quality: trailerX.quality || null,
      hash,
    };
  });

  const [hashDuplicates, hashOriginals] = await findDuplicates(metaFiles, 'hash', 'hash', label);

  const newTrailers = await knex('media')
    .insert(hashOriginals.map(trailerX => ({
      path: trailerX.path,
      mime: trailerX.mime,
      source: trailerX.source,
      quality: trailerX.quality,
      hash: trailerX.hash,
      type: role,
    })))
    .returning('*');

  await Promise.all(hashOriginals.map(trailerX => fs.writeFile(path.join(config.media.path, trailerX.path), trailerX.trailer)));

  const trailerEntries = Array.isArray(newTrailers)
    ? [...sourceDuplicates, ...hashDuplicates, ...newTrailers]
    : [...sourceDuplicates, ...hashDuplicates];

  await upsert(`releases_${role}s`, trailerEntries.map(trailerEntry => ({
    release_id: targetId,
    media_id: trailerEntry.id,
  })), ['release_id', 'media_id']);
}
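
/*
 * Usage sketch with hypothetical values: pick the quality configured in
 * config.media.trailerQuality from the scraped candidates and file the result
 * under media/releases/<subpath>/trailer_<quality>.<ext>.
 *
 * await storeTrailer(release.trailers, {
 *   targetId: release.id,
 *   subpath: `${release.id}`,
 * }, releaseLabel);
 */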

module.exports = {
  createMediaDirectory,
  storePhotos,
  // storeReleasePhotos,
  storeTrailer,
};

@@ -6,6 +6,7 @@ const logger = require('./logger')(__filename);
 const knex = require('./knex');
 const slugify = require('./utils/slugify');
 const { associateTags } = require('./tags');
+const { associateActors } = require('./actors');

 function curateReleaseEntry(release, batchId, existingRelease) {
   const slug = slugify(release.title, '-', {
@@ -147,7 +148,10 @@ async function storeReleases(releases) {
   const storedReleaseEntries = Array.isArray(storedReleases) ? storedReleases : [];
   const releasesWithId = attachReleaseIds(releases, [].concat(storedReleaseEntries, duplicateReleaseEntries));

-  await associateTags(releasesWithId);
+  await Promise.all([
+    associateTags(releasesWithId),
+    associateActors(releasesWithId),
+  ]);

   return releasesWithId;
 }