From df3421639f499b227b0ca19a2a096a7b8bec5c9d Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 11 Sep 2024 05:16:53 +0200 Subject: [PATCH] Flow and modularization refactor. Added duplicates option and applying limit after fetch. --- app.js | 112 ++++++------------------ config/default.js | 9 +- curate/submissions.js | 19 +++- curate/user.js | 2 - fetchContent.js => fetch/content.js | 12 +-- fetch/info.js | 21 +++++ fetchItem.js => fetch/item.js | 0 interpolate.js | 2 +- methods/imgurAlbum.js | 2 +- save.js | 19 ---- save/profileDetails.js | 38 ++++++++ save/save.js | 24 +++++ textToStream.js => save/textToStream.js | 0 13 files changed, 141 insertions(+), 119 deletions(-) rename fetchContent.js => fetch/content.js (76%) create mode 100644 fetch/info.js rename fetchItem.js => fetch/item.js (100%) delete mode 100644 save.js create mode 100644 save/profileDetails.js create mode 100644 save/save.js rename textToStream.js => save/textToStream.js (100%) diff --git a/app.js b/app.js index 2d8fb48..bf9b755 100644 --- a/app.js +++ b/app.js @@ -6,55 +6,21 @@ const fs = require('fs-extra'); const yargs = require('yargs').argv; const snoowrap = require('snoowrap'); -const curateSubmissions = require('./curate/submissions.js'); -const curateUser = require('./curate/user.js'); -const methods = require('./methods/methods.js'); -const interpolate = require('./interpolate.js'); -const fetchItem = require('./fetchItem.js'); -const fetchContent = require('./fetchContent.js'); -const save = require('./save.js'); -const textToStream = require('./textToStream.js'); - const reddit = new snoowrap(config.reddit.api); -function saveProfileDetails(user) { - if(config.library.profile.image) { - // pass profile image as item to interpolate extension variable - const filepath = interpolate(config.library.profile.image, user, null, { - url: user.profile.image - }); +const curateSubmissions = require('./curate/submissions.js'); +const curateUser = require('./curate/user.js'); - fetchItem(user.profile.image).then(stream => save(filepath, stream)).catch(error => { - console.log('\x1b[33m%s\x1b[0m', `Could not save profile image for '${user.name}': ${error}`); - }); - } +const methods = require('./methods/methods.js'); +const interpolate = require('./interpolate.js'); - if(config.library.profile.description) { - if(user.profile.description) { - const filepath = interpolate(config.library.profile.description, user); - const stream = textToStream(user.profile.description); +const fetchInfo = require('./fetch/info.js'); +const fetchContent = require('./fetch/content.js'); - save(filepath, stream).catch(error => { - console.log('\x1b[33m%s\x1b[0m', `Could not save profile description for '${user.name}': ${error}`); - }); - } else { - console.log('\x1b[33m%s\x1b[0m', `No profile description for '${user.name}'`); - } - } -}; +const save = require('./save/save.js'); +const saveProfileDetails = require('./save/profileDetails.js'); -function getSubmissions(users, sort, limit) { - return users.reduce((chain, user) => { - return chain.then(acc => { - return reddit.getUser(user).getSubmissions({ - sort: sort, - limit: limit - }).then(submissions => { - return acc.concat(submissions); - }); - }); - }, Promise.resolve([])); -}; +const limit = yargs.limit || config.fetch.limit; if(!yargs.user && typeof yargs.users !== 'string') { return console.log('\x1b[31m%s\x1b[0m', 'Please supply at least one user with --user=[user], or multiple users with --users=[user1,user2] or --user=[user1] --user=[user2]'); @@ -62,50 +28,28 @@ if(!yargs.user && typeof yargs.users !== 'string') { const users = yargs.users ? yargs.users.split(',') : [].concat(yargs.user); -users.forEach(user => { +users.forEach(username => { return Promise.resolve().then(() => { - // get reddit profile - return reddit.getUser(user).fetch().then(curateUser); + return reddit.getUser(username).fetch().then(curateUser); }).then(user => { return saveProfileDetails(user); - // get submissions + }).then(user => { + return reddit.getUser(username).getSubmissions({ + sort: yargs.sort || config.fetch.sort + }).then(submissions => ({ + user, + submissions + })); + }).then(({user, submissions}) => { + const posts = curateSubmissions(submissions).slice(0, limit); + + return fetchInfo(posts).then(info => ({ + user, + posts + })); + }).then(({user, posts}) => { + return fetchContent(posts, user); }).catch(error => { - return console.log('\x1b[33m%s\x1b[0m', error); + return console.log(error); }); }); - -/* -Promise.resolve().then(() => { - if(yargs.user || yargs.users) { - const users = yargs.users ? yargs.users.split(',') : [].concat(yargs.user); - - return Promise.resolve().then(() => { - if(config.library.profile) { - return getProfiles(users); - } - }).then(() => { - return getSubmissions(users, yargs.sort || config.reddit.sort, yargs.limit === undefined ? config.reddit.limit : yargs.limit); - }); - } - - return Promise.reject('Please supply at least one user with one or multiple --user, or --users!'); -}).then(submissions => { - return Promise.all(curate(submissions).reduce((acc, post) => { - if(post.host && methods[post.host.method]) { - acc = acc.concat(methods[post.host.method](post).then(content => { - post.content = content; - - return post; - })); - } else { - console.log('\x1b[33m%s\x1b[0m', `"${post.title}": '${post.url}' not supported :(`); - } - - return acc; - }, [])); -}).then(posts => { - return fetchContent(posts); -}).catch(error => { - return console.log('\x1b[31m%s\x1b[0m', error); -}); -*/ diff --git a/config/default.js b/config/default.js index d7ebe66..df3b008 100644 --- a/config/default.js +++ b/config/default.js @@ -10,8 +10,8 @@ module.exports = { video: '$postDate - $albumId - $postTitle/$itemIndex - $itemId$ext' }, profile: { - image: 'profile$ext', - description: 'profile ($userVerified$userVerifiedEmail$userGold)' + image: '$userCreated - profile$ext', + description: '$userCreated - profile ($userVerified$userVerifiedEmail$userGold$profileOver18)' }, booleans: { extracted: 'extracted-', @@ -25,9 +25,12 @@ module.exports = { indexOffset: 1, slashSubstitute: '#', }, - reddit: { + fetch: { sort: 'top', limit: 1000, + ignoreDuplicates: true + }, + reddit: { api: { userAgent: 'wat', clientId: 'VPquALMpTGl3ag', diff --git a/curate/submissions.js b/curate/submissions.js index 8121499..b4afa7e 100644 --- a/curate/submissions.js +++ b/curate/submissions.js @@ -1,10 +1,19 @@ 'use strict'; +const config = require('config'); const dissectLink = require('../dissectLink.js'); function curateSubmissions(submissions) { - return submissions.map((submission, index) => { - return { + const processed = new Set(); + + return submissions.reduce((acc, submission, index) => { + if(config.fetch.ignoreDuplicates && processed.has(submission.url)) { + console.log('\x1b[33m%s\x1b[0m', `Ignoring cross-post or repost '${submission.title}' - ${submission.url}`); + + return acc; + } + + const curatedSubmission = { id: submission.id, index: index, title: submission.title, @@ -16,7 +25,11 @@ function curateSubmissions(submissions) { subreddit: submission.subreddit.display_name, host: dissectLink(submission.url) }; - }); + + processed.add(submission.url); + + return acc.concat(curatedSubmission); + }, []); }; module.exports = curateSubmissions; diff --git a/curate/user.js b/curate/user.js index af809c3..bb62afe 100644 --- a/curate/user.js +++ b/curate/user.js @@ -3,8 +3,6 @@ const path = require('path'); function curateUser(user) { - console.log(user); - return { id: user.id, name: user.name, diff --git a/fetchContent.js b/fetch/content.js similarity index 76% rename from fetchContent.js rename to fetch/content.js index b18cfa1..ebbd4be 100644 --- a/fetchContent.js +++ b/fetch/content.js @@ -4,12 +4,12 @@ const fs = require('fs-extra'); const path = require('path'); const config = require('config'); -const fetchItem = require('./fetchItem'); -const save = require('./save.js'); -const interpolate = require('./interpolate.js'); -const textToStream = require('./textToStream.js'); +const fetchItem = require('./item.js'); +const interpolate = require('../interpolate.js'); +const save = require('../save/save.js'); +const textToStream = require('../save/textToStream.js'); -module.exports = function(posts) { +module.exports = function(posts, user) { return Promise.all(posts.map(post => { return Promise.resolve().then(() => { return Promise.all(post.content.items.map((item, index) => { @@ -26,7 +26,7 @@ module.exports = function(posts) { }).then(items => { return Promise.all(items.map(item => { const type = item.type.split('/')[0]; - const filepath = post.content.album ? interpolate(config.library.album[type], post.user, post, item) : interpolate(config.library[type], post.user, post, item); + const filepath = post.content.album ? interpolate(config.library.album[type], user, post, item) : interpolate(config.library[type], user, post, item); return Promise.resolve().then(() => { return fs.ensureDir(path.dirname(filepath)); diff --git a/fetch/info.js b/fetch/info.js new file mode 100644 index 0000000..85ce654 --- /dev/null +++ b/fetch/info.js @@ -0,0 +1,21 @@ +'use strict'; + +const methods = require('../methods/methods.js'); + +function fetchInfo(posts) { + return Promise.all(posts.reduce((acc, post) => { + if(post.host && methods[post.host.method]) { + acc = acc.concat(methods[post.host.method](post).then(content => { + post.content = content; + + return post; + })); + } else { + console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${post.title}' - ${post.url}`); + } + + return acc; + }, [])); +}; + +module.exports = fetchInfo; diff --git a/fetchItem.js b/fetch/item.js similarity index 100% rename from fetchItem.js rename to fetch/item.js diff --git a/interpolate.js b/interpolate.js index 7ebdd49..ba5f396 100644 --- a/interpolate.js +++ b/interpolate.js @@ -64,7 +64,7 @@ function interpolate(pattern, user, post, item) { $itemDate: dateFns.format(item.datetime, dateFormat), $itemIndex: item.index + config.library.indexOffset, $extracted: item.extracted ? config.library.booleans.extracted : '', - $ext: item.type ? extensions[item.type] : path.extname(url.parse(item.url).pathname) + $ext: item.extension || (item.type ? extensions[item.type] : path.extname(url.parse(item.url).pathname)) }); } diff --git a/methods/imgurAlbum.js b/methods/imgurAlbum.js index d697d9b..5efa71e 100644 --- a/methods/imgurAlbum.js +++ b/methods/imgurAlbum.js @@ -10,7 +10,7 @@ function imgurAlbum(post) { 'Authorization': `Client-ID ${config.methods.imgur.clientId}` } }).then(res => res.json()).then(res => { - const extract = config.patterns.album.extractSingleItem && res.data.images.length === 1; + const extract = config.library.album.extractSingleItem && res.data.images.length === 1; return { album: extract ? null : { diff --git a/save.js b/save.js deleted file mode 100644 index b14a286..0000000 --- a/save.js +++ /dev/null @@ -1,19 +0,0 @@ -'use strict'; - -const fs = require('fs-extra'); - -function save(filepath, stream) { - const file = fs.createWriteStream(filepath); - - return new Promise((resolve, reject) => { - stream.pipe(file).on('error', error => { - reject(error); - }).on('finish', () => { - console.log('\x1b[32m%s\x1b[0m', `Saved '${filepath}'`); - - resolve(filepath); - }); - }); -}; - -module.exports = save; diff --git a/save/profileDetails.js b/save/profileDetails.js new file mode 100644 index 0000000..aee5c99 --- /dev/null +++ b/save/profileDetails.js @@ -0,0 +1,38 @@ +'use strict'; + +const config = require('config'); + +const interpolate = require('../interpolate.js'); +const fetchItem = require('../fetch/item.js'); +const textToStream = require('./textToStream.js'); +const save = require('./save.js'); + +function saveProfileDetails(user) { + if(config.library.profile.image) { + const filepath = interpolate(config.library.profile.image, user, null, { + // pass profile image as item to interpolate extension variable + url: user.profile.image + }); + + fetchItem(user.profile.image).then(stream => save(filepath, stream)).catch(error => { + console.log('\x1b[33m%s\x1b[0m', `Could not save profile image for '${user.name}': ${error}`); + }); + } + + if(config.library.profile.description) { + if(user.profile.description) { + const filepath = interpolate(config.library.profile.description, user); + const stream = textToStream(user.profile.description); + + save(filepath, stream).catch(error => { + console.log('\x1b[33m%s\x1b[0m', `Could not save profile description for '${user.name}': ${error}`); + }); + } else { + console.log('\x1b[33m%s\x1b[0m', `No profile description for '${user.name}'`); + } + } + + return user; +}; + +module.exports = saveProfileDetails; diff --git a/save/save.js b/save/save.js new file mode 100644 index 0000000..bb70355 --- /dev/null +++ b/save/save.js @@ -0,0 +1,24 @@ +'use strict'; + +const fs = require('fs-extra'); +const path = require('path'); + +function save(filepath, stream) { + return Promise.resolve().then(() => { + return fs.ensureDir(path.dirname(filepath)); + }).then(() => { + const file = fs.createWriteStream(filepath); + + return new Promise((resolve, reject) => { + stream.pipe(file).on('error', error => { + reject(error); + }).on('finish', () => { + console.log('\x1b[32m%s\x1b[0m', `Saved '${filepath}'`); + + resolve(filepath); + }); + }); + }); +}; + +module.exports = save; diff --git a/textToStream.js b/save/textToStream.js similarity index 100% rename from textToStream.js rename to save/textToStream.js