From ca3bdd717d2a9dbeb6cb658ca09ddfdaab0a615c Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Sat, 5 May 2018 00:51:58 +0200 Subject: [PATCH] Added archive support, and the IP archive. --- config/default.js | 7 ++++- package-lock.json | 10 ++++++ package.json | 1 + src/app.js | 65 ++++++++++++++++++++++++++++++--------- src/archives/archives.js | 7 +++++ src/archives/ip.js | 21 +++++++++++++ src/cli.js | 3 ++ src/curate/posts.js | 2 ++ src/fetch/content.js | 4 +++ src/fetch/info.js | 8 +++-- src/methods/eroshare.js | 2 -- src/methods/imgurAlbum.js | 2 -- src/methods/imgurImage.js | 2 -- src/methods/methods.js | 14 ++++----- 14 files changed, 117 insertions(+), 31 deletions(-) create mode 100644 src/archives/archives.js create mode 100644 src/archives/ip.js diff --git a/config/default.js b/config/default.js index e07557d..0369960 100644 --- a/config/default.js +++ b/config/default.js @@ -32,7 +32,12 @@ module.exports = { fetch: { sort: 'new', limit: 1000, - avoidDuplicates: true + avoidDuplicates: true, + archives: { + search: false, + reddit: ['ip'], + reupload: [] + } }, reddit: { api: { diff --git a/package-lock.json b/package-lock.json index 79607e5..edd88e6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -25,6 +25,16 @@ "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-3.0.0.tgz", "integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=" }, + "array.prototype.flatten": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/array.prototype.flatten/-/array.prototype.flatten-1.2.1.tgz", + "integrity": "sha512-3GhsA78XgK//wQKbhUe6L93kknekGlTRY0kvYcpuSi0aa9rVrMr/okeIIv/XSpN8fZ5iUM+bWifhf2/7CYKtIg==", + "requires": { + "define-properties": "1.1.2", + "es-abstract": "1.11.0", + "function-bind": "1.1.1" + } + }, "asn1": { "version": "0.2.3", "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.3.tgz", diff --git a/package.json b/package.json index 5d23e4e..e698af9 100644 --- a/package.json +++ b/package.json @@ -27,6 +27,7 @@ "author": "Niels Simenon", "license": "ISC", "dependencies": { + "array.prototype.flatten": "^1.2.1", "cheerio": "^1.0.0-rc.2", "config": "^1.30.0", "date-fns": "^1.29.0", diff --git a/src/app.js b/src/app.js index 4b47106..68e96f5 100644 --- a/src/app.js +++ b/src/app.js @@ -4,7 +4,9 @@ const config = require('config'); const util = require('util'); const fs = require('fs-extra'); const snoowrap = require('snoowrap'); -const promiseFinally = require('promise.prototype.finally'); + +require('promise.prototype.finally').shim(); +require('array.prototype.flatten').shim(); const reddit = new snoowrap(config.reddit.api); @@ -16,11 +18,11 @@ const interpolate = require('./interpolate.js'); const attachContentInfo = require('./fetch/info.js'); const fetchContent = require('./fetch/content.js'); +const archives = require('./archives/archives.js'); + const save = require('./save/save.js'); const saveProfileDetails = require('./save/profileDetails.js'); -promiseFinally.shim(); - const args = require('./cli.js'); if(!(args.users && args.users.length) && !(args.posts && args.posts.length)) { @@ -31,35 +33,56 @@ Promise.resolve().then(() => { if(args.users) { return getUserPosts(args.users); } -}).then((userPosts = []) => { + + return []; +}).then(userPosts => { if(args.posts) { return getPosts(args.posts).then(posts => posts.concat(userPosts)); } return userPosts; }).then(posts => { - return curatePosts(posts).slice(0, args.limit); + return curatePosts(posts, args.ignore).slice(0, args.limit); }).then(posts => { - return attachContentInfo(posts).then(info => fetchContent(posts)); + return attachContentInfo(posts).then(info => posts); +}).then(posts => { + return fetchContent(posts); }).catch(error => { return console.error(error); }); -function getUserPosts(users) { - return users.reduce((chain, username) => { +function getUserPosts(usernames) { + return usernames.reduce((chain, username) => { return chain.then(accPosts => { - return reddit.getUser(username).fetch().then(curateUser).then(saveProfileDetails).then(user => ({user, accPosts})); + return reddit.getUser(username).fetch().then(curateUser).then(saveProfileDetails).then(user => ({ + user, + accPosts + })); }).then(({user, accPosts}) => { return reddit.getUser(username).getSubmissions({ sort: args.sort, limit: Infinity - }).then(posts => { - return accPosts.concat(posts.map(post => { - post.user = user; + }).then(posts => ({ + user, + accPosts: accPosts.concat(posts) + })); + }).then(({user, accPosts}) => { + if(args.archives || config.fetch.archives.search) { + return getArchivePostIds(username, accPosts.map(post => post.id)).then(postIds => { + return Promise.all(postIds.map(postId => { + return reddit.getSubmission(postId).fetch(); + })); + }).then(archivedPosts => { + return { + user, + accPosts: accPosts.concat(archivedPosts) + }; + }); + } - return post; - })); - }); + return {user, accPosts}; + }).then(({user, accPosts}) => { + return accPosts.map(post => Object.assign(post, {user})); }); }, Promise.resolve([])); }; @@ -94,3 +117,15 @@ function getPosts(postIds) { return posts; }); }; + +function getArchivePostIds(username, exclude) { + console.log('Searching archives for posts...'); + + return Promise.all(config.fetch.archives.reddit.map(source => archives[source](username))).then(postIds => postIds.flatten()).then(postIds => { + return exclude ? postIds.filter(postId => !exclude.includes(postId)) : postIds; + }).then(postIds => { + console.log(`Found ${postIds.length} unique archived posts`); + + return postIds; + }); +}; diff --git a/src/archives/archives.js b/src/archives/archives.js new file mode 100644 index 0000000..5625cef --- /dev/null +++ b/src/archives/archives.js @@ -0,0 +1,7 @@ +'use strict'; + +const ip = require('./ip.js'); + +module.exports = { + ip +}; diff --git a/src/archives/ip.js b/src/archives/ip.js new file mode 100644 index 0000000..6a5a6f6 --- /dev/null +++ b/src/archives/ip.js @@ -0,0 +1,21 @@ +'use strict'; + +const fetch = require('node-fetch'); +const $ = require('cheerio'); + +function findOnIp(username, page = 1, acc = []) { + return Promise.resolve().then(() => { + return fetch(`https://www.imageporn.net/user/${username}/all/${page}`); + }).then(res => res.text()).then(res => { + const postIds = $('.icon a', res).toArray().map(link => link.attribs.href.slice(16)); + + if(postIds.length) { + // still finding items, check next page + return findOnIp(username, ++page, acc.concat(postIds)) + } + + return acc; + }); +}; + +module.exports = findOnIp; diff --git a/src/cli.js b/src/cli.js index 6d5bd5c..f6a6f5b 100644 --- a/src/cli.js +++ b/src/cli.js @@ -23,4 +23,7 @@ module.exports = yargs.command('npm start -- --user ').option('users', describe: 'Ignore posts with any of these properties', type: 'array', choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18'] +}).option('archives', { + describe: 'Search archives for deleted posts', + type: 'boolean' }).argv; diff --git a/src/curate/posts.js b/src/curate/posts.js index a3c7eea..fd0edbb 100644 --- a/src/curate/posts.js +++ b/src/curate/posts.js @@ -6,6 +6,8 @@ const dissectLink = require('../dissectLink.js'); function curatePosts(posts, ignore) { const processed = new Set(); + console.log(ignore); + return posts.reduce((acc, post, index) => { const host = dissectLink(post.url); const ignoring = ignore ? ignore.find(prop => { diff --git a/src/fetch/content.js b/src/fetch/content.js index ccfbfb5..9237d38 100644 --- a/src/fetch/content.js +++ b/src/fetch/content.js @@ -21,6 +21,10 @@ module.exports = function(posts) { return ep.open(); }).then(() => { return Promise.all(posts.map(post => { + if(!post.content) { + return console.log('SLIPPED THROUGH!', post); + } + return Promise.all(post.content.items.map((item, index) => { item.index = index; diff --git a/src/fetch/info.js b/src/fetch/info.js index ddd873d..df83834 100644 --- a/src/fetch/info.js +++ b/src/fetch/info.js @@ -9,13 +9,17 @@ function attachContentInfo(posts) { post.content = content; return post; + }).catch(error => { + console.log('\x1b[31m%s\x1b[0m', error); + + return null })); } else { - console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${post.title}' - ${post.url}`); + console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${post.id} - ${post.title} - ${post.url}`); } return acc; - }, [])); + }, [])).then(posts => posts.filter(post => post)); }; module.exports = attachContentInfo; diff --git a/src/methods/eroshare.js b/src/methods/eroshare.js index d80c6e7..296b46f 100644 --- a/src/methods/eroshare.js +++ b/src/methods/eroshare.js @@ -36,8 +36,6 @@ function eroshare(post) { }; }) }; - }).catch(error => { - console.log('\x1b[33m%s\x1b[0m', error); }); }; diff --git a/src/methods/imgurAlbum.js b/src/methods/imgurAlbum.js index 84e736a..24084e5 100644 --- a/src/methods/imgurAlbum.js +++ b/src/methods/imgurAlbum.js @@ -40,8 +40,6 @@ function imgurAlbum(post) { original: item })) }; - }).catch(error => { - return console.log('\x1b[31m%s\x1b[0m', error); }); }; diff --git a/src/methods/imgurImage.js b/src/methods/imgurImage.js index d7222a3..227ddd2 100644 --- a/src/methods/imgurImage.js +++ b/src/methods/imgurImage.js @@ -26,8 +26,6 @@ function imgurImage(post) { original: res.data }] }; - }).catch(error => { - return console.log('\x1b[31m%s\x1b[0m', error); }); }; diff --git a/src/methods/methods.js b/src/methods/methods.js index 5b45a67..2db9ac1 100644 --- a/src/methods/methods.js +++ b/src/methods/methods.js @@ -9,11 +9,11 @@ const gfycat = require('./gfycat.js'); const eroshare = require('./eroshare.js'); module.exports = { - self: self, - redditImage: redditImage, - redditVideo: redditVideo, - imgurImage: imgurImage, - imgurAlbum: imgurAlbum, - gfycat: gfycat, - eroshare: eroshare + self, + redditImage, + redditVideo, + imgurImage, + imgurAlbum, + gfycat, + eroshare };