Added archive support, and the IP archive.

This commit is contained in:
ThePendulum 2018-05-05 00:51:58 +02:00
parent 1b3a334c24
commit ca3bdd717d
14 changed files with 117 additions and 31 deletions

View File

@ -32,7 +32,12 @@ module.exports = {
fetch: {
sort: 'new',
limit: 1000,
avoidDuplicates: true
avoidDuplicates: true,
archives: {
search: false,
reddit: ['ip'],
reupload: []
}
},
reddit: {
api: {

10
package-lock.json generated
View File

@ -25,6 +25,16 @@
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-3.0.0.tgz",
"integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg="
},
"array.prototype.flatten": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/array.prototype.flatten/-/array.prototype.flatten-1.2.1.tgz",
"integrity": "sha512-3GhsA78XgK//wQKbhUe6L93kknekGlTRY0kvYcpuSi0aa9rVrMr/okeIIv/XSpN8fZ5iUM+bWifhf2/7CYKtIg==",
"requires": {
"define-properties": "1.1.2",
"es-abstract": "1.11.0",
"function-bind": "1.1.1"
}
},
"asn1": {
"version": "0.2.3",
"resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.3.tgz",

View File

@ -27,6 +27,7 @@
"author": "Niels Simenon",
"license": "ISC",
"dependencies": {
"array.prototype.flatten": "^1.2.1",
"cheerio": "^1.0.0-rc.2",
"config": "^1.30.0",
"date-fns": "^1.29.0",

View File

@ -4,7 +4,9 @@ const config = require('config');
const util = require('util');
const fs = require('fs-extra');
const snoowrap = require('snoowrap');
const promiseFinally = require('promise.prototype.finally');
require('promise.prototype.finally').shim();
require('array.prototype.flatten').shim();
const reddit = new snoowrap(config.reddit.api);
@ -16,11 +18,11 @@ const interpolate = require('./interpolate.js');
const attachContentInfo = require('./fetch/info.js');
const fetchContent = require('./fetch/content.js');
const archives = require('./archives/archives.js');
const save = require('./save/save.js');
const saveProfileDetails = require('./save/profileDetails.js');
promiseFinally.shim();
const args = require('./cli.js');
if(!(args.users && args.users.length) && !(args.posts && args.posts.length)) {
@ -31,35 +33,56 @@ Promise.resolve().then(() => {
if(args.users) {
return getUserPosts(args.users);
}
}).then((userPosts = []) => {
return [];
}).then(userPosts => {
if(args.posts) {
return getPosts(args.posts).then(posts => posts.concat(userPosts));
}
return userPosts;
}).then(posts => {
return curatePosts(posts).slice(0, args.limit);
return curatePosts(posts, args.ignore).slice(0, args.limit);
}).then(posts => {
return attachContentInfo(posts).then(info => fetchContent(posts));
return attachContentInfo(posts).then(info => posts);
}).then(posts => {
return fetchContent(posts);
}).catch(error => {
return console.error(error);
});
function getUserPosts(users) {
return users.reduce((chain, username) => {
function getUserPosts(usernames) {
return usernames.reduce((chain, username) => {
return chain.then(accPosts => {
return reddit.getUser(username).fetch().then(curateUser).then(saveProfileDetails).then(user => ({user, accPosts}));
return reddit.getUser(username).fetch().then(curateUser).then(saveProfileDetails).then(user => ({
user,
accPosts
}));
}).then(({user, accPosts}) => {
return reddit.getUser(username).getSubmissions({
sort: args.sort,
limit: Infinity
}).then(posts => {
return accPosts.concat(posts.map(post => {
post.user = user;
}).then(posts => ({
user,
accPosts: accPosts.concat(posts)
}));
}).then(({user, accPosts}) => {
if(args.archives || config.fetch.archives.search) {
return getArchivePostIds(username, accPosts.map(post => post.id)).then(postIds => {
return Promise.all(postIds.map(postId => {
return reddit.getSubmission(postId).fetch();
}));
}).then(archivedPosts => {
return {
user,
accPosts: accPosts.concat(archivedPosts)
};
});
}
return post;
}));
});
return {user, accPosts};
}).then(({user, accPosts}) => {
return accPosts.map(post => Object.assign(post, {user}));
});
}, Promise.resolve([]));
};
@ -94,3 +117,15 @@ function getPosts(postIds) {
return posts;
});
};
/**
 * Ask every archive listed in `config.fetch.archives.reddit` for the post IDs
 * it knows for a user, and merge the answers into one list.
 *
 * @param {string} username - User to look up in the archives.
 * @param {string[]} [exclude] - Post IDs that are already known and must be left out.
 * @returns {Promise<string[]>} De-duplicated post IDs, in first-seen order, minus `exclude`.
 * @throws {TypeError} When a configured archive name has no matching lookup in `archives`.
 */
function getArchivePostIds(username, exclude) {
    console.log('Searching archives for posts...');

    const lookups = config.fetch.archives.reddit.map(source => {
        if(typeof archives[source] !== 'function') {
            // fail with the offending name instead of an opaque "is not a function"
            throw new TypeError(`Unknown archive '${source}'`);
        }

        return archives[source](username);
    });

    return Promise.all(lookups).then(idsPerArchive => {
        // a Set makes the exclusion check constant-time per ID
        const excluded = new Set(exclude || []);

        // flatten one level without relying on the Array.prototype.flatten shim,
        // and drop IDs reported by more than one archive so the count below is truthful
        const uniqueIds = [...new Set([].concat(...idsPerArchive))];
        const postIds = uniqueIds.filter(postId => !excluded.has(postId));

        console.log(`Found ${postIds.length} unique archived posts`);

        return postIds;
    });
}

7
src/archives/archives.js Normal file
View File

@ -0,0 +1,7 @@
'use strict';
const ip = require('./ip.js');
// Registry of archive lookups, keyed by the source names that may be listed in
// config.fetch.archives.reddit. getArchivePostIds calls each selected entry
// with a username and merges the post IDs the entries resolve to.
module.exports = {
ip
};

21
src/archives/ip.js Normal file
View File

@ -0,0 +1,21 @@
'use strict';
const fetch = require('node-fetch');
const $ = require('cheerio');
/**
 * Collect the post IDs listed for a user on imageporn.net, walking the
 * paginated user listing until a page yields no posts.
 *
 * @param {string} username - User whose listing is scraped.
 * @param {number} [page=1] - Listing page to start from.
 * @param {string[]} [acc=[]] - Post IDs gathered from earlier pages.
 * @returns {Promise<string[]>} `acc` followed by the IDs found on `page` and every later page.
 */
function findOnIp(username, page = 1, acc = []) {
    return Promise.resolve().then(() => {
        // encode the username so unexpected characters cannot alter the request path
        return fetch(`https://www.imageporn.net/user/${encodeURIComponent(username)}/all/${page}`);
    }).then(res => res.text()).then(html => {
        const postIds = $('.icon a', html)
            .toArray()
            // anchors without an href carry no post reference; slicing undefined would throw
            .filter(link => typeof link.attribs.href === 'string')
            // strip the fixed 16-character prefix in front of the ID
            // NOTE(review): presumably the link's path prefix - confirm against the site's markup
            .map(link => link.attribs.href.slice(16));

        if(postIds.length) {
            // still finding items, check next page
            return findOnIp(username, page + 1, acc.concat(postIds));
        }

        return acc;
    });
}
module.exports = findOnIp;

View File

@ -23,4 +23,7 @@ module.exports = yargs.command('npm start -- --user <username>').option('users',
describe: 'Ignore posts with any of these properties',
type: 'array',
choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18']
}).option('archives', {
describe: 'Search archives for deleted posts',
type: 'boolean'
}).argv;

View File

@ -6,6 +6,8 @@ const dissectLink = require('../dissectLink.js');
function curatePosts(posts, ignore) {
const processed = new Set();
console.log(ignore);
return posts.reduce((acc, post, index) => {
const host = dissectLink(post.url);
const ignoring = ignore ? ignore.find(prop => {

View File

@ -21,6 +21,10 @@ module.exports = function(posts) {
return ep.open();
}).then(() => {
return Promise.all(posts.map(post => {
if(!post.content) {
return console.log('SLIPPED THROUGH!', post);
}
return Promise.all(post.content.items.map((item, index) => {
item.index = index;

View File

@ -9,13 +9,17 @@ function attachContentInfo(posts) {
post.content = content;
return post;
}).catch(error => {
console.log('\x1b[31m%s\x1b[0m', error);
return null
}));
} else {
console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${post.title}' - ${post.url}`);
console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${post.id} - ${post.title} - ${post.url}`);
}
return acc;
}, []));
}, [])).then(posts => posts.filter(post => post));
};
module.exports = attachContentInfo;

View File

@ -36,8 +36,6 @@ function eroshare(post) {
};
})
};
}).catch(error => {
console.log('\x1b[33m%s\x1b[0m', error);
});
};

View File

@ -40,8 +40,6 @@ function imgurAlbum(post) {
original: item
}))
};
}).catch(error => {
return console.log('\x1b[31m%s\x1b[0m', error);
});
};

View File

@ -26,8 +26,6 @@ function imgurImage(post) {
original: res.data
}]
};
}).catch(error => {
return console.log('\x1b[31m%s\x1b[0m', error);
});
};

View File

@ -9,11 +9,11 @@ const gfycat = require('./gfycat.js');
const eroshare = require('./eroshare.js');
module.exports = {
self: self,
redditImage: redditImage,
redditVideo: redditVideo,
imgurImage: imgurImage,
imgurAlbum: imgurAlbum,
gfycat: gfycat,
eroshare: eroshare
self,
redditImage,
redditVideo,
imgurImage,
imgurAlbum,
gfycat,
eroshare
};