Added archive support, and the IP archive.

This commit is contained in:
DebaucheryLibrarian 2024-09-11 05:16:54 +02:00
parent 8c4353f8ed
commit f0e312f2b1
14 changed files with 117 additions and 31 deletions

View File

@ -32,7 +32,12 @@ module.exports = {
fetch: { fetch: {
sort: 'new', sort: 'new',
limit: 1000, limit: 1000,
avoidDuplicates: true avoidDuplicates: true,
archives: {
search: false,
reddit: ['ip'],
reupload: []
}
}, },
reddit: { reddit: {
api: { api: {

10
package-lock.json generated
View File

@ -25,6 +25,16 @@
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-3.0.0.tgz", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-3.0.0.tgz",
"integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=" "integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg="
}, },
"array.prototype.flatten": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/array.prototype.flatten/-/array.prototype.flatten-1.2.1.tgz",
"integrity": "sha512-3GhsA78XgK//wQKbhUe6L93kknekGlTRY0kvYcpuSi0aa9rVrMr/okeIIv/XSpN8fZ5iUM+bWifhf2/7CYKtIg==",
"requires": {
"define-properties": "1.1.2",
"es-abstract": "1.11.0",
"function-bind": "1.1.1"
}
},
"asn1": { "asn1": {
"version": "0.2.3", "version": "0.2.3",
"resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.3.tgz", "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.3.tgz",

View File

@ -27,6 +27,7 @@
"author": "Niels Simenon", "author": "Niels Simenon",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"array.prototype.flatten": "^1.2.1",
"cheerio": "^1.0.0-rc.2", "cheerio": "^1.0.0-rc.2",
"config": "^1.30.0", "config": "^1.30.0",
"date-fns": "^1.29.0", "date-fns": "^1.29.0",

View File

@ -4,7 +4,9 @@ const config = require('config');
const util = require('util'); const util = require('util');
const fs = require('fs-extra'); const fs = require('fs-extra');
const snoowrap = require('snoowrap'); const snoowrap = require('snoowrap');
const promiseFinally = require('promise.prototype.finally');
require('promise.prototype.finally').shim();
require('array.prototype.flatten').shim();
const reddit = new snoowrap(config.reddit.api); const reddit = new snoowrap(config.reddit.api);
@ -16,11 +18,11 @@ const interpolate = require('./interpolate.js');
const attachContentInfo = require('./fetch/info.js'); const attachContentInfo = require('./fetch/info.js');
const fetchContent = require('./fetch/content.js'); const fetchContent = require('./fetch/content.js');
const archives = require('./archives/archives.js');
const save = require('./save/save.js'); const save = require('./save/save.js');
const saveProfileDetails = require('./save/profileDetails.js'); const saveProfileDetails = require('./save/profileDetails.js');
promiseFinally.shim();
const args = require('./cli.js'); const args = require('./cli.js');
if(!(args.users && args.users.length) && !(args.posts && args.posts.length)) { if(!(args.users && args.users.length) && !(args.posts && args.posts.length)) {
@ -31,35 +33,56 @@ Promise.resolve().then(() => {
if(args.users) { if(args.users) {
return getUserPosts(args.users); return getUserPosts(args.users);
} }
}).then((userPosts = []) => {
return [];
}).then(userPosts => {
if(args.posts) { if(args.posts) {
return getPosts(args.posts).then(posts => posts.concat(userPosts)); return getPosts(args.posts).then(posts => posts.concat(userPosts));
} }
return userPosts; return userPosts;
}).then(posts => { }).then(posts => {
return curatePosts(posts).slice(0, args.limit); return curatePosts(posts, args.ignore).slice(0, args.limit);
}).then(posts => { }).then(posts => {
return attachContentInfo(posts).then(info => fetchContent(posts)); return attachContentInfo(posts).then(info => posts);
}).then(posts => {
return fetchContent(posts);
}).catch(error => { }).catch(error => {
return console.error(error); return console.error(error);
}); });
function getUserPosts(users) { function getUserPosts(usernames) {
return users.reduce((chain, username) => { return usernames.reduce((chain, username) => {
return chain.then(accPosts => { return chain.then(accPosts => {
return reddit.getUser(username).fetch().then(curateUser).then(saveProfileDetails).then(user => ({user, accPosts})); return reddit.getUser(username).fetch().then(curateUser).then(saveProfileDetails).then(user => ({
user,
accPosts
}));
}).then(({user, accPosts}) => { }).then(({user, accPosts}) => {
return reddit.getUser(username).getSubmissions({ return reddit.getUser(username).getSubmissions({
sort: args.sort, sort: args.sort,
limit: Infinity limit: Infinity
}).then(posts => { }).then(posts => ({
return accPosts.concat(posts.map(post => { user,
post.user = user; accPosts: accPosts.concat(posts)
return post;
})); }));
}).then(({user, accPosts}) => {
if(args.archives || config.fetch.archives.search) {
return getArchivePostIds(username, accPosts.map(post => post.id)).then(postIds => {
return Promise.all(postIds.map(postId => {
return reddit.getSubmission(postId).fetch();
}));
}).then(archivedPosts => {
return {
user,
accPosts: accPosts.concat(archivedPosts)
};
}); });
}
return {user, accPosts};
}).then(({user, accPosts}) => {
return accPosts.map(post => Object.assign(post, {user}));
}); });
}, Promise.resolve([])); }, Promise.resolve([]));
}; };
@ -94,3 +117,15 @@ function getPosts(postIds) {
return posts; return posts;
}); });
}; };
/**
 * Look a user up in every configured archive and collect the IDs of their archived posts.
 *
 * Each name in `config.fetch.archives.reddit` selects a lookup function from
 * `archives`; every lookup is started with the username and awaited together.
 *
 * @param {string} username - Username to search the archives for.
 * @param {string[]} [exclude] - Post IDs to leave out of the result (e.g. posts already retrieved).
 * @returns {Promise<string[]>} De-duplicated post IDs found in the archives, minus `exclude`.
 */
function getArchivePostIds(username, exclude) {
    console.log('Searching archives for posts...');

    const lookups = config.fetch.archives.reddit.map(source => archives[source](username));

    return Promise.all(lookups).then(idsPerArchive => {
        // a Set gives constant-time membership checks instead of scanning `exclude` for every ID
        const excluded = new Set(exclude || []);

        // merge the per-archive lists with plain concat instead of the non-standard
        // Array.prototype.flatten shim; the Set drops IDs reported by more than one archive
        const merged = [].concat(...idsPerArchive);
        const postIds = [...new Set(merged)].filter(postId => !excluded.has(postId));

        console.log(`Found ${postIds.length} unique archived posts`);

        return postIds;
    });
};

7
src/archives/archives.js Normal file
View File

@ -0,0 +1,7 @@
'use strict';
const ip = require('./ip.js');
// Archive lookups keyed by source name. index.js resolves each name listed in
// `config.fetch.archives.reddit` against this object and calls the entry with a username.
module.exports = {
ip
};

21
src/archives/ip.js Normal file
View File

@ -0,0 +1,21 @@
'use strict';
const fetch = require('node-fetch');
const $ = require('cheerio');
/**
 * Collect the post IDs listed for a user on the imageporn.net archive.
 *
 * Requests the user's listing page by page, starting at `page`, and keeps
 * asking for the next page until one yields no `.icon a` links.
 *
 * @param {string} username - Username whose archive listing is fetched.
 * @param {number} [page=1] - Listing page to start from.
 * @param {string[]} [acc=[]] - IDs gathered so far; used by the recursion, callers normally omit it.
 * @returns {Promise<string[]>} IDs from every non-empty page, in page order.
 */
function findOnIp(username, page = 1, acc = []) {
    return Promise.resolve().then(() => {
        // the username becomes part of the URL path, so escape anything that could alter it
        return fetch(`https://www.imageporn.net/user/${encodeURIComponent(username)}/all/${page}`);
    }).then(res => res.text()).then(html => {
        const postIds = $('.icon a', html)
            .toArray()
            // an anchor without an href carries nothing to extract; skip it instead of throwing
            .filter(link => link.attribs && link.attribs.href)
            // presumably the first 16 characters are a fixed path prefix in front of the
            // post ID - TODO confirm against the site's markup
            .map(link => link.attribs.href.slice(16));

        if(postIds.length) {
            // still finding items, check next page
            return findOnIp(username, page + 1, acc.concat(postIds));
        }

        return acc;
    });
};
module.exports = findOnIp;

View File

@ -23,4 +23,7 @@ module.exports = yargs.command('npm start -- --user <username>').option('users',
describe: 'Ignore posts with any of these properties', describe: 'Ignore posts with any of these properties',
type: 'array', type: 'array',
choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18'] choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18']
}).option('archives', {
describe: 'Search archives for deleted posts',
type: 'boolean'
}).argv; }).argv;

View File

@ -6,6 +6,8 @@ const dissectLink = require('../dissectLink.js');
function curatePosts(posts, ignore) { function curatePosts(posts, ignore) {
const processed = new Set(); const processed = new Set();
console.log(ignore);
return posts.reduce((acc, post, index) => { return posts.reduce((acc, post, index) => {
const host = dissectLink(post.url); const host = dissectLink(post.url);
const ignoring = ignore ? ignore.find(prop => { const ignoring = ignore ? ignore.find(prop => {

View File

@ -21,6 +21,10 @@ module.exports = function(posts) {
return ep.open(); return ep.open();
}).then(() => { }).then(() => {
return Promise.all(posts.map(post => { return Promise.all(posts.map(post => {
if(!post.content) {
return console.log('SLIPPED THROUGH!', post);
}
return Promise.all(post.content.items.map((item, index) => { return Promise.all(post.content.items.map((item, index) => {
item.index = index; item.index = index;

View File

@ -9,13 +9,17 @@ function attachContentInfo(posts) {
post.content = content; post.content = content;
return post; return post;
}).catch(error => {
console.log('\x1b[31m%s\x1b[0m', error);
return null
})); }));
} else { } else {
console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${post.title}' - ${post.url}`); console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${post.id} - ${post.title} - ${post.url}`);
} }
return acc; return acc;
}, [])); }, [])).then(posts => posts.filter(post => post));
}; };
module.exports = attachContentInfo; module.exports = attachContentInfo;

View File

@ -36,8 +36,6 @@ function eroshare(post) {
}; };
}) })
}; };
}).catch(error => {
console.log('\x1b[33m%s\x1b[0m', error);
}); });
}; };

View File

@ -40,8 +40,6 @@ function imgurAlbum(post) {
original: item original: item
})) }))
}; };
}).catch(error => {
return console.log('\x1b[31m%s\x1b[0m', error);
}); });
}; };

View File

@ -26,8 +26,6 @@ function imgurImage(post) {
original: res.data original: res.data
}] }]
}; };
}).catch(error => {
return console.log('\x1b[31m%s\x1b[0m', error);
}); });
}; };

View File

@ -9,11 +9,11 @@ const gfycat = require('./gfycat.js');
const eroshare = require('./eroshare.js'); const eroshare = require('./eroshare.js');
module.exports = { module.exports = {
self: self, self,
redditImage: redditImage, redditImage,
redditVideo: redditVideo, redditVideo,
imgurImage: imgurImage, imgurImage,
imgurAlbum: imgurAlbum, imgurAlbum,
gfycat: gfycat, gfycat,
eroshare: eroshare eroshare
}; };