Added support for RedGifs and Reddit albums. Improved the command-line logger. Added rate limiters for the Reddit API and for content host requests.

This commit is contained in:
DebaucheryLibrarian 2024-09-11 05:16:58 +02:00
parent bb06fe9763
commit de50d609f3
33 changed files with 586 additions and 256 deletions
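For orientation, here is a minimal sketch of the rate limiting this commit introduces, mirroring the new src/limiter.js module and the config.limiter defaults added below (concurrency: 100, interval: 100 ms); the function name and the simplified fetch logic are illustrative, not the actual fetch/item.js implementation:

'use strict';

const Bottleneck = require('bottleneck');
const bhttp = require('bhttp');

// One shared limiter for host downloads: at most `maxConcurrent` requests in
// flight, spaced at least `minTime` milliseconds apart.
const itemLimiter = new Bottleneck({
    maxConcurrent: 100, // config.limiter.concurrency / --concurrency
    minTime: 100,       // config.limiter.interval / --interval
});

async function fetchItem(url) {
    // Every download goes through the limiter instead of hitting the host directly.
    const res = await itemLimiter.schedule(() => bhttp.get(url));
    return res.body;
}

The defaults can be overridden per run via the new CLI flags, e.g. npm start -- --user <username> --interval 250 --concurrency 20 --log-level debug.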

View File

@ -6,6 +6,6 @@
"rules": {
"no-console": 0,
"indent": ["error", 4],
"max-len": [2, {"code": 200, "tabWidth": 4, "ignoreUrls": true}]
"max-len": 0
}
}

View File

@ -12,9 +12,10 @@ Most features are optional and can easily be disabled!
* Extract single images from albums
### Supported hosts
* Reddit text/self, images and videos[\*](#reddit-videos)
* Reddit text/self, images, albums and videos[\*](#reddit-videos)
* Imgur (requires API key as of late 2019)
* Gfycat
* RedGifs
* YouTube
* PornHub, YouPorn, xHamster, RedTube, xnxx, YouJizz
* Twitter, Facebook, Instagram

View File

@ -69,6 +69,13 @@ module.exports = {
reupload: [],
},
},
logger: {
level: 'info',
},
limiter: {
concurrency: 100,
interval: 100,
},
reddit: {
api: {
userAgent: 'ripunzel',

213
package-lock.json generated
View File

@ -4,6 +4,16 @@
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"@dabh/diagnostics": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/@dabh/diagnostics/-/diagnostics-2.0.2.tgz",
"integrity": "sha512-+A1YivoVDNNVCdfozHSR8v/jyuuLTMXwjWuxPFlFlUapXoGc+Gj9mDlTDDfrwl7rXCl2tNZ0kE8sIBO6YOn96Q==",
"requires": {
"colorspace": "1.1.x",
"enabled": "2.0.x",
"kuler": "^2.0.0"
}
},
"@types/node": {
"version": "9.6.5",
"resolved": "https://registry.npmjs.org/@types/node/-/node-9.6.5.tgz",
@ -295,6 +305,11 @@
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
"integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24="
},
"bottleneck": {
"version": "2.19.5",
"resolved": "https://registry.npmjs.org/bottleneck/-/bottleneck-2.19.5.tgz",
"integrity": "sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw=="
},
"brace-expansion": {
"version": "1.1.11",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
@ -443,6 +458,25 @@
"resolved": "https://registry.npmjs.org/code-point-at/-/code-point-at-1.1.0.tgz",
"integrity": "sha1-DQcLTQQ6W+ozovGkDi7bPZpMz3c="
},
"color": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/color/-/color-3.2.1.tgz",
"integrity": "sha512-aBl7dZI9ENN6fUGC7mWpMTPNHmWUSNan9tuWN6ahh5ZLNk9baLJOnSMlrQkHcrfFgz2/RigjUVAjdx36VcemKA==",
"requires": {
"color-convert": "^1.9.3",
"color-string": "^1.6.0"
},
"dependencies": {
"color-convert": {
"version": "1.9.3",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
"integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==",
"requires": {
"color-name": "1.1.3"
}
}
}
},
"color-convert": {
"version": "1.9.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.1.tgz",
@ -455,8 +489,30 @@
"color-name": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
"integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=",
"dev": true
"integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU="
},
"color-string": {
"version": "1.9.0",
"resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.0.tgz",
"integrity": "sha512-9Mrz2AQLefkH1UvASKj6v6hj/7eWgjnT/cVsR8CumieLoT+g900exWeNogqtweI8dxloXN9BDQTYro1oWu/5CQ==",
"requires": {
"color-name": "^1.0.0",
"simple-swizzle": "^0.2.2"
}
},
"colors": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/colors/-/colors-1.4.0.tgz",
"integrity": "sha512-a+UqTh4kgZg/SlGvfbzDHpgRu7AAQOmmqRHJnxhRZICKFUT91brVhNNt58CMWU9PsBbv3PDCZUHbVxuDiH2mtA=="
},
"colorspace": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/colorspace/-/colorspace-1.1.4.tgz",
"integrity": "sha512-BgvKJiuVu1igBUF2kEjRCZXol6wiiGbY5ipL/oVPwm0BL9sIpMIzM8IK7vwuxIIzOXMV3Ey5w+vxhm0rR/TN8w==",
"requires": {
"color": "^3.1.3",
"text-hex": "1.0.x"
}
},
"combined-stream": {
"version": "1.0.6",
@ -724,6 +780,11 @@
"jsbn": "~0.1.0"
}
},
"enabled": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/enabled/-/enabled-2.0.0.tgz",
"integrity": "sha512-AKrN98kuwOzMIdAizXGI86UFBoo26CL21UM763y1h/GMSJ4/OHU9k2YlsmBpyScFo/wbLzWQJBMCW4+IO3/+OQ=="
},
"end-of-stream": {
"version": "1.4.4",
"resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz",
@ -1062,6 +1123,11 @@
"resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz",
"integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc="
},
"fecha": {
"version": "4.2.1",
"resolved": "https://registry.npmjs.org/fecha/-/fecha-4.2.1.tgz",
"integrity": "sha512-MMMQ0ludy/nBs1/o0zVOiKTpG7qMbonKUzjJgQFEuvq6INZ1OraKPRAWkBq5vlKLOUMpmNYG1JoN3oDPUQ9m3Q=="
},
"figures": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/figures/-/figures-2.0.0.tgz",
@ -1110,6 +1176,11 @@
"which": "^1.1.1"
}
},
"fn.name": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/fn.name/-/fn.name-1.1.0.tgz",
"integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw=="
},
"foreach": {
"version": "2.0.5",
"resolved": "https://registry.npmjs.org/foreach/-/foreach-2.0.5.tgz",
@ -1161,6 +1232,11 @@
"resolved": "https://registry.npmjs.org/lodash/-/lodash-2.4.2.tgz",
"integrity": "sha1-+t2DS5aDBz2hebPq5tnA0VBT9z4="
},
"mime": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz",
"integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg=="
},
"uuid": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-2.0.3.tgz",
@ -1829,6 +1905,11 @@
"verror": "1.10.0"
}
},
"kuler": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/kuler/-/kuler-2.0.0.tgz",
"integrity": "sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A=="
},
"lcid": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/lcid/-/lcid-1.0.0.tgz",
@ -1877,6 +1958,25 @@
"resolved": "https://registry.npmjs.org/lodash.sortby/-/lodash.sortby-4.7.0.tgz",
"integrity": "sha1-7dFMgk4sycHgsKG0K7UhBRakJDg="
},
"logform": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/logform/-/logform-2.3.0.tgz",
"integrity": "sha512-graeoWUH2knKbGthMtuG1EfaSPMZFZBIrhuJHhkS5ZseFBrc7DupCzihOQAzsK/qIKPQaPJ/lFQFctILUY5ARQ==",
"requires": {
"colors": "^1.2.1",
"fecha": "^4.2.0",
"ms": "^2.1.1",
"safe-stable-stringify": "^1.1.0",
"triple-beam": "^1.3.0"
},
"dependencies": {
"ms": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
}
}
},
"lru-cache": {
"version": "4.1.2",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.2.tgz",
@ -1905,9 +2005,9 @@
"integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w=="
},
"mime": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz",
"integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg=="
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/mime/-/mime-3.0.0.tgz",
"integrity": "sha512-jSCU7/VB1loIWBZe14aEYHU/+1UMEHoaO7qxCOVJOw9GgH72VAWppxNcjU+x9a2k3GSIBXNKxXQFqRvvZ7vr3A=="
},
"mime-db": {
"version": "1.33.0",
@ -2069,6 +2169,14 @@
"wrappy": "1"
}
},
"one-time": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/one-time/-/one-time-1.0.0.tgz",
"integrity": "sha512-5DXOiRKwuSEcQ/l0kGCF6Q3jcADFv5tSmRaJck/OqkVFcOzutB134KRSfF0xDrL39MNnqxbHBbUUcjZIhTgb2g==",
"requires": {
"fn.name": "1.x.x"
}
},
"onetime": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/onetime/-/onetime-2.0.1.tgz",
@ -2538,6 +2646,11 @@
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.1.tgz",
"integrity": "sha512-kKvNJn6Mm93gAczWVJg7wH+wGYWNrDHdWvpUmHyEsgCtIwwo3bqPtV4tR5tuPaUhTOo/kvhVwd8XwwOllGYkbg=="
},
"safe-stable-stringify": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-1.1.1.tgz",
"integrity": "sha512-ERq4hUjKDbJfE4+XtZLFPCDi8Vb1JqaxAPTxWFLBx8XcAlf9Bda/ZJdVezs/NAfsMQScyIlUMx+Yeu7P7rx5jw=="
},
"safer-buffer": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
@ -2580,6 +2693,21 @@
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.2.tgz",
"integrity": "sha1-tf3AjxKH6hF4Yo5BXiUTK3NkbG0="
},
"simple-swizzle": {
"version": "0.2.2",
"resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz",
"integrity": "sha1-pNprY1/8zMoz9w0Xy5JZLeleVXo=",
"requires": {
"is-arrayish": "^0.3.1"
},
"dependencies": {
"is-arrayish": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz",
"integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ=="
}
}
},
"slice-ansi": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-1.0.0.tgz",
@ -2672,6 +2800,11 @@
"tweetnacl": "~0.14.0"
}
},
"stack-trace": {
"version": "0.0.10",
"resolved": "https://registry.npmjs.org/stack-trace/-/stack-trace-0.0.10.tgz",
"integrity": "sha1-VHxws0fo0ytOEI6hoqFZ5f3eGcA="
},
"stealthy-require": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/stealthy-require/-/stealthy-require-1.1.1.tgz",
@ -2782,6 +2915,11 @@
"resolved": "https://registry.npmjs.org/template-format/-/template-format-1.2.4.tgz",
"integrity": "sha512-+8ItNMtMTBbsEHyPR1l7Ke1WZfl91PAcoTvwAcx5U28CRLd7ylzDLazv0kuDTzNmdq/RAOnsxFVWzr4QwVIFVg=="
},
"text-hex": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/text-hex/-/text-hex-1.0.0.tgz",
"integrity": "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg=="
},
"text-table": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",
@ -2881,6 +3019,11 @@
"resolved": "https://registry.npmjs.org/traverse/-/traverse-0.6.6.tgz",
"integrity": "sha1-y99WD9e5r2MlAv7UD5GMFX6pcTc="
},
"triple-beam": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/triple-beam/-/triple-beam-1.3.0.tgz",
"integrity": "sha512-XrHUvV5HpdLmIj4uVMxHggLbFSZYIn7HEWsqePZcI50pco+MPqJ50wMGY794X7AOOhxOBAjbkqfAbEe/QMp2Lw=="
},
"tunnel-agent": {
"version": "0.6.0",
"resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
@ -3037,6 +3180,66 @@
"resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.0.tgz",
"integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho="
},
"winston": {
"version": "3.3.3",
"resolved": "https://registry.npmjs.org/winston/-/winston-3.3.3.tgz",
"integrity": "sha512-oEXTISQnC8VlSAKf1KYSSd7J6IWuRPQqDdo8eoRNaYKLvwSb5+79Z3Yi1lrl6KDpU6/VWaxpakDAtb1oQ4n9aw==",
"requires": {
"@dabh/diagnostics": "^2.0.2",
"async": "^3.1.0",
"is-stream": "^2.0.0",
"logform": "^2.2.0",
"one-time": "^1.0.0",
"readable-stream": "^3.4.0",
"stack-trace": "0.0.x",
"triple-beam": "^1.3.0",
"winston-transport": "^4.4.0"
},
"dependencies": {
"async": {
"version": "3.2.2",
"resolved": "https://registry.npmjs.org/async/-/async-3.2.2.tgz",
"integrity": "sha512-H0E+qZaDEfx/FY4t7iLRv1W2fFI6+pyCeTw1uN20AQPiwqwM6ojPxHxdLv4z8hi2DtnW9BOckSspLucW7pIE5g=="
},
"is-stream": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz",
"integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg=="
},
"readable-stream": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz",
"integrity": "sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==",
"requires": {
"inherits": "^2.0.3",
"string_decoder": "^1.1.1",
"util-deprecate": "^1.0.1"
}
}
}
},
"winston-transport": {
"version": "4.4.1",
"resolved": "https://registry.npmjs.org/winston-transport/-/winston-transport-4.4.1.tgz",
"integrity": "sha512-ciZRlU4CSjHqHe8RQG1iPxKMRVwv6ZJ0RC7DxStKWd0KjpAhPDy5gVYSCpIUq+5CUsP+IyNOTZy1X0tO2QZqjg==",
"requires": {
"logform": "^2.2.0",
"readable-stream": "^3.4.0",
"triple-beam": "^1.2.0"
},
"dependencies": {
"readable-stream": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz",
"integrity": "sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==",
"requires": {
"inherits": "^2.0.3",
"string_decoder": "^1.1.1",
"util-deprecate": "^1.0.1"
}
}
}
},
"wordwrap": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz",

View File

@ -32,6 +32,7 @@
"bhttp": "^1.2.4",
"blake2": "^4.0.0",
"bluebird": "^3.5.1",
"bottleneck": "^2.19.5",
"cheerio": "^1.0.0-rc.2",
"config": "^1.30.0",
"date-fns": "^1.29.0",
@ -40,6 +41,7 @@
"fs-extra": "^5.0.0",
"js-yaml": "^3.12.0",
"jsdom": "^15.2.0",
"mime": "^3.0.0",
"mime-types": "^2.1.18",
"node-cron": "^1.2.1",
"node-exiftool": "^2.3.0",
@ -49,6 +51,7 @@
"snoowrap": "^1.20.0",
"template-format": "^1.2.4",
"url-pattern": "^1.0.3",
"winston": "^3.3.3",
"yargs": "^11.0.0",
"youtube-dl": "^2.1.0"
},

View File

@ -7,21 +7,21 @@ const Promise = require('bluebird');
const exiftool = require('node-exiftool');
const exiftoolBin = require('dist-exiftool');
const cron = require('node-cron');
const { format } = require('date-fns');
require('array.prototype.flatten').shim();
const reddit = new Snoowrap(config.reddit.api);
const args = require('./cli.js')();
const args = require('./cli')();
const logger = require('./logger')(__filename);
const dissectLink = require('./dissectLink.js');
const curatePosts = require('./curate/posts.js');
const dissectLink = require('./dissectLink');
const curatePosts = require('./curate/posts');
const { attachContentInfo, getInfo } = require('./fetch/info.js');
const { fetchSaveUserContent, fetchSaveDirectContent } = require('./fetch/content.js');
const { attachContentInfo, getInfo } = require('./fetch/info');
const { fetchSaveUserContent, fetchSaveDirectContent } = require('./fetch/content');
const getPosts = require('./sources/getPosts.js')(reddit, args);
const getUserPosts = require('./sources/getUserPosts.js')(reddit, args);
const getPosts = require('./sources/getPosts')(reddit, args);
const getUserPosts = require('./sources/getUserPosts')(reddit, args);
async function getFileContents(location, label) {
try {
@ -29,7 +29,7 @@ async function getFileContents(location, label) {
return fileContents.split('\n').filter(entry => entry && entry.slice(0, 1) !== '#');
} catch (error) {
console.log('\x1b[31m%s\x1b[0m', `Could not read ${label} file '${location}': ${error}.`);
logger.error(`Could not read ${label} file '${location}': ${error}.`);
return [];
}
@ -120,13 +120,13 @@ async function initApp() {
await ep.close();
if (args.watch) {
console.log(`[${format(new Date(), 'YYYY-MM-DD HH:mm:ss')}] Watch-mode enabled, checking again for new posts according to crontab '${config.fetch.watch.schedule}'.`);
logger.info(`Watch-mode enabled, checking again for new posts according to crontab '${config.fetch.watch.schedule}'.`);
}
} catch (error) {
if (args.debug) {
console.log('\x1b[31m%s\x1b[0m', error.stack);
logger.error(error.stack);
} else {
console.log('\x1b[31m%s\x1b[0m', error.message);
logger.error(error.message);
}
}
}

View File

@ -2,18 +2,20 @@
const config = require('config');
const archives = require('./archives.js');
const logger = require('../logger')(__filename);
const archives = require('./archives');
function getArchivePostIds(username, exclude) {
console.log(`Finding archived posts for '${username}'...`);
logger.info(`Finding archived posts for '${username}'...`);
return Promise.all(config.fetch.archives.reddit.map(source => archives[source](username))).then(postIds => postIds.flatten()).then(postIds => {
return exclude ? postIds.filter(postId => !exclude.includes(postId)) : postIds;
}).then(postIds => {
console.log(`Found ${postIds.length} unique archived posts for user '${username}'`);
return Promise.all(config.fetch.archives.reddit.map(source => archives[source](username)))
.then(postIds => postIds.flatten())
.then(postIds => (exclude ? postIds.filter(postId => !exclude.includes(postId)) : postIds))
.then((postIds) => {
logger.info(`Found ${postIds.length} unique archived posts for user '${username}'`);
return postIds;
});
};
}
module.exports = getArchivePostIds;

View File

@ -6,6 +6,22 @@ const yargs = require('yargs');
function getArgs() {
const args = yargs
.command('npm start -- --user <username>')
.option('log-level', {
alias: 'level',
describe: 'CLI log verbosity',
type: 'string',
default: config.logger.level,
})
.option('interval', {
describe: 'Minimum wait time between HTTP requests',
type: 'number',
default: config.limiter.interval,
})
.option('concurrency', {
describe: 'Maximum HTTP requests pending at the same time',
type: 'number',
default: config.limiter.concurrency,
})
.option('users', {
alias: 'user',
describe: 'Reddit usernames to fetch posts from',

View File

@ -7,43 +7,39 @@ const dissectLink = require('../dissectLink.js');
const hashPost = require('./hashPost.js');
const { isAfter, isBefore, isEqual } = require('date-fns');
const logger = require('../logger')(__filename);
function report(curatedPosts, indexed, user, args) {
const {
indexedUpdated, tooOldCount, tooRecentCount, beforeIndexedCount, afterIndexedCount, requestedIgnored,
indexedUpdated, tooOldCount, tooRecentCount, beforeIndexedCount, afterIndexedCount, requestedIgnored, duplicates,
} = curatedPosts;
if (indexedUpdated.length > 0) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${indexedUpdated.length} indexed posts for '${user.name}'`);
logger.info(`Ignoring ${indexedUpdated.length} indexed posts for '${user.name}'`);
}
if (requestedIgnored.length > 0) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring ${requestedIgnored.length} posts because their IDs are specified to be ignored for '${user.name}'`,
);
logger.info(`Ignoring ${requestedIgnored.length} posts because their IDs are specified to be ignored for '${user.name}'`);
}
if (tooOldCount > 0) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${tooOldCount} older posts for '${user.name}' for specified date limit '${args.after}'`);
logger.info(`Ignoring ${tooOldCount} older posts for '${user.name}' for specified date limit '${args.after}'`);
}
if (tooRecentCount > 0) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${tooRecentCount} newer posts for '${user.name}' for specified date limit '${args.before}'`);
logger.info(`Ignoring ${tooRecentCount} newer posts for '${user.name}' for specified date limit '${args.before}'`);
}
if (beforeIndexedCount > 0) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring ${beforeIndexedCount} posts older than the ${args.afterIndexed} indexed post (${indexed[args.afterIndexed].id}, ${indexed[args.afterIndexed].date}) for '${user.name}'`,
);
logger.info(`Ignoring ${beforeIndexedCount} posts older than the ${args.afterIndexed} indexed post (${indexed[args.afterIndexed].id}, ${indexed[args.afterIndexed].date}) for '${user.name}'`);
}
if (afterIndexedCount > 0) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring ${afterIndexedCount} posts newer than the ${args.beforeIndexed} indexed post (${indexed[args.beforeIndexed].id}, ${indexed[args.beforeIndexed].date}) for '${user.name}'`,
);
logger.info(`Ignoring ${afterIndexedCount} posts newer than the ${args.beforeIndexed} indexed post (${indexed[args.beforeIndexed].id}, ${indexed[args.beforeIndexed].date}) for '${user.name}'`);
}
if (duplicates.length > 0) {
logger.info(`Ignoring ${duplicates.length} duplicate posts for '${user.name}'`);
}
}
@ -103,7 +99,7 @@ function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args)
const ignoring = args.ignore ? args.ignore.find(prop => post[prop]) : null;
if (ignoring) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`);
logger.verbose(`Ignoring ${ignoring} post '${post.title}' (${permalink})`);
return acc;
}
@ -117,21 +113,15 @@ function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args)
}
if (hostIncludes || hostExcluded) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring source '${host.label}' from post '${post.url}' (${permalink})`,
);
logger.info(`Ignoring source '${host.label}' from post '${post.url}' (${permalink})`);
return acc;
}
if (config.fetch.avoidDuplicates && processed.has(host.id)) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring duplicate content '${post.url}' (cross-post, repost or superfluous --post ID) (${permalink})`,
);
logger.verbose(`Ignoring duplicate content '${post.url}' (cross-post, repost or superfluous --post ID) (${permalink})`);
return acc;
return { ...acc, duplicates: [...acc.duplicates, curatedPost] };
}
processed.add(host.id);
@ -139,6 +129,7 @@ function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args)
return {
...acc,
processed,
posts: [
...acc.posts,
curatedPost,
@ -161,6 +152,8 @@ const curatePosts = (userPosts, ignoreIdsArray, args) => Object.values(userPosts
posts: [],
indexedUpdated: [],
requestedIgnored: [],
duplicates: [],
duplicateCount: 0,
tooOldCount: 0,
tooRecentCount: 0,
beforeIndexedCount: 0,

View File

@ -18,6 +18,11 @@ const hosts = [
label: 'reddit',
pattern: new UrlPattern('http(s)\\://i.reddituploads.com/:id(?*)'),
},
{
method: 'redditAlbum',
label: 'reddit',
pattern: new UrlPattern('http(s)\\://(www.)reddit.com/gallery/:id'),
},
{
method: 'redditVideo',
label: 'reddit',
@ -53,6 +58,11 @@ const hosts = [
label: 'gfycat',
pattern: new UrlPattern('http(s)\\://(:server.)gfycat.com/(gifs/detail/)(:id-mobile)(:id-size_restricted)(:id)(.:ext)(?*)'),
},
{
method: 'redgifs',
label: 'redgifs',
pattern: new UrlPattern('http(s)\\://(:subdomain.)redgifs.com(/watch)/(:id-mobile)(:id)(.:ext)(?*)'),
},
{
method: 'erome',
label: 'erome',
@ -139,7 +149,5 @@ module.exports = function dissectLink(url) {
}
}
console.log(url);
return null;
};

View File

@ -4,14 +4,15 @@ const config = require('config');
const Promise = require('bluebird');
const yaml = require('js-yaml');
const saveProfileDetails = require('../save/profileDetails.js');
const fetchItem = require('./item.js');
const interpolate = require('../interpolate.js');
const save = require('../save/save.js');
// const textToStream = require('../save/textToStream.js');
const saveMeta = require('../save/meta.js');
const mux = require('../save/mux.js');
const writeToIndex = require('../save/writeToIndex.js');
const logger = require('../logger')(__filename);
const saveProfileDetails = require('../save/profileDetails');
const fetchItem = require('./item');
const interpolate = require('../interpolate');
const save = require('../save/save');
// const textToStream = require('../save/textToStream');
const saveMeta = require('../save/meta');
const mux = require('../save/mux');
const writeToIndex = require('../save/writeToIndex');
function curateComments(comments) {
return comments.map((comment) => {
@ -107,6 +108,10 @@ async function fetchSaveUserContent(user, ep, args) {
const hashes = new Set(user.indexed.original.map(item => item.hash));
const posts = await Promise.map(user.posts, async (post) => {
if (!post.content) {
return null;
}
const hash = await Promise.reduce(post.content.items, async (accItems, originalItem, index) => {
const item = { ...originalItem, index };
const buffers = await getBuffers(item, post, post.host);
@ -118,10 +123,7 @@ async function fetchSaveUserContent(user, ep, args) {
// prevent duplicates
if (config.fetch.avoidDuplicates && hashes.has(buffers[0].hash)) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring duplicate file '${post.url}' (${post.permalink})`,
);
logger.verbose(`Ignoring duplicate file '${post.url}' (${post.permalink})`);
return buffers[0].hash;
}
@ -148,12 +150,12 @@ async function fetchSaveUserContent(user, ep, args) {
concurrency: config.fetch.concurrency,
});
return writeToIndex(posts, profilePaths, user, args);
return writeToIndex(posts.filter(Boolean), profilePaths, user, args);
}
async function fetchSaveDirectContent(content, host, ep) {
return Promise.reduce(content.items, async (accItems, originalItem, index) => {
console.log(`Fetching and saving '${host.url}'`);
logger.info(`Fetching and saving '${host.url}'`);
const item = { ...originalItem, index };
const buffers = await getBuffers(item, null, host);

View File

@ -3,7 +3,8 @@
const config = require('config');
const Promise = require('bluebird');
const methods = require('../methods/methods.js');
const logger = require('../logger')(__filename);
const methods = require('../methods/methods');
const attachContentInfo = (users, reddit) => Promise.reduce(Object.values(users), async (accUsers, user) => ({
...accUsers,
@ -11,7 +12,7 @@ const attachContentInfo = (users, reddit) => Promise.reduce(Object.values(users)
...user,
posts: await Promise.reduce(user.posts, async (accPosts, post) => {
if (!post.host || !methods[post.host.method]) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${post.url}' (${post.permalink})`);
logger.warn(`Ignoring unsupported content '${post.url}' (${post.permalink})`);
return accPosts;
}
@ -25,10 +26,10 @@ const attachContentInfo = (users, reddit) => Promise.reduce(Object.values(users)
},
];
} catch (error) {
console.log('\x1b[31m%s\x1b[0m', `${error} (${post.permalink})`);
logger.warn(`${error.message} (${post.permalink})`);
if (config.fetch.archives.preview && post.preview) {
console.log(`Found preview images for unavailable source '${post.url}' (${post.permalink})`);
logger.info(`Found preview images for unavailable source '${post.url}' (${post.permalink})`);
return [
...accPosts,
@ -53,7 +54,7 @@ async function getInfo(host, reddit, url) {
return info;
} catch (error) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${url}'`);
logger.verbose(`Ignoring unsupported content '${url}'`);
return null;
}

View File

@ -4,13 +4,14 @@ const config = require('config');
const bhttp = require('bhttp');
const blake2 = require('blake2');
const logger = require('../logger')(__filename);
const limiter = require('../limiter').items;
async function fetchItem(url, attempt, post, host) {
async function retry(error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch '${url}': ${error.message} (${post ? post.permalink : 'no post'})`);
logger.warn(`Failed to fetch '${url}', ${attempt < config.fetch.retries ? 'retrying' : 'giving up'}: ${error.message} (${post ? post.permalink : 'no post'})`);
if (attempt < config.fetch.retries) {
console.log('Retrying...');
return fetchItem(url, attempt + 1, post);
}
@ -18,13 +19,17 @@ async function fetchItem(url, attempt, post, host) {
}
try {
const res = await bhttp.get(url);
const res = await limiter.schedule(async () => bhttp.get(url));
if (res.statusCode !== 200) {
throw new Error(`Response not OK for '${url}', HTTP code '${res.statusCode}'`);
}
console.log(`Fetched '${host.url}' (${post ? post.permalink : 'no post'})`);
if (!Buffer.isBuffer(res.body)) {
throw new Error(`Unexpected response for '${url}' (${res.statusCode}): ${res.body}`);
}
logger.debug(`Fetched '${host ? host.url : url}' (${post ? post.permalink : 'no post'})`);
const hash = blake2.createHash('blake2b', { digestLength: 24 });
hash.update(res.body);
@ -32,7 +37,6 @@ async function fetchItem(url, attempt, post, host) {
return Object.assign(res.body, { hash: contentHash });
} catch (error) {
console.log(error);
return retry(error);
}
}

19
src/limiter.js Normal file
View File

@ -0,0 +1,19 @@
'use strict';
const Bottleneck = require('bottleneck');
const args = require('./cli')();
module.exports = {
reddit: new Bottleneck({
reservoir: 30,
reservoirRefreshAmount: 30,
reservoirRefreshInterval: 60000,
maxConcurrent: 1,
minTime: 100,
}),
items: new Bottleneck({
maxConcurrent: args.concurrency,
minTime: args.interval,
}),
};
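For context: the reddit limiter above uses Bottleneck's reservoir options, meaning it starts with 30 request slots, refills back to 30 every 60,000 ms, runs calls one at a time, and keeps at least 100 ms between them. A usage sketch, mirroring how the sources modules below wrap their snoowrap calls (the wrapper function is illustrative):

const limiter = require('./limiter').reddit;

// Roughly 30 Reddit API requests per minute, serialized and spaced >= 100 ms apart.
async function fetchRedditUser(reddit, username) {
    return limiter.schedule(() => reddit.getUser(username).fetch());
}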

26
src/logger.js Normal file
View File

@ -0,0 +1,26 @@
'use strict';
const winston = require('winston');
const args = require('./cli.js')();
const logger = winston.createLogger({
level: args.logLevel,
transports: [
new winston.transports.Console({
level: args.logLevel,
format: winston.format.combine(
winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
winston.format.colorize(),
winston.format.printf(info => `${info.timestamp} ${info.level}: ${info.message}`),
),
timestamp: true,
}),
],
});
function getLogger() {
return logger;
}
module.exports = getLogger;
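A sketch of the call pattern the other modules use for this logger; getLogger() currently accepts but ignores the __filename argument and always returns the single shared winston instance. The messages are illustrative:

const logger = require('./logger')(__filename); // argument accepted but not yet used

logger.info('Watch-mode enabled');   // rendered as '<timestamp> info: Watch-mode enabled', colorized
logger.verbose('Saved file');        // only printed when --log-level is 'verbose', 'debug' or 'silly'
logger.error('Could not read file');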

View File

@ -4,6 +4,8 @@ const config = require('config');
const fetch = require('node-fetch');
const cheerio = require('cheerio');
const logger = require('../logger')(__filename);
const base = 'https://www.erome.com/';
async function erome(host, post) {
@ -50,7 +52,7 @@ async function erome(host, post) {
const extract = config.library.extractSingleAlbumItem && (items.length === 1);
if (extract) {
console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${url}' (${post ? post.url : 'no post'})`);
logger.verbose(`Extracting single item from album '${url}' (${post ? post.url : 'no post'})`);
}
return {

View File

@ -1,13 +1,28 @@
'use strict';
const fetch = require('node-fetch');
const bhttp = require('bhttp');
const redgifs = require('./redgifs');
async function gfycat(host) {
const res = await fetch(`https://api.gfycat.com/v1/gfycats/${host.id}`);
const data = await res.json();
const res = await bhttp.get(`https://api.gfycat.com/v1/gfycats/${host.id}`);
const data = await res.body;
if (data.error) {
throw new Error(data.error);
if (data.errorMessage) {
const redirectRes = await bhttp.head(host.url, {
followRedirects: false,
});
if (redirectRes.statusCode === 301) {
// Gfycat redirects all NSFW gifs to RedGifs, likely the case
return redgifs({
...host,
url: `https://www.redgifs.com/watch/${host.id}`,
method: 'redgifs',
label: 'redgifs',
});
}
throw new Error(`Gfycat API returned error for source '${host.url}' (${res.status}): ${data.errorMessage}`);
}
return {

View File

@ -2,7 +2,8 @@
const config = require('config');
const fetch = require('node-fetch');
// const mime = require('mime-types');
const logger = require('../logger')(__filename);
async function imgurAlbumApi(host, post) {
const res = await fetch(`https://api.imgur.com/3/album/${host.id}`, {
@ -20,7 +21,7 @@ async function imgurAlbumApi(host, post) {
const extract = config.library.extractSingleAlbumItem && data.images.length === 1;
if (extract) {
console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${data.link}' (${post ? post.url : 'no post'})`);
logger.verbose(`Extracting single item from album '${data.link}' (${post ? post.url : 'no post'})`);
}
return {
@ -45,46 +46,4 @@ async function imgurAlbumApi(host, post) {
};
}
/*
* as of late 2019, imgur requires log in to view albums and gallery images
async function imgurAlbum(host, post) {
const res = await fetch(`https://imgur.com/a/${post.host.id}`);
const html = await res.text();
if (res.status !== 200) {
if (config.methods.imgur.clientId) {
console.log('\x1b[31m%s\x1b[0m', `Could not fetch info for direct imgur album '${post.host.id}' (${res.statusText}), trying API fallback (${post.permalink})`);
return imgurAlbumApi(post);
}
throw new Error(`Could not fetch info for imgur album '${post.host.id}' (${res.statusText}) no API fallback configured`);
}
const dataString = html.replace(/\s+/g, ' ').match(/}}, item:(.*)}; var PREBID_TIMEOUT/)[1];
const data = JSON.parse(dataString);
const extract = config.library.album.extractSingleItem && data.album_images.images.length === 1;
return {
album: extract ? null : {
id: data.id,
url: `https://imgur.com/a/${post.host.id}`,
title: data.title,
description: data.description,
datetime: new Date(data.datetime),
},
items: data.album_images.images.map(item => ({
extracted: extract,
id: item.hash,
url: data.animated ? `https://i.imgur.com/${item.hash}.mp4` : `https://i.imgur.com/${item.hash}${item.ext}`,
title: item.title || (extract ? data.title : null),
description: item.description || (extract ? data.description : null),
type: item.animated ? 'video/mp4' : mime.lookup(item.ext.split('?')[0]),
datetime: new Date(item.datetime),
})),
};
}
*/
module.exports = imgurAlbumApi;

View File

@ -10,6 +10,10 @@ async function imgurImageApi(host) {
},
});
if (res.status !== 200) {
throw new Error(`Imgur API returned HTTP ${res.status} for source '${host.url}'`);
}
const { data } = await res.json();
if (res.status !== 200) {
@ -32,37 +36,6 @@ async function imgurImageApi(host) {
async function imgurImage(host, post) {
return imgurImageApi(host, post);
/*
* as of late 2019, imgur requires log in to view albums and gallery images
const res = await fetch(`https://imgur.com/${post.host.id}`);
const html = await res.text();
if (res.status !== 200) {
if (config.methods.imgur.clientId) {
console.log('\x1b[31m%s\x1b[0m', `Could not scrape info for imgur image '${post.host.id}' (${res.statusText}), trying API fallback (${post.permalink})`);
return imgurImageApi(post);
}
throw new Error(`Could not scrape info for imgur image '${post.host.id}' (${res.statusText}), no API fallback configured`);
}
const dataString = html.replace(/\s+/g, ' ').match(/}}, item:(.*)}; var PREBID_TIMEOUT/)[1];
const data = JSON.parse(dataString);
return {
album: null,
items: [{
id: data.hash,
url: data.animated ? `https://i.imgur.com/${post.host.id}.mp4` : `https://i.imgur.com/${post.host.id}${data.ext}`,
title: data.title,
description: data.description,
type: data.animated ? 'video/mp4' : data.mimetype,
datetime: new Date(data.timestamp || data.datetime),
}],
};
*/
}
module.exports = imgurImage;

View File

@ -7,8 +7,10 @@ const gfycat = require('./gfycat');
const imgurAlbum = require('./imgurAlbum');
const imgurImage = require('./imgurImage');
const redditImage = require('./redditImage');
const redditAlbum = require('./redditAlbum');
const redditPreview = require('./redditPreview');
const redditVideo = require('./redditVideo');
const redgifs = require('./redgifs');
const self = require('./self');
const vidbleAlbum = require('./vidbleAlbum');
const vidbleImage = require('./vidbleImage');
@ -23,8 +25,10 @@ module.exports = {
imgurAlbum,
imgurImage,
redditImage,
redditAlbum,
redditPreview,
redditVideo,
redgifs,
self,
tube,
vidbleAlbum,

View File

@ -0,0 +1,32 @@
'use strict';
const mime = require('mime');
const bhttp = require('bhttp');
const { JSDOM } = require('jsdom');
async function redditAlbum(host, post) {
const res = await bhttp.get(host.url);
if (res.statusCode !== 200) {
throw new Error(res.body.toString());
}
const { document } = new JSDOM(res.body.toString(), { runScripts: 'dangerously' }).window;
const items = Array.from(document.querySelectorAll('li a'), el => el.href);
return {
album: {
id: host.id,
url: host.url,
title: post.title,
},
items: items.map(url => ({
id: new URL(url).pathname.match(/\/(.*).jpg/)[1],
url,
datetime: post.datetime,
type: mime.getType(url) || 'image/jpeg',
})),
};
}
module.exports = redditAlbum;

View File

@ -1,6 +1,6 @@
'use strict';
const mime = require('mime-types');
const mime = require('mime');
async function redditImage(host, post) {
return {
@ -10,7 +10,7 @@ async function redditImage(host, post) {
url: post.url,
title: post.title,
datetime: post.datetime,
type: mime.lookup(post.url.split('/.')[0]) || 'image/jpeg',
type: mime.getType(post.url) || 'image/jpeg',
original: post,
}],
};

69
src/methods/redgifs.js Normal file
View File

@ -0,0 +1,69 @@
'use strict';
const fetch = require('node-fetch');
const mime = require('mime');
function scrapeGallery(data) {
const oldestDate = Math.min(...data.gifs.map(gif => gif.createDate));
return {
album: {
id: data.id,
datetime: new Date(oldestDate * 1000),
},
items: data.gifs.map(gif => ({
id: gif.id,
url: gif.urls.hd,
description: gif.tags.join(', '),
type: mime.getType(gif.urls.hd),
datetime: new Date(gif.createDate * 1000),
original: gif,
})),
};
}
async function fetchGallery(galleryId) {
const res = await fetch(`https://api.redgifs.com/v2/gallery/${galleryId}`);
const data = await res.json();
if (!data.gifs) {
return null;
}
return scrapeGallery(data);
}
async function redgifs(host) {
const res = await fetch(`https://api.redgifs.com/v2/gifs/${host.id.toLowerCase()}`);
const data = await res.json();
if (data.errorMessage) {
throw new Error(`RedGifs API returned error for source '${host.url}' (${res.status}): ${data.errorMessage.description}`);
}
if (data.id && data.gifs) {
return scrapeGallery(data);
}
if (!data.gif) {
return null;
}
if (data.gif.gallery) {
return fetchGallery(data.gif.gallery);
}
return {
album: null,
items: [{
id: data.gif.id,
url: data.gif.urls.hd,
description: data.gif.tags.join(', '),
type: mime.getType(data.gif.urls.hd),
datetime: new Date(data.gif.createDate * 1000),
original: data.gif,
}],
};
}
module.exports = redgifs;

View File

@ -3,6 +3,8 @@
const youtubedl = require('youtube-dl');
const dateFns = require('date-fns');
const logger = require('../logger')(__filename);
async function tube(host, post) {
try {
const data = await new Promise((resolve, reject) => {
@ -15,7 +17,7 @@ async function tube(host, post) {
});
});
host.id = data.display_id;
host.id = data.display_id; // eslint-disable-line no-param-reassign
return {
album: null,
@ -32,7 +34,7 @@ async function tube(host, post) {
],
};
} catch (error) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring possible profile page '${host.url}' (${post ? post.permalink : 'no post'})`);
logger.warn(`Ignoring possible image or profile page '${host.url}' (${post ? post.permalink : 'no post'})`);
return null;
}

View File

@ -6,6 +6,8 @@ const UrlPattern = require('url-pattern');
const cheerio = require('cheerio');
const mime = require('mime-types');
const logger = require('../logger')(__filename);
const pattern = new UrlPattern('https\\://(www.)vidble.com/:id(_med)(.:ext)');
async function vidbleAlbum(host, post) {
@ -23,7 +25,7 @@ async function vidbleAlbum(host, post) {
const extract = config.library.extractSingleAlbumItem && imgUrls.length === 1;
if (extract) {
console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${post.title}' - ${res.link}`);
logger.verbose(`Extracting single item from album '${post.title}' - ${res.link}`);
}
return {
@ -43,7 +45,7 @@ async function vidbleAlbum(host, post) {
id,
url: `https://vidble.com/${id}.${components.ext}`,
type: mimetype,
datetime: post.datetime,
datetime: post ? post.datetime : null,
};
}),
};

View File

@ -1,9 +1,11 @@
'use strict';
const logger = require('../logger')(__filename);
async function saveMeta(filepath, meta, ep) {
await ep.writeMetadata(filepath, meta, ['overwrite_original']);
console.log('\x1b[36m%s\x1b[0m', `Wrote metadata to '${filepath}'`);
logger.debug(`Wrote metadata to '${filepath}'`);
}
module.exports = saveMeta;

View File

@ -3,24 +3,25 @@
const ffmpeg = require('fluent-ffmpeg');
const fs = require('fs-extra');
function mux(target, sources, item) {
return new Promise((resolve, reject) => {
return sources.reduce((acc, source) => {
return acc.input(source);
}, ffmpeg()).videoCodec('copy').audioCodec('copy').on('start', cmd => {
console.log('\x1b[36m%s\x1b[0m', `Muxing ${sources.length} streams to '${target}'`);
}).on('end', (stdout) => {
console.log('\x1b[32m%s\x1b[0m', `Muxed and saved '${target}'`);
const logger = require('../logger')(__filename);
function mux(target, sources) {
return new Promise((resolve, reject) => sources.reduce((acc, source) => acc.input(source), ffmpeg())
.videoCodec('copy')
.audioCodec('copy')
.on('start', () => {
logger.verbose(`Muxing ${sources.length} streams to '${target}'`);
})
.on('end', (stdout) => {
logger.verbose(`Muxed and saved '${target}'`);
resolve(stdout);
}).on('error', error => reject).save(target);
}).then(() => {
return Promise.all(sources.map(source => {
return fs.remove(source);
})).then(() => {
console.log('\x1b[36m%s\x1b[0m', `Cleaned up temporary files for '${target}'`);
});
});
};
})
.on('error', () => reject)
.save(target))
.then(() => Promise.all(sources.map(source => fs.remove(source))).then(() => {
logger.verbose(`Cleaned up temporary files for '${target}'`);
}));
}
module.exports = mux;

View File

@ -4,14 +4,15 @@ const config = require('config');
const Promise = require('bluebird');
const UrlPattern = require('url-pattern');
const interpolate = require('../interpolate.js');
const fetchItem = require('../fetch/item.js');
// const textToStream = require('./textToStream.js');
const save = require('./save.js');
const interpolate = require('../interpolate');
const fetchItem = require('../fetch/item');
// const textToStream = require('./textToStream');
const save = require('./save');
const logger = require('../logger')(__filename);
async function saveProfileImage(user, args) {
if (!args.redownloadProfile && user.indexed.profile.image) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring already present profile image for '${user.name}' (https://reddit.com/user/${user.name})`);
logger.verbose(`Ignoring already present profile image for '${user.name}' (https://reddit.com/user/${user.name})`);
return user.indexed.profile.image;
}
@ -20,7 +21,7 @@ async function saveProfileImage(user, args) {
const image = user.profile ? user.profile.image : user.image;
if (config.library.profile.avoidAvatar && new UrlPattern('http(s)\\://(www.)redditstatic.com/avatars/:id(.:ext)(?:query)').match(image)) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring standard avatar profile image for '${user.name}' (https://reddit.com/user/${user.name})`);
logger.verbose(`Ignoring standard avatar profile image for '${user.name}' (https://reddit.com/user/${user.name})`);
return null;
}
@ -44,7 +45,7 @@ async function saveProfileImage(user, args) {
return targets[0];
} catch (error) {
console.log('\x1b[33m%s\x1b[0m', `Could not save profile image for '${user.name}': ${error} (https://reddit.com/user/${user.name})`);
logger.warn(`Could not save profile image for '${user.name}': ${error} (https://reddit.com/user/${user.name})`);
return null;
}
@ -55,7 +56,7 @@ async function saveProfileImage(user, args) {
async function saveProfileDescription(user, args) {
if (!args.redownloadProfile && user.indexed.profile.description) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring already present profile description for '${user.name}' (https://reddit.com/user/${user.name})`);
logger.verbose(`Ignoring already present profile description for '${user.name}' (https://reddit.com/user/${user.name})`);
return user.indexed.profile.description;
}
@ -70,13 +71,13 @@ async function saveProfileDescription(user, args) {
return targets[0];
} catch (error) {
console.log('\x1b[33m%s\x1b[0m', `Could not save profile description for '${user.name}': ${error} (https://reddit.com/user/${user.name})`);
logger.error(`Could not save profile description for '${user.name}': ${error} (https://reddit.com/user/${user.name})`);
return null;
}
}
console.log('\x1b[33m%s\x1b[0m', `No profile description for '${user.name}' (https://reddit.com/user/${user.name})`);
logger.verbose(`No profile description for '${user.name}' (https://reddit.com/user/${user.name})`);
return null;
}

View File

@ -4,6 +4,7 @@ const config = require('config');
const fs = require('fs-extra');
const path = require('path');
const truncate = require('../utils/truncate-bytes');
const logger = require('../logger')(__filename);
function limitPathElement(element, limit) {
return element.split('/').map((component) => {
@ -30,48 +31,14 @@ async function writeBufferToFile(target, buffer, item) {
await fs.writeFile(target, buffer);
if (item && item.mux) {
console.log(`Temporarily saved '${target}', queued for muxing`);
logger.debug(`Temporarily saved '${target}', queued for muxing`);
} else {
console.log('\x1b[32m%s\x1b[0m', `Saved '${target}'`);
logger.verbose(`Saved '${target}'`);
}
return target;
}
/*
async function pipeStreamToFile(target, stream, item) {
const file = fs.createWriteStream(target);
return new Promise((resolve, reject) => {
stream.pipe(file);
stream.on('error', reject);
stream.on('end', () => {
if (item && item.mux) {
console.log(`Temporarily saved '${target}', queued for muxing`);
} else {
console.log('\x1b[32m%s\x1b[0m', `Saved '${target}'`);
}
resolve(target);
});
});
}
async function save(requestedFilepath, streamOrStreams, item) {
const pathElements = getPathElements(requestedFilepath);
const streams = [].concat(streamOrStreams); // allow for single stream argument
await fs.ensureDir(pathElements.dir);
return Promise.all(streams.map((stream, index) => {
const target = path.join(pathElements.root, pathElements.dir, `${pathElements.name}${streams.length > 1 ? `-${index}` : ''}${pathElements.ext}`);
return pipeStreamToFile(target, stream, item);
}));
}
*/
async function save(requestedFilepath, bufferOrBuffers, item) {
const pathElements = getPathElements(requestedFilepath);
const buffers = [].concat(bufferOrBuffers); // allow for single stream argument

View File

@ -6,6 +6,7 @@ const yaml = require('js-yaml');
const interpolate = require('../interpolate');
// const textToStream = require('./textToStream');
const save = require('./save');
const logger = require('../logger')(__filename);
async function writeToIndex(posts, profilePaths, user, args) {
const filepath = interpolate(config.library.index.file, null, null, null, null, user, false);
@ -44,8 +45,14 @@ async function writeToIndex(posts, profilePaths, user, args) {
return false;
}
// return save(filepath, textToStream(yaml.safeDump(data)));
return save(filepath, Buffer.from(yaml.safeDump(data), 'utf8'));
try {
const yamlIndex = yaml.safeDump(data);
return save(filepath, Buffer.from(yamlIndex, 'utf8'));
} catch (error) {
logger.error(`Could not save index for ${user.username}: ${error.message}`);
return null;
}
}
module.exports = writeToIndex;

View File

@ -4,7 +4,8 @@ const config = require('config');
const fs = require('fs-extra');
const yaml = require('js-yaml');
const interpolate = require('../interpolate.js');
const logger = require('../logger')(__filename);
const interpolate = require('../interpolate');
async function getIndex(user) {
const indexFilePath = interpolate(config.library.index.file, null, null, null, null, user, false);
@ -14,7 +15,7 @@ async function getIndex(user) {
return yaml.safeLoad(indexFile);
} catch (error) {
console.log('\x1b[33m%s\x1b[0m', `No index file found for '${user.name}' at '${indexFilePath}'`);
logger.info(`No index file found for '${user.name}' at '${indexFilePath}'`);
return { profile: { image: null, description: null }, posts: [] };
}

View File

@ -5,33 +5,38 @@ const Promise = require('bluebird');
const getIndex = require('./getIndex.js');
const curateUser = require('../curate/user.js');
const logger = require('../logger')(__filename);
const limiter = require('../limiter').reddit;
async function getUser(username, reddit) {
try {
const user = await reddit.getUser(username).fetch();
const user = await limiter.schedule(async () => reddit.getUser(username).fetch());
return curateUser(user);
} catch (error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
logger.error(`Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
return {
name: username,
fallback: true,
};
}
};
}
const getPostsWrap = reddit => function getPosts(postIds, userPosts = {}) {
return Promise.reduce(postIds, (accUserPosts, postId) => Promise.resolve().then(async () => {
const post = await reddit
.getSubmission(postId)
.fetch();
const post = await limiter.schedule(async () => reddit.getSubmission(postId).fetch());
post.direct = true;
if (accUserPosts[post.author.name]) {
accUserPosts[post.author.name].posts = accUserPosts[post.author.name].posts.concat(post);
return accUserPosts;
return {
...accUserPosts,
[post.author.name]: {
...accUserPosts[post.author.name],
posts: [...accUserPosts[post.author.name].posts, post],
},
};
}
// don't attempt to fetch deleted user

View File

@ -6,15 +6,18 @@ const getIndex = require('./getIndex.js');
const getArchivePostIds = require('../archives/getArchivePostIds.js');
const curateUser = require('../curate/user.js');
const logger = require('../logger')(__filename);
const limiter = require('../limiter').reddit;
async function getUser(username, reddit) {
try {
const user = await reddit.getUser(username).fetch();
const user = await limiter.schedule(async () => reddit.getUser(username).fetch());
console.log(`Fetched user profile for '${username}' (https://reddit.com/user/${username})`);
logger.info(`Fetched user profile for '${username}' (https://reddit.com/user/${username})`);
return curateUser(user);
} catch (error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
logger.error(`Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
return {
name: username,
@ -25,18 +28,18 @@ async function getUser(username, reddit) {
async function getPosts(username, reddit, args) {
try {
const submissions = await reddit
const submissions = await limiter.schedule(async () => reddit
.getUser(username)
.getSubmissions({
sort: args.sort,
limit: Infinity,
});
}));
console.log(`Fetched ${submissions.length} submissions for '${username}' (https://reddit.com/user/${username})`);
logger.info(`Fetched ${submissions.length} submissions for '${username}' (https://reddit.com/user/${username})`);
return submissions;
} catch (error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch posts from reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
logger.warn(`Failed to fetch posts from reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
return [];
}
@ -45,7 +48,7 @@ async function getPosts(username, reddit, args) {
async function getArchivedPosts(username, posts, reddit) {
const postIds = await getArchivePostIds(username, posts.map(post => post.id));
return Promise.all(postIds.map(postId => reddit.getSubmission(postId).fetch()));
return Promise.all(postIds.map(postId => limiter.schedule(async () => reddit.getSubmission(postId).fetch())));
}
function getUserPostsWrap(reddit, args) {
@ -78,12 +81,12 @@ function getUserPostsWrap(reddit, args) {
return null;
} catch (error) {
console.log(username, error);
logger.error(`Failed to fetch posts from '${username}': ${error.message}`);
return null;
}
}, {
concurrency: 5,
concurrency: 10,
});
return users.reduce(
@ -97,7 +100,7 @@ function getUserPostsWrap(reddit, args) {
{},
);
} catch (error) {
console.log(error);
logger.error(error);
throw error;
}