From de50d609f3507b898e41e6452f5fc7527ee73562 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 11 Sep 2024 05:16:58 +0200 Subject: [PATCH] Added support for RedGifs and Reddit albums. Improved command line logger. Added rate limiters for reddit and host URLs. --- .eslintrc | 2 +- README.md | 3 +- config/default.js | 7 + package-lock.json | 213 +++++++++++++++++++++++++++++- package.json | 3 + src/app.js | 24 ++-- src/archives/getArchivePostIds.js | 20 +-- src/cli.js | 16 +++ src/curate/posts.js | 45 +++---- src/dissectLink.js | 12 +- src/fetch/content.js | 30 +++-- src/fetch/info.js | 11 +- src/fetch/item.js | 16 ++- src/limiter.js | 19 +++ src/logger.js | 26 ++++ src/methods/erome.js | 4 +- src/methods/gfycat.js | 25 +++- src/methods/imgurAlbum.js | 47 +------ src/methods/imgurImage.js | 35 +---- src/methods/methods.js | 4 + src/methods/redditAlbum.js | 32 +++++ src/methods/redditImage.js | 4 +- src/methods/redgifs.js | 69 ++++++++++ src/methods/tube.js | 6 +- src/methods/vidbleAlbum.js | 6 +- src/save/meta.js | 4 +- src/save/mux.js | 35 ++--- src/save/profileDetails.js | 21 +-- src/save/save.js | 39 +----- src/save/writeToIndex.js | 11 +- src/sources/getIndex.js | 5 +- src/sources/getPosts.js | 23 ++-- src/sources/getUserPosts.js | 25 ++-- 33 files changed, 586 insertions(+), 256 deletions(-) create mode 100644 src/limiter.js create mode 100644 src/logger.js create mode 100644 src/methods/redditAlbum.js create mode 100644 src/methods/redgifs.js diff --git a/.eslintrc b/.eslintrc index 780f9cb..b0d0c02 100644 --- a/.eslintrc +++ b/.eslintrc @@ -6,6 +6,6 @@ "rules": { "no-console": 0, "indent": ["error", 4], - "max-len": [2, {"code": 200, "tabWidth": 4, "ignoreUrls": true}] + "max-len": 0 } } diff --git a/README.md b/README.md index 31016fc..1703cee 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,10 @@ Most features are optional and can easily be disabled! * Extract single images from albums ### Supported hosts -* Reddit text/self, images and videos[\*](#reddit-videos) +* Reddit text/self, images, albums and videos[\*](#reddit-videos) * Imgur (requires API key as of late 2019) * Gfycat +* RedGifs * YouTube * PornHub, YouPorn, xHamster, RedTube, xnxx, YouJizz * Twitter, Facebook, Instagram diff --git a/config/default.js b/config/default.js index 4b9d01c..dd26c1f 100644 --- a/config/default.js +++ b/config/default.js @@ -69,6 +69,13 @@ module.exports = { reupload: [], }, }, + logger: { + level: 'info', + }, + limiter: { + concurrency: 100, + interval: 100, + }, reddit: { api: { userAgent: 'ripunzel', diff --git a/package-lock.json b/package-lock.json index 279fd37..6cda8af 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4,6 +4,16 @@ "lockfileVersion": 1, "requires": true, "dependencies": { + "@dabh/diagnostics": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/@dabh/diagnostics/-/diagnostics-2.0.2.tgz", + "integrity": "sha512-+A1YivoVDNNVCdfozHSR8v/jyuuLTMXwjWuxPFlFlUapXoGc+Gj9mDlTDDfrwl7rXCl2tNZ0kE8sIBO6YOn96Q==", + "requires": { + "colorspace": "1.1.x", + "enabled": "2.0.x", + "kuler": "^2.0.0" + } + }, "@types/node": { "version": "9.6.5", "resolved": "https://registry.npmjs.org/@types/node/-/node-9.6.5.tgz", @@ -295,6 +305,11 @@ "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", "integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24=" }, + "bottleneck": { + "version": "2.19.5", + "resolved": "https://registry.npmjs.org/bottleneck/-/bottleneck-2.19.5.tgz", + "integrity": "sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw==" + }, "brace-expansion": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", @@ -443,6 +458,25 @@ "resolved": "https://registry.npmjs.org/code-point-at/-/code-point-at-1.1.0.tgz", "integrity": "sha1-DQcLTQQ6W+ozovGkDi7bPZpMz3c=" }, + "color": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/color/-/color-3.2.1.tgz", + "integrity": "sha512-aBl7dZI9ENN6fUGC7mWpMTPNHmWUSNan9tuWN6ahh5ZLNk9baLJOnSMlrQkHcrfFgz2/RigjUVAjdx36VcemKA==", + "requires": { + "color-convert": "^1.9.3", + "color-string": "^1.6.0" + }, + "dependencies": { + "color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "requires": { + "color-name": "1.1.3" + } + } + } + }, "color-convert": { "version": "1.9.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.1.tgz", @@ -455,8 +489,30 @@ "color-name": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", - "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=", - "dev": true + "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=" + }, + "color-string": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.0.tgz", + "integrity": "sha512-9Mrz2AQLefkH1UvASKj6v6hj/7eWgjnT/cVsR8CumieLoT+g900exWeNogqtweI8dxloXN9BDQTYro1oWu/5CQ==", + "requires": { + "color-name": "^1.0.0", + "simple-swizzle": "^0.2.2" + } + }, + "colors": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/colors/-/colors-1.4.0.tgz", + "integrity": "sha512-a+UqTh4kgZg/SlGvfbzDHpgRu7AAQOmmqRHJnxhRZICKFUT91brVhNNt58CMWU9PsBbv3PDCZUHbVxuDiH2mtA==" + }, + "colorspace": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/colorspace/-/colorspace-1.1.4.tgz", + "integrity": "sha512-BgvKJiuVu1igBUF2kEjRCZXol6wiiGbY5ipL/oVPwm0BL9sIpMIzM8IK7vwuxIIzOXMV3Ey5w+vxhm0rR/TN8w==", + "requires": { + "color": "^3.1.3", + "text-hex": "1.0.x" + } }, "combined-stream": { "version": "1.0.6", @@ -724,6 +780,11 @@ "jsbn": "~0.1.0" } }, + "enabled": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/enabled/-/enabled-2.0.0.tgz", + "integrity": "sha512-AKrN98kuwOzMIdAizXGI86UFBoo26CL21UM763y1h/GMSJ4/OHU9k2YlsmBpyScFo/wbLzWQJBMCW4+IO3/+OQ==" + }, "end-of-stream": { "version": "1.4.4", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", @@ -1062,6 +1123,11 @@ "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", "integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=" }, + "fecha": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/fecha/-/fecha-4.2.1.tgz", + "integrity": "sha512-MMMQ0ludy/nBs1/o0zVOiKTpG7qMbonKUzjJgQFEuvq6INZ1OraKPRAWkBq5vlKLOUMpmNYG1JoN3oDPUQ9m3Q==" + }, "figures": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/figures/-/figures-2.0.0.tgz", @@ -1110,6 +1176,11 @@ "which": "^1.1.1" } }, + "fn.name": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/fn.name/-/fn.name-1.1.0.tgz", + "integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw==" + }, "foreach": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/foreach/-/foreach-2.0.5.tgz", @@ -1161,6 +1232,11 @@ "resolved": "https://registry.npmjs.org/lodash/-/lodash-2.4.2.tgz", "integrity": "sha1-+t2DS5aDBz2hebPq5tnA0VBT9z4=" }, + "mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==" + }, "uuid": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/uuid/-/uuid-2.0.3.tgz", @@ -1829,6 +1905,11 @@ "verror": "1.10.0" } }, + "kuler": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/kuler/-/kuler-2.0.0.tgz", + "integrity": "sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A==" + }, "lcid": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/lcid/-/lcid-1.0.0.tgz", @@ -1877,6 +1958,25 @@ "resolved": "https://registry.npmjs.org/lodash.sortby/-/lodash.sortby-4.7.0.tgz", "integrity": "sha1-7dFMgk4sycHgsKG0K7UhBRakJDg=" }, + "logform": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/logform/-/logform-2.3.0.tgz", + "integrity": "sha512-graeoWUH2knKbGthMtuG1EfaSPMZFZBIrhuJHhkS5ZseFBrc7DupCzihOQAzsK/qIKPQaPJ/lFQFctILUY5ARQ==", + "requires": { + "colors": "^1.2.1", + "fecha": "^4.2.0", + "ms": "^2.1.1", + "safe-stable-stringify": "^1.1.0", + "triple-beam": "^1.3.0" + }, + "dependencies": { + "ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + } + } + }, "lru-cache": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.2.tgz", @@ -1905,9 +2005,9 @@ "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==" }, "mime": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", - "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==" + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-3.0.0.tgz", + "integrity": "sha512-jSCU7/VB1loIWBZe14aEYHU/+1UMEHoaO7qxCOVJOw9GgH72VAWppxNcjU+x9a2k3GSIBXNKxXQFqRvvZ7vr3A==" }, "mime-db": { "version": "1.33.0", @@ -2069,6 +2169,14 @@ "wrappy": "1" } }, + "one-time": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/one-time/-/one-time-1.0.0.tgz", + "integrity": "sha512-5DXOiRKwuSEcQ/l0kGCF6Q3jcADFv5tSmRaJck/OqkVFcOzutB134KRSfF0xDrL39MNnqxbHBbUUcjZIhTgb2g==", + "requires": { + "fn.name": "1.x.x" + } + }, "onetime": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/onetime/-/onetime-2.0.1.tgz", @@ -2538,6 +2646,11 @@ "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.1.tgz", "integrity": "sha512-kKvNJn6Mm93gAczWVJg7wH+wGYWNrDHdWvpUmHyEsgCtIwwo3bqPtV4tR5tuPaUhTOo/kvhVwd8XwwOllGYkbg==" }, + "safe-stable-stringify": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-1.1.1.tgz", + "integrity": "sha512-ERq4hUjKDbJfE4+XtZLFPCDi8Vb1JqaxAPTxWFLBx8XcAlf9Bda/ZJdVezs/NAfsMQScyIlUMx+Yeu7P7rx5jw==" + }, "safer-buffer": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", @@ -2580,6 +2693,21 @@ "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.2.tgz", "integrity": "sha1-tf3AjxKH6hF4Yo5BXiUTK3NkbG0=" }, + "simple-swizzle": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz", + "integrity": "sha1-pNprY1/8zMoz9w0Xy5JZLeleVXo=", + "requires": { + "is-arrayish": "^0.3.1" + }, + "dependencies": { + "is-arrayish": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz", + "integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==" + } + } + }, "slice-ansi": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-1.0.0.tgz", @@ -2672,6 +2800,11 @@ "tweetnacl": "~0.14.0" } }, + "stack-trace": { + "version": "0.0.10", + "resolved": "https://registry.npmjs.org/stack-trace/-/stack-trace-0.0.10.tgz", + "integrity": "sha1-VHxws0fo0ytOEI6hoqFZ5f3eGcA=" + }, "stealthy-require": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/stealthy-require/-/stealthy-require-1.1.1.tgz", @@ -2782,6 +2915,11 @@ "resolved": "https://registry.npmjs.org/template-format/-/template-format-1.2.4.tgz", "integrity": "sha512-+8ItNMtMTBbsEHyPR1l7Ke1WZfl91PAcoTvwAcx5U28CRLd7ylzDLazv0kuDTzNmdq/RAOnsxFVWzr4QwVIFVg==" }, + "text-hex": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/text-hex/-/text-hex-1.0.0.tgz", + "integrity": "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg==" + }, "text-table": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", @@ -2881,6 +3019,11 @@ "resolved": "https://registry.npmjs.org/traverse/-/traverse-0.6.6.tgz", "integrity": "sha1-y99WD9e5r2MlAv7UD5GMFX6pcTc=" }, + "triple-beam": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/triple-beam/-/triple-beam-1.3.0.tgz", + "integrity": "sha512-XrHUvV5HpdLmIj4uVMxHggLbFSZYIn7HEWsqePZcI50pco+MPqJ50wMGY794X7AOOhxOBAjbkqfAbEe/QMp2Lw==" + }, "tunnel-agent": { "version": "0.6.0", "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", @@ -3037,6 +3180,66 @@ "resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.0.tgz", "integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=" }, + "winston": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/winston/-/winston-3.3.3.tgz", + "integrity": "sha512-oEXTISQnC8VlSAKf1KYSSd7J6IWuRPQqDdo8eoRNaYKLvwSb5+79Z3Yi1lrl6KDpU6/VWaxpakDAtb1oQ4n9aw==", + "requires": { + "@dabh/diagnostics": "^2.0.2", + "async": "^3.1.0", + "is-stream": "^2.0.0", + "logform": "^2.2.0", + "one-time": "^1.0.0", + "readable-stream": "^3.4.0", + "stack-trace": "0.0.x", + "triple-beam": "^1.3.0", + "winston-transport": "^4.4.0" + }, + "dependencies": { + "async": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/async/-/async-3.2.2.tgz", + "integrity": "sha512-H0E+qZaDEfx/FY4t7iLRv1W2fFI6+pyCeTw1uN20AQPiwqwM6ojPxHxdLv4z8hi2DtnW9BOckSspLucW7pIE5g==" + }, + "is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==" + }, + "readable-stream": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz", + "integrity": "sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==", + "requires": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + } + } + } + }, + "winston-transport": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/winston-transport/-/winston-transport-4.4.1.tgz", + "integrity": "sha512-ciZRlU4CSjHqHe8RQG1iPxKMRVwv6ZJ0RC7DxStKWd0KjpAhPDy5gVYSCpIUq+5CUsP+IyNOTZy1X0tO2QZqjg==", + "requires": { + "logform": "^2.2.0", + "readable-stream": "^3.4.0", + "triple-beam": "^1.2.0" + }, + "dependencies": { + "readable-stream": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz", + "integrity": "sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==", + "requires": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + } + } + } + }, "wordwrap": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz", diff --git a/package.json b/package.json index 44ca420..f77b4cc 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,7 @@ "bhttp": "^1.2.4", "blake2": "^4.0.0", "bluebird": "^3.5.1", + "bottleneck": "^2.19.5", "cheerio": "^1.0.0-rc.2", "config": "^1.30.0", "date-fns": "^1.29.0", @@ -40,6 +41,7 @@ "fs-extra": "^5.0.0", "js-yaml": "^3.12.0", "jsdom": "^15.2.0", + "mime": "^3.0.0", "mime-types": "^2.1.18", "node-cron": "^1.2.1", "node-exiftool": "^2.3.0", @@ -49,6 +51,7 @@ "snoowrap": "^1.20.0", "template-format": "^1.2.4", "url-pattern": "^1.0.3", + "winston": "^3.3.3", "yargs": "^11.0.0", "youtube-dl": "^2.1.0" }, diff --git a/src/app.js b/src/app.js index e80ed78..81c8a9a 100644 --- a/src/app.js +++ b/src/app.js @@ -7,21 +7,21 @@ const Promise = require('bluebird'); const exiftool = require('node-exiftool'); const exiftoolBin = require('dist-exiftool'); const cron = require('node-cron'); -const { format } = require('date-fns'); require('array.prototype.flatten').shim(); const reddit = new Snoowrap(config.reddit.api); -const args = require('./cli.js')(); +const args = require('./cli')(); +const logger = require('./logger')(__filename); -const dissectLink = require('./dissectLink.js'); -const curatePosts = require('./curate/posts.js'); +const dissectLink = require('./dissectLink'); +const curatePosts = require('./curate/posts'); -const { attachContentInfo, getInfo } = require('./fetch/info.js'); -const { fetchSaveUserContent, fetchSaveDirectContent } = require('./fetch/content.js'); +const { attachContentInfo, getInfo } = require('./fetch/info'); +const { fetchSaveUserContent, fetchSaveDirectContent } = require('./fetch/content'); -const getPosts = require('./sources/getPosts.js')(reddit, args); -const getUserPosts = require('./sources/getUserPosts.js')(reddit, args); +const getPosts = require('./sources/getPosts')(reddit, args); +const getUserPosts = require('./sources/getUserPosts')(reddit, args); async function getFileContents(location, label) { try { @@ -29,7 +29,7 @@ async function getFileContents(location, label) { return fileContents.split('\n').filter(entry => entry && entry.slice(0, 1) !== '#'); } catch (error) { - console.log('\x1b[31m%s\x1b[0m', `Could not read ${label} file '${location}': ${error}.`); + logger.error(`Could not read ${label} file '${location}': ${error}.`); return []; } @@ -120,13 +120,13 @@ async function initApp() { await ep.close(); if (args.watch) { - console.log(`[${format(new Date(), 'YYYY-MM-DD HH:mm:ss')}] Watch-mode enabled, checking again for new posts according to crontab '${config.fetch.watch.schedule}'.`); + logger.info(`Watch-mode enabled, checking again for new posts according to crontab '${config.fetch.watch.schedule}'.`); } } catch (error) { if (args.debug) { - console.log('\x1b[31m%s\x1b[0m', error.stack); + logger.error(error.stack); } else { - console.log('\x1b[31m%s\x1b[0m', error.message); + logger.error(error.message); } } } diff --git a/src/archives/getArchivePostIds.js b/src/archives/getArchivePostIds.js index d5f5677..b187c19 100644 --- a/src/archives/getArchivePostIds.js +++ b/src/archives/getArchivePostIds.js @@ -2,18 +2,20 @@ const config = require('config'); -const archives = require('./archives.js'); +const logger = require('../logger')(__filename); +const archives = require('./archives'); function getArchivePostIds(username, exclude) { - console.log(`Finding archived posts for '${username}'...`); + logger.info(`Finding archived posts for '${username}'...`); - return Promise.all(config.fetch.archives.reddit.map(source => archives[source](username))).then(postIds => postIds.flatten()).then(postIds => { - return exclude ? postIds.filter(postId => !exclude.includes(postId)) : postIds; - }).then(postIds => { - console.log(`Found ${postIds.length} unique archived posts for user '${username}'`); + return Promise.all(config.fetch.archives.reddit.map(source => archives[source](username))) + .then(postIds => postIds.flatten()) + .then(postIds => (exclude ? postIds.filter(postId => !exclude.includes(postId)) : postIds)) + .then((postIds) => { + logger.info(`Found ${postIds.length} unique archived posts for user '${username}'`); - return postIds; - }); -}; + return postIds; + }); +} module.exports = getArchivePostIds; diff --git a/src/cli.js b/src/cli.js index 1fa7f57..d5cc6fa 100644 --- a/src/cli.js +++ b/src/cli.js @@ -6,6 +6,22 @@ const yargs = require('yargs'); function getArgs() { const args = yargs .command('npm start -- --user ') + .option('log-level', { + alias: 'level', + describe: 'CLI log verbosity', + type: 'string', + default: config.logger.level, + }) + .option('interval', { + describe: 'Minimum wait time between HTTP requests', + type: 'number', + default: config.limiter.interval, + }) + .option('concurrency', { + describe: 'Maximum HTTP requests pending at the same time', + type: 'number', + default: config.limiter.concurrency, + }) .option('users', { alias: 'user', describe: 'Reddit usernames to fetch posts from', diff --git a/src/curate/posts.js b/src/curate/posts.js index 8f9c83f..6161023 100644 --- a/src/curate/posts.js +++ b/src/curate/posts.js @@ -7,43 +7,39 @@ const dissectLink = require('../dissectLink.js'); const hashPost = require('./hashPost.js'); const { isAfter, isBefore, isEqual } = require('date-fns'); +const logger = require('../logger')(__filename); function report(curatedPosts, indexed, user, args) { const { - indexedUpdated, tooOldCount, tooRecentCount, beforeIndexedCount, afterIndexedCount, requestedIgnored, + indexedUpdated, tooOldCount, tooRecentCount, beforeIndexedCount, afterIndexedCount, requestedIgnored, duplicates, } = curatedPosts; if (indexedUpdated.length > 0) { - console.log('\x1b[33m%s\x1b[0m', `Ignoring ${indexedUpdated.length} indexed posts for '${user.name}'`); + logger.info(`Ignoring ${indexedUpdated.length} indexed posts for '${user.name}'`); } if (requestedIgnored.length > 0) { - console.log( - '\x1b[33m%s\x1b[0m', - `Ignoring ${requestedIgnored.length} posts because their IDs are specified to be ignored for '${user.name}'`, - ); + logger.info(`Ignoring ${requestedIgnored.length} posts because their IDs are specified to be ignored for '${user.name}'`); } if (tooOldCount > 0) { - console.log('\x1b[33m%s\x1b[0m', `Ignoring ${tooOldCount} older posts for '${user.name}' for specified date limit '${args.after}'`); + logger.info(`Ignoring ${tooOldCount} older posts for '${user.name}' for specified date limit '${args.after}'`); } if (tooRecentCount > 0) { - console.log('\x1b[33m%s\x1b[0m', `Ignoring ${tooRecentCount} newer posts for '${user.name}' for specified date limit '${args.before}'`); + logger.info(`Ignoring ${tooRecentCount} newer posts for '${user.name}' for specified date limit '${args.before}'`); } if (beforeIndexedCount > 0) { - console.log( - '\x1b[33m%s\x1b[0m', - `Ignoring ${beforeIndexedCount} posts older than the ${args.afterIndexed} indexed post (${indexed[args.afterIndexed].id}, ${indexed[args.afterIndexed].date}) for '${user.name}'`, - ); + logger.info(`Ignoring ${beforeIndexedCount} posts older than the ${args.afterIndexed} indexed post (${indexed[args.afterIndexed].id}, ${indexed[args.afterIndexed].date}) for '${user.name}'`); } if (afterIndexedCount > 0) { - console.log( - '\x1b[33m%s\x1b[0m', - `Ignoring ${afterIndexedCount} posts newer than the ${args.beforeIndexed} indexed post (${indexed[args.beforeIndexed].id}, ${indexed[args.beforeIndexed].date}) for '${user.name}'`, - ); + logger.info(`Ignoring ${afterIndexedCount} posts newer than the ${args.beforeIndexed} indexed post (${indexed[args.beforeIndexed].id}, ${indexed[args.beforeIndexed].date}) for '${user.name}'`); + } + + if (duplicates.length > 0) { + logger.info(`Ignoring ${duplicates.length} duplicate posts for '${user.name}'`); } } @@ -103,7 +99,7 @@ function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args) const ignoring = args.ignore ? args.ignore.find(prop => post[prop]) : null; if (ignoring) { - console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`); + logger.verbose(`Ignoring ${ignoring} post '${post.title}' (${permalink})`); return acc; } @@ -117,21 +113,15 @@ function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args) } if (hostIncludes || hostExcluded) { - console.log( - '\x1b[33m%s\x1b[0m', - `Ignoring source '${host.label}' from post '${post.url}' (${permalink})`, - ); + logger.info(`Ignoring source '${host.label}' from post '${post.url}' (${permalink})`); return acc; } if (config.fetch.avoidDuplicates && processed.has(host.id)) { - console.log( - '\x1b[33m%s\x1b[0m', - `Ignoring duplicate content '${post.url}' (cross-post, repost or superfluous --post ID) (${permalink})`, - ); + logger.verbose(`Ignoring duplicate content '${post.url}' (cross-post, repost or superfluous --post ID) (${permalink})`); - return acc; + return { ...acc, duplicates: [...acc.duplicates, curatedPost] }; } processed.add(host.id); @@ -139,6 +129,7 @@ function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args) return { ...acc, + processed, posts: [ ...acc.posts, curatedPost, @@ -161,6 +152,8 @@ const curatePosts = (userPosts, ignoreIdsArray, args) => Object.values(userPosts posts: [], indexedUpdated: [], requestedIgnored: [], + duplicates: [], + duplicateCount: 0, tooOldCount: 0, tooRecentCount: 0, beforeIndexedCount: 0, diff --git a/src/dissectLink.js b/src/dissectLink.js index 80fab2f..31e2193 100644 --- a/src/dissectLink.js +++ b/src/dissectLink.js @@ -18,6 +18,11 @@ const hosts = [ label: 'reddit', pattern: new UrlPattern('http(s)\\://i.reddituploads.com/:id(?*)'), }, + { + method: 'redditAlbum', + label: 'reddit', + pattern: new UrlPattern('http(s)\\://(www.)reddit.com/gallery/:id'), + }, { method: 'redditVideo', label: 'reddit', @@ -53,6 +58,11 @@ const hosts = [ label: 'gfycat', pattern: new UrlPattern('http(s)\\://(:server.)gfycat.com/(gifs/detail/)(:id-mobile)(:id-size_restricted)(:id)(.:ext)(?*)'), }, + { + method: 'redgifs', + label: 'redgifs', + pattern: new UrlPattern('http(s)\\://(:subdomain.)redgifs.com(/watch)/(:id-mobile)(:id)(.:ext)(?*)'), + }, { method: 'erome', label: 'erome', @@ -139,7 +149,5 @@ module.exports = function dissectLink(url) { } } - console.log(url); - return null; }; diff --git a/src/fetch/content.js b/src/fetch/content.js index d81f4d5..7fefd4e 100644 --- a/src/fetch/content.js +++ b/src/fetch/content.js @@ -4,14 +4,15 @@ const config = require('config'); const Promise = require('bluebird'); const yaml = require('js-yaml'); -const saveProfileDetails = require('../save/profileDetails.js'); -const fetchItem = require('./item.js'); -const interpolate = require('../interpolate.js'); -const save = require('../save/save.js'); -// const textToStream = require('../save/textToStream.js'); -const saveMeta = require('../save/meta.js'); -const mux = require('../save/mux.js'); -const writeToIndex = require('../save/writeToIndex.js'); +const logger = require('../logger')(__filename); +const saveProfileDetails = require('../save/profileDetails'); +const fetchItem = require('./item'); +const interpolate = require('../interpolate'); +const save = require('../save/save'); +// const textToStream = require('../save/textToStream'); +const saveMeta = require('../save/meta'); +const mux = require('../save/mux'); +const writeToIndex = require('../save/writeToIndex'); function curateComments(comments) { return comments.map((comment) => { @@ -107,6 +108,10 @@ async function fetchSaveUserContent(user, ep, args) { const hashes = new Set(user.indexed.original.map(item => item.hash)); const posts = await Promise.map(user.posts, async (post) => { + if (!post.content) { + return null; + } + const hash = await Promise.reduce(post.content.items, async (accItems, originalItem, index) => { const item = { ...originalItem, index }; const buffers = await getBuffers(item, post, post.host); @@ -118,10 +123,7 @@ async function fetchSaveUserContent(user, ep, args) { // prevent duplicates if (config.fetch.avoidDuplicates && hashes.has(buffers[0].hash)) { - console.log( - '\x1b[33m%s\x1b[0m', - `Ignoring duplicate file '${post.url}' (${post.permalink})`, - ); + logger.verbose(`Ignoring duplicate file '${post.url}' (${post.permalink})`); return buffers[0].hash; } @@ -148,12 +150,12 @@ async function fetchSaveUserContent(user, ep, args) { concurrency: config.fetch.concurrency, }); - return writeToIndex(posts, profilePaths, user, args); + return writeToIndex(posts.filter(Boolean), profilePaths, user, args); } async function fetchSaveDirectContent(content, host, ep) { return Promise.reduce(content.items, async (accItems, originalItem, index) => { - console.log(`Fetching and saving '${host.url}'`); + logger.info(`Fetching and saving '${host.url}'`); const item = { ...originalItem, index }; const buffers = await getBuffers(item, null, host); diff --git a/src/fetch/info.js b/src/fetch/info.js index 14b5a6b..4634725 100644 --- a/src/fetch/info.js +++ b/src/fetch/info.js @@ -3,7 +3,8 @@ const config = require('config'); const Promise = require('bluebird'); -const methods = require('../methods/methods.js'); +const logger = require('../logger')(__filename); +const methods = require('../methods/methods'); const attachContentInfo = (users, reddit) => Promise.reduce(Object.values(users), async (accUsers, user) => ({ ...accUsers, @@ -11,7 +12,7 @@ const attachContentInfo = (users, reddit) => Promise.reduce(Object.values(users) ...user, posts: await Promise.reduce(user.posts, async (accPosts, post) => { if (!post.host || !methods[post.host.method]) { - console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${post.url}' (${post.permalink})`); + logger.warn(`Ignoring unsupported content '${post.url}' (${post.permalink})`); return accPosts; } @@ -25,10 +26,10 @@ const attachContentInfo = (users, reddit) => Promise.reduce(Object.values(users) }, ]; } catch (error) { - console.log('\x1b[31m%s\x1b[0m', `${error} (${post.permalink})`); + logger.warn(`${error.message} (${post.permalink})`); if (config.fetch.archives.preview && post.preview) { - console.log(`Found preview images for unavailable source '${post.url}' (${post.permalink})`); + logger.info(`Found preview images for unavailable source '${post.url}' (${post.permalink})`); return [ ...accPosts, @@ -53,7 +54,7 @@ async function getInfo(host, reddit, url) { return info; } catch (error) { - console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${url}'`); + logger.verbose(`Ignoring unsupported content '${url}'`); return null; } diff --git a/src/fetch/item.js b/src/fetch/item.js index eec1bae..0a64563 100644 --- a/src/fetch/item.js +++ b/src/fetch/item.js @@ -4,13 +4,14 @@ const config = require('config'); const bhttp = require('bhttp'); const blake2 = require('blake2'); +const logger = require('../logger')(__filename); +const limiter = require('../limiter').items; + async function fetchItem(url, attempt, post, host) { async function retry(error) { - console.log('\x1b[31m%s\x1b[0m', `Failed to fetch '${url}': ${error.message} (${post ? post.permalink : 'no post'})`); + logger.warn(`Failed to fetch '${url}', ${attempt < config.fetch.retries ? 'retrying' : 'giving up'}: ${error.message} (${post ? post.permalink : 'no post'})`); if (attempt < config.fetch.retries) { - console.log('Retrying...'); - return fetchItem(url, attempt + 1, post); } @@ -18,13 +19,17 @@ async function fetchItem(url, attempt, post, host) { } try { - const res = await bhttp.get(url); + const res = await limiter.schedule(async () => bhttp.get(url)); if (!res.statusCode === 200) { throw new Error(`Response not OK for '${url}', HTTP code '${res.status}'`); } - console.log(`Fetched '${host.url}' (${post ? post.permalink : 'no post'})`); + if (!Buffer.isBuffer(res.body)) { + throw new Error(`Unexpected response for '${url}' (${res.status}): ${res.body}`); + } + + logger.debug(`Fetched '${host ? host.url : url}' (${post ? post.permalink : 'no post'})`); const hash = blake2.createHash('blake2b', { digestLength: 24 }); hash.update(res.body); @@ -32,7 +37,6 @@ async function fetchItem(url, attempt, post, host) { return Object.assign(res.body, { hash: contentHash }); } catch (error) { - console.log(error); return retry(error); } } diff --git a/src/limiter.js b/src/limiter.js new file mode 100644 index 0000000..1c4a22e --- /dev/null +++ b/src/limiter.js @@ -0,0 +1,19 @@ +'use strict'; + +const Bottleneck = require('bottleneck'); + +const args = require('./cli')(); + +module.exports = { + reddit: new Bottleneck({ + reservoir: 30, + reservoirRefreshAmount: 30, + reservoirRefreshInterval: 60000, + maxConcurrent: 1, + minTime: 100, + }), + items: new Bottleneck({ + maxConcurrent: args.concurrency, + minTime: args.interval, + }), +}; diff --git a/src/logger.js b/src/logger.js new file mode 100644 index 0000000..dede395 --- /dev/null +++ b/src/logger.js @@ -0,0 +1,26 @@ +'use strict'; + +const winston = require('winston'); + +const args = require('./cli.js')(); + +const logger = winston.createLogger({ + level: args.logLevel, + transports: [ + new winston.transports.Console({ + level: args.logLevel, + format: winston.format.combine( + winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), + winston.format.colorize(), + winston.format.printf(info => `${info.timestamp} ${info.level}: ${info.message}`), + ), + timestamp: true, + }), + ], +}); + +function getLogger() { + return logger; +} + +module.exports = getLogger; diff --git a/src/methods/erome.js b/src/methods/erome.js index a544052..b4811bb 100644 --- a/src/methods/erome.js +++ b/src/methods/erome.js @@ -4,6 +4,8 @@ const config = require('config'); const fetch = require('node-fetch'); const cheerio = require('cheerio'); +const logger = require('../logger')(__filename); + const base = 'https://www.erome.com/'; async function erome(host, post) { @@ -50,7 +52,7 @@ async function erome(host, post) { const extract = config.library.extractSingleAlbumItem && (items.length === 1); if (extract) { - console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${url}' (${post ? post.url : 'no post'})`); + logger.verbose(`Extracting single item from album '${url}' (${post ? post.url : 'no post'})`); } return { diff --git a/src/methods/gfycat.js b/src/methods/gfycat.js index 34df6a9..3d826e4 100644 --- a/src/methods/gfycat.js +++ b/src/methods/gfycat.js @@ -1,13 +1,28 @@ 'use strict'; -const fetch = require('node-fetch'); +const bhttp = require('bhttp'); +const redgifs = require('./redgifs'); async function gfycat(host) { - const res = await fetch(`https://api.gfycat.com/v1/gfycats/${host.id}`); - const data = await res.json(); + const res = await bhttp.get(`https://api.gfycat.com/v1/gfycats/${host.id}`); + const data = await res.body; - if (data.error) { - throw new Error(data.error); + if (data.errorMessage) { + const redirectRes = await bhttp.head(host.url, { + followRedirects: false, + }); + + if (redirectRes.statusCode === 301) { + // Gfycat redirects all NSFW gifs to RedGifs, likely the case + return redgifs({ + ...host, + url: `https://www.redgifs.com/watch/${host.id}`, + method: 'redgifs', + label: 'redgifs', + }); + } + + throw new Error(`Gfycat API returned error for source '${host.url}' (${res.status}): ${data.errorMessage}`); } return { diff --git a/src/methods/imgurAlbum.js b/src/methods/imgurAlbum.js index 6ad4ecf..03e6262 100644 --- a/src/methods/imgurAlbum.js +++ b/src/methods/imgurAlbum.js @@ -2,7 +2,8 @@ const config = require('config'); const fetch = require('node-fetch'); -// const mime = require('mime-types'); + +const logger = require('../logger')(__filename); async function imgurAlbumApi(host, post) { const res = await fetch(`https://api.imgur.com/3/album/${host.id}`, { @@ -20,7 +21,7 @@ async function imgurAlbumApi(host, post) { const extract = config.library.extractSingleAlbumItem && data.images.length === 1; if (extract) { - console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${data.link}' (${post ? post.url : 'no post'})`); + logger.verbose(`Extracting single item from album '${data.link}' (${post ? post.url : 'no post'})`); } return { @@ -45,46 +46,4 @@ async function imgurAlbumApi(host, post) { }; } -/* - * as of late 2019, imgur requires log in to view albums and gallery images -async function imgurAlbum(host, post) { - const res = await fetch(`https://imgur.com/a/${post.host.id}`); - const html = await res.text(); - - if (res.status !== 200) { - if (config.methods.imgur.clientId) { - console.log('\x1b[31m%s\x1b[0m', `Could not fetch info for direct imgur album '${post.host.id}' (${res.statusText}), trying API fallback (${post.permalink})`); - - return imgurAlbumApi(post); - } - - throw new Error(`Could not fetch info for imgur album '${post.host.id}' (${res.statusText}) no API fallback configured`); - } - - const dataString = html.replace(/\s+/g, ' ').match(/}}, item:(.*)}; var PREBID_TIMEOUT/)[1]; - const data = JSON.parse(dataString); - - const extract = config.library.album.extractSingleItem && data.album_images.images.length === 1; - - return { - album: extract ? null : { - id: data.id, - url: `https://imgur.com/a/${post.host.id}`, - title: data.title, - description: data.description, - datetime: new Date(data.datetime), - }, - items: data.album_images.images.map(item => ({ - extracted: extract, - id: item.hash, - url: data.animated ? `https://i.imgur.com/${item.hash}.mp4` : `https://i.imgur.com/${item.hash}${item.ext}`, - title: item.title || (extract ? data.title : null), - description: item.description || (extract ? data.description : null), - type: item.animated ? 'video/mp4' : mime.lookup(item.ext.split('?')[0]), - datetime: new Date(item.datetime), - })), - }; -} -*/ - module.exports = imgurAlbumApi; diff --git a/src/methods/imgurImage.js b/src/methods/imgurImage.js index 7956239..76293ca 100644 --- a/src/methods/imgurImage.js +++ b/src/methods/imgurImage.js @@ -10,6 +10,10 @@ async function imgurImageApi(host) { }, }); + if (res.status !== 200) { + throw new Error(`Imgur API returned HTTP ${res.status} for source '${host.url}'`); + } + const { data } = await res.json(); if (res.status !== 200) { @@ -32,37 +36,6 @@ async function imgurImageApi(host) { async function imgurImage(host, post) { return imgurImageApi(host, post); - - /* - * as of late 2019, imgur requires log in to view albums and gallery images - const res = await fetch(`https://imgur.com/${post.host.id}`); - const html = await res.text(); - - if (res.status !== 200) { - if (config.methods.imgur.clientId) { - console.log('\x1b[31m%s\x1b[0m', `Could not scrape info for imgur image '${post.host.id}' (${res.statusText}), trying API fallback (${post.permalink})`); - - return imgurImageApi(post); - } - - throw new Error(`Could not scrape info for imgur image '${post.host.id}' (${res.statusText}), no API fallback configured`); - } - - const dataString = html.replace(/\s+/g, ' ').match(/}}, item:(.*)}; var PREBID_TIMEOUT/)[1]; - const data = JSON.parse(dataString); - - return { - album: null, - items: [{ - id: data.hash, - url: data.animated ? `https://i.imgur.com/${post.host.id}.mp4` : `https://i.imgur.com/${post.host.id}${data.ext}`, - title: data.title, - description: data.description, - type: data.animated ? 'video/mp4' : data.mimetype, - datetime: new Date(data.timestamp || data.datetime), - }], - }; - */ } module.exports = imgurImage; diff --git a/src/methods/methods.js b/src/methods/methods.js index ce42426..171a9c9 100644 --- a/src/methods/methods.js +++ b/src/methods/methods.js @@ -7,8 +7,10 @@ const gfycat = require('./gfycat'); const imgurAlbum = require('./imgurAlbum'); const imgurImage = require('./imgurImage'); const redditImage = require('./redditImage'); +const redditAlbum = require('./redditAlbum'); const redditPreview = require('./redditPreview'); const redditVideo = require('./redditVideo'); +const redgifs = require('./redgifs'); const self = require('./self'); const vidbleAlbum = require('./vidbleAlbum'); const vidbleImage = require('./vidbleImage'); @@ -23,8 +25,10 @@ module.exports = { imgurAlbum, imgurImage, redditImage, + redditAlbum, redditPreview, redditVideo, + redgifs, self, tube, vidbleAlbum, diff --git a/src/methods/redditAlbum.js b/src/methods/redditAlbum.js new file mode 100644 index 0000000..a8cc239 --- /dev/null +++ b/src/methods/redditAlbum.js @@ -0,0 +1,32 @@ +'use strict'; + +const mime = require('mime'); +const bhttp = require('bhttp'); +const { JSDOM } = require('jsdom'); + +async function redditAlbum(host, post) { + const res = await bhttp.get(host.url); + + if (res.statusCode !== 200) { + throw new Error(res.body.toString()); + } + + const { document } = new JSDOM(res.body.toString(), { runScripts: 'dangerously' }).window; + const items = Array.from(document.querySelectorAll('li a'), el => el.href); + + return { + album: { + id: host.id, + url: host.url, + title: post.title, + }, + items: items.map(url => ({ + id: new URL(url).pathname.match(/\/(.*).jpg/)[1], + url, + datetime: post.datetime, + type: mime.getType(url) || 'image/jpeg', + })), + }; +} + +module.exports = redditAlbum; diff --git a/src/methods/redditImage.js b/src/methods/redditImage.js index 8db1c5f..25543aa 100644 --- a/src/methods/redditImage.js +++ b/src/methods/redditImage.js @@ -1,6 +1,6 @@ 'use strict'; -const mime = require('mime-types'); +const mime = require('mime'); async function redditImage(host, post) { return { @@ -10,7 +10,7 @@ async function redditImage(host, post) { url: post.url, title: post.title, datetime: post.datetime, - type: mime.lookup(post.url.split('/.')[0]) || 'image/jpeg', + type: mime.getType(post.url) || 'image/jpeg', original: post, }], }; diff --git a/src/methods/redgifs.js b/src/methods/redgifs.js new file mode 100644 index 0000000..b3569d4 --- /dev/null +++ b/src/methods/redgifs.js @@ -0,0 +1,69 @@ +'use strict'; + +const fetch = require('node-fetch'); +const mime = require('mime'); + +function scrapeGallery(data) { + const oldestDate = Math.min(...data.gifs.map(gif => gif.createDate)); + + return { + album: { + id: data.id, + datetime: new Date(oldestDate * 1000), + }, + items: data.gifs.map(gif => ({ + id: gif.id, + url: gif.urls.hd, + description: gif.tags.join(', '), + type: mime.getType(gif.urls.hd), + datetime: new Date(gif.createDate * 1000), + original: gif, + })), + }; +} + +async function fetchGallery(galleryId) { + const res = await fetch(`https://api.redgifs.com/v2/gallery/${galleryId}`); + const data = await res.json(); + + if (!data.gifs) { + return null; + } + + return scrapeGallery(data); +} + +async function redgifs(host) { + const res = await fetch(`https://api.redgifs.com/v2/gifs/${host.id.toLowerCase()}`); + const data = await res.json(); + + if (data.errorMessage) { + throw new Error(`RedGifs API returned error for source '${host.url}' (${res.status}): ${data.errorMessage.description}`); + } + + if (data.id && data.gifs) { + return scrapeGallery(data); + } + + if (!data.gif) { + return null; + } + + if (data.gif.gallery) { + return fetchGallery(data.gif.gallery); + } + + return { + album: null, + items: [{ + id: data.gif.id, + url: data.gif.urls.hd, + description: data.gif.tags.join(', '), + type: mime.getType(data.gif.urls.hd), + datetime: new Date(data.gif.createDate * 1000), + original: data.gif, + }], + }; +} + +module.exports = redgifs; diff --git a/src/methods/tube.js b/src/methods/tube.js index f93f846..5c191f7 100644 --- a/src/methods/tube.js +++ b/src/methods/tube.js @@ -3,6 +3,8 @@ const youtubedl = require('youtube-dl'); const dateFns = require('date-fns'); +const logger = require('../logger')(__filename); + async function tube(host, post) { try { const data = await new Promise((resolve, reject) => { @@ -15,7 +17,7 @@ async function tube(host, post) { }); }); - host.id = data.display_id; + host.id = data.display_id; // eslint-disable-line no-param-reassign return { album: null, @@ -32,7 +34,7 @@ async function tube(host, post) { ], }; } catch (error) { - console.log('\x1b[33m%s\x1b[0m', `Ignoring possible profile page '${host.url}' (${post ? post.permalink : 'no post'})`); + logger.warn(`Ignoring possible image or profile page '${host.url}' (${post ? post.permalink : 'no post'})`); return null; } diff --git a/src/methods/vidbleAlbum.js b/src/methods/vidbleAlbum.js index 1066a1f..150c52a 100644 --- a/src/methods/vidbleAlbum.js +++ b/src/methods/vidbleAlbum.js @@ -6,6 +6,8 @@ const UrlPattern = require('url-pattern'); const cheerio = require('cheerio'); const mime = require('mime-types'); +const logger = require('../logger')(__filename); + const pattern = new UrlPattern('https\\://(www.)vidble.com/:id(_med)(.:ext)'); async function vidbleAlbum(host, post) { @@ -23,7 +25,7 @@ async function vidbleAlbum(host, post) { const extract = config.library.extractSingleAlbumItem && imgUrls.length === 1; if (extract) { - console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${post.title}' - ${res.link}`); + logger.verbose(`Extracting single item from album '${post.title}' - ${res.link}`); } return { @@ -43,7 +45,7 @@ async function vidbleAlbum(host, post) { id, url: `https://vidble.com/${id}.${components.ext}`, type: mimetype, - datetime: post.datetime, + datetime: post ? post.datetime : null, }; }), }; diff --git a/src/save/meta.js b/src/save/meta.js index ac34205..e812e74 100644 --- a/src/save/meta.js +++ b/src/save/meta.js @@ -1,9 +1,11 @@ 'use strict'; +const logger = require('../logger')(__filename); + async function saveMeta(filepath, meta, ep) { await ep.writeMetadata(filepath, meta, ['overwrite_original']); - console.log('\x1b[36m%s\x1b[0m', `Wrote metadata to '${filepath}'`); + logger.debug(`Wrote metadata to '${filepath}'`); } module.exports = saveMeta; diff --git a/src/save/mux.js b/src/save/mux.js index 642eac5..6e8dd4c 100644 --- a/src/save/mux.js +++ b/src/save/mux.js @@ -3,24 +3,25 @@ const ffmpeg = require('fluent-ffmpeg'); const fs = require('fs-extra'); -function mux(target, sources, item) { - return new Promise((resolve, reject) => { - return sources.reduce((acc, source) => { - return acc.input(source); - }, ffmpeg()).videoCodec('copy').audioCodec('copy').on('start', cmd => { - console.log('\x1b[36m%s\x1b[0m', `Muxing ${sources.length} streams to '${target}'`); - }).on('end', (stdout) => { - console.log('\x1b[32m%s\x1b[0m', `Muxed and saved '${target}'`); +const logger = require('../logger')(__filename); + +function mux(target, sources) { + return new Promise((resolve, reject) => sources.reduce((acc, source) => acc.input(source), ffmpeg()) + .videoCodec('copy') + .audioCodec('copy') + .on('start', () => { + logger.verbose(`Muxing ${sources.length} streams to '${target}'`); + }) + .on('end', (stdout) => { + logger.verbose(`Muxed and saved '${target}'`); resolve(stdout); - }).on('error', error => reject).save(target); - }).then(() => { - return Promise.all(sources.map(source => { - return fs.remove(source); - })).then(() => { - console.log('\x1b[36m%s\x1b[0m', `Cleaned up temporary files for '${target}'`); - }); - }); -}; + }) + .on('error', () => reject) + .save(target)) + .then(() => Promise.all(sources.map(source => fs.remove(source))).then(() => { + logger.verbose(`Cleaned up temporary files for '${target}'`); + })); +} module.exports = mux; diff --git a/src/save/profileDetails.js b/src/save/profileDetails.js index 3f3d845..fb3ebe5 100644 --- a/src/save/profileDetails.js +++ b/src/save/profileDetails.js @@ -4,14 +4,15 @@ const config = require('config'); const Promise = require('bluebird'); const UrlPattern = require('url-pattern'); -const interpolate = require('../interpolate.js'); -const fetchItem = require('../fetch/item.js'); -// const textToStream = require('./textToStream.js'); -const save = require('./save.js'); +const interpolate = require('../interpolate'); +const fetchItem = require('../fetch/item'); +// const textToStream = require('./textToStream'); +const save = require('./save'); +const logger = require('../logger')(__filename); async function saveProfileImage(user, args) { if (!args.redownloadProfile && user.indexed.profile.image) { - console.log('\x1b[33m%s\x1b[0m', `Ignoring already present profile image for '${user.name}' (https://reddit.com/user/${user.name})`); + logger.verbose(`Ignoring already present profile image for '${user.name}' (https://reddit.com/user/${user.name})`); return user.indexed.profile.image; } @@ -20,7 +21,7 @@ async function saveProfileImage(user, args) { const image = user.profile ? user.profile.image : user.image; if (config.library.profile.avoidAvatar && new UrlPattern('http(s)\\://(www.)redditstatic.com/avatars/:id(.:ext)(?:query)').match(image)) { - console.log('\x1b[33m%s\x1b[0m', `Ignoring standard avatar profile image for '${user.name}' (https://reddit.com/user/${user.name})`); + logger.verbose(`Ignoring standard avatar profile image for '${user.name}' (https://reddit.com/user/${user.name})`); return null; } @@ -44,7 +45,7 @@ async function saveProfileImage(user, args) { return targets[0]; } catch (error) { - console.log('\x1b[33m%s\x1b[0m', `Could not save profile image for '${user.name}': ${error} (https://reddit.com/user/${user.name})`); + logger.warn(`Could not save profile image for '${user.name}': ${error} (https://reddit.com/user/${user.name})`); return null; } @@ -55,7 +56,7 @@ async function saveProfileImage(user, args) { async function saveProfileDescription(user, args) { if (!args.redownloadProfile && user.indexed.profile.description) { - console.log('\x1b[33m%s\x1b[0m', `Ignoring already present profile description for '${user.name}' (https://reddit.com/user/${user.name})`); + logger.verbose(`Ignoring already present profile description for '${user.name}' (https://reddit.com/user/${user.name})`); return user.indexed.profile.description; } @@ -70,13 +71,13 @@ async function saveProfileDescription(user, args) { return targets[0]; } catch (error) { - console.log('\x1b[33m%s\x1b[0m', `Could not save profile description for '${user.name}': ${error} (https://reddit.com/user/${user.name})`); + logger.error(`Could not save profile description for '${user.name}': ${error} (https://reddit.com/user/${user.name})`); return null; } } - console.log('\x1b[33m%s\x1b[0m', `No profile description for '${user.name}' (https://reddit.com/user/${user.name})`); + logger.verbose(`No profile description for '${user.name}' (https://reddit.com/user/${user.name})`); return null; } diff --git a/src/save/save.js b/src/save/save.js index ac7ff56..0c1465d 100644 --- a/src/save/save.js +++ b/src/save/save.js @@ -4,6 +4,7 @@ const config = require('config'); const fs = require('fs-extra'); const path = require('path'); const truncate = require('../utils/truncate-bytes'); +const logger = require('../logger')(__filename); function limitPathElement(element, limit) { return element.split('/').map((component) => { @@ -30,48 +31,14 @@ async function writeBufferToFile(target, buffer, item) { await fs.writeFile(target, buffer); if (item && item.mux) { - console.log(`Temporarily saved '${target}', queued for muxing`); + logger.debug(`Temporarily saved '${target}', queued for muxing`); } else { - console.log('\x1b[32m%s\x1b[0m', `Saved '${target}'`); + logger.verbose(`Saved '${target}'`); } return target; } -/* -async function pipeStreamToFile(target, stream, item) { - const file = fs.createWriteStream(target); - - return new Promise((resolve, reject) => { - stream.pipe(file); - - stream.on('error', reject); - stream.on('end', () => { - if (item && item.mux) { - console.log(`Temporarily saved '${target}', queued for muxing`); - } else { - console.log('\x1b[32m%s\x1b[0m', `Saved '${target}'`); - } - - resolve(target); - }); - }); -} - -async function save(requestedFilepath, streamOrStreams, item) { - const pathElements = getPathElements(requestedFilepath); - const streams = [].concat(streamOrStreams); // allow for single stream argument - - await fs.ensureDir(pathElements.dir); - - return Promise.all(streams.map((stream, index) => { - const target = path.join(pathElements.root, pathElements.dir, `${pathElements.name}${streams.length > 1 ? `-${index}` : ''}${pathElements.ext}`); - - return pipeStreamToFile(target, stream, item); - })); -} -*/ - async function save(requestedFilepath, bufferOrBuffers, item) { const pathElements = getPathElements(requestedFilepath); const buffers = [].concat(bufferOrBuffers); // allow for single stream argument diff --git a/src/save/writeToIndex.js b/src/save/writeToIndex.js index 86c8d6b..1c182c4 100644 --- a/src/save/writeToIndex.js +++ b/src/save/writeToIndex.js @@ -6,6 +6,7 @@ const yaml = require('js-yaml'); const interpolate = require('../interpolate'); // const textToStream = require('./textToStream'); const save = require('./save'); +const logger = require('../logger')(__filename); async function writeToIndex(posts, profilePaths, user, args) { const filepath = interpolate(config.library.index.file, null, null, null, null, user, false); @@ -44,8 +45,14 @@ async function writeToIndex(posts, profilePaths, user, args) { return false; } - // return save(filepath, textToStream(yaml.safeDump(data))); - return save(filepath, Buffer.from(yaml.safeDump(data), 'utf8')); + try { + const yamlIndex = yaml.safeDump(data); + + return save(filepath, Buffer.from(yamlIndex, 'utf8')); + } catch (error) { + logger.error(`Could not save index for ${user.username}: ${error.message}`); + return null; + } } module.exports = writeToIndex; diff --git a/src/sources/getIndex.js b/src/sources/getIndex.js index 60796eb..a58b751 100644 --- a/src/sources/getIndex.js +++ b/src/sources/getIndex.js @@ -4,7 +4,8 @@ const config = require('config'); const fs = require('fs-extra'); const yaml = require('js-yaml'); -const interpolate = require('../interpolate.js'); +const logger = require('../logger')(__filename); +const interpolate = require('../interpolate'); async function getIndex(user) { const indexFilePath = interpolate(config.library.index.file, null, null, null, null, user, false); @@ -14,7 +15,7 @@ async function getIndex(user) { return yaml.safeLoad(indexFile); } catch (error) { - console.log('\x1b[33m%s\x1b[0m', `No index file found for '${user.name}' at '${indexFilePath}'`); + logger.info(`No index file found for '${user.name}' at '${indexFilePath}'`); return { profile: { image: null, description: null }, posts: [] }; } diff --git a/src/sources/getPosts.js b/src/sources/getPosts.js index 295c198..847ee40 100644 --- a/src/sources/getPosts.js +++ b/src/sources/getPosts.js @@ -5,33 +5,38 @@ const Promise = require('bluebird'); const getIndex = require('./getIndex.js'); const curateUser = require('../curate/user.js'); +const logger = require('../logger')(__filename); +const limiter = require('../limiter').reddit; + async function getUser(username, reddit) { try { - const user = await reddit.getUser(username).fetch(); + const user = await limiter.schedule(async () => reddit.getUser(username).fetch()); return curateUser(user); } catch (error) { - console.log('\x1b[31m%s\x1b[0m', `Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`); + logger.error(`Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`); return { name: username, fallback: true, }; } -}; +} const getPostsWrap = reddit => function getPosts(postIds, userPosts = {}) { return Promise.reduce(postIds, (accUserPosts, postId) => Promise.resolve().then(async () => { - const post = await reddit - .getSubmission(postId) - .fetch(); + const post = await limiter.schedule(async () => reddit.getSubmission(postId).fetch()); post.direct = true; if (accUserPosts[post.author.name]) { - accUserPosts[post.author.name].posts = accUserPosts[post.author.name].posts.concat(post); - - return accUserPosts; + return { + ...accUserPosts, + [post.author.name]: { + ...accUserPosts[post.author.name], + posts: [...accUserPosts[post.author.name].posts, post], + }, + }; } // don't attempt to fetch deleted user diff --git a/src/sources/getUserPosts.js b/src/sources/getUserPosts.js index 26ec152..adee9a7 100644 --- a/src/sources/getUserPosts.js +++ b/src/sources/getUserPosts.js @@ -6,15 +6,18 @@ const getIndex = require('./getIndex.js'); const getArchivePostIds = require('../archives/getArchivePostIds.js'); const curateUser = require('../curate/user.js'); +const logger = require('../logger')(__filename); +const limiter = require('../limiter').reddit; + async function getUser(username, reddit) { try { - const user = await reddit.getUser(username).fetch(); + const user = await limiter.schedule(async () => reddit.getUser(username).fetch()); - console.log(`Fetched user profile for '${username}' (https://reddit.com/user/${username})`); + logger.info(`Fetched user profile for '${username}' (https://reddit.com/user/${username})`); return curateUser(user); } catch (error) { - console.log('\x1b[31m%s\x1b[0m', `Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`); + logger.error(`Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`); return { name: username, @@ -25,18 +28,18 @@ async function getUser(username, reddit) { async function getPosts(username, reddit, args) { try { - const submissions = await reddit + const submissions = await limiter.schedule(async () => reddit .getUser(username) .getSubmissions({ sort: args.sort, limit: Infinity, - }); + })); - console.log(`Fetched ${submissions.length} submissions for '${username}' (https://reddit.com/user/${username})`); + logger.info(`Fetched ${submissions.length} submissions for '${username}' (https://reddit.com/user/${username})`); return submissions; } catch (error) { - console.log('\x1b[31m%s\x1b[0m', `Failed to fetch posts from reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`); + logger.warn(`Failed to fetch posts from reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`); return []; } @@ -45,7 +48,7 @@ async function getPosts(username, reddit, args) { async function getArchivedPosts(username, posts, reddit) { const postIds = await getArchivePostIds(username, posts.map(post => post.id)); - return Promise.all(postIds.map(postId => reddit.getSubmission(postId).fetch())); + return Promise.all(postIds.map(postId => limiter.schedule(async () => reddit.getSubmission(postId).fetch()))); } function getUserPostsWrap(reddit, args) { @@ -78,12 +81,12 @@ function getUserPostsWrap(reddit, args) { return null; } catch (error) { - console.log(username, error); + logger.error(`Failed to fetch posts from 'username': ${error.message}`); return null; } }, { - concurrency: 5, + concurrency: 10, }); return users.reduce( @@ -97,7 +100,7 @@ function getUserPostsWrap(reddit, args) { {}, ); } catch (error) { - console.log(error); + logger.error(error); throw error; }