Fixed invalid URL breaking scrape, prefixing reddit URLs with origin.

This commit is contained in:
DebaucheryLibrarian 2024-09-11 05:16:59 +02:00
parent b1e13e048c
commit e642203d6c
6 changed files with 115 additions and 467 deletions

536
package-lock.json generated
View File

@ -30,9 +30,9 @@
"object.omit": "^3.0.0",
"object.pick": "^1.3.0",
"sharp-phash": "^2.1.0",
"snoowrap": "^1.20.0",
"snoowrap": "^1.23.0",
"template-format": "^1.2.4",
"unprint": "^0.8.1",
"unprint": "^0.8.2",
"url-pattern": "^1.0.3",
"winston": "^3.3.3",
"winston-daily-rotate-file": "^4.7.1",
@ -3780,152 +3780,6 @@
"resolved": "https://registry.npmjs.org/parse5/-/parse5-5.1.0.tgz",
"integrity": "sha512-fxNG2sQjHvlVAYmzBZS9YlDp6PTSSDwa98vkD4QgVDDCAo84z5X1t5XyJQ62ImdLXx5NdIIfihey6xpum9/gRQ=="
},
"node_modules/jsdom/node_modules/request": {
"version": "2.88.0",
"resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz",
"integrity": "sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==",
"deprecated": "request has been deprecated, see https://github.com/request/request/issues/3142",
"dependencies": {
"aws-sign2": "~0.7.0",
"aws4": "^1.8.0",
"caseless": "~0.12.0",
"combined-stream": "~1.0.6",
"extend": "~3.0.2",
"forever-agent": "~0.6.1",
"form-data": "~2.3.2",
"har-validator": "~5.1.0",
"http-signature": "~1.2.0",
"is-typedarray": "~1.0.0",
"isstream": "~0.1.2",
"json-stringify-safe": "~5.0.1",
"mime-types": "~2.1.19",
"oauth-sign": "~0.9.0",
"performance-now": "^2.1.0",
"qs": "~6.5.2",
"safe-buffer": "^5.1.2",
"tough-cookie": "~2.4.3",
"tunnel-agent": "^0.6.0",
"uuid": "^3.3.2"
},
"engines": {
"node": ">= 4"
}
},
"node_modules/jsdom/node_modules/request/node_modules/aws-sign2": {
"version": "0.7.0",
"resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz",
"integrity": "sha512-08kcGqnYf/YmjoRhfxyu+CLxBjUtHLXLXX/vUfx9l2LYzG3c1m61nrpyFUZI6zeS+Li/wWMMidD9KgrqtGq3mA==",
"engines": {
"node": "*"
}
},
"node_modules/jsdom/node_modules/request/node_modules/caseless": {
"version": "0.12.0",
"resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz",
"integrity": "sha512-4tYFyifaFfGacoiObjJegolkwSU4xQNGbVgUiNYVUxbQ2x2lUsFvY4hVgVzGiIe6WLOPqycWXA40l+PWsxthUw=="
},
"node_modules/jsdom/node_modules/request/node_modules/forever-agent": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz",
"integrity": "sha512-j0KLYPhm6zeac4lz3oJ3o65qvgQCcPubiyotZrXqEaG4hNagNYO8qdlUrX5vwqv9ohqeT/Z3j6+yW067yWWdUw==",
"engines": {
"node": "*"
}
},
"node_modules/jsdom/node_modules/request/node_modules/form-data": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.2.tgz",
"integrity": "sha512-6DD2fGWwyxCca2EASUT50GsxWEuwNQDpjMhD9TTaBvI1NE3nLkCr5v7nRdtlmG5g+mNqosdOVHVro+UGmp0Kcw==",
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "1.0.6",
"mime-types": "^2.1.12"
},
"engines": {
"node": ">= 0.12"
}
},
"node_modules/jsdom/node_modules/request/node_modules/http-signature": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.2.0.tgz",
"integrity": "sha512-CAbnr6Rz4CYQkLYUtSNXxQPUH2gK8f3iWexVlsnMeD+GjlsQ0Xsy1cOX+mN3dtxYomRy21CiOzU8Uhw6OwncEQ==",
"dependencies": {
"assert-plus": "^1.0.0",
"jsprim": "^1.2.2",
"sshpk": "^1.7.0"
},
"engines": {
"node": ">=0.8",
"npm": ">=1.3.7"
}
},
"node_modules/jsdom/node_modules/request/node_modules/is-typedarray": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz",
"integrity": "sha512-cyA56iCMHAh5CdzjJIa4aohJyeO1YbwLi3Jc35MmRU6poroFjIGZzUzupGiRPOjgHg9TLu43xbpwXk523fMxKA=="
},
"node_modules/jsdom/node_modules/request/node_modules/isstream": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz",
"integrity": "sha512-Yljz7ffyPbrLpLngrMtZ7NduUgVvi6wG9RJ9IUcyCd59YQ911PBJphODUcbOVbqYfxe1wuYf/LJ8PauMRwsM/g=="
},
"node_modules/jsdom/node_modules/request/node_modules/json-stringify-safe": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
"integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA=="
},
"node_modules/jsdom/node_modules/request/node_modules/performance-now": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz",
"integrity": "sha512-7EAHlyLHI56VEIdK57uwHdHKIaAGbnXPiw0yWbarQZOKaKpvUIgW0jWRVLiatnM+XXlSwsanIBH/hzGMJulMow=="
},
"node_modules/jsdom/node_modules/request/node_modules/psl": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/psl/-/psl-1.4.0.tgz",
"integrity": "sha512-HZzqCGPecFLyoRj5HLfuDSKYTJkAfB5thKBIkRHtGjWwY7p1dAyveIbXIq4tO0KYfDF2tHqPUgY9SDnGm00uFw=="
},
"node_modules/jsdom/node_modules/request/node_modules/tough-cookie": {
"version": "2.4.3",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.4.3.tgz",
"integrity": "sha512-Q5srk/4vDM54WJsJio3XNn6K2sCG+CQ8G5Wz6bZhRZoAe/+TxjWB/GlFAnYEbkYVlON9FMk/fE3h2RLpPXo4lQ==",
"dependencies": {
"psl": "^1.1.24",
"punycode": "^1.4.1"
},
"engines": {
"node": ">=0.8"
}
},
"node_modules/jsdom/node_modules/request/node_modules/tunnel-agent": {
"version": "0.6.0",
"resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
"integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==",
"dependencies": {
"safe-buffer": "^5.0.1"
},
"engines": {
"node": "*"
}
},
"node_modules/jsdom/node_modules/safe-buffer": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
"integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/jsdom/node_modules/tough-cookie": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-3.0.1.tgz",
@ -3947,19 +3801,6 @@
"node": ">=4"
}
},
"node_modules/jsdom/node_modules/tough-cookie/node_modules/psl": {
"version": "1.9.0",
"resolved": "https://registry.npmjs.org/psl/-/psl-1.9.0.tgz",
"integrity": "sha512-E/ZsdU4HLs/68gYzgGTkMicWTLPdAftJLfJFlLUAAKZGkStNU72sZjT66SnMDVOfOWY/YAoiD7Jxa9iHvngcag=="
},
"node_modules/jsdom/node_modules/tough-cookie/node_modules/punycode": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
"integrity": "sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==",
"engines": {
"node": ">=6"
}
},
"node_modules/jsdom/node_modules/ws": {
"version": "7.2.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-7.2.0.tgz",
@ -4105,9 +3946,9 @@
}
},
"node_modules/lodash": {
"version": "4.17.5",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.5.tgz",
"integrity": "sha512-svL3uiZf1RwhH+cWrfZn3A4+U58wbP0tGVTLQPbjplZxZ8ROD9VLuNgsRniTlLe7OlSqR79RUehXgpBW/s0IQw=="
"version": "4.17.21",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
},
"node_modules/lodash.merge": {
"version": "4.6.2",
@ -4812,9 +4653,12 @@
}
},
"node_modules/punycode": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz",
"integrity": "sha512-jmYNElW7yvO7TV33CjSmvSiE2yco3bV2czu/OzDKdMNVZQWfxCblURLhf+47syQRBntjfLdd/H0egrzIG+oaFQ=="
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
"integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==",
"engines": {
"node": ">=6"
}
},
"node_modules/qs": {
"version": "6.5.2",
@ -4935,9 +4779,9 @@
}
},
"node_modules/request": {
"version": "2.88.0",
"resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz",
"integrity": "sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==",
"version": "2.88.2",
"resolved": "https://registry.npmjs.org/request/-/request-2.88.2.tgz",
"integrity": "sha512-MsvtOrfG9ZcrOwAW+Qi+F6HbD0CWXEh9ou77uOb7FM2WPhwT7smM833PzanhJLsgXjN89Ir6V2PczXNnMpwKhw==",
"deprecated": "request has been deprecated, see https://github.com/request/request/issues/3142",
"dependencies": {
"aws-sign2": "~0.7.0",
@ -4947,7 +4791,7 @@
"extend": "~3.0.2",
"forever-agent": "~0.6.1",
"form-data": "~2.3.2",
"har-validator": "~5.1.0",
"har-validator": "~5.1.3",
"http-signature": "~1.2.0",
"is-typedarray": "~1.0.0",
"isstream": "~0.1.2",
@ -4957,22 +4801,22 @@
"performance-now": "^2.1.0",
"qs": "~6.5.2",
"safe-buffer": "^5.1.2",
"tough-cookie": "~2.4.3",
"tough-cookie": "~2.5.0",
"tunnel-agent": "^0.6.0",
"uuid": "^3.3.2"
},
"engines": {
"node": ">= 4"
"node": ">= 6"
}
},
"node_modules/request-promise": {
"version": "4.2.4",
"resolved": "https://registry.npmjs.org/request-promise/-/request-promise-4.2.4.tgz",
"integrity": "sha512-8wgMrvE546PzbR5WbYxUQogUnUDfM0S7QIFZMID+J73vdFARkFy+HElj4T+MWYhpXwlLp0EQ8Zoj8xUA0he4Vg==",
"version": "4.2.6",
"resolved": "https://registry.npmjs.org/request-promise/-/request-promise-4.2.6.tgz",
"integrity": "sha512-HCHI3DJJUakkOr8fNoCc73E5nU5bqITjOYFMDrKHYOXWXrgD/SBaC7LjwuPymUprRyuF06UK7hd/lMHkmUXglQ==",
"deprecated": "request-promise has been deprecated because it extends the now deprecated request package, see https://github.com/request/request/issues/3142",
"dependencies": {
"bluebird": "^3.5.0",
"request-promise-core": "1.1.2",
"request-promise-core": "1.1.4",
"stealthy-require": "^1.1.1",
"tough-cookie": "^2.3.3"
},
@ -4997,11 +4841,6 @@
"request": "^2.34"
}
},
"node_modules/request-promise-core/node_modules/lodash": {
"version": "4.17.21",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
},
"node_modules/request-promise-native": {
"version": "1.0.7",
"resolved": "https://registry.npmjs.org/request-promise-native/-/request-promise-native-1.0.7.tgz",
@ -5019,6 +4858,20 @@
"request": "^2.34"
}
},
"node_modules/request-promise/node_modules/request-promise-core": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/request-promise-core/-/request-promise-core-1.1.4.tgz",
"integrity": "sha512-TTbAfBBRdWD7aNNOoVOBH4pN/KigV6LyapYNNlAPA8JwbovRti1E88m3sYAwsLi5ryhPKsE9APwnjFTgdUjTpw==",
"dependencies": {
"lodash": "^4.17.19"
},
"engines": {
"node": ">=0.10.0"
},
"peerDependencies": {
"request": "^2.34"
}
},
"node_modules/request/node_modules/safe-buffer": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
@ -5038,18 +4891,6 @@
}
]
},
"node_modules/request/node_modules/tough-cookie": {
"version": "2.4.3",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.4.3.tgz",
"integrity": "sha512-Q5srk/4vDM54WJsJio3XNn6K2sCG+CQ8G5Wz6bZhRZoAe/+TxjWB/GlFAnYEbkYVlON9FMk/fE3h2RLpPXo4lQ==",
"dependencies": {
"psl": "^1.1.24",
"punycode": "^1.4.1"
},
"engines": {
"node": ">=0.8"
}
},
"node_modules/require-directory": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
@ -5381,25 +5222,21 @@
}
},
"node_modules/snoowrap": {
"version": "1.20.0",
"integrity": "sha512-TZ+hzHF+1aCCSchHabxGVEpP4qN73fqZS9kpBpsiiGAtogjPVlkIIJviGbxtyZ6/noXWlADqs/53eRcj0+Cl8g==",
"version": "1.23.0",
"resolved": "https://registry.npmjs.org/snoowrap/-/snoowrap-1.23.0.tgz",
"integrity": "sha512-8FIGWr20Gc+d/C3NRrNPp5VQFNb+eGaQyvIkM5KUL71pYpRM231fkRdLNydMrdtLNRDrTZeZApHHCb8Uk/QuTQ==",
"dependencies": {
"bluebird": "^3.5.5",
"lodash": "^4.17.15",
"promise-chains": "^0.3.11",
"request": "^2.87.0",
"request-promise": "^4.2.2",
"request": "^2.88.2",
"request-promise": "^4.2.6",
"ws": "^3.3.1"
},
"engines": {
"node": ">=4.0.0"
}
},
"node_modules/snoowrap/node_modules/lodash": {
"version": "4.17.15",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz",
"integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A=="
},
"node_modules/source-map": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
@ -5740,11 +5577,12 @@
}
},
"node_modules/tough-cookie": {
"version": "2.3.4",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.3.4.tgz",
"integrity": "sha512-TZ6TTfI5NtZnuyy/Kecv+CnoROnyXn2DN97LontgQpCwsX2XyLYCC0ENhYkehSOwAp8rTQKc/NUIF7BkQ5rKLA==",
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.5.0.tgz",
"integrity": "sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==",
"dependencies": {
"punycode": "^1.4.1"
"psl": "^1.1.28",
"punycode": "^2.1.1"
},
"engines": {
"node": ">=0.8"
@ -5758,14 +5596,6 @@
"punycode": "^2.1.0"
}
},
"node_modules/tr46/node_modules/punycode": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
"integrity": "sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==",
"engines": {
"node": ">=6"
}
},
"node_modules/traverse": {
"version": "0.6.7",
"resolved": "https://registry.npmjs.org/traverse/-/traverse-0.6.7.tgz",
@ -5890,8 +5720,9 @@
"integrity": "sha512-MVi79HEPwGk0grI7/Kl6H51fX7wcDTe6gGoCdK22pkRG6IPsi9L6NltClWJfBLUoIE5y3pKy3SplFAs/b0G+QQ=="
},
"node_modules/unprint": {
"version": "0.8.1",
"integrity": "sha512-6cF2LgIpc7JEBBXWs/CmJgFBAPmvM5NuD3tDvmnnmZR+wrEF03TNR5kv08iPTV/GhT8EHPhZM3LSd8eRipkWdA==",
"version": "0.8.2",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.8.2.tgz",
"integrity": "sha512-mCKPDPwtuECbXJJLQbDn2FFbydr6fLKytyS3pymbxcTh2dkk7NFypMjR7qjU2Uv9Fl91hSE48SjYMsWHNKpp4w==",
"dependencies": {
"axios": "^0.27.2",
"bottleneck": "^2.19.5",
@ -6056,24 +5887,6 @@
"node": ">= 0.8.0"
}
},
"node_modules/unprint/node_modules/eslint-config-airbnb-base": {
"version": "15.0.0",
"resolved": "https://registry.npmjs.org/eslint-config-airbnb-base/-/eslint-config-airbnb-base-15.0.0.tgz",
"integrity": "sha512-xaX3z4ZZIcFLvh2oUNvcX5oEofXda7giYmuplVxoOg5A7EXJMrUyqRgR+mhDhPK8LZ4PttFOBvCYDbX3sUoUig==",
"dependencies": {
"confusing-browser-globals": "^1.0.10",
"object.assign": "^4.1.2",
"object.entries": "^1.1.5",
"semver": "^6.3.0"
},
"engines": {
"node": "^10.12.0 || >=12.0.0"
},
"peerDependencies": {
"eslint": "^7.32.0 || ^8.2.0",
"eslint-plugin-import": "^2.25.2"
}
},
"node_modules/unprint/node_modules/estraverse": {
"version": "5.3.0",
"resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
@ -6191,14 +6004,6 @@
"resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz",
"integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw=="
},
"node_modules/unprint/node_modules/punycode": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
"integrity": "sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==",
"engines": {
"node": ">=6"
}
},
"node_modules/unprint/node_modules/saxes": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/saxes/-/saxes-5.0.1.tgz",
@ -6349,14 +6154,6 @@
"punycode": "^2.1.0"
}
},
"node_modules/uri-js/node_modules/punycode": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
"integrity": "sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==",
"engines": {
"node": ">=6"
}
},
"node_modules/url-pattern": {
"version": "1.0.3",
"integrity": "sha512-uQcEj/2puA4aq1R3A2+VNVBgaWYR24FdWjl7VNW83rnWftlhyzOZ/tBjezRiC2UkIzuxC8Top3IekN3vUf1WxA==",
@ -9975,117 +9772,6 @@
"resolved": "https://registry.npmjs.org/parse5/-/parse5-5.1.0.tgz",
"integrity": "sha512-fxNG2sQjHvlVAYmzBZS9YlDp6PTSSDwa98vkD4QgVDDCAo84z5X1t5XyJQ62ImdLXx5NdIIfihey6xpum9/gRQ=="
},
"request": {
"version": "2.88.0",
"resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz",
"integrity": "sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==",
"requires": {
"aws-sign2": "~0.7.0",
"aws4": "^1.8.0",
"caseless": "~0.12.0",
"combined-stream": "~1.0.6",
"extend": "~3.0.2",
"forever-agent": "~0.6.1",
"form-data": "~2.3.2",
"har-validator": "~5.1.0",
"http-signature": "~1.2.0",
"is-typedarray": "~1.0.0",
"isstream": "~0.1.2",
"json-stringify-safe": "~5.0.1",
"mime-types": "~2.1.19",
"oauth-sign": "~0.9.0",
"performance-now": "^2.1.0",
"qs": "~6.5.2",
"safe-buffer": "^5.1.2",
"tough-cookie": "~2.4.3",
"tunnel-agent": "^0.6.0",
"uuid": "^3.3.2"
},
"dependencies": {
"aws-sign2": {
"version": "0.7.0",
"resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz",
"integrity": "sha512-08kcGqnYf/YmjoRhfxyu+CLxBjUtHLXLXX/vUfx9l2LYzG3c1m61nrpyFUZI6zeS+Li/wWMMidD9KgrqtGq3mA=="
},
"caseless": {
"version": "0.12.0",
"resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz",
"integrity": "sha512-4tYFyifaFfGacoiObjJegolkwSU4xQNGbVgUiNYVUxbQ2x2lUsFvY4hVgVzGiIe6WLOPqycWXA40l+PWsxthUw=="
},
"forever-agent": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz",
"integrity": "sha512-j0KLYPhm6zeac4lz3oJ3o65qvgQCcPubiyotZrXqEaG4hNagNYO8qdlUrX5vwqv9ohqeT/Z3j6+yW067yWWdUw=="
},
"form-data": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.2.tgz",
"integrity": "sha512-6DD2fGWwyxCca2EASUT50GsxWEuwNQDpjMhD9TTaBvI1NE3nLkCr5v7nRdtlmG5g+mNqosdOVHVro+UGmp0Kcw==",
"requires": {
"asynckit": "^0.4.0",
"combined-stream": "1.0.6",
"mime-types": "^2.1.12"
}
},
"http-signature": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.2.0.tgz",
"integrity": "sha512-CAbnr6Rz4CYQkLYUtSNXxQPUH2gK8f3iWexVlsnMeD+GjlsQ0Xsy1cOX+mN3dtxYomRy21CiOzU8Uhw6OwncEQ==",
"requires": {
"assert-plus": "^1.0.0",
"jsprim": "^1.2.2",
"sshpk": "^1.7.0"
}
},
"is-typedarray": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz",
"integrity": "sha512-cyA56iCMHAh5CdzjJIa4aohJyeO1YbwLi3Jc35MmRU6poroFjIGZzUzupGiRPOjgHg9TLu43xbpwXk523fMxKA=="
},
"isstream": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz",
"integrity": "sha512-Yljz7ffyPbrLpLngrMtZ7NduUgVvi6wG9RJ9IUcyCd59YQ911PBJphODUcbOVbqYfxe1wuYf/LJ8PauMRwsM/g=="
},
"json-stringify-safe": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
"integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA=="
},
"performance-now": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz",
"integrity": "sha512-7EAHlyLHI56VEIdK57uwHdHKIaAGbnXPiw0yWbarQZOKaKpvUIgW0jWRVLiatnM+XXlSwsanIBH/hzGMJulMow=="
},
"psl": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/psl/-/psl-1.4.0.tgz",
"integrity": "sha512-HZzqCGPecFLyoRj5HLfuDSKYTJkAfB5thKBIkRHtGjWwY7p1dAyveIbXIq4tO0KYfDF2tHqPUgY9SDnGm00uFw=="
},
"tough-cookie": {
"version": "2.4.3",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.4.3.tgz",
"integrity": "sha512-Q5srk/4vDM54WJsJio3XNn6K2sCG+CQ8G5Wz6bZhRZoAe/+TxjWB/GlFAnYEbkYVlON9FMk/fE3h2RLpPXo4lQ==",
"requires": {
"psl": "^1.1.24",
"punycode": "^1.4.1"
}
},
"tunnel-agent": {
"version": "0.6.0",
"resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
"integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==",
"requires": {
"safe-buffer": "^5.0.1"
}
}
}
},
"safe-buffer": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
"integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="
},
"tough-cookie": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-3.0.1.tgz",
@ -10100,16 +9786,6 @@
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/ip-regex/-/ip-regex-2.1.0.tgz",
"integrity": "sha512-58yWmlHpp7VYfcdTwMTvwMmqx/Elfxjd9RXTDyMsbL7lLWmhMylLEqiYVLKuLzOZqVgiWXD9MfR62Vv89VRxkw=="
},
"psl": {
"version": "1.9.0",
"resolved": "https://registry.npmjs.org/psl/-/psl-1.9.0.tgz",
"integrity": "sha512-E/ZsdU4HLs/68gYzgGTkMicWTLPdAftJLfJFlLUAAKZGkStNU72sZjT66SnMDVOfOWY/YAoiD7Jxa9iHvngcag=="
},
"punycode": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
"integrity": "sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA=="
}
}
},
@ -10238,9 +9914,9 @@
}
},
"lodash": {
"version": "4.17.5",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.5.tgz",
"integrity": "sha512-svL3uiZf1RwhH+cWrfZn3A4+U58wbP0tGVTLQPbjplZxZ8ROD9VLuNgsRniTlLe7OlSqR79RUehXgpBW/s0IQw=="
"version": "4.17.21",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
},
"lodash.merge": {
"version": "4.6.2",
@ -10788,9 +10464,9 @@
}
},
"punycode": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz",
"integrity": "sha512-jmYNElW7yvO7TV33CjSmvSiE2yco3bV2czu/OzDKdMNVZQWfxCblURLhf+47syQRBntjfLdd/H0egrzIG+oaFQ=="
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
"integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg=="
},
"qs": {
"version": "6.5.2",
@ -10875,9 +10551,9 @@
"integrity": "sha512-pq2bWo9mVD43nbts2wGv17XLiNLya+GklZ8kaDLV2Z08gDCsGpnKn9BFMepvWuHCbyVvY7J5o5+BVvoQbmlJLg=="
},
"request": {
"version": "2.88.0",
"resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz",
"integrity": "sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==",
"version": "2.88.2",
"resolved": "https://registry.npmjs.org/request/-/request-2.88.2.tgz",
"integrity": "sha512-MsvtOrfG9ZcrOwAW+Qi+F6HbD0CWXEh9ou77uOb7FM2WPhwT7smM833PzanhJLsgXjN89Ir6V2PczXNnMpwKhw==",
"requires": {
"aws-sign2": "~0.7.0",
"aws4": "^1.8.0",
@ -10886,7 +10562,7 @@
"extend": "~3.0.2",
"forever-agent": "~0.6.1",
"form-data": "~2.3.2",
"har-validator": "~5.1.0",
"har-validator": "~5.1.3",
"http-signature": "~1.2.0",
"is-typedarray": "~1.0.0",
"isstream": "~0.1.2",
@ -10896,7 +10572,7 @@
"performance-now": "^2.1.0",
"qs": "~6.5.2",
"safe-buffer": "^5.1.2",
"tough-cookie": "~2.4.3",
"tough-cookie": "~2.5.0",
"tunnel-agent": "^0.6.0",
"uuid": "^3.3.2"
},
@ -10905,27 +10581,28 @@
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
"integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="
},
"tough-cookie": {
"version": "2.4.3",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.4.3.tgz",
"integrity": "sha512-Q5srk/4vDM54WJsJio3XNn6K2sCG+CQ8G5Wz6bZhRZoAe/+TxjWB/GlFAnYEbkYVlON9FMk/fE3h2RLpPXo4lQ==",
"requires": {
"psl": "^1.1.24",
"punycode": "^1.4.1"
}
}
}
},
"request-promise": {
"version": "4.2.4",
"resolved": "https://registry.npmjs.org/request-promise/-/request-promise-4.2.4.tgz",
"integrity": "sha512-8wgMrvE546PzbR5WbYxUQogUnUDfM0S7QIFZMID+J73vdFARkFy+HElj4T+MWYhpXwlLp0EQ8Zoj8xUA0he4Vg==",
"version": "4.2.6",
"resolved": "https://registry.npmjs.org/request-promise/-/request-promise-4.2.6.tgz",
"integrity": "sha512-HCHI3DJJUakkOr8fNoCc73E5nU5bqITjOYFMDrKHYOXWXrgD/SBaC7LjwuPymUprRyuF06UK7hd/lMHkmUXglQ==",
"requires": {
"bluebird": "^3.5.0",
"request-promise-core": "1.1.2",
"request-promise-core": "1.1.4",
"stealthy-require": "^1.1.1",
"tough-cookie": "^2.3.3"
},
"dependencies": {
"request-promise-core": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/request-promise-core/-/request-promise-core-1.1.4.tgz",
"integrity": "sha512-TTbAfBBRdWD7aNNOoVOBH4pN/KigV6LyapYNNlAPA8JwbovRti1E88m3sYAwsLi5ryhPKsE9APwnjFTgdUjTpw==",
"requires": {
"lodash": "^4.17.19"
}
}
}
},
"request-promise-core": {
@ -10934,13 +10611,6 @@
"integrity": "sha512-UHYyq1MO8GsefGEt7EprS8UrXsm1TxEvFUX1IMTuSLU2Rh7fTIdFtl8xD7JiEYiWU2dl+NYAjCTksTehQUxPag==",
"requires": {
"lodash": "^4.17.11"
},
"dependencies": {
"lodash": {
"version": "4.17.21",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
}
}
},
"request-promise-native": {
@ -11177,22 +10847,16 @@
}
},
"snoowrap": {
"version": "1.20.0",
"integrity": "sha512-TZ+hzHF+1aCCSchHabxGVEpP4qN73fqZS9kpBpsiiGAtogjPVlkIIJviGbxtyZ6/noXWlADqs/53eRcj0+Cl8g==",
"version": "1.23.0",
"resolved": "https://registry.npmjs.org/snoowrap/-/snoowrap-1.23.0.tgz",
"integrity": "sha512-8FIGWr20Gc+d/C3NRrNPp5VQFNb+eGaQyvIkM5KUL71pYpRM231fkRdLNydMrdtLNRDrTZeZApHHCb8Uk/QuTQ==",
"requires": {
"bluebird": "^3.5.5",
"lodash": "^4.17.15",
"promise-chains": "^0.3.11",
"request": "^2.87.0",
"request-promise": "^4.2.2",
"request": "^2.88.2",
"request-promise": "^4.2.6",
"ws": "^3.3.1"
},
"dependencies": {
"lodash": {
"version": "4.17.15",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz",
"integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A=="
}
}
},
"source-map": {
@ -11473,11 +11137,12 @@
"peer": true
},
"tough-cookie": {
"version": "2.3.4",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.3.4.tgz",
"integrity": "sha512-TZ6TTfI5NtZnuyy/Kecv+CnoROnyXn2DN97LontgQpCwsX2XyLYCC0ENhYkehSOwAp8rTQKc/NUIF7BkQ5rKLA==",
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.5.0.tgz",
"integrity": "sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==",
"requires": {
"punycode": "^1.4.1"
"psl": "^1.1.28",
"punycode": "^2.1.1"
}
},
"tr46": {
@ -11486,13 +11151,6 @@
"integrity": "sha512-dTpowEjclQ7Kgx5SdBkqRzVhERQXov8/l9Ft9dVM9fmg0W0KQSVaXX9T4i6twCPNtYiZM53lpSSUAwJbFPOHxA==",
"requires": {
"punycode": "^2.1.0"
},
"dependencies": {
"punycode": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
"integrity": "sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA=="
}
}
},
"traverse": {
@ -11594,8 +11252,9 @@
"integrity": "sha512-MVi79HEPwGk0grI7/Kl6H51fX7wcDTe6gGoCdK22pkRG6IPsi9L6NltClWJfBLUoIE5y3pKy3SplFAs/b0G+QQ=="
},
"unprint": {
"version": "0.8.1",
"integrity": "sha512-6cF2LgIpc7JEBBXWs/CmJgFBAPmvM5NuD3tDvmnnmZR+wrEF03TNR5kv08iPTV/GhT8EHPhZM3LSd8eRipkWdA==",
"version": "0.8.2",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.8.2.tgz",
"integrity": "sha512-mCKPDPwtuECbXJJLQbDn2FFbydr6fLKytyS3pymbxcTh2dkk7NFypMjR7qjU2Uv9Fl91hSE48SjYMsWHNKpp4w==",
"requires": {
"axios": "^0.27.2",
"bottleneck": "^2.19.5",
@ -11723,17 +11382,6 @@
}
}
},
"eslint-config-airbnb-base": {
"version": "15.0.0",
"resolved": "https://registry.npmjs.org/eslint-config-airbnb-base/-/eslint-config-airbnb-base-15.0.0.tgz",
"integrity": "sha512-xaX3z4ZZIcFLvh2oUNvcX5oEofXda7giYmuplVxoOg5A7EXJMrUyqRgR+mhDhPK8LZ4PttFOBvCYDbX3sUoUig==",
"requires": {
"confusing-browser-globals": "^1.0.10",
"object.assign": "^4.1.2",
"object.entries": "^1.1.5",
"semver": "^6.3.0"
}
},
"estraverse": {
"version": "5.3.0",
"resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
@ -11827,11 +11475,6 @@
"resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz",
"integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw=="
},
"punycode": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
"integrity": "sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA=="
},
"saxes": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/saxes/-/saxes-5.0.1.tgz",
@ -11932,13 +11575,6 @@
"integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==",
"requires": {
"punycode": "^2.1.0"
},
"dependencies": {
"punycode": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
"integrity": "sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA=="
}
}
},
"url-pattern": {

View File

@ -49,9 +49,9 @@
"object.omit": "^3.0.0",
"object.pick": "^1.3.0",
"sharp-phash": "^2.1.0",
"snoowrap": "^1.20.0",
"snoowrap": "^1.23.0",
"template-format": "^1.2.4",
"unprint": "^0.8.1",
"unprint": "^0.8.2",
"url-pattern": "^1.0.3",
"winston": "^3.3.3",
"winston-daily-rotate-file": "^4.7.1",

View File

@ -3,6 +3,7 @@
const config = require('config');
const { isAfter, isBefore, isEqual } = require('date-fns');
const omit = require('object.omit');
const unprint = require('unprint');
const dissectLink = require('../dissectLink');
const hashPost = require('./hashPost');
@ -44,7 +45,7 @@ function report(curatedPosts, indexed, user, args) {
}
function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args) {
const host = dissectLink(post.url);
const host = dissectLink(unprint.prefixUrl(post.url, 'https://reddit.com'));
const permalink = `https://reddit.com${post.permalink}`;
const curatedPost = {

View File

@ -2,6 +2,8 @@
const UrlPattern = require('url-pattern');
const logger = require('./logger')(__filename);
const hosts = [
{
method: 'self',
@ -118,19 +120,23 @@ module.exports = function dissectLink(url) {
return acc;
}
// const match = host.pattern.match(url.replace(/(https?:\/\/)|(\/)+/g, '$1$2')); // remove double slashes
const { origin, pathname } = new URL(url);
const match = host.pattern.match(`${origin}${pathname}`); // remove double slashes
try {
const { origin, pathname } = new URL(url);
const match = host.pattern.match(`${origin}${pathname}`); // remove double slashes
if (match) {
return Object.assign(match, {
url,
method: host.method,
label: host.label,
});
if (match) {
return Object.assign(match, {
url,
method: host.method,
label: host.label,
});
}
return null;
} catch (error) {
logger.error(`${error.message}: ${url}`);
return url;
}
return null;
}, null);
if (hostMethod) {

View File

@ -3,6 +3,7 @@
const config = require('config');
const Promise = require('bluebird');
const args = require('../cli')();
const logger = require('../logger')(__filename);
const methods = require('../methods/methods');
@ -30,7 +31,11 @@ async function attachContentInfo(users, { reddit, predata }) {
},
];
} catch (error) {
logger.warn(`${error.message} (${post.permalink})`);
if (args.debug) {
logger.warn(`${error.stack} (${post.permalink})`);
} else {
logger.warn(`${error.message} (${post.permalink})`);
}
if (config.fetch.archives.preview && post.preview) {
logger.info(`Found preview images for unavailable source '${post.url}' (${post.permalink})`);

View File

@ -20,8 +20,8 @@ async function redditAlbum(host, post) {
url: host.url,
title: post.title,
},
items: items.map(url => ({
id: new URL(url).pathname.match(/\/(.*).jpg/)[1],
items: items.map((url) => ({
id: new URL(url).pathname.match(/\/(.*).\w+$/)?.[1],
url,
datetime: post.datetime,
type: mime.getType(url) || 'image/jpeg',