Added puppeteer to http module, refactored Kink scraper to utilize it.

This commit is contained in:
DebaucheryLibrarian 2022-12-28 01:34:12 +01:00
parent 17feadbc15
commit 74214bc060
14 changed files with 537 additions and 62 deletions

449
package-lock.json generated
View File

@ -67,6 +67,8 @@
"promise-task-queue": "^1.2.0", "promise-task-queue": "^1.2.0",
"prop-types": "^15.7.2", "prop-types": "^15.7.2",
"puppeteer": "^18.2.0", "puppeteer": "^18.2.0",
"puppeteer-extra": "^3.3.4",
"puppeteer-extra-plugin-stealth": "^2.11.1",
"sharp": "^0.29.2", "sharp": "^0.29.2",
"showdown": "^1.9.1", "showdown": "^1.9.1",
"source-map-support": "^0.5.16", "source-map-support": "^0.5.16",
@ -76,7 +78,7 @@
"tunnel": "0.0.6", "tunnel": "0.0.6",
"ua-parser-js": "^1.0.32", "ua-parser-js": "^1.0.32",
"undici": "^4.13.0", "undici": "^4.13.0",
"unprint": "^0.7.2", "unprint": "^0.8.0",
"url-pattern": "^1.0.3", "url-pattern": "^1.0.3",
"v-tooltip": "^2.0.3", "v-tooltip": "^2.0.3",
"video.js": "^7.11.4", "video.js": "^7.11.4",
@ -3201,6 +3203,14 @@
"node": ">= 10" "node": ">= 10"
} }
}, },
"node_modules/@types/debug": {
"version": "4.1.7",
"resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.7.tgz",
"integrity": "sha512-9AonUzyTjXXhEOa0DnqpzZi6VHlqKMswga9EXjpXnnqxwLtdvPPtlO8evrI5D9S6asFRCQ6v+wpiUKbw+vKqyg==",
"dependencies": {
"@types/ms": "*"
}
},
"node_modules/@types/eslint": { "node_modules/@types/eslint": {
"version": "7.28.2", "version": "7.28.2",
"resolved": "https://registry.npmjs.org/@types/eslint/-/eslint-7.28.2.tgz", "resolved": "https://registry.npmjs.org/@types/eslint/-/eslint-7.28.2.tgz",
@ -3252,6 +3262,11 @@
"integrity": "sha512-jhuKLIRrhvCPLqwPcx6INqmKeiA5EWrsCOPhrlFSrbrmU4ZMPjj5Ul/oLCMDO98XRUIwVm78xICz4EPCektzeQ==", "integrity": "sha512-jhuKLIRrhvCPLqwPcx6INqmKeiA5EWrsCOPhrlFSrbrmU4ZMPjj5Ul/oLCMDO98XRUIwVm78xICz4EPCektzeQ==",
"dev": true "dev": true
}, },
"node_modules/@types/ms": {
"version": "0.7.31",
"resolved": "https://registry.npmjs.org/@types/ms/-/ms-0.7.31.tgz",
"integrity": "sha512-iiUgKzV9AuaEkZqkOLDIvlQiL6ltuZd9tGcW3gwpnX8JbuiuhFlEGmmFXEXkN50Cvq7Os88IY2v0dkDqXYWVgA=="
},
"node_modules/@types/node": { "node_modules/@types/node": {
"version": "16.11.6", "version": "16.11.6",
"resolved": "https://registry.npmjs.org/@types/node/-/node-16.11.6.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-16.11.6.tgz",
@ -10761,6 +10776,14 @@
"language-subtag-registry": "~0.3.2" "language-subtag-registry": "~0.3.2"
} }
}, },
"node_modules/lazy-cache": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-1.0.4.tgz",
"integrity": "sha512-RE2g0b5VGZsOCFOCgP7omTRYFqydmZkBwl5oNnQ1lDYC57uyO9KqNnNVxT7COSHTxrRCWVcAVOcbjk+tvh/rgQ==",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/lcid": { "node_modules/lcid": {
"version": "1.0.0", "version": "1.0.0",
"resolved": "https://registry.npmjs.org/lcid/-/lcid-1.0.0.tgz", "resolved": "https://registry.npmjs.org/lcid/-/lcid-1.0.0.tgz",
@ -11311,6 +11334,97 @@
"url": "https://github.com/sponsors/sindresorhus" "url": "https://github.com/sponsors/sindresorhus"
} }
}, },
"node_modules/merge-deep": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/merge-deep/-/merge-deep-3.0.3.tgz",
"integrity": "sha512-qtmzAS6t6grwEkNrunqTBdn0qKwFgNWvlxUbAV8es9M7Ot1EbyApytCnvE0jALPa46ZpKDUo527kKiaWplmlFA==",
"dependencies": {
"arr-union": "^3.1.0",
"clone-deep": "^0.2.4",
"kind-of": "^3.0.2"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/merge-deep/node_modules/clone-deep": {
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-0.2.4.tgz",
"integrity": "sha512-we+NuQo2DHhSl+DP6jlUiAhyAjBQrYnpOk15rN6c6JSPScjiCLh8IbSU+VTcph6YS3o7mASE8a0+gbZ7ChLpgg==",
"dependencies": {
"for-own": "^0.1.3",
"is-plain-object": "^2.0.1",
"kind-of": "^3.0.2",
"lazy-cache": "^1.0.3",
"shallow-clone": "^0.1.2"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/merge-deep/node_modules/for-own": {
"version": "0.1.5",
"resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz",
"integrity": "sha512-SKmowqGTJoPzLO1T0BBJpkfp3EMacCMOuH40hOUbrbzElVktk4DioXVM99QkLCyKoiuOmyjgcWMpVz2xjE7LZw==",
"dependencies": {
"for-in": "^1.0.1"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/merge-deep/node_modules/is-extendable": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz",
"integrity": "sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/merge-deep/node_modules/kind-of": {
"version": "3.2.2",
"resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz",
"integrity": "sha512-NOW9QQXMoZGg/oqnVNoNTTIFEIid1627WCffUBJEdMxYApq7mNE7CpzucIPc+ZQg25Phej7IJSmX3hO+oblOtQ==",
"dependencies": {
"is-buffer": "^1.1.5"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/merge-deep/node_modules/shallow-clone": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-0.1.2.tgz",
"integrity": "sha512-J1zdXCky5GmNnuauESROVu31MQSnLoYvlyEn6j2Ztk6Q5EHFIhxkMhYcv6vuDzl2XEzoRr856QwzMgWM/TmZgw==",
"dependencies": {
"is-extendable": "^0.1.1",
"kind-of": "^2.0.1",
"lazy-cache": "^0.2.3",
"mixin-object": "^2.0.1"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/merge-deep/node_modules/shallow-clone/node_modules/kind-of": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/kind-of/-/kind-of-2.0.1.tgz",
"integrity": "sha512-0u8i1NZ/mg0b+W3MGGw5I7+6Eib2nx72S/QvXa0hYjEkjTknYmEYQJwGu3mLC0BrhtJjtQafTkyRUQ75Kx0LVg==",
"dependencies": {
"is-buffer": "^1.0.2"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/merge-deep/node_modules/shallow-clone/node_modules/lazy-cache": {
"version": "0.2.7",
"resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-0.2.7.tgz",
"integrity": "sha512-gkX52wvU/R8DVMMt78ATVPFMJqfW8FPz1GZ1sVHBVQHmu/WvhIWE4cE1GBzhJNFicDeYhnwp6Rl35BcAIM3YOQ==",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/merge-descriptors": { "node_modules/merge-descriptors": {
"version": "1.0.1", "version": "1.0.1",
"resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
@ -11534,6 +11648,34 @@
"node": ">=0.10.0" "node": ">=0.10.0"
} }
}, },
"node_modules/mixin-object": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/mixin-object/-/mixin-object-2.0.1.tgz",
"integrity": "sha512-ALGF1Jt9ouehcaXaHhn6t1yGWRqGaHkPFndtFVHfZXOvkIZ/yoGaSi0AHVTafb3ZBGg4dr/bDwnaEKqCXzchMA==",
"dependencies": {
"for-in": "^0.1.3",
"is-extendable": "^0.1.1"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/mixin-object/node_modules/for-in": {
"version": "0.1.8",
"resolved": "https://registry.npmjs.org/for-in/-/for-in-0.1.8.tgz",
"integrity": "sha512-F0to7vbBSHP8E3l6dCjxNOLuSFAACIxFy3UehTUlG7svlXi37HHsDkyVcHo0Pq8QwrE+pXvWSVX3ZT1T9wAZ9g==",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/mixin-object/node_modules/is-extendable": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz",
"integrity": "sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/mkdirp": { "node_modules/mkdirp": {
"version": "0.5.5", "version": "0.5.5",
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.5.tgz", "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.5.tgz",
@ -13640,6 +13782,137 @@
} }
} }
}, },
"node_modules/puppeteer-extra": {
"version": "3.3.4",
"resolved": "https://registry.npmjs.org/puppeteer-extra/-/puppeteer-extra-3.3.4.tgz",
"integrity": "sha512-fN5pHvSMJ8d1o7Z8wLLTQOUBpORD2BcFn+KDs7QnkGZs9SV69hcUcce67vX4L4bNSEG3A0P6Osrv+vWNhhdm8w==",
"dependencies": {
"@types/debug": "^4.1.0",
"debug": "^4.1.1",
"deepmerge": "^4.2.2"
},
"engines": {
"node": ">=8"
},
"peerDependencies": {
"@types/puppeteer": "*",
"puppeteer": "*",
"puppeteer-core": "*"
},
"peerDependenciesMeta": {
"@types/puppeteer": {
"optional": true
},
"puppeteer": {
"optional": true
},
"puppeteer-core": {
"optional": true
}
}
},
"node_modules/puppeteer-extra-plugin": {
"version": "3.2.2",
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin/-/puppeteer-extra-plugin-3.2.2.tgz",
"integrity": "sha512-0uatQxzuVn8yegbrEwSk03wvwpMB5jNs7uTTnermylLZzoT+1rmAQaJXwlS3+vADUbw6ELNgNEHC7Skm0RqHbQ==",
"dependencies": {
"@types/debug": "^4.1.0",
"debug": "^4.1.1",
"merge-deep": "^3.0.1"
},
"engines": {
"node": ">=9.11.2"
},
"peerDependencies": {
"playwright-extra": "*",
"puppeteer-extra": "*"
},
"peerDependenciesMeta": {
"playwright-extra": {
"optional": true
},
"puppeteer-extra": {
"optional": true
}
}
},
"node_modules/puppeteer-extra-plugin-stealth": {
"version": "2.11.1",
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-stealth/-/puppeteer-extra-plugin-stealth-2.11.1.tgz",
"integrity": "sha512-n0wdC0Ilc9tk5L6FWLyd0P2gT8b2fp+2NuB+KB0oTSw3wXaZ0D6WNakjJsayJ4waGzIJFCUHkmK9zgx5NKMoFw==",
"dependencies": {
"debug": "^4.1.1",
"puppeteer-extra-plugin": "^3.2.2",
"puppeteer-extra-plugin-user-preferences": "^2.4.0"
},
"engines": {
"node": ">=8"
},
"peerDependencies": {
"playwright-extra": "*",
"puppeteer-extra": "*"
},
"peerDependenciesMeta": {
"playwright-extra": {
"optional": true
},
"puppeteer-extra": {
"optional": true
}
}
},
"node_modules/puppeteer-extra-plugin-user-data-dir": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-data-dir/-/puppeteer-extra-plugin-user-data-dir-2.4.0.tgz",
"integrity": "sha512-qrhYPTGIqzL2hpeJ5DXjf8xMy5rt1UvcqSgpGTTOUOjIMz1ROWnKHjBoE9fNBJ4+ToRZbP8MzIDXWlEk/e1zJA==",
"dependencies": {
"debug": "^4.1.1",
"fs-extra": "^10.0.0",
"puppeteer-extra-plugin": "^3.2.2",
"rimraf": "^3.0.2"
},
"engines": {
"node": ">=8"
},
"peerDependencies": {
"playwright-extra": "*",
"puppeteer-extra": "*"
},
"peerDependenciesMeta": {
"playwright-extra": {
"optional": true
},
"puppeteer-extra": {
"optional": true
}
}
},
"node_modules/puppeteer-extra-plugin-user-preferences": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-preferences/-/puppeteer-extra-plugin-user-preferences-2.4.0.tgz",
"integrity": "sha512-4XxMhMkJ+qqLsPY9ULF90qS9Bj1Qrwwgp1TY9zTdp1dJuy7QSgYE7xlyamq3cKrRuzg3QUOqygJo52sVeXSg5A==",
"dependencies": {
"debug": "^4.1.1",
"deepmerge": "^4.2.2",
"puppeteer-extra-plugin": "^3.2.2",
"puppeteer-extra-plugin-user-data-dir": "^2.4.0"
},
"engines": {
"node": ">=8"
},
"peerDependencies": {
"playwright-extra": "*",
"puppeteer-extra": "*"
},
"peerDependenciesMeta": {
"playwright-extra": {
"optional": true
},
"puppeteer-extra": {
"optional": true
}
}
},
"node_modules/qs": { "node_modules/qs": {
"version": "6.7.0", "version": "6.7.0",
"resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz", "resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz",
@ -16735,9 +17008,9 @@
} }
}, },
"node_modules/unprint": { "node_modules/unprint": {
"version": "0.7.2", "version": "0.8.0",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.7.2.tgz", "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.8.0.tgz",
"integrity": "sha512-uG76G9b0NCpmggvtG1Nj1cgOVlQp46OEWfhIEi/VdDSIvPZJ5nSS8hqfA6ljR6V6/P8ZXVYDnV7PFJHh/4Imsg==", "integrity": "sha512-PBVdumsc7DfLwwTeSD5xSqITj3PFTUr/wA4unUykzov05YMp0a+Z3tVM1H7JFLZuRgzHRfKDbEpvAcj8vfYlLA==",
"dependencies": { "dependencies": {
"axios": "^0.27.2", "axios": "^0.27.2",
"bottleneck": "^2.19.5", "bottleneck": "^2.19.5",
@ -20608,6 +20881,14 @@
"resolved": "https://registry.npmjs.org/@tootallnate/once/-/once-2.0.0.tgz", "resolved": "https://registry.npmjs.org/@tootallnate/once/-/once-2.0.0.tgz",
"integrity": "sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==" "integrity": "sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A=="
}, },
"@types/debug": {
"version": "4.1.7",
"resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.7.tgz",
"integrity": "sha512-9AonUzyTjXXhEOa0DnqpzZi6VHlqKMswga9EXjpXnnqxwLtdvPPtlO8evrI5D9S6asFRCQ6v+wpiUKbw+vKqyg==",
"requires": {
"@types/ms": "*"
}
},
"@types/eslint": { "@types/eslint": {
"version": "7.28.2", "version": "7.28.2",
"resolved": "https://registry.npmjs.org/@types/eslint/-/eslint-7.28.2.tgz", "resolved": "https://registry.npmjs.org/@types/eslint/-/eslint-7.28.2.tgz",
@ -20659,6 +20940,11 @@
"integrity": "sha512-jhuKLIRrhvCPLqwPcx6INqmKeiA5EWrsCOPhrlFSrbrmU4ZMPjj5Ul/oLCMDO98XRUIwVm78xICz4EPCektzeQ==", "integrity": "sha512-jhuKLIRrhvCPLqwPcx6INqmKeiA5EWrsCOPhrlFSrbrmU4ZMPjj5Ul/oLCMDO98XRUIwVm78xICz4EPCektzeQ==",
"dev": true "dev": true
}, },
"@types/ms": {
"version": "0.7.31",
"resolved": "https://registry.npmjs.org/@types/ms/-/ms-0.7.31.tgz",
"integrity": "sha512-iiUgKzV9AuaEkZqkOLDIvlQiL6ltuZd9tGcW3gwpnX8JbuiuhFlEGmmFXEXkN50Cvq7Os88IY2v0dkDqXYWVgA=="
},
"@types/node": { "@types/node": {
"version": "16.11.6", "version": "16.11.6",
"resolved": "https://registry.npmjs.org/@types/node/-/node-16.11.6.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-16.11.6.tgz",
@ -26450,6 +26736,11 @@
"language-subtag-registry": "~0.3.2" "language-subtag-registry": "~0.3.2"
} }
}, },
"lazy-cache": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-1.0.4.tgz",
"integrity": "sha512-RE2g0b5VGZsOCFOCgP7omTRYFqydmZkBwl5oNnQ1lDYC57uyO9KqNnNVxT7COSHTxrRCWVcAVOcbjk+tvh/rgQ=="
},
"lcid": { "lcid": {
"version": "1.0.0", "version": "1.0.0",
"resolved": "https://registry.npmjs.org/lcid/-/lcid-1.0.0.tgz", "resolved": "https://registry.npmjs.org/lcid/-/lcid-1.0.0.tgz",
@ -26903,6 +27194,77 @@
} }
} }
}, },
"merge-deep": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/merge-deep/-/merge-deep-3.0.3.tgz",
"integrity": "sha512-qtmzAS6t6grwEkNrunqTBdn0qKwFgNWvlxUbAV8es9M7Ot1EbyApytCnvE0jALPa46ZpKDUo527kKiaWplmlFA==",
"requires": {
"arr-union": "^3.1.0",
"clone-deep": "^0.2.4",
"kind-of": "^3.0.2"
},
"dependencies": {
"clone-deep": {
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-0.2.4.tgz",
"integrity": "sha512-we+NuQo2DHhSl+DP6jlUiAhyAjBQrYnpOk15rN6c6JSPScjiCLh8IbSU+VTcph6YS3o7mASE8a0+gbZ7ChLpgg==",
"requires": {
"for-own": "^0.1.3",
"is-plain-object": "^2.0.1",
"kind-of": "^3.0.2",
"lazy-cache": "^1.0.3",
"shallow-clone": "^0.1.2"
}
},
"for-own": {
"version": "0.1.5",
"resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz",
"integrity": "sha512-SKmowqGTJoPzLO1T0BBJpkfp3EMacCMOuH40hOUbrbzElVktk4DioXVM99QkLCyKoiuOmyjgcWMpVz2xjE7LZw==",
"requires": {
"for-in": "^1.0.1"
}
},
"is-extendable": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz",
"integrity": "sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw=="
},
"kind-of": {
"version": "3.2.2",
"resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz",
"integrity": "sha512-NOW9QQXMoZGg/oqnVNoNTTIFEIid1627WCffUBJEdMxYApq7mNE7CpzucIPc+ZQg25Phej7IJSmX3hO+oblOtQ==",
"requires": {
"is-buffer": "^1.1.5"
}
},
"shallow-clone": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-0.1.2.tgz",
"integrity": "sha512-J1zdXCky5GmNnuauESROVu31MQSnLoYvlyEn6j2Ztk6Q5EHFIhxkMhYcv6vuDzl2XEzoRr856QwzMgWM/TmZgw==",
"requires": {
"is-extendable": "^0.1.1",
"kind-of": "^2.0.1",
"lazy-cache": "^0.2.3",
"mixin-object": "^2.0.1"
},
"dependencies": {
"kind-of": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/kind-of/-/kind-of-2.0.1.tgz",
"integrity": "sha512-0u8i1NZ/mg0b+W3MGGw5I7+6Eib2nx72S/QvXa0hYjEkjTknYmEYQJwGu3mLC0BrhtJjtQafTkyRUQ75Kx0LVg==",
"requires": {
"is-buffer": "^1.0.2"
}
},
"lazy-cache": {
"version": "0.2.7",
"resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-0.2.7.tgz",
"integrity": "sha512-gkX52wvU/R8DVMMt78ATVPFMJqfW8FPz1GZ1sVHBVQHmu/WvhIWE4cE1GBzhJNFicDeYhnwp6Rl35BcAIM3YOQ=="
}
}
}
}
},
"merge-descriptors": { "merge-descriptors": {
"version": "1.0.1", "version": "1.0.1",
"resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
@ -27067,6 +27429,27 @@
"is-extendable": "^1.0.1" "is-extendable": "^1.0.1"
} }
}, },
"mixin-object": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/mixin-object/-/mixin-object-2.0.1.tgz",
"integrity": "sha512-ALGF1Jt9ouehcaXaHhn6t1yGWRqGaHkPFndtFVHfZXOvkIZ/yoGaSi0AHVTafb3ZBGg4dr/bDwnaEKqCXzchMA==",
"requires": {
"for-in": "^0.1.3",
"is-extendable": "^0.1.1"
},
"dependencies": {
"for-in": {
"version": "0.1.8",
"resolved": "https://registry.npmjs.org/for-in/-/for-in-0.1.8.tgz",
"integrity": "sha512-F0to7vbBSHP8E3l6dCjxNOLuSFAACIxFy3UehTUlG7svlXi37HHsDkyVcHo0Pq8QwrE+pXvWSVX3ZT1T9wAZ9g=="
},
"is-extendable": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz",
"integrity": "sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw=="
}
}
},
"mkdirp": { "mkdirp": {
"version": "0.5.5", "version": "0.5.5",
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.5.tgz", "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.5.tgz",
@ -28658,6 +29041,58 @@
} }
} }
}, },
"puppeteer-extra": {
"version": "3.3.4",
"resolved": "https://registry.npmjs.org/puppeteer-extra/-/puppeteer-extra-3.3.4.tgz",
"integrity": "sha512-fN5pHvSMJ8d1o7Z8wLLTQOUBpORD2BcFn+KDs7QnkGZs9SV69hcUcce67vX4L4bNSEG3A0P6Osrv+vWNhhdm8w==",
"requires": {
"@types/debug": "^4.1.0",
"debug": "^4.1.1",
"deepmerge": "^4.2.2"
}
},
"puppeteer-extra-plugin": {
"version": "3.2.2",
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin/-/puppeteer-extra-plugin-3.2.2.tgz",
"integrity": "sha512-0uatQxzuVn8yegbrEwSk03wvwpMB5jNs7uTTnermylLZzoT+1rmAQaJXwlS3+vADUbw6ELNgNEHC7Skm0RqHbQ==",
"requires": {
"@types/debug": "^4.1.0",
"debug": "^4.1.1",
"merge-deep": "^3.0.1"
}
},
"puppeteer-extra-plugin-stealth": {
"version": "2.11.1",
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-stealth/-/puppeteer-extra-plugin-stealth-2.11.1.tgz",
"integrity": "sha512-n0wdC0Ilc9tk5L6FWLyd0P2gT8b2fp+2NuB+KB0oTSw3wXaZ0D6WNakjJsayJ4waGzIJFCUHkmK9zgx5NKMoFw==",
"requires": {
"debug": "^4.1.1",
"puppeteer-extra-plugin": "^3.2.2",
"puppeteer-extra-plugin-user-preferences": "^2.4.0"
}
},
"puppeteer-extra-plugin-user-data-dir": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-data-dir/-/puppeteer-extra-plugin-user-data-dir-2.4.0.tgz",
"integrity": "sha512-qrhYPTGIqzL2hpeJ5DXjf8xMy5rt1UvcqSgpGTTOUOjIMz1ROWnKHjBoE9fNBJ4+ToRZbP8MzIDXWlEk/e1zJA==",
"requires": {
"debug": "^4.1.1",
"fs-extra": "^10.0.0",
"puppeteer-extra-plugin": "^3.2.2",
"rimraf": "^3.0.2"
}
},
"puppeteer-extra-plugin-user-preferences": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-preferences/-/puppeteer-extra-plugin-user-preferences-2.4.0.tgz",
"integrity": "sha512-4XxMhMkJ+qqLsPY9ULF90qS9Bj1Qrwwgp1TY9zTdp1dJuy7QSgYE7xlyamq3cKrRuzg3QUOqygJo52sVeXSg5A==",
"requires": {
"debug": "^4.1.1",
"deepmerge": "^4.2.2",
"puppeteer-extra-plugin": "^3.2.2",
"puppeteer-extra-plugin-user-data-dir": "^2.4.0"
}
},
"qs": { "qs": {
"version": "6.7.0", "version": "6.7.0",
"resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz", "resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz",
@ -31027,9 +31462,9 @@
"integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=" "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw="
}, },
"unprint": { "unprint": {
"version": "0.7.2", "version": "0.8.0",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.7.2.tgz", "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.8.0.tgz",
"integrity": "sha512-uG76G9b0NCpmggvtG1Nj1cgOVlQp46OEWfhIEi/VdDSIvPZJ5nSS8hqfA6ljR6V6/P8ZXVYDnV7PFJHh/4Imsg==", "integrity": "sha512-PBVdumsc7DfLwwTeSD5xSqITj3PFTUr/wA4unUykzov05YMp0a+Z3tVM1H7JFLZuRgzHRfKDbEpvAcj8vfYlLA==",
"requires": { "requires": {
"axios": "^0.27.2", "axios": "^0.27.2",
"bottleneck": "^2.19.5", "bottleneck": "^2.19.5",

View File

@ -126,6 +126,8 @@
"promise-task-queue": "^1.2.0", "promise-task-queue": "^1.2.0",
"prop-types": "^15.7.2", "prop-types": "^15.7.2",
"puppeteer": "^18.2.0", "puppeteer": "^18.2.0",
"puppeteer-extra": "^3.3.4",
"puppeteer-extra-plugin-stealth": "^2.11.1",
"sharp": "^0.29.2", "sharp": "^0.29.2",
"showdown": "^1.9.1", "showdown": "^1.9.1",
"source-map-support": "^0.5.16", "source-map-support": "^0.5.16",
@ -135,7 +137,7 @@
"tunnel": "0.0.6", "tunnel": "0.0.6",
"ua-parser-js": "^1.0.32", "ua-parser-js": "^1.0.32",
"undici": "^4.13.0", "undici": "^4.13.0",
"unprint": "^0.7.2", "unprint": "^0.8.0",
"url-pattern": "^1.0.3", "url-pattern": "^1.0.3",
"v-tooltip": "^2.0.3", "v-tooltip": "^2.0.3",
"video.js": "^7.11.4", "video.js": "^7.11.4",

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

View File

@ -0,0 +1 @@
E-mail Rocky Emerson for permission to use these photos https://twitter.com/RockyEmersonXXX/status/1605387352087547905

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.7 KiB

View File

@ -208,6 +208,7 @@ async function init() {
} }
await http.destroyBypassSessions(); await http.destroyBypassSessions();
await http.destroyBrowserSessions();
knex.destroy(); knex.destroy();
done = true; done = true;

View File

@ -221,10 +221,14 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
async function scrapeReleases(baseReleases, entitiesBySlug, type) { async function scrapeReleases(baseReleases, entitiesBySlug, type) {
const entitiesWithBeforeDataEntries = await Promise.all(Object.entries(entitiesBySlug).map(async ([slug, entity]) => { const entitiesWithBeforeDataEntries = await Promise.all(Object.entries(entitiesBySlug).map(async ([slug, entity]) => {
console.log('scraper', entity.scraper?.beforeFetchScenes);
if (entity.scraper?.beforeFetchScenes) { if (entity.scraper?.beforeFetchScenes) {
const parameters = getRecursiveParameters(entity); const parameters = getRecursiveParameters(entity);
const preData = await entity.scraper.beforeFetchScenes(entity, parameters); const preData = await entity.scraper.beforeFetchScenes(entity, parameters);
console.log('pre data', preData);
return [slug, { ...entity, preData }]; return [slug, { ...entity, preData }];
} }

View File

@ -1,26 +1,34 @@
'use strict'; 'use strict';
const unprint = require('unprint');
const qu = require('../utils/qu'); const qu = require('../utils/qu');
const http = require('../utils/http'); const http = require('../utils/http');
const slugify = require('../utils/slugify');
function scrapeAll(scenes) { function scrapeAll(scenes) {
return scenes.map(({ query }) => { return scenes.map(({ query }) => {
const release = {}; const release = {};
const href = query.url('.shoot-thumb-info > a'); const href = query.url('.shoot-link');
release.url = `https://kink.com${href}`;
release.url = `https://www.kink.com${href}`;
release.shootId = href.split('/').slice(-1)[0]; release.shootId = href.split('/').slice(-1)[0];
release.entryId = release.shootId; release.entryId = release.shootId;
release.title = query.q('.shoot-thumb-title a', true); release.title = query.content('.shoot-thumb-title a', true);
release.date = query.date('.date', 'MMM DD, YYYY'); release.date = query.date('.date', 'MMM DD, YYYY');
release.actors = query.all('.shoot-thumb-models a', true); release.actors = query.all('.shoot-thumb-models a').map((actorEl) => ({
release.stars = query.q('.average-rating', 'data-rating') / 10; name: unprint.query.content(actorEl),
url: unprint.query.url(actorEl, null, { origin: 'https://www.kink.com' }),
}));
release.rating = query.number('.thumb-ratings') / 10;
release.poster = query.img('.adimage'); release.poster = query.img('.adimage');
release.photos = query.imgs('.rollover .roll-image', 'data-imagesrc').map((photo) => [ release.photos = query.imgs('.rollover .roll-image', { attribute: 'data-imagesrc' }).map((photo) => [
photo.replace('410/', '830/'), photo.replace('410/', '830/'),
photo, photo,
]); ]);
@ -31,25 +39,28 @@ function scrapeAll(scenes) {
}); });
} }
async function scrapeScene({ query }, url) { function scrapeScene({ query }, url) {
const release = { url }; const release = { url };
release.shootId = new URL(url).pathname.split('/')[2]; release.shootId = new URL(url).pathname.split('/')[2];
release.entryId = release.shootId; release.entryId = release.shootId;
release.title = query.q('.shoot-title span.favorite-button', 'data-title'); release.title = query.attribute('.shoot-title .favorite-button', 'data-title') || query.content('.shoot-title');
release.description = query.q('.description-text', true); release.description = query.content('.description-text');
release.date = query.date('.shoot-date', 'MMMM DD, YYYY'); release.date = query.date('.shoot-date', 'MMMM DD, YYYY');
release.actors = query.all('.names a', true).map((actor) => actor.replace(/,\s*/, '')); release.actors = query.elements('.names a').map((actorEl) => ({
release.director = query.q('.director-name', true); name: unprint.query.content(actorEl).replace(/,\s*/, ''),
url: unprint.query.url(actorEl, null, { origin: 'https://www.kink.com' }),
}));
release.director = query.content('.director-name');
release.photos = query.imgs('.gallery .thumb img, #gallerySlider .gallery-img', 'data-image-file'); release.photos = query.imgs('.gallery .thumb img, #gallerySlider .gallery-img', 'data-image-file');
release.poster = query.poster(); release.poster = query.poster();
release.tags = query.all('.tag-list a[href*="/tag"]', true).map((tag) => tag.replace(/,\s*/, '')); release.tags = query.contents('.tag-list a[href*="/tag"]').map((tag) => tag.replace(/,\s*/, ''));
const trailer = query.q('.player span[data-type="trailer-src"]', 'data-url'); const trailer = query.attribute('.player span[data-type="trailer-src"]', 'data-url');
if (trailer) { if (trailer) {
release.trailer = [ release.trailer = [
@ -72,7 +83,9 @@ async function scrapeScene({ query }, url) {
]; ];
} }
release.channel = query.url('.shoot-logo a').split('/').slice(-1)[0]; release.channel = slugify(query.url('.shoot-logo a')?.split('/').slice(-1)[0], '');
console.log(release);
return release; return release;
} }
@ -131,33 +144,43 @@ async function scrapeProfile({ query }, actorUrl, include) {
return profile; return profile;
} }
async function beforeFetchLatest() {
const tab = await http.getBrowserSession();
await tab.goto('https://www.kink.com');
await tab.click('#ccc-recommended-settings', { delay: 1000 });
await tab.click('#contentTypeModal button', { delay: 1000 });
return tab;
}
async function fetchLatest(site, page = 1) { async function fetchLatest(site, page = 1) {
// const res = await qu.getAll(`${site.url}/latest/page/${page}`, '.shoot-list .shoot', { const { tab } = await http.getBrowserSession('kink', { headless: false });
// const res = await qu.getAll(`https://www.kink.com/channel/bound-gang-bangs/latest/page/${page}`, '.shoot-list .shoot', { const res = await tab.goto(`https://www.kink.com/search?type=shoots&channelIds=${site.slug}&sort=published&page=${page}`);
const res = await qu.getAll(`https://www.kink.com/search?type=shoots&channelIds=${site.slug}&sort=published&page=${page}`, '.shoot-list .shoot', { const status = res.status();
cookie: 'ct=2;',
}, {
includeDefaultHeaders: false,
followRedirects: false,
});
console.log(res.items); if (status === 200) {
const html = await tab.content();
const items = unprint.initAll(html, '.results .shoot-card');
if (res.ok) { const scenes = scrapeAll(items, site);
return scrapeAll(res.items, site);
await tab.close();
return scenes;
} }
return res.status; return status;
}
/**
 * Load a single scene page in a puppeteer tab and scrape it.
 *
 * @param {string} url - Scene URL to navigate to.
 * @param {object} channel - Passed through to scrapeScene as its third argument.
 * @returns {Promise<object|number>} The scraped release when the page answers with
 *   status 200, otherwise the HTTP status code of the response.
 */
async function fetchScene(url, channel) {
	const { tab } = await http.getBrowserSession('kink');

	try {
		const res = await tab.goto(url);
		const status = res.status();

		if (status !== 200) {
			return status;
		}

		const html = await tab.content();
		const item = unprint.init(html);

		return scrapeScene(item, url, channel);
	} finally {
		// Close the tab on every exit path: a non-200 response or a thrown
		// error would otherwise leave the tab open in the shared browser.
		await tab.close();
	}
}
async function fetchProfile({ name: actorName }, entity, include) { async function fetchProfile({ name: actorName }, entity, include) {
@ -185,8 +208,8 @@ async function fetchProfile({ name: actorName }, entity, include) {
} }
module.exports = { module.exports = {
beforeFetchLatest, // beforeNetwork,
fetchLatest, fetchLatest,
fetchScene,
fetchProfile, fetchProfile,
scrapeScene,
}; };

View File

@ -235,7 +235,7 @@ async function scrapeChannel(channelEntity, accNetworkReleases, beforeNetwork) {
} }
try { try {
const beforeFetchLatest = await scraper.beforeFetchLatest?.(channelEntity); const beforeFetchLatest = await scraper.beforeFetchLatest?.(channelEntity, { beforeNetwork });
return await scrapeChannelReleases(layoutScraper, channelEntity, { return await scrapeChannelReleases(layoutScraper, channelEntity, {
...accNetworkReleases, ...accNetworkReleases,

View File

@ -9,7 +9,8 @@ const stream = require('stream');
const tunnel = require('tunnel'); const tunnel = require('tunnel');
const Bottleneck = require('bottleneck'); const Bottleneck = require('bottleneck');
const { JSDOM, toughCookie } = require('jsdom'); const { JSDOM, toughCookie } = require('jsdom');
const puppeteer = require('puppeteer'); const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const windows = require('./http-windows'); const windows = require('./http-windows');
@ -30,12 +31,13 @@ const limiters = {
const bypassSessions = new Map(); const bypassSessions = new Map();
let browser = null; let browser = null;
const browserSessions = new Map();
Promise.config({ Promise.config({
cancellation: true, cancellation: true,
}); });
puppeteer.use(StealthPlugin());
const defaultOptions = { const defaultOptions = {
timeout: argv.requestTimeout, timeout: argv.requestTimeout,
encodeJSON: true, encodeJSON: true,
@ -156,26 +158,27 @@ function extractJson(solution) {
return solution.response; return solution.response;
} }
async function getBrowserSession(hostname) { async function getBrowserSession(identifier, options = {}) {
console.log(browserSessions); return limiters.bypass.schedule(async () => {
if (browserSessions.has(hostname)) {
return browserSessions.get(hostname);
}
if (!browser) { if (!browser) {
browser = await puppeteer.launch({ headless: false }); browser = await puppeteer.launch({
headless: typeof options.headless === 'undefined' ? true : options.headless,
// headless: false,
});
logger.info('Initialized puppeteer browser');
} }
const page = await browser.newPage(); const tab = await browser.newPage();
browserSessions.set(hostname, page); logger.verbose(`Opened puppeteer tab${identifier ? `for ${identifier}` : ''}`);
return page; return { browser, tab };
});
} }
async function bypassBrowserRequest(url, _options) { async function bypassBrowserRequest(url, _options) {
const page = await limiters.bypass.schedule(async () => getBrowserSession(new URL(url).hostname)); const page = await getBrowserSession(new URL(url).hostname);
const res = await page.goto(url); const res = await page.goto(url);
const body = await page.content(); const body = await page.content();
@ -254,6 +257,10 @@ async function destroyBypassSessions() {
await Promise.map(sessionListRes.body.sessions, async (sessionId) => destroyBypassSession(sessionId), { concurrency: 5 }); await Promise.map(sessionListRes.body.sessions, async (sessionId) => destroyBypassSession(sessionId), { concurrency: 5 });
} }
/**
 * Close the shared puppeteer browser, if one has been launched.
 *
 * Does nothing when no browser exists. The module-level reference is cleared
 * so that getBrowserSession(), which launches only when `browser` is falsy,
 * starts a fresh browser instead of reusing a closed one.
 *
 * @returns {Promise<void>}
 */
async function destroyBrowserSessions() {
	if (!browser) {
		return;
	}

	const activeBrowser = browser;

	// Clear the reference before awaiting so nothing opens a tab on a browser that is shutting down.
	browser = null;

	await activeBrowser.close();
}
async function bypassCloudflareRequest(url, method, body, cloudflareBypass, options, attempts = 0) { async function bypassCloudflareRequest(url, method, body, cloudflareBypass, options, attempts = 0) {
const sessionId = await limiters.bypass.schedule(async () => getBypassSession(url, cloudflareBypass)); const sessionId = await limiters.bypass.schedule(async () => getBypassSession(url, cloudflareBypass));
@ -456,9 +463,11 @@ module.exports = {
patch, patch,
session: getSession, session: getSession,
cookieJar: getCookieJar, cookieJar: getCookieJar,
browser,
getBrowserSession, getBrowserSession,
getBypassSession, getBypassSession,
getSession, getSession,
getCookieJar, getCookieJar,
destroyBypassSessions, destroyBypassSessions,
destroyBrowserSessions,
}; };