Improved Love Her Films profile scraping.

This commit is contained in:
DebaucheryLibrarian 2022-12-15 23:22:52 +01:00
parent 97c641af4e
commit 78b1bd2e48
4 changed files with 29 additions and 6 deletions

1
.gitignore vendored
View File

@ -12,3 +12,4 @@ assets/js/config/
!assets/js/config/default.js !assets/js/config/default.js
*.heapprofile *.heapprofile
*.heapsnapshot *.heapsnapshot
.vscode

24
package-lock.json generated
View File

@ -74,6 +74,7 @@
"tippy.js": "^6.3.1", "tippy.js": "^6.3.1",
"tough-cookie": "^4.0.0", "tough-cookie": "^4.0.0",
"tunnel": "0.0.6", "tunnel": "0.0.6",
"ua-parser-js": "^1.0.32",
"undici": "^4.13.0", "undici": "^4.13.0",
"unprint": "^0.7.2", "unprint": "^0.7.2",
"url-pattern": "^1.0.3", "url-pattern": "^1.0.3",
@ -16551,6 +16552,24 @@
"resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz",
"integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=" "integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c="
}, },
"node_modules/ua-parser-js": {
"version": "1.0.32",
"resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-1.0.32.tgz",
"integrity": "sha512-dXVsz3M4j+5tTiovFVyVqssXBu5HM47//YSOeZ9fQkdDKkfzv2v3PP1jmH6FUyPW+yCSn7aBVK1fGGKNhowdDA==",
"funding": [
{
"type": "opencollective",
"url": "https://opencollective.com/ua-parser-js"
},
{
"type": "paypal",
"url": "https://paypal.me/faisalman"
}
],
"engines": {
"node": "*"
}
},
"node_modules/uid-safe": { "node_modules/uid-safe": {
"version": "2.1.5", "version": "2.1.5",
"resolved": "https://registry.npmjs.org/uid-safe/-/uid-safe-2.1.5.tgz", "resolved": "https://registry.npmjs.org/uid-safe/-/uid-safe-2.1.5.tgz",
@ -30887,6 +30906,11 @@
"resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz",
"integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=" "integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c="
}, },
"ua-parser-js": {
"version": "1.0.32",
"resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-1.0.32.tgz",
"integrity": "sha512-dXVsz3M4j+5tTiovFVyVqssXBu5HM47//YSOeZ9fQkdDKkfzv2v3PP1jmH6FUyPW+yCSn7aBVK1fGGKNhowdDA=="
},
"uid-safe": { "uid-safe": {
"version": "2.1.5", "version": "2.1.5",
"resolved": "https://registry.npmjs.org/uid-safe/-/uid-safe-2.1.5.tgz", "resolved": "https://registry.npmjs.org/uid-safe/-/uid-safe-2.1.5.tgz",

View File

@ -133,6 +133,7 @@
"tippy.js": "^6.3.1", "tippy.js": "^6.3.1",
"tough-cookie": "^4.0.0", "tough-cookie": "^4.0.0",
"tunnel": "0.0.6", "tunnel": "0.0.6",
"ua-parser-js": "^1.0.32",
"undici": "^4.13.0", "undici": "^4.13.0",
"unprint": "^0.7.2", "unprint": "^0.7.2",
"url-pattern": "^1.0.3", "url-pattern": "^1.0.3",

View File

@ -13,7 +13,7 @@ function scrapeAll(scenes, channel) {
return scenes.map(({ query }) => { return scenes.map(({ query }) => {
const release = {}; const release = {};
release.url = query.url('a.item-video-overlay'); release.url = query.url('a.item-video-overlay, a.item-episode-overlay');
release.title = query.cnt('.item-title'); release.title = query.cnt('.item-title');
release.date = query.date('.video-date', 'MMM D, YYYY'); release.date = query.date('.video-date', 'MMM D, YYYY');
@ -60,7 +60,7 @@ function scrapeScene({ query }, url, channel) {
return release; return release;
} }
function scrapeProfile({ query, el }, url, entity, include) { function scrapeProfile({ query, el }, url, entity, _include) {
const profile = { url }; const profile = { url };
const bio = Array.from(Array.from(query.html('.stats script').matchAll(/totalStats\.push\(.*\)/g))).reduce((acc, match) => { const bio = Array.from(Array.from(query.html('.stats script').matchAll(/totalStats\.push\(.*\)/g))).reduce((acc, match) => {
@ -106,10 +106,7 @@ function scrapeProfile({ query, el }, url, entity, include) {
} }
profile.avatar = query.img('.picture img'); profile.avatar = query.img('.picture img');
profile.scenes = scrapeAll(qu.initAll(el, '.scene, .latest-scene .item-episode'), entity);
if (include.releases) {
return scrapeAll(qu.initAll(el, '.scene'));
}
console.log(bio); console.log(bio);
console.log(profile); console.log(profile);