From 01341b80d6c5133864a8b055a79a113e021d0e8c Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Mon, 9 Feb 2026 05:10:06 +0100 Subject: [PATCH] Refactored WankzVR for unprint, added referers in hopes of fixing missing assets. --- package-lock.json | 15 ++-- package.json | 2 +- src/deep.js | 1 + src/scrapers/wankzvr.js | 174 ++++++++++++++++++++++------------------ 4 files changed, 108 insertions(+), 84 deletions(-) diff --git a/package-lock.json b/package-lock.json index 0274a35f..b9ca2df6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -94,7 +94,7 @@ "tunnel": "0.0.6", "ua-parser-js": "^1.0.37", "undici": "^5.28.1", - "unprint": "^0.18.31", + "unprint": "^0.18.32", "url-pattern": "^1.0.3", "v-tooltip": "^2.1.3", "video.js": "^8.6.1", @@ -18822,6 +18822,11 @@ "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", "integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==" }, + "node_modules/set-cookie-parser": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/set-cookie-parser/-/set-cookie-parser-3.0.1.tgz", + "integrity": "sha512-n7Z7dXZhJbwuAHhNzkTti6Aw9QDDjZtm3JTpTGATIdNzdQz5GuFs22w90BcvF4INfnrL5xrX3oGsuqO5Dx3A1Q==" + }, "node_modules/set-function-length": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.1.1.tgz", @@ -20380,10 +20385,9 @@ } }, "node_modules/unprint": { - "version": "0.18.31", - "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.31.tgz", - "integrity": "sha512-7z4kC7eyyyRUeGBGc+lByRv1WqWP1NkO5Fwh6RwOs60x6HW3mffYUxa2R02fjpUOsEaPRukZwEsikP5jOA/JAA==", - "license": "ISC", + "version": "0.18.32", + "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.32.tgz", + "integrity": "sha512-3AOeu6vvAqJfxfYGJl/AUO07qnfFKbyyFpZzSBFwQJ+bZgU+kBX1uaj/4TD3ayBHkMgqRDYObJuGRQMZ6Xdncg==", "dependencies": { "bottleneck": "^2.19.5", "cookie": "^1.1.1", @@ -20395,6 +20399,7 @@ "moment-timezone": "^0.5.34", "object-hash": "^3.0.0", "patchright": "^1.56.1", + "set-cookie-parser": "^3.0.1", "srcset": "^4.0.0", "tunnel": "^0.0.6", "undici": "^7.18.2" diff --git a/package.json b/package.json index 4dce2d16..8813c3ca 100755 --- a/package.json +++ b/package.json @@ -153,7 +153,7 @@ "tunnel": "0.0.6", "ua-parser-js": "^1.0.37", "undici": "^5.28.1", - "unprint": "^0.18.31", + "unprint": "^0.18.32", "url-pattern": "^1.0.3", "v-tooltip": "^2.1.3", "video.js": "^8.6.1", diff --git a/src/deep.js b/src/deep.js index 665e7535..909e2c4f 100755 --- a/src/deep.js +++ b/src/deep.js @@ -69,6 +69,7 @@ async function fetchUnprintScene(scraper, url, entity, baseRelease, options, typ entity, baseRelease, headers: res.headers, + cookies: res.cookies, include, beforeFetchScenes: options.beforeFetchScenes, parameters: options.parameters, diff --git a/src/scrapers/wankzvr.js b/src/scrapers/wankzvr.js index e6698e4f..27f2b44d 100755 --- a/src/scrapers/wankzvr.js +++ b/src/scrapers/wankzvr.js @@ -1,68 +1,43 @@ 'use strict'; -const qu = require('../utils/qu'); -const http = require('../utils/http'); +const unprint = require('unprint'); + const slugify = require('../utils/slugify'); -async function getTrailerUrl(release, channel, request) { - const csrfToken = request.cookie.match('csrfst=(.*?);')?.[1]; - - if (!csrfToken) { - return null; - } - - const res = await http.post(`${channel.url}/ajax/player-config.json`, { - item_id: release.entryId, - }, { - headers: { - 'X-CSRF-Token': csrfToken, - }, - session: request.session, - encodeJSON: false, - }); - - if (res.ok) { - const trailers = res.body.streams.map((trailer) => ({ - src: trailer.url, - quality: Number(trailer.id?.match(/\d+/)?.[0] || trailer?.name.match(/\d+/)?.[0]), - vr: true, - })); - - return { - trailers, - poster: qu.prefixUrl(res.body.poster, res.body.thumbCDN), - }; - } - - return null; -} - -function scrapeAll(scenes, channel) { +function scrapeAll(scenes, channel, url) { return scenes.map(({ query }) => { const release = {}; - release.url = query.url('a', 'href', { origin: channel.url }); + release.url = query.url('a', { origin: channel.origin }); release.entryId = new URL(release.url).pathname.match(/(\d+)\/?$/)?.[1]; - release.title = query.cnt('.card__h'); - release.date = query.date('.card__date', 'D MMMM, YYYY'); + release.title = query.content('.card__h'); + release.date = query.date('.card__date', 'D MMMM, YYYY', { match: null }); - release.actors = query.all('.card__links a').map((el) => ({ - name: qu.query.cnt(el), - url: qu.query.url(el, null, 'href', { origin: channel.url }), + release.actors = query.all('.card__links a').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null, { origin: channel.url }), })); - const poster = query.srcset('picture source[type="image/jpeg"]', 'data-srcset') - || query.srcset('picture source[type="image/jpeg"]', 'srcset') - || query.srcset('.video__cover', 'srcset'); + const poster = query.sourceSet('picture source[type="image/jpeg"]', 'data-srcset') + || query.sourceSet('picture source[type="image/jpeg"]', 'srcset') + || query.sourceSet('.video__cover', 'srcset'); if (poster?.[0]) { release.poster = [ poster[0].replace(/small|tiny/, 'large'), ...poster, - ]; + ].map((src) => ({ + src, + referer: url, + })); - release.teaser = poster[0].replace(/\b(cover|hero|\d+)\/[a-z0-9_]+\.[a-z]+$/i, 'roll.webm'); // actually how site generates teaser URL + const teaser = poster[0].replace(/\b(cover|hero|\d+)\/[a-z0-9_]+\.[a-z]+$/i, 'roll.webm'); // actually how site generates teaser URL + + release.teaser = { + src: teaser, + referer: url, + }; } release.channel = channel.slug; // avoid being assigned to WankzVR network @@ -71,31 +46,82 @@ function scrapeAll(scenes, channel) { }); } -async function scrapeScene({ query }, url, channel, baseRelease, options, request) { +async function fetchLatest(channel, page) { + const url = `${channel.url}/videos?o=d&p=${page}`; + const res = await unprint.get(url, { selectAll: '.layout__content > .cards-list .card' }); // .cards-list is also used for hidden upcoming scenes + + if (res.ok) { + return scrapeAll(res.context, channel, url); + } + + return res.status; +} + +async function getTrailerUrl(release, channel, cookies, referer) { + const csrfToken = cookies.csrfst; + + if (!csrfToken) { + return null; + } + + const res = await unprint.post(`${channel.url}/ajax/player-config.json`, { + item_id: release.entryId, + }, { + form: true, + headers: { + 'X-Requested-With': 'XMLHttpRequest', + 'X-CSRF-Token': csrfToken, + }, + cookies, + }); + + if (res.ok) { + const trailers = res.data.streams.map((trailer) => ({ + src: trailer.url, + quality: Number(trailer.id?.match(/\d+/)?.[0] || trailer?.name.match(/\d+/)?.[0]), + vr: true, + referer, + })); + + const poster = unprint.prefixUrl(res.data.poster, res.data.thumbCDN); + + return { + trailers, + poster: poster && { + src: poster, + referer, + }, + }; + } + + return null; +} + +async function scrapeScene({ query }, { url, entity, include, cookies }) { const release = {}; release.entryId = new URL(url).pathname.match(/(\d+)\/?$/)?.[1]; - release.title = query.cnt('.detail__title'); - release.description = query.cnt('.detail__txt'); + release.title = query.content('.detail__title'); + release.description = query.content('.detail__txt'); - release.date = query.date('.detail__date', 'D MMMM, YYYY'); + release.date = query.date('.detail__date', 'D MMMM, YYYY', { match: null }); release.duration = query.number('.time') * 60; release.actors = (query.all('.detail__header-lg .detail__models a') || query.all('.detail__header-sm .detail__models a')).map((el) => ({ - name: qu.query.cnt(el), - url: qu.query.url(el, null, 'href', { origin: channel.url }), + name: unprint.query.content(el), + url: unprint.query.url(el, null, { origin: entity.origin }), })); - release.tags = query.cnts('.tag-list .tag').concat(query.cnts('.detail__specs-list .detail__specs-item')); + release.tags = query.contents('.tag-list .tag').concat(query.contents('.detail__specs-list .detail__specs-item')); release.photos = query.all('.photo-strip__slide').map((el) => ([ - qu.query.img(el, null, 'data-src'), - qu.query.img(el, 'img', 'src'), - ])); + unprint.query.img(el, null, 'data-src'), + unprint.query.img(el, 'img'), + ].map((src) => ({ src, referer: url })))); - if (options.includePosters || options.includeTrailers) { - const { trailers, poster } = await getTrailerUrl(release, channel, request); + if (include.posters || include.trailers) { + const { trailers, poster } = await getTrailerUrl(release, entity, cookies, url); release.trailer = trailers; release.poster = poster; @@ -105,17 +131,17 @@ async function scrapeScene({ query }, url, channel, baseRelease, options, reques } async function fetchActorScenes({ query }, url, entity, page = 1, accScenes = []) { - const scenes = scrapeAll(qu.initAll(query.all('.cards-list .card')), entity); + const scenes = scrapeAll(unprint.initAll(query.all('.cards-list .card')), entity); const hasNextPage = !query.exists('.pagenav__link.inactive'); if (hasNextPage) { const { origin, pathname, searchParams } = new URL(url); searchParams.set('p', page + 1); - const res = await qu.get(`${origin}${pathname}?${searchParams}`); + const res = await unprint.get(`${origin}${pathname}?${searchParams}`); if (res.ok) { - return fetchActorScenes(res.item, url, entity, page + 1, accScenes.concat(scenes)); + return fetchActorScenes(res.context, url, entity, page + 1, accScenes.concat(scenes)); } } @@ -127,10 +153,10 @@ async function scrapeProfile({ query }, url, entity, options) { const bio = query.all('.person__meta__item').reduce((acc, el) => ({ ...acc, - [slugify(qu.query.cnt(el, '.person__meta__label'))]: qu.query.text(el), + [slugify(unprint.query.content(el, '.person__meta__label'))]: unprint.query.text(el), }), {}); - profile.description = query.cnt('.person__content'); + profile.description = query.content('.person__content'); profile.gender = entity.slug === 'tranzvr' ? 'transsexual' : 'female'; profile.age = Number(bio.age) || null; @@ -140,7 +166,10 @@ async function scrapeProfile({ query }, url, entity, options) { profile.height = parseInt(bio.height, 10); profile.measurements = bio.measurements; - profile.avatar = query.srcset('.person__avatar img'); + profile.avatar = query.sourceSet('.person__avatar img').map((src) => ({ + src, + referer: url, + })); if (options.includeActorScenes) { profile.scenes = await fetchActorScenes({ query }, url, entity); @@ -149,22 +178,12 @@ async function scrapeProfile({ query }, url, entity, options) { return profile; } -async function fetchLatest(channel, page) { - const res = await qu.getAll(`${channel.url}/videos?o=d&p=${page}`, '.cards-list .card'); - - if (res.ok) { - return scrapeAll(res.items, channel); - } - - return res.status; -} - async function fetchProfile(baseActor, { entity }, options) { const url = `${entity.url}/${baseActor.slug}`; - const res = await qu.get(url); + const res = await unprint.get(url); if (res.ok) { - return scrapeProfile(res.item, url, entity, options); + return scrapeProfile(res.context, url, entity, options); } return res.status; @@ -174,5 +193,4 @@ module.exports = { fetchLatest, scrapeScene, fetchProfile, - deprecated: true, };