diff --git a/package-lock.json b/package-lock.json index b5e9dc2c..4be28a13 100644 --- a/package-lock.json +++ b/package-lock.json @@ -92,7 +92,7 @@ "tunnel": "0.0.6", "ua-parser-js": "^1.0.37", "undici": "^5.28.1", - "unprint": "^0.17.7", + "unprint": "^0.17.8", "url-pattern": "^1.0.3", "v-tooltip": "^2.1.3", "video.js": "^8.6.1", @@ -18403,9 +18403,9 @@ } }, "node_modules/unprint": { - "version": "0.17.7", - "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.17.7.tgz", - "integrity": "sha512-faESg+KnVQLZ9LfMKMK6+9MKYLXgL9g6mQ8V/o4bd0QXfBtpKbN+mq/pyWf2QyJVziG9ZgHo03YlUx0rDrDaSw==", + "version": "0.17.8", + "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.17.8.tgz", + "integrity": "sha512-S6j9SjiGLg2edmiBwTeMBojemfpAEuQHHJEeKqyndAv32+wFAy//UCwsq67qqY90vKRht/ymTCh17DB9Polwvg==", "dependencies": { "axios": "^0.27.2", "bottleneck": "^2.19.5", diff --git a/package.json b/package.json index 6575dc69..da5c8474 100755 --- a/package.json +++ b/package.json @@ -151,7 +151,7 @@ "tunnel": "0.0.6", "ua-parser-js": "^1.0.37", "undici": "^5.28.1", - "unprint": "^0.17.7", + "unprint": "^0.17.8", "url-pattern": "^1.0.3", "v-tooltip": "^2.1.3", "video.js": "^8.6.1", diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 83ef4edb..9c6bf043 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -5935,8 +5935,7 @@ const sites = [ tags: ['vr'], parent: 'kink', parameters: { - latest: 'bdsm-vr-videos', - actor: 'bdsm-performer', + layout: 'vr', }, }, // KINK MEN diff --git a/src/actors.js b/src/actors.js index 78e83020..08ac79e6 100755 --- a/src/actors.js +++ b/src/actors.js @@ -496,20 +496,23 @@ async function curateProfile(profile, actor) { } } - curatedProfile.social = [].concat(profile.social).map((social) => { - if (!social) { - return null; - } + curatedProfile.social = [] + .concat(profile.social) // legacy + .concat(profile.socials) + .map((social) => { + if (!social) { + return null; + } - try { - const { origin, pathname } = new URL(social); + try { + const { origin, pathname } = new URL(social); - return `${origin}${pathname}`; - } catch (error) { - logger.warn(`Profile scraper for '${profile.entity.name}' returned invalid social link: ${social}`); - return null; - } - }).filter(Boolean); + return `${origin}${pathname}`; + } catch (error) { + logger.warn(`Profile scraper for '${profile.entity.name}' returned invalid social link: ${social}`); + return null; + } + }).filter(Boolean); curatedProfile.scenes = toBaseReleases(profile.scenes || profile.releases, profile.entity, actor) // attach actor to base scene, in case it was not scraped diff --git a/src/scrapers/kink.js b/src/scrapers/kink.js index 64235003..f2dd57df 100755 --- a/src/scrapers/kink.js +++ b/src/scrapers/kink.js @@ -66,13 +66,11 @@ function scrapeAll(scenes, entity) { async function fetchLatest(channel, page = 1) { const url = `${channel.parent.url}/search?type=shoots&channelIds=${channel.parameters?.slug || channel.slug}&sort=published&page=${page}`; - const res = await unprint.browserRequest(url, { + const res = await unprint.browser(url, { selectAll: '.container .card', }); if (res.status === 200) { - // const items = unprint.initAll(html, '.container .card'); - const scenes = scrapeAll(res.context, channel); return scenes; @@ -81,6 +79,52 @@ async function fetchLatest(channel, page = 1) { return res.status; } +function scrapeAllVr(scenes, channel) { + return scenes.map(({ query }) => { + const release = {}; + const url = query.url('a.image-link, a.video-title'); + const { pathname } = new URL(url); + + release.url = url; + // legacy ID in slug preferred to match old entries, but prepare for retirement just in case + release.entryId = pathname.match(/-(\d+)\/?$/)?.[1] || pathname.match(/\/vd\/(\d+)\//)[1]; + + release.title = query.content('.video-title'); + release.description = query.content('.description'); + + release.date = query.date('.main-info', 'MMM Do YYYY', { match: /\w{3} \d+\w+ \d{4}/ }); + + release.actors = query.all('.actors a').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null, { origin: channel.url }), + })); + + release.poster = query.sourceSet('.image-link img'); + release.photos = query.dataset('.image-link div[data-gallery-images]', 'galleryImages')?.split(',').filter(Boolean); // can sometimes be ,,,, with no URLs + + return release; + }); +} + +async function fetchLatestVr(channel, page = 1) { + const url = `${channel.url}/videos/page${page}`; + + const res = await unprint.get(url, { + selectAll: '#listView .video-list-view', // more details than #gridView + headers: { + Cookie: 'agreedToDisclaimer=true', + }, + }); + + if (res.ok) { + const scenes = scrapeAllVr(res.context, channel); + + return scenes; + } + + return res.status; +} + function scrapeScene({ query }, url, entity) { const release = { url }; const data = query.json('div[data-setup]', { attribute: 'data-setup' }); @@ -145,7 +189,7 @@ function scrapeScene({ query }, url, entity) { } async function fetchScene(url, channel) { - const res = await unprint.browserRequest(url); + const res = await unprint.browser(url); if (res.status === 200) { const scene = scrapeScene(res.context, url, channel); @@ -156,6 +200,66 @@ async function fetchScene(url, channel) { return res.status; } +const qualityMap = { + psvr: 1080, // as of recent, might've been lower in the past + '4k': 2160, + '5k': 2280, + '8k': 4320, +}; + +function scrapeSceneVr({ query }, url, channel) { + const release = {}; + + const { pathname } = new URL(url); + // legacy ID in slug preferred to match old entries, but prepare for retirement just in case + release.entryId = pathname.match(/-(\d+)\/?$/)?.[1] || pathname.match(/\/vd\/(\d+)\//)[1]; + + release.title = query.content('.page-title'); + release.description = query.content('#collapseDescription .accordion-body') || query.attribute('meta[name="description"]', 'content'); + + release.date = query.date('.video-description-list', 'MMMM D, YYYY'); + + release.actors = query.all('.video-description-list a[href*="/girl"]').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null, { origin: channel.url }), + })); // no sign of boys + + release.tags = query.contents('.video-description-list a[href*="/category"]'); + + release.poster = query.poster('dl8-video'); + + release.photos = query.sourceSets('.carousel .item img'); + + if (query.exists('dl8-video source[src*=".mp4"]')) { + // sometimes the trailer URL is missing the filename, it won't play on their site either + release.trailer = { + src: query.video('dl8-video source'), + vr: true, + }; + } + + release.qualities = query + .contents('#downloadsData a') + .map((button) => qualityMap[button.match(/download (\w+)/i)?.[1]?.toLowerCase()]) + .filter(Boolean); + + return release; +} + +async function fetchSceneVr(url, channel) { + const res = await unprint.get(url, { + headers: { + Cookie: 'agreedToDisclaimer=true', + }, + }); + + if (res.ok) { + return scrapeSceneVr(res.context, url, channel); + } + + return res.status; +} + async function scrapeProfile({ query }, actorUrl) { const profile = { url: actorUrl }; @@ -196,7 +300,7 @@ async function getActorUrl({ name: actorName, url }, networkUrl) { } // const searchRes = await tab.goto(`${networkUrl}/search?type=performers&q=${actorName}`); - const searchApiRes = await unprint.browserRequest(`https://www.kink.com/api/v2/search/suggestions/performers?term=${actorName}`); + const searchApiRes = await unprint.browser(`https://www.kink.com/api/v2/search/suggestions/performers?term=${actorName}`); if (searchApiRes.status === 200) { const data = searchApiRes.context.query.json('body pre'); @@ -217,7 +321,7 @@ async function fetchProfile(actor, entity) { const actorUrl = await getActorUrl(actor, networkUrl); if (actorUrl) { - const actorRes = await unprint.browserRequest(actorUrl); + const actorRes = await unprint.browser(actorUrl); if (actorRes.status === 200) { return scrapeProfile(actorRes.context, actorUrl); @@ -229,9 +333,82 @@ async function fetchProfile(actor, entity) { return null; } +async function getActorUrlVr(actor, entity) { + if (actor.url) { + return actor.url; + } + + const res = await unprint.get(`${entity.url}/search/`, { + selectAll: '#actors option', + headers: { + Cookie: 'agreedToDisclaimer=true', + }, + }); + + if (res.ok) { + const actors = res.context.map(({ query }) => ({ + name: query.content(), + id: query.attribute(null, 'value'), + })); + + const targetActor = actors.find((actorOption) => actor.slug === slugify(actorOption.name)); + + if (targetActor?.id) { + return `${entity.url}/girl/${targetActor.id}/${slugify(targetActor.name)}`; + } + } + + return null; +} + +function scrapeProfileVr({ query }, url) { + const profile = { url }; + + const keys = query.contents('.info .key'); + const values = query.contents('.info .value', { filter: false }); + const bio = Object.fromEntries(keys.map((key, index) => [slugify(key, '_'), values[index]])); + + profile.description = query.content('#readMoreFull'); + profile.avatar = query.sourceSet('.images img'); + + if (bio.birthdate) profile.dateOfBirth = unprint.extractDate(bio.birthdate, 'MMMM DD, YYYY'); + if (bio.country) profile.birthPlace = bio.country; + if (bio.cup) profile.cup = bio.cup; + if (bio.height) profile.height = Number(bio.height.match(/(\d+) cm/i)?.[1]) || null; + if (bio.weight) profile.weight = Number(bio.weight.match(/(\d+) kg/i)?.[1]) || null; + + profile.socials = query.urls('.value.social a'); + + return profile; +} + +async function fetchProfileVr(actor, entity) { + const url = await getActorUrlVr(actor, entity); + + if (url) { + const res = await unprint.get(url, { + headers: { + Cookie: 'agreedToDisclaimer=true', + }, + }); + + if (res.ok) { + return scrapeProfileVr(res.context, url, entity); + } + + return res.status; + } + + return null; +} + module.exports = { - // beforeNetwork, fetchLatest, fetchScene, fetchProfile, + vr: { + fetchLatest: fetchLatestVr, + fetchScene: fetchSceneVr, + fetchProfile: fetchProfileVr, + }, }; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index eb9c2b05..8de42839 100755 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -138,7 +138,7 @@ const scrapers = { '5kvids': kellymadison, killergram, kink, - kinkvr: badoink, + // kinkvr: badoink, // analvids, analvids: pornbox, littlecapricedreams, @@ -273,7 +273,7 @@ const scrapers = { killergram, kink, kinkmen: kink, - kinkvr: badoink, + kinkvr: kink, loveherfilms, loveherfeet: loveherfilms, shelovesblack: loveherfilms,