From 06f9efa49202a09c41da3aa83643485093056311 Mon Sep 17 00:00:00 2001
From: DebaucheryLibrarian
Date: Mon, 24 Feb 2025 02:53:46 +0100
Subject: [PATCH] Added Virtual Taboo (including OnlyTarts).

---
Review notes: line structure restored; virtualtaboo.js revised (closed the
unterminated twitter:image selector, guarded missing trailer sources and bio
values, added comments). The blob id on its index line predates these edits.

 seeds/02_sites.js            |  16 +++-
 src/actors.js                |   2 +-
 src/scrapers/scrapers.js     |   4 +
 src/scrapers/virtualtaboo.js | 181 +++++++++++++++++++++++++++++++++++
 4 files changed, 198 insertions(+), 5 deletions(-)
 create mode 100755 src/scrapers/virtualtaboo.js

diff --git a/seeds/02_sites.js b/seeds/02_sites.js
index 873daecd..cc11798f 100755
--- a/seeds/02_sites.js
+++ b/seeds/02_sites.js
@@ -15182,6 +15182,13 @@ const sites = [
 		tags: ['cheating', 'family'],
 		parent: 'nubiles',
 	},
+	{
+		slug: 'realitysis',
+		name: 'Reality Sis',
+		url: 'https://www.realitysis.com',
+		tags: ['family'],
+		parent: 'nubiles',
+	},
 	// PASCALS SUBSLUTS
 	{
 		slug: 'pascalssubsluts',
@@ -20511,30 +20518,31 @@ const sites = [
 	{
 		slug: 'virtualtaboo',
 		name: 'Virtual Taboo',
-		url: 'https://www.virtualtaboo.com',
+		url: 'https://virtualtaboo.com',
 		tags: ['vr'],
 		parent: 'virtualtaboo',
 		parameters: {
 			latest: '/videos',
+			actor: '/pornstars',
 		},
 	},
 	{
 		slug: 'onlytarts',
 		name: 'OnlyTarts',
-		url: 'https://www.onlytarts.com',
+		url: 'https://onlytarts.com',
 		parent: 'virtualtaboo',
 	},
 	{
 		slug: 'oopsfamily',
 		name: 'Oops Family',
-		url: 'https://www.oopsfamily.com',
+		url: 'https://oopsfamily.com',
 		tags: ['family'],
 		parent: 'virtualtaboo',
 	},
 	{
 		slug: 'darkroomvr',
 		name: 'Dark Room VR',
-		url: 'https://www.darkroomvr.com',
+		url: 'https://darkroomvr.com',
 		tags: ['vr'],
 		parent: 'virtualtaboo',
 	},
diff --git a/src/actors.js b/src/actors.js
index a88a0cf7..96d07fdf 100755
--- a/src/actors.js
+++ b/src/actors.js
@@ -775,7 +775,7 @@ async function scrapeActors(argNames) {
 	const entitySlugs = sources.flat();
 
 	const [entitiesBySlug, existingActorEntries] = await Promise.all([
-		fetchEntitiesBySlug(entitySlugs, { types: ['channel', 'network', 'info'] }),
+		fetchEntitiesBySlug(entitySlugs, { types: ['channel', 'network', 'info'], prefer: argv.prefer }),
 		knex('actors')
 			.select(knex.raw('actors.id, actors.name, actors.slug, actors.entry_id, actors.entity_id, row_to_json(entities) as entity'))
 			.whereIn('actors.slug', baseActors.map((baseActor) => baseActor.slug))
diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js
index bdcc0736..5ecc9c82 100755
--- a/src/scrapers/scrapers.js
+++ b/src/scrapers/scrapers.js
@@ -353,6 +353,10 @@ const scrapers = {
 	tushyraw: vixen,
 	twistys: aylo,
 	vipsexvault: porndoe,
+	virtualtaboo,
+	darkroomvr: virtualtaboo,
+	onlytarts: virtualtaboo,
+	oopsfamily: virtualtaboo,
 	vixen,
 	vrcosplayx: badoink,
 	wankzvr,
diff --git a/src/scrapers/virtualtaboo.js b/src/scrapers/virtualtaboo.js
new file mode 100755
index 00000000..192dd325
--- /dev/null
+++ b/src/scrapers/virtualtaboo.js
@@ -0,0 +1,181 @@
+'use strict';
+
+const unprint = require('unprint');
+
+const slugify = require('../utils/slugify');
+
+// Map listing cards (each destructured for its `query` helper) to base releases.
+function scrapeAll(scenes) {
+	return scenes.map(({ query }) => {
+		const release = {};
+
+		release.url = query.url('a.image-container, a.video-card__title') || query.url(null);
+		release.entryId = new URL(release.url).pathname.match(/\/videos?\/([\w-]+)/)[1];
+
+		release.title = query.content('.video-card__title');
+
+		release.duration = query.duration('.video-card__quality');
+
+		// actors are either linked individually or listed as comma-separated text
+		release.actors = query.exists('.video-card__actors a')
+			? query.all('.video-card__actors a').map((actorEl) => ({
+				name: unprint.query.content(actorEl),
+				url: unprint.query.url(actorEl, null),
+			}))
+			: query.content('.video-card__actors')?.split(',').map((actor) => actor.trim());
+
+		release.poster = query.img('.image-container img');
+		release.teaser = query.video('.video-card__trailer');
+
+		return release;
+	});
+}
+
+// Collect gallery photo URLs; when the page links only some photos plus a
+// "more" counter, extrapolate the rest from the numbered file name pattern.
+function getPhotos(query) {
+	const teaserPhotos = query.urls('.video-detail__gallery a[href*="//static"], .gallery-item-container a[href*="//static"]');
+	const galleryMore = query.number('.video-detail__gallery-item--more, .video-detail__gallery-item-more');
+	const galleryUrl = /\/(img_)?\d{3}\.jpg/.test(teaserPhotos[0]) && teaserPhotos[0];
+
+	// no incremental URL found, return original links
+	if (!galleryMore || !galleryUrl) {
+		return teaserPhotos;
+	}
+
+	return Array.from({
+		length: teaserPhotos.length + galleryMore + 1, // + number seems to be off by one
+	}, (_value, index) => galleryUrl.replace(/\d+\.jpg/, `${String(index + 1).padStart(3, '0')}.jpg`));
+}
+
+// Build trailer sources from the download list when present, otherwise from
+// the `coreSettings` global read off the page window; returns null if neither exists.
+function getTrailer({ query, window }) {
+	if (query.exists('.download-pane__list, .download-list')) {
+		// Dark Room VR
+		return query.all('.download-pane__item-container, .download-list__item-container').map((videoEl) => ({
+			src: unprint.query.url(videoEl, '.download-pane__item, .download-list__item'),
+			quality: unprint.query.number(videoEl, '.download-pane__item, .download-list__item', { match: /\d+×(\d+)/, matchIndex: 1 }),
+			vr: true, // only used on VR sites
+			expectType: {
+				'application/octet-stream': 'video/mp4',
+			},
+		}));
+	}
+
+	try {
+		// evaluates a fixed identifier in the page window; throws when it is not defined
+		const trailerData = window.eval('coreSettings')?.sources?.standard?.h264;
+
+		// no h264 source list available, nothing to build a trailer from
+		if (!Array.isArray(trailerData)) {
+			return null;
+		}
+
+		return trailerData
+			.filter((source) => source.quality !== 'auto')
+			.map((source) => ({
+				src: source.fallback, // main url doesn't seem to return plausible video files
+				quality: Number(source.label.match(/\d+\s*x\s*(\d+)/)?.[1]) || null,
+			}));
+	} catch (error) {
+		console.log(error);
+		// no data variable
+	}
+
+	return null;
+}
+
+// Scrape a single scene page; the entry ID is taken from the path of `url`.
+function scrapeScene({ query, window }, { url }) {
+	const release = {};
+
+	release.entryId = new URL(url).pathname.match(/\/videos?\/([\w-]+)/)[1];
+
+	release.title = query.content('.right-info h1, .video-detail__title');
+	release.description = query.text('.video-detail__description p, .description p');
+
+	release.date = query.date('.video-info__time, .info', 'DD MMMM, YYYY', { match: /\d{1,2} \w+, \d{4}/ });
+	release.duration = query.duration('.video-info__time, .info');
+
+	release.actors = query.all('.video-detail__desktop-sidebar .video-info__text a[href*="/model"], .right-info .info a[href*="/pornstars"]').map((actorEl) => ({
+		name: unprint.query.content(actorEl),
+		url: unprint.query.url(actorEl, null),
+	}));
+
+	release.tags = query.contents('.tag-list a, .tags a');
+
+	// release.poster = query.sourceSet('.image-container img') || query.background('.xp-poster');
+	release.poster = query.img(['meta[property="og:image"]', 'meta[property="twitter:image"]'], { attribute: 'content' })
+		|| query.poster('.video-detail__image-container *[poster]');
+
+	release.photos = getPhotos(query);
+	release.trailer = getTrailer({ query, window });
+
+	return release;
+}
+
+// Scrape an actor profile page; bio fields are paired up from parallel key and value lists.
+function scrapeProfile({ query }) {
+	const profile = {};
+
+	const bioKeys = query.contents('.pornstar-detail__params--top strong, .actor-detail__param-name');
+	const bioValues = query.exists('.actor-detail__param-value')
+		? query.contents('.actor-detail__param-value')
+		: query.text('.pornstar-detail__params--top', { join: false })?.map((text) => text.split('•')[0].replace(':', '').trim());
+
+	// the fallback above can yield undefined, so index it defensively
+	const bio = Object.fromEntries(bioKeys.map((key, index) => [slugify(key, '_'), bioValues?.[index]]));
+	const tags = query.contents('.actor-detail__tags a').map((tag) => slugify(tag, '_'));
+
+	profile.description = query.content('.pornstar-detail__description, .actor-detail__description') || null;
+	profile.birthPlace = query.content('.pornstar-detail__info span, .actor-detail__info-value')?.split(',')[0].trim();
+	profile.dateOfBirth = unprint.extractDate(bio.birthday, 'MMM D, YYYY');
+
+	profile.measurements = bio.measurements;
+	profile.height = unprint.extractNumber(bio.height);
+	profile.weight = unprint.extractNumber(bio.weight);
+
+	profile.naturalBoobs = tags.includes('natural_tits') ? true : null; // seemingly no tag for fake tits
+	profile.hasTattoos = tags.includes('no_tattoos') ? false : null;
+
+	profile.avatar = query.img('img.pornstar-detail__picture, .actor-detail__picture img');
+
+	return profile;
+}
+
+// Fetch one page of the channel's latest releases; returns `res.status` when the request fails.
+async function fetchLatest(channel, page = 1, { parameters }) {
+	const url = `${channel.url}${parameters.latest || '/video'}?page=${page}`;
+	const res = await unprint.get(url, { selectAll: '.video-card__item' });
+
+	if (res.ok) {
+		return scrapeAll(res.context, channel);
+	}
+
+	return res.status;
+}
+
+// Fetch an actor profile by slugified name; returns `res.status` when the request fails.
+async function fetchProfile({ name: actorName }, { entity, parameters }) {
+	const url = `${entity.url}${parameters.actor || '/model'}/${slugify(actorName, '-')}`;
+	const res = await unprint.get(url);
+
+	if (res.ok) {
+		return scrapeProfile(res.context, entity);
+	}
+
+	return res.status;
+}
+
+module.exports = {
+	fetchLatest,
+	fetchProfile,
+	scrapeScene: {
+		scraper: scrapeScene,
+		parser: {
+			// getTrailer reads the `coreSettings` global, presumably set by the page's scripts
+			runScripts: 'dangerously',
+		},
+	},
+};