Files
traxxx/src/scrapers/virtualtaboo.js

164 lines
5.7 KiB
JavaScript
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
/**
 * Extract the entry ID from the `/video/<id>` or `/videos/<id>` segment of a scene URL.
 *
 * @param {?string} url - Absolute scene URL; may be null when no link was found on the card.
 * @returns {?string} The matched ID, or null when the URL is missing, cannot be parsed,
 *   or has no such path segment.
 */
function getSceneEntryId(url) {
	if (!url) {
		return null;
	}

	try {
		return new URL(url).pathname.match(/\/videos?\/([\w-]+)/)?.[1] ?? null;
	} catch (error) {
		// new URL() throws a TypeError on relative or malformed URLs; treat as "no ID"
		return null;
	}
}

/**
 * Turn the scene cards of a listing page into release objects.
 *
 * A card without a usable scene link yields a release with `entryId: null` instead of
 * throwing, so one odd card no longer aborts the whole page.
 *
 * @param {Array<{ query: object }>} scenes - One context per scene card; only `query` is read.
 * @returns {object[]} One release per card with url, entryId, title, duration, actors,
 *   poster and teaser.
 */
function scrapeAll(scenes) {
	return scenes.map(({ query }) => {
		const release = {};

		// NOTE(review): the null selector presumably reads the URL off the card element itself — confirm against unprint
		release.url = query.url('a.image-container, a.video-card__title') || query.url(null);
		release.entryId = getSceneEntryId(release.url);

		release.title = query.content('.video-card__title');
		release.duration = query.duration('.video-card__quality');

		// actors are either linked individually, or listed as comma-separated plain text
		release.actors = query.exists('.video-card__actors a')
			? query.all('.video-card__actors a').map((actorEl) => ({
				name: unprint.query.content(actorEl),
				url: unprint.query.url(actorEl, null),
			}))
			: query.content('.video-card__actors')?.split(',').map((actor) => actor.trim());

		release.poster = query.img('.image-container img');
		release.teaser = query.video('.video-card__trailer');

		return release;
	});
}
/**
 * Collect the photo URLs of a scene, expanding the visible previews into the full
 * numbered gallery when the page hints at more photos.
 *
 * @param {object} query - Query context of the scene page; `urls` and `number` are used.
 * @returns {string[]} The preview links as found, or a generated list of numbered
 *   `NNN.jpg` URLs based on the first preview.
 */
function getPhotos(query) {
	const previews = query.urls('.video-detail__gallery a[href*="//static"], .gallery-item-container a[href*="//static"]');
	const hiddenCount = query.number('.video-detail__gallery-item--more, .video-detail__gallery-item-more');

	const firstPreview = previews[0];
	const isNumbered = /\/(img_)?\d{3}\.jpg/.test(firstPreview);

	// without a "more" counter or an incremental file name there is nothing to extrapolate
	if (!hiddenCount || !isNumbered) {
		return previews;
	}

	// the "+ N more" figure seems to be off by one, hence the extra photo
	const total = previews.length + hiddenCount + 1;
	const toNumberedUrl = (position) => firstPreview.replace(/\d+\.jpg/, `${String(position).padStart(3, '0')}.jpg`);

	return Array.from({ length: total }, (_unused, offset) => toNumberedUrl(offset + 1));
}
/**
 * Find the trailer sources of a scene page.
 *
 * Two layouts are handled: a download list in the DOM, and a `coreSettings` variable
 * defined by the page's scripts that is read through `window.eval()`.
 *
 * @param {{ query: object, window: object }} context - Query context and window of the scene page.
 * @returns {?Array<object>} Trailer sources with `src` and `quality`, or null when the page
 *   exposes no trailer data.
 */
function getTrailer({ query, window }) {
	if (query.exists('.download-pane__list, .download-list')) {
		// Dark Room VR
		return query.all('.download-pane__item-container, .download-list__item-container').map((videoEl) => ({
			src: unprint.query.url(videoEl, '.download-pane__item, .download-list__item'),
			quality: unprint.query.number(videoEl, '.download-pane__item, .download-list__item', { match: /\d+×(\d+)/, matchIndex: 1 }),
			vr: true, // only used on VR sites
			expectType: {
				'application/octet-stream': 'video/mp4',
			},
		}));
	}

	try {
		const sources = window.eval('coreSettings')?.sources?.standard?.h264;

		// missing trailer data is an expected absence, not an error worth logging
		if (!Array.isArray(sources)) {
			return null;
		}

		return sources
			.filter((source) => source.quality !== 'auto')
			.map((source) => ({
				src: source.fallback, // main url doesn't seem to return plausible video files
				// a source without a label keeps its URL and simply gets no quality
				quality: Number(String(source.label ?? '').match(/\d+\s*x\s*(\d+)/)?.[1]) || null,
			}));
	} catch (error) {
		// no data variable, or unexpectedly shaped data
		console.log(error);
	}

	return null;
}
/**
 * Scrape a scene page into a release object.
 *
 * @param {{ query: object, window: object }} context - Query context and window of the scene page.
 * @param {{ url: string }} options - `url` is the absolute URL of the scene page.
 * @returns {object} Release with entryId, title, description, date, duration, actors, tags,
 *   poster, photos and trailer.
 * @throws {TypeError} When `url` is not a valid absolute URL or has no `/video(s)/<id>` segment.
 */
function scrapeScene({ query, window }, { url }) {
	const release = {};

	release.entryId = new URL(url).pathname.match(/\/videos?\/([\w-]+)/)[1];

	release.title = query.content('.right-info h1, .video-detail__title');
	release.description = query.text('.video-detail__description p, .description p');

	// date and duration are read from the same info element
	release.date = query.date('.video-info__time, .info', 'DD MMMM, YYYY', { match: /\d{1,2} \w+, \d{4}/ });
	release.duration = query.duration('.video-info__time, .info');

	release.actors = query.all('.video-detail__desktop-sidebar .video-info__text a[href*="/model"], .right-info .info a[href*="/pornstars"]').map((actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null),
	}));

	release.tags = query.contents('.tag-list a, .tags a');

	// release.poster = query.sourceSet('.image-container img') || query.background('.xp-poster');
	// the twitter:image selector used to lack its closing bracket
	release.poster = query.img(['meta[property="og:image"]', 'meta[property="twitter:image"]'], { attribute: 'content' })
		|| query.poster('.video-detail__image-container *[poster]');

	release.photos = getPhotos(query);
	release.trailer = getTrailer({ query, window });

	return release;
}
/**
 * Scrape an actor page into a profile object.
 *
 * Bio keys and values are read as two parallel lists and paired by index. Values come either
 * from dedicated value elements, or are cut out of the text of the parameter block.
 *
 * @param {{ query: object }} context - Query context of the actor page.
 * @returns {object} Profile with description, birthPlace, dateOfBirth, measurements, height,
 *   weight, naturalBoobs, hasTattoos and avatar.
 */
function scrapeProfile({ query }) {
	const profile = {};

	const bioKeys = query.contents('.pornstar-detail__params--top strong, .actor-detail__param-name, td.pornstar-detail__info--title');

	// fall back to an empty list, so keys without any value text map to undefined instead of throwing
	const bioValues = (query.exists('.actor-detail__param-value, .pornstar-detail__info--title')
		? query.contents('.actor-detail__param-value, .pornstar-detail__info--title + td')
		: query.text('.pornstar-detail__params--top', { join: false })?.map((text) => text.split('•')[0].replace(':', '').trim()))
		?? [];

	const bio = Object.fromEntries(bioKeys.map((key, index) => [slugify(key, '_'), bioValues[index]]));
	const tags = query.contents('.actor-detail__tags a').map((tag) => slugify(tag, '_'));

	profile.description = query.content('.pornstar-detail__description, .actor-detail__description') || null;
	// only the part before the first comma is kept
	profile.birthPlace = query.content('.pornstar-detail__info span, .actor-detail__info-value')?.split(',')[0].trim();

	profile.dateOfBirth = unprint.extractDate(bio.birthday, 'MMM D, YYYY');
	profile.measurements = bio.measurements;

	profile.height = unprint.extractNumber(bio.height);
	profile.weight = unprint.extractNumber(bio.weight);

	profile.naturalBoobs = tags.includes('natural_tits') ? true : null; // seemingly no tag for fake tits
	profile.hasTattoos = tags.includes('no_tattoos') ? false : null;

	profile.avatar = query.img('img.pornstar-detail__picture, .actor-detail__picture img');

	return profile;
}
/**
 * Fetch one page of the latest scenes of a channel.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the base of the request.
 * @param {number} [page=1] - Page number put in the `page` query parameter.
 * @param {{ parameters: object }} options - `parameters.latest` overrides the default `/video` path.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request failed.
 */
async function fetchLatest(channel, page = 1, { parameters }) {
	const baseUrl = channel.url;
	const listingPath = parameters.latest || '/video';

	const response = await unprint.get(`${baseUrl}${listingPath}?page=${page}`, { selectAll: '.video-card__item' });

	if (!response.ok) {
		return response.status;
	}

	return scrapeAll(response.context, channel);
}
/**
 * Fetch and scrape the profile page of an actor.
 *
 * @param {{ name: string }} actor - Actor whose name is slugified into the page URL.
 * @param {{ entity: object, parameters: object }} options - `entity.url` is the base of the
 *   request; `parameters.actor` overrides the default `/model` path.
 * @returns {Promise<object|number>} Scraped profile, or the response status when the request failed.
 */
async function fetchProfile({ name: actorName }, { entity, parameters }) {
	const baseUrl = entity.url;
	const actorPath = parameters.actor || '/model';
	const actorSlug = slugify(actorName, '-');

	const response = await unprint.get(`${baseUrl}${actorPath}/${actorSlug}`);

	if (!response.ok) {
		return response.status;
	}

	return scrapeProfile(response.context, entity);
}
// Functions this scraper module exposes to its consumer.
module.exports = {
fetchLatest,
fetchProfile,
// scene scraping is exported together with parser options rather than as a bare function
scrapeScene: {
scraper: scrapeScene,
parser: {
// getTrailer() reads the page's `coreSettings` variable through window.eval(), which
// presumably only exists once the page's own scripts have run — confirm how the
// consumer applies this option
runScripts: 'dangerously',
},
},
};