'use strict'; const unprint = require('unprint'); const slugify = require('../utils/slugify'); function scrapeAll(scenes) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('a.image-container, a.video-card__title') || query.url(null); release.entryId = new URL(release.url).pathname.match(/\/videos?\/([\w-]+)/)[1]; release.title = query.content('.video-card__title'); release.duration = query.duration('.video-card__quality'); release.actors = query.exists('.video-card__actors a') ? query.all('.video-card__actors a').map((actorEl) => ({ name: unprint.query.content(actorEl), url: unprint.query.url(actorEl, null), })) : query.content('.video-card__actors')?.split(',').map((actor) => actor.trim()); release.poster = query.img('.image-container img'); release.teaser = query.video('.video-card__trailer'); return release; }); } function getPhotos(query) { const teaserPhotos = query.urls('.video-detail__gallery a[href*="//static"], .gallery-item-container a[href*="//static"]'); const galleryMore = query.number('.video-detail__gallery-item--more, .video-detail__gallery-item-more'); const galleryUrl = /\/(img_)?\d{3}\.jpg/.test(teaserPhotos[0]) && teaserPhotos[0]; // no incremental URL found, return original links if (!galleryMore || !galleryUrl) { return teaserPhotos; } return Array.from({ length: teaserPhotos.length + galleryMore + 1, // + number seems to be off by one }, (_value, index) => galleryUrl.replace(/\d+\.jpg/, `${String(index + 1).padStart(3, '0')}.jpg`)); } function getTrailer({ query, window }) { if (query.exists('.download-pane__list, .download-list')) { // Dark Room VR return query.all('.download-pane__item-container, .download-list__item-container').map((videoEl) => ({ src: unprint.query.url(videoEl, '.download-pane__item, .download-list__item'), quality: unprint.query.number(videoEl, '.download-pane__item, .download-list__item', { match: /\d+×(\d+)/, matchIndex: 1 }), vr: true, // only used on VR sites expectType: { 'application/octet-stream': 'video/mp4', }, })); } try { const trailerData = window.eval('coreSettings')?.sources?.standard?.h264; return trailerData .filter((source) => source.quality !== 'auto') .map((source) => ({ src: source.fallback, // main url doesn't seem to return plausible video files quality: Number(source.label.match(/\d+\s*x\s*(\d+)/)?.[1]) || null, })); } catch (error) { console.log(error); // no data variable } return null; } function scrapeScene({ query, window }, { url }) { const release = {}; release.entryId = new URL(url).pathname.match(/\/videos?\/([\w-]+)/)[1]; release.title = query.content('.right-info h1, .video-detail__title'); release.description = query.text('.video-detail__description p, .description p'); release.date = query.date('.video-info__time, .info', 'DD MMMM, YYYY', { match: /\d{1,2} \w+, \d{4}/ }); release.duration = query.duration('.video-info__time, .info'); release.actors = query.all('.video-detail__desktop-sidebar .video-info__text a[href*="/model"], .right-info .info a[href*="/pornstars"]').map((actorEl) => ({ name: unprint.query.content(actorEl), url: unprint.query.url(actorEl, null), })); release.tags = query.contents('.tag-list a, .tags a'); // release.poster = query.sourceSet('.image-container img') || query.background('.xp-poster'); release.poster = query.img(['meta[property="og:image"]', 'meta[property="twitter:image"'], { attribute: 'content' }) || query.poster('.video-detail__image-container *[poster]'); release.photos = getPhotos(query); release.trailer = getTrailer({ query, window }); return release; } function scrapeProfile({ query }) { const profile = {}; const bioKeys = query.contents('.pornstar-detail__params--top strong, .actor-detail__param-name, td.pornstar-detail__info--title'); const bioValues = query.exists('.actor-detail__param-value, .pornstar-detail__info--title') ? query.contents('.actor-detail__param-value, .pornstar-detail__info--title + td') : query.text('.pornstar-detail__params--top', { join: false })?.map((text) => text.split('•')[0].replace(':', '').trim()); const bio = Object.fromEntries(bioKeys.map((key, index) => [slugify(key, '_'), bioValues[index]])); const tags = query.contents('.actor-detail__tags a').map((tag) => slugify(tag, '_')); profile.description = query.content('.pornstar-detail__description, .actor-detail__description') || null; profile.birthPlace = query.content('.pornstar-detail__info span, .actor-detail__info-value')?.split(',')[0].trim(); profile.dateOfBirth = unprint.extractDate(bio.birthday, 'MMM D, YYYY'); profile.measurements = bio.measurements; profile.height = unprint.extractNumber(bio.height); profile.weight = unprint.extractNumber(bio.weight); profile.naturalBoobs = tags.includes('natural_tits') ? true : null; // seemingly no tag for fake tits profile.hasTattoos = tags.includes('no_tattoos') ? false : null; profile.avatar = query.img('img.pornstar-detail__picture, .actor-detail__picture img'); return profile; } async function fetchLatest(channel, page = 1, { parameters }) { const url = `${channel.url}${parameters.latest || '/video'}?page=${page}`; const res = await unprint.get(url, { selectAll: '.video-card__item' }); if (res.ok) { return scrapeAll(res.context, channel); } return res.status; } async function fetchProfile({ name: actorName }, { entity, parameters }) { const url = `${entity.url}${parameters.actor || '/model'}/${slugify(actorName, '-')}`; const res = await unprint.get(url); if (res.ok) { return scrapeProfile(res.context, entity); } return res.status; } module.exports = { fetchLatest, fetchProfile, scrapeScene: { scraper: scrapeScene, parser: { runScripts: 'dangerously', }, }, };