'use strict'; const unprint = require('unprint'); const slugify = require('../utils/slugify'); const { stripQuery } = require('../utils/url'); const { convert } = require('../utils/convert'); const sizeRegex = /_lg|_xl|_tn/; function resizeSrc(src) { if (!src) { return null; } return Array.from(new Set([ src.replace(sizeRegex, '_1280'), src.replace(sizeRegex, '_800'), src.replace(sizeRegex, '_xl'), src, ])); } function deriveDate(query) { const now = new Date(); // Nov. 2025 const dateMY = query.date('.i-date', 'MMM. YYYY', { match: /(\w+\.? \d{4})/ }); if (dateMY) { return { date: dateMY, precision: 'month', }; } // Nov. 12th const dateMDo = query.date('.i-date', 'MMM. Do', { match: /(\w+\.? \d{1,2}\w+)/ }); if (dateMDo) { if (dateMDo > now) { dateMDo.setFullYear(now.getFullYear() - 1); } return { date: dateMDo, precision: 'day', }; } // 8 Weeks Ago const dateAgo = query.dateAgo('.i-date'); if (dateAgo) { return { date: dateAgo.date, precision: dateAgo.precision === 'week' // not much use for weekly precision ? 'month' : dateAgo.precision, }; } return { date: null, precision: null, }; } function scrapeAll(scenes, channel, parameters) { return scenes.map(({ query }) => { const release = {}; const poster = query.img('.item-img img'); const url = stripQuery(query.url('a.i-title, .item-img a')); const { pathname, hostname } = new URL(url); release.title = query.content('a.i-title, h2.i-title'); release.duration = query.duration('.time-ol'); const { date, precision } = deriveDate(query); release.date = date; release.datePrecision = precision; release.actors = query.content('.i-model').split(',').map((actor) => actor.trim()); if (hostname.includes('join.') || pathname.includes('/join') || pathname.length <= 1) { // no link available, attempt to reconstruct from poster URL const entryId = poster?.match(/posting_(\d+)/)?.[1]; if (entryId) { // we can get deep data from this release.entryId = entryId; release.url = `${channel.origin}${parameters.path}/${slugify(release.actors[0], '-', { lower: false })}/${entryId}/`; } else { // lost cause, make up entryId to register shallow data release.entryId = slugify(release.title); } } else { release.url = url; release.entryId = pathname.match(/\/(\d+)\/?$/)[1]; } if (poster) { const caps = Array.from(new Set(Array.from({ length: 6 }, (_src, index) => { const file = `${String(index + 1).padStart(2, '0')}_lg`; return poster.replace(/0\d_lg/, file); }))).map((src) => resizeSrc(src)); release.poster = Array.from({ length: caps[0].length }).flatMap((_value, index) => caps.map((src) => src[index])); // try all the best sources first if (caps.length > 1) { release.caps = caps; } } release.photos = query.imgs('.thumbs img'); // cards layout release.teaser = [ query.video('.preview-clip source[type="video/mp4"]'), query.video('.preview-clip source[type="video/webm"]'), ].filter(Boolean); return release; }); } async function fetchLatest(channel, page = 1, { parameters }) { const res = await unprint.get(`${channel.origin}${parameters.path}/?page=${page}`, { interface: 'request', // seemingly less prone to HTTPParserError: Response does not match the HTTP/1.1 protocol (Invalid character in chunk size) selectAll: '.videos .video, .video-wide', // video-wide for cards layout e.g. Big Boobs POV }); if (res.ok) { return scrapeAll(res.context, channel, parameters); } return res.status; } function scrapeScene({ query }, url) { const release = {}; const info = Object.fromEntries(query.all('.stat').map((infoEl) => [ slugify(unprint.query.content(infoEl, '.label')), unprint.query.content(infoEl, '.value'), ])); release.url = stripQuery(url); release.entryId = new URL(url).pathname.match(/\/(\d+)\/?$/)[1]; release.title = query.content('.p-desc h2, #videos_page-page h1'); release.description = query.text('.p-desc, .desc'); release.date = unprint.extractDate(info.date, 'MMMM Do, YYYY', { match: /\w+ \d{1,2}\w+, \d{4}/ }); release.duration = unprint.extractDuration(info.duration) || Number(info.duration) * 60 || null; release.actors = query.all('//span[contains(text(), "Featuring")]/following-sibling::span/a').map((actorEl) => ({ name: unprint.query.content(actorEl), url: stripQuery(unprint.query.url(actorEl, null)), })); release.tags = query.contents('.p-desc a[href*="tag/"], .desc a[href*="tag/"]'); const style = query.content('.vp style'); const poster = query.img('#videos_page-page .item-img img') || style?.match(/background-image: url\('(http[\w.:/_-]+)'\);/)?.[1]; const fallbackPoster = resizeSrc(query.img('meta[itemprop="image"]', { attribute: 'content' })); // usually a different image const photos = query.all('.gallery .thumb').map((imgEl) => { const link = unprint.query.url(imgEl, 'a'); const img = unprint.query.img(imgEl, 'img'); const isJoin = !link || link.includes('join.') || link.includes('/join'); return Array.from(new Set([ ...isJoin ? [] : [link], img.replace('_tn', ''), img, ])); }); if (poster) { release.poster = resizeSrc(poster); if (fallbackPoster?.includes(poster)) { release.photos = [fallbackPoster, ...photos]; // fallback poster isn't usually in photoset, append } else { release.photos = photos; } } else { release.poster = fallbackPoster; release.photos = photos; } release.trailer = query.all('.vp video source').map((videoEl) => ({ src: unprint.query.video(videoEl, null), quality: parseInt(unprint.query.attribute(videoEl, null, 'res'), 10) || null, })); return release; } async function fetchScene(url, channel, baseRelease) { const res = await unprint.get(url, { interface: 'request', }); if (res.ok) { return scrapeScene(res.context, url, channel, baseRelease); } return res.status; } function scrapeProfile({ query }, url) { const profile = { url }; const { pathname } = new URL(url); const bio = Object.fromEntries(query.all('.m-info .stat').map((bioEl) => [ slugify(unprint.query.content(bioEl, '.label'), '_'), unprint.query.content(bioEl, '.value'), ])); if (pathname.includes('big-boob-models')) { profile.gender = 'female'; } if (pathname.includes('male-performer')) { profile.gender = 'male'; } profile.avatar = query.img('.item-img a img:not([src*="posting"])'); profile.placeOfResidence = bio.location; profile.ethnicity = bio.ethnicity; profile.height = convert(bio.height, 'cm'); profile.weight = convert(bio.weight, 'lb', 'kg'); if (bio.bra_size && bio.measurements) { profile.measurements = bio.measurements.replace(/^\d+-/, `${bio.bra_size}-`); } else { profile.measurements = bio.measurements || bio.bra_size; } profile.hairColor = bio.hair_color; const birthday = unprint.extractDate(bio.birthday, 'MMMM D', { match: /\w+.?\s+\d{1,2}/ }); if (birthday) { birthday.setFullYear(0); // indicate birth year is unknown profile.dateOfBirth = birthday; } return profile; } async function getActorUrl(actor) { if (actor.url) { return actor.url; } const searchRes = await unprint.post('https://www.scoreland.com/search-es/', { keywords: actor.name, 's_filters[site]': 'all', 's_filters[type]': 'models', }, { interface: 'request', form: true, followRedirects: false, }); const res = await unprint.get(searchRes.headers.location, { interface: 'request', cookies: { cisession: searchRes.cookies.cisession, }, // followRedirects: false, selectAll: '.li-item.model', }); if (res.ok) { const actorEl = res.context.find(({ query }) => slugify(query.content('.i-model')) === actor.slug); const url = actorEl?.query.url('.i-model'); if (url) { // messy nats link pointing to unpredictable sites, all data seems to be available on scoreland const { pathname } = new URL(url); const actorPath = pathname.match(/\/[\w-]+\/\d+\/?$/); if (actorPath) { return `https://www.scoreland.com/big-boob-models${actorPath[0]}`; } } } return null; } async function fetchProfile(actor) { const url = await getActorUrl(actor); if (url) { const res = await unprint.get(url, { interface: 'request', select: '#model-page', }); if (res.ok) { return scrapeProfile(res.context, url); } return res.status; } return null; } module.exports = { fetchLatest, fetchScene, fetchProfile, };