'use strict'; const { ex, exa, get } = require('../utils/q'); const slugify = require('../utils/slugify'); const http = require('../utils/http'); const { heightToCm, lbsToKg } = require('../utils/convert'); function scrapePhotos(html) { const { qis } = ex(html, '#photos-page'); const photos = qis('img'); return photos.map((photo) => [ photo .replace('x_800', 'x_xl') .replace('_tn', ''), photo, ]); } async function fetchPhotos(url) { const res = await http.get(url); if (res.statusCode === 200) { return scrapePhotos(res.body.toString(), url); } return []; } function scrapeAll(html, site) { return exa(html, '.container .video, .container-fluid .video').map(({ q, qa, qd, ql }) => { const release = {}; release.title = q('.title, .i-title', true); const linkEl = q('a'); const url = new URL(linkEl.href); release.url = `${url.origin}${url.pathname}`; // this is a photo album, not a scene (used for profiles) if (/photos\//.test(url)) return null; [release.entryId] = url.pathname.split('/').slice(-2); release.date = qd('.i-date', 'MMM DD', /\w+ \d{1,2}$/) || qd('.dt-box', 'MMM.DD YYYY'); release.actors = site?.parameters?.actors || qa('.model, .i-model', true); release.duration = ql('.i-amount, .amount'); const posterEl = q('.item-img img'); if (posterEl) { release.poster = `https:${posterEl.src}`; } if (posterEl?.dataset.gifPreview) { release.teaser = { src: `https:${posterEl.dataset.gifPreview}`, }; } return release; }).filter(Boolean); } async function scrapeScene(html, url, site) { const { qu } = ex(html, '#videos-page, #content section'); const release = {}; [release.entryId] = new URL(url).pathname.split('/').slice(-2); release.title = qu.q('h2.text-uppercase, h2.title, #breadcrumb-top + h1', true) || qu.q('h1.m-title', true)?.split(/ยป|\//).slice(-1)[0].trim(); release.description = qu.text('.p-desc, .desc'); release.actors = qu.all('.value a[href*=models], .value a[href*=performer], .value a[href*=teen-babes]', true); if (release.actors.length === 0) { const actorEl = qu.all('.stat').find((stat) => /Featuring/.test(stat.textContent)); const actorString = qu.text(actorEl); release.actors = actorString?.split(/,\band\b|,/g).map((actor) => actor.trim()) || []; } if (release.actors.length === 0 && site.parameters?.actors) release.actors = site.parameters.actors; release.tags = qu.all('a[href*=tag]', true); const dateEl = qu.all('.value').find((el) => /\w+ \d+\w+, \d{4}/.test(el.textContent)); release.date = qu.date(dateEl, null, 'MMMM Do, YYYY') || qu.date('.date', 'MMMM Do, YYYY', /\w+ \d{1,2}\w+, \d{4}/) || qu.date('.info .holder', 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/); const durationEl = qu.all('value').find((el) => /\d{1,3}:\d{2}/.test(el.textContent)); release.duration = qu.dur(durationEl); release.poster = qu.poster('video') || qu.img('.flowplayer img') || html.match(/posterImage: '(.*\.jpg)'/)?.[1] || null; // _800.jpg is larger than _xl.jpg in landscape const photosUrl = qu.url('.stat a[href*=photos]'); if (photosUrl) { release.photos = await fetchPhotos(photosUrl); } else { release.photos = qu.imgs('img[src*=ThumbNails], .p-photos .tn img').map((photo) => [ photo.replace('_tn', ''), photo, ]); } const trailers = qu.all('a[href*=Trailers]'); if (trailers) { release.trailer = trailers.map((trailer) => { const src = `https:${trailer.href}`; const format = trailer.textContent.trim().match(/^\w+/)[0].toLowerCase(); const quality = parseInt(trailer.textContent.trim().match(/\d+([a-zA-Z]+)?$/)[0], 10); return format === 'mp4' ? { src, quality } : null; }).filter(Boolean); } const stars = qu.q('.rate-box').dataset.score; if (stars) release.rating = { stars }; return release; } function scrapeModels(html, actorName) { const { qa } = ex(html); const model = qa('.model a').find((link) => link.title === actorName); return model?.href || null; } async function fetchActorReleases(url, accReleases = []) { const res = await get(url); if (res.ok) { const releases = accReleases.concat(scrapeAll(res.item.document.body.outerHTML)); const nextPage = res.item.qu.url('.next-pg'); if (nextPage && new URL(nextPage).searchParams.has('page')) { // last page has 'next' button linking to join page return fetchActorReleases(nextPage, releases); } return releases; } return null; } async function scrapeProfile(html, actorUrl, withReleases) { const { q, qa, qi } = ex(html, '#model-page'); const profile = { gender: 'female' }; const bio = qa('.stat').reduce((acc, el) => { const prop = q(el, '.label', true).slice(0, -1); const key = slugify(prop, '_'); const value = q(el, '.value', true); return { ...acc, [key]: value, }; }, {}); if (bio.location) profile.residencePlace = bio.location.replace('Czech Repulic', 'Czech Republic'); // see Laura Lion if (bio.birthday) { const birthMonth = bio.birthday.match(/^\w+/)[0].toLowerCase(); const [birthDay] = bio.birthday.match(/\d+/); profile.birthday = [birthMonth, birthDay]; // currently unused, not to be confused with birthdate } if (bio.ethnicity) profile.ethnicity = bio.ethnicity; if (bio.hair_color) profile.hair = bio.hair_color; if (bio.height) profile.height = heightToCm(bio.height); if (bio.weight) profile.weight = lbsToKg(bio.weight); if (bio.bra_size) profile.bust = bio.bra_size; if (bio.measurements) [, profile.waist, profile.hip] = bio.measurements.split('-'); if (bio.occupation) profile.occupation = bio.occupation; const avatar = qi('img'); if (avatar) profile.avatar = avatar; if (withReleases) { const { origin, pathname } = new URL(actorUrl); profile.releases = await fetchActorReleases(`${origin}${pathname}/scenes?page=1`); } return profile; } async function fetchLatest(site, page = 1) { const latestPath = site.parameters?.path || '/big-boob-videos'; const url = `${site.url}${latestPath}?page=${page}`; const res = await http.get(url); if (res.statusCode === 200) { return scrapeAll(res.body.toString(), site); } return res.statusCode; } async function fetchScene(url, site) { const res = await http.get(url); if (res.statusCode === 200) { return scrapeScene(res.body.toString(), url, site); } return null; } async function fetchProfile({ name: actorName }, context, include, page = 1, source = 0) { const letter = actorName.charAt(0).toUpperCase(); const sources = [ `https://www.scoreland.com/big-boob-models/browse/${letter}/?page=${page}`, `https://www.50plusmilfs.com/xxx-milf-models/browse/${letter}/?page=${page}`, ]; const url = sources[source]; const res = await http.get(url, { followRedirects: false, }); if (res.statusCode === 200) { const actorUrl = scrapeModels(res.body.toString(), actorName); if (actorUrl) { const actorRes = await http.get(actorUrl); if (actorRes.statusCode === 200) { return scrapeProfile(actorRes.body.toString(), actorUrl, include.scenes); } return null; } return fetchProfile({ name: actorName }, context, include, page + 1, source); } if (sources[source + 1]) { return fetchProfile({ name: actorName }, context, include, 1, source + 1); } return null; } module.exports = { fetchLatest, fetchScene, fetchProfile, };