262 lines
7.1 KiB
JavaScript
262 lines
7.1 KiB
JavaScript
'use strict';
|
|
|
|
const bhttp = require('bhttp');
|
|
|
|
const { ex, exa, get } = require('../utils/q');
|
|
const slugify = require('../utils/slugify');
|
|
const { heightToCm, lbsToKg } = require('../utils/convert');
|
|
|
|
/**
 * Build [full-size, thumbnail] source pairs for every image on a photo album page.
 *
 * @param {string} html - markup of the album page; queried within `#photos-page`
 * @returns {string[][]} one `[candidate, original]` pair per image, where the
 *   candidate is the original source with `x_800` swapped for `x_xl` and the
 *   `_tn` suffix dropped
 */
function scrapePhotos(html) {
  const { qis: queryImageSources } = ex(html, '#photos-page');
  const thumbnails = queryImageSources('img');

  return thumbnails.map((thumbnail) => {
    // only the first occurrence of each marker is replaced, as String#replace does for string patterns
    const fullSize = thumbnail
      .replace('x_800', 'x_xl')
      .replace('_tn', '');

    return [fullSize, thumbnail];
  });
}
|
|
|
|
/**
 * Download a photo album page and scrape its image pairs.
 *
 * @param {string} url - address of the photo album page
 * @returns {Promise<Array>} result of `scrapePhotos` on a 200 response,
 *   otherwise an empty array
 */
async function fetchPhotos(url) {
  const res = await bhttp.get(url);
  const isOk = res.statusCode === 200;

  return isOk
    ? scrapePhotos(res.body.toString(), url)
    : [];
}
|
|
|
|
/**
 * Scrape every video tile of a listing page into a release summary.
 *
 * Tiles whose link contains `photos/` are photo albums rather than scenes and
 * are left out of the result.
 *
 * @param {string} html - markup of the listing page
 * @param {object} [site] - optional site record; when `site.parameters.actors`
 *   is set it is used as the actor list for every release
 * @returns {object[]} releases with title, url, entryId, date, actors,
 *   duration and, when a thumbnail is present, poster and teaser
 */
function scrapeAll(html, site) {
  const tiles = exa(html, '.container .video, .container-fluid .video');

  const releases = tiles.map(({ q, qa, qd, ql }) => {
    const title = q('.title, .i-title', true);
    const sceneUrl = new URL(q('a').href);

    // this is a photo album, not a scene (used for profiles)
    if (/photos\//.test(sceneUrl)) {
      return null;
    }

    // the ID is taken as the second-to-last path segment
    const [entryId] = sceneUrl.pathname.split('/').slice(-2);

    const release = {
      title,
      url: `${sceneUrl.origin}${sceneUrl.pathname}`, // query string and hash are dropped
      entryId,
      date: qd('.i-date', 'MMM DD', /\w+ \d{1,2}$/)
        || qd('.dt-box', 'MMM.DD YYYY'),
      actors: site?.parameters?.actors || qa('.model, .i-model', true),
      duration: ql('.i-amount, .amount'),
    };

    const thumbnailEl = q('.item-img img');

    if (!thumbnailEl) {
      return release;
    }

    // sources are prefixed with the scheme; presumably they are protocol-relative
    release.poster = `https:${thumbnailEl.src}`;

    const { gifPreview } = thumbnailEl.dataset;

    if (gifPreview) {
      release.teaser = {
        src: `https:${gifPreview}`,
      };
    }

    return release;
  });

  return releases.filter(Boolean);
}
|
|
|
|
/**
 * Scrape a single scene page into a release object.
 *
 * @param {string} html - markup of the scene page; queried within `#videos-page, #content`
 * @param {string} url - absolute scene URL; the entry ID is read from its
 *   second-to-last path segment
 * @param {object} [site] - optional site record; `site.parameters.actors` is
 *   used as a fallback when no actors are found on the page
 * @returns {Promise<object>} release with entryId, title, description, actors,
 *   tags, date, duration, poster, photos, trailer and, when the page exposes a
 *   score, rating
 */
async function scrapeScene(html, url, site) {
  const { qu } = ex(html, '#videos-page, #content');
  const release = {};

  [release.entryId] = new URL(url).pathname.split('/').slice(-2);

  release.title = qu.q('h2.text-uppercase, h2.title, #breadcrumb-top + h1', true)
    || qu.q('h1.m-title', true)?.split(/»|\//).slice(-1)[0].trim();
  release.description = qu.text('.p-desc, .desc');

  release.actors = qu.all('.value a[href*=models], .value a[href*=performer], .value a[href*=teen-babes]', true);

  if (release.actors.length === 0) {
    // no linked actors; fall back to the plain-text 'Featuring' stat
    const actorEl = qu.all('.stat').find(stat => /Featuring/.test(stat.textContent));
    const actorString = qu.text(actorEl);

    release.actors = actorString?.split(/,\band\b|,/g).map(actor => actor.trim()) || [];
  }

  // site may be absent; guarded the same way scrapeAll guards it
  if (release.actors.length === 0 && site?.parameters?.actors) release.actors = site.parameters.actors;

  release.tags = qu.all('a[href*=tag]', true);

  const dateEl = qu.all('.value').find(el => /\w+ \d+\w+, \d{4}/.test(el.textContent));
  release.date = qu.date(dateEl, null, 'MMMM Do, YYYY')
    || qu.date('.date', 'MMMM Do, YYYY', /\w+ \d{1,2}\w+, \d{4}/)
    || qu.date('.info .holder', 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);

  // the duration sits in a `.value` element like the date; a bare `value` selector
  // would only match <value> tags and never find it
  const durationEl = qu.all('.value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
  release.duration = qu.dur(durationEl);

  release.poster = qu.poster('video') || qu.img('.flowplayer img') || qu.img('img'); // _800.jpg is larger than _xl.jpg in landscape
  const photosUrl = qu.url('.stat a[href*=photos]');

  if (photosUrl) {
    release.photos = await fetchPhotos(photosUrl);
  } else {
    release.photos = qu.imgs('img[src*=ThumbNails], .p-photos .tn img').map(photo => [
      photo.replace('_tn', ''),
      photo,
    ]);
  }

  release.trailer = qu.all('a[href*=Trailers]').map((trailer) => {
    const label = trailer.textContent.trim();
    const format = label.match(/^\w+/)?.[0].toLowerCase();

    // only MP4 trailers are kept; labels that cannot be parsed are skipped instead of throwing
    if (format !== 'mp4') return null;

    const src = `https:${trailer.href}`;
    const qualityMatch = label.match(/\d+([a-zA-Z]+)?$/);

    return qualityMatch
      ? { src, quality: parseInt(qualityMatch[0], 10) }
      : { src };
  }).filter(Boolean);

  // not every page is guaranteed to carry a rating box
  const stars = qu.q('.rate-box')?.dataset.score;
  if (stars) release.rating = { stars };

  return release;
}
|
|
|
|
/**
 * Find the profile link of a model on a browse page.
 *
 * @param {string} html - markup of the model browse page
 * @param {string} actorName - name to match exactly against each link's `title`
 * @returns {?string} `href` of the first matching `.model a` link, or null
 *   when nothing matches or the link has no href
 */
function scrapeModels(html, actorName) {
  const { qa: queryAll } = ex(html);
  const match = queryAll('.model a').find(({ title }) => title === actorName);

  if (!match) {
    return null;
  }

  return match.href || null;
}
|
|
|
|
/**
 * Collect an actor's releases by following the scene list page by page.
 *
 * @param {string} url - address of the scene list page to fetch
 * @param {object[]} [accReleases=[]] - releases gathered from earlier pages
 * @returns {Promise<?object[]>} all releases collected so far plus those on
 *   this and following pages, or null as soon as any page fails to load
 */
async function fetchActorReleases(url, accReleases = []) {
  const res = await get(url);

  if (!res.ok) {
    return null;
  }

  const pageReleases = scrapeAll(res.item.document.body.outerHTML);
  const releases = [...accReleases, ...pageReleases];

  const nextPage = res.item.qu.url('.next-pg');
  // last page has 'next' button linking to join page
  const hasNextPage = Boolean(nextPage) && new URL(nextPage).searchParams.has('page');

  return hasNextPage
    ? fetchActorReleases(nextPage, releases)
    : releases;
}
|
|
|
|
/**
 * Scrape a model page into a profile object.
 *
 * Each `.stat` row is read as a label/value pair; the label loses its last
 * character (presumably a trailing colon) and is slugified with `_` to form
 * the key under which the value is looked up below.
 *
 * @param {string} html - markup of the model page; queried within `#model-page`
 * @param {string} actorUrl - absolute address of the model page, used to build
 *   the scene list address
 * @param {boolean} withReleases - when truthy, the actor's releases are
 *   fetched and attached as `profile.releases`
 * @returns {Promise<object>} profile; gender is always 'female', other fields
 *   are only set when the page provides them
 */
async function scrapeProfile(html, actorUrl, withReleases) {
  const { q, qa, qi } = ex(html, '#model-page');
  const profile = { gender: 'female' };

  const bio = Object.fromEntries(qa('.stat').map((statEl) => {
    const label = q(statEl, '.label', true).slice(0, -1);

    return [slugify(label, '_'), q(statEl, '.value', true)];
  }));

  if (bio.location) {
    profile.residencePlace = bio.location.replace('Czech Repulic', 'Czech Republic'); // see Laura Lion
  }

  if (bio.birthday) {
    const [monthName] = bio.birthday.match(/^\w+/);
    const [dayOfMonth] = bio.birthday.match(/\d+/);

    profile.birthday = [monthName.toLowerCase(), dayOfMonth]; // currently unused, not to be confused with birthdate
  }

  if (bio.ethnicity) {
    profile.ethnicity = bio.ethnicity;
  }

  if (bio.hair_color) {
    profile.hair = bio.hair_color;
  }

  if (bio.height) {
    profile.height = heightToCm(bio.height);
  }

  if (bio.weight) {
    profile.weight = lbsToKg(bio.weight);
  }

  if (bio.bra_size) {
    profile.bust = bio.bra_size;
  }

  if (bio.measurements) {
    // the first segment is skipped; bust comes from the bra size instead
    const [, waist, hip] = bio.measurements.split('-');

    profile.waist = waist;
    profile.hip = hip;
  }

  if (bio.occupation) {
    profile.occupation = bio.occupation;
  }

  const avatar = qi('img');

  if (avatar) {
    profile.avatar = avatar;
  }

  if (withReleases) {
    const profileUrl = new URL(actorUrl);
    const scenesUrl = `${profileUrl.origin}${profileUrl.pathname}/scenes?page=1`;

    profile.releases = await fetchActorReleases(scenesUrl);
  }

  return profile;
}
|
|
|
|
/**
 * Fetch one page of a site's latest releases.
 *
 * @param {object} site - site record; `site.url` is the base address and
 *   `site.parameters.path` optionally overrides the default listing path
 * @param {number} [page=1] - listing page number
 * @returns {Promise<object[]|number>} scraped releases on a 200 response,
 *   otherwise the HTTP status code
 */
async function fetchLatest(site, page = 1) {
  const listingPath = site.parameters?.path || '/big-boob-videos';
  const res = await bhttp.get(`${site.url}${listingPath}?page=${page}`);

  if (res.statusCode !== 200) {
    return res.statusCode;
  }

  return scrapeAll(res.body.toString(), site);
}
|
|
|
|
/**
 * Download a scene page and scrape it.
 *
 * @param {string} url - absolute address of the scene page
 * @param {object} site - site record passed through to `scrapeScene`
 * @returns {Promise<?object>} scraped release on a 200 response, otherwise null
 */
async function fetchScene(url, site) {
  const res = await bhttp.get(url);

  if (res.statusCode !== 200) {
    return null;
  }

  return scrapeScene(res.body.toString(), url, site);
}
|
|
|
|
/**
 * Look an actor up in the alphabetical model listings and scrape the profile.
 *
 * Listing pages of the current source are walked one by one until the actor
 * is found. When a listing page does not answer with 200 (redirects are not
 * followed), the search restarts at page 1 of the next source, if there is one.
 *
 * @param {string} actorName - name as shown in the listing; its first letter
 *   selects the listing section
 * @param {string} scraperSlug - not used here, only passed along on recursion
 * @param {object} site - not used here, only passed along on recursion
 * @param {object} include - `include.scenes` decides whether releases are
 *   fetched along with the profile
 * @param {number} [page=1] - listing page to query
 * @param {number} [source=0] - index of the listing source to query
 * @returns {Promise<?object>} scraped profile, or null when the actor is not
 *   found in any source or the profile page fails to load
 */
async function fetchProfile(actorName, scraperSlug, site, include, page = 1, source = 0) {
  const initial = actorName.charAt(0).toUpperCase();

  const listingUrls = [
    `https://www.scoreland.com/big-boob-models/browse/${initial}/?page=${page}`,
    `https://www.50plusmilfs.com/xxx-milf-models/browse/${initial}/?page=${page}`,
  ];

  const listingRes = await bhttp.get(listingUrls[source], {
    followRedirects: false,
  });

  if (listingRes.statusCode !== 200) {
    // this source is exhausted or unavailable; move on to the next one, if any
    const hasNextSource = Boolean(listingUrls[source + 1]);

    return hasNextSource
      ? fetchProfile(actorName, scraperSlug, site, include, 1, source + 1)
      : null;
  }

  const actorUrl = scrapeModels(listingRes.body.toString(), actorName);

  if (!actorUrl) {
    // not on this page; try the next page of the same source
    return fetchProfile(actorName, scraperSlug, site, include, page + 1, source);
  }

  const profileRes = await bhttp.get(actorUrl);

  if (profileRes.statusCode !== 200) {
    return null;
  }

  return scrapeProfile(profileRes.body.toString(), actorUrl, include.scenes);
}
|
|
|
|
// Public scraper interface: latest releases, a single scene, and an actor profile.
module.exports = { fetchLatest, fetchScene, fetchProfile };
|