forked from DebaucheryLibrarian/traxxx
Added teaser support. Added Score network with scraper for Scoreland. Improved q. Added assets.
This commit is contained in:
206
src/scrapers/score.js
Normal file
206
src/scrapers/score.js
Normal file
@@ -0,0 +1,206 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
|
||||
const { ex, exa } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { heightToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
/**
 * Collects the images of a photo album page.
 *
 * @param {string} html - Markup of the album page.
 * @returns {Array<Array<string>>} One pair per image: a rewritten source
 *   first, followed by the source exactly as it was found on the page.
 */
function scrapePhotos(html) {
	const { qis: imageSources } = ex(html, '#photos-page');

	return imageSources('img').map((original) => {
		// presumably `x_xl` and the un-suffixed name address larger renditions of the same photo — TODO confirm
		const enlarged = original.replace('x_800', 'x_xl').replace('_tn', '');

		return [enlarged, original];
	});
}
|
||||
|
||||
/**
 * Downloads a photo album page and extracts its photos.
 *
 * @param {?string} url - Address of the album page; may be empty when no
 *   album link was found.
 * @returns {Promise<Array>} Whatever scrapePhotos extracts from the page, or
 *   an empty array when there is no URL or the response status is not 200.
 */
async function fetchPhotos(url) {
	// without an album link there is nothing to request; do not hand an empty URL to the HTTP client
	if (!url) return [];

	const res = await bhttp.get(url);

	if (res.statusCode !== 200) return [];

	return scrapePhotos(res.body.toString());
}
|
||||
|
||||
/**
 * Extracts the scenes listed on an overview page.
 *
 * Every `.container .video` tile becomes one release. Tiles that have no
 * title link, or whose link points to a photo album, are left out.
 *
 * @param {string} html - Markup containing the tiles.
 * @returns {Array<Object>} Releases with title, url, entryId, date, actors and
 *   duration, plus poster and teaser when the tile provides them.
 */
function scrapeAll(html) {
	// sources are expected to be protocol-relative (`//host/path`); anything else is passed through untouched
	const withProtocol = source => (source.startsWith('//') ? `https:${source}` : source);

	return exa(html, '.container .video').map(({ q, qa, qd, ql }) => {
		const linkEl = q('a.i-title');

		// a tile without a title link cannot be identified; skip it rather than abort the whole page
		if (!linkEl) return null;

		const url = new URL(linkEl.href);

		// this is a photo album, not a scene (used for profiles)
		if (/photos\//.test(url.href)) return null;

		const release = {
			title: linkEl.textContent.trim(),
			// origin + pathname drops the query string and fragment
			url: `${url.origin}${url.pathname}`,
			// last non-empty path segment, so the result does not depend on a trailing slash
			entryId: url.pathname.split('/').filter(Boolean).pop(),
			date: qd('.i-date', 'MMM DD', /\w+ \d{1,2}$/),
			actors: qa('.i-model', true),
			duration: ql('.i-amount'),
		};

		const posterEl = q('.item-img img');

		if (posterEl) {
			if (posterEl.src) release.poster = withProtocol(posterEl.src);

			// not every tile carries a preview; avoid emitting a bogus `https:undefined` teaser
			const preview = posterEl.dataset.gifPreview;

			if (preview) release.teaser = { src: withProtocol(preview) };
		}

		return release;
	}).filter(Boolean);
}
|
||||
|
||||
/**
 * Extracts a single scene from its page.
 *
 * Optional parts of the page (date, duration, photo album, trailer, rating)
 * are only added to the release when they are actually present.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Address of the scene page; the entry ID is taken from its path.
 * @returns {Promise<Object>} The scraped release.
 */
async function scrapeScene(html, url) {
	const { q, qa, qd, ql, qu, qp, qt } = ex(html, '#videos-page');
	const release = {};

	// last non-empty path segment, so the result does not depend on a trailing slash
	release.entryId = new URL(url).pathname.split('/').filter(Boolean).pop();

	release.title = q('#breadcrumb-top + h1', true);
	release.description = q('.p-desc', true);

	release.actors = qa('a[href*=models]', true);
	release.tags = qa('a[href*=tag]', true);

	// the facts are looked up among the `.value` elements and told apart by their text;
	// the duration lookup previously queried `value` (a tag name), which cannot match them
	const valueEls = qa('.value');

	const dateEl = valueEls.find(el => /\w+ \d+\w+, \d{4}/.test(el.textContent));
	if (dateEl) release.date = qd(dateEl, null, 'MMMM Do, YYYY');

	const durationEl = valueEls.find(el => /\d{1,3}:\d{2}/.test(el.textContent));
	if (durationEl) release.duration = ql(durationEl);

	// only request the album when the page links to one
	const photosUrl = qu('a[href*=photos]');
	release.photos = photosUrl ? await fetchPhotos(photosUrl) : [];

	release.poster = qp('video'); // _800.jpg is larger than _xl.jpg in landscape

	const trailer = qt();

	if (trailer) {
		// don't rely on trailer always being 720p by default
		release.trailer = [720, 360].map(quality => ({
			src: trailer.replace(/\d+p\.mp4/, `${quality}p.mp4`),
			quality,
		}));
	}

	const stars = q('.rate-box')?.dataset.score;
	if (stars) release.rating = { stars };

	return release;
}
|
||||
|
||||
/**
 * Looks up a model's profile link on a model index page.
 *
 * @param {string} html - Markup of the index page.
 * @param {string} actorName - Name to look for; compared strictly against each link's title.
 * @returns {?string} The href of the first link whose title equals the name,
 *   or null when no link matches or the matching link has no href.
 */
function scrapeModels(html, actorName) {
	const { qa } = ex(html);

	for (const link of qa('.model a')) {
		if (link.title === actorName) {
			return link.href || null;
		}
	}

	return null;
}
|
||||
|
||||
/**
 * Extracts a model profile from her page.
 *
 * The `.stat` entries of the page are read into a key/value map first, and
 * the known keys are then copied onto the profile. Values that are absent or
 * cannot be parsed are left out of the result.
 *
 * @param {string} html - Markup of the model page.
 * @returns {Object} The profile; `releases` holds the scenes scrapeAll finds
 *   in the container that follows `#model-page`.
 */
function scrapeProfile(html) {
	const { q, qa, qi } = ex(html, '#model-page');
	const profile = { gender: 'female' };

	const bio = Object.fromEntries(qa('.stat').map((el) => {
		// the label's last character is cut off (presumably a trailing colon) before it is turned into a key
		const prop = q(el, '.label', true).slice(0, -1);

		return [slugify(prop, false, '_'), q(el, '.value', true)];
	}));

	if (bio.location) profile.residencePlace = bio.location.replace('Czech Repulic', 'Czech Republic'); // see Laura Lion

	if (bio.birthday) {
		// either part may be missing from the text; only keep a birthday that has both
		const [birthMonth] = bio.birthday.match(/^\w+/) || [];
		const [birthDay] = bio.birthday.match(/\d+/) || [];

		if (birthMonth && birthDay) {
			profile.birthday = [birthMonth.toLowerCase(), birthDay]; // currently unused, not to be confused with birthdate
		}
	}

	if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
	if (bio.hair_color) profile.hair = bio.hair_color;

	if (bio.height) profile.height = heightToCm(bio.height);
	if (bio.weight) profile.weight = lbsToKg(bio.weight);

	if (bio.bra_size) profile.bust = bio.bra_size;

	if (bio.measurements) {
		// the first dash-separated part is skipped; bust is taken from the bra size instead
		const [, waist, hip] = bio.measurements.split('-');

		if (waist) profile.waist = waist;
		if (hip) profile.hip = hip;
	}

	if (bio.occupation) profile.occupation = bio.occupation;

	const avatar = qi('img');
	if (avatar) profile.avatar = avatar;

	const releases = ex(html, '#model-page + .container');
	profile.releases = scrapeAll(releases.document.outerHTML);

	return profile;
}
|
||||
|
||||
/**
 * Fetches one page of a site's video overview and scrapes its scenes.
 *
 * @param {Object} site - Site to fetch from; its `url` is used as the base address.
 * @param {number} [page=1] - Overview page to request.
 * @returns {Promise<?Array>} The scraped releases, or null when the response
 *   status is not 200.
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}/big-boob-videos?page=${page}`);

	return res.statusCode === 200
		? scrapeAll(res.body.toString(), site)
		: null;
}
|
||||
|
||||
/**
 * Fetches a scene page and scrapes the release from it.
 *
 * @param {string} url - Address of the scene page.
 * @param {Object} site - Site the scene belongs to; handed on to the scraper.
 * @returns {Promise<?Object>} The scraped release, or null when the response
 *   status is not 200.
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	if (res.statusCode !== 200) {
		return null;
	}

	const html = res.body.toString();

	return scrapeScene(html, url, site);
}
|
||||
|
||||
/**
 * Finds a model in the alphabetical model index and scrapes her profile.
 *
 * The index for the first letter of the name is requested page by page,
 * starting at `page`, until a page lists the model or a request does not
 * return status 200.
 *
 * @param {string} actorName - Name of the model to look up.
 * @param {string} scraperSlug - Not used by the lookup itself.
 * @param {number} [page=1] - Index page to start from.
 * @returns {Promise<?Object>} The scraped profile, or null when the model is
 *   not found or one of the requests does not return status 200.
 */
async function fetchProfile(actorName, scraperSlug, page = 1) {
	const letter = actorName.charAt(0).toUpperCase();
	const indexUrl = pageNumber => `https://www.scoreland.com/big-boob-models/browse/${letter}/?page=${pageNumber}`;

	let currentPage = page;
	let res = await bhttp.get(indexUrl(currentPage));

	// keep turning pages for as long as the index answers with 200
	while (res.statusCode === 200) {
		const actorUrl = scrapeModels(res.body.toString(), actorName);

		if (actorUrl) {
			const actorRes = await bhttp.get(actorUrl);

			return actorRes.statusCode === 200
				? scrapeProfile(actorRes.body.toString())
				: null;
		}

		currentPage += 1;
		res = await bhttp.get(indexUrl(currentPage));
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
Reference in New Issue
Block a user