// Forked from DebaucheryLibrarian/traxxx.
// Repository listing metadata: 324 lines, 8.3 KiB, JavaScript, executable file.
'use strict';
|
|
|
|
const unprint = require('unprint');
|
|
|
|
const slugify = require('../utils/slugify');
|
|
const { stripQuery } = require('../utils/url');
|
|
const { convert } = require('../utils/convert');
|
|
|
|
// size suffixes that may appear in an image URL and can be swapped for another variant
const sizeRegex = /_lg|_xl|_tn/;

/**
 * Lists candidate URLs for an image by swapping its size suffix.
 *
 * The first occurrence of `_lg`, `_xl` or `_tn` is replaced by `_1280`, `_800` and `_xl`
 * in that order, and the URL as given is kept as the last candidate. Duplicates are
 * removed, so a URL without a recognised suffix yields a single-entry list.
 *
 * @param {string|null|undefined} src - image URL
 * @returns {string[]|null} de-duplicated candidates, or null when `src` is falsy
 */
function resizeSrc(src) {
	if (!src) {
		return null;
	}

	const variants = ['_1280', '_800', '_xl'].map((suffix) => src.replace(sizeRegex, suffix));

	return [...new Set([...variants, src])];
}
|
|
|
|
/**
 * Derives a release date and its precision from the `.i-date` element.
 *
 * Three notations are attempted in order: month and year ("Nov. 2025"), month and
 * day ("Nov. 12th"), and a relative date ("8 Weeks Ago").
 *
 * @param {object} query - object exposing `date(selector, format, options)` and `dateAgo(selector)`
 * @returns {{ date: Date|null, precision: string|null }} both null when nothing could be parsed
 */
function deriveDate(query) {
	const now = new Date();

	// Nov. 2025
	const monthYear = query.date('.i-date', 'MMM. YYYY', { match: /(\w+\.? \d{4})/ });

	if (monthYear) {
		return { date: monthYear, precision: 'month' };
	}

	// Nov. 12th
	const monthDay = query.date('.i-date', 'MMM. Do', { match: /(\w+\.? \d{1,2}\w+)/ });

	if (monthDay) {
		// a year-less date that lands in the future is moved to the previous calendar year
		if (monthDay > now) {
			monthDay.setFullYear(now.getFullYear() - 1);
		}

		return { date: monthDay, precision: 'day' };
	}

	// 8 Weeks Ago
	const relative = query.dateAgo('.i-date');

	if (!relative) {
		return { date: null, precision: null };
	}

	// not much use for weekly precision
	const precision = relative.precision === 'week' ? 'month' : relative.precision;

	return { date: relative.date, precision };
}
|
|
|
|
/**
 * Scrapes the scene tiles of an overview page into shallow releases.
 *
 * A single incomplete tile no longer aborts the whole page: a missing link, missing
 * model text or a link without a trailing numeric ID are tolerated.
 *
 * @param {Array<{ query: object }>} scenes - one item per tile, each exposing a `query` object
 * @param {{ origin: string }} channel - channel whose origin is used to reconstruct scene URLs
 * @param {{ path: string }} parameters - scraper parameters; `path` is the videos path
 * @returns {object[]} one release per tile
 */
function scrapeAll(scenes, channel, parameters) {
	return scenes.map(({ query }) => {
		const release = {};
		const poster = query.img('.item-img img');

		const href = query.url('a.i-title, .item-img a');
		const url = href ? stripQuery(href) : null;

		// a tile without any link is handled like a join link: empty pathname triggers reconstruction
		const { pathname, hostname } = url ? new URL(url) : { pathname: '', hostname: '' };

		release.title = query.content('a.i-title, h2.i-title');
		release.duration = query.duration('.time-ol');

		const { date, precision } = deriveDate(query);

		release.date = date;
		release.datePrecision = precision;

		// model text may be absent; drop empty names left by stray commas
		release.actors = (query.content('.i-model') || '')
			.split(',')
			.map((actor) => actor.trim())
			.filter(Boolean);

		const isJoinLink = hostname.includes('join.') || pathname.includes('/join') || pathname.length <= 1;

		if (isJoinLink) {
			// no link available, attempt to reconstruct from poster URL
			const entryId = poster?.match(/posting_(\d+)/)?.[1];

			if (entryId) {
				// we can get deep data from this
				release.entryId = entryId;
				release.url = `${channel.origin}${parameters.path}/${slugify(release.actors[0] || '', '-', { lower: false })}/${entryId}/`;
			} else {
				// lost cause, make up entryId to register shallow data
				release.entryId = slugify(release.title);
			}
		} else {
			release.url = url;
			// null instead of a TypeError when the path does not end in a numeric ID
			release.entryId = pathname.match(/\/(\d+)\/?$/)?.[1] || null;
		}

		if (poster) {
			// guess the six numbered caps (01_lg ... 06_lg) from the poster file name
			const caps = Array.from(new Set(Array.from({ length: 6 }, (_src, index) => {
				const file = `${String(index + 1).padStart(2, '0')}_lg`;

				return poster.replace(/0\d_lg/, file);
			}))).map((src) => resizeSrc(src));

			// interleave so that the best size of every cap is tried before any smaller size
			release.poster = Array.from({ length: caps[0].length }).flatMap((_value, index) => caps.map((src) => src[index]));

			if (caps.length > 1) {
				release.caps = caps;
			}
		}

		release.photos = query.imgs('.thumbs img'); // cards layout

		release.teaser = [
			query.video('.preview-clip source[type="video/mp4"]'),
			query.video('.preview-clip source[type="video/webm"]'),
		].filter(Boolean);

		return release;
	});
}
|
|
|
|
/**
 * Fetches one page of the channel's video overview and scrapes its tiles.
 *
 * @param {{ origin: string }} channel - channel to fetch from
 * @param {number} [page=1] - page number placed in the `page` query parameter
 * @param {{ parameters: { path: string } }} options - `parameters.path` is the overview path
 * @returns {Promise<object[]|number>} scraped releases, or the response status when the request was not ok
 */
async function fetchLatest(channel, page = 1, { parameters }) {
	const pageUrl = `${channel.origin}${parameters.path}/?page=${page}`;

	const res = await unprint.get(pageUrl, {
		interface: 'request', // seemingly less prone to HTTPParserError: Response does not match the HTTP/1.1 protocol (Invalid character in chunk size)
		selectAll: '.videos .video, .video-wide', // video-wide for cards layout e.g. Big Boobs POV
	});

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.context, channel, parameters);
}
|
|
|
|
/**
 * Scrapes a scene page into a release.
 *
 * @param {{ query: object }} context - page context exposing a `query` object
 * @param {string} url - URL the page was fetched from; must be an absolute URL
 * @returns {object} release with url, entryId, title, description, date, duration,
 *   actors, tags, poster, photos and trailer
 */
function scrapeScene({ query }, url) {
	const release = {};

	// label/value pairs from the stats list, keyed by slugified label
	const info = Object.fromEntries(query.all('.stat').map((infoEl) => [
		slugify(unprint.query.content(infoEl, '.label')),
		unprint.query.content(infoEl, '.value'),
	]));

	release.url = stripQuery(url);
	// null instead of a TypeError when the path does not end in a numeric ID
	release.entryId = new URL(url).pathname.match(/\/(\d+)\/?$/)?.[1] || null;

	release.title = query.content('.p-desc h2, #videos_page-page h1');
	release.description = query.text('.p-desc, .desc');

	release.date = unprint.extractDate(info.date, 'MMMM Do, YYYY', { match: /\w+ \d{1,2}\w+, \d{4}/ });
	// fall back to reading the value as a plain number, multiplied by 60
	release.duration = unprint.extractDuration(info.duration) || Number(info.duration) * 60 || null;

	release.actors = query.all('//span[contains(text(), "Featuring")]/following-sibling::span/a').map((actorEl) => ({
		name: unprint.query.content(actorEl),
		url: stripQuery(unprint.query.url(actorEl, null)),
	}));

	release.tags = query.contents('.p-desc a[href*="tag/"], .desc a[href*="tag/"]');

	const style = query.content('.vp style');
	const poster = query.img('#videos_page-page .item-img img') || style?.match(/background-image: url\('(http[\w.:/_-]+)'\);/)?.[1];
	const fallbackPoster = resizeSrc(query.img('meta[itemprop="image"]', { attribute: 'content' })); // usually a different image

	const photos = query.all('.gallery .thumb')
		.map((imgEl) => {
			const link = unprint.query.url(imgEl, 'a');
			const img = unprint.query.img(imgEl, 'img');
			const isJoin = !link || link.includes('join.') || link.includes('/join');

			// best source first; a thumb without an image contributes nothing instead of throwing
			return Array.from(new Set([
				...(isJoin ? [] : [link]),
				img?.replace('_tn', ''),
				img,
			].filter(Boolean)));
		})
		.filter((sources) => sources.length > 0);

	if (poster) {
		release.poster = resizeSrc(poster);

		// The fallback poster isn't usually in the photoset, so add it when it is a different
		// image than the poster. The previous condition was inverted: it only added the fallback
		// when it was the same image as the poster, and discarded it otherwise.
		if (fallbackPoster && !fallbackPoster.includes(poster)) {
			release.photos = [fallbackPoster, ...photos];
		} else {
			release.photos = photos;
		}
	} else {
		release.poster = fallbackPoster;
		release.photos = photos;
	}

	release.trailer = query.all('.vp video source').map((videoEl) => ({
		src: unprint.query.video(videoEl, null),
		quality: parseInt(unprint.query.attribute(videoEl, null, 'res'), 10) || null,
	}));

	return release;
}
|
|
|
|
/**
 * Fetches a scene page and scrapes it.
 *
 * @param {string} url - scene URL
 * @param {object} channel - forwarded to scrapeScene
 * @param {object} baseRelease - forwarded to scrapeScene
 * @returns {Promise<object|number>} scraped release, or the response status when the request was not ok
 */
async function fetchScene(url, channel, baseRelease) {
	const res = await unprint.get(url, { interface: 'request' });

	return res.ok
		? scrapeScene(res.context, url, channel, baseRelease)
		: res.status;
}
|
|
|
|
/**
 * Scrapes a model page into an actor profile.
 *
 * @param {{ query: object }} context - page context exposing a `query` object
 * @param {string} url - absolute profile URL; its path is used to infer the gender
 * @returns {object} profile with url, optional gender, avatar, residence, ethnicity,
 *   height, weight, measurements, hair color and optional dateOfBirth
 */
function scrapeProfile({ query }, url) {
	const profile = { url };
	const { pathname } = new URL(url);

	// label/value pairs from the stats list, keyed by underscore-slugified label
	const bioEntries = query.all('.m-info .stat').map((statEl) => {
		const key = slugify(unprint.query.content(statEl, '.label'), '_');
		const value = unprint.query.content(statEl, '.value');

		return [key, value];
	});

	const bio = Object.fromEntries(bioEntries);

	const isMale = pathname.includes('male-performer');
	const isFemale = pathname.includes('big-boob-models');

	// the male path takes precedence when both segments are present
	if (isMale || isFemale) {
		profile.gender = isMale ? 'male' : 'female';
	}

	profile.avatar = query.img('.item-img a img:not([src*="posting"])');

	profile.placeOfResidence = bio.location;
	profile.ethnicity = bio.ethnicity;

	profile.height = convert(bio.height, 'cm');
	profile.weight = convert(bio.weight, 'lb', 'kg');

	// when both are listed, the leading number of the measurements is swapped for the bra size
	profile.measurements = bio.bra_size && bio.measurements
		? bio.measurements.replace(/^\d+-/, `${bio.bra_size}-`)
		: bio.measurements || bio.bra_size;

	profile.hairColor = bio.hair_color;

	const birthday = unprint.extractDate(bio.birthday, 'MMMM D', { match: /\w+.?\s+\d{1,2}/ });

	if (birthday) {
		birthday.setFullYear(0); // indicate birth year is unknown
		profile.dateOfBirth = birthday;
	}

	return profile;
}
|
|
|
|
/**
 * Resolves the scoreland.com profile URL of an actor.
 *
 * Uses `actor.url` when present. Otherwise posts a model search, follows the redirect
 * it answers with, and looks for a result whose slugified name equals `actor.slug`.
 *
 * @param {{ url?: string, name: string, slug: string }} actor - actor to look up
 * @returns {Promise<string|null>} profile URL, or null when the search did not redirect,
 *   the results could not be fetched, or no matching result was found
 */
async function getActorUrl(actor) {
	if (actor.url) {
		return actor.url;
	}

	const searchRes = await unprint.post('https://www.scoreland.com/search-es/', {
		keywords: actor.name,
		's_filters[site]': 'all',
		's_filters[type]': 'models',
	}, {
		interface: 'request',
		form: true,
		followRedirects: false,
	});

	const resultsUrl = searchRes.headers?.location;

	if (!resultsUrl) {
		// the search did not answer with a redirect, there is no results page to fetch
		return null;
	}

	const res = await unprint.get(resultsUrl, {
		interface: 'request',
		cookies: {
			cisession: searchRes.cookies?.cisession,
		},
		selectAll: '.li-item.model',
	});

	if (!res.ok) {
		return null;
	}

	const actorEl = res.context.find(({ query }) => slugify(query.content('.i-model')) === actor.slug);
	const url = actorEl?.query.url('.i-model');

	if (!url) {
		return null;
	}

	// messy nats link pointing to unpredictable sites, all data seems to be available on scoreland
	const { pathname } = new URL(url);
	const actorPath = pathname.match(/\/[\w-]+\/\d+\/?$/);

	return actorPath
		? `https://www.scoreland.com/big-boob-models${actorPath[0]}`
		: null;
}
|
|
|
|
/**
 * Fetches and scrapes the profile page of an actor.
 *
 * @param {object} actor - actor passed on to getActorUrl to resolve the profile URL
 * @returns {Promise<object|number|null>} scraped profile, the response status when the
 *   request was not ok, or null when no profile URL could be resolved
 */
async function fetchProfile(actor) {
	const url = await getActorUrl(actor);

	if (!url) {
		return null;
	}

	const res = await unprint.get(url, {
		interface: 'request',
		select: '#model-page',
	});

	return res.ok
		? scrapeProfile(res.context, url)
		: res.status;
}
|
|
|
|
// Exported scraper entry points; the scrape* and helper functions stay module-private.
module.exports = { fetchLatest, fetchScene, fetchProfile };
|