traxxx/src/scrapers/mariskax.js

172 lines
3.8 KiB
JavaScript

'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const { convert } = require('../utils/convert');
/**
 * Maps a single scene record onto a release object.
 *
 * @param {Object} data - Scene record, as passed in by fetchLatest and fetchScene.
 * @param {Object} channel - Channel entity; only `channel.origin` is read, to build absolute URLs.
 * @returns {Object} Release holding the entry ID, URL, metadata, actors and media references.
 * @throws {TypeError} If a poster is present but cannot be parsed as an absolute URL.
 */
function scrapeScene(data, channel) {
	const release = {};

	release.entryId = data.id;
	release.url = `${channel.origin}/scenes/${data.slug}`;

	release.title = data.title;
	release.description = data.description;

	release.date = unprint.extractDate(data.publish_date, 'YYYY/MM/DD hh:mm:ss');
	release.duration = unprint.extractDuration(data.videos_duration);

	// prefer the lists holding actor objects; fall back to data.models as-is when neither is present
	release.actors = (data.models_thumbs || data.models_slugs)?.map((actor) => ({
		name: actor.name,
		url: actor.slug && `${channel.origin}/models/${actor.slug}`,
		avatar: actor.thumb,
	})) || data.models;

	release.tags = data.tags;

	release.poster = data.thumb || data.trailer_screencap;

	// compare on the path only, with the first doubled slash collapsed
	const posterPath = release.poster && new URL(release.poster).pathname.replace('//', '/');

	// Only drop the poster's duplicate when a poster path is known: an empty path is
	// contained in every string and would wipe the whole list, and a missing path
	// would be matched as the literal text "undefined".
	release.photos = data.extra_thumbnails?.filter((src) => !posterPath || !src.includes(posterPath));

	release.caps = data.thumbs;
	release.teaser = data.special_thumbnails;
	release.trailer = data.trailer_url;

	// NOTE(review): the photo count is read from photos_duration; confirm that field holds a count
	release.photoCount = data.photos_duration;

	release.channel = data.site?.toLowerCase();

	// unique video heights, without entries that lack a height
	release.qualities = data.videos && Array.from(new Set(Object.values(data.videos).map((video) => video.height))).filter(Boolean);

	return release;
}
/**
 * Fetches one page of the scene listing and scrapes every scene found on it.
 *
 * @param {Object} channel - Channel entity, handed through to scrapeScene.
 * @param {number} page - Listing page number, placed in the `page` query parameter.
 * @returns {Promise<Object[]|number|null>} Scraped releases, the response status when the
 *   request was not OK, or null when the page carries no scene list.
 */
async function fetchLatest(channel, page) {
	// the site responds slowly, hence the generous timeout
	const res = await unprint.get(`https://tour.mariskax.com/scenes?page=${page}`, { timeout: 30000 });

	if (!res.ok) {
		return res.status;
	}

	// the scene list is embedded in the page's #__NEXT_DATA__ JSON
	const pageData = res.context.query.json('#__NEXT_DATA__');
	const scenes = pageData?.props.pageProps.contents.data;

	if (!scenes) {
		return null;
	}

	return scenes.map((scene) => scrapeScene(scene, channel));
}
/**
 * Fetches a scene page and scrapes it, unless the base release already has an entry ID.
 *
 * @param {string} url - Scene page URL.
 * @param {Object} entity - Channel entity, handed through to scrapeScene.
 * @param {Object} baseRelease - Release obtained earlier; returned untouched when it has an entry ID.
 * @returns {Promise<Object|number|null>} The release, the response status when the request
 *   was not OK, or null when the page carries no scene data.
 */
async function fetchScene(url, entity, baseRelease) {
	// a release that already has an entry ID holds the same data the scene page would give
	if (baseRelease.entryId) {
		return baseRelease;
	}

	// the site responds slowly, hence the generous timeout
	const res = await unprint.get(url, { timeout: 30000 });

	if (!res.ok) {
		return res.status;
	}

	const scene = res.context.query.json('#__NEXT_DATA__')?.props.pageProps.content;

	return scene ? scrapeScene(scene, entity) : null;
}
/**
 * Maps a model record onto a profile object.
 *
 * @param {Object} data - Model record; `id`, `slug` and `thumb` are read directly, the
 *   remaining details through a copy whose keys were passed through slugify.
 * @returns {Object} Profile with URL, entry ID, bio details, avatar and social links.
 */
function scrapeProfile(data) {
	// re-key the record via slugify(key, '_') so the bio fields can be read under uniform names
	const bioEntries = Object.entries(data).map(([key, value]) => [slugify(key, '_'), value]);
	const bio = Object.fromEntries(bioEntries);

	return {
		url: `https://tour.mariskax.com/models/${data.slug}`,
		entryId: data.id,
		gender: bio.gender,
		dateOfBirth: bio.birthdate,
		age: bio.age,
		placeOfBirth: bio.born,
		measurements: bio.measurements,
		height: convert(bio.height, 'cm'),
		weight: convert(bio.weight, 'lb', 'kg'),
		hairColor: bio.hair_color,
		eyes: bio.eye_color,
		avatar: data.thumb,
		// the X handle is the only social link; the first '@' is removed from it
		socials: bio.x ? [`https://x.com/${bio.x.replace('@', '')}`] : [],
	};
}
/**
 * Resolves the profile URL for an actor, searching the site by name when none is known.
 *
 * @param {Object} actor - Actor with an optional `url`, plus the `name` and `slug` used for the search.
 * @param {Object} entity - Entity whose `origin` hosts the search API and the model pages.
 * @returns {Promise<{url: string, model?: Object}|null>} The known URL, or the URL together
 *   with the matching search result; null when the search fails or nothing matches.
 */
async function getActorUrl(actor, entity) {
	if (actor.url) {
		return { url: actor.url };
	}

	// the name is a path segment: encode it so spaces, '/', '?' or '#' cannot alter the request
	const res = await unprint.post(`${entity.origin}/api/search/${encodeURIComponent(actor.name)}`);

	if (res.ok) {
		// a successful response without a model list counts as no match instead of throwing
		const model = res.data?.models?.find((result) => slugify(result.name) === actor.slug);

		if (model?.slug) {
			return {
				url: `${entity.origin}/models/${model.slug}`,
				model,
			};
		}
	}

	return null;
}
/**
 * Fetches and scrapes an actor profile, from search data when available and otherwise
 * from the model page.
 *
 * @param {Object} actor - Actor to look up, handed to getActorUrl.
 * @param {Object} entity - Entity handed to getActorUrl.
 * @returns {Promise<Object|number|null>} The profile, the response status when the page
 *   request was not OK, or null when no profile or profile data could be found.
 */
async function fetchProfile(actor, entity) {
	// getActorUrl resolves to null when no profile is found; destructuring null would throw
	const { url, model } = (await getActorUrl(actor, entity)) ?? {};

	if (model) {
		// search data already contains everything except for age, but DOB is included
		return scrapeProfile(model);
	}

	if (url) {
		// NOTE(review): this request runs page scripts and sets no timeout, unlike the scene
		// requests in this file; confirm the model page needs script execution.
		const res = await unprint.get(url, {
			parser: {
				runScripts: 'dangerously',
			},
		});

		if (res.ok) {
			const data = res.context.query.json('#__NEXT_DATA__')?.props.pageProps.model;

			if (data) {
				return scrapeProfile(data);
			}

			return null;
		}

		return res.status;
	}

	return null;
}
// Scraper interface: the listing, scene and profile fetchers, plus the scene mapper itself.
module.exports = { fetchLatest, scrapeScene, fetchScene, fetchProfile };