traxxx/src/scrapers/mariskax.js

97 lines
2.4 KiB
JavaScript

'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
/**
 * Turn the scene tiles of a listing page into release objects.
 *
 * A tile with a missing link, an unexpected URL layout or no gallery data
 * yields a release with the affected fields left empty instead of throwing,
 * so a single malformed tile cannot abort the whole page.
 *
 * @param {Array<{ query: object }>} scenes - one context per tile, each exposing
 *   the `query` helpers used below (content, url, date, duration, all, json, number)
 * @returns {object[]} one release per tile, in the original order
 */
function scrapeLatest(scenes) {
  return scenes.map(({ query }) => {
    const release = {};

    release.title = query.content('.title a');
    release.url = query.url('.title a') || query.url('.thumb-wrap a');

    // The ID is the numeric segment following `view/` in the scene path;
    // stays undefined when the tile has no link or the path has another layout.
    release.entryId = release.url
      ? new URL(release.url).pathname.match(/view\/(\d+)\//)?.[1]
      : undefined;

    release.date = query.date('time', 'Do MMM YYYY', { match: /\d+\w+ \w+ \d{4}/ });
    release.duration = query.duration('.total-time');

    release.actors = query.all('.models a').map((el) => ({
      name: unprint.query.content(el),
      url: unprint.query.url(el, null),
    }));

    // `data-images` is read as a JSON list: first entry is the poster, the rest
    // are photos. Anything that is not an array is treated as an empty gallery,
    // because destructuring a non-iterable would throw.
    const images = query.json('.thumb-wrap a', { attribute: 'data-images' });
    [release.poster, ...release.photos] = Array.isArray(images) ? images : [];

    release.photoCount = query.number('.total-photos');

    return release;
  });
}
/**
 * Fetch one page of the scene listing and scrape its tiles.
 *
 * @param {object} channel - not read by this scraper; kept for the shared signature
 * @param {number} page - page number interpolated into the listing URL
 * @returns {Promise<object[]|number>} scraped releases when the response is OK,
 *   otherwise the response status
 */
async function fetchLatest(channel, page) {
  const listingUrl = `https://tour.mariskax.com/scenes?page=${page}`;

  const requestOptions = {
    selectAll: '.content-item',
    timeout: 30000, // slow site
  };

  const res = await unprint.get(listingUrl, requestOptions);

  return res.ok
    ? scrapeLatest(res.context)
    : res.status;
}
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ query: object }} context - page context exposing the `query` helpers
 *   used below (content, date, all, poster, video)
 * @param {{ url: string }} base - carries the absolute URL of the scene page
 * @returns {object} release with title, entryId, date, actors, poster and trailer
 */
function scrapeScene({ query }, { url }) {
  const release = {};

  release.title = query.content('.content-meta .title');

  // The ID is the numeric segment following `view/` in the path; left undefined
  // rather than throwing when the URL has a different layout.
  release.entryId = new URL(url).pathname.match(/view\/(\d+)\//)?.[1];

  release.date = query.date('.post-date', 'Do MMM YYYY', { match: /\d+\w+ \w+ \d{4}/ });

  release.actors = query.all('.content-meta .models a').map((el) => ({
    name: unprint.query.content(el),
    url: unprint.query.url(el, null),
  }));

  release.poster = query.poster('.trailer-wrap video');

  // Prefer the embedded player source, fall back to the download link
  release.trailer = query.video('.trailer-wrap source')
    || query.video('.download-trailer-wrap a', { attribute: 'href' });

  return release;
}
/**
 * Look up an actor through the site's search preview endpoint.
 *
 * The search results are filtered down to the entry of type `model` whose
 * slugified title equals `actor.slug`.
 *
 * @param {{ name: string, slug: string }} actor - actor to look up
 * @returns {Promise<{ entryId: (string|undefined), url: string, avatar: string }|null>}
 *   the curated profile, or null when the request fails, the response body is
 *   not a result list, or no model matches
 */
async function fetchProfile(actor) {
  const res = await unprint.post('https://tour.mariskax.com/search-preview-mrx', `q=${slugify(actor.name, '+')}`, {
    headers: {
      'Accept-Language': 'en-US,en', // necessary for some reason
      'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    },
  });

  // An OK response is not guaranteed to carry a result list; calling `.find`
  // on anything else would throw.
  if (!res.ok || !Array.isArray(res.data)) {
    return null;
  }

  const model = res.data.find((result) => result.type === 'model' && slugify(result.title) === actor.slug);

  if (!model) {
    return null;
  }

  return {
    entryId: model.url?.match(/model\/(\d+)\//)?.[1],
    url: model.url,
    avatar: model.thumb,
  };
}
module.exports = {
fetchLatest,
scrapeScene: {
scraper: scrapeScene,
unprint: true,
},
fetchProfile,
};