traxxx/src/scrapers/modelmedia.js

177 lines
4.3 KiB
JavaScript

'use strict';
const unprint = require('unprint');
/**
 * Turn the listing tiles of a videos page into release objects.
 *
 * @param {Array<{ query: object }>} scenes - one entry per tile; `query` is used
 *   through its attribute/url/content/duration/img/video accessors
 * @returns {object[]} one release per tile, carrying entryId, duration and teaser,
 *   plus url, shootId, actors, title, altTitles and poster when available
 * @throws {TypeError} if a tile yields a URL that `new URL()` cannot parse
 */
function scrapeAll(scenes) {
  return scenes.map(({ query }) => {
    const release = {};

    release.entryId = query.attribute(null, 'video-id');

    const url = query.url(null);

    if (url) {
      const { origin, pathname, searchParams } = new URL(url);

      release.url = `${origin}${pathname}`;
      release.shootId = pathname.match(/((HP)|(LA)|(LT)|(MA)|(MD)|(MM)|(MS)|(MT)|(RR))\w*-\w+((EP)?\d+)?/)?.[0]; // pathname sometimes contains other text, match at least two letters to prevent false positives

      release.actors = searchParams.get('models_name')?.split(',').map((actor) => {
        const [han, english] = actor.split('/').map((name) => name.trim());

        if (/amateur/i.test(english)) {
          // not a name
          return null;
        }

        if (!english && !han) {
          // empty segment, e.g. a trailing or doubled comma
          return null;
        }

        return {
          name: english || han,
          alias: english && han,
        };
      }).filter(Boolean);
    }

    const titleText = query.content('.video-title div');

    // only strip the shoot ID when one was found; replace(undefined, '') would
    // coerce the pattern to the string "undefined" and remove that word from the title
    const rawTitle = release.shootId
      ? titleText?.replace(release.shootId, '')
      : titleText;

    if (rawTitle) {
      // find / closest to Han in case there are multiple, account for no / at all
      const hanIndex = rawTitle.search(/\p{Script_Extensions=Han}/u);
      const slashIndex = hanIndex > 0 ? rawTitle.lastIndexOf('/', hanIndex - 1) : -1;

      if (slashIndex > -1) {
        const title = rawTitle.slice(0, slashIndex).trim();
        const altTitle = rawTitle.slice(slashIndex + 1).trim();

        if (title) {
          release.title = title;
          release.altTitles = [altTitle];
        } else {
          // nothing but the separator precedes the Han text, so it is the only title
          release.title = altTitle;
        }
      } else {
        // removing the shoot ID can leave stray whitespace around the title
        release.title = rawTitle.trim();
      }
    }

    release.duration = query.duration('.timestamp');

    const poster = query.img('img', { attribute: 'data-src' });

    if (poster) {
      // request a larger rendition first, keep the original URL as fallback
      release.poster = [
        poster.replace(/w=\d+/, 'w=1920').replace(/h=\d+/, 'h=1080'),
        poster,
      ];
    }

    release.teaser = query.video(null, { attribute: 'data-video-src' });

    return release;
  });
}
/**
 * Build an actor profile from a model page.
 *
 * @param {{ query: object }} context - page context; `query` is used through its
 *   img/imgs/content/number accessors
 * @returns {object} profile with description, height, weight, measurements,
 *   birthPlace, banner and photos, plus avatar when an image was found
 */
function scrapeProfile({ query }) {
  const profile = {};

  const avatar = query.img('div[class*="prof-pic"] > img');

  if (avatar) {
    // request a larger rendition first, keep the original URL as fallback
    profile.avatar = [
      avatar.replace(/w=\d+/, 'w=720').replace(/h=\d+/, 'h=1080'),
      avatar,
    ];
  }

  profile.description = query.content('h2') || null;

  profile.height = query.number('//span[text()="Height"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
  profile.weight = query.number('//span[text()="Weight"]/following-sibling::span', { match: /(\d+) kg/, matchIndex: 1 });

  // can't find a single profile with this information available, but add for good measure
  profile.measurements = query.content('//span[text()="Measurements"]/following-sibling::span');
  // a birth place is text, so read it as content rather than as a number
  profile.birthPlace = query.content('//span[text()="Birth Place"]/following-sibling::span');

  profile.banner = query.img('div[class*="banner"] > img');
  profile.photos = query.imgs('#MusModelSwiper img');

  return profile;
}
async function getCookie(channel) {
const tokenRes = await unprint.get(channel.url);
if (!tokenRes.ok) {
return tokenRes.status;
}
const csrfToken = tokenRes.context?.query.attribute('meta[name="csrf-token"]', 'content');
const cookie = tokenRes.response.headers['set-cookie']?.join(';');
if (!csrfToken || !cookie) {
return null;
}
const confirmAdultRes = await unprint.post(`${channel.url}/adult_confirmation_and_accept_cookie`, null, {
headers: {
cookie,
'x-csrf-token': csrfToken,
},
});
if (!confirmAdultRes.ok) {
return confirmAdultRes.status;
}
return cookie;
}
/**
 * Fetch one page of the most recently published videos of a channel.
 *
 * @param {{ url: string }} channel - entity whose `url` is the site root
 * @param {number} [page=1] - listing page to request
 * @returns {Promise<object[]|number>} scraped releases, or the HTTP status of
 *   the listing request when it fails
 */
async function fetchLatest(channel, page = 1) {
  const cookie = await getCookie(channel);

  // getCookie yields an HTTP status number or null when no session could be
  // set up; only a string is a usable cookie header, so otherwise send none
  const headers = typeof cookie === 'string' ? { cookie } : {};

  const res = await unprint.get(`${channel.url}/videos?sort=published_at&page=${page}`, {
    selectAll: '.row a[video-id]',
    headers,
  });

  if (res.ok) {
    return scrapeAll(res.context, channel);
  }

  return res.status;
}
// deep pages are paywalled
/**
 * Look up an actor's profile URL through the site's live search.
 *
 * @param {{ name: string }} actor - actor to search for
 * @param {object} context - scraper context; the site root is read from
 *   `context.channel.url`, falling back to `context.entity.url`
 * @param {string|number|null} cookie - session cookie; anything but a string is not sent
 * @returns {Promise<string|number|null|undefined>} whatever `query.url` yields for
 *   the link titled exactly like the actor, or the HTTP status when the search fails
 */
async function searchProfile(actor, context, cookie) {
  // NOTE(review): the sibling fetchProfile reads context.entity; fall back to it
  // in case the caller's context carries no `channel` — confirm against the caller
  const baseUrl = (context.channel ?? context.entity).url;

  // names may contain spaces, Han characters or '&', so they must be encoded
  const searchRes = await unprint.get(`${baseUrl}/livesearch?keyword=${encodeURIComponent(actor.name)}`, {
    headers: typeof cookie === 'string' ? { cookie } : {},
  });

  if (!searchRes.ok) {
    return searchRes.status;
  }

  // keep quotes and backslashes in the name from breaking out of the attribute selector
  const escapedName = actor.name.replace(/["\\]/g, '\\$&');

  return searchRes.context.query.url(`a[title="${escapedName}"]`);
}
/**
 * Fetch and scrape an actor's profile page.
 *
 * Uses `actor.url` when present, otherwise looks the actor up through the
 * site search.
 *
 * @param {{ name: string, url?: string }} actor - actor to fetch
 * @param {object} context - scraper context; `context.entity` is used for the
 *   cookie handshake and the context is passed on to the search
 * @returns {Promise<object|null>} the scraped profile, or null when no profile
 *   URL could be determined or the page request fails
 */
async function fetchProfile(actor, context) {
  const cookie = await getCookie(context.entity);
  const actorUrl = actor.url || await searchProfile(actor, context, cookie);

  // searchProfile returns an HTTP status number when the search request fails;
  // a number is truthy, so check for a usable URL string explicitly
  if (typeof actorUrl !== 'string' || actorUrl === '') {
    return null;
  }

  const res = await unprint.get(actorUrl, {
    // getCookie may yield a status number or null; only a string is a valid cookie header
    headers: typeof cookie === 'string' ? { cookie } : {},
  });

  if (res.ok) {
    return scrapeProfile(res.context, actorUrl);
  }

  return null;
}
// exported entry points: latest-scene listing and actor profile lookup
module.exports = { fetchLatest, fetchProfile };