forked from DebaucheryLibrarian/traxxx
189 lines
6.3 KiB
JavaScript
189 lines
6.3 KiB
JavaScript
'use strict';
|
|
|
|
const bhttp = require('bhttp');
|
|
const { JSDOM } = require('jsdom');
|
|
const moment = require('moment');
|
|
|
|
const { feetInchesToCm } = require('../utils/convert');
|
|
|
|
// Shoot ID prefix -> slug of the site the shoot belongs to.
const siteMapByKey = {
    PF: 'pornfidelity',
    TF: 'teenfidelity',
    KM: 'kellymadison',
};

// Inverse lookup (site slug -> shoot ID prefix), derived so the two maps cannot drift apart.
const siteMapBySlug = Object.entries(siteMapByKey).reduce((acc, [key, slug]) => {
    // fill one accumulator instead of re-spreading a fresh copy on every iteration
    acc[slug] = key;

    return acc;
}, {});
|
|
|
|
/**
 * Joins the trimmed content of the text nodes (nodeType 3) in a node collection.
 * Other node types are skipped, so text inside nested elements is not included.
 *
 * @param {NodeList|Array|Element|null} parentEl - a collection of nodes such as an
 *   element's `childNodes`, or an element whose own `childNodes` should be read
 * @returns {string} the text pieces concatenated without a separator; '' when there is nothing to read
 */
function extractTextNode(parentEl) {
    if (!parentEl) return '';

    // callers pass `childNodes`, but the parameter name invites passing the element itself; accept both
    const nodes = parentEl.childNodes || parentEl;

    return Array.from(nodes).reduce((acc, el) => (el.nodeType === 3 ? `${acc}${el.textContent.trim()}` : acc), '');
}
|
|
|
|
/**
 * Scrapes the episode overview and returns the releases that belong to `site`.
 * The overview is shared by the whole network, so cards whose shoot ID prefix
 * maps to a different site (or to no known site) are dropped.
 *
 * @param {string} html - HTML containing the `.episode` cards
 * @param {Object} site - site being scraped; `site.slug` and `site.url` are read
 * @returns {Object[]} one release per matching card
 */
function scrapeLatest(html, site) {
    const { document } = new JSDOM(html).window;

    return Array.from(document.querySelectorAll('.episode'), (scene) => {
        const release = { site };

        release.shootId = scene.querySelector('.card-meta .text-right').textContent.trim();

        // the first two word characters of the shoot ID are the site key; an ID without
        // them cannot be attributed to any site and is skipped like a foreign scene
        const siteIdMatch = release.shootId.match(/\w{2}/);
        const siteSlug = siteIdMatch ? siteMapByKey[siteIdMatch[0]] : null;

        if (site.slug !== siteSlug) {
            // using generic network overview, scene is not from the site we want
            return null;
        }

        // this link carries both the entry ID (in its href) and the duration (in its text)
        const durationEl = scene.querySelector('.content a');

        [release.entryId] = durationEl.href.match(/\d+$/);
        release.url = `${site.url}/episodes/${release.entryId}`;

        release.title = scene.querySelector('h5 a').textContent.trim();

        const dateEl = scene.querySelector('.card-meta .text-left').childNodes;
        const dateString = extractTextNode(dateEl);

        release.date = moment.utc(dateString, ['MMM D', 'MMM D, YYYY']).toDate();
        release.actors = Array.from(scene.querySelectorAll('.models a'), el => el.textContent);

        const durationMatch = durationEl.textContent.match(/(\d+) min/);
        if (durationMatch) release.duration = Number(durationMatch[1]) * 60;

        // poster and trailer are extras; a card lacking them must not abort the whole page
        const posterEl = scene.querySelector('.card-img-top');
        if (posterEl && posterEl.src) release.poster = posterEl.src;

        const trailerEl = scene.querySelector('video');
        if (trailerEl && trailerEl.src) release.trailer = { src: trailerEl.src };

        return release;
    }).filter(Boolean);
}
|
|
|
|
/**
 * Scrapes a single episode page into a release.
 *
 * Title, date and the duration heading are required and throw when absent;
 * channel, shoot ID, description, duration value, trailers and poster are
 * only set when they can be found on the page.
 *
 * @param {string} html - HTML of the episode page
 * @param {string} url - URL of the scene; stored on the release and used to derive the entry ID
 * @param {Object} site - site the scene was requested for; stored on the release as-is
 * @param {Object} [shallowRelease] - when truthy, the entry ID is not derived from `url`
 * @returns {Object} the scraped release
 */
function scrapeScene(html, url, site, shallowRelease) {
    const { document } = new JSDOM(html).window;
    const release = { url, site };

    const titleEl = document.querySelector('.card-header.row h4').childNodes;
    const titleString = extractTextNode(titleEl);

    if (!shallowRelease) [release.entryId] = url.match(/\d+/);

    release.title = titleString
        .replace('Trailer: ', '')
        .replace(/- \w+ #\d+$/, '')
        .trim();

    // the title is expected to end in "Channel #episode"; both parts feed the shoot ID
    const channelMatch = titleString.match(/(\w+) (#\d+)$/);

    if (channelMatch) {
        const [, channelName, episode] = channelMatch;
        const siteKey = siteMapBySlug[channelName.toLowerCase()];

        release.channel = channelName.toLowerCase();

        // an unknown channel would otherwise yield the bogus shoot ID "undefined #n"
        if (siteKey) release.shootId = `${siteKey} ${episode}`;
    }

    const descriptionEl = document.querySelector('p.card-text');
    if (descriptionEl) release.description = descriptionEl.textContent.trim();

    const dateEl = document.querySelector('.card-body h4.card-title:nth-child(3)').childNodes;
    const dateString = extractTextNode(dateEl);

    release.date = moment.utc(dateString, 'YYYY-MM-DD').toDate();
    release.actors = Array.from(document.querySelectorAll('.card-body h4.card-title:nth-child(4) a'), el => el.textContent);

    const durationRaw = document.querySelector('.card-body h4.card-title:nth-child(1)').textContent;
    const durationMatch = durationRaw.match(/\d+(?::\d+)+/);

    if (durationMatch) {
        // fold "mm:ss" or "h:mm:ss" into seconds, most significant unit first
        release.duration = durationMatch[0]
            .split(':')
            .reduce((seconds, part) => (seconds * 60) + Number(part), 0);
    }

    // serialise the page once; trailers and poster are only present inside inline script text
    const pageHtml = document.body.innerHTML;
    const trailerStart = pageHtml.indexOf('player.updateSrc');

    if (trailerStart !== -1) {
        const trailerEnd = pageHtml.indexOf(');', trailerStart);
        const trailerString = pageHtml.slice(trailerStart, trailerEnd === -1 ? pageHtml.length : trailerEnd);

        // lazy match with an escaped dot, so several sources on one line stay separate URLs
        const trailers = trailerString.match(/https:\/\/[^'"\s]+?\.mp4/g) || [];
        const resolutions = (trailerString.match(/res: '\d+'/g) || []).map((res) => {
            const resolution = Number(res.match(/\d+/)[0]);

            return resolution === 4000 ? 2160 : resolution; // 4k is not 4000 pixels high
        });

        if (trailers.length > 0) {
            // sources and resolutions are paired by position in the player call
            release.trailer = trailers.map((trailer, index) => ({
                src: trailer,
                quality: resolutions[index],
            }));
        }
    }

    const posterMatch = pageHtml.match(/poster: .*\.jpg/);
    const posterUrl = posterMatch && posterMatch[0].match(/https:\/\/.*\.jpg/);

    if (posterUrl) release.photos = [posterUrl[0]];

    return release;
}
|
|
|
|
/**
 * Scrapes a model page into an actor profile.
 *
 * The bio is read from the first two cells of the rows in `table.table`:
 * the first cell is the label, the second the value. Only the fields found
 * on the page are set on the profile.
 *
 * @param {string} html - HTML of the model page
 * @param {string} actorName - name to store on the profile
 * @returns {Object} profile with `name` and, when available, `bust`, `waist`, `hip`,
 *   `birthPlace`, `height`, `ethnicity` and `avatar`
 */
function scrapeProfile(html, actorName) {
    const { document } = new JSDOM(html).window;
    const profile = { name: actorName };

    // labels end in a colon; trim first so stray whitespace cannot leave the colon in the key
    const bioKeys = Array.from(document.querySelectorAll('table.table td:nth-child(1)'), el => el.textContent.trim().replace(/:$/, ''));
    const bioValues = Array.from(document.querySelectorAll('table.table td:nth-child(2)'), el => el.textContent.trim());

    // labels and values are paired by position
    const bio = {};

    bioKeys.forEach((key, index) => {
        bio[key] = bioValues[index];
    });

    if (bio.Measurements) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
    if (bio.Birthplace) profile.birthPlace = bio.Birthplace;

    if (bio.Height) {
        // a height without any digits is left unset instead of throwing
        const heightParts = bio.Height.match(/\d+/g);

        if (heightParts) {
            const [feet, inches] = heightParts;

            profile.height = feetInchesToCm(feet, inches);
        }
    }

    if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;

    // the avatar is the first image whose source contains "model"
    const avatarEl = Array.from(document.querySelectorAll('img')).find(photo => photo.src.includes('model'));

    if (avatarEl) profile.avatar = avatarEl.src;

    return profile;
}
|
|
|
|
/**
 * Fetches one page of the episode overview and scrapes the releases for `site`.
 *
 * @param {Object} site - site to keep releases for; passed through to `scrapeLatest`
 * @param {number} [page=1] - overview page to request
 * @returns {Promise<Object[]|null>} the scraped releases, or null when the response
 *   is not a 200 whose body reports `status: 'success'`
 */
async function fetchLatest(site, page = 1) {
    // TLS issues with teenfidelity.com, same overview on all sites
    const overviewUrl = `https://kellymadison.com/episodes/search?page=${page}`;
    const requestOptions = {
        headers: {
            'X-Requested-With': 'XMLHttpRequest',
        },
    };

    const { statusCode, body } = await bhttp.get(overviewUrl, requestOptions);

    // the body is only inspected once the status code is known to be 200
    const succeeded = statusCode === 200 && body.status === 'success';

    return succeeded ? scrapeLatest(body.html, site) : null;
}
|
|
|
|
/**
 * Fetches an episode page and scrapes it into a release.
 *
 * Only the path of `url` is used for the request, which always goes to
 * www.kellymadison.com; the original `url` is what ends up on the release.
 *
 * @param {string} url - URL of the scene on any of the network's sites
 * @param {Object} site - site the scene belongs to; passed through to `scrapeScene`
 * @param {Object} [shallowRelease] - passed through to `scrapeScene`
 * @returns {Promise<Object|null>} the scraped release, or null when the page did not
 *   respond with status 200
 */
async function fetchScene(url, site, shallowRelease) {
    const { pathname } = new URL(url);

    const res = await bhttp.get(`https://www.kellymadison.com${pathname}`, {
        headers: {
            'X-Requested-With': 'XMLHttpRequest',
        },
    });

    // an error page has none of the expected markup; bail out like the other
    // fetchers instead of failing on a missing element deep inside the scraper
    if (res.statusCode !== 200) {
        return null;
    }

    return scrapeScene(res.body.toString(), url, site, shallowRelease);
}
|
|
|
|
/**
 * Fetches the model page for an actor and scrapes it into a profile.
 *
 * @param {string} actorName - actor name; lowercased and hyphenated to form the page slug
 * @returns {Promise<Object|null>} the scraped profile, or null when the page did not
 *   respond with status 200
 */
async function fetchProfile(actorName) {
    const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
    const profileUrl = `https://www.kellymadison.com/models/${actorSlug}`;
    const requestOptions = {
        headers: {
            'X-Requested-With': 'XMLHttpRequest',
        },
    };

    const res = await bhttp.get(profileUrl, requestOptions);

    if (res.statusCode !== 200) {
        return null;
    }

    return scrapeProfile(res.body.toString(), actorName);
}
|
|
|
|
// Only the fetchers are exported; the scrape* functions are not part of the module's interface.
module.exports = { fetchLatest, fetchProfile, fetchScene };
|