Added Amateur Euro, For Bondage, Mamacitaz, TransBella and VIP Sex Vault. Refactored Kelly Madison scraper using qu, fixed trailers and improved reliability.
This commit is contained in:
@@ -1,9 +1,8 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
const qu = require('../utils/qu');
|
||||
const http = require('../utils/http');
|
||||
const { feetInchesToCm } = require('../utils/convert');
|
||||
|
||||
const siteMapByKey = {
|
||||
@@ -14,17 +13,11 @@ const siteMapByKey = {
|
||||
|
||||
const siteMapBySlug = Object.entries(siteMapByKey).reduce((acc, [key, value]) => ({ ...acc, [value]: key }), {});
|
||||
|
||||
function extractTextNode(parentEl) {
|
||||
return Array.from(parentEl).reduce((acc, el) => (el.nodeType === 3 ? `${acc}${el.textContent.trim()}` : acc), '');
|
||||
}
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
return Array.from(document.querySelectorAll('.episode'), (scene) => {
|
||||
function scrapeLatest(scenes, site) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = { site };
|
||||
|
||||
release.shootId = scene.querySelector('.card-meta .text-right').textContent.trim();
|
||||
release.shootId = query.q('.card-meta .text-right', true);
|
||||
|
||||
const siteId = release.shootId.match(/\w{2}/)[0];
|
||||
const siteSlug = siteMapByKey[siteId];
|
||||
@@ -34,96 +27,94 @@ function scrapeLatest(html, site) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const durationEl = scene.querySelector('.content a');
|
||||
|
||||
[release.entryId] = durationEl.href.match(/\d+$/);
|
||||
const { pathname } = new URL(query.url('h5 a'));
|
||||
[release.entryId] = pathname.match(/\d+$/);
|
||||
release.url = `${site.url}/episodes/${release.entryId}`;
|
||||
|
||||
release.title = scene.querySelector('h5 a').textContent.trim();
|
||||
release.title = query.q('h5 a', true);
|
||||
|
||||
const dateEl = scene.querySelector('.card-meta .text-left').childNodes;
|
||||
const dateString = extractTextNode(dateEl);
|
||||
release.date = query.date('.card-meta .text-left', ['MMM D', 'MMM D, YYYY'], /\w+ \d+(, \w+)?/);
|
||||
release.actors = query.all('.models a', true);
|
||||
|
||||
release.date = moment.utc(dateString, ['MMM D', 'MMM D, YYYY']).toDate();
|
||||
release.actors = Array.from(scene.querySelectorAll('.models a'), el => el.textContent);
|
||||
release.duration = query.dur('.content a');
|
||||
|
||||
const durationString = durationEl.textContent.match(/\d+ min/);
|
||||
if (durationString) release.duration = Number(durationString[0].match(/\d+/)[0]) * 60;
|
||||
const duration = query.q('.content a', true).match(/(\d+) min/)[1];
|
||||
if (duration) release.duration = Number(duration) * 60;
|
||||
|
||||
release.poster = scene.querySelector('.card-img-top').dataset.src;
|
||||
release.poster = query.img('.card-img-top');
|
||||
release.teaser = {
|
||||
src: scene.querySelector('video').src,
|
||||
src: query.video('video'),
|
||||
};
|
||||
|
||||
return release;
|
||||
}).filter(scene => scene);
|
||||
}
|
||||
|
||||
function scrapeScene(html, url, site, baseRelease) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const release = { url, site };
|
||||
async function scrapeScene({ query, html }, url, baseRelease) {
|
||||
const { pathname, origin } = new URL(url);
|
||||
const release = {};
|
||||
|
||||
const titleEl = document.querySelector('.card-header.row h4').childNodes;
|
||||
const titleString = extractTextNode(titleEl);
|
||||
|
||||
if (!baseRelease) [release.entryId] = url.match(/\d+/);
|
||||
|
||||
release.title = titleString
|
||||
.replace('Trailer: ', '')
|
||||
.replace(/- \w+ #\d+$/, '')
|
||||
.trim();
|
||||
|
||||
release.channel = titleString.match(/\w+ #\d+$/)[0].match(/\w+/)[0].toLowerCase();
|
||||
[release.entryId] = pathname.match(/\d+$/);
|
||||
|
||||
const titleString = query.q('.card-header.row h4', true);
|
||||
const episode = titleString.match(/#\d+$/)[0];
|
||||
|
||||
release.title = titleString.match(/Trailer: ([\w\s]+) -/)[1];
|
||||
release.channel = slugify(titleString.match(/([\w\s]+) #\d+$/)[1], '');
|
||||
|
||||
const siteKey = siteMapBySlug[release.channel];
|
||||
|
||||
release.shootId = `${siteKey} ${episode}`;
|
||||
release.description = document.querySelector('p.card-text').textContent.trim();
|
||||
release.description = query.q('p.card-text', true);
|
||||
|
||||
const dateEl = document.querySelector('.card-body h4.card-title:nth-child(3)').childNodes;
|
||||
const dateString = extractTextNode(dateEl);
|
||||
// order not reliable, get keys
|
||||
const detailElsByKey = query.all('.card-body h4.card-title').reduce((acc, rowEl) => ({
|
||||
...acc,
|
||||
[slugify(rowEl.textContent.match(/(\w+):/)?.[1])]: rowEl,
|
||||
}), {});
|
||||
|
||||
release.date = moment.utc(dateString, 'YYYY-MM-DD').toDate();
|
||||
release.actors = Array.from(document.querySelectorAll('.card-body h4.card-title:nth-child(4) a'), el => el.textContent);
|
||||
release.date = query.date(detailElsByKey.published, null, 'YYYY-MM-DD');
|
||||
release.duration = query.dur(detailElsByKey.episode);
|
||||
|
||||
const durationRaw = document.querySelector('.card-body h4.card-title:nth-child(1)').textContent;
|
||||
const durationString = durationRaw.match(/\d+:\d+/)[0];
|
||||
release.actors = query.all(detailElsByKey.starring, 'a', true);
|
||||
|
||||
release.duration = moment.duration(`00:${durationString}`).asSeconds();
|
||||
|
||||
const trailerStart = document.body.innerHTML.indexOf('player.updateSrc');
|
||||
const trailerString = document.body.innerHTML.slice(trailerStart, document.body.innerHTML.indexOf(');', trailerStart));
|
||||
|
||||
const trailers = trailerString.match(/https:\/\/.*.mp4/g);
|
||||
const resolutions = trailerString.match(/res: '\d+'/g).map((res) => {
|
||||
const resolution = Number(res.match(/\d+/)[0]);
|
||||
|
||||
return resolution === 4000 ? 2160 : resolution; // 4k is not 4000 pixels high
|
||||
const token = query.meta('name=_token');
|
||||
const trailerInfoUrl = `${origin}/episodes/trailer/sources/${release.entryId}?type=trailer`;
|
||||
const trailerInfoRes = await http.post(trailerInfoUrl, null, {
|
||||
'X-CSRF-Token': token,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
});
|
||||
|
||||
release.trailer = trailers.map((trailer, index) => ({
|
||||
src: trailer,
|
||||
quality: resolutions[index],
|
||||
}));
|
||||
if (trailerInfoRes.ok && trailerInfoRes.body.sources.length > 0) {
|
||||
release.trailer = trailerInfoRes.body.sources.map(trailer => ({
|
||||
src: trailer.src,
|
||||
type: trailer.type,
|
||||
quality: trailer.res.replace(4000, 2160),
|
||||
}));
|
||||
}
|
||||
|
||||
const posterPrefix = html.indexOf('poster:');
|
||||
const poster = html.slice(html.indexOf('http', posterPrefix), html.indexOf('.jpg', posterPrefix) + 4);
|
||||
|
||||
if (baseRelease?.poster) release.photos = [poster];
|
||||
else release.poster = poster;
|
||||
if (poster) {
|
||||
if (baseRelease?.poster) {
|
||||
release.photos = [poster];
|
||||
} else {
|
||||
release.poster = poster;
|
||||
}
|
||||
}
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfile(html, actorName) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const profile = { name: actorName };
|
||||
function scrapeProfile({ query }) {
|
||||
const profile = {};
|
||||
|
||||
const bioKeys = Array.from(document.querySelectorAll('table.table td:nth-child(1)'), el => el.textContent.slice(0, -1));
|
||||
const bioValues = Array.from(document.querySelectorAll('table.table td:nth-child(2)'), el => el.textContent);
|
||||
const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {});
|
||||
const bioKeys = query.all('table.table td:nth-child(1)', true);
|
||||
const bioValues = query.all('table.table td:nth-child(2)', true);
|
||||
const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key.slice(0, -1)]: bioValues[index] }), {});
|
||||
|
||||
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
||||
if (bio.Measurements) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
|
||||
if (bio.Birthplace) profile.birthPlace = bio.Birthplace;
|
||||
|
||||
@@ -132,55 +123,45 @@ function scrapeProfile(html, actorName) {
|
||||
profile.height = feetInchesToCm(feet, inches);
|
||||
}
|
||||
|
||||
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
||||
|
||||
const avatarEl = Array.from(document.querySelectorAll('img')).find(photo => photo.src.match('model'));
|
||||
|
||||
if (avatarEl) profile.avatar = avatarEl.src;
|
||||
profile.avatar = query.img('img[src*="model"]');
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = `https://kellymadison.com/episodes/search?page=${page}`; // TLS issues with teenfidelity.com, same overview on all sites
|
||||
const res = await bhttp.get(url, {
|
||||
headers: {
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
},
|
||||
const res = await http.get(url, {
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
});
|
||||
|
||||
if (res.statusCode === 200 && res.body.status === 'success') {
|
||||
return scrapeLatest(res.body.html, site);
|
||||
if (res.ok && res.body.status === 'success') {
|
||||
return scrapeLatest(qu.extractAll(res.body.html, '.episode'), site);
|
||||
}
|
||||
|
||||
return null;
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site, baseRelease) {
|
||||
async function fetchScene(url, channel, baseRelease) {
|
||||
const { pathname } = new URL(url);
|
||||
|
||||
const res = await bhttp.get(`https://www.kellymadison.com${pathname}`, {
|
||||
headers: {
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
},
|
||||
const res = await qu.get(`https://www.kellymadison.com${pathname}`, null, {
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
});
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site, baseRelease);
|
||||
return res.ok ? scrapeScene(res.item, url, baseRelease) : res.status;
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||
const res = await bhttp.get(`https://www.kellymadison.com/models/${actorSlug}`, {
|
||||
headers: {
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
},
|
||||
const actorSlug = slugify(actorName);
|
||||
const res = await qu.get(`https://www.kellymadison.com/models/${actorSlug}`, null, {
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
});
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeProfile(res.body.toString(), actorName);
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.item);
|
||||
}
|
||||
|
||||
return null;
|
||||
return res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
@@ -31,7 +31,7 @@ const kellymadison = require('./kellymadison');
|
||||
const killergram = require('./killergram');
|
||||
const kink = require('./kink');
|
||||
const legalporno = require('./legalporno');
|
||||
const letsdoeit = require('./letsdoeit');
|
||||
const porndoe = require('./porndoe');
|
||||
const men = require('./men');
|
||||
const metrohd = require('./metrohd');
|
||||
const mikeadriano = require('./mikeadriano');
|
||||
@@ -74,6 +74,7 @@ module.exports = {
|
||||
'21sextury': sextury,
|
||||
adulttime,
|
||||
amateurallure,
|
||||
amateureuro: porndoe,
|
||||
assylum,
|
||||
aziani,
|
||||
babes,
|
||||
@@ -92,6 +93,7 @@ module.exports = {
|
||||
fakehub,
|
||||
famedigital,
|
||||
fantasymassage,
|
||||
forbondage: porndoe,
|
||||
fullpornnetwork,
|
||||
girlsway,
|
||||
girlgirl: julesjordan,
|
||||
@@ -106,7 +108,8 @@ module.exports = {
|
||||
killergram,
|
||||
kink,
|
||||
legalporno,
|
||||
letsdoeit,
|
||||
letsdoeit: porndoe,
|
||||
mamacitaz: porndoe,
|
||||
men,
|
||||
metrohd,
|
||||
mikeadriano,
|
||||
@@ -128,7 +131,9 @@ module.exports = {
|
||||
swallowsalon: julesjordan,
|
||||
teencoreclub,
|
||||
teamskeet,
|
||||
transbella: porndoe,
|
||||
twistys,
|
||||
vipsexvault: porndoe,
|
||||
vivid,
|
||||
vixen,
|
||||
vogov,
|
||||
@@ -139,6 +144,7 @@ module.exports = {
|
||||
actors: {
|
||||
'21sextury': sextury,
|
||||
allanal: mikeadriano,
|
||||
amateureuro: porndoe,
|
||||
analbbc: fullpornnetwork,
|
||||
analized: fullpornnetwork,
|
||||
analviolation: fullpornnetwork,
|
||||
@@ -166,6 +172,7 @@ module.exports = {
|
||||
evilangel,
|
||||
eyeontheguy: hush,
|
||||
fakehub,
|
||||
forbondage: porndoe,
|
||||
freeones,
|
||||
gangbangcreampie: aziani,
|
||||
girlfaction: fullpornnetwork,
|
||||
@@ -184,7 +191,6 @@ module.exports = {
|
||||
killergram,
|
||||
kink,
|
||||
legalporno,
|
||||
letsdoeit,
|
||||
men,
|
||||
metrohd,
|
||||
milehighmedia,
|
||||
@@ -201,6 +207,8 @@ module.exports = {
|
||||
pervertgallery: fullpornnetwork,
|
||||
peternorth: famedigital,
|
||||
pimpxxx: cherrypimps,
|
||||
letsdoeit: porndoe,
|
||||
mamacitaz: porndoe,
|
||||
pornhub,
|
||||
povperverts: fullpornnetwork,
|
||||
povpornstars: hush,
|
||||
@@ -216,10 +224,12 @@ module.exports = {
|
||||
teencoreclub,
|
||||
thatsitcomshow: nubiles,
|
||||
transangels,
|
||||
transbella: porndoe,
|
||||
trueanal: mikeadriano,
|
||||
tushy: vixen,
|
||||
tushyraw: vixen,
|
||||
twistys,
|
||||
vipsexvault: porndoe,
|
||||
vixen,
|
||||
wicked,
|
||||
wildoncam: cherrypimps,
|
||||
|
||||
Reference in New Issue
Block a user