Added Brazzers and Jules Jordan as profile sources. Changed profile structure for proper bust-waist-hip properties and improved stability.

This commit is contained in:
2019-11-21 04:05:32 +01:00
parent 9fcc40dd17
commit 9224b441e2
12 changed files with 224 additions and 60 deletions

View File

@@ -3,11 +3,20 @@
/* eslint-disable newline-per-chained-call */
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const { JSDOM } = require('jsdom');
const moment = require('moment');
const { heightToCm, lbsToKg } = require('../utils/convert');
const { fetchSites } = require('../sites');
const { matchTags } = require('../tags');
const hairMap = {
Blonde: 'blonde',
Brunette: 'brown',
'Black Hair': 'black',
Redhead: 'red',
};
function scrape(html, site, upcoming) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const sceneElements = $('.release-card.scene').toArray();
@@ -117,6 +126,50 @@ async function scrapeScene(html, url, site) {
};
}
function scrapeActorSearch(html, url, actorName) {
const { document } = new JSDOM(html).window;
const actorLink = document.querySelector(`a[title="${actorName}"]`);
return actorLink;
}
function scrapeProfile(html, url, actorName) {
const { document } = new JSDOM(html).window;
const avatarEl = document.querySelector('.big-pic-model-container img');
const descriptionEl = document.querySelector('.model-profile-specs p');
const bioKeys = Array.from(document.querySelectorAll('.profile-spec-list label'), el => el.textContent.replace(/\n+|\s{2,}/g, '').trim());
const bioValues = Array.from(document.querySelectorAll('.profile-spec-list var'), el => el.textContent.replace(/\n+|\s{2,}/g, '').trim());
const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {});
const profile = {
name: actorName,
};
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
if (bio.Measurements) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = moment.utc(bio['Date of Birth'], 'MMMM DD, YYYY').toDate();
if (bio['Birth Location']) profile.birthPlace = bio['Birth Location'];
if (bio['Pussy Type']) profile.pussy = bio['Pussy Type'].split(',').slice(-1)[0].toLowerCase();
if (bio.Height) profile.height = heightToCm(bio.Height);
if (bio.Weight) profile.weight = lbsToKg(bio.Weight.match(/\d+/)[0]);
if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
if (bio['Body Art']) {
profile.hasTattoo = !!bio['Body Art'].match('Tattoo');
profile.hasPiercing = !!bio['Body Art'].match('Piercing');
}
if (descriptionEl) profile.description = descriptionEl.textContent.trim();
if (avatarEl) profile.avatar = `https:${avatarEl.src}`;
profile.releases = Array.from(document.querySelectorAll('.release-card-container .scene-card-title a'), el => `https://brazzers.com${el.href}`);
return profile;
}
async function fetchLatest(site, page = 1) {
const res = await bhttp.get(`${site.url}/page/${page}/`);
@@ -135,8 +188,29 @@ async function fetchScene(url, site) {
return scrapeScene(res.body.toString(), url, site);
}
async function fetchProfile(actorName) {
const searchUrl = 'https://brazzers.com/pornstars-search/';
const searchRes = await bhttp.get(searchUrl, {
headers: {
Cookie: `textSearch=${encodeURIComponent(actorName)};`,
},
});
const actorLink = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName);
if (actorLink) {
const url = `https://brazzers.com${actorLink}`;
const res = await bhttp.get(url);
return scrapeProfile(res.body.toString(), url, actorName);
}
return null;
}
module.exports = {
fetchLatest,
fetchUpcoming,
fetchProfile,
fetchScene,
fetchUpcoming,
};

View File

@@ -23,9 +23,9 @@ async function scrapeProfileFrontpage(html, url, name) {
? moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate()
: null;
const boobsSizeString = bio['Measurements:'];
const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
const boobsNatural = bio['Fake Boobs:'] === 'No';
const measurementsString = bio['Measurements:'];
const [bust, waist, hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));
const naturalBoobs = bio['Fake Boobs:'] === 'No';
const residenceCountryName = bio['Country of Origin:'];
const countryEntry = await knex('countries').where({ name: residenceCountryName }).first();
@@ -36,10 +36,12 @@ async function scrapeProfileFrontpage(html, url, name) {
const eyes = bio['Eye Color:'].toLowerCase();
const piercingsString = bio['Piercings:'];
const piercings = piercingsString === 'None' ? null : piercingsString;
const hasPiercings = !!(piercingsString !== undefined && piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
const piercings = hasPiercings && piercingsString;
const tattoosString = bio['Tattoos:'];
const tattoos = tattoosString === 'Unknown (add)' || tattoosString === 'None' ? null : tattoosString;
const hasTattoos = !!(tattoosString !== undefined && tattoosString !== 'Unknown (add)' && tattoosString !== 'None');
const tattoos = hasTattoos && tattoosString;
const social = Array.from(bioEl.querySelectorAll('.dashboard-socialmedia a'), el => el.href);
@@ -50,10 +52,10 @@ async function scrapeProfileFrontpage(html, url, name) {
birthdate,
residenceCountry,
birthPlace,
boobs: {
size: boobsSize,
natural: boobsNatural,
},
naturalBoobs,
bust,
waist,
hip,
hair,
eyes,
piercings,
@@ -78,8 +80,8 @@ async function scrapeProfileBio(html, frontpageBio, url, name) {
? moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate()
: null;
const boobsSizeString = bio['Measurements:'];
const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
const measurementsString = bio['Measurements:'];
const [bust, waist, hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));
const boobsNatural = bio['Fake boobs:'] === 'No';
const ethnicity = bio['Ethnicity:'];
@@ -94,10 +96,12 @@ async function scrapeProfileBio(html, frontpageBio, url, name) {
const weight = Number(bio['Weight:'].match(/\d+/)[0]);
const piercingsString = bio['Piercings:'];
const piercings = piercingsString === 'None' ? null : piercingsString;
const hasPiercings = !!(piercingsString !== undefined && piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
const piercings = hasPiercings && piercingsString;
const tattoosString = bio['Tattoos:'];
const tattoos = tattoosString === undefined || tattoosString === 'Unknown (add)' || tattoosString === 'None' ? null : tattoosString;
const hasTattoos = !!(tattoosString !== undefined && tattoosString !== 'Unknown (add)' && tattoosString !== 'None');
const tattoos = hasTattoos && tattoosString;
const social = Array.from(bioEl.querySelectorAll('#socialmedia a'), el => el.href);
@@ -109,14 +113,16 @@ async function scrapeProfileBio(html, frontpageBio, url, name) {
residenceCountry,
birthPlace,
ethnicity,
boobs: {
size: boobsSize,
natural: boobsNatural,
},
naturalBoobs: boobsNatural,
bust,
waist,
hip,
height,
weight,
hair,
eyes,
hasPiercings,
hasTattoos,
piercings,
tattoos,
social,

View File

@@ -3,8 +3,10 @@
const Promise = require('bluebird');
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const { JSDOM } = require('jsdom');
const moment = require('moment');
const { heightToCm } = require('../utils/convert');
const { matchTags } = require('../tags');
const pluckPhotos = require('../utils/pluck-photos');
@@ -190,6 +192,37 @@ async function scrapeScene(html, url, site) {
};
}
function scrapeProfile(html, url, actorName) {
const { document } = new JSDOM(html).window;
const bio = document.querySelector('.model_bio').textContent;
const avatarEl = document.querySelector('.model_bio_pic');
const profile = {
name: actorName,
};
const heightString = bio.match(/\d+ feet \d+ inches/);
const ageString = bio.match(/Age:\s*\d{2}/);
const measurementsString = bio.match(/\w+-\d+-\d+/);
if (heightString) profile.height = heightToCm(heightString[0]);
if (ageString) profile.age = Number(ageString[0].match(/\d{2}/)[0]);
if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString[0].split('-');
if (avatarEl) {
const src1 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_1x') + 9, avatarEl.innerHTML.indexOf('1x.jpg') + 6);
const src2 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_2x') + 9, avatarEl.innerHTML.indexOf('2x.jpg') + 6);
const src3 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_3x') + 9, avatarEl.innerHTML.indexOf('3x.jpg') + 6);
profile.avatar = src3 || src2 || src1;
}
profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), el => el.href);
return profile;
}
async function fetchLatest(site, page = 1) {
const res = await bhttp.get(`${site.url}/trial/categories/movies_${page}_d.html`);
@@ -208,8 +241,22 @@ async function fetchScene(url, site) {
return scrapeScene(res.body.toString(), url, site);
}
async function fetchProfile(actorName) {
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
const url = `https://julesjordan.com/trial/models/${actorSlug}.html`;
const res = await bhttp.get(url);
if (res.statusCode === 200) {
return scrapeProfile(res.body.toString(), url, actorName);
}
return null;
}
module.exports = {
fetchLatest,
fetchProfile,
fetchUpcoming,
fetchScene,
};

View File

@@ -26,7 +26,6 @@ async function scrapeProfile(html, _url, actorName) {
const profile = {
name: actorName,
boobs: {},
};
const descriptionString = document.querySelector('div[itemprop="description"]');
@@ -60,14 +59,14 @@ async function scrapeProfile(html, _url, actorName) {
profile.residenceCountry = residenceCountryEntry ? residenceCountryEntry.alpha2 : null;
}
if (bio.Measurements && bio.Measurements !== '--') profile.boobs.size = bio.Measurements;
if (bio['Fake Boobs']) profile.boobs.natural = bio['Fake Boobs'] === 'No';
if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No';
if (bio.Height) profile.height = Number(bio.Height.match(/\(\d+/)[0].slice(1));
if (bio.Weight) profile.weight = Number(bio.Weight.match(/\(\d+/)[0].slice(1));
if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
if (bio.Piercings) profile.piercings = bio.Piercings === 'Yes';
if (bio.Tattoos) profile.tattoos = bio.tattoos === 'Yes';
if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes';
if (bio.Tattoos) profile.hasTattoos = bio.hasTattoos === 'Yes';
if (avatarEl) profile.avatar = avatarEl.src;
profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href).filter(link => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason

View File

@@ -4,11 +4,9 @@
const twentyonesextury = require('./21sextury');
const bangbros = require('./bangbros');
const blowpass = require('./blowpass');
const brazzers = require('./brazzers');
const ddfnetwork = require('./ddfnetwork');
const dogfart = require('./dogfart');
const evilangel = require('./evilangel');
const julesjordan = require('./julesjordan');
const kink = require('./kink');
const mikeadriano = require('./mikeadriano');
const mofos = require('./mofos');
@@ -20,6 +18,8 @@ const vixen = require('./vixen');
const xempire = require('./xempire');
// releases and profiles
const brazzers = require('./brazzers');
const julesjordan = require('./julesjordan');
const legalporno = require('./legalporno');
// profiles
@@ -49,7 +49,9 @@ module.exports = {
xempire,
},
actors: {
brazzers,
freeones,
julesjordan,
legalporno,
pornhub,
},