Added Brazzers and Jules Jordan as profile sources. Changed profile structure for proper bust-waist-hip properties and improved stability.

This commit is contained in:
ThePendulum 2019-11-21 04:05:32 +01:00
parent 9fcc40dd17
commit 9224b441e2
12 changed files with 224 additions and 60 deletions

View File

@ -71,10 +71,9 @@
<span>{{ actor.height }} cm</span>
</li>
<li v-if="actor.boobSize || actor.boobsNatural">
<dfn class="bio-heading">Boobs</dfn>
<span v-if="actor.boobSize">{{ actor.boobSize }}</span>
<span v-if="actor.boobsNatural !== null">{{ actor.boobsNatural ? 'Natural' : 'Enhanced' }}</span>
<li v-if="actor.bust || actor.waist || actor.hip">
<dfn class="bio-heading">Measurements</dfn>
<span>{{ actor.bust || '??' }}-{{ actor.waist || '??' }}-{{ actor.hip || '??' }}</span>
</li>
</ul>

View File

@ -33,8 +33,10 @@ exports.up = knex => Promise.resolve()
table.string('residence_place');
table.string('boobs_size');
table.boolean('boobs_natural');
table.string('bust', 10);
table.integer('waist', 3);
table.integer('hip', 3);
table.boolean('natural_boobs');
table.integer('height', 3);
table.integer('weight', 3);

View File

@ -37,8 +37,10 @@ async function curateActor(actor) {
: null,
ethnicity: actor.ethnicity,
height: actor.height,
boobSize: actor.boobs_size,
boobsNatural: actor.boobs_natural,
bust: actor.bust,
waist: actor.waist,
hip: actor.hip,
naturalBoobs: actor.natural_boobs,
aliases: aliases.map(({ name }) => name),
slug: actor.slug,
avatars,
@ -51,7 +53,6 @@ function curateActors(releases) {
function curateActorEntry(actor, scraped, scrapeSuccess) {
const curatedActor = {
id: actor.id,
name: actor.name
.split(' ')
.map(segment => `${segment.charAt(0).toUpperCase()}${segment.slice(1)}`)
@ -65,22 +66,27 @@ function curateActorEntry(actor, scraped, scrapeSuccess) {
residence_country_alpha2: actor.residenceCountry,
birth_place: actor.birthPlace,
residence_place: actor.residencePlace,
boobs_size: actor.boobs && actor.boobs.size,
boobs_natural: actor.boobs && actor.boobs.natural,
bust: actor.bust,
waist: actor.waist,
hip: actor.hip,
natural_boobs: actor.naturalBoobs,
height: actor.height,
weight: actor.weight,
hair: actor.hair,
eyes: actor.eyes,
has_tattoos: actor.hasTattoos,
has_piercings: actor.hasPiercings,
tattoos: actor.tattoos,
piercings: actor.piercings,
};
if (actor.id) {
curatedActor.id = actor.id;
}
if (scraped) {
return {
...curatedActor,
scraped_at: new Date(),
scrape_success: scrapeSuccess,
};
curatedActor.scraped_at = new Date();
curatedActor.scrape_success = scrapeSuccess;
}
return curatedActor;
@ -141,30 +147,32 @@ function mergeProfiles(profiles, actor) {
}
return {
id: actor.id,
name: actor.name,
id: actor ? actor.id : null,
name: actor ? actor.name : profile.name,
description: prevProfile.description || profile.description,
gender: prevProfile.gender || profile.gender,
birthdate: prevProfile.birthdate || profile.birthdate,
birthdate: Number.isNaN(prevProfile.birthdate) ? profile.birthdate : prevProfile.birthdate,
birthCountry: prevProfile.birthCountry || profile.birthCountry,
residenceCountry: prevProfile.residenceCountry || profile.residenceCountry,
birthPlace: prevProfile.birthPlace || profile.birthPlace,
residencePlace: prevProfile.residencePlace || profile.residencePlace,
ethnicity: prevProfile.ethnicity || profile.ethnicity,
boobs: profile.boobs
? {
size: prevProfile.boobs.size || profile.boobs.size,
natural: prevProfile.boobs.natural || profile.boobs.natural,
}
: {},
bust: prevProfile.bust || profile.bust,
waist: prevProfile.waist || profile.waist,
hip: prevProfile.hip || profile.hip,
naturalBoobs: prevProfile.naturalBoobs || profile.naturalBoobs,
height: prevProfile.height || profile.height,
weight: prevProfile.weight || profile.weight,
hair: prevProfile.hair || profile.hair,
eyes: prevProfile.eyes || profile.eyes,
hasPiercings: prevProfile.hasPiercings || profile.hasPiercings,
hasTattoos: prevProfile.hasTattoos || profile.hasTattoos,
piercings: prevProfile.piercings || profile.piercings,
tattoos: prevProfile.tattoos || profile.tattoos,
social: prevProfile.social.concat(profile.social || []),
avatars: prevProfile.avatars.concat(profile.avatar || []),
};
}, {
boobs: {},
social: [],
avatars: [],
...actor,
@ -176,7 +184,11 @@ async function scrapeActors(actorNames) {
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
const actorEntry = await knex('actors').where({ slug: actorSlug }).first();
const profiles = await Promise.all(Object.values(scrapers.actors).map(scraper => scraper.fetchProfile(actorEntry ? actorEntry.name : actorName)));
const profiles = await Promise.all(
Object.values(scrapers.actors)
.map(scraper => scraper.fetchProfile(actorEntry ? actorEntry.name : actorName)),
);
const profile = mergeProfiles(profiles, actorEntry);
if (profile === null) {
@ -203,7 +215,7 @@ async function scrapeActors(actorNames) {
await createActorMediaDirectory(profile, newActorEntry);
await storeAvatars(profile, newActorEntry);
}, {
concurrency: 1,
concurrency: 3,
});
}

View File

@ -198,8 +198,10 @@ async function storeAvatars(profile, actor) {
const thumbnail = await getThumbnail(res.body);
const extension = mime.getExtension(mimetype);
const filepath = path.join('actors', actor.slug, `${index + 1}.${extension}`);
const thumbpath = path.join('actors', actor.slug, `${index + 1}_thumb.${extension}`);
const timestamp = new Date().getTime();
const filepath = path.join('actors', actor.slug, `${timestamp + index}.${extension}`);
const thumbpath = path.join('actors', actor.slug, `${timestamp + index}_thumb.${extension}`);
const hash = getHash(res.body);
await Promise.all([

View File

@ -235,7 +235,7 @@ async function storeRelease(release) {
await storeReleaseAssets(release, releaseEntry.id);
console.log(`Stored release "${release.title}" (${releaseEntry.id}, ${release.site.name})`);
return null;
return releaseEntry.id;
}
async function storeReleases(releases) {

View File

@ -49,10 +49,8 @@ async function scrapeRelease(url, release, deep = false) {
if (!deep && argv.save) {
// don't store release when called by site scraper
const releaseId = await Promise.all([
storeReleases([scene]),
scrapeBasicActors(),
]);
const [releaseId] = await storeReleases([scene]);
await scrapeBasicActors();
console.log(`http://${config.web.host}:${config.web.port}/scene/${releaseId}`);
}

View File

@ -3,11 +3,20 @@
/* eslint-disable newline-per-chained-call */
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const { JSDOM } = require('jsdom');
const moment = require('moment');
const { heightToCm, lbsToKg } = require('../utils/convert');
const { fetchSites } = require('../sites');
const { matchTags } = require('../tags');
// Maps hair color labels from the profile bio to the normalized values stored on a profile.
// scrapeProfile falls back to the lowercased label for anything not listed here.
const hairMap = {
Blonde: 'blonde',
Brunette: 'brown',
'Black Hair': 'black',
Redhead: 'red',
};
function scrape(html, site, upcoming) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const sceneElements = $('.release-card.scene').toArray();
@ -117,6 +126,50 @@ async function scrapeScene(html, url, site) {
};
}
/**
 * Find the search result link that belongs to the requested actor.
 *
 * @param {string} html - Markup of the search results page.
 * @param {string} url - URL the markup was fetched from (not used here).
 * @param {string} actorName - Name matched exactly against each link's `title` attribute.
 * @returns {?Element} The matching anchor element, or null when no link matches.
 */
function scrapeActorSearch(html, url, actorName) {
    const { document } = new JSDOM(html).window;

    // Compare titles in JS instead of interpolating the name into a selector:
    // a name containing a double quote or a backslash would otherwise produce
    // an invalid selector and make querySelector throw.
    const actorLink = Array.from(document.querySelectorAll('a[title]'))
        .find(linkEl => linkEl.getAttribute('title') === actorName);

    return actorLink || null;
}
/**
 * Build an actor profile from a model page.
 *
 * The bio is read as parallel lists of `label` (key) and `var` (value) elements
 * inside `.profile-spec-list`; only fields present on the page are set.
 *
 * @param {string} html - Markup of the model page.
 * @param {string} url - URL the markup was fetched from (not used here).
 * @param {string} actorName - Name stored on the resulting profile.
 * @returns {Object} Profile with `name`, `releases` and whichever bio fields were found.
 */
function scrapeProfile(html, url, actorName) {
    const { document } = new JSDOM(html).window;

    const avatarEl = document.querySelector('.big-pic-model-container img');
    const descriptionEl = document.querySelector('.model-profile-specs p');

    // Strip newlines and runs of whitespace from a bio cell.
    const readText = el => el.textContent.replace(/\n+|\s{2,}/g, '').trim();
    const bioKeys = Array.from(document.querySelectorAll('.profile-spec-list label'), el => readText(el));
    const bioValues = Array.from(document.querySelectorAll('.profile-spec-list var'), el => readText(el));

    // Keys and values are matched by position.
    const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {});

    const profile = {
        name: actorName,
    };

    if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
    if (bio.Measurements) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
    if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = moment.utc(bio['Date of Birth'], 'MMMM DD, YYYY').toDate();
    if (bio['Birth Location']) profile.birthPlace = bio['Birth Location'];
    if (bio['Pussy Type']) profile.pussy = bio['Pussy Type'].split(',').slice(-1)[0].toLowerCase();

    // Only convert values that actually contain a number, so a placeholder
    // value skips the field instead of throwing.
    if (bio.Height && /\d/.test(bio.Height)) profile.height = heightToCm(bio.Height);

    const weightMatch = bio.Weight ? bio.Weight.match(/\d+/) : null;
    if (weightMatch) profile.weight = lbsToKg(weightMatch[0]);

    if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();

    if (bio['Body Art']) {
        // Plural names match the hasTattoos / hasPiercings fields that the
        // profile merging and storage code reads.
        profile.hasTattoos = bio['Body Art'].includes('Tattoo');
        profile.hasPiercings = bio['Body Art'].includes('Piercing');
    }

    if (descriptionEl) profile.description = descriptionEl.textContent.trim();
    if (avatarEl) profile.avatar = `https:${avatarEl.src}`;

    profile.releases = Array.from(document.querySelectorAll('.release-card-container .scene-card-title a'), el => `https://brazzers.com${el.href}`);

    return profile;
}
async function fetchLatest(site, page = 1) {
const res = await bhttp.get(`${site.url}/page/${page}/`);
@ -135,8 +188,29 @@ async function fetchScene(url, site) {
return scrapeScene(res.body.toString(), url, site);
}
/**
 * Look an actor up through the site search and scrape the profile page it links to.
 *
 * @param {string} actorName - Name to search for.
 * @returns {Promise<?Object>} The scraped profile, or null when the search has no matching link.
 */
async function fetchProfile(actorName) {
    const searchUrl = 'https://brazzers.com/pornstars-search/';

    // The search term is sent in the textSearch cookie, not in the URL.
    const searchHeaders = {
        Cookie: `textSearch=${encodeURIComponent(actorName)};`,
    };
    const searchRes = await bhttp.get(searchUrl, { headers: searchHeaders });

    const actorLink = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName);

    if (!actorLink) {
        return null;
    }

    // NOTE(review): the link is interpolated as-is; this relies on it
    // stringifying to a site-relative path — confirm.
    const profileUrl = `https://brazzers.com${actorLink}`;
    const profileRes = await bhttp.get(profileUrl);

    return scrapeProfile(profileRes.body.toString(), profileUrl, actorName);
}
module.exports = {
fetchLatest,
fetchUpcoming,
fetchProfile,
fetchScene,
fetchUpcoming,
};

View File

@ -23,9 +23,9 @@ async function scrapeProfileFrontpage(html, url, name) {
? moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate()
: null;
const boobsSizeString = bio['Measurements:'];
const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
const boobsNatural = bio['Fake Boobs:'] === 'No';
const measurementsString = bio['Measurements:'];
const [bust, waist, hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));
const naturalBoobs = bio['Fake Boobs:'] === 'No';
const residenceCountryName = bio['Country of Origin:'];
const countryEntry = await knex('countries').where({ name: residenceCountryName }).first();
@ -36,10 +36,12 @@ async function scrapeProfileFrontpage(html, url, name) {
const eyes = bio['Eye Color:'].toLowerCase();
const piercingsString = bio['Piercings:'];
const piercings = piercingsString === 'None' ? null : piercingsString;
const hasPiercings = !!(piercingsString !== undefined && piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
const piercings = hasPiercings && piercingsString;
const tattoosString = bio['Tattoos:'];
const tattoos = tattoosString === 'Unknown (add)' || tattoosString === 'None' ? null : tattoosString;
const hasTattoos = !!(tattoosString !== undefined && tattoosString !== 'Unknown (add)' && tattoosString !== 'None');
const tattoos = hasTattoos && tattoosString;
const social = Array.from(bioEl.querySelectorAll('.dashboard-socialmedia a'), el => el.href);
@ -50,10 +52,10 @@ async function scrapeProfileFrontpage(html, url, name) {
birthdate,
residenceCountry,
birthPlace,
boobs: {
size: boobsSize,
natural: boobsNatural,
},
naturalBoobs,
bust,
waist,
hip,
hair,
eyes,
piercings,
@ -78,8 +80,8 @@ async function scrapeProfileBio(html, frontpageBio, url, name) {
? moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate()
: null;
const boobsSizeString = bio['Measurements:'];
const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
const measurementsString = bio['Measurements:'];
const [bust, waist, hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));
const boobsNatural = bio['Fake boobs:'] === 'No';
const ethnicity = bio['Ethnicity:'];
@ -94,10 +96,12 @@ async function scrapeProfileBio(html, frontpageBio, url, name) {
const weight = Number(bio['Weight:'].match(/\d+/)[0]);
const piercingsString = bio['Piercings:'];
const piercings = piercingsString === 'None' ? null : piercingsString;
const hasPiercings = !!(piercingsString !== undefined && piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
const piercings = hasPiercings && piercingsString;
const tattoosString = bio['Tattoos:'];
const tattoos = tattoosString === undefined || tattoosString === 'Unknown (add)' || tattoosString === 'None' ? null : tattoosString;
const hasTattoos = !!(tattoosString !== undefined && tattoosString !== 'Unknown (add)' && tattoosString !== 'None');
const tattoos = hasTattoos && tattoosString;
const social = Array.from(bioEl.querySelectorAll('#socialmedia a'), el => el.href);
@ -109,14 +113,16 @@ async function scrapeProfileBio(html, frontpageBio, url, name) {
residenceCountry,
birthPlace,
ethnicity,
boobs: {
size: boobsSize,
natural: boobsNatural,
},
naturalBoobs: boobsNatural,
bust,
waist,
hip,
height,
weight,
hair,
eyes,
hasPiercings,
hasTattoos,
piercings,
tattoos,
social,

View File

@ -3,8 +3,10 @@
const Promise = require('bluebird');
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const { JSDOM } = require('jsdom');
const moment = require('moment');
const { heightToCm } = require('../utils/convert');
const { matchTags } = require('../tags');
const pluckPhotos = require('../utils/pluck-photos');
@ -190,6 +192,37 @@ async function scrapeScene(html, url, site) {
};
}
/**
 * Build an actor profile from a model page.
 *
 * Height, age and measurements are pulled out of the free-text `.model_bio`
 * block with regular expressions; only fields that are found are set.
 *
 * @param {string} html - Markup of the model page.
 * @param {string} url - URL the markup was fetched from (not used here).
 * @param {string} actorName - Name stored on the resulting profile.
 * @returns {Object} Profile with `name`, `releases` and whichever bio fields were found.
 */
function scrapeProfile(html, url, actorName) {
    const { document } = new JSDOM(html).window;

    // A page without a bio block still yields a profile (name and releases)
    // instead of throwing on a null element.
    const bioEl = document.querySelector('.model_bio');
    const bio = bioEl ? bioEl.textContent : '';
    const avatarEl = document.querySelector('.model_bio_pic');

    const profile = {
        name: actorName,
    };

    const heightString = bio.match(/\d+ feet \d+ inches/);
    const ageString = bio.match(/Age:\s*\d{2}/);
    const measurementsString = bio.match(/\w+-\d+-\d+/);

    if (heightString) profile.height = heightToCm(heightString[0]);
    if (ageString) profile.age = Number(ageString[0].match(/\d{2}/)[0]);
    if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString[0].split('-');

    if (avatarEl) {
        const markup = avatarEl.innerHTML;

        // Extract the value following `src0_<density>x`, up to and including
        // the matching `<density>x.jpg`. The end marker is searched from the
        // attribute onwards and a missing marker yields null, so unrelated
        // occurrences earlier in the markup cannot produce a bogus slice.
        // The offset 9 skips the 7-character attribute name plus two more
        // characters (presumably `="` — TODO confirm against the page markup).
        const extractSource = (density) => {
            const start = markup.indexOf(`src0_${density}x`);
            if (start === -1) return null;

            const end = markup.indexOf(`${density}x.jpg`, start);
            if (end === -1) return null;

            return markup.slice(start + 9, end + 6);
        };

        // Prefer the highest density that is available.
        const avatar = extractSource(3) || extractSource(2) || extractSource(1);

        if (avatar) profile.avatar = avatar;
    }

    profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), el => el.href);

    return profile;
}
async function fetchLatest(site, page = 1) {
const res = await bhttp.get(`${site.url}/trial/categories/movies_${page}_d.html`);
@ -208,8 +241,22 @@ async function fetchScene(url, site) {
return scrapeScene(res.body.toString(), url, site);
}
/**
 * Fetch and scrape the model page for an actor.
 *
 * The page URL is derived from the lowercased name with whitespace runs
 * replaced by dashes.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @returns {Promise<?Object>} The scraped profile, or null when the page does not respond with 200.
 */
async function fetchProfile(actorName) {
    const slug = actorName.toLowerCase().replace(/\s+/g, '-');
    const profileUrl = `https://julesjordan.com/trial/models/${slug}.html`;

    const res = await bhttp.get(profileUrl);

    if (res.statusCode !== 200) {
        return null;
    }

    return scrapeProfile(res.body.toString(), profileUrl, actorName);
}
module.exports = {
fetchLatest,
fetchProfile,
fetchUpcoming,
fetchScene,
};

View File

@ -26,7 +26,6 @@ async function scrapeProfile(html, _url, actorName) {
const profile = {
name: actorName,
boobs: {},
};
const descriptionString = document.querySelector('div[itemprop="description"]');
@ -60,14 +59,14 @@ async function scrapeProfile(html, _url, actorName) {
profile.residenceCountry = residenceCountryEntry ? residenceCountryEntry.alpha2 : null;
}
if (bio.Measurements && bio.Measurements !== '--') profile.boobs.size = bio.Measurements;
if (bio['Fake Boobs']) profile.boobs.natural = bio['Fake Boobs'] === 'No';
if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No';
if (bio.Height) profile.height = Number(bio.Height.match(/\(\d+/)[0].slice(1));
if (bio.Weight) profile.weight = Number(bio.Weight.match(/\(\d+/)[0].slice(1));
if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
if (bio.Piercings) profile.piercings = bio.Piercings === 'Yes';
if (bio.Tattoos) profile.tattoos = bio.tattoos === 'Yes';
if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes';
// Compare the same `Tattoos` field the guard checks, mirroring the Piercings line;
// reading `bio.hasTattoos` made this evaluate to false whenever that key is absent.
if (bio.Tattoos) profile.hasTattoos = bio.Tattoos === 'Yes';
if (avatarEl) profile.avatar = avatarEl.src;
profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href).filter(link => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason

View File

@ -4,11 +4,9 @@
const twentyonesextury = require('./21sextury');
const bangbros = require('./bangbros');
const blowpass = require('./blowpass');
const brazzers = require('./brazzers');
const ddfnetwork = require('./ddfnetwork');
const dogfart = require('./dogfart');
const evilangel = require('./evilangel');
const julesjordan = require('./julesjordan');
const kink = require('./kink');
const mikeadriano = require('./mikeadriano');
const mofos = require('./mofos');
@ -20,6 +18,8 @@ const vixen = require('./vixen');
const xempire = require('./xempire');
// releases and profiles
const brazzers = require('./brazzers');
const julesjordan = require('./julesjordan');
const legalporno = require('./legalporno');
// profiles
@ -49,7 +49,9 @@ module.exports = {
xempire,
},
actors: {
brazzers,
freeones,
julesjordan,
legalporno,
pornhub,
},

23
src/utils/convert.js Normal file
View File

@ -0,0 +1,23 @@
'use strict';
/**
 * Convert a height in feet and inches to whole centimeters.
 *
 * @param {number|string} feet - Feet component.
 * @param {number|string} [inches=0] - Inches component; treated as 0 when omitted.
 * @returns {number} Height in centimeters, rounded to the nearest integer.
 */
function feetInchesToCm(feet, inches = 0) {
    return Math.round((Number(feet) * 30.48) + (Number(inches) * 2.54));
}

/**
 * Convert a textual height such as "5 feet 7 inches" to whole centimeters.
 *
 * The first number in the text is read as feet and the second as inches.
 *
 * @param {string} height - Text containing the feet and, optionally, the inches.
 * @returns {?number} Height in centimeters, or null when the text contains no number.
 */
function heightToCm(height) {
    const parts = String(height).match(/\d+/g);

    // No digits at all: report "unknown" instead of throwing on a null match.
    if (!parts) {
        return null;
    }

    // A height given in feet only ("6 feet") has no inches component.
    const [feet, inches = 0] = parts;

    return feetInchesToCm(feet, inches);
}
/**
 * Convert a weight in pounds to whole kilograms.
 *
 * The first run of digits in the input is used, so both `150` and "150 lbs" work.
 *
 * @param {number|string} lbs - Weight in pounds, optionally with surrounding text.
 * @returns {?number} Weight in kilograms rounded to the nearest integer, or null
 *   when the input is missing or contains no number.
 */
function lbsToKg(lbs) {
    // `== null` covers both null and undefined.
    if (lbs == null) {
        return null;
    }

    const match = String(lbs).match(/\d+/);

    // No digits: report "unknown" instead of throwing on a null match.
    if (!match) {
        return null;
    }

    return Math.round(Number(match[0]) * 0.453592);
}
// Unit conversion helpers: feet/inches to centimeters and pounds to kilograms.
module.exports = {
feetInchesToCm,
heightToCm,
lbsToKg,
};