From 9224b441e2c0dd741365144eaeade96174d98b4c Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Thu, 21 Nov 2019 04:05:32 +0100 Subject: [PATCH] Added Brazzers and Jules Jordan as profile sources. Changed profile structure for proper bust-waist-hip properties and improved stability. --- assets/components/actor/actor.vue | 7 ++- migrations/20190325001339_releases.js | 6 ++- src/actors.js | 56 ++++++++++++-------- src/media.js | 6 ++- src/releases.js | 2 +- src/scrape-release.js | 6 +-- src/scrapers/brazzers.js | 76 ++++++++++++++++++++++++++- src/scrapers/freeones.js | 40 ++++++++------ src/scrapers/julesjordan.js | 47 +++++++++++++++++ src/scrapers/pornhub.js | 9 ++-- src/scrapers/scrapers.js | 6 ++- src/utils/convert.js | 23 ++++++++ 12 files changed, 224 insertions(+), 60 deletions(-) create mode 100644 src/utils/convert.js diff --git a/assets/components/actor/actor.vue b/assets/components/actor/actor.vue index 9f4ddc82..24c3146b 100644 --- a/assets/components/actor/actor.vue +++ b/assets/components/actor/actor.vue @@ -71,10 +71,9 @@ {{ actor.height }} cm -
  • - Boobs - {{ actor.boobSize }} - {{ actor.boobsNatural ? 'Natural' : 'Enhanced' }} +
  • + Measurements + {{ actor.bust || '??' }}-{{ actor.waist || '??' }}-{{ actor.hip || '??' }}
  • diff --git a/migrations/20190325001339_releases.js b/migrations/20190325001339_releases.js index 9237344f..5a9ef085 100644 --- a/migrations/20190325001339_releases.js +++ b/migrations/20190325001339_releases.js @@ -33,8 +33,10 @@ exports.up = knex => Promise.resolve() table.string('residence_place'); - table.string('boobs_size'); - table.boolean('boobs_natural'); + table.string('bust', 10); + table.integer('waist', 3); + table.integer('hip', 3); + table.boolean('natural_boobs'); table.integer('height', 3); table.integer('weight', 3); diff --git a/src/actors.js b/src/actors.js index ac179291..2b26601d 100644 --- a/src/actors.js +++ b/src/actors.js @@ -37,8 +37,10 @@ async function curateActor(actor) { : null, ethnicity: actor.ethnicity, height: actor.height, - boobSize: actor.boobs_size, - boobsNatural: actor.boobs_natural, + bust: actor.bust, + waist: actor.waist, + hip: actor.hip, + naturalBoobs: actor.natural_boobs, aliases: aliases.map(({ name }) => name), slug: actor.slug, avatars, @@ -51,7 +53,6 @@ function curateActors(releases) { function curateActorEntry(actor, scraped, scrapeSuccess) { const curatedActor = { - id: actor.id, name: actor.name .split(' ') .map(segment => `${segment.charAt(0).toUpperCase()}${segment.slice(1)}`) @@ -65,22 +66,27 @@ function curateActorEntry(actor, scraped, scrapeSuccess) { residence_country_alpha2: actor.residenceCountry, birth_place: actor.birthPlace, residence_place: actor.residencePlace, - boobs_size: actor.boobs && actor.boobs.size, - boobs_natural: actor.boobs && actor.boobs.natural, + bust: actor.bust, + waist: actor.waist, + hip: actor.hip, + natural_boobs: actor.naturalBoobs, height: actor.height, weight: actor.weight, hair: actor.hair, eyes: actor.eyes, + has_tattoos: actor.hasTattoos, + has_piercings: actor.hasPiercings, tattoos: actor.tattoos, piercings: actor.piercings, }; + if (actor.id) { + curatedActor.id = actor.id; + } + if (scraped) { - return { - ...curatedActor, - scraped_at: new Date(), - scrape_success: scrapeSuccess, - }; + curatedActor.scraped_at = new Date(); + curatedActor.scrape_success = scrapeSuccess; } return curatedActor; @@ -141,30 +147,32 @@ function mergeProfiles(profiles, actor) { } return { - id: actor.id, - name: actor.name, + id: actor ? actor.id : null, + name: actor ? actor.name : profile.name, + description: prevProfile.description || profile.description, gender: prevProfile.gender || profile.gender, - birthdate: prevProfile.birthdate || profile.birthdate, + birthdate: Number.isNaN(prevProfile.birthdate) ? profile.birthdate : prevProfile.birthdate, + birthCountry: prevProfile.birthCountry || profile.birthCountry, residenceCountry: prevProfile.residenceCountry || profile.residenceCountry, birthPlace: prevProfile.birthPlace || profile.birthPlace, + residencePlace: prevProfile.residencePlace || profile.residencePlace, ethnicity: prevProfile.ethnicity || profile.ethnicity, - boobs: profile.boobs - ? { - size: prevProfile.boobs.size || profile.boobs.size, - natural: prevProfile.boobs.natural || profile.boobs.natural, - } - : {}, + bust: prevProfile.bust || profile.bust, + waist: prevProfile.waist || profile.waist, + hip: prevProfile.hip || profile.hip, + naturalBoobs: prevProfile.naturalBoobs || profile.naturalBoobs, height: prevProfile.height || profile.height, weight: prevProfile.weight || profile.weight, hair: prevProfile.hair || profile.hair, eyes: prevProfile.eyes || profile.eyes, + hasPiercings: prevProfile.hasPiercings || profile.hasPiercings, + hasTattoos: prevProfile.hasTattoos || profile.hasTattoos, piercings: prevProfile.piercings || profile.piercings, tattoos: prevProfile.tattoos || profile.tattoos, social: prevProfile.social.concat(profile.social || []), avatars: prevProfile.avatars.concat(profile.avatar || []), }; }, { - boobs: {}, social: [], avatars: [], ...actor, @@ -176,7 +184,11 @@ async function scrapeActors(actorNames) { const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-'); const actorEntry = await knex('actors').where({ slug: actorSlug }).first(); - const profiles = await Promise.all(Object.values(scrapers.actors).map(scraper => scraper.fetchProfile(actorEntry ? actorEntry.name : actorName))); + const profiles = await Promise.all( + Object.values(scrapers.actors) + .map(scraper => scraper.fetchProfile(actorEntry ? actorEntry.name : actorName)), + ); + const profile = mergeProfiles(profiles, actorEntry); if (profile === null) { @@ -203,7 +215,7 @@ async function scrapeActors(actorNames) { await createActorMediaDirectory(profile, newActorEntry); await storeAvatars(profile, newActorEntry); }, { - concurrency: 1, + concurrency: 3, }); } diff --git a/src/media.js b/src/media.js index 472c1a61..36ddecd4 100644 --- a/src/media.js +++ b/src/media.js @@ -198,8 +198,10 @@ async function storeAvatars(profile, actor) { const thumbnail = await getThumbnail(res.body); const extension = mime.getExtension(mimetype); - const filepath = path.join('actors', actor.slug, `${index + 1}.${extension}`); - const thumbpath = path.join('actors', actor.slug, `${index + 1}_thumb.${extension}`); + const timestamp = new Date().getTime(); + + const filepath = path.join('actors', actor.slug, `${timestamp + index}.${extension}`); + const thumbpath = path.join('actors', actor.slug, `${timestamp + index}_thumb.${extension}`); const hash = getHash(res.body); await Promise.all([ diff --git a/src/releases.js b/src/releases.js index 6464b745..09253705 100644 --- a/src/releases.js +++ b/src/releases.js @@ -235,7 +235,7 @@ async function storeRelease(release) { await storeReleaseAssets(release, releaseEntry.id); console.log(`Stored release "${release.title}" (${releaseEntry.id}, ${release.site.name})`); - return null; + return releaseEntry.id; } async function storeReleases(releases) { diff --git a/src/scrape-release.js b/src/scrape-release.js index db5f5453..2b4de7ae 100644 --- a/src/scrape-release.js +++ b/src/scrape-release.js @@ -49,10 +49,8 @@ async function scrapeRelease(url, release, deep = false) { if (!deep && argv.save) { // don't store release when called by site scraper - const releaseId = await Promise.all([ - storeReleases([scene]), - scrapeBasicActors(), - ]); + const [releaseId] = await storeReleases([scene]); + await scrapeBasicActors(); console.log(`http://${config.web.host}:${config.web.port}/scene/${releaseId}`); } diff --git a/src/scrapers/brazzers.js b/src/scrapers/brazzers.js index 2e03ad78..59711334 100644 --- a/src/scrapers/brazzers.js +++ b/src/scrapers/brazzers.js @@ -3,11 +3,20 @@ /* eslint-disable newline-per-chained-call */ const bhttp = require('bhttp'); const cheerio = require('cheerio'); +const { JSDOM } = require('jsdom'); const moment = require('moment'); +const { heightToCm, lbsToKg } = require('../utils/convert'); const { fetchSites } = require('../sites'); const { matchTags } = require('../tags'); +const hairMap = { + Blonde: 'blonde', + Brunette: 'brown', + 'Black Hair': 'black', + Redhead: 'red', +}; + function scrape(html, site, upcoming) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const sceneElements = $('.release-card.scene').toArray(); @@ -117,6 +126,50 @@ async function scrapeScene(html, url, site) { }; } +function scrapeActorSearch(html, url, actorName) { + const { document } = new JSDOM(html).window; + const actorLink = document.querySelector(`a[title="${actorName}"]`); + + return actorLink; +} + +function scrapeProfile(html, url, actorName) { + const { document } = new JSDOM(html).window; + + const avatarEl = document.querySelector('.big-pic-model-container img'); + const descriptionEl = document.querySelector('.model-profile-specs p'); + const bioKeys = Array.from(document.querySelectorAll('.profile-spec-list label'), el => el.textContent.replace(/\n+|\s{2,}/g, '').trim()); + const bioValues = Array.from(document.querySelectorAll('.profile-spec-list var'), el => el.textContent.replace(/\n+|\s{2,}/g, '').trim()); + + const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {}); + + const profile = { + name: actorName, + }; + + if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity; + if (bio.Measurements) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-'); + if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = moment.utc(bio['Date of Birth'], 'MMMM DD, YYYY').toDate(); + if (bio['Birth Location']) profile.birthPlace = bio['Birth Location']; + if (bio['Pussy Type']) profile.pussy = bio['Pussy Type'].split(',').slice(-1)[0].toLowerCase(); + + if (bio.Height) profile.height = heightToCm(bio.Height); + if (bio.Weight) profile.weight = lbsToKg(bio.Weight.match(/\d+/)[0]); + if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase(); + + if (bio['Body Art']) { + profile.hasTattoo = !!bio['Body Art'].match('Tattoo'); + profile.hasPiercing = !!bio['Body Art'].match('Piercing'); + } + + if (descriptionEl) profile.description = descriptionEl.textContent.trim(); + if (avatarEl) profile.avatar = `https:${avatarEl.src}`; + + profile.releases = Array.from(document.querySelectorAll('.release-card-container .scene-card-title a'), el => `https://brazzers.com${el.href}`); + + return profile; +} + async function fetchLatest(site, page = 1) { const res = await bhttp.get(`${site.url}/page/${page}/`); @@ -135,8 +188,29 @@ async function fetchScene(url, site) { return scrapeScene(res.body.toString(), url, site); } +async function fetchProfile(actorName) { + const searchUrl = 'https://brazzers.com/pornstars-search/'; + const searchRes = await bhttp.get(searchUrl, { + headers: { + Cookie: `textSearch=${encodeURIComponent(actorName)};`, + }, + }); + + const actorLink = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName); + + if (actorLink) { + const url = `https://brazzers.com${actorLink}`; + const res = await bhttp.get(url); + + return scrapeProfile(res.body.toString(), url, actorName); + } + + return null; +} + module.exports = { fetchLatest, - fetchUpcoming, + fetchProfile, fetchScene, + fetchUpcoming, }; diff --git a/src/scrapers/freeones.js b/src/scrapers/freeones.js index cd524209..e64ae4f0 100644 --- a/src/scrapers/freeones.js +++ b/src/scrapers/freeones.js @@ -23,9 +23,9 @@ async function scrapeProfileFrontpage(html, url, name) { ? moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate() : null; - const boobsSizeString = bio['Measurements:']; - const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString; - const boobsNatural = bio['Fake Boobs:'] === 'No'; + const measurementsString = bio['Measurements:']; + const [bust, waist, hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement)); + const naturalBoobs = bio['Fake Boobs:'] === 'No'; const residenceCountryName = bio['Country of Origin:']; const countryEntry = await knex('countries').where({ name: residenceCountryName }).first(); @@ -36,10 +36,12 @@ async function scrapeProfileFrontpage(html, url, name) { const eyes = bio['Eye Color:'].toLowerCase(); const piercingsString = bio['Piercings:']; - const piercings = piercingsString === 'None' ? null : piercingsString; + const hasPiercings = !!(piercingsString !== undefined && piercingsString !== 'Unknown (add)' && piercingsString !== 'None'); + const piercings = hasPiercings && piercingsString; const tattoosString = bio['Tattoos:']; - const tattoos = tattoosString === 'Unknown (add)' || tattoosString === 'None' ? null : tattoosString; + const hasTattoos = !!(tattoosString !== undefined && tattoosString !== 'Unknown (add)' && tattoosString !== 'None'); + const tattoos = hasTattoos && tattoosString; const social = Array.from(bioEl.querySelectorAll('.dashboard-socialmedia a'), el => el.href); @@ -50,10 +52,10 @@ async function scrapeProfileFrontpage(html, url, name) { birthdate, residenceCountry, birthPlace, - boobs: { - size: boobsSize, - natural: boobsNatural, - }, + naturalBoobs, + bust, + waist, + hip, hair, eyes, piercings, @@ -78,8 +80,8 @@ async function scrapeProfileBio(html, frontpageBio, url, name) { ? moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate() : null; - const boobsSizeString = bio['Measurements:']; - const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString; + const measurementsString = bio['Measurements:']; + const [bust, waist, hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement)); const boobsNatural = bio['Fake boobs:'] === 'No'; const ethnicity = bio['Ethnicity:']; @@ -94,10 +96,12 @@ async function scrapeProfileBio(html, frontpageBio, url, name) { const weight = Number(bio['Weight:'].match(/\d+/)[0]); const piercingsString = bio['Piercings:']; - const piercings = piercingsString === 'None' ? null : piercingsString; + const hasPiercings = !!(piercingsString !== undefined && piercingsString !== 'Unknown (add)' && piercingsString !== 'None'); + const piercings = hasPiercings && piercingsString; const tattoosString = bio['Tattoos:']; - const tattoos = tattoosString === undefined || tattoosString === 'Unknown (add)' || tattoosString === 'None' ? null : tattoosString; + const hasTattoos = !!(tattoosString !== undefined && tattoosString !== 'Unknown (add)' && tattoosString !== 'None'); + const tattoos = hasTattoos && tattoosString; const social = Array.from(bioEl.querySelectorAll('#socialmedia a'), el => el.href); @@ -109,14 +113,16 @@ async function scrapeProfileBio(html, frontpageBio, url, name) { residenceCountry, birthPlace, ethnicity, - boobs: { - size: boobsSize, - natural: boobsNatural, - }, + naturalBoobs: boobsNatural, + bust, + waist, + hip, height, weight, hair, eyes, + hasPiercings, + hasTattoos, piercings, tattoos, social, diff --git a/src/scrapers/julesjordan.js b/src/scrapers/julesjordan.js index 297a379d..5585228c 100644 --- a/src/scrapers/julesjordan.js +++ b/src/scrapers/julesjordan.js @@ -3,8 +3,10 @@ const Promise = require('bluebird'); const bhttp = require('bhttp'); const cheerio = require('cheerio'); +const { JSDOM } = require('jsdom'); const moment = require('moment'); +const { heightToCm } = require('../utils/convert'); const { matchTags } = require('../tags'); const pluckPhotos = require('../utils/pluck-photos'); @@ -190,6 +192,37 @@ async function scrapeScene(html, url, site) { }; } +function scrapeProfile(html, url, actorName) { + const { document } = new JSDOM(html).window; + + const bio = document.querySelector('.model_bio').textContent; + const avatarEl = document.querySelector('.model_bio_pic'); + + const profile = { + name: actorName, + }; + + const heightString = bio.match(/\d+ feet \d+ inches/); + const ageString = bio.match(/Age:\s*\d{2}/); + const measurementsString = bio.match(/\w+-\d+-\d+/); + + if (heightString) profile.height = heightToCm(heightString[0]); + if (ageString) profile.age = Number(ageString[0].match(/\d{2}/)[0]); + if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString[0].split('-'); + + if (avatarEl) { + const src1 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_1x') + 9, avatarEl.innerHTML.indexOf('1x.jpg') + 6); + const src2 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_2x') + 9, avatarEl.innerHTML.indexOf('2x.jpg') + 6); + const src3 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_3x') + 9, avatarEl.innerHTML.indexOf('3x.jpg') + 6); + + profile.avatar = src3 || src2 || src1; + } + + profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), el => el.href); + + return profile; +} + async function fetchLatest(site, page = 1) { const res = await bhttp.get(`${site.url}/trial/categories/movies_${page}_d.html`); @@ -208,8 +241,22 @@ async function fetchScene(url, site) { return scrapeScene(res.body.toString(), url, site); } +async function fetchProfile(actorName) { + const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-'); + const url = `https://julesjordan.com/trial/models/${actorSlug}.html`; + + const res = await bhttp.get(url); + + if (res.statusCode === 200) { + return scrapeProfile(res.body.toString(), url, actorName); + } + + return null; +} + module.exports = { fetchLatest, + fetchProfile, fetchUpcoming, fetchScene, }; diff --git a/src/scrapers/pornhub.js b/src/scrapers/pornhub.js index 72f5839e..47e36e2f 100644 --- a/src/scrapers/pornhub.js +++ b/src/scrapers/pornhub.js @@ -26,7 +26,6 @@ async function scrapeProfile(html, _url, actorName) { const profile = { name: actorName, - boobs: {}, }; const descriptionString = document.querySelector('div[itemprop="description"]'); @@ -60,14 +59,14 @@ async function scrapeProfile(html, _url, actorName) { profile.residenceCountry = residenceCountryEntry ? residenceCountryEntry.alpha2 : null; } - if (bio.Measurements && bio.Measurements !== '--') profile.boobs.size = bio.Measurements; - if (bio['Fake Boobs']) profile.boobs.natural = bio['Fake Boobs'] === 'No'; + if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-'); + if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No'; if (bio.Height) profile.height = Number(bio.Height.match(/\(\d+/)[0].slice(1)); if (bio.Weight) profile.weight = Number(bio.Weight.match(/\(\d+/)[0].slice(1)); if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase(); - if (bio.Piercings) profile.piercings = bio.Piercings === 'Yes'; - if (bio.Tattoos) profile.tattoos = bio.tattoos === 'Yes'; + if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes'; + if (bio.Tattoos) profile.hasTattoos = bio.hasTattoos === 'Yes'; if (avatarEl) profile.avatar = avatarEl.src; profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href).filter(link => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 045914f0..568fa054 100644 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -4,11 +4,9 @@ const twentyonesextury = require('./21sextury'); const bangbros = require('./bangbros'); const blowpass = require('./blowpass'); -const brazzers = require('./brazzers'); const ddfnetwork = require('./ddfnetwork'); const dogfart = require('./dogfart'); const evilangel = require('./evilangel'); -const julesjordan = require('./julesjordan'); const kink = require('./kink'); const mikeadriano = require('./mikeadriano'); const mofos = require('./mofos'); @@ -20,6 +18,8 @@ const vixen = require('./vixen'); const xempire = require('./xempire'); // releases and profiles +const brazzers = require('./brazzers'); +const julesjordan = require('./julesjordan'); const legalporno = require('./legalporno'); // profiles @@ -49,7 +49,9 @@ module.exports = { xempire, }, actors: { + brazzers, freeones, + julesjordan, legalporno, pornhub, }, diff --git a/src/utils/convert.js b/src/utils/convert.js new file mode 100644 index 00000000..5e1923cd --- /dev/null +++ b/src/utils/convert.js @@ -0,0 +1,23 @@ +'use strict'; + +function feetInchesToCm(feet, inches) { + return Math.round((Number(feet) * 30.48) + (Number(inches) * 2.54)); +} + +function heightToCm(height) { + const [feet, inches] = height.match(/\d+/g); + + return feetInchesToCm(feet, inches); +} + +function lbsToKg(lbs) { + const pounds = lbs.toString().match(/\d+/)[0]; + + return Math.round(Number(pounds) * 0.453592); +} + +module.exports = { + feetInchesToCm, + heightToCm, + lbsToKg, +};