From 9fcc40dd17920c597ae4ada4ff197fbf63899e18 Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Wed, 20 Nov 2019 04:53:36 +0100 Subject: [PATCH] Added avatars. Added PornHub and LegalPorno actor profile scrapers. --- assets/components/actor/actor.vue | 28 ++++++- assets/components/network/network.vue | 2 +- assets/components/release/release.vue | 7 +- assets/components/tile/actor.vue | 45 +++++++++-- migrations/20190325001339_releases.js | 18 +++-- public/css/style.css | 40 +++++++++- src/actors.js | 80 ++++++++++++++++--- src/media.js | 104 +++++++++++++++++++++--- src/releases.js | 20 +++-- src/scrapers/freeones.js | 33 +++++--- src/scrapers/legalporno.js | 43 ++++++++++ src/scrapers/pornhub.js | 109 ++++++++++++++++++++++++++ src/scrapers/scrapers.js | 9 ++- 13 files changed, 475 insertions(+), 63 deletions(-) create mode 100644 src/scrapers/pornhub.js diff --git a/assets/components/actor/actor.vue b/assets/components/actor/actor.vue index c3442704..9f4ddc82 100644 --- a/assets/components/actor/actor.vue +++ b/assets/components/actor/actor.vue @@ -9,10 +9,15 @@

{{ actor.name }}

- {{ actor.description }} -
-

Biography

+
+ +
+ {{ actor.description }} +
diff --git a/assets/components/release/release.vue b/assets/components/release/release.vue index 894adb28..4b8b46ff 100644 --- a/assets/components/release/release.vue +++ b/assets/components/release/release.vue @@ -97,8 +97,6 @@

{{ release.title }}

- -
@@ -27,14 +40,34 @@ export default { .actor { background: $background; display: inline-block; - margin: 0 .25rem .25rem 0; + margin: 0 .5rem .5rem 0; box-shadow: 0 0 3px $shadow-weak; } -.name { +.link { color: $link; - display: inline-block; - padding: .5rem; text-decoration: none; + text-align: center; + + &:hover { + color: $primary; + } +} + +.name { + display: block; + padding: .5rem; + font-weight: bold; +} + +.avatar { + color: $shadow-weak; + background: $shadow-hint; + height: 12rem; + width: 10rem; + display: flex; + align-items: center; + justify-content: center; + object-fit: cover; } diff --git a/migrations/20190325001339_releases.js b/migrations/20190325001339_releases.js index 297d82a9..9237344f 100644 --- a/migrations/20190325001339_releases.js +++ b/migrations/20190325001339_releases.js @@ -18,11 +18,13 @@ exports.up = knex => Promise.resolve() table.date('birthdate'); table.string('gender', 18); + table.text('description'); table.string('birth_country_alpha2', 2) .references('alpha2') .inTable('countries'); + table.string('ethnicity'); table.string('birth_place'); table.string('residence_country_alpha2', 2) @@ -31,18 +33,18 @@ exports.up = knex => Promise.resolve() table.string('residence_place'); - table.string('ethnicity'); - table.integer('height'); - table.string('eyes'); table.string('boobs_size'); table.boolean('boobs_natural'); - table.string('piercings'); - table.string('tattoos'); + table.integer('height', 3); + table.integer('weight', 3); + table.string('eyes'); table.string('hair'); - table.text('description'); - table.boolean('active'); + table.boolean('has_tattoos'); + table.boolean('has_piercings'); + table.string('piercings'); + table.string('tattoos'); table.integer('alias_for', 12) .references('id') @@ -217,6 +219,8 @@ exports.up = knex => Promise.resolve() table.string('hash'); table.string('source', 1000); + table.unique(['domain', 'target_id', 'role', 'hash']); + table.datetime('created_at') .defaultTo(knex.fn.now()); })) diff 
--git a/public/css/style.css b/public/css/style.css index d1806b2a..cecb5e0f 100644 --- a/public/css/style.css +++ b/public/css/style.css @@ -249,14 +249,32 @@ .actor[data-v-6989dc6f] { background: #fff; display: inline-block; - margin: 0 .25rem .25rem 0; + margin: 0 .5rem .5rem 0; box-shadow: 0 0 3px rgba(0, 0, 0, 0.2); } -.name[data-v-6989dc6f] { +.link[data-v-6989dc6f] { color: #cc4466; - display: inline-block; - padding: .5rem; text-decoration: none; + text-align: center; +} +.link[data-v-6989dc6f]:hover { + color: #ff6c88; +} +.name[data-v-6989dc6f] { + display: block; + padding: .5rem; + font-weight: bold; +} +.avatar[data-v-6989dc6f] { + color: rgba(0, 0, 0, 0.2); + background: rgba(0, 0, 0, 0.1); + height: 12rem; + width: 10rem; + display: flex; + align-items: center; + justify-content: center; + -o-object-fit: cover; + object-fit: cover; } /* $primary: #ff886c; */ @@ -375,6 +393,10 @@ .duration-segment[data-v-2bc41e74] { font-size: 1rem; } +.actors[data-v-2bc41e74] { + display: flex; + flex-wrap: wrap; +} .link[data-v-2bc41e74] { display: inline-block; color: #cc4466; @@ -574,6 +596,16 @@ .bio-heading[data-v-677a8360]::after { content: ':'; } +.description[data-v-677a8360] { + padding: 1rem; +} +.avatars[data-v-677a8360] { + padding: 1rem; +} +.avatar[data-v-677a8360] { + height: 20rem; + margin: 0 1rem 0 0; +} .flag[data-v-677a8360] { height: 1rem; border: solid 1px rgba(0, 0, 0, 0.2); diff --git a/src/actors.js b/src/actors.js index 319e65d8..ac179291 100644 --- a/src/actors.js +++ b/src/actors.js @@ -1,14 +1,18 @@ 'use strict'; const Promise = require('bluebird'); + const knex = require('./knex'); const argv = require('./argv'); const scrapers = require('./scrapers/scrapers'); const whereOr = require('./utils/where-or'); +const { createActorMediaDirectory, storeAvatars } = require('./media'); async function curateActor(actor) { - const aliases = await knex('actors') - .where({ alias_for: actor.id }); + const [aliases, avatars] = await Promise.all([ + 
/**
 * Merges actor profiles scraped from multiple sites into a single profile.
 * Earlier profiles in the array take precedence for scalar fields; social
 * links and avatars are accumulated across all profiles. `null` entries
 * (scrapers that found nothing) are skipped.
 *
 * @param {Array<Object|null>} profiles - one entry per actor scraper
 * @param {Object} [actor] - existing actor DB row, if any; its id/name win
 * @returns {Object} merged profile (never null)
 */
function mergeProfiles(profiles, actor) {
  return profiles.reduce((prevProfile, profile) => {
    if (profile === null) {
      return prevProfile;
    }

    return {
      id: actor && actor.id,
      name: actor && actor.name,
      gender: prevProfile.gender || profile.gender,
      birthdate: prevProfile.birthdate || profile.birthdate,
      // previously dropped during merge even though scrapers supply them
      birthCountry: prevProfile.birthCountry || profile.birthCountry,
      residenceCountry: prevProfile.residenceCountry || profile.residenceCountry,
      birthPlace: prevProfile.birthPlace || profile.birthPlace,
      residencePlace: prevProfile.residencePlace || profile.residencePlace,
      description: prevProfile.description || profile.description,
      ethnicity: prevProfile.ethnicity || profile.ethnicity,
      // keep the accumulated boobs data when this profile has none
      // (the original reset it to {} and lost earlier scrapers' values)
      boobs: profile.boobs
        ? {
          size: prevProfile.boobs.size || profile.boobs.size,
          natural: prevProfile.boobs.natural || profile.boobs.natural,
        }
        : prevProfile.boobs,
      height: prevProfile.height || profile.height,
      weight: prevProfile.weight || profile.weight,
      hair: prevProfile.hair || profile.hair,
      eyes: prevProfile.eyes || profile.eyes,
      piercings: prevProfile.piercings || profile.piercings,
      tattoos: prevProfile.tattoos || profile.tattoos,
      social: prevProfile.social.concat(profile.social || []),
      avatars: prevProfile.avatars.concat(profile.avatar || []),
    };
  }, {
    boobs: {},
    social: [],
    avatars: [],
    ...actor,
  });
}

/**
 * Scrapes actor profiles from all registered actor scrapers, merges the
 * results, and stores/updates the actor plus their avatars.
 *
 * @param {Array<string>} [actorNames] - actor names; falls back to argv.actors
 */
async function scrapeActors(actorNames) {
  await Promise.map(actorNames || argv.actors, async (actorName) => {
    const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');

    const actorEntry = await knex('actors').where({ slug: actorSlug }).first();
    const profiles = await Promise.all(Object.values(scrapers.actors)
      .map(scraper => scraper.fetchProfile(actorEntry ? actorEntry.name : actorName)));

    // mergeProfiles never returns null, so "not found" must be detected
    // from the raw scraper results (the original compared profile === null,
    // a dead branch that would also have passed null to updateActor)
    if (profiles.every(scrapedProfile => scrapedProfile === null)) {
      console.log(`Could not find profile for actor '${actorName}'`);

      // only a known actor can be flagged as a failed scrape
      if (actorEntry) {
        await updateActor(actorEntry, true, false);
      }

      return;
    }

    const profile = mergeProfiles(profiles, actorEntry);

    if (actorEntry) {
      await createActorMediaDirectory(profile, actorEntry);

      await Promise.all([
        updateActor(profile, true, true),
        storeAvatars(profile, actorEntry),
      ]);

      return;
    }

    const newActorEntry = await storeActor(profile, true, true);

    await createActorMediaDirectory(profile, newActorEntry);
    await storeAvatars(profile, newActorEntry);
  }, {
    concurrency: 1,
  });
}
/**
 * Downloads the avatar URLs collected on `profile` and records them in the
 * `media` table (domain 'actors', role 'avatar') for the given actor row.
 * Failed downloads are logged and skipped; avatars whose content hash is
 * already present in `media` are not inserted again.
 *
 * @param {Object} profile - merged scrape profile; `avatars` holds source URLs
 * @param {Object} actor - actor DB row; `slug` names the media subdirectory,
 *   `id` becomes the media target_id
 */
async function storeAvatars(profile, actor) {
  if (!profile.avatars || profile.avatars.length === 0) {
    console.warn(`No avatars available for '${profile.name}'`);
    return;
  }

  console.log(`Storing ${profile.avatars.length} avatars for '${profile.name}'`);

  const files = await Promise.map(profile.avatars, async (avatarUrl, index) => {
    const { pathname } = new URL(avatarUrl);
    const mimetype = mime.getType(pathname);

    try {
      const res = await bhttp.get(avatarUrl);

      if (res.statusCode === 200) {
        const thumbnail = await getThumbnail(res.body);
        const extension = mime.getExtension(mimetype);

        const filepath = path.join('actors', actor.slug, `${index + 1}.${extension}`);
        const thumbpath = path.join('actors', actor.slug, `${index + 1}_thumb.${extension}`);
        const hash = getHash(res.body);

        await Promise.all([
          fs.writeFile(path.join(config.media.path, filepath), res.body),
          fs.writeFile(path.join(config.media.path, thumbpath), thumbnail),
        ]);

        return {
          filepath,
          thumbpath,
          mimetype,
          hash,
          source: avatarUrl,
        };
      }

      throw new Error(`Response ${res.statusCode} not OK`);
    } catch (error) {
      console.warn(`Failed to store avatar ${index + 1} for '${profile.name}'`);

      return null;
    }
  }, {
    concurrency: 2,
  });

  // Failed downloads yield null entries; drop them before touching .hash.
  // The original mapped file.hash over the raw array and threw a TypeError
  // whenever any single avatar download failed.
  const storedFiles = files.filter(file => file !== null);

  if (storedFiles.length === 0) {
    return;
  }

  const existingAvatars = await knex('media')
    .whereIn('hash', storedFiles.map(file => file.hash));

  const newAvatars = storedFiles
    .filter(file => !existingAvatars.some(avatar => file.hash === avatar.hash));

  // avoid an insert call with an empty array when everything was a duplicate
  if (newAvatars.length === 0) {
    return;
  }

  await knex('media')
    .insert(newAvatars.map((file, index) => ({
      path: file.filepath,
      thumbnail: file.thumbpath,
      mime: file.mimetype,
      hash: file.hash,
      source: file.source,
      index,
      domain: 'actors',
      target_id: actor.id,
      role: 'avatar',
    })));
}
releaseId); + await createReleaseMediaDirectory(release, releaseId); await Promise.all([ associateActors(release, releaseId), diff --git a/src/scrapers/freeones.js b/src/scrapers/freeones.js index 47f7f005..cd524209 100644 --- a/src/scrapers/freeones.js +++ b/src/scrapers/freeones.js @@ -7,7 +7,7 @@ const moment = require('moment'); const knex = require('../knex'); -async function scrapeActorFrontpage(html, url, name) { +async function scrapeProfileFrontpage(html, url, name) { const { document } = new JSDOM(html).window; const bioEl = document.querySelector('.dashboard-bio-list'); @@ -26,7 +26,6 @@ async function scrapeActorFrontpage(html, url, name) { const boobsSizeString = bio['Measurements:']; const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString; const boobsNatural = bio['Fake Boobs:'] === 'No'; - const active = bio['Career Status:'].trim() === 'Active'; const residenceCountryName = bio['Country of Origin:']; const countryEntry = await knex('countries').where({ name: residenceCountryName }).first(); @@ -59,14 +58,13 @@ async function scrapeActorFrontpage(html, url, name) { eyes, piercings, tattoos, - active, social, }, url: bioUrl, }; } -async function scrapeActorBio(html, frontpageBio, url, name) { +async function scrapeProfileBio(html, frontpageBio, url, name) { const { document } = new JSDOM(html).window; const bioEl = document.querySelector('#biographyTable'); @@ -92,6 +90,8 @@ async function scrapeActorBio(html, frontpageBio, url, name) { const hair = bio['Hair Color:'].toLowerCase(); const eyes = bio['Eye Color:'].toLowerCase(); + const height = Number(bio['Height:'].match(/\d+/)[0]); + const weight = Number(bio['Weight:'].match(/\d+/)[0]); const piercingsString = bio['Piercings:']; const piercings = piercingsString === 'None' ? 
null : piercingsString; @@ -113,6 +113,8 @@ async function scrapeActorBio(html, frontpageBio, url, name) { size: boobsSize, natural: boobsNatural, }, + height, + weight, hair, eyes, piercings, @@ -121,23 +123,34 @@ async function scrapeActorBio(html, frontpageBio, url, name) { }; } -async function fetchActor(actorName) { +async function fetchProfile(actorName) { const slug = actorName.replace(' ', '_'); - const frontpageUrl = `https://freeones.com/html/v_links/${slug}`; + const frontpageUrl = `https://www.freeones.com/html/v_links/${slug}`; const resFrontpage = await bhttp.get(frontpageUrl); if (resFrontpage.statusCode === 200) { - const { url, bio } = await scrapeActorFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName); - + const { url, bio } = await scrapeProfileFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName); const resBio = await bhttp.get(url); - return scrapeActorBio(resBio.body.toString(), bio, url, actorName); + return scrapeProfileBio(resBio.body.toString(), bio, url, actorName); + } + + // apparently some actors are appended 'Babe' as their surname... 
+ const fallbackSlug = `${slug}_Babe`; + const fallbackUrl = `https://www.freeones.com/html/s_links/${fallbackSlug}`; + const resFallback = await bhttp.get(fallbackUrl); + + if (resFallback.statusCode === 200) { + const { url, bio } = await scrapeProfileFrontpage(resFallback.body.toString(), fallbackUrl, actorName); + const resBio = await bhttp.get(url); + + return scrapeProfileBio(resBio.body.toString(), bio, url, actorName); } return null; } module.exports = { - fetchActor, + fetchProfile, }; diff --git a/src/scrapers/legalporno.js b/src/scrapers/legalporno.js index a45278ce..0befca39 100644 --- a/src/scrapers/legalporno.js +++ b/src/scrapers/legalporno.js @@ -1,6 +1,7 @@ 'use strict'; const bhttp = require('bhttp'); +const { JSDOM } = require('jsdom'); const cheerio = require('cheerio'); const moment = require('moment'); const knex = require('../knex'); @@ -69,6 +70,31 @@ function scrapeLatest(html, site) { }); } +async function scrapeProfile(html, _url, actorName) { + const { document } = new JSDOM(html).window; + + const profile = { + name: actorName, + }; + + const avatarEl = document.querySelector('.model--avatar img[src^="http"]'); + const entries = Array.from(document.querySelectorAll('.model--description tr'), el => el.textContent.replace(/\n/g, '').split(':')); + const bio = entries.reduce((acc, [key, value]) => ({ ...acc, [key.trim()]: value.trim() }), {}); + + const birthCountryName = bio.Nationality; + + if (birthCountryName) { + const countryEntry = await knex('countries').where({ name: birthCountryName }).first(); + + if (countryEntry) profile.birthCountry = countryEntry.alpha2; + } + + if (bio.Age) profile.age = bio.Age; + if (avatarEl) profile.avatar = avatarEl.src; + + return profile; +} + async function scrapeScene(html, url, site, useGallery) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const playerObject = $('script:contains("new VideoPlayer")').html(); @@ -145,7 +171,24 @@ async function fetchScene(url, site) { return 
scrapeScene(res.body.toString(), url, site, useGallery); } +async function fetchProfile(actorName) { + const res = await bhttp.get(`https://www.legalporno.com/api/autocomplete/search?q=${actorName.replace(' ', '+')}`); + const data = res.body; + + const result = data.terms.find(item => item.type === 'model'); + + if (result) { + const bioRes = await bhttp.get(result.url); + const html = bioRes.body.toString(); + + return scrapeProfile(html, result.url, actorName); + } + + return null; +} + module.exports = { fetchLatest, + fetchProfile, fetchScene, }; diff --git a/src/scrapers/pornhub.js b/src/scrapers/pornhub.js new file mode 100644 index 00000000..72f5839e --- /dev/null +++ b/src/scrapers/pornhub.js @@ -0,0 +1,109 @@ +'use strict'; + +const bhttp = require('bhttp'); +const { JSDOM } = require('jsdom'); +const moment = require('moment'); + +const knex = require('../knex'); + +const ethnicityMap = { + White: 'Caucasian', +}; + +const hairMap = { + Brunette: 'brown', +}; + +const countryMap = { + 'United States of America': 'United States', +}; + +async function scrapeProfile(html, _url, actorName) { + const { document } = new JSDOM(html).window; + + const entries = Array.from(document.querySelectorAll('.infoPiece'), el => el.textContent.replace(/\n|\t/g, '').split(':')); + const bio = entries.reduce((acc, [key, value]) => ({ ...acc, [key.trim()]: value.trim() }), {}); + + const profile = { + name: actorName, + boobs: {}, + }; + + const descriptionString = document.querySelector('div[itemprop="description"]'); + const birthPlaceString = bio['Birth Place'] || bio.Birthplace; + const residencePlaceString = bio['City and Country']; + const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img'); + + if (bio.Gender) profile.gender = bio.Gender.toLowerCase(); + if (bio.ethnicity) profile.ethnicity = ethnicityMap[bio.Ethnicity] || bio.Ethnicity; + + if (descriptionString) profile.description = descriptionString.textContent; + + if 
(bio.Birthday) bio.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate(); + if (bio.Born) bio.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate(); + + if (birthPlaceString) { + const birthPlaceSegments = birthPlaceString.split(','); + const birthCountryName = birthPlaceSegments.slice(-1)[0].trim(); + const birthCountryEntry = await knex('countries').where('name', countryMap[birthCountryName] || birthCountryName).first(); + + profile.birthPlace = birthPlaceSegments.slice(0, -1).join(',').trim(); + profile.birthCountry = birthCountryEntry ? birthCountryEntry.alpha2 : null; + } + + if (residencePlaceString) { + const residencePlaceSegments = residencePlaceString.split(','); + const residenceCountryAlpha2 = residencePlaceSegments.slice(-1)[0].trim(); + const residenceCountryEntry = await knex('countries').where('alpha2', residenceCountryAlpha2).first(); + + profile.residencePlace = residencePlaceSegments.slice(0, -1).join(',').trim(); + profile.residenceCountry = residenceCountryEntry ? 
residenceCountryEntry.alpha2 : null; + } + + if (bio.Measurements && bio.Measurements !== '--') profile.boobs.size = bio.Measurements; + if (bio['Fake Boobs']) profile.boobs.natural = bio['Fake Boobs'] === 'No'; + + if (bio.Height) profile.height = Number(bio.Height.match(/\(\d+/)[0].slice(1)); + if (bio.Weight) profile.weight = Number(bio.Weight.match(/\(\d+/)[0].slice(1)); + if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase(); + if (bio.Piercings) profile.piercings = bio.Piercings === 'Yes'; + if (bio.Tattoos) profile.tattoos = bio.tattoos === 'Yes'; + + if (avatarEl) profile.avatar = avatarEl.src; + profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href).filter(link => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason + + return profile; +} + +async function fetchProfile(actorName) { + const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-'); + + /* Model pages are not reliably associated with actual porn stars + const modelUrl = `https://pornhub.com/model/${actorSlug}`; + const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`; + + const [modelRes, pornstarRes] = await Promise.all([ + bhttp.get(modelUrl), + bhttp.get(pornstarUrl), + ]); + + const model = modelRes.statusCode === 200 && await scrapeProfile(modelRes.body.toString(), modelUrl, actorName); + const pornstar = pornstarRes.statusCode === 200 && await scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName); + + if (model && pornstar) { + return { + ...model, + ...pornstar, + }; + } + */ + + const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`; + const pornstarRes = await bhttp.get(pornstarUrl); + + return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName); +} + +module.exports = { + fetchProfile, +}; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 88ed9bdf..045914f0 100644 --- a/src/scrapers/scrapers.js +++ 
b/src/scrapers/scrapers.js @@ -10,7 +10,6 @@ const dogfart = require('./dogfart'); const evilangel = require('./evilangel'); const julesjordan = require('./julesjordan'); const kink = require('./kink'); -const legalporno = require('./legalporno'); const mikeadriano = require('./mikeadriano'); const mofos = require('./mofos'); const pervcity = require('./pervcity'); @@ -20,8 +19,12 @@ const realitykings = require('./realitykings'); const vixen = require('./vixen'); const xempire = require('./xempire'); -// actors +// releases and profiles +const legalporno = require('./legalporno'); + +// profiles const freeones = require('./freeones'); +const pornhub = require('./pornhub'); module.exports = { releases: { @@ -47,5 +50,7 @@ module.exports = { }, actors: { freeones, + legalporno, + pornhub, }, };