forked from DebaucheryLibrarian/traxxx
Added avatars. Added PornHub and LegalPorno actor profile scrapers.
This commit is contained in:
@@ -7,7 +7,7 @@ const moment = require('moment');
|
||||
|
||||
const knex = require('../knex');
|
||||
|
||||
async function scrapeActorFrontpage(html, url, name) {
|
||||
async function scrapeProfileFrontpage(html, url, name) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const bioEl = document.querySelector('.dashboard-bio-list');
|
||||
|
||||
@@ -26,7 +26,6 @@ async function scrapeActorFrontpage(html, url, name) {
|
||||
const boobsSizeString = bio['Measurements:'];
|
||||
const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
|
||||
const boobsNatural = bio['Fake Boobs:'] === 'No';
|
||||
const active = bio['Career Status:'].trim() === 'Active';
|
||||
|
||||
const residenceCountryName = bio['Country of Origin:'];
|
||||
const countryEntry = await knex('countries').where({ name: residenceCountryName }).first();
|
||||
@@ -59,14 +58,13 @@ async function scrapeActorFrontpage(html, url, name) {
|
||||
eyes,
|
||||
piercings,
|
||||
tattoos,
|
||||
active,
|
||||
social,
|
||||
},
|
||||
url: bioUrl,
|
||||
};
|
||||
}
|
||||
|
||||
async function scrapeActorBio(html, frontpageBio, url, name) {
|
||||
async function scrapeProfileBio(html, frontpageBio, url, name) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const bioEl = document.querySelector('#biographyTable');
|
||||
|
||||
@@ -92,6 +90,8 @@ async function scrapeActorBio(html, frontpageBio, url, name) {
|
||||
|
||||
const hair = bio['Hair Color:'].toLowerCase();
|
||||
const eyes = bio['Eye Color:'].toLowerCase();
|
||||
const height = Number(bio['Height:'].match(/\d+/)[0]);
|
||||
const weight = Number(bio['Weight:'].match(/\d+/)[0]);
|
||||
|
||||
const piercingsString = bio['Piercings:'];
|
||||
const piercings = piercingsString === 'None' ? null : piercingsString;
|
||||
@@ -113,6 +113,8 @@ async function scrapeActorBio(html, frontpageBio, url, name) {
|
||||
size: boobsSize,
|
||||
natural: boobsNatural,
|
||||
},
|
||||
height,
|
||||
weight,
|
||||
hair,
|
||||
eyes,
|
||||
piercings,
|
||||
@@ -121,23 +123,34 @@ async function scrapeActorBio(html, frontpageBio, url, name) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Scrapes a full profile starting from an already fetched frontpage response.
 * The frontpage scraper yields the bio page URL, which is fetched and scraped next.
 */
async function scrapeProfilePages(resFrontpage, frontpageUrl, actorName) {
    const { url, bio } = await scrapeProfileFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);
    const resBio = await bhttp.get(url);

    return scrapeProfileBio(resBio.body.toString(), bio, url, actorName);
}

/**
 * Fetches an actor profile from FreeOnes.
 *
 * @param {string} actorName - Actor name; whitespace runs become underscores in the URL slug.
 * @returns {Promise<Object|null>} Result of scrapeProfileBio, or null when neither the
 *   regular page nor the '_Babe' fallback page responds with status 200.
 */
async function fetchProfile(actorName) {
    // a string pattern would only replace the first space, breaking names of three or more words
    const slug = actorName.replace(/\s+/g, '_');
    const frontpageUrl = `https://www.freeones.com/html/v_links/${slug}`;

    const resFrontpage = await bhttp.get(frontpageUrl);

    if (resFrontpage.statusCode === 200) {
        return scrapeProfilePages(resFrontpage, frontpageUrl, actorName);
    }

    // apparently some actors are appended 'Babe' as their surname...
    const fallbackSlug = `${slug}_Babe`;
    const fallbackUrl = `https://www.freeones.com/html/s_links/${fallbackSlug}`;
    const resFallback = await bhttp.get(fallbackUrl);

    if (resFallback.statusCode === 200) {
        return scrapeProfilePages(resFallback, fallbackUrl, actorName);
    }

    return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchActor,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
const knex = require('../knex');
|
||||
@@ -69,6 +70,31 @@ function scrapeLatest(html, site) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Builds an actor profile from the HTML of a LegalPorno model page.
 *
 * @param {string} html - Markup of the model page.
 * @param {string} _url - Page URL; unused.
 * @param {string} actorName - Name stored on the profile as-is.
 * @returns {Promise<Object>} Profile with `name`, plus `birthCountry`, `age` and `avatar`
 *   when the page provides them.
 */
async function scrapeProfile(html, _url, actorName) {
    const { document } = new JSDOM(html).window;

    const profile = {
        name: actorName,
    };

    const avatarEl = document.querySelector('.model--avatar img[src^="http"]');

    // Rows read "Label: value". Split on the first colon only, and skip rows that have
    // no colon at all instead of crashing on a missing value.
    const bio = Array.from(document.querySelectorAll('.model--description tr')).reduce((acc, el) => {
        const text = el.textContent.replace(/\n/g, '');
        const separatorIndex = text.indexOf(':');

        if (separatorIndex === -1) return acc;

        return { ...acc, [text.slice(0, separatorIndex).trim()]: text.slice(separatorIndex + 1).trim() };
    }, {});

    const birthCountryName = bio.Nationality;

    if (birthCountryName) {
        // the Nationality value is looked up by country name to obtain its alpha2 code
        const countryEntry = await knex('countries').where({ name: birthCountryName }).first();

        if (countryEntry) profile.birthCountry = countryEntry.alpha2;
    }

    // age is passed through as the scraped string
    if (bio.Age) profile.age = bio.Age;
    if (avatarEl) profile.avatar = avatarEl.src;

    return profile;
}
|
||||
|
||||
async function scrapeScene(html, url, site, useGallery) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const playerObject = $('script:contains("new VideoPlayer")').html();
|
||||
@@ -145,7 +171,24 @@ async function fetchScene(url, site) {
|
||||
return scrapeScene(res.body.toString(), url, site, useGallery);
|
||||
}
|
||||
|
||||
/**
 * Looks an actor up through LegalPorno's autocomplete search and scrapes the
 * first result of type 'model'.
 *
 * @param {string} actorName - Actor name used as the search query.
 * @returns {Promise<Object|null>} Result of scrapeProfile, or null when the search
 *   yields no model entry.
 */
async function fetchProfile(actorName) {
    // URLSearchParams encodes every space as '+' and escapes reserved characters
    const query = new URLSearchParams({ q: actorName });
    const res = await bhttp.get(`https://www.legalporno.com/api/autocomplete/search?${query}`);
    const data = res.body;

    // an error page or unexpected payload has no terms list; treat it as no result
    const terms = data && Array.isArray(data.terms) ? data.terms : [];
    const result = terms.find(item => item.type === 'model');

    if (result) {
        const bioRes = await bhttp.get(result.url);
        const html = bioRes.body.toString();

        return scrapeProfile(html, result.url, actorName);
    }

    return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
109
src/scrapers/pornhub.js
Normal file
109
src/scrapers/pornhub.js
Normal file
@@ -0,0 +1,109 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const knex = require('../knex');
|
||||
|
||||
const ethnicityMap = {
|
||||
White: 'Caucasian',
|
||||
};
|
||||
|
||||
const hairMap = {
|
||||
Brunette: 'brown',
|
||||
};
|
||||
|
||||
const countryMap = {
|
||||
'United States of America': 'United States',
|
||||
};
|
||||
|
||||
/**
 * Pulls the number that directly follows an opening parenthesis out of a bio value.
 * Presumably the metric figure, e.g. "5 ft 6 in (168 cm)" -> 168; confirm against live pages.
 * Returns null when the value has no such number.
 */
function extractParenthesizedNumber(value) {
    const match = value.match(/\((\d+)/);

    return match ? Number(match[1]) : null;
}

/**
 * Builds an actor profile from the HTML of a PornHub profile page.
 *
 * @param {string} html - Markup of the profile page.
 * @param {string} _url - Page URL; unused.
 * @param {string} actorName - Name stored on the profile as-is.
 * @returns {Promise<Object>} Profile with `name`, `boobs` and `social` always present;
 *   every other property is set only when the page provides a usable value.
 */
async function scrapeProfile(html, _url, actorName) {
    const { document } = new JSDOM(html).window;

    // Info pieces read "Label: value". Split on the first colon only, and skip pieces
    // that have no colon at all instead of crashing on a missing value.
    const bio = Array.from(document.querySelectorAll('.infoPiece')).reduce((acc, el) => {
        const text = el.textContent.replace(/\n|\t/g, '');
        const separatorIndex = text.indexOf(':');

        if (separatorIndex === -1) return acc;

        return { ...acc, [text.slice(0, separatorIndex).trim()]: text.slice(separatorIndex + 1).trim() };
    }, {});

    const profile = {
        name: actorName,
        boobs: {},
    };

    const descriptionEl = document.querySelector('div[itemprop="description"]');
    const birthPlaceString = bio['Birth Place'] || bio.Birthplace;
    const residencePlaceString = bio['City and Country'];
    const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img');

    if (bio.Gender) profile.gender = bio.Gender.toLowerCase();
    // bio keys keep the page's capitalisation, so the lookup must use 'Ethnicity'
    if (bio.Ethnicity) profile.ethnicity = ethnicityMap[bio.Ethnicity] || bio.Ethnicity;

    if (descriptionEl) profile.description = descriptionEl.textContent;

    // the parsed date belongs on the profile; when both labels are present, 'Born' wins
    if (bio.Birthday) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate();
    if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();

    if (birthPlaceString) {
        // last comma-separated segment is the country name, the rest is the place
        const birthPlaceSegments = birthPlaceString.split(',');
        const birthCountryName = birthPlaceSegments.slice(-1)[0].trim();
        const birthCountryEntry = await knex('countries').where('name', countryMap[birthCountryName] || birthCountryName).first();

        profile.birthPlace = birthPlaceSegments.slice(0, -1).join(',').trim();
        profile.birthCountry = birthCountryEntry ? birthCountryEntry.alpha2 : null;
    }

    if (residencePlaceString) {
        // here the last segment is matched against the alpha2 column directly
        const residencePlaceSegments = residencePlaceString.split(',');
        const residenceCountryAlpha2 = residencePlaceSegments.slice(-1)[0].trim();
        const residenceCountryEntry = await knex('countries').where('alpha2', residenceCountryAlpha2).first();

        profile.residencePlace = residencePlaceSegments.slice(0, -1).join(',').trim();
        profile.residenceCountry = residenceCountryEntry ? residenceCountryEntry.alpha2 : null;
    }

    if (bio.Measurements && bio.Measurements !== '--') profile.boobs.size = bio.Measurements;
    if (bio['Fake Boobs']) profile.boobs.natural = bio['Fake Boobs'] === 'No';

    if (bio.Height) {
        const height = extractParenthesizedNumber(bio.Height);

        if (height !== null) profile.height = height;
    }

    if (bio.Weight) {
        const weight = extractParenthesizedNumber(bio.Weight);

        if (weight !== null) profile.weight = weight;
    }

    if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
    if (bio.Piercings) profile.piercings = bio.Piercings === 'Yes';
    if (bio.Tattoos) profile.tattoos = bio.Tattoos === 'Yes';

    if (avatarEl) profile.avatar = avatarEl.src;

    // PH links to Twitter itself for some reason
    profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href)
        .filter(link => link !== 'https://www.twitter.com/');

    return profile;
}
|
||||
|
||||
/**
 * Fetches an actor profile from PornHub's pornstar pages.
 *
 * @param {string} actorName - Actor name; lowercased and whitespace runs turned into
 *   dashes to form the URL slug.
 * @returns {Promise<Object|null>} Result of scrapeProfile, or null when the page does
 *   not respond with status 200.
 */
async function fetchProfile(actorName) {
    const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');

    // Only /pornstar/ pages are queried: model pages are not reliably associated with
    // actual porn stars.
    const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
    const pornstarRes = await bhttp.get(pornstarUrl);

    // avoid scraping an error page into a near-empty profile
    if (pornstarRes.statusCode !== 200) {
        return null;
    }

    return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile,
|
||||
};
|
||||
@@ -10,7 +10,6 @@ const dogfart = require('./dogfart');
|
||||
const evilangel = require('./evilangel');
|
||||
const julesjordan = require('./julesjordan');
|
||||
const kink = require('./kink');
|
||||
const legalporno = require('./legalporno');
|
||||
const mikeadriano = require('./mikeadriano');
|
||||
const mofos = require('./mofos');
|
||||
const pervcity = require('./pervcity');
|
||||
@@ -20,8 +19,12 @@ const realitykings = require('./realitykings');
|
||||
const vixen = require('./vixen');
|
||||
const xempire = require('./xempire');
|
||||
|
||||
// actors
|
||||
// releases and profiles
|
||||
const legalporno = require('./legalporno');
|
||||
|
||||
// profiles
|
||||
const freeones = require('./freeones');
|
||||
const pornhub = require('./pornhub');
|
||||
|
||||
module.exports = {
|
||||
releases: {
|
||||
@@ -47,5 +50,7 @@ module.exports = {
|
||||
},
|
||||
actors: {
|
||||
freeones,
|
||||
legalporno,
|
||||
pornhub,
|
||||
},
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user