2019-11-17 02:56:45 +00:00
|
|
|
'use strict';
|
|
|
|
|
|
|
|
/* eslint-disable newline-per-chained-call */
|
|
|
|
const bhttp = require('bhttp');
|
|
|
|
const { JSDOM } = require('jsdom');
|
|
|
|
const moment = require('moment');
|
|
|
|
|
|
|
|
const knex = require('../knex');
|
|
|
|
|
2019-11-20 03:53:36 +00:00
|
|
|
/**
 * Scrape the summary bio from a FreeOnes profile front page.
 *
 * Reads the `dt`/`dd` pairs inside `.dashboard-bio-list` into a lookup table,
 * normalises the fields used by the profile and resolves the country name to
 * its alpha-2 code through the `countries` table.
 *
 * @param {string} html - Markup of the profile front page.
 * @param {string} url - Address the markup came from (accepted for symmetry with the caller, not read here).
 * @param {string} name - Actor name, copied into the result unchanged.
 * @returns {Promise<{bio: Object, url: string}>} The scraped bio and the address of the full
 *   biography page (the `.seemore a` link prefixed with `https:`).
 * @throws {TypeError} When `.dashboard-bio-list` or `.seemore a` is missing from the markup.
 */
async function scrapeProfileFrontpage(html, url, name) {
    const { document } = new JSDOM(html).window;
    const bioEl = document.querySelector('.dashboard-bio-list');
    const bioUrl = `https:${document.querySelector('.seemore a').href}`;

    const keys = Array.from(bioEl.querySelectorAll('dt'), el => el.textContent.trim());
    const values = Array.from(bioEl.querySelectorAll('dd'), el => el.textContent.trim());

    // Plain assignment instead of spreading the accumulator on every iteration.
    const bio = {};
    keys.forEach((key, index) => {
        bio[key] = values[index];
    });

    // Placeholders appear as 'Unknown', 'Unknown (Add)' or 'Unknown (add)'; match them all.
    const isKnown = value => value !== undefined && !/^unknown\b/i.test(value);
    const lowerCaseOrNull = value => (value ? value.toLowerCase() : null);

    const birthdateString = bio['Date of Birth:'];
    let birthdate = null;

    if (birthdateString && isKnown(birthdateString)) {
        // Keep only the text before the first ' ('. Splitting (rather than slicing up to
        // indexOf) leaves the date intact when there is no parenthesised suffix.
        const [dateText] = birthdateString.split(' (');
        const parsedBirthdate = moment.utc(dateText, 'MMMM D, YYYY');

        birthdate = parsedBirthdate.isValid() ? parsedBirthdate.toDate() : null;
    }

    const measurementsString = bio['Measurements:'];
    const measurements = measurementsString ? measurementsString.split('-') : [];
    const [bust = null, waist = null, hip = null] = measurements
        .map(measurement => (measurement === '??' ? null : measurement));

    const naturalBoobs = bio['Fake Boobs:'] === 'No';

    const residenceCountryName = bio['Country of Origin:'];
    // Skip the lookup when the page has no country: knex rejects undefined bindings.
    const countryEntry = residenceCountryName
        ? await knex('countries').where({ name: residenceCountryName }).first()
        : null;
    const residenceCountry = countryEntry ? countryEntry.alpha2 : null;
    const birthPlace = bio['Place of Birth:'];

    const hair = lowerCaseOrNull(bio['Hair Color:']);
    const eyes = lowerCaseOrNull(bio['Eye Color:']);

    const piercingsString = bio['Piercings:'];
    const hasPiercings = isKnown(piercingsString) && piercingsString !== 'None';
    const piercings = hasPiercings ? piercingsString : null;

    const tattoosString = bio['Tattoos:'];
    const hasTattoos = isKnown(tattoosString) && tattoosString !== 'None';
    const tattoos = hasTattoos ? tattoosString : null;

    const social = Array.from(bioEl.querySelectorAll('.dashboard-socialmedia a'), el => el.href);

    return {
        bio: {
            name,
            gender: 'female',
            birthdate,
            residenceCountry,
            birthPlace,
            naturalBoobs,
            bust,
            waist,
            hip,
            hair,
            eyes,
            piercings,
            tattoos,
            social,
        },
        url: bioUrl,
    };
}
|
|
|
|
|
2019-11-20 03:53:36 +00:00
|
|
|
/**
 * Scrape the full biography page of a FreeOnes profile.
 *
 * Reads the first and second cell of each row in `#biographyTable` as key/value
 * pairs, normalises the fields used by the profile and layers them on top of
 * the bio already scraped from the front page.
 *
 * @param {string} html - Markup of the biography page.
 * @param {Object} frontpageBio - Bio scraped from the front page; its properties are kept
 *   unless this page provides a property of the same name.
 * @param {string} url - Address the markup came from (accepted for symmetry with the caller, not read here).
 * @param {string} name - Actor name, copied into the result unchanged.
 * @returns {Promise<Object>} The merged profile.
 * @throws {TypeError} When `#biographyTable` is missing from the markup.
 */
async function scrapeProfileBio(html, frontpageBio, url, name) {
    const { document } = new JSDOM(html).window;
    const bioEl = document.querySelector('#biographyTable');

    const keys = Array.from(bioEl.querySelectorAll('td:nth-child(1)'), el => el.textContent.trim());
    const values = Array.from(bioEl.querySelectorAll('td:nth-child(2)'), el => el.textContent.trim());

    // Plain assignment instead of spreading the accumulator on every iteration.
    const bio = {};
    keys.forEach((key, index) => {
        bio[key] = values[index];
    });

    // Placeholders appear as 'Unknown', 'Unknown (Add)' or 'Unknown (add)'; match them all.
    const isKnown = value => value !== undefined && !/^unknown\b/i.test(value);
    const lowerCaseOrNull = value => (value ? value.toLowerCase() : null);
    // First run of digits in the text, or null when the field is absent or has no digits.
    const firstNumberOrNull = (value) => {
        const match = value ? value.match(/\d+/) : null;

        return match ? Number(match[0]) : null;
    };

    const birthdateString = bio['Date of Birth:'];
    let birthdate = null;

    if (birthdateString && isKnown(birthdateString)) {
        // Keep only the text before the first ' ('. Splitting (rather than slicing up to
        // indexOf) leaves the date intact when there is no parenthesised suffix.
        const [dateText] = birthdateString.split(' (');
        const parsedBirthdate = moment.utc(dateText, 'MMMM D, YYYY');

        birthdate = parsedBirthdate.isValid() ? parsedBirthdate.toDate() : null;
    }

    const measurementsString = bio['Measurements:'];
    const measurements = measurementsString ? measurementsString.split('-') : [];
    const [bust = null, waist = null, hip = null] = measurements
        .map(measurement => (measurement === '??' ? null : measurement));

    const boobsNatural = bio['Fake boobs:'] === 'No';
    const ethnicity = bio['Ethnicity:'];

    const residenceCountryName = bio['Country of Origin:'];
    // Skip the lookup when the page has no country: knex rejects undefined bindings.
    const countryEntry = residenceCountryName
        ? await knex('countries').where({ name: residenceCountryName }).first()
        : null;
    const residenceCountry = countryEntry ? countryEntry.alpha2 : null;
    const birthPlace = bio['Place of Birth:'];

    const hair = lowerCaseOrNull(bio['Hair Color:']);
    const eyes = lowerCaseOrNull(bio['Eye Color:']);
    const height = firstNumberOrNull(bio['Height:']);
    const weight = firstNumberOrNull(bio['Weight:']);

    const piercingsString = bio['Piercings:'];
    const hasPiercings = isKnown(piercingsString) && piercingsString !== 'None';
    const piercings = hasPiercings ? piercingsString : null;

    const tattoosString = bio['Tattoos:'];
    const hasTattoos = isKnown(tattoosString) && tattoosString !== 'None';
    const tattoos = hasTattoos ? tattoosString : null;

    const social = Array.from(bioEl.querySelectorAll('#socialmedia a'), el => el.href);

    return {
        ...frontpageBio,
        name,
        gender: 'female',
        birthdate,
        residenceCountry,
        birthPlace,
        ethnicity,
        naturalBoobs: boobsNatural,
        bust,
        waist,
        hip,
        height,
        weight,
        hair,
        eyes,
        hasPiercings,
        hasTattoos,
        piercings,
        tattoos,
        social,
    };
}
|
|
|
|
|
2019-11-20 03:53:36 +00:00
|
|
|
/**
 * Fetch one profile front page and, if it exists, the biography page it links to.
 *
 * @param {string} frontpageUrl - Address of the profile front page.
 * @param {string} actorName - Actor name, handed through to the scrapers.
 * @returns {Promise<Object|null>} The scraped profile, the front-page bio alone when the
 *   biography page does not answer with status 200, or `null` when the front page itself
 *   does not answer with status 200.
 */
async function fetchProfileFromFrontpage(frontpageUrl, actorName) {
    const resFrontpage = await bhttp.get(frontpageUrl);

    if (resFrontpage.statusCode !== 200) {
        return null;
    }

    const { url, bio } = await scrapeProfileFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);
    const resBio = await bhttp.get(url);

    if (resBio.statusCode !== 200) {
        // Keep what the front page gave us rather than feeding an error page to the bio scraper.
        return bio;
    }

    return scrapeProfileBio(resBio.body.toString(), bio, url, actorName);
}

/**
 * Look up an actor on FreeOnes by name.
 *
 * Tries the profile under the actor's own name first, then under the same name
 * with `_Babe` appended.
 *
 * @param {string} actorName - Actor name with words separated by spaces.
 * @returns {Promise<Object|null>} The scraped profile, or `null` when neither page exists.
 */
async function fetchProfile(actorName) {
    // A string pattern would replace only the first space; names can have more than two words.
    const slug = actorName.replace(/ /g, '_');
    const frontpageUrl = `https://www.freeones.com/html/v_links/${slug}`;

    const profile = await fetchProfileFromFrontpage(frontpageUrl, actorName);

    if (profile) {
        return profile;
    }

    // apparently some actors are appended 'Babe' as their surname...
    const fallbackSlug = `${slug}_Babe`;
    const fallbackUrl = `https://www.freeones.com/html/s_links/${fallbackSlug}`;

    return fetchProfileFromFrontpage(fallbackUrl, actorName);
}
|
|
|
|
|
|
|
|
// Public surface of this scraper: only the by-name profile lookup is exported.
module.exports = { fetchProfile };
|