traxxx/src/scrapers/freeones.js

144 lines
4.9 KiB
JavaScript
Raw Normal View History

2019-11-17 02:56:45 +00:00
'use strict';
/* eslint-disable newline-per-chained-call */
const bhttp = require('bhttp');
const { JSDOM } = require('jsdom');
const moment = require('moment');
const knex = require('../knex');
/**
 * Scrape the summary bio shown on an actor's front ("links") page.
 *
 * The page lists the bio as <dt>/<dd> pairs inside `.dashboard-bio-list`;
 * the pairs are zipped into a label -> value map and then normalised.
 * Rows that are absent from the page no longer make the scraper throw.
 *
 * @param {string} html - Markup of the front page.
 * @param {string} url - Address the markup was fetched from (currently unused).
 * @param {string} name - Actor name, copied verbatim into the result.
 * @returns {Promise<{bio: Object, url: string}>} The normalised bio and the
 *   address of the full bio page taken from the `.seemore a` link.
 * @throws {TypeError} If the bio list or the "see more" link is not on the page.
 */
async function scrapeActorFrontpage(html, url, name) {
  const { document } = new JSDOM(html).window;

  const bioEl = document.querySelector('.dashboard-bio-list');
  const bioUrl = `https:${document.querySelector('.seemore a').href}`;

  const keys = Array.from(bioEl.querySelectorAll('dt'), el => el.textContent.trim());
  const values = Array.from(bioEl.querySelectorAll('dd'), el => el.textContent.trim());
  const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});

  let birthdate = null;
  const birthdateString = bio['Date of Birth:'];

  if (birthdateString && birthdateString !== 'Unknown (Add)') {
    // Drop a trailing parenthesised suffix, but only if there is one:
    // slicing with indexOf's -1 would cut off the last digit of the year.
    const suffixIndex = birthdateString.indexOf(' (');
    const dateText = suffixIndex === -1 ? birthdateString : birthdateString.slice(0, suffixIndex);
    const parsedDate = moment.utc(dateText, 'MMMM D, YYYY');

    // Report unparseable text as "unknown" instead of leaking an Invalid Date.
    birthdate = parsedDate.isValid() ? parsedDate.toDate() : null;
  }

  const boobsSizeString = bio['Measurements:'];
  const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
  const boobsNatural = bio['Fake Boobs:'] === 'No';

  // Values were trimmed when collected, so a plain comparison is enough and
  // does not throw when the row is missing.
  const active = bio['Career Status:'] === 'Active';

  // Skip the lookup when the page has no country: there is nothing to match,
  // and knex refuses to compile a query with an undefined binding.
  const residenceCountryName = bio['Country of Origin:'];
  const countryEntry = residenceCountryName
    ? await knex('countries').where({ name: residenceCountryName }).first()
    : null;
  const residenceCountry = countryEntry ? countryEntry.alpha2 : null;

  const birthPlace = bio['Place of Birth:'];

  const hairString = bio['Hair Color:'];
  const hair = hairString === undefined ? null : hairString.toLowerCase();

  const eyesString = bio['Eye Color:'];
  const eyes = eyesString === undefined ? null : eyesString.toLowerCase();

  const piercingsString = bio['Piercings:'];
  const piercings = piercingsString === 'None' ? null : piercingsString;

  // A missing row is treated like the explicit placeholders, matching scrapeActorBio.
  const tattoosString = bio['Tattoos:'];
  const tattoos = tattoosString === undefined || tattoosString === 'Unknown (add)' || tattoosString === 'None'
    ? null
    : tattoosString;

  const social = Array.from(bioEl.querySelectorAll('.dashboard-socialmedia a'), el => el.href);

  return {
    bio: {
      name,
      gender: 'female',
      birthdate,
      residenceCountry,
      birthPlace,
      boobs: {
        size: boobsSize,
        natural: boobsNatural,
      },
      hair,
      eyes,
      piercings,
      tattoos,
      active,
      social,
    },
    url: bioUrl,
  };
}
/**
 * Scrape an actor's full bio page and merge it over the front-page bio.
 *
 * The page lists the bio as two-column rows inside `#biographyTable`; the
 * first column is used as the label and the second as the value. Fields
 * produced here override the same fields in `frontpageBio`; anything only
 * present in `frontpageBio` is carried over unchanged. Rows that are absent
 * from the page no longer make the scraper throw.
 *
 * @param {string} html - Markup of the bio page.
 * @param {Object} frontpageBio - Bio previously scraped from the front page.
 * @param {string} url - Address the markup was fetched from (currently unused).
 * @param {string} name - Actor name, copied verbatim into the result.
 * @returns {Promise<Object>} The merged, normalised bio.
 * @throws {TypeError} If `#biographyTable` is not on the page.
 */
async function scrapeActorBio(html, frontpageBio, url, name) {
  const { document } = new JSDOM(html).window;

  const bioEl = document.querySelector('#biographyTable');

  const keys = Array.from(bioEl.querySelectorAll('td:nth-child(1)'), el => el.textContent.trim());
  const values = Array.from(bioEl.querySelectorAll('td:nth-child(2)'), el => el.textContent.trim());
  const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});

  let birthdate = null;
  const birthdateString = bio['Date of Birth:'];

  if (birthdateString && birthdateString !== 'Unknown') {
    // Drop a trailing parenthesised suffix, but only if there is one:
    // slicing with indexOf's -1 would cut off the last digit of the year.
    const suffixIndex = birthdateString.indexOf(' (');
    const dateText = suffixIndex === -1 ? birthdateString : birthdateString.slice(0, suffixIndex);
    const parsedDate = moment.utc(dateText, 'MMMM D, YYYY');

    // Report unparseable text as "unknown" instead of leaking an Invalid Date.
    birthdate = parsedDate.isValid() ? parsedDate.toDate() : null;
  }

  const boobsSizeString = bio['Measurements:'];
  const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
  const boobsNatural = bio['Fake boobs:'] === 'No';

  const ethnicity = bio['Ethnicity:'];

  // Skip the lookup when the page has no country: there is nothing to match,
  // and knex refuses to compile a query with an undefined binding.
  const residenceCountryName = bio['Country of Origin:'];
  const countryEntry = residenceCountryName
    ? await knex('countries').where({ name: residenceCountryName }).first()
    : null;
  const residenceCountry = countryEntry ? countryEntry.alpha2 : null;

  const birthPlace = bio['Place of Birth:'];

  const hairString = bio['Hair Color:'];
  const hair = hairString === undefined ? null : hairString.toLowerCase();

  const eyesString = bio['Eye Color:'];
  const eyes = eyesString === undefined ? null : eyesString.toLowerCase();

  const piercingsString = bio['Piercings:'];
  const piercings = piercingsString === 'None' ? null : piercingsString;

  const tattoosString = bio['Tattoos:'];
  const tattoos = tattoosString === undefined || tattoosString === 'Unknown (add)' || tattoosString === 'None'
    ? null
    : tattoosString;

  const social = Array.from(bioEl.querySelectorAll('#socialmedia a'), el => el.href);

  return {
    ...frontpageBio,
    name,
    gender: 'female',
    birthdate,
    residenceCountry,
    birthPlace,
    ethnicity,
    boobs: {
      size: boobsSize,
      natural: boobsNatural,
    },
    hair,
    eyes,
    piercings,
    tattoos,
    social,
  };
}
/**
 * Fetch and scrape an actor's profile by name.
 *
 * Requests the actor's front page, scrapes the summary bio and the link to
 * the full bio page from it, then requests and scrapes that bio page.
 *
 * @param {string} actorName - Actor name; every space becomes an underscore
 *   to form the URL slug.
 * @returns {Promise<Object|null>} The merged bio; the front-page bio alone if
 *   the bio page does not answer with status 200; `null` if the front page
 *   does not answer with status 200.
 */
async function fetchActor(actorName) {
  // A string pattern would only replace the first space, producing a wrong
  // slug for names with three or more words.
  const slug = actorName.replace(/ /g, '_');
  const frontpageUrl = `https://freeones.com/html/v_links/${slug}`;

  const resFrontpage = await bhttp.get(frontpageUrl);

  if (resFrontpage.statusCode !== 200) {
    return null;
  }

  const { url, bio } = await scrapeActorFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);

  const resBio = await bhttp.get(url);

  if (resBio.statusCode !== 200) {
    // Feeding an error page to the bio scraper would only make it throw;
    // keep what the front page already provided.
    return bio;
  }

  return scrapeActorBio(resBio.body.toString(), bio, url, actorName);
}
// Public interface of this scraper: only the by-name actor lookup is exposed.
module.exports = { fetchActor };