forked from DebaucheryLibrarian/traxxx
Scraping actor profiles from FreeOnes.
This commit is contained in:
136
src/scrapers/freeones.js
Normal file
136
src/scrapers/freeones.js
Normal file
@@ -0,0 +1,136 @@
|
||||
'use strict';
|
||||
|
||||
/* eslint-disable newline-per-chained-call */
|
||||
const bhttp = require('bhttp');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const knex = require('../knex');
|
||||
|
||||
/**
 * Scrape the summary biography from a FreeOnes actor front page.
 *
 * Rows that are absent from the bio list resolve to null (or false for
 * `active`) instead of throwing, so one missing row does not abort the
 * whole profile.
 *
 * @param {string} html - Markup of the actor front page.
 * @param {string} url - Address the markup was fetched from (not used here).
 * @param {string} name - Actor name, copied into the result as-is.
 * @returns {Promise<{bio: Object, url: string}>} The scraped bio and the URL of the full biography page.
 */
async function scrapeActorFrontpage(html, url, name) {
    const { document } = new JSDOM(html).window;
    const bioEl = document.querySelector('.dashboard-bio-list');

    // A scheme is prepended, so the link is presumably protocol-relative ("//host/path").
    const bioUrl = `https:${document.querySelector('.seemore a').href}`;

    // Pair every <dt> label with the <dd> value found at the same position.
    const keys = Array.from(bioEl.querySelectorAll('dt'), el => el.textContent.trim());
    const values = Array.from(bioEl.querySelectorAll('dd'), el => el.textContent.trim());
    const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});

    // Lower-case a value when the row exists; a missing row becomes null.
    const lowerOrNull = value => (value ? value.toLowerCase() : null);

    let birthdate = null;
    const birthdateString = bio['Date of Birth:'];

    if (birthdateString) {
        // Only cut at " (" when it is present: indexOf() returns -1 otherwise,
        // and slice(0, -1) would chop the last digit off the year.
        const suffixIndex = birthdateString.indexOf(' (');
        const dateText = suffixIndex === -1 ? birthdateString : birthdateString.slice(0, suffixIndex);
        const parsedBirthdate = moment.utc(dateText, 'MMMM D, YYYY');

        // Never hand an Invalid Date to the caller.
        birthdate = parsedBirthdate.isValid() ? parsedBirthdate.toDate() : null;
    }

    const boobsSizeString = bio['Measurements:'];
    const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
    const boobsNatural = bio['Fake Boobs:'] === 'No';
    const active = (bio['Career Status:'] || '').trim() === 'Active';

    // Skip the lookup when the row is missing; an undefined binding must not reach the query.
    const residenceCountryName = bio['Country of Origin:'];
    const countryEntry = residenceCountryName
        ? await knex('countries').where({ name: residenceCountryName }).first()
        : null;
    const residenceCountry = countryEntry ? countryEntry.alpha2 : null;
    const birthPlace = bio['Place of Birth:'];

    const hair = lowerOrNull(bio['Hair Color:']);
    const eyes = lowerOrNull(bio['Eye Color:']);

    const piercingsString = bio['Piercings:'];
    const piercings = piercingsString === 'None' ? null : piercingsString;

    // Same placeholder handling as the full biography page, including a missing row.
    const tattoosString = bio['Tattoos:'];
    const tattoos = tattoosString === undefined || tattoosString === 'Unknown (add)' || tattoosString === 'None' ? null : tattoosString;

    const social = Array.from(bioEl.querySelectorAll('.dashboard-socialmedia a'), el => el.href);

    return {
        bio: {
            name,
            gender: 'female',
            birthdate,
            residenceCountry,
            birthPlace,
            boobs: {
                size: boobsSize,
                natural: boobsNatural,
            },
            hair,
            eyes,
            piercings,
            tattoos,
            active,
            social,
        },
        url: bioUrl,
    };
}
|
||||
|
||||
/**
 * Scrape the full biography table from a FreeOnes actor bio page.
 *
 * Values read here are layered on top of `frontpageBio`, so every key set
 * below overrides the front-page value. Rows that are absent from the
 * table resolve to null (or false for `active`) instead of throwing.
 *
 * @param {string} html - Markup of the biography page.
 * @param {Object} frontpageBio - Bio previously scraped from the front page; spread into the result first.
 * @param {string} url - Address the markup was fetched from (not used here).
 * @param {string} name - Actor name, copied into the result as-is.
 * @returns {Promise<Object>} The merged actor profile.
 */
async function scrapeActorBio(html, frontpageBio, url, name) {
    const { document } = new JSDOM(html).window;
    const bioEl = document.querySelector('#biographyTable');

    // First column holds the labels, second column the matching values.
    const keys = Array.from(bioEl.querySelectorAll('td:nth-child(1)'), el => el.textContent.trim());
    const values = Array.from(bioEl.querySelectorAll('td:nth-child(2)'), el => el.textContent.trim());
    const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});

    // Lower-case a value when the row exists; a missing row becomes null.
    const lowerOrNull = value => (value ? value.toLowerCase() : null);

    let birthdate = null;
    const birthdateString = bio['Date of Birth:'];

    if (birthdateString) {
        // Only cut at " (" when it is present: indexOf() returns -1 otherwise,
        // and slice(0, -1) would chop the last digit off the year.
        const suffixIndex = birthdateString.indexOf(' (');
        const dateText = suffixIndex === -1 ? birthdateString : birthdateString.slice(0, suffixIndex);
        const parsedBirthdate = moment.utc(dateText, 'MMMM D, YYYY');

        // Never hand an Invalid Date to the caller.
        birthdate = parsedBirthdate.isValid() ? parsedBirthdate.toDate() : null;
    }

    const active = (bio['Career Status:'] || '').trim() === 'Active';

    const boobsSizeString = bio['Measurements:'];
    const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
    // NOTE(review): this page is read with a lower-case "b" ("Fake boobs:"), unlike the front page — confirm against the live markup.
    const boobsNatural = bio['Fake boobs:'] === 'No';
    const ethnicity = bio['Ethnicity:'];

    // Skip the lookup when the row is missing; an undefined binding must not reach the query.
    const residenceCountryName = bio['Country of Origin:'];
    const countryEntry = residenceCountryName
        ? await knex('countries').where({ name: residenceCountryName }).first()
        : null;
    const residenceCountry = countryEntry ? countryEntry.alpha2 : null;
    const birthPlace = bio['Place of Birth:'];

    const hair = lowerOrNull(bio['Hair Color:']);
    const eyes = lowerOrNull(bio['Eye Color:']);

    const piercingsString = bio['Piercings:'];
    const piercings = piercingsString === 'None' ? null : piercingsString;

    // A missing row and the site's placeholder texts all mean "nothing recorded".
    const tattoosString = bio['Tattoos:'];
    const tattoos = tattoosString === undefined || tattoosString === 'Unknown (add)' || tattoosString === 'None' ? null : tattoosString;

    const social = Array.from(bioEl.querySelectorAll('#socialmedia a'), el => el.href);

    return {
        ...frontpageBio,
        name,
        gender: 'female',
        birthdate,
        residenceCountry,
        birthPlace,
        ethnicity,
        boobs: {
            size: boobsSize,
            natural: boobsNatural,
        },
        hair,
        eyes,
        piercings,
        tattoos,
        active,
        social,
    };
}
|
||||
|
||||
/**
 * Fetch and scrape an actor profile from FreeOnes.
 *
 * The front page is requested first; it yields both a summary bio and the
 * URL of the full biography page, which is then fetched and merged in.
 *
 * @param {string} actorName - Actor name as written, e.g. "Jane Doe".
 * @returns {Promise<Object>} The merged actor profile produced by scrapeActorBio.
 */
async function fetchActor(actorName) {
    // Replace every run of whitespace: a plain string pattern would only
    // convert the first space and break names of three or more words.
    const slug = actorName.trim().replace(/\s+/g, '_');
    const frontpageUrl = `https://freeones.com/html/v_links/${slug}`;

    const resFrontpage = await bhttp.get(frontpageUrl);
    const { url, bio } = await scrapeActorFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);

    const resBio = await bhttp.get(url);

    return scrapeActorBio(resBio.body.toString(), bio, url, actorName);
}
|
||||
|
||||
// Public surface of the FreeOnes scraper: only the actor lookup is exported.
module.exports = { fetchActor };
|
||||
@@ -1,5 +1,6 @@
|
||||
'use strict';
|
||||
|
||||
// releases
|
||||
const twentyonesextury = require('./21sextury');
|
||||
const bangbros = require('./bangbros');
|
||||
const blowpass = require('./blowpass');
|
||||
@@ -19,24 +20,32 @@ const realitykings = require('./realitykings');
|
||||
const vixen = require('./vixen');
|
||||
const xempire = require('./xempire');
|
||||
|
||||
// actors
|
||||
const freeones = require('./freeones');
|
||||
|
||||
module.exports = {
|
||||
'21sextury': twentyonesextury,
|
||||
bangbros,
|
||||
blowpass,
|
||||
brazzers,
|
||||
ddfnetwork,
|
||||
dogfart,
|
||||
dogfartnetwork: dogfart,
|
||||
evilangel,
|
||||
julesjordan,
|
||||
kink,
|
||||
legalporno,
|
||||
mikeadriano,
|
||||
mofos,
|
||||
pervcity,
|
||||
private: privateNetwork,
|
||||
naughtyamerica,
|
||||
realitykings,
|
||||
vixen,
|
||||
xempire,
|
||||
releases: {
|
||||
'21sextury': twentyonesextury,
|
||||
bangbros,
|
||||
blowpass,
|
||||
brazzers,
|
||||
ddfnetwork,
|
||||
dogfart,
|
||||
dogfartnetwork: dogfart,
|
||||
evilangel,
|
||||
julesjordan,
|
||||
kink,
|
||||
legalporno,
|
||||
mikeadriano,
|
||||
mofos,
|
||||
pervcity,
|
||||
private: privateNetwork,
|
||||
naughtyamerica,
|
||||
realitykings,
|
||||
vixen,
|
||||
xempire,
|
||||
},
|
||||
actors: {
|
||||
freeones,
|
||||
},
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user