Refactored FreeOnes scraper.
This commit is contained in:
parent
e8d6345400
commit
acb114012c
|
|
@ -128,7 +128,7 @@ async function scrapeProfile({ query }) {
|
|||
profile.measurements = bio.measurements?.replace(/["\s]+/g, '');
|
||||
|
||||
profile.hair = bio.hair;
|
||||
profile.eyes = bio.eyes;
|
||||
profile.eyes = bio.eyes?.replace(/eyes?/i, '').trim();
|
||||
profile.ethnicity = bio.ethnicity;
|
||||
|
||||
profile.height = convert(bio.height, 'cm');
|
||||
|
|
@ -145,8 +145,6 @@ async function scrapeProfile({ query }) {
|
|||
];
|
||||
}
|
||||
|
||||
console.log(profile);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,91 +1,115 @@
|
|||
'use strict';
|
||||
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function scrapeProfile(html, actorName) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const profile = { name: actorName };
|
||||
function scrapeProfile({ query }) {
|
||||
const profile = {};
|
||||
|
||||
const bio = Array.from(document.querySelectorAll('a[href^="/babes"]'), (el) => decodeURI(el.href)).reduce((acc, item) => {
|
||||
const keyMatch = item.match(/\[\w+\]/);
|
||||
const bio = Object.fromEntries(query.all('.profile-meta-list li').map((bioEl) => [
|
||||
slugify(unprint.query.content(bioEl, 'span:first-child'), '_'),
|
||||
unprint.query.content(bioEl, 'span:last-child'),
|
||||
]).filter(([_key, value]) => value?.toLowerCase() !== 'unknown'));
|
||||
|
||||
if (keyMatch) {
|
||||
const key = keyMatch[0].slice(1, -1);
|
||||
const [, value] = item.split('=');
|
||||
profile.description = query.content('#description div[data-test="biography"]');
|
||||
|
||||
// both hip and waist link to 'waist', assume biggest value is hip
|
||||
if (key === 'waist' && acc.waist) {
|
||||
if (acc.waist > value) {
|
||||
acc.hip = acc.waist;
|
||||
acc.waist = value;
|
||||
profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
|
||||
profile.age = unprint.extractNumber(bio.age);
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
acc.hip = value;
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
acc[key] = value;
|
||||
}
|
||||
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
if (bio.dateOfBirth) profile.birthdate = moment.utc(bio.dateOfBirth, 'YYYY-MM-DD').toDate();
|
||||
|
||||
if (bio.placeOfBirth && bio.country) profile.birthPlace = `${bio.placeOfBirth}, ${bio.country}`;
|
||||
else if (bio.country) profile.birthPlace = bio.country;
|
||||
|
||||
profile.eyes = bio.eyeColor;
|
||||
profile.hair = bio.hairColor;
|
||||
profile.birthPlace = bio.place_of_birth;
|
||||
profile.nationality = bio.nationality;
|
||||
profile.ethnicity = bio.ethnicity;
|
||||
|
||||
profile.bust = bio.bra;
|
||||
if (bio.waist) profile.waist = Number(bio.waist.split(',')[0]);
|
||||
if (bio.hip) profile.hip = Number(bio.hip.split(',')[0]);
|
||||
profile.eyes = bio.eye_color;
|
||||
profile.hairColor = bio.hair_color;
|
||||
|
||||
if (bio.height) profile.height = Number(bio.height.split(',')[0]);
|
||||
if (bio.weight) profile.weight = Number(bio.weight.split(',')[0]);
|
||||
[profile.bust, profile.cup] = bio.bra?.match(/(\d+)([a-z]+)/i)?.slice(1) || [];
|
||||
|
||||
profile.social = Array.from(document.querySelectorAll('.profile-meta-item a.social-icons'), (el) => el.href);
|
||||
// TODO: differentiate between bust and bra band size
|
||||
if (!profile.bust) {
|
||||
profile.bust = bio.bust;
|
||||
}
|
||||
|
||||
const avatar = document.querySelector('.profile-image-large img').src;
|
||||
if (!avatar.match('placeholder')) profile.avatar = { src: avatar, credit: null };
|
||||
if (!profile.cup) {
|
||||
profile.cup = bio.cup;
|
||||
}
|
||||
|
||||
profile.bust = unprint.extractNumber(bio.bra);
|
||||
profile.cup = bio.cup;
|
||||
profile.waist = unprint.extractNumber(bio.waist);
|
||||
profile.hip = unprint.extractNumber(bio.hip);
|
||||
|
||||
profile.height = unprint.extractNumber(bio.height);
|
||||
profile.weight = unprint.extractNumber(bio.weight);
|
||||
|
||||
profile.foot = unprint.extractNumber(bio.shoe_size);
|
||||
|
||||
profile.socials = query.urls('.profile-meta-item .teaser__link');
|
||||
|
||||
if (/yes/i.test(bio.tattoos)) profile.hasTattoos = true;
|
||||
if (/no/i.test(bio.tattoos)) profile.hasTattoos = false;
|
||||
|
||||
profile.tattoos = bio.tattoo_locations;
|
||||
|
||||
if (/yes/i.test(bio.piercings)) profile.hasPiercings = true;
|
||||
if (/no/i.test(bio.piercings)) profile.hasPiercings = false;
|
||||
|
||||
profile.piercings = bio.piercing_locations;
|
||||
|
||||
if (/natural/i.test(bio.boobs)) profile.naturalBoobs = true;
|
||||
if (/fake/i.test(bio.boobs)) profile.naturalBoobs = false;
|
||||
|
||||
if (/natural/i.test(bio.butt)) profile.naturalButt = true;
|
||||
if (/fake/i.test(bio.butt)) profile.naturalButt = false;
|
||||
|
||||
const avatar = query.img('.dashboard-image-container img');
|
||||
|
||||
if (!avatar?.match(/placeholder/i)) {
|
||||
profile.avatar = avatar;
|
||||
}
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
function scrapeSearch(html) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
return document.querySelector('a.image-link')?.href || null;
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }) {
|
||||
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||
|
||||
const res = await http.get(`https://freeones.nl/${actorSlug}/profile`);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeProfile(res.body.toString(), actorName);
|
||||
async function getActorUrl(actor) {
|
||||
if (actor.url) {
|
||||
return actor.url;
|
||||
}
|
||||
|
||||
const searchRes = await http.get(`https://freeones.nl/babes?q=${actorName}`);
|
||||
const actorPath = scrapeSearch(searchRes.body.toString());
|
||||
const res = await unprint.post('https://www.freeones.com/xhr/search', {
|
||||
performerTypes: ['babe', 'male', 'trans'],
|
||||
query: actor.name,
|
||||
recipe: 'subject',
|
||||
size: 12,
|
||||
});
|
||||
|
||||
if (actorPath) {
|
||||
const actorRes = await http.get(`https://freeones.nl${actorPath}/profile`);
|
||||
if (res.ok) {
|
||||
const model = res.data.hits?.find((result) => slugify(result.name) === actor.slug);
|
||||
|
||||
if (actorRes.statusCode === 200) {
|
||||
return scrapeProfile(actorRes.body.toString(), actorName);
|
||||
if (model?.url) {
|
||||
return `https://www.freeones.com${model.url}/bio`;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchProfile(actor) {
|
||||
const res = await unprint.get(`https://freeones.com/${actor.slug}/bio`);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.context);
|
||||
}
|
||||
|
||||
const actorUrl = await getActorUrl(actor);
|
||||
|
||||
if (actorUrl) {
|
||||
const actorRes = await unprint.get(actorUrl);
|
||||
|
||||
if (actorRes.ok) {
|
||||
return scrapeProfile(actorRes.context);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -249,7 +249,8 @@ const actors = [
|
|||
{ entity: 'boobpedia', name: 'Paige British', fields: ['avatar'] },
|
||||
{ entity: 'angelogodshackoriginal', name: 'Emily Pink', fields: ['avatar'] },
|
||||
{ entity: 'bradmontana', name: 'Alicia Ribeiro', fields: ['avatar', 'gender'] },
|
||||
{ entity: 'adultempire', name: 'Melissa Moore', fields: ['avatar'] },
|
||||
{ entity: 'adultempire', name: 'Abella Danger', fields: ['avatar', 'description', 'measurements', 'eyes', 'height', 'weight'] },
|
||||
{ entity: 'freeones', name: 'Sophia Locke', fields: ['avatar', 'description', 'dateOfBirth', 'age', 'birthPlace', 'nationality', 'ethnicity', 'eyes', 'hairColor', 'bust', 'cup', 'waist', 'hip', 'height', 'weight', 'foot', 'socials', 'hasTattoos', 'tattoos', 'hasPiercings', 'piercings', 'naturalBoobs'] },
|
||||
];
|
||||
|
||||
const actorScrapers = scrapers.actors;
|
||||
|
|
@ -291,7 +292,7 @@ const validators = {
|
|||
height: (value) => !!Number(value) && value > 130,
|
||||
weight: (value) => !!Number(value) && value > 40,
|
||||
eyes: (value) => typeof value === 'string' && value.length > 3,
|
||||
hairColor: (value) => typeof value === 'string' && value.length > 3,
|
||||
hairColor: (value) => typeof value === 'string' && value.length > 2,
|
||||
measurements: (value) => /(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i.test(value), // from actors module
|
||||
dateOfBirth: (value) => value instanceof Date && !Number.isNaN(value.getFullYear()),
|
||||
hasTattoos: (value) => typeof value === 'boolean',
|
||||
|
|
|
|||
Loading…
Reference in New Issue