Refactored FreeOnes scraper.

This commit is contained in:
DebaucheryLibrarian 2026-02-06 06:44:03 +01:00
parent e8d6345400
commit acb114012c
3 changed files with 93 additions and 70 deletions

View File

@ -128,7 +128,7 @@ async function scrapeProfile({ query }) {
profile.measurements = bio.measurements?.replace(/["\s]+/g, ''); profile.measurements = bio.measurements?.replace(/["\s]+/g, '');
profile.hair = bio.hair; profile.hair = bio.hair;
profile.eyes = bio.eyes; profile.eyes = bio.eyes?.replace(/eyes?/i, '').trim();
profile.ethnicity = bio.ethnicity; profile.ethnicity = bio.ethnicity;
profile.height = convert(bio.height, 'cm'); profile.height = convert(bio.height, 'cm');
@ -145,8 +145,6 @@ async function scrapeProfile({ query }) {
]; ];
} }
console.log(profile);
return profile; return profile;
} }

View File

@ -1,91 +1,115 @@
'use strict'; 'use strict';
const { JSDOM } = require('jsdom'); const unprint = require('unprint');
const moment = require('moment');
const http = require('../utils/http'); const slugify = require('../utils/slugify');
function scrapeProfile(html, actorName) { function scrapeProfile({ query }) {
const { document } = new JSDOM(html).window; const profile = {};
const profile = { name: actorName };
const bio = Array.from(document.querySelectorAll('a[href^="/babes"]'), (el) => decodeURI(el.href)).reduce((acc, item) => { const bio = Object.fromEntries(query.all('.profile-meta-list li').map((bioEl) => [
const keyMatch = item.match(/\[\w+\]/); slugify(unprint.query.content(bioEl, 'span:first-child'), '_'),
unprint.query.content(bioEl, 'span:last-child'),
]).filter(([_key, value]) => value?.toLowerCase() !== 'unknown'));
if (keyMatch) { profile.description = query.content('#description div[data-test="biography"]');
const key = keyMatch[0].slice(1, -1);
const [, value] = item.split('=');
// both hip and waist link to 'waist', assume biggest value is hip profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
if (key === 'waist' && acc.waist) { profile.age = unprint.extractNumber(bio.age);
if (acc.waist > value) {
acc.hip = acc.waist;
acc.waist = value;
return acc; profile.birthPlace = bio.place_of_birth;
} profile.nationality = bio.nationality;
acc.hip = value;
return acc;
}
acc[key] = value;
}
return acc;
}, {});
if (bio.dateOfBirth) profile.birthdate = moment.utc(bio.dateOfBirth, 'YYYY-MM-DD').toDate();
if (bio.placeOfBirth && bio.country) profile.birthPlace = `${bio.placeOfBirth}, ${bio.country}`;
else if (bio.country) profile.birthPlace = bio.country;
profile.eyes = bio.eyeColor;
profile.hair = bio.hairColor;
profile.ethnicity = bio.ethnicity; profile.ethnicity = bio.ethnicity;
profile.bust = bio.bra; profile.eyes = bio.eye_color;
if (bio.waist) profile.waist = Number(bio.waist.split(',')[0]); profile.hairColor = bio.hair_color;
if (bio.hip) profile.hip = Number(bio.hip.split(',')[0]);
if (bio.height) profile.height = Number(bio.height.split(',')[0]); [profile.bust, profile.cup] = bio.bra?.match(/(\d+)([a-z]+)/i)?.slice(1) || [];
if (bio.weight) profile.weight = Number(bio.weight.split(',')[0]);
profile.social = Array.from(document.querySelectorAll('.profile-meta-item a.social-icons'), (el) => el.href); // TODO: differentiate between bust and bra band size
if (!profile.bust) {
profile.bust = bio.bust;
}
const avatar = document.querySelector('.profile-image-large img').src; if (!profile.cup) {
if (!avatar.match('placeholder')) profile.avatar = { src: avatar, credit: null }; profile.cup = bio.cup;
}
profile.bust = unprint.extractNumber(bio.bra);
profile.cup = bio.cup;
profile.waist = unprint.extractNumber(bio.waist);
profile.hip = unprint.extractNumber(bio.hip);
profile.height = unprint.extractNumber(bio.height);
profile.weight = unprint.extractNumber(bio.weight);
profile.foot = unprint.extractNumber(bio.shoe_size);
profile.socials = query.urls('.profile-meta-item .teaser__link');
if (/yes/i.test(bio.tattoos)) profile.hasTattoos = true;
if (/no/i.test(bio.tattoos)) profile.hasTattoos = false;
profile.tattoos = bio.tattoo_locations;
if (/yes/i.test(bio.piercings)) profile.hasPiercings = true;
if (/no/i.test(bio.piercings)) profile.hasPiercings = false;
profile.piercings = bio.piercing_locations;
if (/natural/i.test(bio.boobs)) profile.naturalBoobs = true;
if (/fake/i.test(bio.boobs)) profile.naturalBoobs = false;
if (/natural/i.test(bio.butt)) profile.naturalButt = true;
if (/fake/i.test(bio.butt)) profile.naturalButt = false;
const avatar = query.img('.dashboard-image-container img');
if (!avatar?.match(/placeholder/i)) {
profile.avatar = avatar;
}
return profile; return profile;
} }
function scrapeSearch(html) { async function getActorUrl(actor) {
const { document } = new JSDOM(html).window; if (actor.url) {
return actor.url;
return document.querySelector('a.image-link')?.href || null;
}
async function fetchProfile({ name: actorName }) {
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
const res = await http.get(`https://freeones.nl/${actorSlug}/profile`);
if (res.statusCode === 200) {
return scrapeProfile(res.body.toString(), actorName);
} }
const searchRes = await http.get(`https://freeones.nl/babes?q=${actorName}`); const res = await unprint.post('https://www.freeones.com/xhr/search', {
const actorPath = scrapeSearch(searchRes.body.toString()); performerTypes: ['babe', 'male', 'trans'],
query: actor.name,
recipe: 'subject',
size: 12,
});
if (actorPath) { if (res.ok) {
const actorRes = await http.get(`https://freeones.nl${actorPath}/profile`); const model = res.data.hits?.find((result) => slugify(result.name) === actor.slug);
if (actorRes.statusCode === 200) { if (model?.url) {
return scrapeProfile(actorRes.body.toString(), actorName); return `https://www.freeones.com${model.url}/bio`;
}
} }
return null; return null;
}
async function fetchProfile(actor) {
const res = await unprint.get(`https://freeones.com/${actor.slug}/bio`);
if (res.ok) {
return scrapeProfile(res.context);
}
const actorUrl = await getActorUrl(actor);
if (actorUrl) {
const actorRes = await unprint.get(actorUrl);
if (actorRes.ok) {
return scrapeProfile(actorRes.context);
}
} }
return null; return null;

View File

@ -249,7 +249,8 @@ const actors = [
{ entity: 'boobpedia', name: 'Paige British', fields: ['avatar'] }, { entity: 'boobpedia', name: 'Paige British', fields: ['avatar'] },
{ entity: 'angelogodshackoriginal', name: 'Emily Pink', fields: ['avatar'] }, { entity: 'angelogodshackoriginal', name: 'Emily Pink', fields: ['avatar'] },
{ entity: 'bradmontana', name: 'Alicia Ribeiro', fields: ['avatar', 'gender'] }, { entity: 'bradmontana', name: 'Alicia Ribeiro', fields: ['avatar', 'gender'] },
{ entity: 'adultempire', name: 'Melissa Moore', fields: ['avatar'] }, { entity: 'adultempire', name: 'Abella Danger', fields: ['avatar', 'description', 'measurements', 'eyes', 'height', 'weight'] },
{ entity: 'freeones', name: 'Sophia Locke', fields: ['avatar', 'description', 'dateOfBirth', 'age', 'birthPlace', 'nationality', 'ethnicity', 'eyes', 'hairColor', 'bust', 'cup', 'waist', 'hip', 'height', 'weight', 'foot', 'socials', 'hasTattoos', 'tattoos', 'hasPiercings', 'piercings', 'naturalBoobs'] },
]; ];
const actorScrapers = scrapers.actors; const actorScrapers = scrapers.actors;
@ -291,7 +292,7 @@ const validators = {
height: (value) => !!Number(value) && value > 130, height: (value) => !!Number(value) && value > 130,
weight: (value) => !!Number(value) && value > 40, weight: (value) => !!Number(value) && value > 40,
eyes: (value) => typeof value === 'string' && value.length > 3, eyes: (value) => typeof value === 'string' && value.length > 3,
hairColor: (value) => typeof value === 'string' && value.length > 3, hairColor: (value) => typeof value === 'string' && value.length > 2,
measurements: (value) => /(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i.test(value), // from actors module measurements: (value) => /(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i.test(value), // from actors module
dateOfBirth: (value) => value instanceof Date && !Number.isNaN(value.getFullYear()), dateOfBirth: (value) => value instanceof Date && !Number.isNaN(value.getFullYear()),
hasTattoos: (value) => typeof value === 'boolean', hasTattoos: (value) => typeof value === 'boolean',