Refactored FreeOnes scraper.

This commit is contained in:
DebaucheryLibrarian 2026-02-06 06:44:03 +01:00
parent e8d6345400
commit acb114012c
3 changed files with 93 additions and 70 deletions

View File

@ -128,7 +128,7 @@ async function scrapeProfile({ query }) {
profile.measurements = bio.measurements?.replace(/["\s]+/g, '');
profile.hair = bio.hair;
profile.eyes = bio.eyes;
profile.eyes = bio.eyes?.replace(/eyes?/i, '').trim();
profile.ethnicity = bio.ethnicity;
profile.height = convert(bio.height, 'cm');
@ -145,8 +145,6 @@ async function scrapeProfile({ query }) {
];
}
console.log(profile);
return profile;
}

View File

@ -1,93 +1,117 @@
'use strict';
const { JSDOM } = require('jsdom');
const moment = require('moment');
const unprint = require('unprint');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
function scrapeProfile(html, actorName) {
const { document } = new JSDOM(html).window;
const profile = { name: actorName };
function scrapeProfile({ query }) {
const profile = {};
const bio = Array.from(document.querySelectorAll('a[href^="/babes"]'), (el) => decodeURI(el.href)).reduce((acc, item) => {
const keyMatch = item.match(/\[\w+\]/);
const bio = Object.fromEntries(query.all('.profile-meta-list li').map((bioEl) => [
slugify(unprint.query.content(bioEl, 'span:first-child'), '_'),
unprint.query.content(bioEl, 'span:last-child'),
]).filter(([_key, value]) => value?.toLowerCase() !== 'unknown'));
if (keyMatch) {
const key = keyMatch[0].slice(1, -1);
const [, value] = item.split('=');
profile.description = query.content('#description div[data-test="biography"]');
// both hip and waist link to 'waist', assume biggest value is hip
if (key === 'waist' && acc.waist) {
if (acc.waist > value) {
acc.hip = acc.waist;
acc.waist = value;
profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
profile.age = unprint.extractNumber(bio.age);
return acc;
}
acc.hip = value;
return acc;
}
acc[key] = value;
}
return acc;
}, {});
if (bio.dateOfBirth) profile.birthdate = moment.utc(bio.dateOfBirth, 'YYYY-MM-DD').toDate();
if (bio.placeOfBirth && bio.country) profile.birthPlace = `${bio.placeOfBirth}, ${bio.country}`;
else if (bio.country) profile.birthPlace = bio.country;
profile.eyes = bio.eyeColor;
profile.hair = bio.hairColor;
profile.birthPlace = bio.place_of_birth;
profile.nationality = bio.nationality;
profile.ethnicity = bio.ethnicity;
profile.bust = bio.bra;
if (bio.waist) profile.waist = Number(bio.waist.split(',')[0]);
if (bio.hip) profile.hip = Number(bio.hip.split(',')[0]);
profile.eyes = bio.eye_color;
profile.hairColor = bio.hair_color;
if (bio.height) profile.height = Number(bio.height.split(',')[0]);
if (bio.weight) profile.weight = Number(bio.weight.split(',')[0]);
[profile.bust, profile.cup] = bio.bra?.match(/(\d+)([a-z]+)/i)?.slice(1) || [];
profile.social = Array.from(document.querySelectorAll('.profile-meta-item a.social-icons'), (el) => el.href);
// TODO: differentiate between bust and bra band size
if (!profile.bust) {
profile.bust = bio.bust;
}
const avatar = document.querySelector('.profile-image-large img').src;
if (!avatar.match('placeholder')) profile.avatar = { src: avatar, credit: null };
if (!profile.cup) {
profile.cup = bio.cup;
}
profile.bust = unprint.extractNumber(bio.bra);
profile.cup = bio.cup;
profile.waist = unprint.extractNumber(bio.waist);
profile.hip = unprint.extractNumber(bio.hip);
profile.height = unprint.extractNumber(bio.height);
profile.weight = unprint.extractNumber(bio.weight);
profile.foot = unprint.extractNumber(bio.shoe_size);
profile.socials = query.urls('.profile-meta-item .teaser__link');
if (/yes/i.test(bio.tattoos)) profile.hasTattoos = true;
if (/no/i.test(bio.tattoos)) profile.hasTattoos = false;
profile.tattoos = bio.tattoo_locations;
if (/yes/i.test(bio.piercings)) profile.hasPiercings = true;
if (/no/i.test(bio.piercings)) profile.hasPiercings = false;
profile.piercings = bio.piercing_locations;
if (/natural/i.test(bio.boobs)) profile.naturalBoobs = true;
if (/fake/i.test(bio.boobs)) profile.naturalBoobs = false;
if (/natural/i.test(bio.butt)) profile.naturalButt = true;
if (/fake/i.test(bio.butt)) profile.naturalButt = false;
const avatar = query.img('.dashboard-image-container img');
if (!avatar?.match(/placeholder/i)) {
profile.avatar = avatar;
}
return profile;
}
function scrapeSearch(html) {
const { document } = new JSDOM(html).window;
return document.querySelector('a.image-link')?.href || null;
async function getActorUrl(actor) {
if (actor.url) {
return actor.url;
}
async function fetchProfile({ name: actorName }) {
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
const res = await unprint.post('https://www.freeones.com/xhr/search', {
performerTypes: ['babe', 'male', 'trans'],
query: actor.name,
recipe: 'subject',
size: 12,
});
const res = await http.get(`https://freeones.nl/${actorSlug}/profile`);
if (res.ok) {
const model = res.data.hits?.find((result) => slugify(result.name) === actor.slug);
if (res.statusCode === 200) {
return scrapeProfile(res.body.toString(), actorName);
if (model?.url) {
return `https://www.freeones.com${model.url}/bio`;
}
const searchRes = await http.get(`https://freeones.nl/babes?q=${actorName}`);
const actorPath = scrapeSearch(searchRes.body.toString());
if (actorPath) {
const actorRes = await http.get(`https://freeones.nl${actorPath}/profile`);
if (actorRes.statusCode === 200) {
return scrapeProfile(actorRes.body.toString(), actorName);
}
return null;
}
/**
 * Fetch and scrape an actor's bio page.
 *
 * The bio URL derived directly from `actor.slug` is requested first. Only when
 * that response is not OK is `getActorUrl` consulted for an alternative URL,
 * which is then requested and scraped instead.
 *
 * @param {object} actor - Actor descriptor; `actor.slug` is read here, and the
 *   whole object is handed to `getActorUrl` for the fallback lookup.
 * @returns {Promise<object|null>} Whatever `scrapeProfile` returns for the first
 *   OK response, or `null` when neither request succeeds or no fallback URL
 *   is available.
 */
async function fetchProfile(actor) {
	const directRes = await unprint.get(`https://freeones.com/${actor.slug}/bio`);

	if (directRes.ok) {
		return scrapeProfile(directRes.context);
	}

	// Slug-based URL did not resolve; ask the lookup helper for another candidate.
	const fallbackUrl = await getActorUrl(actor);

	if (!fallbackUrl) {
		return null;
	}

	const fallbackRes = await unprint.get(fallbackUrl);

	return fallbackRes.ok
		? scrapeProfile(fallbackRes.context)
		: null;
}

View File

@ -249,7 +249,8 @@ const actors = [
{ entity: 'boobpedia', name: 'Paige British', fields: ['avatar'] },
{ entity: 'angelogodshackoriginal', name: 'Emily Pink', fields: ['avatar'] },
{ entity: 'bradmontana', name: 'Alicia Ribeiro', fields: ['avatar', 'gender'] },
{ entity: 'adultempire', name: 'Melissa Moore', fields: ['avatar'] },
{ entity: 'adultempire', name: 'Abella Danger', fields: ['avatar', 'description', 'measurements', 'eyes', 'height', 'weight'] },
{ entity: 'freeones', name: 'Sophia Locke', fields: ['avatar', 'description', 'dateOfBirth', 'age', 'birthPlace', 'nationality', 'ethnicity', 'eyes', 'hairColor', 'bust', 'cup', 'waist', 'hip', 'height', 'weight', 'foot', 'socials', 'hasTattoos', 'tattoos', 'hasPiercings', 'piercings', 'naturalBoobs'] },
];
const actorScrapers = scrapers.actors;
@ -291,7 +292,7 @@ const validators = {
height: (value) => !!Number(value) && value > 130,
weight: (value) => !!Number(value) && value > 40,
eyes: (value) => typeof value === 'string' && value.length > 3,
hairColor: (value) => typeof value === 'string' && value.length > 3,
hairColor: (value) => typeof value === 'string' && value.length > 2,
measurements: (value) => /(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i.test(value), // from actors module
dateOfBirth: (value) => value instanceof Date && !Number.isNaN(value.getFullYear()),
hasTattoos: (value) => typeof value === 'boolean',