From acb114012c590f18786d524bd841e16c84b7883f Mon Sep 17 00:00:00 2001
From: DebaucheryLibrarian
Date: Fri, 6 Feb 2026 06:44:03 +0100
Subject: [PATCH] Refactored FreeOnes scraper.

---
Notes:
    scrapeProfile() reads the band size and cup from the "bra" bio entry,
    falls back to the separate "bust" and "cup" entries, and only then
    converts the bust to a number, so the parsed values are no longer
    overwritten by a second assignment from bio.bra / bio.cup.

    fetchProfile() requests the /bio page on www.freeones.com, the same
    host getActorUrl() uses, before falling back to getActorUrl().

 src/scrapers/adultempire.js |   4 +-
 src/scrapers/freeones.js    | 154 +++++++++++++++++++++---------------
 tests/profiles.js           |   5 +-
 3 files changed, 93 insertions(+), 70 deletions(-)

diff --git a/src/scrapers/adultempire.js b/src/scrapers/adultempire.js
index c93941d9..51befb04 100755
--- a/src/scrapers/adultempire.js
+++ b/src/scrapers/adultempire.js
@@ -128,7 +128,7 @@ async function scrapeProfile({ query }) {
 
 	profile.measurements = bio.measurements?.replace(/["\s]+/g, '');
 	profile.hair = bio.hair;
-	profile.eyes = bio.eyes;
+	profile.eyes = bio.eyes?.replace(/eyes?/i, '').trim();
 	profile.ethnicity = bio.ethnicity;
 
 	profile.height = convert(bio.height, 'cm');
@@ -145,8 +145,6 @@ async function scrapeProfile({ query }) {
 		];
 	}
 
-	console.log(profile);
-
 	return profile;
 }
 
diff --git a/src/scrapers/freeones.js b/src/scrapers/freeones.js
index 632feb1f..8a0be553 100755
--- a/src/scrapers/freeones.js
+++ b/src/scrapers/freeones.js
@@ -1,91 +1,115 @@
 'use strict';
 
-const { JSDOM } = require('jsdom');
-const moment = require('moment');
+const unprint = require('unprint');
 
-const http = require('../utils/http');
+const slugify = require('../utils/slugify');
 
-function scrapeProfile(html, actorName) {
-	const { document } = new JSDOM(html).window;
-	const profile = { name: actorName };
+function scrapeProfile({ query }) {
+	const profile = {};
 
-	const bio = Array.from(document.querySelectorAll('a[href^="/babes"]'), (el) => decodeURI(el.href)).reduce((acc, item) => {
-		const keyMatch = item.match(/\[\w+\]/);
+	const bio = Object.fromEntries(query.all('.profile-meta-list li').map((bioEl) => [
+		slugify(unprint.query.content(bioEl, 'span:first-child'), '_'),
+		unprint.query.content(bioEl, 'span:last-child'),
+	]).filter(([_key, value]) => value?.toLowerCase() !== 'unknown'));
 
-		if (keyMatch) {
-			const key = keyMatch[0].slice(1, -1);
-			const [, value] = item.split('=');
+	profile.description = query.content('#description div[data-test="biography"]');
 
-			// both hip and waist link to 'waist', assume biggest value is hip
-			if (key === 'waist' && acc.waist) {
-				if (acc.waist > value) {
-					acc.hip = acc.waist;
-					acc.waist = value;
+	profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
+	profile.age = unprint.extractNumber(bio.age);
 
-					return acc;
-				}
-
-				acc.hip = value;
-
-				return acc;
-			}
-
-			acc[key] = value;
-		}
-
-		return acc;
-	}, {});
-
-	if (bio.dateOfBirth) profile.birthdate = moment.utc(bio.dateOfBirth, 'YYYY-MM-DD').toDate();
-
-	if (bio.placeOfBirth && bio.country) profile.birthPlace = `${bio.placeOfBirth}, ${bio.country}`;
-	else if (bio.country) profile.birthPlace = bio.country;
-
-	profile.eyes = bio.eyeColor;
-	profile.hair = bio.hairColor;
+	profile.birthPlace = bio.place_of_birth;
+	profile.nationality = bio.nationality;
 	profile.ethnicity = bio.ethnicity;
 
-	profile.bust = bio.bra;
-	if (bio.waist) profile.waist = Number(bio.waist.split(',')[0]);
-	if (bio.hip) profile.hip = Number(bio.hip.split(',')[0]);
+	profile.eyes = bio.eye_color;
+	profile.hairColor = bio.hair_color;
 
-	if (bio.height) profile.height = Number(bio.height.split(',')[0]);
-	if (bio.weight) profile.weight = Number(bio.weight.split(',')[0]);
+	[profile.bust, profile.cup] = bio.bra?.match(/(\d+)([a-z]+)/i)?.slice(1) || [];
 
-	profile.social = Array.from(document.querySelectorAll('.profile-meta-item a.social-icons'), (el) => el.href);
+	// TODO: differentiate between bust and bra band size
+	if (!profile.bust) {
+		profile.bust = bio.bust;
+	}
 
-	const avatar = document.querySelector('.profile-image-large img').src;
-	if (!avatar.match('placeholder')) profile.avatar = { src: avatar, credit: null };
+	if (!profile.cup) {
+		profile.cup = bio.cup;
+	}
+
+	// coerce to a number here so the bra parse and the bust/cup fallbacks above are kept
+	profile.bust = unprint.extractNumber(profile.bust);
+	profile.waist = unprint.extractNumber(bio.waist);
+	profile.hip = unprint.extractNumber(bio.hip);
+
+	profile.height = unprint.extractNumber(bio.height);
+	profile.weight = unprint.extractNumber(bio.weight);
+
+	profile.foot = unprint.extractNumber(bio.shoe_size);
+
+	profile.socials = query.urls('.profile-meta-item .teaser__link');
+
+	if (/yes/i.test(bio.tattoos)) profile.hasTattoos = true;
+	if (/no/i.test(bio.tattoos)) profile.hasTattoos = false;
+
+	profile.tattoos = bio.tattoo_locations;
+
+	if (/yes/i.test(bio.piercings)) profile.hasPiercings = true;
+	if (/no/i.test(bio.piercings)) profile.hasPiercings = false;
+
+	profile.piercings = bio.piercing_locations;
+
+	if (/natural/i.test(bio.boobs)) profile.naturalBoobs = true;
+	if (/fake/i.test(bio.boobs)) profile.naturalBoobs = false;
+
+	if (/natural/i.test(bio.butt)) profile.naturalButt = true;
+	if (/fake/i.test(bio.butt)) profile.naturalButt = false;
+
+	const avatar = query.img('.dashboard-image-container img');
+
+	if (!avatar?.match(/placeholder/i)) {
+		profile.avatar = avatar;
+	}
 
 	return profile;
 }
 
-function scrapeSearch(html) {
-	const { document } = new JSDOM(html).window;
-
-	return document.querySelector('a.image-link')?.href || null;
-}
-
-async function fetchProfile({ name: actorName }) {
-	const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
-
-	const res = await http.get(`https://freeones.nl/${actorSlug}/profile`);
-
-	if (res.statusCode === 200) {
-		return scrapeProfile(res.body.toString(), actorName);
+async function getActorUrl(actor) {
+	if (actor.url) {
+		return actor.url;
 	}
 
-	const searchRes = await http.get(`https://freeones.nl/babes?q=${actorName}`);
-	const actorPath = scrapeSearch(searchRes.body.toString());
+	const res = await unprint.post('https://www.freeones.com/xhr/search', {
+		performerTypes: ['babe', 'male', 'trans'],
+		query: actor.name,
+		recipe: 'subject',
+		size: 12,
+	});
 
-	if (actorPath) {
-		const actorRes = await http.get(`https://freeones.nl${actorPath}/profile`);
+	if (res.ok) {
+		const model = res.data.hits?.find((result) => slugify(result.name) === actor.slug);
 
-		if (actorRes.statusCode === 200) {
-			return scrapeProfile(actorRes.body.toString(), actorName);
+		if (model?.url) {
+			return `https://www.freeones.com${model.url}/bio`;
 		}
+	}
 
-		return null;
+	return null;
+}
+
+async function fetchProfile(actor) {
+	const res = await unprint.get(`https://www.freeones.com/${actor.slug}/bio`);
+
+	if (res.ok) {
+		return scrapeProfile(res.context);
+	}
+
+	const actorUrl = await getActorUrl(actor);
+
+	if (actorUrl) {
+		const actorRes = await unprint.get(actorUrl);
+
+		if (actorRes.ok) {
+			return scrapeProfile(actorRes.context);
+		}
 	}
 
 	return null;
diff --git a/tests/profiles.js b/tests/profiles.js
index cef12cc7..f8a1e93b 100644
--- a/tests/profiles.js
+++ b/tests/profiles.js
@@ -249,7 +249,8 @@ const actors = [
 	{ entity: 'boobpedia', name: 'Paige British', fields: ['avatar'] },
 	{ entity: 'angelogodshackoriginal', name: 'Emily Pink', fields: ['avatar'] },
 	{ entity: 'bradmontana', name: 'Alicia Ribeiro', fields: ['avatar', 'gender'] },
-	{ entity: 'adultempire', name: 'Melissa Moore', fields: ['avatar'] },
+	{ entity: 'adultempire', name: 'Abella Danger', fields: ['avatar', 'description', 'measurements', 'eyes', 'height', 'weight'] },
+	{ entity: 'freeones', name: 'Sophia Locke', fields: ['avatar', 'description', 'dateOfBirth', 'age', 'birthPlace', 'nationality', 'ethnicity', 'eyes', 'hairColor', 'bust', 'cup', 'waist', 'hip', 'height', 'weight', 'foot', 'socials', 'hasTattoos', 'tattoos', 'hasPiercings', 'piercings', 'naturalBoobs'] },
 ];
 
 const actorScrapers = scrapers.actors;
@@ -291,7 +292,7 @@ const validators = {
 	height: (value) => !!Number(value) && value > 130,
 	weight: (value) => !!Number(value) && value > 40,
 	eyes: (value) => typeof value === 'string' && value.length > 3,
-	hairColor: (value) => typeof value === 'string' && value.length > 3,
+	hairColor: (value) => typeof value === 'string' && value.length > 2,
 	measurements: (value) => /(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i.test(value), // from actors module
 	dateOfBirth: (value) => value instanceof Date && !Number.isNaN(value.getFullYear()),
 	hasTattoos: (value) => typeof value === 'boolean',