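Added profile scraper tests (WIP), fixed some profile scrapers. Fixed slugify mangling strings that are already slugs: hyphens and underscores are now converted to spaces up front, and the hyphen is no longer treated as punctuation.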

This commit is contained in:
DebaucheryLibrarian
2026-01-10 02:58:50 +01:00
parent 5acc2c607b
commit bddc33a734
12 changed files with 293 additions and 111 deletions

View File

@@ -1,6 +1,7 @@
'use strict';
const { convert, convertMany } = require('convert');
const { decode } = require('html-entities');
const logger = require('../logger')(__filename);
@@ -60,18 +61,20 @@ function kgToLbs(kgs) {
/**
 * Rewrites feet and inch shorthand in a measurement string into unit names,
 * then converts the result with convertMany.
 *
 * None of the replace calls use the g flag, so each one rewrites only its first match.
 *
 * @param {string} input - measurement text using apostrophe and quote marks for feet and inches
 * @param {string} to - target unit, handed to convertMany(...).to()
 * @returns {number|null} converted value rounded to the nearest integer; null when that value is falsy (0 or NaN)
 */
function convertManyApi(input, to) {
// NOTE(review): the first two replace calls below look like the pre-change lines of a diff hunk
// whose plus and minus markers are not visible; the three calls after them look like their
// replacement. As written, the first call consumes the first apostrophe without adding a space,
// so the space-normalising call only sees a later apostrophe. Confirm which set is live.
const curatedInput = input
.replace('\'', 'ft')
.replace(/"|''/, 'in')
.replace(/[']\s*/, 'ft ') // ensure 1 space
.replace(/["”]|('')/, 'in') // 54”
.replace(/\d+ft\s*\d+\s*$/, (match) => `${match}in`); // height without any inch symbol
return Math.round(convertMany(curatedInput).to(to)) || null;
}
function convertApi(input, fromOrTo, to) {
if (!input) {
function convertApi(rawInput, fromOrTo, to) {
if (!rawInput) {
return null;
}
const input = decode(rawInput); // remove html entities, e.g. 5&#39; 8&quot; for 5' 8"
try {
if (typeof input === 'string' && to === undefined) {
return convertManyApi(input, fromOrTo);

View File

@@ -42,7 +42,7 @@ const accentMap = {
};
// Matches one ASCII letter or digit.
const plainCharRegex = /[a-zA-Z0-9]/;
// NOTE(review): the same const is declared twice below; these look like the before and after
// lines of a diff hunk. The only difference is that the second form drops the trailing hyphen.
// NOTE(review): as written, the first closing square bracket ends the character class right
// after the opening square bracket, so the braces, angle brackets, slash, asterisk and dash
// that follow are parsed as a literal sequence instead of class members. Confirm whether the
// real source escapes that bracket.
const defaultPunctuationRegex = /[.,?!:;&'"“”…()[]{}<>\/*—-]/;
const defaultPunctuationRegex = /[.,?!:;&'"“”…()[]{}<>\/*—]/;
// Matches one of: at sign, dollar, euro, pound, hash, percent, caret, plus, equals, backslash, tilde.
const defaultSymbolRegex = /[@$€£#%^+=\\~]/;
function slugify(strings, delimiter = '-', {
@@ -66,6 +66,7 @@ function slugify(strings, delimiter = '-', {
: string;
const normalized = casedString
.replace(/[_-]/g, ' ')
.split('')
.map((char) => {
if (char === ' ') {