// schat2-clive/assets/dictionary-to-mash.js

'use strict';
const fs = require('fs').promises;
/*
Disabled: optional verb-tense lookup, kept for reference.

const inflect = require('inflect');
const tensify = require('tensify');

function getTenses(word) {
  try {
    const { past, past_participle: participle } = tensify(word);

    return { past, participle };
  } catch (error) {
    return {};
  }
}
*/
const dictionary = require('./dictionary.json');
/**
 * Splits a raw dictionary entry into short, lower-cased definitions.
 *
 * The entry is split on numbered list markers ("1. ", "2. ", ...). Each piece
 * is cut at the first period found at or after character 16, so abbreviated
 * categories at the start of a definition, e.g. "(Anat.)", do not end it
 * prematurely. Pieces without such a period are kept whole.
 *
 * @param {?string} fullDefinition - Raw definition text; may be null or undefined.
 * @returns {string[]} Trimmed, lower-cased, non-empty definitions.
 */
function summarizeDefinitions(fullDefinition) {
  if (fullDefinition == null) {
    return [];
  }

  return fullDefinition
    .split(/\d+\.\s+/)
    .map((definition) => {
      const splitIndex = definition.indexOf('.', 16);
      const summary = splitIndex > -1 ? definition.slice(0, splitIndex) : definition;

      // Trim in both cases so short definitions do not keep stray whitespace.
      return summary.trim().toLowerCase();
    })
    .filter(Boolean); // drops the empty piece before the first marker and blank pieces
}

/**
 * Groups dictionary words by letter count and by anagram key, then writes the
 * result as JSON.
 *
 * Output shape: `{ [length]: { [sortedLetters]: [{ word, definitions }] } }`.
 * Only entries whose key consists solely of ASCII letters are kept; words are
 * lower-cased before their letters are sorted into the anagram key.
 *
 * @param {Object<string, ?string>} [words=dictionary] - Map of word to raw definition text.
 * @param {string} [outputPath='./mash-words.json'] - File the JSON is written to.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function init(words = dictionary, outputPath = './mash-words.json') {
  /*
  Disabled: expands the dictionary with plural and tense forms. Relies on the
  inflect/tensify requires and the getTenses helper that are commented out
  near the top of this file.

  const formsDictionary = Object.fromEntries(Object.entries(dictionary).flatMap(([word, definition]) => {
    const plural = inflect.pluralize(word);
    const { past, participle } = getTenses(word);

    return [
      [word, definition],
      ...(plural && !dictionary[plural] ? [[plural, definition]] : []),
      ...(past && !dictionary[past] ? [[past, definition]] : []),
      ...(participle && !dictionary[participle] ? [[participle, definition]] : []),
    ];
  }));
  */

  const sortedWords = {};

  for (const [rawWord, fullDefinition] of Object.entries(words)) {
    if (!/^[a-zA-Z]+$/.test(rawWord)) {
      continue;
    }

    const word = rawWord.toLowerCase();
    const anagram = word.split('').sort().join('');

    if (!sortedWords[anagram.length]) {
      sortedWords[anagram.length] = {};
    }

    if (!sortedWords[anagram.length][anagram]) {
      sortedWords[anagram.length][anagram] = [];
    }

    sortedWords[anagram.length][anagram].push({
      word,
      definitions: summarizeDefinitions(fullDefinition),
    });
  }

  await fs.writeFile(outputPath, JSON.stringify(sortedWords, null, 4));
}
// Run the conversion; report any failure and exit non-zero instead of leaving
// the returned promise unhandled.
init().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});