Differences From Artifact [1ca4ee03ac]:
- File graphspell-js/str_transform.js — part of check-in [2381f7c9ae] at 2020-08-05 06:56:15 on branch trunk — [graphspell] move function from char_player to str_transform (user: olr, size: 7608) [annotate] [blame] [check-ins using]
To Artifact [b78c1f2098]:
- File graphspell-js/str_transform.js — part of check-in [19fccd89d6] at 2020-08-05 09:30:48 on branch trunk — [graphspell] move functions from char_player to str_transform and lexicographer (user: olr, size: 9863) [annotate] [blame] [check-ins using]
| ︙ | ︙ | |||
32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
spellingNormalization: function (sWord) {
let sNewWord = "";
for (let c of sWord) {
sNewWord += this._xTransCharsForSpelling.gl_get(c, c);
}
return sNewWord.normalize("NFC");
},
longestCommonSubstring: function (string1, string2) {
// https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_substring
// untested
// init max value
let longestCommonSubstring = 0;
| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
spellingNormalization: function (sWord) {
let sNewWord = "";
for (let c of sWord) {
sNewWord += this._xTransCharsForSpelling.gl_get(c, c);
}
return sNewWord.normalize("NFC");
},
_xTransCharsForSimplification: new Map([
['à', 'a'], ['é', 'é'], ['î', 'i'], ['ô', 'o'], ['û', 'u'], ['ÿ', 'y'],
['â', 'a'], ['è', 'é'], ['ï', 'i'], ['ö', 'o'], ['ù', 'u'], ['ŷ', 'y'],
['ä', 'a'], ['ê', 'é'], ['í', 'i'], ['ó', 'o'], ['ü', 'u'], ['ý', 'y'],
['á', 'a'], ['ë', 'é'], ['ì', 'i'], ['ò', 'o'], ['ú', 'u'], ['ỳ', 'y'],
['ā', 'a'], ['ē', 'é'], ['ī', 'i'], ['ō', 'o'], ['ū', 'u'], ['ȳ', 'y'],
['ç', 'c'], ['ñ', 'n'],
['œ', 'oe'], ['æ', 'ae'],
['ſ', 's'], ['ffi', 'ffi'], ['ffl', 'ffl'], ['ff', 'ff'], ['ſt', 'ft'], ['fi', 'fi'], ['fl', 'fl'], ['st', 'st'],
["⁰", "0"], ["¹", "1"], ["²", "2"], ["³", "3"], ["⁴", "4"], ["⁵", "5"], ["⁶", "6"], ["⁷", "7"], ["⁸", "8"], ["⁹", "9"],
["₀", "0"], ["₁", "1"], ["₂", "2"], ["₃", "3"], ["₄", "4"], ["₅", "5"], ["₆", "6"], ["₇", "7"], ["₈", "8"], ["₉", "9"]
]),
simplifyWord: function (sWord) {
// word simplication before calculating distance between words
sWord = sWord.toLowerCase();
sWord = [...sWord].map(c => this._xTransCharsForSimplification.gl_get(c, c)).join('');
let sNewWord = "";
let i = 1;
for (let c of sWord) {
if (c == 'e' || c != sWord.slice(i, i+1)) { // exception for <e> to avoid confusion between crée / créai
sNewWord += c;
}
i++;
}
return sNewWord.replace(/eau/g, "o").replace(/au/g, "o").replace(/ai/g, "é").replace(/ei/g, "é").replace(/ph/g, "f");
},
_xTransNumbersToExponent: new Map([
["0", "⁰"], ["1", "¹"], ["2", "²"], ["3", "³"], ["4", "⁴"], ["5", "⁵"], ["6", "⁶"], ["7", "⁷"], ["8", "⁸"], ["9", "⁹"]
]),
numbersToExponent: function (sWord) {
let sNewWord = "";
for (let c of sWord) {
sNewWord += this._xTransNumbersToExponent.gl_get(c, c);
}
return sNewWord;
},
longestCommonSubstring: function (string1, string2) {
// https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_substring
// untested
// init max value
let longestCommonSubstring = 0;
|
| ︙ | ︙ | |||
192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 |
sWord = sPfxCode.slice(1) + sWord.slice(sPfxCode.charCodeAt(0)-48);
return sSfxCode[0] == '0' ? sWord + sSfxCode.slice(1) : sWord.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1);
}
};
if (typeof(exports) !== 'undefined') {
exports.longestCommonSubstring = str_transform.longestCommonSubstring;
exports.distanceDamerauLevenshtein = str_transform.distanceDamerauLevenshtein;
exports.distanceDamerauLevenshtein2 = str_transform.distanceDamerauLevenshtein2;
exports.showDistance = str_transform.showDistance;
exports.changeWordWithSuffixCode = str_transform.changeWordWithSuffixCode;
exports.changeWordWithAffixCode = str_transform.changeWordWithAffixCode;
exports.defineAffixCode = str_transform.defineAffixCode;
exports.defineSuffixCode = str_transform.defineSuffixCode;
}
| > > > | 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 |
sWord = sPfxCode.slice(1) + sWord.slice(sPfxCode.charCodeAt(0)-48);
return sSfxCode[0] == '0' ? sWord + sSfxCode.slice(1) : sWord.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1);
}
};
if (typeof(exports) !== 'undefined') {
exports.simplifyWord = str_transform.simplifyWord;
exports.numbersToExponent = str_transform.numbersToExponent;
exports.spellingNormalization = str_transform.spellingNormalization;
exports.longestCommonSubstring = str_transform.longestCommonSubstring;
exports.distanceDamerauLevenshtein = str_transform.distanceDamerauLevenshtein;
exports.distanceDamerauLevenshtein2 = str_transform.distanceDamerauLevenshtein2;
exports.showDistance = str_transform.showDistance;
exports.changeWordWithSuffixCode = str_transform.changeWordWithSuffixCode;
exports.changeWordWithAffixCode = str_transform.changeWordWithAffixCode;
exports.defineAffixCode = str_transform.defineAffixCode;
exports.defineSuffixCode = str_transform.defineSuffixCode;
}
|