Overview
| Comment: | [graphspell] spellchecker: add parseParagraph() |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | graphspell |
| Files: | files | file ages | folders |
| SHA3-256: |
7616aa7ef9d3cc203b118f77e0749b0b |
| User & Date: | olr on 2018-02-20 08:40:03 |
| Other Links: | manifest | tags |
Context
|
2018-02-20
| ||
| 12:06 | [fr][tests] Update: Le Horla check-in: bf58e39b3f user: olr tags: trunk, fr | |
| 08:40 | [graphspell] spellchecker: add parseParagraph() check-in: 7616aa7ef9 user: olr tags: trunk, graphspell | |
|
2018-02-19
| ||
| 18:08 | [fr] new performance test (better when the processor isn’t converting a video!) check-in: b34690f0d8 user: olr tags: trunk, fr | |
Changes
Modified gc_lang/fr/webext/gce_worker.js from [c20f81d8f3] to [efd11a103b].
| ︙ | |||
200 201 202 203 204 205 206 | 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 | - + - + |
}
function parseAndSpellcheck (sText, sCountry, bDebug, bContext, dInfo={}) {
let i = 0;
sText = sText.replace(//g, "").normalize("NFC");
for (let sParagraph of text.getParagraph(sText)) {
let aGrammErr = gc_engine.parse(sParagraph, sCountry, bDebug, bContext);
|
| ︙ |
Modified graphspell-js/spellchecker.js from [e878cd2181] to [7b8a526c88].
| ︙ | |||
9 10 11 12 13 14 15 16 17 18 19 20 21 22 | 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | + |
"use strict";
if (typeof(require) !== 'undefined') {
var ibdawg = require("resource://grammalecte/graphspell/ibdawg.js");
var tokenizer = require("resource://grammalecte/graphspell/tokenizer.js");
}
${map}
const dDefaultDictionaries = new Map([
|
| ︙ | |||
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 | 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 | + - + + + + + + + + + + + + + + + + + + + + + + + + |
this.sLangCode = sLangCode;
if (!mainDic) {
mainDic = dDefaultDictionaries.gl_get(sLangCode, "");
}
this.oMainDic = this._loadDictionary(mainDic, sPath, true);
this.oExtendedDic = this._loadDictionary(extentedDic, sPath);
this.oPersonalDic = this._loadDictionary(personalDic, sPath);
this.oTokenizer = null;
}
_loadDictionary (dictionary, sPath, bNecessary=false) {
// returns an IBDAWG object
if (!dictionary) {
return null;
}
try {
|
| ︙ |
Modified graphspell-js/tokenizer.js from [c3f0ee8c90] to [bdd895b918].
| ︙ | |||
83 84 85 86 87 88 89 | 83 84 85 86 87 88 89 90 91 92 93 94 95 | - - - - - - - - - - |
helpers.logerror(e);
}
}
i += nCut;
sText = sText.slice(nCut);
}
}
|
Modified graphspell/spellchecker.py from [638f8d8cdf] to [b9fb2c7b70].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 | + + + + + + + + + + + + + + + + + + + |
# Spellchecker
# Wrapper for the IBDAWG class.
# Useful to check several dictionaries at once.
# To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough:
# - the main dictionary, bundled with the package
# - the extended dictionary, added by an organization
# - the personal dictionary, created by the user for their own convenience
import traceback
from . import ibdawg
from . import tokenizer
# Main dictionary file used for a language code when the caller
# does not name one explicitly (looked up in SpellChecker.__init__).
dDefaultDictionaries = {
    "fr": "fr.bdic",
    "en": "en.bdic",
}
class SpellChecker ():
def __init__ (self, sLangCode, sfMainDic="", sfExtendedDic="", sfPersonalDic=""):
"returns True if the main dictionary is loaded"
self.sLangCode = sLangCode
if not sfMainDic:
sfMainDic = dDefaultDictionaries.get(sLangCode, "")
self.oMainDic = self._loadDictionary(sfMainDic, True)
self.oExtendedDic = self._loadDictionary(sfExtendedDic)
self.oPersonalDic = self._loadDictionary(sfPersonalDic)
self.oTokenizer = None
def _loadDictionary (self, sfDictionary, bNecessary=False):
"returns an IBDAWG object"
if not sfDictionary:
return None
try:
return ibdawg.IBDAWG(sfDictionary)
except Exception as e:
if bNecessary:
raise Exception(str(e), "Error: <" + sfDictionary + "> not loaded.")
print("Error: <" + sfDictionary + "> not loaded.")
traceback.print_exc()
return None
def loadTokenizer (self):
    "creates the tokenizer for the language of the spellchecker and stores it in <oTokenizer>"
    oTokenizer = tokenizer.Tokenizer(self.sLangCode)
    self.oTokenizer = oTokenizer
def setMainDictionary (self, sfDictionary):
"returns True if the dictionary is loaded"
self.oMainDic = self._loadDictionary(sfDictionary)
return bool(self.oMainDic)
def setExtendedDictionary (self, sfDictionary):
"returns True if the dictionary is loaded"
self.oExtendedDic = self._loadDictionary(sfDictionary)
return bool(self.oExtendedDic)
def setPersonalDictionary (self, sfDictionary):
"returns True if the dictionary is loaded"
self.oPersonalDic = self._loadDictionary(sfDictionary)
return bool(self.oPersonalDic)
# parse text functions
def parseParagraph (self, sText, bSpellSugg=False):
if not self.oTokenizer:
self.loadTokenizer()
aSpellErrs = []
for dToken in self.oTokenizer.genTokens(sText):
if dToken['sType'] == "WORD" and not self.isValidToken(dToken['sValue']):
if bSpellSugg:
dToken['aSuggestions'] = []
for lSugg in self.suggest(dToken['sValue']):
dToken['aSuggestions'].extend(lSugg)
aSpellErrs.append(dToken)
return aSpellErrs
# IBDAWG functions
def isValidToken (self, sToken):
"checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)"
if self.oMainDic.isValidToken(sToken):
return True
|
| ︙ |