Overview
| Comment: | [graphspell] spellchecker: add parseParagraph() |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | graphspell |
| Files: | files | file ages | folders |
| SHA3-256: |
7616aa7ef9d3cc203b118f77e0749b0b |
| User & Date: | olr on 2018-02-20 08:40:03 |
| Other Links: | manifest | tags |
Context
|
2018-02-20
| ||
| 12:06 | [fr][tests] Update: Le Horla check-in: bf58e39b3f user: olr tags: trunk, fr | |
| 08:40 | [graphspell] spellchecker: add parseParagraph() check-in: 7616aa7ef9 user: olr tags: trunk, graphspell | |
|
2018-02-19
| ||
| 18:08 | [fr] new performance test (better when the processor isn’t converting a video!) check-in: b34690f0d8 user: olr tags: trunk, fr | |
Changes
Modified gc_lang/fr/webext/gce_worker.js from [c20f81d8f3] to [efd11a103b].
| ︙ | ︙ | |||
200 201 202 203 204 205 206 |
}
function parseAndSpellcheck (sText, sCountry, bDebug, bContext, dInfo={}) {
let i = 0;
sText = sText.replace(//g, "").normalize("NFC");
for (let sParagraph of text.getParagraph(sText)) {
let aGrammErr = gc_engine.parse(sParagraph, sCountry, bDebug, bContext);
| | | | 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
}
function parseAndSpellcheck (sText, sCountry, bDebug, bContext, dInfo={}) {
    // Grammar-checks and spell-checks <sText> paragraph by paragraph.
    // One message is posted per paragraph (with its index, its grammar errors and its
    // spelling errors), then a final message with a null result and the last flag set to true.
    // <sCountry>, <bDebug> and <bContext> are passed unchanged to gc_engine.parse().
    // <dInfo> is passed unchanged to createResponse().

    // The character to strip is written as an explicit escape: a raw invisible character
    // inside the regex literal is easily lost by editors or copy/paste, which would leave "//g",
    // i.e. the start of a comment.
    // NOTE(review): the original literal is presumed to hold a soft hyphen (U+00AD) -- confirm.
    sText = sText.replace(/\u00AD/g, "").normalize("NFC");
    let iParaNum = 0;
    for (const sParagraph of text.getParagraph(sText)) {
        const aGrammErr = gc_engine.parse(sParagraph, sCountry, bDebug, bContext);
        const aSpellErr = oSpellChecker.parseParagraph(sParagraph);
        postMessage(createResponse("parseAndSpellcheck", {sParagraph: sParagraph, iParaNum: iParaNum, aGrammErr: aGrammErr, aSpellErr: aSpellErr}, dInfo, false));
        iParaNum += 1;
    }
    // end of the stream of results
    postMessage(createResponse("parseAndSpellcheck", null, dInfo, true));
}
function parseAndSpellcheck1 (sParagraph, sCountry, bDebug, bContext, dInfo={}) {
    // Grammar-checks and spell-checks a single paragraph and posts one final message
    // holding the cleaned paragraph, its grammar errors and its spelling errors.
    // <sCountry>, <bDebug> and <bContext> are passed unchanged to gc_engine.parse().
    // <dInfo> is passed unchanged to createResponse().

    // The character to strip is written as an explicit escape: a raw invisible character
    // inside the regex literal is easily lost by editors or copy/paste, which would leave "//g",
    // i.e. the start of a comment.
    // NOTE(review): the original literal is presumed to hold a soft hyphen (U+00AD) -- confirm.
    sParagraph = sParagraph.replace(/\u00AD/g, "").normalize("NFC");
    const aGrammErr = gc_engine.parse(sParagraph, sCountry, bDebug, bContext);
    const aSpellErr = oSpellChecker.parseParagraph(sParagraph);
    postMessage(createResponse("parseAndSpellcheck1", {sParagraph: sParagraph, aGrammErr: aGrammErr, aSpellErr: aSpellErr}, dInfo, true));
}
function getOptions (dInfo={}) {
    // Posts the options returned by gc_engine.getOptions() as a final "getOptions" response.
    const xOptions = gc_engine.getOptions();
    const xResponse = createResponse("getOptions", xOptions, dInfo, true);
    postMessage(xResponse);
}
|
| ︙ | ︙ |
Modified graphspell-js/spellchecker.js from [e878cd2181] to [7b8a526c88].
| ︙ | ︙ | |||
9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
"use strict";
if (typeof(require) !== 'undefined') {
var ibdawg = require("resource://grammalecte/graphspell/ibdawg.js");
}
${map}
const dDefaultDictionaries = new Map([
| > | 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
"use strict";
if (typeof(require) !== 'undefined') {
var ibdawg = require("resource://grammalecte/graphspell/ibdawg.js");
var tokenizer = require("resource://grammalecte/graphspell/tokenizer.js");
}
${map}
const dDefaultDictionaries = new Map([
|
| ︙ | ︙ | |||
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
this.sLangCode = sLangCode;
if (!mainDic) {
mainDic = dDefaultDictionaries.gl_get(sLangCode, "");
}
this.oMainDic = this._loadDictionary(mainDic, sPath, true);
this.oExtendedDic = this._loadDictionary(extentedDic, sPath);
this.oPersonalDic = this._loadDictionary(personalDic, sPath);
}
_loadDictionary (dictionary, sPath, bNecessary=false) {
// returns an IBDAWG object
if (!dictionary) {
return null;
}
try {
| > | > > > > > > > > > > > > > > > > > > > > > > > | 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
this.sLangCode = sLangCode;
if (!mainDic) {
mainDic = dDefaultDictionaries.gl_get(sLangCode, "");
}
this.oMainDic = this._loadDictionary(mainDic, sPath, true);
this.oExtendedDic = this._loadDictionary(extentedDic, sPath);
this.oPersonalDic = this._loadDictionary(personalDic, sPath);
this.oTokenizer = null;
}
_loadDictionary (dictionary, sPath, bNecessary=false) {
// returns an IBDAWG object
if (!dictionary) {
return null;
}
try {
if (typeof(ibdawg) !== 'undefined') {
return new ibdawg.IBDAWG(dictionary); // dictionary can be a filename or a JSON object
} else {
return new IBDAWG(dictionary, sPath); // dictionary can be a filename or a JSON object
}
}
catch (e) {
let sfDictionary = (typeof(dictionary) == "string") ? dictionary : dictionary.sLangName + "/" + dictionary.sFileName;
if (bNecessary) {
throw "Error: <" + sfDictionary + "> not loaded. " + e.message;
}
console.log("Error: <" + sfDictionary + "> not loaded.")
console.log(e.message);
return null;
}
}
loadTokenizer () {
if (typeof(tokenizer) !== 'undefined') {
this.oTokenizer = new tokenizer.Tokenizer(this.sLangCode);
} else {
this.oTokenizer = new Tokenizer(this.sLangCode);
}
}
setMainDictionary (dictionary) {
// returns true if the dictionary is loaded
this.oMainDic = this._loadDictionary(dictionary);
return Boolean(this.oMainDic);
}
setExtendedDictionary (dictionary) {
// returns true if the dictionary is loaded
this.oExtendedDic = this._loadDictionary(dictionary);
return Boolean(this.oExtendedDic);
}
setPersonalDictionary (dictionary) {
// returns true if the dictionary is loaded
this.oPersonalDic = this._loadDictionary(dictionary);
return Boolean(this.oPersonalDic);
}
// parse text functions
parseParagraph (sText) {
if (!this.oTokenizer) {
this.loadTokenizer();
}
let aSpellErr = [];
for (let oToken of this.oTokenizer.genTokens(sText)) {
if (oToken.sType === 'WORD' && !this.isValidToken(oToken.sValue)) {
aSpellErr.push(oToken);
}
}
return aSpellErr;
}
// IBDAWG functions
isValidToken (sToken) {
// checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)
if (this.oMainDic.isValidToken(sToken)) {
return true;
|
| ︙ | ︙ |
Modified graphspell-js/tokenizer.js from [c3f0ee8c90] to [bdd895b918].
| ︙ | ︙ | |||
83 84 85 86 87 88 89 |
helpers.logerror(e);
}
}
i += nCut;
sText = sText.slice(nCut);
}
}
| < < < < < < < < < < | 83 84 85 86 87 88 89 90 91 92 93 94 95 |
helpers.logerror(e);
}
}
i += nCut;
sText = sText.slice(nCut);
}
}
}
if (typeof(exports) !== 'undefined') {
exports.Tokenizer = Tokenizer;
}
|
Modified graphspell/spellchecker.py from [638f8d8cdf] to [b9fb2c7b70].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# Spellchecker
# Wrapper for the IBDAWG class.
# Useful to check several dictionaries at once.
# To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough:
# - the main dictionary, bundled with the package
# - the extended dictionary, added by an organization
# - the personal dictionary, created by the user for their own convenience
import traceback
from . import ibdawg
dDefaultDictionaries = {
"fr": "fr.bdic",
"en": "en.bdic"
}
class SpellChecker ():
def __init__ (self, sLangCode, sfMainDic="", sfExtendedDic="", sfPersonalDic=""):
    """Load the dictionaries of the spellchecker.

    The main dictionary is loaded as necessary (bNecessary=True); when no filename is given,
    the default dictionary registered for <sLangCode> is used.
    The extended and personal dictionaries are optional.
    """
    self.sLangCode = sLangCode
    # fall back on the default dictionary of the language
    sfMainDic = sfMainDic or dDefaultDictionaries.get(sLangCode, "")
    self.oMainDic = self._loadDictionary(sfMainDic, True)
    self.oExtendedDic = self._loadDictionary(sfExtendedDic)
    self.oPersonalDic = self._loadDictionary(sfPersonalDic)
def _loadDictionary (self, sfDictionary, bNecessary=False):
    """Return an IBDAWG object built from <sfDictionary>.

    Returns None when <sfDictionary> is empty, or when loading fails and <bNecessary> is False
    (the error is then printed with its traceback).
    Raises Exception when loading fails and <bNecessary> is True.
    """
    if not sfDictionary:
        return None
    try:
        oDictionary = ibdawg.IBDAWG(sfDictionary)
    except Exception as e:
        sNotLoaded = "Error: <" + sfDictionary + "> not loaded."
        if bNecessary:
            raise Exception(str(e), sNotLoaded)
        print(sNotLoaded)
        traceback.print_exc()
        return None
    return oDictionary
def setMainDictionary (self, sfDictionary):
    "replace the main dictionary; returns True if the dictionary is loaded"
    oDictionary = self._loadDictionary(sfDictionary)
    self.oMainDic = oDictionary
    return bool(oDictionary)
def setExtendedDictionary (self, sfDictionary):
    "replace the extended dictionary; returns True if the dictionary is loaded"
    oDictionary = self._loadDictionary(sfDictionary)
    self.oExtendedDic = oDictionary
    return bool(oDictionary)
def setPersonalDictionary (self, sfDictionary):
    "replace the personal dictionary; returns True if the dictionary is loaded"
    oDictionary = self._loadDictionary(sfDictionary)
    self.oPersonalDic = oDictionary
    return bool(oDictionary)
# IBDAWG functions
def isValidToken (self, sToken):
"checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)"
if self.oMainDic.isValidToken(sToken):
return True
| > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# Spellchecker
# Wrapper for the IBDAWG class.
# Useful to check several dictionaries at once.
# To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough:
# - the main dictionary, bundled with the package
# - the extended dictionary, added by an organization
# - the personal dictionary, created by the user for their own convenience
import traceback
from . import ibdawg
from . import tokenizer
dDefaultDictionaries = {
"fr": "fr.bdic",
"en": "en.bdic"
}
class SpellChecker ():
def __init__ (self, sLangCode, sfMainDic="", sfExtendedDic="", sfPersonalDic=""):
    """Load the dictionaries of the spellchecker.

    The main dictionary is loaded as necessary (bNecessary=True); when no filename is given,
    the default dictionary registered for <sLangCode> is used.
    The extended and personal dictionaries are optional.
    The tokenizer is not created here: <oTokenizer> starts as None.
    """
    self.sLangCode = sLangCode
    # fall back on the default dictionary of the language
    sfMainDic = sfMainDic or dDefaultDictionaries.get(sLangCode, "")
    self.oMainDic = self._loadDictionary(sfMainDic, True)
    self.oExtendedDic = self._loadDictionary(sfExtendedDic)
    self.oPersonalDic = self._loadDictionary(sfPersonalDic)
    self.oTokenizer = None
def _loadDictionary (self, sfDictionary, bNecessary=False):
    """Return an IBDAWG object built from <sfDictionary>.

    Returns None when <sfDictionary> is empty, or when loading fails and <bNecessary> is False
    (the error is then printed with its traceback).
    Raises Exception when loading fails and <bNecessary> is True.
    """
    if not sfDictionary:
        return None
    try:
        oDictionary = ibdawg.IBDAWG(sfDictionary)
    except Exception as e:
        sNotLoaded = "Error: <" + sfDictionary + "> not loaded."
        if bNecessary:
            raise Exception(str(e), sNotLoaded)
        print(sNotLoaded)
        traceback.print_exc()
        return None
    return oDictionary
def loadTokenizer (self):
    "create the tokenizer for the language of the spellchecker and store it in <oTokenizer>"
    oTokenizer = tokenizer.Tokenizer(self.sLangCode)
    self.oTokenizer = oTokenizer
def setMainDictionary (self, sfDictionary):
    "replace the main dictionary; returns True if the dictionary is loaded"
    oDictionary = self._loadDictionary(sfDictionary)
    self.oMainDic = oDictionary
    return bool(oDictionary)
def setExtendedDictionary (self, sfDictionary):
    "replace the extended dictionary; returns True if the dictionary is loaded"
    oDictionary = self._loadDictionary(sfDictionary)
    self.oExtendedDic = oDictionary
    return bool(oDictionary)
def setPersonalDictionary (self, sfDictionary):
    "replace the personal dictionary; returns True if the dictionary is loaded"
    oDictionary = self._loadDictionary(sfDictionary)
    self.oPersonalDic = oDictionary
    return bool(oDictionary)
# parse text functions
def parseParagraph (self, sText, bSpellSugg=False):
    """Return the list of tokens of type "WORD" found in <sText> that isValidToken() rejects.

    The tokenizer is created on first use.
    If <bSpellSugg> is True, each returned token gets an 'aSuggestions' entry:
    the flat list of all suggestions yielded by suggest() for the token value.
    """
    if not self.oTokenizer:
        self.loadTokenizer()
    aSpellErrs = []
    for dToken in self.oTokenizer.genTokens(sText):
        if dToken['sType'] != "WORD" or self.isValidToken(dToken['sValue']):
            continue
        if bSpellSugg:
            # suggest() yields lists of suggestions: flatten them into a single list
            dToken['aSuggestions'] = [ sSugg  for lSugg in self.suggest(dToken['sValue'])  for sSugg in lSugg ]
        aSpellErrs.append(dToken)
    return aSpellErrs
# IBDAWG functions
def isValidToken (self, sToken):
"checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)"
if self.oMainDic.isValidToken(sToken):
return True
|
| ︙ | ︙ |