54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
|
try:
oDict = ibdawg.IBDAWG("fr-allvars.json")
except:
traceback.print_exc()
def makeDictionaries (sp, sVersion):
    """Launch the genfrdic.py script from within the <sp>/dictionnaire folder.

    sp        -- base path; "/dictionnaire" is appended to it and handed to cd()
                 (cd is defined elsewhere; presumably a context manager that
                 switches the working directory -- confirm against its definition)
    sVersion  -- string appended after the "-v" option of the command line
    """
    print("> Exécution du script dans gc_lang/fr/dictionnaire:")
    print(" genfrdic.py -s -gl -v "+sVersion)
    with cd(sp+"/dictionnaire"):
        # the interpreter is not invoked the same way on Windows and elsewhere
        if platform.system() != "Windows":
            sCommand = "python3 ./genfrdic.py -s -gl -v "+sVersion
        else:
            sCommand = "python genfrdic.py -s -gl -v "+sVersion
        os.system(sCommand)
def makeThesaurusFiles (sp, bJS=False):
    """Generate the thesaurus data files from <sp>/data/thes_fr.json.

    sp   -- base path; the JSON source is read from <sp>/data/thes_fr.json
    bJS  -- when true, the JSON source is also copied to
            <sp>/modules-js/thesaurus_data.json

    The Python module <sp>/modules/thesaurus_data.py is always written; it
    holds a single assignment of the loaded data to the name dThesaurus.
    The destination folders must already exist.
    """
    spSource = sp+'/data/thes_fr.json'
    # context managers guarantee that the handles are closed, even on error
    with open(spSource, "r", encoding="utf-8") as hSrc:
        dThesaurus = json.load(hSrc)
    ## write file for Python
    sCode = "# generated data (do not edit)\n\n" + \
            "dThesaurus = " + str(dThesaurus) + "\n"
    with open(sp+"/modules/thesaurus_data.py", "w", encoding="utf-8", newline="\n") as hDst:
        hDst.write(sCode)
    if bJS:
        ## write file for JavaScript
        shutil.copy2(spSource, sp+"/modules-js/thesaurus_data.json")
def makeConj (sp, bJS=False):
print("> Conjugaisons ", end="")
print("> Fichier des conjugaisons pour Grammalecte")
print("(Python et JavaScript)" if bJS else "(Python seulement)")
dVerb = {}
lVinfo = []; dVinfo = {}; nVinfo = 0
lTags = []; dTags = {}; nTags = 0
dVerbNames = {}
dPatternList = {
":P": [], ":Q": [], ":Ip": [], ":Iq": [], ":Is": [], ":If": [], ":K": [], ":Sp": [], ":Sq": [], ":E": []
|
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
|
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
|
-
+
-
|
hDst.write(' "dPatternConj": ' + json.dumps(dPatternList, ensure_ascii=False) + ",\n")
hDst.write(' "dVerb": ' + json.dumps(dVerb, ensure_ascii=False) + ",\n")
hDst.write(' "dVerbNames": ' + json.dumps(dVerbNames, ensure_ascii=False) + "\n")
hDst.write("}\n")
def makeMfsp (sp, bJS=False):
print("> Pluriel/singulier/masculin/féminin ", end="")
print("> Fichier des pluriels/singuliers/masculins/féminins pour Grammalecte")
print("(Python et JavaScript)" if bJS else "(Python seulement)")
aPlurS = set() # pluriels en -s
dTag = {}
lTagFemForm = []
lTagMiscPlur = [] # pluriels spéciaux
dMiscPlur = {}
dMasForm = {}
lTag = []
|
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
|
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
|
-
+
-
|
' "lTagFemForm": ' + json.dumps(lTagFemForm, ensure_ascii=False) + ",\n" + \
' "dMiscPlur": ' + json.dumps(dMiscPlur, ensure_ascii=False) + ",\n" + \
' "dMasForm": ' + json.dumps(dMasForm, ensure_ascii=False) + "\n}"
open(sp+"/modules-js/mfsp_data.json", "w", encoding="utf-8", newline="\n").write(sCode)
def makePhonetTable (sp, bJS=False):
print("> Correspondances phonétiques ", end="")
print("> Fichier des correspondances phonétiques pour Grammalecte")
print("(Python et JavaScript)" if bJS else "(Python seulement)")
loadDictionary()
conj = importlib.import_module("gc_lang.fr.modules.conj")
# set of homophonic words
lSet = []
|
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
|
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
-
+
-
+
+
-
+
|
## write file for JavaScript
sCode = "{\n" + \
' "dWord": ' + json.dumps(dWord, ensure_ascii=False) + ",\n" + \
' "lSet": ' + json.dumps(lSet, ensure_ascii=False) + ",\n" + \
' "dMorph": ' + json.dumps(dMorph, ensure_ascii=False) + "\n}"
open(sp+"/modules-js/phonet_data.json", "w", encoding="utf-8", newline="\n").write(sCode)
def makeThesaurusFiles (sp, bJS=False):
    """Generate the thesaurus data files from <sp>/data/thes_fr.json.

    sp   -- base path; the JSON source is read from <sp>/data/thes_fr.json
    bJS  -- when true, the thesaurus is also written for JavaScript, split in
            two files: <sp>/modules-js/thesaurus1_data.json (first half of the
            entries) and <sp>/modules-js/thesaurus2_data.json (the remainder)

    The Python module <sp>/modules/thesaurus_data.py is always written; it
    holds a single assignment of the loaded data to the name dThesaurus.
    The destination folders must already exist.
    """
    print("> Fichiers du Thésaurus pour Grammalecte")
    # context managers guarantee that the handles are closed, even on error
    with open(sp+'/data/thes_fr.json', "r", encoding="utf-8") as hSrc:
        dThesaurus = json.load(hSrc)
    ## write file for Python
    sCode = "# generated data (do not edit)\n\n" + \
            "dThesaurus = " + str(dThesaurus) + "\n"
    with open(sp+"/modules/thesaurus_data.py", "w", encoding="utf-8", newline="\n") as hDst:
        hDst.write(sCode)
    if bJS:
        ## write files for JavaScript
        # thes_fr.json is too big to be copied as a single file, so it is split:
        # addons.mozilla.org doesn’t accept a file that big (5 MB maximum).
        lEntries = list(dThesaurus.items())
        nHalfSize = len(lEntries) // 2
        # with an odd number of entries, the second file gets the extra one
        lParts = [
            ("thesaurus1_data.json", lEntries[:nHalfSize]),
            ("thesaurus2_data.json", lEntries[nHalfSize:])
        ]
        for sFileName, lPart in lParts:
            with open(sp+"/modules-js/"+sFileName, "w", encoding="utf-8") as hDst:
                hDst.write(json.dumps(dict(lPart), ensure_ascii=False))
def before (spLaunch, dVars, bJS=False):
    """Print the build banners, then build the dictionaries and the thesaurus files.

    spLaunch  -- path passed on to makeDictionaries() and makeThesaurusFiles()
    dVars     -- mapping from which the 'oxt_version' entry is read
    bJS       -- passed on to makeThesaurusFiles()
    """
    lBanners = (
        "========== Build Hunspell dictionaries / Thesaurus ==========",
        "========== Construction des dictionnaires Hunspell et du Thesaurus =========="
    )
    for sBanner in lBanners:
        print(sBanner)
    # the version is looked up only after the banners have been printed
    sVersion = dVars['oxt_version']
    makeDictionaries(spLaunch, sVersion)
    makeThesaurusFiles(spLaunch, bJS)
def after (spLaunch, dVars, bJS=False):
    """Print the build banners, then run every data builder in turn.

    spLaunch  -- path passed on to each builder
    dVars     -- not used by this function
    bJS       -- passed on to each builder; also selects the message telling
                 which targets are concerned
    """
    print("========== Build French data ==========")
    print("========== Création des fichiers de données pour Grammalecte ==========")
    if bJS:
        print("(Python et JavaScript)")
    else:
        print("(Python seulement)")
    # builders are run in this exact order
    for fBuild in (makeMfsp, makeConj, makePhonetTable, makeThesaurusFiles):
        fBuild(spLaunch, bJS)
|