Grammalecte  Check-in [6e729d98df]

Overview
Comment:[fr][build][fx] split the thesaurus in two parts to bypass AMO, then merge them after loading
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr | build | fx
Files: files | file ages | folders
SHA3-256: 6e729d98df7a790c0e3f1437892f0ac367b5d08844f25e7be1b87fccc92a74bd
User & Date: olr on 2025-12-15 14:53:24
Other Links: manifest | tags
Context
2025-12-15
17:15
[fx] intitulé pour le lancement du Thesaurus check-in: aae1c5282e user: olr tags: trunk, fx
14:53
[fr][build][fx] split the thesaurus in two parts to bypass AMO, then merge them after loading check-in: 6e729d98df user: olr tags: trunk, fr, build, fx
10:47
[fr][build] fix thesaurus build check-in: cc5f0a602d user: olr tags: trunk, fr, build
Changes

Modified gc_lang/fr/build_data.py from [a24db6ede9] to [4e92c3f4f5].

54
55
56
57
58
59
60


61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

83
84
85
86
87
88
89
90
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69













70

71

72
73
74
75
76
77
78







+
+







-
-
-
-
-
-
-
-
-
-
-
-
-

-
+
-







        try:
            oDict = ibdawg.IBDAWG("fr-allvars.json")
        except:
            traceback.print_exc()


def makeDictionaries (sp, sVersion):
    "run the Hunspell dictionaries generation script (genfrdic.py) in <sp>/dictionnaire"
    print("> Exécution du script dans gc_lang/fr/dictionnaire:")
    print("  genfrdic.py -s -gl -v "+sVersion)
    # the Python launcher name differs between platforms
    if platform.system() == "Windows":
        sCommand = "python genfrdic.py -s -gl -v "+sVersion
    else:
        sCommand = "python3 ./genfrdic.py -s -gl -v "+sVersion
    with cd(sp+"/dictionnaire"):
        os.system(sCommand)


def makeThesaurusFiles (sp, bJS=False):
    "generate the thesaurus data file for Python (and for JavaScript if <bJS>) from <sp>/data/thes_fr.json"
    # load the source thesaurus (word -> synonym lists)
    with open(sp+'/data/thes_fr.json', "r", encoding="utf-8") as hSrc:
        dThesaurus = json.load(hSrc)
    ## write file for Python
    sCode = "# generated data (do not edit)\n\n" + \
            "dThesaurus = " + str(dThesaurus) + "\n"
    with open(sp+"/modules/thesaurus_data.py", "w", encoding="utf-8", newline="\n") as hDst:
        hDst.write(sCode)
    if bJS:
        ## write file for JavaScript (the JSON source is already in the right format)
        shutil.copy2(sp+'/data/thes_fr.json', sp+"/modules-js/thesaurus_data.json")


def makeConj (sp, bJS=False):
    print("> Conjugaisons ", end="")
    print("> Fichier des conjugaisons pour Grammalecte")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")
    dVerb = {}
    lVinfo = []; dVinfo = {}; nVinfo = 0
    lTags = []; dTags = {}; nTags = 0
    dVerbNames = {}

    dPatternList = {
        ":P": [], ":Q": [], ":Ip": [], ":Iq": [], ":Is": [], ":If": [], ":K": [], ":Sp": [], ":Sq": [], ":E": []
206
207
208
209
210
211
212
213

214
215
216
217
218
219
220
221
194
195
196
197
198
199
200

201

202
203
204
205
206
207
208







-
+
-







            hDst.write('    "dPatternConj": ' + json.dumps(dPatternList, ensure_ascii=False) + ",\n")
            hDst.write('    "dVerb": ' + json.dumps(dVerb, ensure_ascii=False) + ",\n")
            hDst.write('    "dVerbNames": ' + json.dumps(dVerbNames, ensure_ascii=False) + "\n")
            hDst.write("}\n")


def makeMfsp (sp, bJS=False):
    print("> Pluriel/singulier/masculin/féminin ", end="")
    print("> Fichier des pluriels/singuliers/masculins/féminins pour Grammalecte")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")
    aPlurS = set()      # pluriels en -s
    dTag = {}
    lTagFemForm = []
    lTagMiscPlur = []   # pluriels spéciaux
    dMiscPlur = {}
    dMasForm = {}
    lTag = []
295
296
297
298
299
300
301
302

303
304
305
306
307
308
309
310
282
283
284
285
286
287
288

289

290
291
292
293
294
295
296







-
+
-







                '    "lTagFemForm": ' +  json.dumps(lTagFemForm, ensure_ascii=False) + ",\n" + \
                '    "dMiscPlur": ' +  json.dumps(dMiscPlur, ensure_ascii=False) + ",\n" + \
                '    "dMasForm": ' +  json.dumps(dMasForm, ensure_ascii=False) + "\n}"
        open(sp+"/modules-js/mfsp_data.json", "w", encoding="utf-8", newline="\n").write(sCode)


def makePhonetTable (sp, bJS=False):
    print("> Correspondances phonétiques ", end="")
    print("> Fichier des correspondances phonétiques pour Grammalecte")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")

    loadDictionary()

    conj = importlib.import_module("gc_lang.fr.modules.conj")

    # set of homophonic words
    lSet = []
354
355
356
357
358
359
360
361






















362
363

364
365

366
367
368


369
370
371
372

340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370

371
372

373
374
375

376
377
378
379
380

381








+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
+

-
+


-
+
+



-
+
        ## write file for JavaScript
        sCode = "{\n" + \
                '    "dWord": ' + json.dumps(dWord, ensure_ascii=False) + ",\n" + \
                '    "lSet": ' + json.dumps(lSet, ensure_ascii=False) + ",\n" + \
                '    "dMorph": ' + json.dumps(dMorph, ensure_ascii=False) + "\n}"
        open(sp+"/modules-js/phonet_data.json", "w", encoding="utf-8", newline="\n").write(sCode)


def makeThesaurusFiles (sp, bJS=False):
    "generate the thesaurus data file for Python (and for JavaScript if <bJS>) from <sp>/data/thes_fr.json"
    print("> Fichiers du Thésaurus pour Grammalecte")
    # load the source thesaurus (word -> synonym lists)
    with open(sp+'/data/thes_fr.json', "r", encoding="utf-8") as hSrc:
        dThesaurus = json.load(hSrc)
    ## write file for Python
    sCode = "# generated data (do not edit)\n\n" + \
            "dThesaurus = " + str(dThesaurus) + "\n"
    with open(sp+"/modules/thesaurus_data.py", "w", encoding="utf-8", newline="\n") as hDst:
        hDst.write(sCode)
    if bJS:
        ## write files for JavaScript
        # thes_fr.json is too big: addons.mozilla.org doesn’t accept files
        # bigger than 5 MB, so the thesaurus is split in two halves here
        # and merged again by thesaurus.js after loading.
        lEntries = list(dThesaurus.items())
        nHalfSize = len(lEntries) // 2
        dThes1 = dict(lEntries[:nHalfSize])
        dThes2 = dict(lEntries[nHalfSize:])
        with open(sp+"/modules-js/thesaurus1_data.json", "w", encoding="utf-8") as hDst:
            hDst.write(json.dumps(dThes1, ensure_ascii=False))
        with open(sp+"/modules-js/thesaurus2_data.json", "w", encoding="utf-8") as hDst:
            hDst.write(json.dumps(dThes2, ensure_ascii=False))


def before (spLaunch, dVars, bJS=False):
    "build steps run before the main build: Hunspell dictionaries and thesaurus data"
    for sBanner in ("========== Build Hunspell dictionaries / Thesaurus ==========",
                    "========== Construction des dictionnaires Hunspell et du Thesaurus =========="):
        print(sBanner)
    makeDictionaries(spLaunch, dVars['oxt_version'])
    makeThesaurusFiles(spLaunch, bJS)


def after (spLaunch, dVars, bJS=False):
    "build steps run after the main build: generate the French data modules"
    print("========== Build French data ==========")
    print("========== Création des fichiers de données pour Grammalecte ==========")
    sTargets = "(Python et JavaScript)"  if bJS  else "(Python seulement)"
    print(sTargets)
    # generate each data module in turn
    for fnMake in (makeMfsp, makeConj, makePhonetTable, makeThesaurusFiles):
        fnMake(spLaunch, bJS)

Modified gc_lang/fr/modules-js/conj_data.json from [a2e2224de3] to [6ea791c67f].

cannot compute difference between binary files

Modified gc_lang/fr/modules-js/phonet_data.json from [b44369a83a] to [eaf6532f4d].

cannot compute difference between binary files

Modified gc_lang/fr/modules-js/thesaurus.js from [009b56b2e7] to [ea28bf31d8].

14
15
16
17
18
19
20
21

22


23




24
25
26
27
28
29
30
14
15
16
17
18
19
20

21
22
23
24

25
26
27
28
29
30
31
32
33
34
35







-
+

+
+
-
+
+
+
+







}


var thesaurus = {
    _dWord: new Map(),

    bInit: false,
    init: function (sJSONData) {
    init: function (sJSONData1, sJSONData2) {
        try {
            // As addons.mozilla.org doesn’t accept files bigger than 5 MB,
            // we had to split the thesaurus in two parts. And now we merge them.
            let _oData = JSON.parse(sJSONData);
            let _oData1 = JSON.parse(sJSONData1);
            let _oData2 = JSON.parse(sJSONData2);
            let _oData = { ..._oData1, ..._oData2 };
            // convert to Map
            this._dWord = helpers.objectToMap(_oData);
            this.bInit = true;
            //console.log(this._dWord);
        }
        catch (e) {
            console.error(e);
        }
49
50
51
52
53
54
55
56

57
58
59

60
61
62

63
64
65
66
67
68
69
70
71
72
73
74
54
55
56
57
58
59
60

61
62
63

64
65
66

67
68
69
70
71
72
73
74
75
76
77
78
79







-
+


-
+


-
+












};



// Initialization
if (!thesaurus.bInit && typeof(process) !== 'undefined') {
    // NodeJS
    thesaurus.init(helpers.loadFile(__dirname+"/thesaurus_data.json"));
    thesaurus.init(helpers.loadFile(__dirname+"/thesaurus1_data.json"), helpers.loadFile(__dirname+"/thesaurus2_data.json"));
} else if (!thesaurus.bInit && typeof(browser) !== 'undefined') {
    // WebExtension Standard (but not in Worker)
    thesaurus.init(helpers.loadFile(browser.runtime.getURL("grammalecte/fr/thesaurus_data.json")));
    thesaurus.init(helpers.loadFile(browser.runtime.getURL("grammalecte/fr/thesaurus1_data.json")), helpers.loadFile(browser.runtime.getURL("grammalecte/fr/thesaurus2_data.json")));
} else if (!thesaurus.bInit && typeof(chrome) !== 'undefined') {
    // WebExtension Chrome (but not in Worker)
    thesaurus.init(helpers.loadFile(chrome.runtime.getURL("grammalecte/fr/thesaurus_data.json")));
    thesaurus.init(helpers.loadFile(chrome.runtime.getURL("grammalecte/fr/thesaurus1_data.json")), helpers.loadFile(chrome.runtime.getURL("grammalecte/fr/thesaurus2_data.json")));
} else if (thesaurus.bInit){
    console.log("Module thesaurus déjà initialisé");
} else {
    //console.log("Module thesaurus non initialisé");
}


if (typeof(exports) !== 'undefined') {
    exports._dWord = thesaurus._dWord;
    exports.init = thesaurus.init;
    exports.getSyns = thesaurus.getSyns;
}

Added gc_lang/fr/modules-js/thesaurus1_data.json version [89efb4ec45].

cannot compute difference between binary files

Added gc_lang/fr/modules-js/thesaurus2_data.json version [91477a6b41].

cannot compute difference between binary files

Deleted gc_lang/fr/modules-js/thesaurus_data.json version [98eda155ed].

cannot compute difference between binary files

Modified gc_lang/fr/modules/conj_data.py from [98e2c62b97] to [69e6366454].

cannot compute difference between binary files

Modified gc_lang/fr/modules/phonet_data.py from [db27fd047c] to [4a78ac706b].

cannot compute difference between binary files

Modified gc_lang/fr/webext/gce_worker.js from [946a071330] to [94be7196a8].

174
175
176
177
178
179
180
181

182
183
184
185
186
187
188
174
175
176
177
178
179
180

181
182
183
184
185
186
187
188







-
+







function init (sExtensionPath, dOptions=null, sContext="JavaScript", oInfo={}) {
    try {
        if (!bInitDone) {
            //console.log("[Worker] Loading… Extension path: " + sExtensionPath);
            conj.init(helpers.loadFile(sExtensionPath + "/grammalecte/fr/conj_data.json"));
            phonet.init(helpers.loadFile(sExtensionPath + "/grammalecte/fr/phonet_data.json"));
            mfsp.init(helpers.loadFile(sExtensionPath + "/grammalecte/fr/mfsp_data.json"));
            thesaurus.init(helpers.loadFile(sExtensionPath + "/grammalecte/fr/thesaurus_data.json"));
            thesaurus.init(helpers.loadFile(sExtensionPath + "/grammalecte/fr/thesaurus1_data.json"), helpers.loadFile(sExtensionPath + "/grammalecte/fr/thesaurus2_data.json"));
            //console.log("[Worker] Modules have been initialized…");
            gc_engine.load(sContext, "aHSL", sExtensionPath+"grammalecte/graphspell/_dictionaries");
            oSpellChecker = gc_engine.getSpellChecker();
            oTest = new TestGrammarChecking(gc_engine, sExtensionPath+"/grammalecte/fr/tests_data.json");
            oTokenizer = new Tokenizer("fr");
            if (dOptions !== null) {
                if (!(dOptions instanceof Map)) {

Modified make.py from [78c9ddad51] to [0e97d75023].

174
175
176
177
178
179
180
181

182
183
184
185
186
187
188
174
175
176
177
178
179
180

181
182
183
184
185
186
187
188







-
+







    for sf in os.listdir(spLangPack):
        if not os.path.isdir(spLangPack+"/"+sf):
            hZip.write(spLangPack+"/"+sf, sAddPath+spLangPack+"/"+sf)


def create (sLang, xConfig, bInstallOXT, bJavaScript, bUseCache):
    "make Grammalecte for project <sLang>"
    print(f">>>> MAKE GC ENGINE: {sLang} <<<<")
    print(f"========== MAKE GC ENGINE: {sLang} ==========")

    #### READ CONFIGURATION
    print("> read configuration...")
    spLang = "gc_lang/" + sLang

    dVars = xConfig._sections['args']
    dVars['locales'] = dVars["locales"].replace("_", "-")
279
280
281
282
283
284
285
286

287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306

307
308
309
310
311
312
313
279
280
281
282
283
284
285

286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305

306
307
308
309
310
311
312
313







-
+



















-
+







            buildjs.build(sLang, dVars)

    return dVars['version']


def copyGraphspellCore (bJavaScript=False):
    "copy Graphspell package in Grammalecte package"
    # NOTE(review): both banner lines below are printed — this looks like a
    # rendered diff overlay of a message change; confirm which one is intended.
    print("> Copy Graphspell package in Grammalecte package")
    print("===== Copy Graphspell package in Grammalecte package =====")
    # recreate the Python package folder and copy every top-level file
    helpers.createCleanFolder("grammalecte/graphspell")
    os.makedirs("grammalecte/graphspell/_dictionaries", exist_ok=True)
    for sf in os.listdir("graphspell"):
        if not os.path.isdir("graphspell/"+sf):
            shutil.copy2("graphspell/"+sf, "grammalecte/graphspell")
    if bJavaScript:
        # same for the JavaScript package, with template expansion:
        # dVars maps each js_extension file name (without ".js") to its content
        helpers.createCleanFolder("grammalecte-js/graphspell")
        os.makedirs("grammalecte-js/graphspell/_dictionaries", exist_ok=True)
        dVars = {}
        for sf in os.listdir("js_extension"):
            dVars[sf[:-3]] = open("js_extension/"+sf, "r", encoding="utf-8").read()
        for sf in os.listdir("graphspell-js"):
            if not os.path.isdir("graphspell-js/"+sf):
                # NOTE(review): copy2 writes the destination, then
                # copyAndFileTemplate overwrites the same file — the copy2 call
                # appears redundant (presumably superseded by the template
                # expansion); TODO confirm against make.py history.
                shutil.copy2("graphspell-js/"+sf, "grammalecte-js/graphspell")
                helpers.copyAndFileTemplate("graphspell-js/"+sf, "grammalecte-js/graphspell/"+sf, dVars)


def copyGraphspellDictionaries (dVars, bJavaScript=False, bCommunityDict=False, bPersonalDict=False):
    "copy requested Graphspell dictionaries in Grammalecte package"
    print("> Copy requested Graphspell dictionaries in Grammalecte package")
    print("===== Copy requested Graphspell dictionaries in Grammalecte package =====")
    dVars["dic_main_filename_py"] = ""
    dVars["dic_main_filename_js"] = ""
    dVars["dic_community_filename_py"] = ""
    dVars["dic_community_filename_js"] = ""
    dVars["dic_personal_filename_py"] = ""
    dVars["dic_personal_filename_js"] = ""
    lDict = [ ("main", s)  for s in dVars['dic_filenames'].split(",") ]
411
412
413
414
415
416
417
418

419
420
421
422
423
424
425
411
412
413
414
415
416
417

418
419
420
421
422
423
424
425







-
+







    xParser.add_argument("-l", "--lint_web_ext", help="web-ext lint on the WebExtension", action="store_true")
    xParser.add_argument("-tb", "--thunderbird", help="Launch Thunderbird", action="store_true")
    xParser.add_argument("-tbb", "--thunderbird_beta", help="Launch Thunderbird Beta", action="store_true")
    xParser.add_argument("-i", "--install", help="install the extension in Writer (path of unopkg must be set in config.ini)", action="store_true")
    xArgs = xParser.parse_args()

    oNow = datetime.datetime.now()
    print("============== MAKE GRAMMALECTE at {0.hour:>2} h {0.minute:>2} min {0.second:>2} s ==============".format(oNow))
    print("#################### MAKE GRAMMALECTE at {0.hour:>2} h {0.minute:>2} min {0.second:>2} s ####################".format(oNow))

    if xArgs.build_data:
        xArgs.build_data_before = True
        xArgs.build_data_after = True

    os.makedirs("_build", exist_ok=True)
    os.makedirs("grammalecte", exist_ok=True)