Overview
| Comment: | [fr][build] contrôle des entrées |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | trunk | fr | build |
| Files: | files | file ages | folders |
| SHA3-256: | 605502dcf59cbc0e4f8a25473054beb2 |
| User & Date: | olr on 2025-12-13 20:50:35 |
| Other Links: | manifest | tags |
Context
|
2025-12-13
| ||
| 20:50 | [fr][build] contrôle des entrées Leaf check-in: 605502dcf5 user: olr tags: trunk, fr, build | |
| 19:35 | [fr] Thesaurus: fichier d’index inutile dans les sources check-in: 264f54b664 user: olr tags: trunk, fr | |
Changes
Modified gc_lang/fr/dictionnaire/genfrdic.py from [3f7c9cfb70] to [d7ae02087f].
| ︙ | ︙ | |||
340 341 342 343 344 345 346 |
def sortLexiconByIdx (self):
echo(' * Dictionnaire - tri du lexique (par index)...')
self.lFlexions = sorted(self.lFlexions, key=Flexion.keyIdx)
def checkEntries (self):
echo(' * Dictionnaire - contrôle des entrées...')
| | | | 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 |
def sortLexiconByIdx (self):
    """Replace self.lFlexions with a new list ordered by Flexion.keyIdx."""
    echo(' * Dictionnaire - tri du lexique (par index)...')
    # sorted() builds a fresh list; the previous list object is left untouched.
    lSorted = sorted(self.lFlexions, key=Flexion.keyIdx)
    self.lFlexions = lSorted
def checkEntries (self):
    """Call check() on every entry of self.lEntry, in list order."""
    echo(' * Dictionnaire - contrôle des entrées...')
    for oEntry in self.lEntry:
        oEntry.check()
def generateFlexions (self):
echo(' * Lexique - genèse des formes fléchies...')
for oEntry in self.lEntry:
oEntry.generateFlexions(self.dFlags)
self.lFlexions.extend(oEntry.lFlexions)
# Count flexions in multiple entries
|
| ︙ | ︙ | |||
692 693 694 695 696 697 698 |
self.lemma = firstElems[0]
self.flags = firstElems[1] if len(firstElems) > 1 else ''
# morph
for i in range(1, nElems):
if len(elems[i]) > 3 and elems[i][2] == ':':
sAttr, sContent = elems[i].split(':', 1)
if sAttr in {"po", "is", "ds", "ts", "ip", "dp", "tp", "sp", "pa", "st", "al", "ph", "lx", "se", "et", "di", "fq", "id"}:
| < < < < | | | > > > > > > > > > > > > > > | 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 |
self.lemma = firstElems[0]
self.flags = firstElems[1] if len(firstElems) > 1 else ''
# morph
for i in range(1, nElems):
if len(elems[i]) > 3 and elems[i][2] == ':':
sAttr, sContent = elems[i].split(':', 1)
if sAttr in {"po", "is", "ds", "ts", "ip", "dp", "tp", "sp", "pa", "st", "al", "ph", "lx", "se", "et", "di", "fq", "id"}:
# renommage des attributs
if sAttr == "is":
sAttr = "iz"
if sAttr == "id":
sAttr = "iD"
# modification
try:
if sAttr in {"po", "iz", "ds", "ts", "ip", "dp", "tp", "sp", "pa", "st", "al", "ph", "lx", "se", "et"}:
sContent = getattr(self, sAttr) + " " + sContent
setattr(self, sAttr, sContent.strip())
except:
echo(f' ## Erreur. Attribut non attribuable: {sAttr} @ {self.lemma}/{self.flags}')
else:
echo(f' ## Champ inconnu: {sAttr} @ {self.lemma}/{self.flags}')
else:
self.err = self.err + elems[i]
if self.err:
echo(f"\n## Erreur dans le dictionnaire : {self.err}")
echo(" @ : " + self.lemma)
def __str__ (self):
    """Return '<lemma>/<flags> <morph>', where <morph> is self.getMorph(2)."""
    # getMorph() is called before the attributes are read, as in the
    # str.format() version, where it was evaluated as a call argument.
    sMorph = self.getMorph(2)
    return f"{self.lemma}/{self.flags} {sMorph}"
def check (self):
sErr = ''
# lemme
if self.lemma == '':
sErr += ' > lemme vide'
if re.match(r"^\s", self.lemma):
sErr += ' > espace en début de lemme <' + self.lemma + '>'
if re.search(r"\s$", self.lemma):
sErr += ' > espace en fin de lemme <' + self.lemma + '>'
# détection des tags inconnus
if self.po:
for sTag in self.po.split():
if sTag not in tags.dTags["po"] and not re.match("v[0123]", sTag):
sErr += f" > Étiquette inconnue pour l’attribut <po>: {sTag}"
if self.iz:
for sTag in self.iz.split():
if sTag not in tags.dTags["is"]:
sErr += f" > Étiquette inconnue pour l’attribut <is>: {sTag}"
for sAttr in {"lx", "se", "et"}:
if getattr(self, sAttr):
for sTag in getattr(self, sAttr).split(" "):
if sTag not in tags.dTags[sAttr] and not re.match("v[0123]", sTag):
sErr += f" > Étiquette inconnue pour l’attribut <{sAttr}>: {sTag}"
# verbe
if re.match(r"v[0123]", self.po) and not re.match(r"[eas_][ix_][tx_][nx_][pqreuvx_][mx_][ex_z][ax_z]\b", self.po[2:]):
sErr += ' > verbe mal étiqueté: ' + self.po
if re.match(r"[abcdf]0", self.flags):
if not re.search(r"p[+.]", self.flags):
sErr += ' > verbe sans participe passé: ' + self.po
if "()" not in self.flags:
|
| ︙ | ︙ | |||
759 760 761 762 763 764 765 766 767 768 769 770 771 772 |
sErr += ' > étiquettes <is> incohérentes '
if re.search(r"pl|sg|inv", self.iz) and re.match(r"[SXAIFGW](?!=)", self.flags):
sErr += ' > étiquettes <is> incohérentes '
if self.iz.endswith(("mas", "fem", "epi")) and (not self.flags or not self.flags.startswith(("S", "X", "F", "W", "A", "I", "U"))):
sErr += ' > étiquettes <is> incomplètes'
if re.match(r"[SXAIFGW](?!=)", self.flags) and "()" not in self.flags:
sErr += ' > drapeau () manquant'
if sErr:
echo(f" erreur {sErr} sur " + self.__str__())
def setTagsFrom (self, oEnt):
self.po = oEnt.po
self.iz = oEnt.iz
self.ds = oEnt.ds
| > | 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 |
sErr += ' > étiquettes <is> incohérentes '
if re.search(r"pl|sg|inv", self.iz) and re.match(r"[SXAIFGW](?!=)", self.flags):
sErr += ' > étiquettes <is> incohérentes '
if self.iz.endswith(("mas", "fem", "epi")) and (not self.flags or not self.flags.startswith(("S", "X", "F", "W", "A", "I", "U"))):
sErr += ' > étiquettes <is> incomplètes'
if re.match(r"[SXAIFGW](?!=)", self.flags) and "()" not in self.flags:
sErr += ' > drapeau () manquant'
# print
if sErr:
echo(f" erreur {sErr} sur " + self.__str__())
def setTagsFrom (self, oEnt):
self.po = oEnt.po
self.iz = oEnt.iz
self.ds = oEnt.ds
|
| ︙ | ︙ |
Modified gc_lang/fr/dictionnaire/thes_build.py from [b66a405bcd] to [8b4fd3bafd].
| ︙ | ︙ | |||
55 56 57 58 59 60 61 |
nClass = 0
nClassFound = 0
for i, sLine in enumerate(genRead, 2):
sLine = sLine.strip()
if re.search(r"^[^|]+\|[1-9][0-9]*$", sLine):
# new entry
if nClass != nClassFound:
| | | 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
nClass = 0
nClassFound = 0
for i, sLine in enumerate(genRead, 2):
sLine = sLine.strip()
if re.search(r"^[^|]+\|[1-9][0-9]*$", sLine):
# new entry
if nClass != nClassFound:
print(" Erreur. Ligne:", iEntryLine, ", nombre de listes incorrect")
iEntryLine = i
sEntry, sNum = sLine.split("|")
self.dThesaurus[sEntry] = []
nClass = int(sNum)
nClassFound = 0
else:
# new list of synonyms
|
| ︙ | ︙ |