Skip to content

Commit

Permalink
producing a compilable WordNetAra.gf, with a lot of junk
Browse files Browse the repository at this point in the history
  • Loading branch information
aarneranta committed Sep 28, 2023
1 parent 5f4bb01 commit 67d1e24
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/arabic/wiktionary/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ all:
python3 read_wiktionary.py gf-map >source_of_MorphoDictAra.jsonl
gf -make MorphoDictAra.gf
python3 read_wiktionary.py eval-funs >eval.jsonl
python3 to_wordnet.py >wordnet-arabic.jsonl
python3 to_wordnet.py >WordNetAra.gf
python3 read_wiktionary.py error-analysis
21 changes: 18 additions & 3 deletions src/arabic/wiktionary/to_wordnet.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
import csv
import json

Expand All @@ -6,7 +7,6 @@
# to run: python3 to_wordnet.py >WordNetAra.gf
# the following are assumed


# from https://www.grammaticalframework.org/~krasimir/arabic.tsv.gz
# WN_TSV = 'arabic.tsv' # Krasimir
WN_TSV = 'ar2en_words_gf.csv' # Zarzoura
Expand All @@ -31,14 +31,29 @@

# abandon_1_V2 ParseAra ترك (1,1,1,3,322,3)
# Emit a GF concrete syntax module (WordNetAra) on stdout: one `lin` rule per
# row of the WordNet word list, linked to the morphological dictionary where a
# matching entry exists and filled with a guess otherwise.
#
# Relies on names defined earlier in this file: WN_TSV (input path), funmap
# (maps (word, category letter) to a list of dicts with 'fun' and 'sense'
# keys), unvocalize and get_arabic.
#
# The input holds Arabic text, so the encoding is fixed explicitly instead of
# depending on the platform default.
with open(WN_TSV, encoding='utf-8') as wnfile:
    print('--# -path=.:../gf-wordnet')
    print('concrete WordNetAra of WordNet = CatAra ** open MorphoDictAra, MoreAra, ParadigmsAra in {')

    for row in wnfile:
        # Blank lines carry no WordNet function; skipping them avoids an
        # IndexError on the field lookup below.
        if not row.strip():
            continue

        # NOTE: the Arabic word is extracted with get_arabic(); per the
        # original author's note, taking the last field of the row did not
        # yield the Arabic word.
        word = unvocalize(get_arabic(row))
        wnfun = row.split()[-1]  # 0 in Krasimir

        # The last letter of the function name; the dict only contains N, A, V.
        cat = next((c for c in reversed(wnfun) if c.isalpha()), '')
        funs = funmap.get((word, cat), [])

        # Two-place verbs are wrapped in mkV2.
        mk = 'mkV2 ' if wnfun.endswith('V2') else ''
        candidates = [
            f"lin {wnfun} = {mk}{fs['fun']} ; -- {fs['sense']}"
            for fs in funs
        ]

        if candidates:
            # The first match becomes the active rule; the remaining ones are
            # kept as GF comments so that they can be inspected by hand.
            print(candidates[0])
            for alternative in candidates[1:]:
                print('--', alternative)
            continue

        # No dictionary entry: guess with a smart paradigm for plain A/N/V
        # functions, otherwise leave the linearization empty.
        suffix = wnfun[-2:]
        if suffix in ('_A', '_N', '_V'):
            lin = f'mk{suffix[-1]} "{word}"'
        else:
            lin = 'variants {}'
        print(f'lin {wnfun} = {lin} ; --- guess from {word}')

    print('}')


0 comments on commit 67d1e24

Please sign in to comment.