Quick fix for missing dispersions

valency
lkrsnik 4 years ago
parent f330a37764
commit 49a8d5123e

1
.gitignore vendored

@ -7,6 +7,7 @@
.vscode
__pycache__
run.sh
prev
old
data

@ -1 +0,0 @@
pypy3 src/wani.py data/Kolokacije_strukture_JOS-32-representation_3D_08_1.xml data/input --out data/output --sloleks_db '<PUT DB CREDENTIALS HERE!>' --collocation_sentence_map_dest data/collocation_sentence_mapper --db /mnt/tmp/mysql-wani --multiple-output --load-sloleks

@ -0,0 +1 @@
pypy3 src/wani.py data/Kolokacije_strukture_JOS-32-representation_3D_08_1.xml data/input --out data/output --sloleks_db '<sloleks db data>' --collocation_sentence_map_dest data/collocation-sentence-mapper --db /mnt/tmp/mysql-wani --multiple-output --load-sloleks

@ -151,7 +151,27 @@ class StatsFormatter(Formatter):
word = words[idx]
key = (sidx, idx, word.lemma)
distribution = self.colocation_ids.dispersions[key]
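# Try to fix missing dispersions: if the key is absent, retry with the alternate preposition form (k <-> h, s <-> z) or, for any other lemma, an empty lemma; fall back to 1 if that is missing too.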
if key not in self.colocation_ids.dispersions:
if word.lemma == 'k':
new_key = (sidx, idx, 'h')
elif word.lemma == 'h':
new_key = (sidx, idx, 'k')
elif word.lemma == 's':
new_key = (sidx, idx, 'z')
elif word.lemma == 'z':
new_key = (sidx, idx, 's')
else:
new_key = (sidx, idx, '')
if new_key in self.colocation_ids.dispersions:
key = new_key
print('Dispersions fixed.')
else:
print('Dispersions not fixed.')
if key in self.colocation_ids.dispersions:
distribution = self.colocation_ids.dispersions[key]
else:
distribution = 1
return [self.stat_str(distribution)]
def content_right(self, freq):
@ -203,4 +223,4 @@ class OutFormatter(Formatter):
self.f2.new_match(match)
def __str__(self):
return "out"
return "out"

Loading…
Cancel
Save