Quick fix for missing dispersions
This commit is contained in:
parent
f330a37764
commit
49a8d5123e
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -7,6 +7,7 @@
|
||||||
.vscode
|
.vscode
|
||||||
__pycache__
|
__pycache__
|
||||||
|
|
||||||
|
run.sh
|
||||||
prev
|
prev
|
||||||
old
|
old
|
||||||
data
|
data
|
||||||
|
|
1
run.sh
1
run.sh
|
@ -1 +0,0 @@
|
||||||
pypy3 src/wani.py data/Kolokacije_strukture_JOS-32-representation_3D_08_1.xml data/input --out data/output --sloleks_db '<PUT DB CREDENTIALS HERE!>' --collocation_sentence_map_dest data/collocation_sentence_mapper --db /mnt/tmp/mysql-wani --multiple-output --load-sloleks
|
|
1
run.sh.example
Executable file
1
run.sh.example
Executable file
|
@ -0,0 +1 @@
|
||||||
|
pypy3 src/wani.py data/Kolokacije_strukture_JOS-32-representation_3D_08_1.xml data/input --out data/output --sloleks_db '<sloleks db data>' --collocation_sentence_map_dest data/collocation-sentence-mapper --db /mnt/tmp/mysql-wani --multiple-output --load-sloleks
|
|
@ -151,7 +151,27 @@ class StatsFormatter(Formatter):
|
||||||
|
|
||||||
word = words[idx]
|
word = words[idx]
|
||||||
key = (sidx, idx, word.lemma)
|
key = (sidx, idx, word.lemma)
|
||||||
distribution = self.colocation_ids.dispersions[key]
|
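# Try to fix missing dispersions: retry the lookup with the alternate preposition variant (k/h, s/z), or with an empty lemma otherwise.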
|
||||||
|
if key not in self.colocation_ids.dispersions:
|
||||||
|
if word.lemma == 'k':
|
||||||
|
new_key = (sidx, idx, 'h')
|
||||||
|
elif word.lemma == 'h':
|
||||||
|
new_key = (sidx, idx, 'k')
|
||||||
|
elif word.lemma == 's':
|
||||||
|
new_key = (sidx, idx, 'z')
|
||||||
|
elif word.lemma == 'z':
|
||||||
|
new_key = (sidx, idx, 's')
|
||||||
|
else:
|
||||||
|
new_key = (sidx, idx, '')
|
||||||
|
if new_key in self.colocation_ids.dispersions:
|
||||||
|
key = new_key
|
||||||
|
print('Dispersions fixed.')
|
||||||
|
else:
|
||||||
|
print('Dispersions not fixed.')
|
||||||
|
if key in self.colocation_ids.dispersions:
|
||||||
|
distribution = self.colocation_ids.dispersions[key]
|
||||||
|
else:
|
||||||
|
distribution = 1
|
||||||
return [self.stat_str(distribution)]
|
return [self.stat_str(distribution)]
|
||||||
|
|
||||||
def content_right(self, freq):
|
def content_right(self, freq):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user