Quick fix for missing dispersions
This commit is contained in:
parent
f330a37764
commit
49a8d5123e
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -7,6 +7,7 @@
|
|||
.vscode
|
||||
__pycache__
|
||||
|
||||
run.sh
|
||||
prev
|
||||
old
|
||||
data
|
||||
|
|
1
run.sh
1
run.sh
|
@ -1 +0,0 @@
|
|||
pypy3 src/wani.py data/Kolokacije_strukture_JOS-32-representation_3D_08_1.xml data/input --out data/output --sloleks_db '<PUT DB CREDENTIALS HERE!>' --collocation_sentence_map_dest data/collocation_sentence_mapper --db /mnt/tmp/mysql-wani --multiple-output --load-sloleks
|
1
run.sh.example
Executable file
1
run.sh.example
Executable file
|
@ -0,0 +1 @@
|
|||
pypy3 src/wani.py data/Kolokacije_strukture_JOS-32-representation_3D_08_1.xml data/input --out data/output --sloleks_db '<sloleks db data>' --collocation_sentence_map_dest data/collocation-sentence-mapper --db /mnt/tmp/mysql-wani --multiple-output --load-sloleks
|
|
@ -151,7 +151,27 @@ class StatsFormatter(Formatter):
|
|||
|
||||
word = words[idx]
|
||||
key = (sidx, idx, word.lemma)
|
||||
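# Try to fix missing dispersions: if the key is absent, retry with the lemma swapped (k<->h, s<->z) or, for any other lemma, with an empty lemma.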
|
||||
if key not in self.colocation_ids.dispersions:
|
||||
if word.lemma == 'k':
|
||||
new_key = (sidx, idx, 'h')
|
||||
elif word.lemma == 'h':
|
||||
new_key = (sidx, idx, 'k')
|
||||
elif word.lemma == 's':
|
||||
new_key = (sidx, idx, 'z')
|
||||
elif word.lemma == 'z':
|
||||
new_key = (sidx, idx, 's')
|
||||
else:
|
||||
new_key = (sidx, idx, '')
|
||||
if new_key in self.colocation_ids.dispersions:
|
||||
key = new_key
|
||||
print('Dispersions fixed.')
|
||||
else:
|
||||
print('Dispersions not fixed.')
|
||||
if key in self.colocation_ids.dispersions:
|
||||
distribution = self.colocation_ids.dispersions[key]
|
||||
else:
|
||||
distribution = 1
|
||||
return [self.stat_str(distribution)]
|
||||
|
||||
def content_right(self, freq):
|
||||
|
|
Loading…
Reference in New Issue
Block a user