Quick fix for missing dispersions

This commit is contained in:
2020-07-24 10:06:54 +02:00
parent f330a37764
commit 49a8d5123e
4 changed files with 24 additions and 3 deletions

View File

@@ -151,7 +151,27 @@ class StatsFormatter(Formatter):
word = words[idx]
key = (sidx, idx, word.lemma)
distribution = self.colocation_ids.dispersions[key]
# try to fix missing dispersions
if key not in self.colocation_ids.dispersions:
if word.lemma == 'k':
new_key = (sidx, idx, 'h')
elif word.lemma == 'h':
new_key = (sidx, idx, 'k')
elif word.lemma == 's':
new_key = (sidx, idx, 'z')
elif word.lemma == 'z':
new_key = (sidx, idx, 's')
else:
new_key = (sidx, idx, '')
if new_key in self.colocation_ids.dispersions:
key = new_key
print('Dispersions fixed.')
else:
print('Dispersions not fixed.')
if key in self.colocation_ids.dispersions:
distribution = self.colocation_ids.dispersions[key]
else:
distribution = 1
return [self.stat_str(distribution)]
def content_right(self, freq):
@@ -203,4 +223,4 @@ class OutFormatter(Formatter):
self.f2.new_match(match)
def __str__(self):
return "out"
return "out"