diff --git a/src/representation.py b/src/representation.py index 78a6388..2b5ebf2 100644 --- a/src/representation.py +++ b/src/representation.py @@ -52,7 +52,8 @@ class WordFormAnyCR(ComponentRepresentation): words_counter = [] for word in self.words: words_counter.append((word.msd, word.lemma)) - sorted_words = sorted(set(words_counter), key=lambda x: -words_counter.count(x)) + sorted_words = sorted( + set(words_counter), key=lambda x: -words_counter.count(x) + (sum(ord(l) for l in x[1]) / 1e5 if x[1] is not None else .5)) for word_msd, word_lemma in sorted_words: for agr in self.agreement: