From c2c2ce7ff80968bfaae08dcadc0c910b9eb91dd9 Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Thu, 27 Jun 2019 11:44:02 +0200 Subject: [PATCH] making sorted words sorted a bit more non-randomly. --- src/representation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/representation.py b/src/representation.py index 78a6388..2b5ebf2 100644 --- a/src/representation.py +++ b/src/representation.py @@ -52,7 +52,8 @@ class WordFormAnyCR(ComponentRepresentation): words_counter = [] for word in self.words: words_counter.append((word.msd, word.lemma)) - sorted_words = sorted(set(words_counter), key=lambda x: -words_counter.count(x)) + sorted_words = sorted( + set(words_counter), key=lambda x: -words_counter.count(x) + (sum(ord(l) for l in x[1]) / 1e5 if x[1] is not None else .5)) for word_msd, word_lemma in sorted_words: for agr in self.agreement: