Make the order of the sorted words a bit less random.

This commit is contained in:
Ozbolt Menegatti 2019-06-27 11:44:02 +02:00
parent 8b06c4ec38
commit c2c2ce7ff8

View File

@@ -52,7 +52,8 @@ class WordFormAnyCR(ComponentRepresentation):
words_counter = [] words_counter = []
for word in self.words: for word in self.words:
words_counter.append((word.msd, word.lemma)) words_counter.append((word.msd, word.lemma))
sorted_words = sorted(set(words_counter), key=lambda x: -words_counter.count(x)) sorted_words = sorted(
set(words_counter), key=lambda x: -words_counter.count(x) + (sum(ord(l) for l in x[1]) / 1e5 if x[1] is not None else .5))
for word_msd, word_lemma in sorted_words: for word_msd, word_lemma in sorted_words:
for agr in self.agreement: for agr in self.agreement: