Adding --fixed-restriction-order parameter

This commit is contained in:
Luka 2020-10-27 09:48:34 +01:00
parent dd5fa4a1b8
commit 25db8eeb7a
2 changed files with 24 additions and 2 deletions

View File

@ -1,7 +1,8 @@
class Postprocessor: class Postprocessor:
def __init__(self, fix_one_letter_words=True): def __init__(self, fix_one_letter_words=True, fixed_restriction_order=False):
self.fix_one_letter_words = fix_one_letter_words self.fix_one_letter_words = fix_one_letter_words
self.fixed_restriction_order = fixed_restriction_order
@staticmethod @staticmethod
def fix_sz(next_word): def fix_sz(next_word):
@ -28,3 +29,19 @@ class Postprocessor:
match[col_id].text = correct_letter match[col_id].text = correct_letter
collocation_id = [collocation_id[0]] + [tuple(line) for line in collocation_id[1:]] collocation_id = [collocation_id[0]] + [tuple(line) for line in collocation_id[1:]]
return match, collocation_id return match, collocation_id
def is_fixed_restriction_order(self, match):
    """Tell whether the keys of *match* keep ascending order.

    The entries of ``match`` are visited in ascending order of their
    values' ``int_id`` attribute. Along that walk every key except
    ``'#'`` is converted with ``int()`` and must never be smaller than
    the key seen before it.

    NOTE(review): ``int_id`` is presumably the word's position in the
    text and the keys are component indices - confirm against the caller.

    Args:
        match: Mapping whose keys are ``'#'`` or strings accepted by
            ``int()`` and whose values expose an ``int_id`` attribute.

    Returns:
        ``True`` when ``self.fixed_restriction_order`` is falsy (the
        check is disabled) or when the keys never decrease; ``False``
        as soon as a key is smaller than its predecessor.

    Raises:
        ValueError: If a visited key other than ``'#'`` cannot be
            converted with ``int()``.
    """
    if not self.fixed_restriction_order:
        return True

    # Walk the sorted pairs directly: no throwaway dict is needed and the
    # result no longer depends on dict insertion-order preservation.
    ordered_items = sorted(match.items(), key=lambda item: item[1].int_id)

    prev_id = -1
    for key, _ in ordered_items:
        if key == '#':
            # '#' is not a numeric key, so it takes no part in the check.
            continue
        int_key = int(key)
        if int_key < prev_id:
            return False
        prev_id = int_key
    return True

View File

@ -31,6 +31,8 @@ def match_file(words, structures, postprocessor):
for w in words: for w in words:
mhere = s.match(w) mhere = s.match(w)
for match in mhere: for match in mhere:
if not postprocessor.is_fixed_restriction_order(match):
continue
colocation_id = [[idx, w.lemma] for idx, w in match.items()] colocation_id = [[idx, w.lemma] for idx, w in match.items()]
colocation_id = [s.id] + list(sorted(colocation_id, key=lambda x: x[0])) colocation_id = [s.id] + list(sorted(colocation_id, key=lambda x: x[0]))
match, collocation_id = postprocessor.process(match, colocation_id) match, collocation_id = postprocessor.process(match, colocation_id)
@ -48,6 +50,7 @@ def main(args):
database = Database(args) database = Database(args)
match_store = MatchStore(args, database) match_store = MatchStore(args, database)
word_stats = WordStats(lemma_msds, database) word_stats = WordStats(lemma_msds, database)
postprocessor = Postprocessor(fixed_restriction_order=args.fixed_restriction_order)
for words in load_files(args, database): for words in load_files(args, database):
if words is None: if words is None:
@ -55,7 +58,6 @@ def main(args):
continue continue
start_time = time.time() start_time = time.time()
postprocessor = Postprocessor()
matches = match_file(words, structures, postprocessor) matches = match_file(words, structures, postprocessor)
match_store.add_matches(matches) match_store.add_matches(matches)
@ -155,6 +157,9 @@ if __name__ == '__main__':
help='Separator in output file', default="\t") help='Separator in output file', default="\t")
parser.add_argument('--ignore-punctuations', parser.add_argument('--ignore-punctuations',
help="Sort in reversed ored", action='store_true') help="Sort in reversed ored", action='store_true')
parser.add_argument('--fixed-restriction-order',
help='If used, words have to be in the same order as components.',
action='store_true')
args = parser.parse_args() args = parser.parse_args()
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper()) logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())