Adding --fixed-restriction-order parameter

i2198
Luka 4 years ago
parent dd5fa4a1b8
commit 25db8eeb7a

@ -1,7 +1,8 @@
class Postprocessor:
def __init__(self, fix_one_letter_words=True):
def __init__(self, fix_one_letter_words=True, fixed_restriction_order=False):
self.fix_one_letter_words = fix_one_letter_words
self.fixed_restriction_order = fixed_restriction_order
@staticmethod
def fix_sz(next_word):
@ -28,3 +29,19 @@ class Postprocessor:
match[col_id].text = correct_letter
collocation_id = [collocation_id[0]] + [tuple(line) for line in collocation_id[1:]]
return match, collocation_id
def is_fixed_restriction_order(self, match):
    """Tell whether a match respects the fixed component order.

    When ``self.fixed_restriction_order`` is falsy the check is disabled
    and every match is accepted.

    Otherwise the keys of ``match`` are visited in ascending order of
    their value's ``int_id``; the match is accepted only if the integer
    value of those keys never decreases along the way. The special key
    ``'#'`` is ignored.

    NOTE(review): presumably keys are component indices of a structure
    and ``int_id`` is the word's position in the sentence -- confirm
    against the caller.

    Args:
        match: mapping from key (``'#'`` or a string convertible with
            ``int``) to an object exposing an ``int_id`` attribute.

    Returns:
        ``True`` if the check is disabled or the order holds,
        ``False`` otherwise.

    Raises:
        ValueError: if a key other than ``'#'`` cannot be converted
            with ``int``.
    """
    if not self.fixed_restriction_order:
        # Option switched off: nothing to verify.
        return True

    # Only the keys are needed afterwards, so sort the items by word
    # position and keep the keys; no intermediate dict is required.
    keys_by_position = [
        key
        for key, _ in sorted(match.items(), key=lambda item: item[1].int_id)
    ]

    prev_id = -1
    for key in keys_by_position:
        if key == '#':
            continue
        int_key = int(key)
        if prev_id > int_key:
            # A lower-numbered key was matched by a later word.
            return False
        prev_id = int_key
    return True

@ -31,6 +31,8 @@ def match_file(words, structures, postprocessor):
for w in words:
mhere = s.match(w)
for match in mhere:
if not postprocessor.is_fixed_restriction_order(match):
continue
colocation_id = [[idx, w.lemma] for idx, w in match.items()]
colocation_id = [s.id] + list(sorted(colocation_id, key=lambda x: x[0]))
match, collocation_id = postprocessor.process(match, colocation_id)
@ -48,6 +50,7 @@ def main(args):
database = Database(args)
match_store = MatchStore(args, database)
word_stats = WordStats(lemma_msds, database)
postprocessor = Postprocessor(fixed_restriction_order=args.fixed_restriction_order)
for words in load_files(args, database):
if words is None:
@ -55,7 +58,6 @@ def main(args):
continue
start_time = time.time()
postprocessor = Postprocessor()
matches = match_file(words, structures, postprocessor)
match_store.add_matches(matches)
@ -155,6 +157,9 @@ if __name__ == '__main__':
help='Separator in output file', default="\t")
parser.add_argument('--ignore-punctuations',
help="Sort in reversed ored", action='store_true')
parser.add_argument('--fixed-restriction-order',
help='If used, words have to be in the same order as components.',
action='store_true')
args = parser.parse_args()
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())

Loading…
Cancel
Save