Adding --fixed-restriction-order parameter
This commit is contained in:
parent
dd5fa4a1b8
commit
25db8eeb7a
|
@ -1,7 +1,8 @@
|
||||||
|
|
||||||
class Postprocessor:
|
class Postprocessor:
|
||||||
def __init__(self, fix_one_letter_words=True):
|
def __init__(self, fix_one_letter_words=True, fixed_restriction_order=False):
|
||||||
self.fix_one_letter_words = fix_one_letter_words
|
self.fix_one_letter_words = fix_one_letter_words
|
||||||
|
self.fixed_restriction_order = fixed_restriction_order
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def fix_sz(next_word):
|
def fix_sz(next_word):
|
||||||
|
@ -28,3 +29,19 @@ class Postprocessor:
|
||||||
match[col_id].text = correct_letter
|
match[col_id].text = correct_letter
|
||||||
collocation_id = [collocation_id[0]] + [tuple(line) for line in collocation_id[1:]]
|
collocation_id = [collocation_id[0]] + [tuple(line) for line in collocation_id[1:]]
|
||||||
return match, collocation_id
|
return match, collocation_id
|
||||||
|
|
||||||
|
def is_fixed_restriction_order(self, match):
    """Tell whether the matched words follow the order of their components.

    When ``self.fixed_restriction_order`` is off, every match is accepted.
    Otherwise the keys of ``match`` are visited in ascending order of each
    value's ``int_id`` and the match is accepted only if the numeric keys
    never decrease along the way.

    Args:
        match: mapping from component key (a string holding an integer,
            or ``'#'``) to an object exposing an ``int_id`` attribute.
            NOTE(review): ``int_id`` is presumably the word's position in
            the sentence -- confirm against the word class.

    Returns:
        ``True`` if the check is disabled or the order holds, else ``False``.

    Raises:
        ValueError: if a visited key other than ``'#'`` is not an integer
            string (raised by ``int``).
    """
    if not self.fixed_restriction_order:
        return True

    # Sort the keys directly; only the keys are needed afterwards, so there
    # is no reason to rebuild a sorted dict. ``sorted`` is stable, so ties
    # on ``int_id`` keep the mapping's own iteration order.
    keys_by_position = sorted(match, key=lambda key: match[key].int_id)

    prev_id = -1
    for key in keys_by_position:
        # '#' is a non-numeric key and takes no part in the order check.
        if key == '#':
            continue
        component_id = int(key)
        if component_id < prev_id:
            return False
        prev_id = component_id

    return True
|
||||||
|
|
7
wani.py
7
wani.py
|
@ -31,6 +31,8 @@ def match_file(words, structures, postprocessor):
|
||||||
for w in words:
|
for w in words:
|
||||||
mhere = s.match(w)
|
mhere = s.match(w)
|
||||||
for match in mhere:
|
for match in mhere:
|
||||||
|
if not postprocessor.is_fixed_restriction_order(match):
|
||||||
|
continue
|
||||||
colocation_id = [[idx, w.lemma] for idx, w in match.items()]
|
colocation_id = [[idx, w.lemma] for idx, w in match.items()]
|
||||||
colocation_id = [s.id] + list(sorted(colocation_id, key=lambda x: x[0]))
|
colocation_id = [s.id] + list(sorted(colocation_id, key=lambda x: x[0]))
|
||||||
match, collocation_id = postprocessor.process(match, colocation_id)
|
match, collocation_id = postprocessor.process(match, colocation_id)
|
||||||
|
@ -48,6 +50,7 @@ def main(args):
|
||||||
database = Database(args)
|
database = Database(args)
|
||||||
match_store = MatchStore(args, database)
|
match_store = MatchStore(args, database)
|
||||||
word_stats = WordStats(lemma_msds, database)
|
word_stats = WordStats(lemma_msds, database)
|
||||||
|
postprocessor = Postprocessor(fixed_restriction_order=args.fixed_restriction_order)
|
||||||
|
|
||||||
for words in load_files(args, database):
|
for words in load_files(args, database):
|
||||||
if words is None:
|
if words is None:
|
||||||
|
@ -55,7 +58,6 @@ def main(args):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
postprocessor = Postprocessor()
|
|
||||||
matches = match_file(words, structures, postprocessor)
|
matches = match_file(words, structures, postprocessor)
|
||||||
|
|
||||||
match_store.add_matches(matches)
|
match_store.add_matches(matches)
|
||||||
|
@ -155,6 +157,9 @@ if __name__ == '__main__':
|
||||||
help='Separator in output file', default="\t")
|
help='Separator in output file', default="\t")
|
||||||
parser.add_argument('--ignore-punctuations',
|
parser.add_argument('--ignore-punctuations',
|
||||||
help="Sort in reversed ored", action='store_true')
|
help="Sort in reversed ored", action='store_true')
|
||||||
|
parser.add_argument('--fixed-restriction-order',
|
||||||
|
help='If used, words have to be in the same order as components.',
|
||||||
|
action='store_true')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user