|
|
|
@ -31,6 +31,8 @@ def match_file(words, structures, postprocessor):
|
|
|
|
|
for w in words:
|
|
|
|
|
mhere = s.match(w)
|
|
|
|
|
for match in mhere:
|
|
|
|
|
if not postprocessor.is_fixed_restriction_order(match):
|
|
|
|
|
continue
|
|
|
|
|
colocation_id = [[idx, w.lemma] for idx, w in match.items()]
|
|
|
|
|
colocation_id = [s.id] + list(sorted(colocation_id, key=lambda x: x[0]))
|
|
|
|
|
match, collocation_id = postprocessor.process(match, colocation_id)
|
|
|
|
@ -48,6 +50,7 @@ def main(args):
|
|
|
|
|
database = Database(args)
|
|
|
|
|
match_store = MatchStore(args, database)
|
|
|
|
|
word_stats = WordStats(lemma_msds, database)
|
|
|
|
|
postprocessor = Postprocessor(fixed_restriction_order=args.fixed_restriction_order)
|
|
|
|
|
|
|
|
|
|
for words in load_files(args, database):
|
|
|
|
|
if words is None:
|
|
|
|
@ -55,7 +58,6 @@ def main(args):
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
start_time = time.time()
|
|
|
|
|
postprocessor = Postprocessor()
|
|
|
|
|
matches = match_file(words, structures, postprocessor)
|
|
|
|
|
|
|
|
|
|
match_store.add_matches(matches)
|
|
|
|
@ -155,6 +157,9 @@ if __name__ == '__main__':
|
|
|
|
|
help='Separator in output file', default="\t")
|
|
|
|
|
parser.add_argument('--ignore-punctuations',
|
|
|
|
|
help="Sort in reversed ored", action='store_true')
|
|
|
|
|
parser.add_argument('--fixed-restriction-order',
|
|
|
|
|
help='If used, words have to be in the same order as components.',
|
|
|
|
|
action='store_true')
|
|
|
|
|
args = parser.parse_args()
|
|
|
|
|
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
|
|
|
|
|
|
|
|
|