Improved representations speed + Fixed bug in representations
This commit is contained in:
23
src/wani.py
23
src/wani.py
@@ -31,32 +31,17 @@ def match_file(words, structures, postprocessor):
|
||||
for w in words:
|
||||
mhere = s.match(w)
|
||||
for match in mhere:
|
||||
# colocation_id = [(idx, w.lemma) for idx, w in match.items()]
|
||||
colocation_id = [[idx, w.lemma] for idx, w in match.items()]
|
||||
colocation_id = [s.id] + list(sorted(colocation_id, key=lambda x: x[0]))
|
||||
match, collocation_id = postprocessor.process(match, colocation_id)
|
||||
colocation_id = tuple(colocation_id)
|
||||
|
||||
matches[s].append((match, colocation_id))
|
||||
# for key, val in matches.items():
|
||||
# if key.id == '15':
|
||||
# for el in val:
|
||||
# if el[0]['1'].lemma == 'biti' and el[0]['2'].lemma == 'po' and el[0]['3'].lemma == 'mnenje':
|
||||
# word_id = '.'.join(words[0].id.split('.')[:-1])
|
||||
# print(f"ID: {'.'.join(words[0].id.split('.')[:-1])}")
|
||||
# print(' '.join([w.text for w in words if '.'.join(w.id.split('.')[:-1]) == word_id]))
|
||||
|
||||
# if s.id == '15':
|
||||
# if match['1'].lemma == 'biti' and match['2'].lemma == 'po' and match['3'].lemma == 'mnenje':
|
||||
# word_id = '.'.join(match['1'].id.split('.')[:-1])
|
||||
# print(f"ID: {word_id}")
|
||||
# print(' '.join([w.text for w in words if '.'.join(w.id.split('.')[:-1]) == word_id]))
|
||||
|
||||
return matches
|
||||
|
||||
|
||||
def main(args):
|
||||
sloleks_db = SloleksDatabase(args.sloleks_db)
|
||||
structures, lemma_msds, max_num_components = build_structures(args)
|
||||
timeinfo = TimeInfo(len(args.input))
|
||||
|
||||
@@ -95,7 +80,9 @@ def main(args):
|
||||
|
||||
# figure out representations!
|
||||
if args.out or args.out_no_stat:
|
||||
sloleks_db = SloleksDatabase(args.sloleks_db, args.load_sloleks)
|
||||
match_store.set_representations(word_stats, structures, sloleks_db=sloleks_db)
|
||||
sloleks_db.close()
|
||||
|
||||
Writer.make_output_writer(args, max_num_components, match_store, word_stats).write_out(
|
||||
structures, match_store)
|
||||
@@ -106,8 +93,6 @@ def main(args):
|
||||
Writer.make_stats_writer(args, max_num_components, match_store, word_stats).write_out(
|
||||
structures, match_store)
|
||||
|
||||
# sloleks_db.get_word_form(lemma, gender, number, case)
|
||||
sloleks_db.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
@@ -144,6 +129,10 @@ if __name__ == '__main__':
|
||||
help='Generate one output for each syntactic structure',
|
||||
action='store_true')
|
||||
|
||||
parser.add_argument('--load-sloleks',
|
||||
help='Tells whether sloleks is loaded into memory at the beginning of processing or not.',
|
||||
action='store_true')
|
||||
|
||||
parser.add_argument('--sort-by',
|
||||
help="Sort by this column (index)", type=int, default=-1)
|
||||
parser.add_argument('--sort-reversed',
|
||||
|
||||
Reference in New Issue
Block a user