I think this is the way to set representations; all the info is available

... just have to actually use it
This commit is contained in:
Ozbolt Menegatti 2019-05-13 10:48:21 +02:00
parent 6eefd9c9f6
commit 84a184c44d

36
wani.py
View File

@ -219,6 +219,12 @@ class ComponentRendition:
else: else:
raise RuntimeError("Unknown rendition: {}".format(self.rendition)) raise RuntimeError("Unknown rendition: {}".format(self.rendition))
@staticmethod
def set_representations(matches, components):
    """Assign a representation string to every word of every match.

    Placeholder implementation: each word currently gets the fixed
    marker ``":("``.

    Args:
        matches: iterable of ``(words, agreement)`` pairs, where ``words``
            is a mapping whose values are word objects with a writable
            ``representation`` attribute.
        components: currently unused; accepted so the caller can pass
            whatever is needed to compute real representations later.
    """
    # The agreement half of each pair is not needed yet.
    for words, _agreement in matches:
        # Only the word objects matter here, not their keys.
        for word in words.values():
            word.representation = ":("
def __str__(self): def __str__(self):
return str(self.rendition) return str(self.rendition)
@ -391,10 +397,6 @@ class Component:
raise RuntimeError("Unreachable") raise RuntimeError("Unreachable")
def set_representation(self, representation): def set_representation(self, representation):
# for r in representation:
# print(ElementTree.tostring(r).decode('ascii').replace('\n', ''))
# print("--")
if len(representation) > 0: if len(representation) > 0:
for feature in representation: for feature in representation:
self.representation.add_feature(feature) self.representation.add_feature(feature)
@ -676,14 +678,14 @@ class SyntacticStructure:
to_ret = [] to_ret = []
for m in matches: for m in matches:
if not self.check_agreements(m): # if not self.check_agreements(m):
bad = "Agreement" # bad = "Agreement"
elif not self.check_form(m): # elif not self.check_form(m):
bad = "Form" # bad = "Form"
else: # else:
bad = "OK" # bad = "OK"
to_ret.append((m, bad)) to_ret.append((m, self.check_agreements(m)))
return to_ret return to_ret
@ -845,7 +847,8 @@ class Writer:
elif self.all: elif self.all:
return [word.id, word.text, word.lemma, word.msd] return [word.id, word.text, word.lemma, word.msd]
else: else:
return [word.lemma, "REP?"] assert(word.representation is not None)
return [word.lemma, word.representation]
def sorted_rows(self, rows): def sorted_rows(self, rows):
if self.sort_by < 0 or len(rows) < 2: if self.sort_by < 0 or len(rows) < 2:
@ -961,6 +964,12 @@ class ColocationIds:
if group: if group:
break break
def set_representations(self, structures):
    """Set word representations for every stored group of matches.

    Each value in ``self.data`` is unpacked as a 3-item sequence; its
    second item (the matches) and the structure whose ``id`` equals its
    third item are passed to ``ComponentRendition.set_representations``.

    Args:
        structures: iterable of objects exposing an ``id`` attribute.

    Raises:
        KeyError: if a stored structure id has no entry in ``structures``.
    """
    # Index the structures once so each lookup below is O(1).
    structures_by_id = {structure.id: structure for structure in structures}

    # The keys of self.data and the first tuple item are not needed here.
    for _, cid_matches, sid in self.data.values():
        ComponentRendition.set_representations(cid_matches, structures_by_id[sid])
def match_file(words, structures): def match_file(words, structures):
matches = {s.id: [] for s in structures} matches = {s.id: [] for s in structures}
@ -1025,6 +1034,9 @@ def main(input_file, structures_file, args):
else: else:
colocation_ids.add_matches(matches) colocation_ids.add_matches(matches)
# figure out representations!
colocation_ids.set_representations(structures)
if args.all: if args.all:
Writer.make_all_writer(args).write_out(structures, colocation_ids) Writer.make_all_writer(args).write_out(structures, colocation_ids)
Writer.make_output_writer(args).write_out(structures, colocation_ids) Writer.make_output_writer(args).write_out(structures, colocation_ids)