From 84a184c44d7e973ad428b6f1dd2f42f1d6f824ac Mon Sep 17 00:00:00 2001
From: Ozbolt Menegatti
Date: Mon, 13 May 2019 10:48:21 +0200
Subject: [PATCH] I think this is the way to set representations, all info is available ... just have to actually use it

---
 wani.py | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/wani.py b/wani.py
index f4dc1e7..a60b7bf 100644
--- a/wani.py
+++ b/wani.py
@@ -219,6 +219,12 @@ class ComponentRendition:
         else:
             raise RuntimeError("Unknown rendition: {}".format(self.rendition))
 
+    @staticmethod
+    def set_representations(matches, components):
+        for words, agreement in matches:
+            for _, w in words.items():
+                w.representation = ":("
+
     def __str__(self):
         return str(self.rendition)
 
@@ -391,10 +397,6 @@ class Component:
         raise RuntimeError("Unreachable")
 
     def set_representation(self, representation):
-        # for r in representation:
-        #     print(ElementTree.tostring(r).decode('ascii').replace('\n', ''))
-        #     print("--")
-
         if len(representation) > 0:
             for feature in representation:
                 self.representation.add_feature(feature)
@@ -676,14 +678,14 @@ class SyntacticStructure:
 
         to_ret = []
         for m in matches:
-            if not self.check_agreements(m):
-                bad = "Agreement"
-            elif not self.check_form(m):
-                bad = "Form"
-            else:
-                bad = "OK"
+            # if not self.check_agreements(m):
+            #     bad = "Agreement"
+            # elif not self.check_form(m):
+            #     bad = "Form"
+            # else:
+            #     bad = "OK"
 
-            to_ret.append((m, bad))
+            to_ret.append((m, self.check_agreements(m)))
 
         return to_ret
 
@@ -845,7 +847,8 @@ class Writer:
         elif self.all:
             return [word.id, word.text, word.lemma, word.msd]
         else:
-            return [word.lemma, "REP?"]
+            assert(word.representation is not None)
+            return [word.lemma, word.representation]
 
     def sorted_rows(self, rows):
         if self.sort_by < 0 or len(rows) < 2:
@@ -961,6 +964,12 @@ class ColocationIds:
             if group:
                 break
 
+    def set_representations(self, structures):
+        components_dict = {structure.id: structure for structure in structures}
+        for _1, (_2, cid_matches, sid) in self.data.items():
+            ComponentRendition.set_representations(cid_matches, components_dict[sid])
+
+
 
 def match_file(words, structures):
     matches = {s.id: [] for s in structures}
@@ -1025,6 +1034,9 @@ def main(input_file, structures_file, args):
     else:
         colocation_ids.add_matches(matches)
 
+    # figure out representations!
+    colocation_ids.set_representations(structures)
+
     if args.all:
         Writer.make_all_writer(args).write_out(structures, colocation_ids)
     Writer.make_output_writer(args).write_out(structures, colocation_ids)