From 87712128be6669fb5e7a3c116a3e5ccf0b4d5360 Mon Sep 17 00:00:00 2001 From: Ozbolt Menegatti Date: Mon, 13 May 2019 00:26:00 +0200 Subject: [PATCH] joint representation form --- wani.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/wani.py b/wani.py index 9ffced7..a4ccf91 100644 --- a/wani.py +++ b/wani.py @@ -850,7 +850,7 @@ class Writer: def from_word(self, word): if word is None: - return "" * self.length() + return [""] * self.length() elif self.all: return [word.id, word.text, word.lemma, word.msd] else: @@ -879,17 +879,22 @@ class Writer: rows = [] for m, reason, cid in matches: to_write = [] + representation = "" for idx, comp in enumerate(components): idx = str(idx + 1) word = m[idx] if idx in m else None to_write.extend(self.from_word(word)) + representation += " " + to_write[-1] # make them equal size to_write.extend([""] * (MAX_NUM_COMPONENTS * self.length() - len(to_write))) to_write = [structure_id] + to_write + [colocation_ids.to_id(cid)] if not self.all: + representation = re.sub(' +', ' ', representation) + to_write.append(representation.strip()) + if colocation_ids.should_write(cid): to_write.append(colocation_ids.num(cid)) colocation_ids.set_written(cid)