continued work on representation, almost there...
This commit is contained in:
parent
84a184c44d
commit
d2f1e95a8f
89
wani.py
89
wani.py
|
@ -203,28 +203,58 @@ class ComponentRendition:
|
||||||
|
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def render(self, words):
|
def isit(self, rendition):
|
||||||
if self.rendition == Rendition.Lemma:
|
return self.rendition is rendition
|
||||||
return words[0].lemma
|
|
||||||
elif self.rendition == Rendition.Lexis:
|
|
||||||
return self.more
|
|
||||||
elif self.rendition == Rendition.Unknown:
|
|
||||||
return None
|
|
||||||
|
|
||||||
elif self.rendition == Rendition.WordForm:
|
|
||||||
# check more!
|
|
||||||
return words[0].text
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise RuntimeError("Unknown rendition: {}".format(self.rendition))
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def set_representations(matches, components):
|
def set_representations(matches, structure):
|
||||||
|
representations = {c.idx: [True, ""] for c in structure.components}
|
||||||
|
|
||||||
|
def render_all(lst):
|
||||||
|
return "/".join(set(lst))
|
||||||
|
|
||||||
|
def render_form(_lst):
|
||||||
|
return ":("
|
||||||
|
|
||||||
for words, agreement in matches:
|
for words, agreement in matches:
|
||||||
for _, w in words.items():
|
if not agreement:
|
||||||
w.representation = ":("
|
continue
|
||||||
|
|
||||||
|
for w_id, w in words.items():
|
||||||
|
component = structure.get_component(w_id)
|
||||||
|
rep = component.representation
|
||||||
|
|
||||||
|
if rep.isit(Rendition.Lemma):
|
||||||
|
representations[w_id][0] = False
|
||||||
|
representations[w_id][1] = w.lemma
|
||||||
|
elif rep.isit(Rendition.Lexis):
|
||||||
|
representations[w_id][0] = False
|
||||||
|
representations[w_id][1] = rep.more
|
||||||
|
elif rep.isit(Rendition.Unknown):
|
||||||
|
representations[w_id][0] = False
|
||||||
|
representations[w_id][1] = ""
|
||||||
|
|
||||||
|
# it HAS to be word_form now
|
||||||
|
else:
|
||||||
|
# set correct type first
|
||||||
|
if type(representations[w_id][1]) is str:
|
||||||
|
representations[w_id] = (
|
||||||
|
[], render_all if rep.more is StructureSelection.All else render_form
|
||||||
|
)
|
||||||
|
representations[w_id][0].append(w.text)
|
||||||
|
|
||||||
|
# just need to set representation to first group...
|
||||||
|
for w_id, w in matches[0][0].items():
|
||||||
|
data = representations[w_id]
|
||||||
|
|
||||||
|
if type(data[1]) is str:
|
||||||
|
w.representation_failed = data[0]
|
||||||
|
w.representation = w.lemma if w.representation_failed else data[1]
|
||||||
|
else:
|
||||||
|
w.representation_failed = len(data[0]) > 0
|
||||||
|
w.representation = w.lemma if w.representation_failed else data[1](data[0])
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return str(self.rendition)
|
return str(self.rendition)
|
||||||
|
|
||||||
|
@ -377,9 +407,6 @@ class Component:
|
||||||
|
|
||||||
self.iter_ctr = 0
|
self.iter_ctr = 0
|
||||||
|
|
||||||
def render_word(self, word):
|
|
||||||
return self.representation.render(word)
|
|
||||||
|
|
||||||
def add_next(self, next_component, link_label, order):
|
def add_next(self, next_component, link_label, order):
|
||||||
self.next_element.append((next_component, link_label, Order.new(order)))
|
self.next_element.append((next_component, link_label, Order.new(order)))
|
||||||
|
|
||||||
|
@ -397,9 +424,8 @@ class Component:
|
||||||
raise RuntimeError("Unreachable")
|
raise RuntimeError("Unreachable")
|
||||||
|
|
||||||
def set_representation(self, representation):
|
def set_representation(self, representation):
|
||||||
if len(representation) > 0:
|
for feature in representation:
|
||||||
for feature in representation:
|
self.representation.add_feature(feature.attrib)
|
||||||
self.representation.add_feature(feature)
|
|
||||||
|
|
||||||
def find_next(self, deps, comps, restrs, reprs):
|
def find_next(self, deps, comps, restrs, reprs):
|
||||||
to_ret = []
|
to_ret = []
|
||||||
|
@ -720,6 +746,9 @@ class Word:
|
||||||
self.text = xml.text
|
self.text = xml.text
|
||||||
self.links = defaultdict(list)
|
self.links = defaultdict(list)
|
||||||
|
|
||||||
|
self.representation = None
|
||||||
|
self.representation_failed = False
|
||||||
|
|
||||||
last_num = self.id.split('.')[-1]
|
last_num = self.id.split('.')[-1]
|
||||||
if last_num[0] not in '0123456789':
|
if last_num[0] not in '0123456789':
|
||||||
last_num = last_num[1:]
|
last_num = last_num[1:]
|
||||||
|
@ -827,7 +856,7 @@ class Writer:
|
||||||
if self.all:
|
if self.all:
|
||||||
cols = ["Token_ID", "Word_form"] + cols + ["Msd"]
|
cols = ["Token_ID", "Word_form"] + cols + ["Msd"]
|
||||||
else:
|
else:
|
||||||
cols.append("Representative_form")
|
cols.extend(["Representative_form", "RF_scenario"])
|
||||||
|
|
||||||
assert(len(cols) == self.length())
|
assert(len(cols) == self.length())
|
||||||
cols = ["C{}_{}".format(i + 1, thd) for i in range(MAX_NUM_COMPONENTS) for thd in cols]
|
cols = ["C{}_{}".format(i + 1, thd) for i in range(MAX_NUM_COMPONENTS) for thd in cols]
|
||||||
|
@ -839,7 +868,7 @@ class Writer:
|
||||||
return cols
|
return cols
|
||||||
|
|
||||||
def length(self):
|
def length(self):
|
||||||
return 4 if self.all else 2
|
return 4 if self.all else 3
|
||||||
|
|
||||||
def from_word(self, word):
|
def from_word(self, word):
|
||||||
if word is None:
|
if word is None:
|
||||||
|
@ -848,7 +877,8 @@ class Writer:
|
||||||
return [word.id, word.text, word.lemma, word.msd]
|
return [word.id, word.text, word.lemma, word.msd]
|
||||||
else:
|
else:
|
||||||
assert(word.representation is not None)
|
assert(word.representation is not None)
|
||||||
return [word.lemma, word.representation]
|
failed = "ok" if word.representation_failed else "lemma_fallback"
|
||||||
|
return [word.lemma, word.representation, failed]
|
||||||
|
|
||||||
def sorted_rows(self, rows):
|
def sorted_rows(self, rows):
|
||||||
if self.sort_by < 0 or len(rows) < 2:
|
if self.sort_by < 0 or len(rows) < 2:
|
||||||
|
@ -880,7 +910,7 @@ class Writer:
|
||||||
idx = str(idx + 1)
|
idx = str(idx + 1)
|
||||||
word = m[idx] if idx in m else None
|
word = m[idx] if idx in m else None
|
||||||
to_write.extend(self.from_word(word))
|
to_write.extend(self.from_word(word))
|
||||||
representation += " " + to_write[-1]
|
representation += " " + to_write[-2]
|
||||||
|
|
||||||
# make them equal size
|
# make them equal size
|
||||||
to_write.extend([""] * (MAX_NUM_COMPONENTS * self.length() - len(to_write)))
|
to_write.extend([""] * (MAX_NUM_COMPONENTS * self.length() - len(to_write)))
|
||||||
|
@ -1071,4 +1101,3 @@ if __name__ == '__main__':
|
||||||
start = time.time()
|
start = time.time()
|
||||||
main(args.input, args.structures, args)
|
main(args.input, args.structures, args)
|
||||||
logging.info("TIME: {}".format(time.time() - start))
|
logging.info("TIME: {}".format(time.time() - start))
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user