from enum import Enum
import logging

from restriction import Restriction
from order import Order
from representation_assigner import RepresentationAssigner


class ComponentStatus(Enum):
    """How a component takes part in a match (see ``Component._match_next``)."""

    Optional = 0
    Required = 1
    Forbidden = 2


class ComponentType(Enum):
    """Kind of a component.

    NOTE(review): only ``Core`` and ``Other`` are assigned by
    ``Component.__init__`` in this file; ``Core2w`` is presumably set
    elsewhere -- confirm.
    """

    Other = 0
    Core = 2
    Core2w = 3


class Component:
    """One node of a structure: restrictions on a word plus links to children.

    Children are stored in ``next_element`` as
    ``(component, link_label, order)`` triples and are matched recursively
    through the words reachable from a matched word via ``word.get_links``.
    """

    def __init__(self, info):
        """Build a component from its description.

        Args:
            info: mapping with the keys ``'cid'`` and ``'type'`` and the
                optional keys ``'name'`` and ``'status'``. A missing
                ``'status'`` means the component is required.

        Raises:
            NotImplementedError: if ``info['status']`` is present but is not
                one of ``'forbidden'``, ``'obligatory'`` or ``'optional'``.
        """
        idx = info['cid']
        name = info['name'] if 'name' in info else None
        typ = ComponentType.Core if info['type'] == "core" else ComponentType.Other

        if 'status' not in info:
            status = ComponentStatus.Required
        elif info['status'] == 'forbidden':
            status = ComponentStatus.Forbidden
        elif info['status'] == 'obligatory':
            status = ComponentStatus.Required
        elif info['status'] == 'optional':
            status = ComponentStatus.Optional
        else:
            raise NotImplementedError("strange status: {}".format(info['status']))

        self.status = status
        self.name = name
        self.idx = idx
        self.restrictions = []
        # (child component, link label, Order) triples, filled by add_next().
        self.next_element = []
        self.representation = []
        # selection and iter_ctr are only initialised here; no method in this
        # class reads them.
        self.selection = {}
        self.type = typ
        self.iter_ctr = 0

    def add_next(self, next_component, link_label, order):
        """Register ``next_component`` as a child reached over ``link_label``.

        ``order`` is passed through ``Order.new`` before being stored.
        """
        self.next_element.append((next_component, link_label, Order.new(order)))

    def set_restriction(self, restrictions_tag):
        """Replace this component's restrictions from a restriction element.

        Args:
            restrictions_tag: ``None``, an element whose ``tag`` is
                ``"restriction"`` (a single restriction), or an element whose
                ``tag`` is ``"restriction_or"`` (one restriction per child
                element; ``_match_self`` accepts a word if any of them
                matches).

        Raises:
            RuntimeError: if the element has any other tag.
        """
        if restrictions_tag is None:
            self.restrictions = [Restriction(None)]
        elif restrictions_tag.tag == "restriction":
            self.restrictions = [Restriction(restrictions_tag)]
        elif restrictions_tag.tag == "restriction_or":
            self.restrictions = [Restriction(el) for el in restrictions_tag]
        else:
            raise RuntimeError("Unreachable")

    def set_representation(self, representation):
        """Append one ``RepresentationAssigner`` per item of ``representation``.

        Every feature found in an item contributes its ``attrib`` to that
        item's assigner.
        """
        for rep in representation:
            crend = RepresentationAssigner()
            for feature in rep:
                crend.add_feature(feature.attrib)
            self.representation.append(crend)

    def find_next(self, deps, comps, restrs, reprs):
        """Recursively build the children of this component.

        Args:
            deps: iterable of ``(from_idx, to_idx, label, order)`` entries;
                only entries whose first item equals ``self.idx`` are used
                at this level.
            comps: component descriptions, indexed by component idx.
            restrs: restriction elements, indexed by component idx.
            reprs: representation definitions, indexed by component idx.

        Returns:
            A list of every component created below this one, each direct
            child immediately followed by its own descendants.
        """
        to_ret = []
        for d in deps:
            if d[0] != self.idx:
                continue

            _, idx, dep_label, order = d

            next_component = Component(comps[idx])
            next_component.set_restriction(restrs[idx])
            next_component.set_representation(reprs[idx])
            to_ret.append(next_component)

            self.add_next(next_component, dep_label, order)
            others = next_component.find_next(deps, comps, restrs, reprs)
            to_ret.extend(others)

        return to_ret

    def name_str(self):
        """Return the component name, or ``"_"`` when it has none."""
        return "_" if self.name is None else self.name

    def match(self, word):
        """Match ``word`` against this component and all of its children.

        Returns:
            ``None`` if this component or its children do not match;
            otherwise a list of dicts mapping component idx to the matched
            word, one dict per alternative match.
        """
        m1 = self._match_self(word)
        if m1 is None:
            return None

        mn = self._match_next(word)
        if mn is None:
            return None

        to_ret = [m1]

        for cmatch in mn:
            # if good match but nothing to add, just continue
            if len(cmatch) == 0:
                continue

            # if more than one match found for particular component
            elif len(cmatch) > 1:
                # if more than one match in multiple components, NOPE!
                # Only the first alternative of this child is merged in.
                if len(to_ret) > 1:
                    logging.warning("Strange multiple match: {}".format(
                        str([w.id for w in cmatch[0].values()])))

                    for tr in to_ret:
                        tr.update(cmatch[0])
                    continue

                # yeah, so we have found more than one match, =>
                # more than one element in to_ret
                to_ret = [{**dict(to_ret[0]), **m} for m in cmatch]

            else:
                for tr in to_ret:
                    tr.update(cmatch[0])

        return to_ret

    def _match_self(self, word):
        """Return ``{self.idx: word}`` if any restriction accepts ``word``.

        Returns ``None`` when no restriction matches.
        """
        for restr in self.restrictions:
            if restr.match(word):  # match either
                return {self.idx: word}
        return None

    def _match_next(self, word):
        """Match every child component through the links of ``word``.

        Returns:
            ``None`` if a required child found no match or a forbidden child
            found one; otherwise a list with one entry per element of
            ``next_element``, each entry being the list of match dicts
            collected for that child.
        """
        # matches for every component in links from this component
        to_ret = []

        # need to get all links that match
        for child, link, order in self.next_element:
            next_links = word.get_links(link)
            to_ret.append([])

            # good flag: children that are not required are fine with no match
            good = child.status != ComponentStatus.Required
            for next_word in next_links:
                if not order.match(word, next_word):
                    continue

                match = child.match(next_word)

                if match is not None:
                    # special treatment for forbidden
                    if child.status == ComponentStatus.Forbidden:
                        good = False
                        break

                    else:
                        assert isinstance(match, list)
                        to_ret[-1].extend(match)
                        good = True

            # if none matched, nothing found!
            if not good:
                return None

        return to_ret