You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
luscenje_struktur/src/component.py

170 lines
5.1 KiB

from enum import Enum
import logging
from restriction import Restriction
from order import Order
from representation_assigner import RepresentationAssigner
class ComponentStatus(Enum):
    """Presence requirement of a component inside a structure.

    ``Component._match_next`` consults this value to decide whether a
    linked component has to match, may match, or must not match.
    """

    # A missing match for the component does not fail the parent match.
    Optional = 0
    # At least one linked word has to match the component.
    Required = 1
    # Any linked word matching the component fails the parent match.
    Forbidden = 2
class ComponentType(Enum):
    """Kind of a component as derived from its ``type`` attribute.

    The numeric values are not contiguous (there is no member with
    value 1); they are kept exactly as declared.
    """

    # Any component whose ``type`` is not "core".
    Other = 0
    # Component whose ``type`` equals "core".
    Core = 2
    # NOTE(review): not assigned by Component.__init__ in this file;
    # presumably set by other modules -- confirm against callers.
    Core2w = 3
class Component:
    """A single node of a structure pattern that can be matched on words.

    A component carries its own restrictions (any one of which has to
    accept a word), a list of outgoing links to further components, and
    the representation assigners configured for it.  Matching starts at
    one component and recursively follows the outgoing links.
    """

    def __init__(self, info):
        """Build a component from its attribute mapping.

        Args:
            info: mapping with the keys ``'cid'`` and ``'type'`` and the
                optional keys ``'name'`` and ``'status'``.

        Raises:
            KeyError: if ``'cid'`` or ``'type'`` is missing.
            NotImplementedError: if ``'status'`` is present but is not
                one of ``'forbidden'``, ``'obligatory'``, ``'optional'``.
        """
        idx = info['cid']
        name = info.get('name')
        typ = ComponentType.Core if info['type'] == "core" else ComponentType.Other

        # A component without an explicit status is treated as required.
        if 'status' not in info:
            status = ComponentStatus.Required
        elif info['status'] == 'forbidden':
            status = ComponentStatus.Forbidden
        elif info['status'] == 'obligatory':
            status = ComponentStatus.Required
        elif info['status'] == 'optional':
            status = ComponentStatus.Optional
        else:
            raise NotImplementedError("strange status: {}".format(info['status']))

        self.status = status
        self.name = name
        self.idx = idx
        # Alternatives; a word matches when any single restriction accepts it.
        self.restrictions = []
        # Outgoing links as (component, link_label, Order) tuples.
        self.next_element = []
        # One RepresentationAssigner per configured representation.
        self.representation = []
        self.selection = {}
        self.type = typ
        # Not read anywhere in this class; kept for external users.
        self.iter_ctr = 0

    def add_next(self, next_component, link_label, order):
        """Register an outgoing link to ``next_component``.

        ``order`` is converted with ``Order.new`` before it is stored.
        """
        self.next_element.append((next_component, link_label, Order.new(order)))

    def set_restriction(self, restrictions_tag):
        """Replace the restrictions of this component.

        Args:
            restrictions_tag: ``None`` (a single ``Restriction(None)`` is
                stored), an element tagged ``"restriction"`` (stored as
                the only restriction), or an element tagged
                ``"restriction_or"`` (one restriction per child element).

        Raises:
            RuntimeError: if the element carries any other tag.
        """
        if restrictions_tag is None:
            self.restrictions = [Restriction(None)]
        elif restrictions_tag.tag == "restriction":
            self.restrictions = [Restriction(restrictions_tag)]
        elif restrictions_tag.tag == "restriction_or":
            self.restrictions = [Restriction(el) for el in restrictions_tag]
        else:
            raise RuntimeError("Unreachable")

    def set_representation(self, representation):
        """Append one ``RepresentationAssigner`` per item of ``representation``.

        Every item is iterated and the ``attrib`` of each of its features
        is handed to the assigner via ``add_feature``.
        """
        for rep in representation:
            crend = RepresentationAssigner()
            for feature in rep:
                crend.add_feature(feature.attrib)
            self.representation.append(crend)

    def find_next(self, deps, comps, restrs, reprs):
        """Recursively create and link all components reachable from here.

        Args:
            deps: iterable of 4-item sequences
                ``(from_idx, to_idx, link_label, order)``.
            comps: lookup from component idx to its attribute mapping.
            restrs: lookup from component idx to its restriction element.
            reprs: lookup from component idx to its representations.

        Returns:
            List of every component created, in depth-first order.
        """
        to_ret = []
        for d in deps:
            # Only dependencies that start at this component are relevant.
            if d[0] != self.idx:
                continue
            _, idx, dep_label, order = d

            next_component = Component(comps[idx])
            next_component.set_restriction(restrs[idx])
            next_component.set_representation(reprs[idx])
            to_ret.append(next_component)

            self.add_next(next_component, dep_label, order)
            to_ret.extend(next_component.find_next(deps, comps, restrs, reprs))
        return to_ret

    def name_str(self):
        """Return the component name, or ``"_"`` when it has none."""
        return "_" if self.name is None else self.name

    def match(self, word):
        """Match this component and everything linked from it on ``word``.

        Returns:
            ``None`` when this component or one of its links fails to
            match; otherwise a list of dicts mapping component idx to the
            matched word, one dict per alternative match.
        """
        m1 = self._match_self(word)
        if m1 is None:
            return None

        mn = self._match_next(word)
        if mn is None:
            return None

        to_ret = [m1]
        for cmatch in mn:
            # Good match but nothing to add (e.g. an absent optional link).
            if not cmatch:
                continue

            if len(cmatch) > 1:
                # More than one match for this link.
                if len(to_ret) > 1:
                    # Multiple matches in multiple links are not combined:
                    # warn and merge only the first match into every result.
                    logging.warning("Strange multiple match: {}".format(
                        str([w.id for w in cmatch[0].values()])))
                    for tr in to_ret:
                        tr.update(cmatch[0])
                    continue

                # Fan out: one result per alternative match of this link.
                to_ret = [{**to_ret[0], **m} for m in cmatch]
            else:
                for tr in to_ret:
                    tr.update(cmatch[0])

        return to_ret

    def _match_self(self, word):
        """Return ``{self.idx: word}`` if any restriction accepts ``word``.

        Returns ``None`` when no restriction matches.
        """
        for restr in self.restrictions:
            if restr.match(word):  # matching either restriction suffices
                return {self.idx: word}
        return None

    def _match_next(self, word):
        """Collect the matches of every component linked from this one.

        Returns:
            ``None`` when a required link finds no match or a forbidden
            link finds one; otherwise a list with one entry per outgoing
            link, each entry being the (possibly empty) list of match
            dicts found through that link.
        """
        to_ret = []

        for next_component, link, order in self.next_element:
            next_links = word.get_links(link)
            link_matches = []
            to_ret.append(link_matches)

            # Optional and forbidden links are fine without any match;
            # a required link has to prove itself below.
            good = next_component.status != ComponentStatus.Required
            for next_word in next_links:
                if not order.match(word, next_word):
                    continue

                match = next_component.match(next_word)
                if match is None:
                    continue

                # A matching forbidden component invalidates everything.
                if next_component.status == ComponentStatus.Forbidden:
                    good = False
                    break

                assert isinstance(match, list)
                link_matches.extend(match)
                good = True

            # If none matched, nothing found!
            if not good:
                return None

        return to_ret