redid representation storage (as in prev commit: to make it easier to use)
find_next does not collect representations, no separate class to parse representation features
This commit is contained in:
		
							parent
							
								
									19067e4135
								
							
						
					
					
						commit
						6eefd9c9f6
					
				
							
								
								
									
										125
									
								
								wani.py
									
									
									
									
									
								
							
							
						
						
									
										125
									
								
								wani.py
									
									
									
									
									
								
							| @ -132,6 +132,10 @@ class Rendition(Enum): | ||||
|     Lexis = 2 | ||||
|     Unknown = 3 | ||||
| 
 | ||||
| class StructureSelection(Enum): | ||||
|     All = 0 | ||||
|     Msd = 1 | ||||
| 
 | ||||
| class Order(Enum): | ||||
|     FromTo = 0 | ||||
|     ToFrom = 1 | ||||
| @ -165,20 +169,53 @@ class Order(Enum): | ||||
|             raise NotImplementedError("Should not be here: Order match") | ||||
| 
 | ||||
| class ComponentRendition: | ||||
|     def __init__(self, rendition=Rendition.Unknown, lexis=None): | ||||
|         assert(type(rendition) is Rendition) | ||||
|         self.lexis = lexis | ||||
|         self.rendition = rendition | ||||
|     def __init__(self): | ||||
|         self.more = None | ||||
|         self.rendition = Rendition.Unknown | ||||
|      | ||||
|     def render(self, word): | ||||
|     def _set_rendition(self, r): | ||||
|         assert(self.rendition is Rendition.Unknown) | ||||
|         self.rendition = r | ||||
|      | ||||
|     def _set_more(self, m): | ||||
|         assert(self.more is None and m is not None) | ||||
|         self.more = m | ||||
|      | ||||
|     def add_feature(self, feature): | ||||
|         if 'rendition' in feature: | ||||
|             if feature['rendition'] == "lemma": | ||||
|                 self._set_rendition(Rendition.Lemma) | ||||
|             elif feature['rendition'] == "word_form": | ||||
|                 self._set_rendition(Rendition.WordForm) | ||||
|             elif feature['rendition'] == "lexis": | ||||
|                 self._set_rendition(Rendition.Lexis) | ||||
|                 self._set_more(feature['string']) | ||||
|             else: | ||||
|                 raise NotImplementedError("Representation rendition: {}".format(feature)) | ||||
| 
 | ||||
|         elif 'selection' in feature: | ||||
|             if feature['selection'] == "msd": | ||||
|                 self._set_more(StructureSelection.Msd) | ||||
|             elif feature['selection'] == "all": | ||||
|                 self._set_more(StructureSelection.All) | ||||
|             else: | ||||
|                 raise NotImplementedError("Representation selection: {}".format(feature)) | ||||
| 
 | ||||
|         else: | ||||
|             return None | ||||
| 
 | ||||
|     def render(self, words): | ||||
|         if self.rendition == Rendition.Lemma: | ||||
|             return word.lemma | ||||
|         elif self.rendition == Rendition.WordForm: | ||||
|             return word.text | ||||
|             return words[0].lemma | ||||
|         elif self.rendition == Rendition.Lexis: | ||||
|             return self.lexis | ||||
|             return self.more | ||||
|         elif self.rendition == Rendition.Unknown: | ||||
|             return None | ||||
| 
 | ||||
|         elif self.rendition == Rendition.WordForm: | ||||
|             # check more! | ||||
|             return words[0].text | ||||
| 
 | ||||
|         else: | ||||
|             raise RuntimeError("Unknown rendition: {}".format(self.rendition)) | ||||
|      | ||||
| @ -186,34 +223,6 @@ class ComponentRendition: | ||||
|         return str(self.rendition) | ||||
| 
 | ||||
| 
 | ||||
| class StructureSelection(Enum): | ||||
|     All = 0 | ||||
|     Msd = 1 | ||||
| 
 | ||||
| class ComponentRepresentation: | ||||
|     @staticmethod | ||||
|     def new(s): | ||||
|         if 'rendition' in s: | ||||
|             if s['rendition'] == "lemma": | ||||
|                 return ComponentRendition(Rendition.Lemma) | ||||
|             elif s['rendition'] == "word_form": | ||||
|                 return ComponentRendition(Rendition.WordForm) | ||||
|             elif s['rendition'] == "lexis": | ||||
|                 assert(s['string'] is not None) | ||||
|                 return ComponentRendition(Rendition.Lexis, s['string']) | ||||
|             else: | ||||
|                 raise NotImplementedError("Rendition: {}".format(s)) | ||||
|         elif 'selection' in s: | ||||
|             if s['selection'] == "msd": | ||||
|                 return StructureSelection.Msd | ||||
|             elif s['selection'] == "all": | ||||
|                 return StructureSelection.All | ||||
|             else: | ||||
|                 raise NotImplementedError("Selection: {}".format(s)) | ||||
|         else: | ||||
|             return None | ||||
| 
 | ||||
| 
 | ||||
| class ComponentStatus(Enum): | ||||
|     Optional = 0 | ||||
|     Required = 1 | ||||
| @ -357,13 +366,13 @@ class Component: | ||||
|         self.idx = idx | ||||
|         self.restriction = None | ||||
|         self.next_element = [] | ||||
|         self.rendition = ComponentRendition() | ||||
|         self.representation = ComponentRendition() | ||||
|         self.selection = {} | ||||
| 
 | ||||
|         self.iter_ctr = 0 | ||||
| 
 | ||||
|     def render_word(self, word): | ||||
|         return self.rendition.render(word) | ||||
|         return self.representation.render(word) | ||||
| 
 | ||||
|     def add_next(self, next_component, link_label, order): | ||||
|         self.next_element.append((next_component, link_label, Order.new(order))) | ||||
| @ -382,31 +391,15 @@ class Component: | ||||
|             raise RuntimeError("Unreachable") | ||||
| 
 | ||||
|     def set_representation(self, representation): | ||||
|         cr = None | ||||
|         # for r in representation: | ||||
|         #     print(ElementTree.tostring(r).decode('ascii').replace('\n', '')) | ||||
|         # print("--") | ||||
| 
 | ||||
|         if len(representation) > 0: | ||||
|             self.representation = [] | ||||
| 
 | ||||
|             for feature in representation: | ||||
|                 f = ComponentRepresentation.new(dict(feature.attrib)) | ||||
| 
 | ||||
|                 if type(f) is None: | ||||
|                     logging.warning("Unknown representation in component {}, skipping...".format(self.idx), file=sys.stderr) | ||||
|                     continue | ||||
|                 if type(f) is StructureSelection: | ||||
|                     assert(cr is None) | ||||
|                     cr = f | ||||
|                 elif type(f) is ComponentRendition: | ||||
|                     self.rendition = f | ||||
|                 elif type(f) is dict: | ||||
|                     self.selection.update(f) | ||||
|                 else: | ||||
|                     raise RuntimeError("Unreachable: {}".format(f)) | ||||
| 
 | ||||
|         return cr | ||||
|                 self.representation.add_feature(feature) | ||||
| 
 | ||||
|     def find_next(self, deps, comps, restrs, reprs): | ||||
|         representation = StructureSelection.All | ||||
| 
 | ||||
|         to_ret = [] | ||||
|         for d in deps: | ||||
|             if d[0] == self.idx: | ||||
| @ -414,23 +407,22 @@ class Component: | ||||
| 
 | ||||
|                 next_component = Component(comps[idx]) | ||||
|                 next_component.set_restriction(restrs[idx]) | ||||
|                 r1 = next_component.set_representation(reprs[idx]) | ||||
|                 next_component.set_representation(reprs[idx]) | ||||
|                 to_ret.append(next_component) | ||||
| 
 | ||||
|                 self.add_next(next_component, dep_label, order) | ||||
|                 others, r2 = next_component.find_next(deps, comps, restrs, reprs) | ||||
|                 others = next_component.find_next(deps, comps, restrs, reprs) | ||||
|                 to_ret.extend(others) | ||||
| 
 | ||||
|         return to_ret, representation | ||||
|         return to_ret | ||||
| 
 | ||||
|     def name_str(self): | ||||
|         return "_" if self.name is None else self.name | ||||
| 
 | ||||
| 
 | ||||
|     def __str__(self): | ||||
|         n = self.name_str() | ||||
|         return "{:s}) {:7s}:{} [{}] :{}".format( | ||||
|                 self.idx, n, self.status, self.restriction, self.rendition) | ||||
|                 self.idx, n, self.status, self.restriction, self.representation) | ||||
| 
 | ||||
|     def tree(self): | ||||
|         el = [] | ||||
| @ -582,7 +574,7 @@ class SyntacticStructure: | ||||
|                     raise NotImplementedError("Unknown definition: {} in structure {}".format(el.tag, st.id)) | ||||
| 
 | ||||
|         fake_root_component = Component({'cid': '#', 'type': 'other'}) | ||||
|         st.components, st.selection = fake_root_component.find_next(deps, comps, restrs, forms) | ||||
|         st.components = fake_root_component.find_next(deps, comps, restrs, forms) | ||||
|         return st | ||||
| 
 | ||||
|     def add_representation(self, n, rep_el, forms): | ||||
| @ -969,7 +961,6 @@ class ColocationIds: | ||||
|                 if group: | ||||
|                     break | ||||
| 
 | ||||
| 
 | ||||
| def match_file(words, structures): | ||||
|     matches = {s.id: [] for s in structures} | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user