determining dispersions
This commit is contained in:
		
							parent
							
								
									57c0ff6f85
								
							
						
					
					
						commit
						b2baedca52
					
				
							
								
								
									
										9
									
								
								wani.py
									
									
									
									
									
								
							
							
						
						
									
										9
									
								
								wani.py
									
									
									
									
									
								
							| @@ -1168,6 +1168,7 @@ class ColocationIds: | |||||||
|     def __init__(self): |     def __init__(self): | ||||||
|         self.data = {} |         self.data = {} | ||||||
|         self.min_frequency = args.min_freq |         self.min_frequency = args.min_freq | ||||||
|  |         self.dispersions = {} | ||||||
| 
 | 
 | ||||||
|     def _add_match(self, key, sid, match): |     def _add_match(self, key, sid, match): | ||||||
|         if key not in self.data: |         if key not in self.data: | ||||||
| @@ -1199,6 +1200,13 @@ class ColocationIds: | |||||||
|             ComponentRendition.set_representations(sm, components_dict[sm.structure_id], word_renderer) |             ComponentRendition.set_representations(sm, components_dict[sm.structure_id], word_renderer) | ||||||
|             idx += 1 |             idx += 1 | ||||||
|      |      | ||||||
|  |     def determine_colocation_dispersions(self): | ||||||
|  |         dispersions = defaultdict(int) | ||||||
|  |         for (structure_id, *word_tups) in self.data.keys(): | ||||||
|  |             for component_id, lemma in word_tups: | ||||||
|  |                 dispersions[(structure_id, component_id, lemma)] += 1 | ||||||
|  |         self.dispersions = dict(dispersions) | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| def match_file(words, structures): | def match_file(words, structures): | ||||||
|     matches = {s.id: [] for s in structures} |     matches = {s.id: [] for s in structures} | ||||||
| @@ -1273,6 +1281,7 @@ def main(input_file, structures_file, args): | |||||||
| 
 | 
 | ||||||
|     # get word renders for lemma/msd |     # get word renders for lemma/msd | ||||||
|     word_renderer.generate_renders() |     word_renderer.generate_renders() | ||||||
|  |     colocation_ids.determine_colocation_dispersions() | ||||||
| 
 | 
 | ||||||
|     if args.output: |     if args.output: | ||||||
|         # figure out representations! |         # figure out representations! | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user