determining dispersions
This commit is contained in:
parent
57c0ff6f85
commit
b2baedca52
9
wani.py
9
wani.py
|
@ -1168,6 +1168,7 @@ class ColocationIds:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.data = {}
|
self.data = {}
|
||||||
self.min_frequency = args.min_freq
|
self.min_frequency = args.min_freq
|
||||||
|
self.dispersions = {}
|
||||||
|
|
||||||
def _add_match(self, key, sid, match):
|
def _add_match(self, key, sid, match):
|
||||||
if key not in self.data:
|
if key not in self.data:
|
||||||
|
@ -1198,6 +1199,13 @@ class ColocationIds:
|
||||||
for _1, sm in tqdm(self.data.items()):
|
for _1, sm in tqdm(self.data.items()):
|
||||||
ComponentRendition.set_representations(sm, components_dict[sm.structure_id], word_renderer)
|
ComponentRendition.set_representations(sm, components_dict[sm.structure_id], word_renderer)
|
||||||
idx += 1
|
idx += 1
|
||||||
|
|
||||||
|
def determine_colocation_dispersions(self):
    """Count in how many colocation keys each (structure, component, lemma) occurs.

    Every key of ``self.data`` is unpacked as ``(structure_id, *word_tups)``,
    and each entry of ``word_tups`` is unpacked as a ``(component_id, lemma)``
    pair. For each such pair the counter stored under
    ``(structure_id, component_id, lemma)`` is incremented by one.

    The result is written to ``self.dispersions`` as a plain ``dict``;
    nothing is returned. An empty ``self.data`` yields an empty dict.
    """
    dispersions = defaultdict(int)
    # Iterating a mapping yields its keys directly; no .keys() view is needed.
    for structure_id, *word_tups in self.data:
        for component_id, lemma in word_tups:
            dispersions[(structure_id, component_id, lemma)] += 1
    # Unlike defaultdict, a plain dict does not insert missing keys on lookup.
    self.dispersions = dict(dispersions)
|
||||||
|
|
||||||
|
|
||||||
def match_file(words, structures):
|
def match_file(words, structures):
|
||||||
|
@ -1273,6 +1281,7 @@ def main(input_file, structures_file, args):
|
||||||
|
|
||||||
# get word renders for lemma/msd
|
# get word renders for lemma/msd
|
||||||
word_renderer.generate_renders()
|
word_renderer.generate_renders()
|
||||||
|
colocation_ids.determine_colocation_dispersions()
|
||||||
|
|
||||||
if args.output:
|
if args.output:
|
||||||
# figure out representations!
|
# figure out representations!
|
||||||
|
|
Loading…
Reference in New Issue
Block a user