Turns out the previous commit was OK. Proceeding with stats work.

This commit is contained in:
Ozbolt Menegatti 2019-06-09 23:00:19 +02:00
parent c6440162b8
commit 865351b3f6

24
wani.py
View File

@ -369,9 +369,9 @@ class ComponentRendition:
return self.representation_factory(self.more, word_renderer)
@staticmethod
def set_representations(matches, structure, word_renderer):
def set_representations(match, word_renderer):
representations = {}
for c in structure.components:
for c in match.structure.components:
representations[c.idx] = []
for rep in c.representation:
representations[c.idx].append(rep.cr_instance(word_renderer))
@ -382,7 +382,7 @@ class ComponentRendition:
if len(representations[agr]) != 1:
n = len(representations[agr])
raise NotImplementedError(
"Structure {}: ".format(structure.id) +
"Structure {}: ".format(match.structure.id) +
"component {} has agreement".format(cid) +
" with component {}".format(agr) +
", however there are {} (!= 1) representations".format(n) +
@ -390,10 +390,10 @@ class ComponentRendition:
representations[agr][0].agreement.append(rep)
for words in matches.matches:
for words in match.matches:
# first pass, check everything but agreements
for w_id, w in words.items():
component = structure.get_component(w_id)
component = match.structure.get_component(w_id)
component_representations = representations[component.idx]
for representation in component_representations:
representation.add_word(w)
@ -407,9 +407,9 @@ class ComponentRendition:
if reps == []:
pass
elif all(r is None for r in reps):
matches.representations[cid] = None
match.representations[cid] = None
else:
matches.representations[cid] = " ".join(("" if r is None else r) for r in reps)
match.representations[cid] = " ".join(("" if r is None else r) for r in reps)
class ComponentStatus(Enum):
@ -1360,13 +1360,9 @@ class ColocationIds:
# yield all_words, more_data
def set_representations(self, structures, word_renderer):
components_dict = {structure.id: structure for structure in structures}
idx = 1
def set_representations(self, word_renderer):
for _1, sm in tqdm(self.data.items()):
ComponentRendition.set_representations(
sm, components_dict[sm.structure.id], word_renderer)
idx += 1
ComponentRendition.set_representations(sm, word_renderer)
def determine_colocation_dispersions(self):
dispersions = defaultdict(int)
@ -1454,7 +1450,7 @@ def main(structures_file, args):
# figure out representations!
if args.output:
colocation_ids.set_representations(structures, word_renderer)
colocation_ids.set_representations(word_renderer)
Writer.make_output_writer(args, colocation_ids, word_renderer).write_out(
structures, colocation_ids)