Turns out the previous commit was OK. Proceeding with stats work.

This commit is contained in:
Ozbolt Menegatti 2019-06-09 23:00:19 +02:00
parent c6440162b8
commit 865351b3f6

24
wani.py
View File

@ -369,9 +369,9 @@ class ComponentRendition:
return self.representation_factory(self.more, word_renderer) return self.representation_factory(self.more, word_renderer)
@staticmethod @staticmethod
def set_representations(matches, structure, word_renderer): def set_representations(match, word_renderer):
representations = {} representations = {}
for c in structure.components: for c in match.structure.components:
representations[c.idx] = [] representations[c.idx] = []
for rep in c.representation: for rep in c.representation:
representations[c.idx].append(rep.cr_instance(word_renderer)) representations[c.idx].append(rep.cr_instance(word_renderer))
@ -382,7 +382,7 @@ class ComponentRendition:
if len(representations[agr]) != 1: if len(representations[agr]) != 1:
n = len(representations[agr]) n = len(representations[agr])
raise NotImplementedError( raise NotImplementedError(
"Structure {}: ".format(structure.id) + "Structure {}: ".format(match.structure.id) +
"component {} has agreement".format(cid) + "component {} has agreement".format(cid) +
" with component {}".format(agr) + " with component {}".format(agr) +
", however there are {} (!= 1) representations".format(n) + ", however there are {} (!= 1) representations".format(n) +
@ -390,10 +390,10 @@ class ComponentRendition:
representations[agr][0].agreement.append(rep) representations[agr][0].agreement.append(rep)
for words in matches.matches: for words in match.matches:
# first pass, check everything but agreements # first pass, check everything but agreements
for w_id, w in words.items(): for w_id, w in words.items():
component = structure.get_component(w_id) component = match.structure.get_component(w_id)
component_representations = representations[component.idx] component_representations = representations[component.idx]
for representation in component_representations: for representation in component_representations:
representation.add_word(w) representation.add_word(w)
@ -407,9 +407,9 @@ class ComponentRendition:
if reps == []: if reps == []:
pass pass
elif all(r is None for r in reps): elif all(r is None for r in reps):
matches.representations[cid] = None match.representations[cid] = None
else: else:
matches.representations[cid] = " ".join(("" if r is None else r) for r in reps) match.representations[cid] = " ".join(("" if r is None else r) for r in reps)
class ComponentStatus(Enum): class ComponentStatus(Enum):
@ -1360,13 +1360,9 @@ class ColocationIds:
# yield all_words, more_data # yield all_words, more_data
def set_representations(self, structures, word_renderer): def set_representations(self, word_renderer):
components_dict = {structure.id: structure for structure in structures}
idx = 1
for _1, sm in tqdm(self.data.items()): for _1, sm in tqdm(self.data.items()):
ComponentRendition.set_representations( ComponentRendition.set_representations(sm, word_renderer)
sm, components_dict[sm.structure.id], word_renderer)
idx += 1
def determine_colocation_dispersions(self): def determine_colocation_dispersions(self):
dispersions = defaultdict(int) dispersions = defaultdict(int)
@ -1454,7 +1450,7 @@ def main(structures_file, args):
# figure out representations! # figure out representations!
if args.output: if args.output:
colocation_ids.set_representations(structures, word_renderer) colocation_ids.set_representations(word_renderer)
Writer.make_output_writer(args, colocation_ids, word_renderer).write_out( Writer.make_output_writer(args, colocation_ids, word_renderer).write_out(
structures, colocation_ids) structures, colocation_ids)