Implemented multiple output option

This commit is contained in:
Ozbolt Menegatti 2019-02-07 10:19:36 +01:00
parent 8b47e2b317
commit b4e73e2d60

77
wani.py
View File

@ -786,6 +786,7 @@ class Writer:
self.lemma_only = args.lemma_only self.lemma_only = args.lemma_only
self.without_rep = args.without_rep self.without_rep = args.without_rep
self.output_file = args.output self.output_file = args.output
self.multiple_output = args.multiple_output
def header(self): def header(self):
cols = ["Lemma"] cols = ["Lemma"]
@ -821,43 +822,66 @@ class Writer:
return cols return cols
def write_header(self, file_handler):
    """Write the column-name line of the output table to *file_handler*.

    The column names are taken from ``self.header()``, joined with ", "
    and terminated by a newline.
    """
    header_line = ", ".join(self.header())
    file_handler.write(header_line + "\n")
def write_out_worker(self, file_handler, matches, structure_id, components, colocation_ids):
    """Write one output row per match of a single syntactic structure.

    Parameters:
        file_handler: writable text handle; receives the rows and is
            flushed once all matches have been processed.
        matches: iterable of ``(m, reason, cid)`` triples. ``m`` is looked
            up by the string keys "1", "2", ... (one per component);
            ``reason`` is not used here.
        structure_id: value written as the first cell of every row.
        components: components of the structure; only their count is
            used, to decide which keys of ``m`` are read.
        colocation_ids: object providing ``to_id``, ``num``,
            ``is_written`` and ``set_written`` for a collocation id.

    Every cell is passed through ``str()`` before joining, so a
    non-string id or count cannot make ``", ".join`` raise TypeError.
    """
    for m, _reason, cid in matches:
        cells = []

        # Component slots are keyed by their 1-based position as a string.
        for position, _ in enumerate(components, start=1):
            key = str(position)
            word = m[key] if key in m else None
            cells.extend(self.from_word(word))

        # Pad so every row has the same number of word columns.
        cells.extend([""] * (MAX_NUM_COMPONENTS * self.length() - len(cells)))
        row = [structure_id] + cells + [colocation_ids.to_id(cid)]

        if not self.without_rep:
            row.append("")  # representation column: not yet implemented...

        if self.group:
            # In group mode each collocation id is written only once,
            # together with the value of colocation_ids.num(cid).
            if colocation_ids.is_written(cid):
                continue
            row.append(colocation_ids.num(cid))
            colocation_ids.set_written(cid)

        file_handler.write(", ".join(str(cell) for cell in row) + "\n")

    file_handler.flush()
def write_out(self, matches, structures, colocation_ids):
    """Write the header and all matches to the configured output(s).

    With ``self.multiple_output`` set, every structure gets its own
    output (``"<output_file>.<structure id>"``) with its own header line;
    otherwise a single header is followed by the rows of all structures.
    When ``self.output_file`` is None everything goes to ``sys.stdout``.

    Parameters:
        matches: mapping from structure id to that structure's matches.
        structures: iterable of objects exposing ``id`` and ``components``.
        colocation_ids: passed through to ``write_out_worker``.

    Opened files are closed even if writing raises.
    """
    def fp_open(snum=None):
        if self.output_file is None:
            return sys.stdout
        if snum is None:
            return open(self.output_file, "w")
        return open("{}.{}".format(self.output_file, snum), "w")

    def fp_close(fp_):
        # stdout is shared with the rest of the program; never close it.
        if fp_ is not sys.stdout:
            fp_.close()

    if self.multiple_output:
        for s in structures:
            fp = fp_open(s.id)
            try:
                self.write_header(fp)
                self.write_out_worker(fp, matches[s.id], s.id, s.components, colocation_ids)
            finally:
                fp_close(fp)
        return

    fp = fp_open()
    try:
        self.write_header(fp)
        for s in structures:
            self.write_out_worker(fp, matches[s.id], s.id, s.components, colocation_ids)
    finally:
        fp_close(fp)
class ColocationIds: class ColocationIds:
@ -939,6 +963,7 @@ if __name__ == '__main__':
parser.add_argument('--without-rep', help='Will not write representtaions in output', action='store_true') parser.add_argument('--without-rep', help='Will not write representtaions in output', action='store_true')
parser.add_argument('--group', help='Group collocations with same collocation ID', action='store_true') parser.add_argument('--group', help='Group collocations with same collocation ID', action='store_true')
parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info") parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info")
parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true')
args = parser.parse_args() args = parser.parse_args()
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper()) logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())