Implemented multiple output option
This commit is contained in:
parent
8b47e2b317
commit
b4e73e2d60
47
wani.py
47
wani.py
|
@ -786,6 +786,7 @@ class Writer:
|
||||||
self.lemma_only = args.lemma_only
|
self.lemma_only = args.lemma_only
|
||||||
self.without_rep = args.without_rep
|
self.without_rep = args.without_rep
|
||||||
self.output_file = args.output
|
self.output_file = args.output
|
||||||
|
self.multiple_output = args.multiple_output
|
||||||
|
|
||||||
def header(self):
|
def header(self):
|
||||||
cols = ["Lemma"]
|
cols = ["Lemma"]
|
||||||
|
@ -821,23 +822,21 @@ class Writer:
|
||||||
|
|
||||||
return cols
|
return cols
|
||||||
|
|
||||||
def write_out_worker(self, file_handler, matches, structures, colocation_ids):
|
def write_header(self, file_handler):
|
||||||
file_handler.write(", ".join(self.header()) + "\n")
|
file_handler.write(", ".join(self.header()) + "\n")
|
||||||
|
|
||||||
for s in structures:
|
def write_out_worker(self, file_handler, matches, structure_id, components, colocation_ids):
|
||||||
ms = matches[s.id]
|
for m, reason, cid in matches:
|
||||||
|
|
||||||
for m, reason, cid in ms:
|
|
||||||
to_write = []
|
to_write = []
|
||||||
|
|
||||||
for idx, comp in enumerate(s.components):
|
for idx, comp in enumerate(components):
|
||||||
idx = str(idx + 1)
|
idx = str(idx + 1)
|
||||||
word = m[idx] if idx in m else None
|
word = m[idx] if idx in m else None
|
||||||
to_write.extend(self.from_word(word))
|
to_write.extend(self.from_word(word))
|
||||||
|
|
||||||
# make them equal size
|
# make them equal size
|
||||||
to_write.extend([""] * (MAX_NUM_COMPONENTS * self.length() - len(to_write)))
|
to_write.extend([""] * (MAX_NUM_COMPONENTS * self.length() - len(to_write)))
|
||||||
to_write = [s.id] + to_write + [colocation_ids.to_id(cid)]
|
to_write = [structure_id] + to_write + [colocation_ids.to_id(cid)]
|
||||||
|
|
||||||
if not self.without_rep:
|
if not self.without_rep:
|
||||||
to_write.append("") # not yet implemented...
|
to_write.append("") # not yet implemented...
|
||||||
|
@ -854,10 +853,35 @@ class Writer:
|
||||||
file_handler.flush()
|
file_handler.flush()
|
||||||
|
|
||||||
def write_out(self, matches, structures, colocation_ids):
|
def write_out(self, matches, structures, colocation_ids):
|
||||||
fp = sys.stdout if self.output_file is None else open(self.output_file, "w")
|
def fp_close(fp_):
|
||||||
self.write_out_worker(fp, matches, structures, colocation_ids)
|
if fp_ != sys.stdout:
|
||||||
if self.output_file is not None:
|
fp_.close()
|
||||||
fp.close()
|
|
||||||
|
def fp_open(snum=None):
|
||||||
|
if self.output_file is None:
|
||||||
|
return sys.stdout
|
||||||
|
elif snum is None:
|
||||||
|
return open(self.output_file, "w")
|
||||||
|
else:
|
||||||
|
return open("{}.{}".format(self.output_file, snum), "w")
|
||||||
|
|
||||||
|
if not self.multiple_output:
|
||||||
|
fp = fp_open()
|
||||||
|
self.write_header(fp)
|
||||||
|
|
||||||
|
for s in structures:
|
||||||
|
if self.multiple_output:
|
||||||
|
fp=fp_open(s.id)
|
||||||
|
self.write_header(fp)
|
||||||
|
|
||||||
|
sid_matches = matches[s.id]
|
||||||
|
self.write_out_worker(fp, sid_matches, s.id, s.components, colocation_ids)
|
||||||
|
|
||||||
|
if self.multiple_output:
|
||||||
|
fp_close(fp)
|
||||||
|
|
||||||
|
if not self.multiple_output:
|
||||||
|
fp_close(fp)
|
||||||
|
|
||||||
|
|
||||||
class ColocationIds:
|
class ColocationIds:
|
||||||
|
@ -939,6 +963,7 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('--without-rep', help='Will not write representtaions in output', action='store_true')
|
parser.add_argument('--without-rep', help='Will not write representtaions in output', action='store_true')
|
||||||
parser.add_argument('--group', help='Group collocations with same collocation ID', action='store_true')
|
parser.add_argument('--group', help='Group collocations with same collocation ID', action='store_true')
|
||||||
parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info")
|
parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info")
|
||||||
|
parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
logging.basicConfig(stream=sys.stderr, level=args.verbose.upper())
|
||||||
|
|
Loading…
Reference in New Issue
Block a user