Adding sorting
This commit is contained in:
parent
8107a9f647
commit
1c9ac7c867
26
wani.py
26
wani.py
|
@ -813,6 +813,7 @@ class Writer:
|
||||||
self.without_rep = args.without_rep
|
self.without_rep = args.without_rep
|
||||||
self.output_file = args.output
|
self.output_file = args.output
|
||||||
self.multiple_output = args.multiple_output
|
self.multiple_output = args.multiple_output
|
||||||
|
self.sort_by = int(args.sort_by)
|
||||||
|
|
||||||
def header(self):
|
def header(self):
|
||||||
cols = ["Lemma"]
|
cols = ["Lemma"]
|
||||||
|
@ -848,10 +849,27 @@ class Writer:
|
||||||
|
|
||||||
return cols
|
return cols
|
||||||
|
|
||||||
|
def sorted_rows(self, rows):
    """Return ``rows`` ordered by the column selected via ``self.sort_by``.

    Args:
        rows: list of rows, each row being a list of strings (one string
            per output column).

    Returns:
        ``rows`` itself when sorting is disabled (negative ``self.sort_by``),
        when there are fewer than two rows, or when the requested column is
        missing from any row; otherwise a new list sorted by that column.
        The column is compared numerically when every cell in it parses as
        an integer, and as a case-insensitive string otherwise.
    """
    column = self.sort_by

    # A negative index means "do not sort"; 0 or 1 rows are already ordered.
    if column < 0 or len(rows) < 2:
        return rows

    # Check every row, not only the first one, so a short row cannot raise
    # IndexError while sorting. Report the column that was actually asked for.
    if any(len(row) <= column for row in rows):
        print("Cannot sort by column #{}: Not enough columns!".format(column), file=sys.stderr)
        return rows

    def numeric_key(row):
        return int(row[column])

    def text_key(row):
        return row[column].lower()

    # Only sort numerically when the whole column is numeric; probing just
    # the first row would let a later non-numeric cell blow up inside sorted().
    try:
        for row in rows:
            numeric_key(row)
    except ValueError:
        key = text_key
    else:
        key = numeric_key

    return sorted(rows, key=key)
|
||||||
|
|
||||||
def write_header(self, file_handler):
|
def write_header(self, file_handler):
|
||||||
file_handler.write(", ".join(self.header()) + "\n")
|
file_handler.write(", ".join(self.header()) + "\n")
|
||||||
|
|
||||||
def write_out_worker(self, file_handler, matches, structure_id, components, colocation_ids):
|
def write_out_worker(self, file_handler, matches, structure_id, components, colocation_ids):
|
||||||
|
rows = []
|
||||||
for m, reason, cid in matches:
|
for m, reason, cid in matches:
|
||||||
to_write = []
|
to_write = []
|
||||||
|
|
||||||
|
@ -874,8 +892,10 @@ class Writer:
|
||||||
to_write.append(colocation_ids.num(cid))
|
to_write.append(colocation_ids.num(cid))
|
||||||
colocation_ids.set_written(cid)
|
colocation_ids.set_written(cid)
|
||||||
|
|
||||||
file_handler.write(", ".join(to_write) + "\n")
|
rows.append(to_write)
|
||||||
|
|
||||||
|
rows = self.sorted_rows(rows)
|
||||||
|
file_handler.write("\n".join([", ".join(row) for row in rows]) + "\n")
|
||||||
file_handler.flush()
|
file_handler.flush()
|
||||||
|
|
||||||
def write_out(self, matches, structures, colocation_ids):
|
def write_out(self, matches, structures, colocation_ids):
|
||||||
|
@ -901,7 +921,8 @@ class Writer:
|
||||||
self.write_header(fp)
|
self.write_header(fp)
|
||||||
|
|
||||||
sid_matches = matches[s.id]
|
sid_matches = matches[s.id]
|
||||||
self.write_out_worker(fp, sid_matches, s.id, s.components, colocation_ids)
|
if len(sid_matches) > 0:
|
||||||
|
self.write_out_worker(fp, sid_matches, s.id, s.components, colocation_ids)
|
||||||
|
|
||||||
if self.multiple_output:
|
if self.multiple_output:
|
||||||
fp_close(fp)
|
fp_close(fp)
|
||||||
|
@ -1029,6 +1050,7 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info")
|
parser.add_argument('--verbose', help='Enable verbose output to stderr', choices=["warning", "info", "debug"], default="info")
|
||||||
parser.add_argument('--count-files', help="Count files: more verbose output", action='store_true')
|
parser.add_argument('--count-files', help="Count files: more verbose output", action='store_true')
|
||||||
parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true')
|
parser.add_argument('--multiple-output', help='Generate one output for each syntactic structure', action='store_true')
|
||||||
|
parser.add_argument('--sort-by', help="Sort by a this column (index)", type=int, default=-1)
|
||||||
|
|
||||||
parser.add_argument('--pc-tag', help='Tag for separators, usually pc or c', default="pc")
|
parser.add_argument('--pc-tag', help='Tag for separators, usually pc or c', default="pc")
|
||||||
parser.add_argument('--parallel', help='Run in multiple processes, should speed things up')
|
parser.add_argument('--parallel', help='Run in multiple processes, should speed things up')
|
||||||
|
|
Loading…
Reference in New Issue
Block a user