134 lines
4.4 KiB
Plaintext
134 lines
4.4 KiB
Plaintext
|
class Writer:
    """Write colocation matches as comma-separated rows via a formatter.

    A writer pairs an output path with a formatter object.  The formatter
    supplies the per-component columns (``header_repeat`` /
    ``content_repeat``), the trailing columns (``header_right`` /
    ``content_right``) and decides whether only the first occurrence of a
    match is emitted (``group``).

    NOTE: fields are joined with ``", "`` and are not quoted or escaped, so
    a field that itself contains a comma will shift the columns.
    """

    @staticmethod
    def other_params(args):
        """Return ``(multiple_output, sort_by, sort_reversed)`` read from *args*.

        ``args.sort_by`` is converted with ``int``; the other two values are
        passed through unchanged.
        """
        return (args.multiple_output, int(args.sort_by), args.sort_reversed)

    @staticmethod
    def make_output_writer(args, colocation_ids, word_renderer):
        """Build a writer for ``args.out`` using ``OutFormatter``."""
        params = Writer.other_params(args)
        return Writer(args.out, OutFormatter(colocation_ids, word_renderer), params)

    @staticmethod
    def make_output_no_stat_writer(args, colocation_ids, word_renderer):
        """Build a writer for ``args.out_no_stat`` using ``OutNoStatFormatter``."""
        params = Writer.other_params(args)
        return Writer(args.out_no_stat, OutNoStatFormatter(colocation_ids, word_renderer), params)

    @staticmethod
    def make_all_writer(args, colocation_ids, word_renderer):
        """Build a writer for ``args.all`` using ``AllFormatter``.

        No sorting / multiple-output parameters are passed, so the writer
        uses its defaults (single file, unsorted).
        """
        return Writer(args.all, AllFormatter(colocation_ids, word_renderer), None)

    @staticmethod
    def make_stats_writer(args, colocation_ids, word_renderer):
        """Build a writer for ``args.stats`` using ``StatsFormatter``."""
        params = Writer.other_params(args)
        return Writer(args.stats, StatsFormatter(colocation_ids, word_renderer), params)

    def __init__(self, file_out, formatter, params):
        """Store the output path, the formatter and the sorting options.

        Args:
            file_out: Path of the output file, or ``None`` to make
                ``write_out`` a no-op.
            formatter: Object providing the formatter methods used by this
                class (``header_repeat``, ``header_right``, ``length``,
                ``content_repeat``, ``content_right``, ``group``,
                ``new_match``, ``set_structure``).
            params: ``None`` for defaults, otherwise an indexable
                ``(multiple_output, sort_by, sort_reversed)`` as returned by
                ``other_params``.
        """
        if params is None:
            self.multiple_output = False
            # A negative column index disables sorting in sorted_rows().
            self.sort_by = -1
            self.sort_order = None
        else:
            self.multiple_output = params[0]
            self.sort_by = params[1]
            # Despite its name this holds the "reversed" flag handed to sorted().
            self.sort_order = params[2]

        self.output_file = file_out
        self.formatter = formatter

    def header(self):
        """Return the list of column titles for one output file.

        The formatter's repeating columns are emitted once per component
        slot (``C1_...`` up to ``C<MAX_NUM_COMPONENTS>_...``), framed by
        ``Structure_ID`` / ``Colocation_ID`` and followed by the formatter's
        right-hand columns.
        """
        repeating_cols = self.formatter.header_repeat()
        cols = ["C{}_{}".format(i + 1, thd) for i in range(MAX_NUM_COMPONENTS)
                for thd in repeating_cols]

        cols = ["Structure_ID"] + cols + ["Colocation_ID"]
        cols += self.formatter.header_right()
        return cols

    def sorted_rows(self, rows):
        """Return *rows* ordered by column ``self.sort_by``.

        Rows are returned unchanged when sorting is disabled (negative
        column), when there are fewer than two rows, or when the first row
        does not have the requested column (a warning is logged).

        The column is sorted numerically when every cell parses as an
        integer; otherwise it is sorted as case-insensitive text.
        ``self.sort_order`` is used as the ``reverse`` flag.
        """
        column = self.sort_by
        if column < 0 or len(rows) < 2:
            return rows

        if len(rows[0]) <= column:
            # Report the column that was asked for, not the row width.
            logging.warning("Cannot sort by column #{}: Not enough columns!".format(column))
            return rows

        # sort_order may be None or any truthy/falsy value; sorted() wants a bool.
        reverse = bool(self.sort_order)

        try:
            # sorted() evaluates every key before ordering, so one
            # non-numeric cell anywhere in the column triggers the fallback.
            return sorted(rows, key=lambda row: int(row[column]), reverse=reverse)
        except ValueError:
            return sorted(rows, key=lambda row: row[column].lower(), reverse=reverse)

    def write_header(self, file_handler):
        """Write the header line (titles joined with ``", "``) to *file_handler*."""
        file_handler.write(", ".join(self.header()) + "\n")

    def write_out_worker(self, file_handler, structure, colocation_ids):
        """Write all rows for one *structure* to *file_handler* and flush it.

        For every match returned by ``colocation_ids.get_matches_for`` one
        row is built per entry of ``match.matches``; when the formatter's
        ``group()`` is true only the first entry of each match is used.
        Rows are sorted with ``sorted_rows`` before being written.
        """
        rows = []
        components = structure.components
        columns_per_component = self.formatter.length()

        for match in colocation_ids.get_matches_for(structure):
            self.formatter.new_match(match)

            for words in match.matches:
                to_write = []

                # `words` is looked up by 1-based component index as a string.
                for position, _comp in enumerate(components, start=1):
                    idx = str(position)
                    if idx not in words:
                        to_write.extend([""] * columns_per_component)
                    else:
                        to_write.extend(self.formatter.content_repeat(
                            words, match.representations, idx, structure.id))

                # Pad so that every row has MAX_NUM_COMPONENTS component slots.
                missing = MAX_NUM_COMPONENTS * columns_per_component - len(to_write)
                to_write.extend([""] * missing)

                # structure_id and colocation_id frame the component columns
                to_write = [structure.id] + to_write + [match.match_id]

                # header_right
                to_write.extend(self.formatter.content_right(len(match)))
                rows.append(to_write)

                if self.formatter.group():
                    break

        if rows:
            rows = self.sorted_rows(rows)
            file_handler.write("\n".join([", ".join(row) for row in rows]) + "\n")
            file_handler.flush()

    def write_out(self, structures, colocation_ids):
        """Write the rows of all *structures* to the configured output.

        Does nothing when no output file was configured.  With
        ``multiple_output`` each structure goes to its own file named
        ``"<output_file>.<structure id>"`` (header included in each);
        otherwise a single file with one header is written.  Files are
        closed even if writing raises.
        """
        if self.output_file is None:
            return

        if self.multiple_output:
            for s in structures:
                with open("{}.{}".format(self.output_file, s.id), "w") as fp:
                    self.write_header(fp)
                    self.formatter.set_structure(s)
                    self.write_out_worker(fp, s, colocation_ids)
        else:
            with open(self.output_file, "w") as fp:
                self.write_header(fp)
                for s in structures:
                    self.formatter.set_structure(s)
                    self.write_out_worker(fp, s, colocation_ids)
|