class Writer:
    """Write formatted colocation matches as comma-separated text.

    A writer couples an output path with a formatter object.  The formatter
    supplies the per-component columns (``header_repeat`` /
    ``content_repeat`` / ``length``), the trailing columns (``header_right`` /
    ``content_right``) and decides through ``group()`` whether only the first
    word set of every match is written.
    """

    @staticmethod
    def other_params(args):
        """Return ``(multiple_output, sort_by, sort_reversed)`` read from *args*.

        ``args.sort_by`` is converted with ``int``.
        """
        return (args.multiple_output, int(args.sort_by), args.sort_reversed)

    @staticmethod
    def make_output_writer(args, colocation_ids, word_renderer):
        """Build a writer for ``args.out`` backed by an ``OutFormatter``."""
        params = Writer.other_params(args)
        return Writer(args.out, OutFormatter(colocation_ids, word_renderer), params)

    @staticmethod
    def make_output_no_stat_writer(args, colocation_ids, word_renderer):
        """Build a writer for ``args.out_no_stat`` backed by an ``OutNoStatFormatter``."""
        params = Writer.other_params(args)
        return Writer(args.out_no_stat, OutNoStatFormatter(colocation_ids, word_renderer), params)

    @staticmethod
    def make_all_writer(args, colocation_ids, word_renderer):
        """Build a writer for ``args.all`` backed by an ``AllFormatter``.

        No parameters are passed, so this writer never sorts and always
        writes a single file.
        """
        return Writer(args.all, AllFormatter(colocation_ids, word_renderer), None)

    @staticmethod
    def make_stats_writer(args, colocation_ids, word_renderer):
        """Build a writer for ``args.stats`` backed by a ``StatsFormatter``."""
        params = Writer.other_params(args)
        return Writer(args.stats, StatsFormatter(colocation_ids, word_renderer), params)

    def __init__(self, file_out, formatter, params):
        """Store the output path, the formatter and the optional settings.

        Args:
            file_out: Path of the output file; ``None`` turns ``write_out``
                into a no-op.
            formatter: Object producing the header and row cells.
            params: ``None`` or a ``(multiple_output, sort_by, sort_order)``
                sequence as returned by ``other_params``.
        """
        if params is None:
            self.multiple_output = False
            self.sort_by = -1  # a negative index disables sorting
            self.sort_order = None
        else:
            self.multiple_output = params[0]
            self.sort_by = params[1]
            self.sort_order = params[2]

        self.output_file = file_out
        self.formatter = formatter

    def header(self):
        """Return the list of column names of the output table."""
        repeating_cols = self.formatter.header_repeat()
        # One copy of the repeating columns per component slot: C1_..., C2_..., ...
        cols = [
            "C{}_{}".format(i + 1, thd)
            for i in range(MAX_NUM_COMPONENTS)
            for thd in repeating_cols
        ]

        cols = ["Structure_ID"] + cols + ["Colocation_ID"]
        cols += self.formatter.header_right()
        return cols

    def sorted_rows(self, rows):
        """Return *rows* ordered by the column at index ``self.sort_by``.

        The rows are returned unchanged when sorting is disabled (negative
        ``sort_by``), when there are fewer than two rows, or when the first
        row has too few columns (a warning is logged in that case).

        The column is compared numerically only if every cell in it parses
        as an integer; otherwise the lower-cased text is compared, so a
        column mixing numbers and text no longer aborts the sort.
        ``self.sort_order`` being truthy reverses the order.
        """
        if self.sort_by < 0 or len(rows) < 2:
            return rows

        column = self.sort_by
        if len(rows[0]) <= column:
            # Report the column that was asked for, not the row width.
            logging.warning("Cannot sort by column #{}: Not enough columns!".format(column))
            return rows

        def numeric_key(row):
            return int(row[column])

        def text_key(row):
            return row[column].lower()

        # Probe the whole column up front so the key function cannot fail
        # half-way through the sort.
        key = numeric_key
        for row in rows:
            try:
                int(row[column])
            except ValueError:
                key = text_key
                break

        return sorted(rows, key=key, reverse=bool(self.sort_order))

    def write_header(self, file_handler):
        """Write the header line to *file_handler*."""
        file_handler.write(", ".join(self.header()) + "\n")

    def write_out_worker(self, file_handler, structure, colocation_ids):
        """Write every row of *structure* to *file_handler*.

        One row is produced per word set of each match returned by
        ``colocation_ids.get_matches_for(structure)``; when the formatter's
        ``group()`` is truthy only the first word set of each match is used.
        Nothing is written if there are no rows.
        """
        rows = []
        components = structure.components
        cells_per_component = self.formatter.length()

        for match in colocation_ids.get_matches_for(structure):
            self.formatter.new_match(match)

            for words in match.matches:
                to_write = []

                for position, _comp in enumerate(components):
                    # Word sets are keyed by the 1-based component number as a string.
                    idx = str(position + 1)
                    if idx not in words:
                        to_write.extend([""] * cells_per_component)
                    else:
                        to_write.extend(
                            self.formatter.content_repeat(
                                words, match.representations, idx, structure.id))

                # Pad so that every row spans MAX_NUM_COMPONENTS component slots.
                to_write.extend(
                    [""] * (MAX_NUM_COMPONENTS * cells_per_component - len(to_write)))

                # structure_id and colocation_id
                to_write = [structure.id] + to_write + [match.match_id]

                # header_right
                to_write.extend(self.formatter.content_right(len(match)))
                rows.append(to_write)

                if self.formatter.group():
                    break

        if rows:
            rows = self.sorted_rows(rows)
            file_handler.write("\n".join([", ".join(row) for row in rows]) + "\n")
            file_handler.flush()

    def write_out(self, structures, colocation_ids):
        """Write the header and all rows for *structures*.

        Does nothing when no output file was configured.  With
        ``multiple_output`` every structure goes to its own file named
        ``"<output_file>.<structure id>"``; otherwise everything goes to
        ``output_file`` under a single header.  Files are closed even if
        writing fails.
        """
        if self.output_file is None:
            return

        if self.multiple_output:
            for structure in structures:
                path = "{}.{}".format(self.output_file, structure.id)
                with open(path, "w") as fp:
                    self.write_header(fp)
                    self.formatter.set_structure(structure)
                    self.write_out_worker(fp, structure, colocation_ids)
        else:
            with open(self.output_file, "w") as fp:
                self.write_header(fp)
                for structure in structures:
                    self.formatter.set_structure(structure)
                    self.write_out_worker(fp, structure, colocation_ids)