|
|
|
@ -1,8 +1,13 @@
|
|
|
|
|
import logging
|
|
|
|
|
import logging
|
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
from progress_bar import progress
|
|
|
|
|
|
|
|
|
|
from formatter import OutFormatter, OutNoStatFormatter, AllFormatter, StatsFormatter
|
|
|
|
|
|
|
|
|
|
from collocation_sentence_mapper import CollocationSentenceMapper
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Writer:
|
|
|
|
|
@staticmethod
|
|
|
|
|
def other_params(args):
|
|
|
|
@ -11,23 +16,25 @@ class Writer:
|
|
|
|
|
@staticmethod
def make_output_writer(args, num_components, colocation_ids, word_renderer):
    """Build the Writer that targets ``args.out`` using an ``OutFormatter``.

    Args:
        args: Parsed options object; ``out`` and
            ``collocation_sentence_map_dest`` are read here, and the whole
            object is handed to ``Writer.other_params``.
        num_components: Forwarded unchanged to the ``Writer`` constructor.
        colocation_ids: Forwarded to the ``OutFormatter`` constructor.
        word_renderer: Forwarded to the ``OutFormatter`` constructor.

    Returns:
        A ``Writer`` configured for the ``args.out`` destination.
    """
    params = Writer.other_params(args)
    # Only the five-argument call is kept: it matches the constructor that
    # accepts ``collocation_sentence_map_dest``. The older four-argument
    # return that preceded it made this one unreachable.
    return Writer(
        args.out,
        num_components,
        OutFormatter(colocation_ids, word_renderer),
        args.collocation_sentence_map_dest,
        params,
    )
|
|
|
|
|
|
|
|
|
|
@staticmethod
def make_output_no_stat_writer(args, num_components, colocation_ids, word_renderer):
    """Build the Writer that targets ``args.out_no_stat`` using an ``OutNoStatFormatter``.

    Args:
        args: Parsed options object; ``out_no_stat`` and
            ``collocation_sentence_map_dest`` are read here, and the whole
            object is handed to ``Writer.other_params``.
        num_components: Forwarded unchanged to the ``Writer`` constructor.
        colocation_ids: Forwarded to the ``OutNoStatFormatter`` constructor.
        word_renderer: Forwarded to the ``OutNoStatFormatter`` constructor.

    Returns:
        A ``Writer`` configured for the ``args.out_no_stat`` destination.
    """
    params = Writer.other_params(args)
    # Only the five-argument call is kept: it matches the constructor that
    # accepts ``collocation_sentence_map_dest``. The older four-argument
    # return that preceded it made this one unreachable.
    return Writer(
        args.out_no_stat,
        num_components,
        OutNoStatFormatter(colocation_ids, word_renderer),
        args.collocation_sentence_map_dest,
        params,
    )
|
|
|
|
|
|
|
|
|
|
@staticmethod
def make_all_writer(args, num_components, colocation_ids, word_renderer):
    """Build the Writer that targets ``args.all`` using an ``AllFormatter``.

    Unlike the sibling factories, no extra parameters are computed: ``None``
    is passed as ``params`` to the constructor.

    Args:
        args: Parsed options object; ``all`` and
            ``collocation_sentence_map_dest`` are read here.
        num_components: Forwarded unchanged to the ``Writer`` constructor.
        colocation_ids: Forwarded to the ``AllFormatter`` constructor.
        word_renderer: Forwarded to the ``AllFormatter`` constructor.

    Returns:
        A ``Writer`` configured for the ``args.all`` destination.
    """
    # Only the five-argument call is kept: it matches the constructor that
    # accepts ``collocation_sentence_map_dest``. The older four-argument
    # return that preceded it made this one unreachable.
    return Writer(
        args.all,
        num_components,
        AllFormatter(colocation_ids, word_renderer),
        args.collocation_sentence_map_dest,
        None,
    )
|
|
|
|
|
|
|
|
|
|
@staticmethod
def make_stats_writer(args, num_components, colocation_ids, word_renderer):
    """Build the Writer that targets ``args.stats`` using a ``StatsFormatter``.

    Args:
        args: Parsed options object; ``stats`` and
            ``collocation_sentence_map_dest`` are read here, and the whole
            object is handed to ``Writer.other_params``.
        num_components: Forwarded unchanged to the ``Writer`` constructor.
        colocation_ids: Forwarded to the ``StatsFormatter`` constructor.
        word_renderer: Forwarded to the ``StatsFormatter`` constructor.

    Returns:
        A ``Writer`` configured for the ``args.stats`` destination.
    """
    params = Writer.other_params(args)
    # Only the five-argument call is kept: it matches the constructor that
    # accepts ``collocation_sentence_map_dest``. The older four-argument
    # return that preceded it made this one unreachable.
    return Writer(
        args.stats,
        num_components,
        StatsFormatter(colocation_ids, word_renderer),
        args.collocation_sentence_map_dest,
        params,
    )
|
|
|
|
|
|
|
|
|
|
def __init__(self, file_out, num_components, formatter, params):
|
|
|
|
|
def __init__(self, file_out, num_components, formatter, collocation_sentence_map_dest, params):
|
|
|
|
|
# TODO FIX THIS
|
|
|
|
|
self.collocation_sentence_map_dest = collocation_sentence_map_dest
|
|
|
|
|
if params is None:
|
|
|
|
|
self.multiple_output = False
|
|
|
|
|
self.sort_by = -1
|
|
|
|
@ -73,7 +80,7 @@ class Writer:
|
|
|
|
|
def write_header(self, file_handler):
    """Write the header row to *file_handler*.

    The column names returned by ``self.header()`` are joined with commas
    and terminated with a single newline.

    Args:
        file_handler: Object exposing a ``write(str)`` method.
    """
    header_line = ",".join(self.header())
    file_handler.write(header_line + "\n")
|
|
|
|
|
|
|
|
|
|
def write_out_worker(self, file_handler, structure, colocation_ids):
|
|
|
|
|
def write_out_worker(self, file_handler, structure, colocation_ids, col_sent_map):
|
|
|
|
|
rows = []
|
|
|
|
|
components = structure.components
|
|
|
|
|
for match in progress(colocation_ids.get_matches_for(structure), "Writing matches: {}".format(structure.id)):
|
|
|
|
@ -84,6 +91,11 @@ class Writer:
|
|
|
|
|
|
|
|
|
|
variable_word_order = self.find_variable_word_order(match.matches)
|
|
|
|
|
|
|
|
|
|
if col_sent_map is not None:
|
|
|
|
|
# TODO find better way to get sentence_id
|
|
|
|
|
for words in match.matches:
|
|
|
|
|
col_sent_map.add_map(match.match_id, '.'.join(words['1'].id.split('.')[:-1]))
|
|
|
|
|
|
|
|
|
|
for words in match.matches:
|
|
|
|
|
to_write = []
|
|
|
|
|
|
|
|
|
@ -128,14 +140,18 @@ class Writer:
|
|
|
|
|
if not self.multiple_output:
|
|
|
|
|
fp = fp_open()
|
|
|
|
|
self.write_header(fp)
|
|
|
|
|
col_sent_map = CollocationSentenceMapper(os.path.join(self.collocation_sentence_map_dest, 'mapper.txt')) \
|
|
|
|
|
if self.collocation_sentence_map_dest is not None else None
|
|
|
|
|
|
|
|
|
|
for s in progress(structures, "writing:{}".format(self.formatter)):
|
|
|
|
|
if self.multiple_output:
|
|
|
|
|
fp = fp_open(s.id)
|
|
|
|
|
self.write_header(fp)
|
|
|
|
|
col_sent_map = CollocationSentenceMapper(os.path.join(self.collocation_sentence_map_dest, f'{s.id}_mapper.txt')) \
|
|
|
|
|
if self.collocation_sentence_map_dest is not None else None
|
|
|
|
|
|
|
|
|
|
self.formatter.set_structure(s)
|
|
|
|
|
self.write_out_worker(fp, s, colocation_ids)
|
|
|
|
|
self.write_out_worker(fp, s, colocation_ids, col_sent_map)
|
|
|
|
|
|
|
|
|
|
if self.multiple_output:
|
|
|
|
|
fp_close(fp)
|
|
|
|
|