Added collocation_sentence_map_dest

This commit is contained in:
Luka 2020-07-20 10:51:09 +02:00
parent 9a9d344510
commit eb86a6bb1c
3 changed files with 37 additions and 8 deletions

View File

@ -0,0 +1,11 @@
class CollocationSentenceMapper:
    """Write a tab-separated map of collocation ids to sentence ids.

    The target file is opened for writing as soon as the object is created
    and a header row is written immediately. Call ``close()`` when finished,
    or use the instance in a ``with`` statement so the file is closed even
    if an error occurs.
    """

    def __init__(self, output_dir):
        """Open the map file and write the header row.

        Args:
            output_dir: Path handed directly to ``open()``; despite the
                name it must be a file path, not a directory.
        """
        # Explicit UTF-8 so the output does not depend on the platform's
        # default text encoding.
        self.output = open(output_dir, "w", encoding="utf-8")
        self.output.write('Collocation_id\tSentence_id\n')

    def __enter__(self):
        """Return the mapper itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file on leaving a ``with`` block; never suppresses errors."""
        self.close()

    def close(self):
        """Close the underlying file; calling it again is harmless."""
        self.output.close()

    def add_map(self, collocation_id, sentence_id):
        """Append one ``collocation_id<TAB>sentence_id`` row to the file."""
        self.output.write(f'{collocation_id}\t{sentence_id}\n')

View File

@ -151,6 +151,8 @@ if __name__ == '__main__':
parser.add_argument('--db',
help="Database file to use (instead of memory)", default=None)
# Optional folder that receives the collocation-to-sentence mapper file(s).
parser.add_argument('--collocation_sentence_map_dest',
                    help="Destination folder for the collocation-sentence mapper file "
                         "(one mapper file per structure in case of multiple-output).",
                    default=None)
parser.add_argument('--new-db',
help="Writes over database file, if there exists one", action='store_true')

View File

@ -1,8 +1,13 @@
import logging
import logging
import os
from progress_bar import progress
from formatter import OutFormatter, OutNoStatFormatter, AllFormatter, StatsFormatter
from collocation_sentence_mapper import CollocationSentenceMapper
class Writer:
@staticmethod
def other_params(args):
@ -11,23 +16,25 @@ class Writer:
@staticmethod
def make_output_writer(args, num_components, colocation_ids, word_renderer):
    """Build the Writer for the output selected by ``args.out``.

    Rows are formatted with an OutFormatter, and
    ``args.collocation_sentence_map_dest`` is forwarded so the writer
    knows where to put the collocation-sentence map.
    """
    params = Writer.other_params(args)
    # Only the five-argument call is kept; the older four-argument return
    # no longer matches Writer.__init__.
    return Writer(
        args.out,
        num_components,
        OutFormatter(colocation_ids, word_renderer),
        args.collocation_sentence_map_dest,
        params,
    )
@staticmethod
def make_output_no_stat_writer(args, num_components, colocation_ids, word_renderer):
    """Build the Writer for the output selected by ``args.out_no_stat``.

    Rows are formatted with an OutNoStatFormatter, and
    ``args.collocation_sentence_map_dest`` is forwarded so the writer
    knows where to put the collocation-sentence map.
    """
    params = Writer.other_params(args)
    # Only the five-argument call is kept; the older four-argument return
    # no longer matches Writer.__init__.
    return Writer(
        args.out_no_stat,
        num_components,
        OutNoStatFormatter(colocation_ids, word_renderer),
        args.collocation_sentence_map_dest,
        params,
    )
@staticmethod
def make_all_writer(args, num_components, colocation_ids, word_renderer):
    """Build the Writer for the output selected by ``args.all``.

    Rows are formatted with an AllFormatter. No extra params are passed
    (``None``), while ``args.collocation_sentence_map_dest`` is still
    forwarded to the writer.
    """
    # Only the five-argument call is kept; the older four-argument return
    # no longer matches Writer.__init__.
    return Writer(
        args.all,
        num_components,
        AllFormatter(colocation_ids, word_renderer),
        args.collocation_sentence_map_dest,
        None,
    )
@staticmethod
def make_stats_writer(args, num_components, colocation_ids, word_renderer):
    """Build the Writer for the output selected by ``args.stats``.

    Rows are formatted with a StatsFormatter, and
    ``args.collocation_sentence_map_dest`` is forwarded so the writer
    knows where to put the collocation-sentence map.
    """
    params = Writer.other_params(args)
    # Only the five-argument call is kept; the older four-argument return
    # no longer matches Writer.__init__.
    return Writer(
        args.stats,
        num_components,
        StatsFormatter(colocation_ids, word_renderer),
        args.collocation_sentence_map_dest,
        params,
    )
def __init__(self, file_out, num_components, formatter, params):
def __init__(self, file_out, num_components, formatter, collocation_sentence_map_dest, params):
# TODO FIX THIS
self.collocation_sentence_map_dest = collocation_sentence_map_dest
if params is None:
self.multiple_output = False
self.sort_by = -1
@ -73,7 +80,7 @@ class Writer:
def write_header(self, file_handler):
    """Write the comma-joined column names from ``self.header()`` as one line."""
    column_names = self.header()
    header_line = "{}\n".format(",".join(column_names))
    file_handler.write(header_line)
def write_out_worker(self, file_handler, structure, colocation_ids):
def write_out_worker(self, file_handler, structure, colocation_ids, col_sent_map):
rows = []
components = structure.components
for match in progress(colocation_ids.get_matches_for(structure), "Writing matches: {}".format(structure.id)):
@ -84,6 +91,11 @@ class Writer:
variable_word_order = self.find_variable_word_order(match.matches)
if col_sent_map is not None:
# TODO: find a better way to get sentence_id (currently the word id with its last dot-separated segment dropped)
for words in match.matches:
col_sent_map.add_map(match.match_id, '.'.join(words['1'].id.split('.')[:-1]))
for words in match.matches:
to_write = []
@ -128,14 +140,18 @@ class Writer:
if not self.multiple_output:
fp = fp_open()
self.write_header(fp)
col_sent_map = CollocationSentenceMapper(os.path.join(self.collocation_sentence_map_dest, 'mapper.txt')) \
if self.collocation_sentence_map_dest is not None else None
for s in progress(structures, "writing:{}".format(self.formatter)):
if self.multiple_output:
fp = fp_open(s.id)
self.write_header(fp)
col_sent_map = CollocationSentenceMapper(os.path.join(self.collocation_sentence_map_dest, f'{s.id}_mapper.txt')) \
if self.collocation_sentence_map_dest is not None else None
self.formatter.set_structure(s)
self.write_out_worker(fp, s, colocation_ids)
self.write_out_worker(fp, s, colocation_ids, col_sent_map)
if self.multiple_output:
fp_close(fp)