Added collocation_sentence_map_dest
This commit is contained in:
parent
9a9d344510
commit
eb86a6bb1c
11
src/collocation_sentence_mapper.py
Normal file
11
src/collocation_sentence_mapper.py
Normal file
|
@ -0,0 +1,11 @@
|
|||
|
||||
class CollocationSentenceMapper:
    """Write a tab-separated mapping of collocation ids to sentence ids.

    The target file is opened (and truncated) on construction and a header
    row ``Collocation_id<TAB>Sentence_id`` is written immediately.  Each call
    to :meth:`add_map` appends one data row.  The instance can be used as a
    context manager so the file is closed even if an error occurs.
    """

    def __init__(self, output_dir):
        """Open the mapping file and write the header row.

        Args:
            output_dir: Path of the file to write.  Despite the name, this
                value is passed straight to ``open`` and must therefore be a
                file path, not a directory.

        Raises:
            OSError: If the file cannot be opened for writing.
        """
        # Explicit encoding keeps the output identical across platforms
        # instead of depending on the locale's default encoding.
        self.output = open(output_dir, "w", encoding="utf-8")
        try:
            self.output.write('Collocation_id\tSentence_id\n')
        except Exception:
            # Do not leak the handle when the header cannot be written.
            self.output.close()
            raise

    def __enter__(self):
        """Return the mapper itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file on leaving the ``with`` block; never suppress errors."""
        self.close()
        return False

    def close(self):
        """Close the underlying file.  Calling this more than once is safe."""
        self.output.close()

    def add_map(self, collocation_id, sentence_id):
        """Append one ``collocation_id<TAB>sentence_id`` row to the file."""
        self.output.write(f'{collocation_id}\t{sentence_id}\n')
|
|
@ -151,6 +151,8 @@ if __name__ == '__main__':
|
|||
|
||||
parser.add_argument('--db',
|
||||
help="Database file to use (instead of memory)", default=None)
|
||||
parser.add_argument('--collocation_sentence_map_dest',
|
||||
help="Destination to folder where collocation-sentence mapper (mappers in case of multiple-output).", default=None)
|
||||
parser.add_argument('--new-db',
|
||||
help="Writes over database file, if there exists one", action='store_true')
|
||||
|
||||
|
|
|
@ -1,8 +1,13 @@
|
|||
import logging
|
||||
import logging
|
||||
import os
|
||||
|
||||
from progress_bar import progress
|
||||
|
||||
from formatter import OutFormatter, OutNoStatFormatter, AllFormatter, StatsFormatter
|
||||
|
||||
from collocation_sentence_mapper import CollocationSentenceMapper
|
||||
|
||||
|
||||
class Writer:
|
||||
@staticmethod
|
||||
def other_params(args):
|
||||
|
@ -11,23 +16,25 @@ class Writer:
|
|||
@staticmethod
|
||||
def make_output_writer(args, num_components, colocation_ids, word_renderer):
|
||||
params = Writer.other_params(args)
|
||||
return Writer(args.out, num_components, OutFormatter(colocation_ids, word_renderer), params)
|
||||
return Writer(args.out, num_components, OutFormatter(colocation_ids, word_renderer), args.collocation_sentence_map_dest, params)
|
||||
|
||||
@staticmethod
|
||||
def make_output_no_stat_writer(args, num_components, colocation_ids, word_renderer):
|
||||
params = Writer.other_params(args)
|
||||
return Writer(args.out_no_stat, num_components, OutNoStatFormatter(colocation_ids, word_renderer), params)
|
||||
return Writer(args.out_no_stat, num_components, OutNoStatFormatter(colocation_ids, word_renderer), args.collocation_sentence_map_dest, params)
|
||||
|
||||
@staticmethod
|
||||
def make_all_writer(args, num_components, colocation_ids, word_renderer):
|
||||
return Writer(args.all, num_components, AllFormatter(colocation_ids, word_renderer), None)
|
||||
return Writer(args.all, num_components, AllFormatter(colocation_ids, word_renderer), args.collocation_sentence_map_dest, None)
|
||||
|
||||
@staticmethod
|
||||
def make_stats_writer(args, num_components, colocation_ids, word_renderer):
|
||||
params = Writer.other_params(args)
|
||||
return Writer(args.stats, num_components, StatsFormatter(colocation_ids, word_renderer), params)
|
||||
return Writer(args.stats, num_components, StatsFormatter(colocation_ids, word_renderer), args.collocation_sentence_map_dest, params)
|
||||
|
||||
def __init__(self, file_out, num_components, formatter, params):
|
||||
def __init__(self, file_out, num_components, formatter, collocation_sentence_map_dest, params):
|
||||
# TODO FIX THIS
|
||||
self.collocation_sentence_map_dest = collocation_sentence_map_dest
|
||||
if params is None:
|
||||
self.multiple_output = False
|
||||
self.sort_by = -1
|
||||
|
@ -73,7 +80,7 @@ class Writer:
|
|||
def write_header(self, file_handler):
|
||||
file_handler.write(",".join(self.header()) + "\n")
|
||||
|
||||
def write_out_worker(self, file_handler, structure, colocation_ids):
|
||||
def write_out_worker(self, file_handler, structure, colocation_ids, col_sent_map):
|
||||
rows = []
|
||||
components = structure.components
|
||||
for match in progress(colocation_ids.get_matches_for(structure), "Writing matches: {}".format(structure.id)):
|
||||
|
@ -84,6 +91,11 @@ class Writer:
|
|||
|
||||
variable_word_order = self.find_variable_word_order(match.matches)
|
||||
|
||||
if col_sent_map is not None:
|
||||
# TODO find better way to get sentence_id
|
||||
for words in match.matches:
|
||||
col_sent_map.add_map(match.match_id, '.'.join(words['1'].id.split('.')[:-1]))
|
||||
|
||||
for words in match.matches:
|
||||
to_write = []
|
||||
|
||||
|
@ -128,14 +140,18 @@ class Writer:
|
|||
if not self.multiple_output:
|
||||
fp = fp_open()
|
||||
self.write_header(fp)
|
||||
col_sent_map = CollocationSentenceMapper(os.path.join(self.collocation_sentence_map_dest, 'mapper.txt')) \
|
||||
if self.collocation_sentence_map_dest is not None else None
|
||||
|
||||
for s in progress(structures, "writing:{}".format(self.formatter)):
|
||||
if self.multiple_output:
|
||||
fp = fp_open(s.id)
|
||||
self.write_header(fp)
|
||||
col_sent_map = CollocationSentenceMapper(os.path.join(self.collocation_sentence_map_dest, f'{s.id}_mapper.txt')) \
|
||||
if self.collocation_sentence_map_dest is not None else None
|
||||
|
||||
self.formatter.set_structure(s)
|
||||
self.write_out_worker(fp, s, colocation_ids)
|
||||
self.write_out_worker(fp, s, colocation_ids, col_sent_map)
|
||||
|
||||
if self.multiple_output:
|
||||
fp_close(fp)
|
||||
|
|
Loading…
Reference in New Issue
Block a user