Added collocation_sentence_map_dest
This commit is contained in:
parent
9a9d344510
commit
eb86a6bb1c
11
src/collocation_sentence_mapper.py
Normal file
11
src/collocation_sentence_mapper.py
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
|
||||||
|
class CollocationSentenceMapper:
    """Write collocation-id / sentence-id pairs to a tab-separated file.

    The target file is opened for writing (truncating any existing content)
    when the object is constructed, and a header row is written immediately.
    Call ``close()`` when finished, or use the instance as a context manager
    so the file is closed even when an exception interrupts the writing.
    """

    def __init__(self, output_dir):
        """Open the mapping file and write the header row.

        Args:
            output_dir: Path handed directly to ``open``. Despite its name it
                must therefore be the path of the file to write, not of a
                directory.

        Raises:
            OSError: If the file cannot be opened for writing.
        """
        # Pin the encoding so the output does not depend on the platform's
        # default locale.
        self.output = open(output_dir, "w", encoding="utf-8")
        self.output.write('Collocation_id\tSentence_id\n')

    def __enter__(self):
        """Return the mapper itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file on leaving the ``with`` block; never swallow errors."""
        self.close()
        return False

    def close(self):
        """Flush and close the underlying file; safe to call more than once."""
        self.output.close()

    def add_map(self, collocation_id, sentence_id):
        """Append one ``collocation_id<TAB>sentence_id`` row to the file."""
        self.output.write(f'{collocation_id}\t{sentence_id}\n')
|
|
@ -151,6 +151,8 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
parser.add_argument('--db',
|
parser.add_argument('--db',
|
||||||
help="Database file to use (instead of memory)", default=None)
|
help="Database file to use (instead of memory)", default=None)
|
||||||
|
parser.add_argument('--collocation_sentence_map_dest',
|
||||||
|
help="Destination to folder where collocation-sentence mapper (mappers in case of multiple-output).", default=None)
|
||||||
parser.add_argument('--new-db',
|
parser.add_argument('--new-db',
|
||||||
help="Writes over database file, if there exists one", action='store_true')
|
help="Writes over database file, if there exists one", action='store_true')
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,13 @@
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
from progress_bar import progress
|
from progress_bar import progress
|
||||||
|
|
||||||
from formatter import OutFormatter, OutNoStatFormatter, AllFormatter, StatsFormatter
|
from formatter import OutFormatter, OutNoStatFormatter, AllFormatter, StatsFormatter
|
||||||
|
|
||||||
|
from collocation_sentence_mapper import CollocationSentenceMapper
|
||||||
|
|
||||||
|
|
||||||
class Writer:
|
class Writer:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def other_params(args):
|
def other_params(args):
|
||||||
|
@ -11,23 +16,25 @@ class Writer:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def make_output_writer(args, num_components, colocation_ids, word_renderer):
|
def make_output_writer(args, num_components, colocation_ids, word_renderer):
|
||||||
params = Writer.other_params(args)
|
params = Writer.other_params(args)
|
||||||
return Writer(args.out, num_components, OutFormatter(colocation_ids, word_renderer), params)
|
return Writer(args.out, num_components, OutFormatter(colocation_ids, word_renderer), args.collocation_sentence_map_dest, params)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def make_output_no_stat_writer(args, num_components, colocation_ids, word_renderer):
|
def make_output_no_stat_writer(args, num_components, colocation_ids, word_renderer):
|
||||||
params = Writer.other_params(args)
|
params = Writer.other_params(args)
|
||||||
return Writer(args.out_no_stat, num_components, OutNoStatFormatter(colocation_ids, word_renderer), params)
|
return Writer(args.out_no_stat, num_components, OutNoStatFormatter(colocation_ids, word_renderer), args.collocation_sentence_map_dest, params)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def make_all_writer(args, num_components, colocation_ids, word_renderer):
|
def make_all_writer(args, num_components, colocation_ids, word_renderer):
|
||||||
return Writer(args.all, num_components, AllFormatter(colocation_ids, word_renderer), None)
|
return Writer(args.all, num_components, AllFormatter(colocation_ids, word_renderer), args.collocation_sentence_map_dest, None)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def make_stats_writer(args, num_components, colocation_ids, word_renderer):
|
def make_stats_writer(args, num_components, colocation_ids, word_renderer):
|
||||||
params = Writer.other_params(args)
|
params = Writer.other_params(args)
|
||||||
return Writer(args.stats, num_components, StatsFormatter(colocation_ids, word_renderer), params)
|
return Writer(args.stats, num_components, StatsFormatter(colocation_ids, word_renderer), args.collocation_sentence_map_dest, params)
|
||||||
|
|
||||||
def __init__(self, file_out, num_components, formatter, params):
|
def __init__(self, file_out, num_components, formatter, collocation_sentence_map_dest, params):
|
||||||
|
# TODO FIX THIS
|
||||||
|
self.collocation_sentence_map_dest = collocation_sentence_map_dest
|
||||||
if params is None:
|
if params is None:
|
||||||
self.multiple_output = False
|
self.multiple_output = False
|
||||||
self.sort_by = -1
|
self.sort_by = -1
|
||||||
|
@ -73,7 +80,7 @@ class Writer:
|
||||||
def write_header(self, file_handler):
|
def write_header(self, file_handler):
|
||||||
file_handler.write(",".join(self.header()) + "\n")
|
file_handler.write(",".join(self.header()) + "\n")
|
||||||
|
|
||||||
def write_out_worker(self, file_handler, structure, colocation_ids):
|
def write_out_worker(self, file_handler, structure, colocation_ids, col_sent_map):
|
||||||
rows = []
|
rows = []
|
||||||
components = structure.components
|
components = structure.components
|
||||||
for match in progress(colocation_ids.get_matches_for(structure), "Writing matches: {}".format(structure.id)):
|
for match in progress(colocation_ids.get_matches_for(structure), "Writing matches: {}".format(structure.id)):
|
||||||
|
@ -84,6 +91,11 @@ class Writer:
|
||||||
|
|
||||||
variable_word_order = self.find_variable_word_order(match.matches)
|
variable_word_order = self.find_variable_word_order(match.matches)
|
||||||
|
|
||||||
|
if col_sent_map is not None:
|
||||||
|
# TODO find better way to get sentence_id
|
||||||
|
for words in match.matches:
|
||||||
|
col_sent_map.add_map(match.match_id, '.'.join(words['1'].id.split('.')[:-1]))
|
||||||
|
|
||||||
for words in match.matches:
|
for words in match.matches:
|
||||||
to_write = []
|
to_write = []
|
||||||
|
|
||||||
|
@ -128,14 +140,18 @@ class Writer:
|
||||||
if not self.multiple_output:
|
if not self.multiple_output:
|
||||||
fp = fp_open()
|
fp = fp_open()
|
||||||
self.write_header(fp)
|
self.write_header(fp)
|
||||||
|
col_sent_map = CollocationSentenceMapper(os.path.join(self.collocation_sentence_map_dest, 'mapper.txt')) \
|
||||||
|
if self.collocation_sentence_map_dest is not None else None
|
||||||
|
|
||||||
for s in progress(structures, "writing:{}".format(self.formatter)):
|
for s in progress(structures, "writing:{}".format(self.formatter)):
|
||||||
if self.multiple_output:
|
if self.multiple_output:
|
||||||
fp = fp_open(s.id)
|
fp = fp_open(s.id)
|
||||||
self.write_header(fp)
|
self.write_header(fp)
|
||||||
|
col_sent_map = CollocationSentenceMapper(os.path.join(self.collocation_sentence_map_dest, f'{s.id}_mapper.txt')) \
|
||||||
|
if self.collocation_sentence_map_dest is not None else None
|
||||||
|
|
||||||
self.formatter.set_structure(s)
|
self.formatter.set_structure(s)
|
||||||
self.write_out_worker(fp, s, colocation_ids)
|
self.write_out_worker(fp, s, colocation_ids, col_sent_map)
|
||||||
|
|
||||||
if self.multiple_output:
|
if self.multiple_output:
|
||||||
fp_close(fp)
|
fp_close(fp)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user