min frequency now part of writer

This commit is contained in:
Ozbolt Menegatti 2019-08-21 11:11:06 +02:00
parent 3f1c154705
commit 8cca761b91
2 changed files with 6 additions and 2 deletions

View File

@ -8,7 +8,6 @@ from progress_bar import progress
class MatchStore: class MatchStore:
def __init__(self, args, db): def __init__(self, args, db):
self.db = db self.db = db
self.min_frequency = args.min_freq
self.dispersions = {} self.dispersions = {}
self.match_num = 0 self.match_num = 0

View File

@ -6,7 +6,7 @@ from formatter import OutFormatter, OutNoStatFormatter, AllFormatter, StatsForma
class Writer: class Writer:
@staticmethod @staticmethod
def other_params(args): def other_params(args):
return (args.multiple_output, int(args.sort_by), args.sort_reversed) return (args.multiple_output, int(args.sort_by), args.sort_reversed, args.min_freq)
@staticmethod @staticmethod
def make_output_writer(args, num_components, colocation_ids, word_renderer): def make_output_writer(args, num_components, colocation_ids, word_renderer):
@ -32,10 +32,12 @@ class Writer:
self.multiple_output = False self.multiple_output = False
self.sort_by = -1 self.sort_by = -1
self.sort_order = None self.sort_order = None
self.min_frequency = 1
else: else:
self.multiple_output = params[0] self.multiple_output = params[0]
self.sort_by = params[1] self.sort_by = params[1]
self.sort_order = params[2] self.sort_order = params[2]
self.min_frequency = params[3]
self.num_components = num_components self.num_components = num_components
self.output_file = file_out self.output_file = file_out
@ -76,6 +78,9 @@ class Writer:
components = structure.components components = structure.components
for match in colocation_ids.get_matches_for(structure): for match in colocation_ids.get_matches_for(structure):
if len(match) < self.min_frequency:
continue
self.formatter.new_match(match) self.formatter.new_match(match)
for words in match.matches: for words in match.matches: