Modified readme.md + Made sloleks_db optional (no longer obligatory) + Added --frequency_limit and --sorted parameters to recalculate_statistics.py
This commit is contained in:
@@ -175,11 +175,13 @@ def main(args):
|
||||
with open(read_file_path, 'r') as rf, open(write_file_path, 'w') as wf:
|
||||
original_text, stats = get_new_stats(rf)
|
||||
freq_pos = original_text[0].index('Frequency')
|
||||
original_text = [original_text[0]] + [l for l in original_text[1:] if int(l[freq_pos]) >= 10]
|
||||
if len(original_text) > 1:
|
||||
original_text = [original_text[0]] + sorted(original_text[1:], key=lambda x: -1 * int(x[freq_pos]))
|
||||
else:
|
||||
original_text = [original_text[0]]
|
||||
if args.frequency_limit > 1:
|
||||
original_text = [original_text[0]] + [l for l in original_text[1:] if int(l[freq_pos]) >= 10]
|
||||
if args.sorted:
|
||||
if len(original_text) > 1:
|
||||
original_text = [original_text[0]] + sorted(original_text[1:], key=lambda x: -1 * int(x[freq_pos]))
|
||||
else:
|
||||
original_text = [original_text[0]]
|
||||
write_new_stats(wf, original_text, stats, file_name, word_order)
|
||||
|
||||
if __name__ == '__main__':
|
||||
@@ -190,6 +192,8 @@ if __name__ == '__main__':
|
||||
parser.add_argument('output',
|
||||
help='Path to folder that contains all input files.')
|
||||
parser.add_argument('--word_order_file', type=str, help='File that contains word order for DeltaP calculations.')
|
||||
parser.add_argument('--frequency_limit', type=int, default=1, help='File that contains word order for DeltaP calculations.')
|
||||
parser.add_argument('--sorted', action='store_true', help='File that contains word order for DeltaP calculations.')
|
||||
|
||||
args = parser.parse_args()
|
||||
logging.basicConfig(stream=sys.stderr)
|
||||
|
||||
Reference in New Issue
Block a user