Modified readme.md + Removed obligatory sloleks_db + Added frequency_limit and sorted parameters in recalculate_statistics.py

This commit is contained in:
2020-09-02 10:53:45 +02:00
parent 41952738ed
commit 1b0e6a27eb
6 changed files with 4841 additions and 15 deletions

View File

@@ -175,11 +175,13 @@ def main(args):
with open(read_file_path, 'r') as rf, open(write_file_path, 'w') as wf:
original_text, stats = get_new_stats(rf)
freq_pos = original_text[0].index('Frequency')
original_text = [original_text[0]] + [l for l in original_text[1:] if int(l[freq_pos]) >= 10]
if len(original_text) > 1:
original_text = [original_text[0]] + sorted(original_text[1:], key=lambda x: -1 * int(x[freq_pos]))
else:
original_text = [original_text[0]]
if args.frequency_limit > 1:
original_text = [original_text[0]] + [l for l in original_text[1:] if int(l[freq_pos]) >= 10]
if args.sorted:
if len(original_text) > 1:
original_text = [original_text[0]] + sorted(original_text[1:], key=lambda x: -1 * int(x[freq_pos]))
else:
original_text = [original_text[0]]
write_new_stats(wf, original_text, stats, file_name, word_order)
if __name__ == '__main__':
@@ -190,6 +192,8 @@ if __name__ == '__main__':
parser.add_argument('output',
help='Path to folder that contains all input files.')
parser.add_argument('--word_order_file', type=str, help='File that contains word order for DeltaP calculations.')
parser.add_argument('--frequency_limit', type=int, default=1, help='File that contains word order for DeltaP calculations.')
parser.add_argument('--sorted', action='store_true', help='File that contains word order for DeltaP calculations.')
args = parser.parse_args()
logging.basicConfig(stream=sys.stderr)