|
|
|
@ -1,4 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
|
import csv
|
|
|
|
|
import logging
|
|
|
|
|
import os
|
|
|
|
|
import sys
|
|
|
|
@ -166,23 +169,54 @@ def write_new_stats(wf, original_text, stats, file_name, word_order):
|
|
|
|
|
wf.write(','.join(line) + '\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Rows whose sort column (third from the end of a trimmed row) is below this
# value are left out of the formatted output. The column is presumably a
# frequency count -- confirm against redmine #1808.
_FORMATTED_MIN_FREQUENCY = 10

# Number of trailing columns removed from every row by the formatting step.
_TRAILING_COLUMNS_CUT = 14


def main(args):
    """Recalculate statistics for each input file and optionally format them.

    Args:
        args: Parsed command-line namespace. Reads ``ignore_recalculation``,
            ``format_output``, ``formatted_output`` and ``output`` directly;
            the recalculation step additionally reads ``input``,
            ``word_order_file``, ``frequency_limit`` and ``sorted``.

    Raises:
        ValueError: If ``format_output`` is set but ``formatted_output`` is
            missing.
    """
    # Fail before doing any work instead of crashing inside os.path.join
    # after the (potentially long) recalculation has already finished.
    if args.format_output and args.formatted_output is None:
        raise ValueError('--format_output requires --formatted_output to be set.')

    if not args.ignore_recalculation:
        _recalculate_statistics(args)

    if args.format_output:
        _format_statistics(args.output, args.formatted_output)


def _recalculate_statistics(args):
    """Write recalculated statistics for every file of ``args.input`` to ``args.output``."""
    word_order = load_word_order(args.word_order_file)
    for file_name in os.listdir(args.input):
        read_file_path = os.path.join(args.input, file_name)
        write_file_path = os.path.join(args.output, file_name)
        with open(read_file_path, 'r') as rf, open(write_file_path, 'w') as wf:
            original_text, stats = get_new_stats(rf)
            header = original_text[0]
            freq_pos = header.index('Frequency')

            if args.frequency_limit > 1:
                # Use the requested limit rather than a fixed threshold.
                original_text = [header] + [
                    row for row in original_text[1:]
                    if int(row[freq_pos]) >= args.frequency_limit
                ]
            if args.sorted:
                # Descending by frequency; sorting an empty tail is a no-op,
                # so a header-only table needs no special case.
                original_text = [header] + sorted(
                    original_text[1:], key=lambda row: -int(row[freq_pos]))

            write_new_stats(wf, original_text, stats, file_name, word_order)


def _trim_row(columns):
    """Drop the trailing columns and re-append original columns -6 and -8.

    After trimming, index -3 of the result is original column -15 and
    index -5 is original column -17; these are the two sort keys.
    """
    return columns[:-_TRAILING_COLUMNS_CUT] + [columns[-6], columns[-8]]


def _format_statistics(source_dir, target_dir):
    """Trim, filter and sort the comma-separated files of ``source_dir``.

    The first non-blank line of a file is treated as the header and is always
    kept. Remaining rows are kept when their sort column is at least
    ``_FORMATTED_MIN_FREQUENCY`` and are ordered by that column descending,
    ties broken by the column at index -5 ascending.
    """
    for file_name in os.listdir(source_dir):
        read_file_path = os.path.join(source_dir, file_name)
        write_file_path = os.path.join(target_dir, file_name)
        # Write with the same encoding the input is read with, so the result
        # does not depend on the locale's default encoding.
        with open(read_file_path, 'r', encoding="utf-8") as rf, \
                open(write_file_path, 'w', encoding="utf-8") as wf:
            header = None
            rows = []
            for raw_line in rf:
                # Strip only the line terminator; a final line without one
                # must not lose its last character.
                text = raw_line.rstrip('\n')
                if not text:
                    # Blank lines carry no columns to trim.
                    continue
                trimmed = _trim_row(text.split(','))
                if header is None:
                    header = trimmed
                else:
                    rows.append(trimmed)

            rows = [row for row in rows if int(row[-3]) >= _FORMATTED_MIN_FREQUENCY]
            rows.sort(key=lambda row: (-int(row[-3]), row[-5]))

            if header is not None:
                wf.writelines(','.join(row) + '\n' for row in [header] + rows)

        # NOTE(review): only the first directory entry is processed. This
        # looks like a debugging leftover -- confirm whether every file in
        # the directory should be formatted before removing the break.
        break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
|
parser = argparse.ArgumentParser(
|
|
|
|
@ -194,6 +228,9 @@ if __name__ == '__main__':
|
|
|
|
|
# Options for the recalculation step.
parser.add_argument('--word_order_file', type=str, help='File that contains word order for DeltaP calculations.')
parser.add_argument('--frequency_limit', type=int, default=1, help='Frequency limit; values above 1 enable filtering of low-frequency rows.')
parser.add_argument('--sorted', action='store_true', help='Sort rows by descending frequency.')
# Options for the optional formatting step.
parser.add_argument('--format_output', action='store_true', help='Format and cut data as specified in #1808 on redmine.')
parser.add_argument('--ignore_recalculation', action='store_true', help='Ignore recalculation.')
parser.add_argument('--formatted_output', default=None, help='Destination of final results.')

args = parser.parse_args()
# Send log records to stderr.
logging.basicConfig(stream=sys.stderr)
|
|
|
|
|