Extended statistics recalculation to filtered output
This commit is contained in:
		
							parent
							
								
									f1366548b6
								
							
						
					
					
						commit
						39692e839f
					
				| @ -1,4 +1,7 @@ | |||||||
|  | 
 | ||||||
|  | 
 | ||||||
| import argparse | import argparse | ||||||
|  | import csv | ||||||
| import logging | import logging | ||||||
| import os | import os | ||||||
| import sys | import sys | ||||||
| @ -166,23 +169,54 @@ def write_new_stats(wf, original_text, stats, file_name, word_order): | |||||||
|         wf.write(','.join(line) + '\n') |         wf.write(','.join(line) + '\n') | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| def main(args): | def main(args): | ||||||
|     word_order = load_word_order(args.word_order_file) |     if not args.ignore_recalculation: | ||||||
|     for file_name in os.listdir(args.input): |         word_order = load_word_order(args.word_order_file) | ||||||
|         read_file_path = os.path.join(args.input, file_name) |         for file_name in os.listdir(args.input): | ||||||
|         write_file_path = os.path.join(args.output, file_name) |             read_file_path = os.path.join(args.input, file_name) | ||||||
|         with open(read_file_path, 'r') as rf, open(write_file_path, 'w') as wf: |             write_file_path = os.path.join(args.output, file_name) | ||||||
|             original_text, stats = get_new_stats(rf) |             with open(read_file_path, 'r') as rf, open(write_file_path, 'w') as wf: | ||||||
|             freq_pos = original_text[0].index('Frequency') |                 original_text, stats = get_new_stats(rf) | ||||||
|             if args.frequency_limit > 1: |                 freq_pos = original_text[0].index('Frequency') | ||||||
|                 original_text = [original_text[0]] + [l for l in original_text[1:] if int(l[freq_pos]) >= 10] |                 if args.frequency_limit > 1: | ||||||
|             if args.sorted: |                     original_text = [original_text[0]] + [l for l in original_text[1:] if int(l[freq_pos]) >= 10] | ||||||
|                 if len(original_text) > 1: |                 if args.sorted: | ||||||
|                     original_text = [original_text[0]] + sorted(original_text[1:], key=lambda x: -1 * int(x[freq_pos])) |                     if len(original_text) > 1: | ||||||
|                 else: |                         original_text = [original_text[0]] + sorted(original_text[1:], key=lambda x: -1 * int(x[freq_pos])) | ||||||
|                     original_text = [original_text[0]] |                     else: | ||||||
|             write_new_stats(wf, original_text, stats, file_name, word_order) |                         original_text = [original_text[0]] | ||||||
|  |                 write_new_stats(wf, original_text, stats, file_name, word_order) | ||||||
|  | 
 | ||||||
|  |     if args.format_output: | ||||||
|  |         for file_name in os.listdir(args.output): | ||||||
|  |             read_file_path = os.path.join(args.output, file_name) | ||||||
|  |             write_file_path = os.path.join(args.formatted_output, file_name) | ||||||
|  |             with open(read_file_path, 'r', encoding="utf-8") as rf, open(write_file_path, 'w') as wf: | ||||||
|  |                 first_line = True | ||||||
|  |                 lines = [] | ||||||
|  |                 formatted_output = [] | ||||||
|  |                 for line in rf: | ||||||
|  |                     line = line[:-1].split(',') | ||||||
|  |                     if first_line: | ||||||
|  |                         # sorting | ||||||
|  |                         a = line[-17] | ||||||
|  |                         b = line[-15] | ||||||
|  |                         # post frequency | ||||||
|  |                         c = line[-6] | ||||||
|  |                         d = line[-8] | ||||||
|  |                         formatted_output.append(line[:-14] + [line[-6], line[-8]]) | ||||||
|  | 
 | ||||||
|  |                         first_line = False | ||||||
|  |                         continue | ||||||
|  |                     lines.append(line[:-14] + [line[-6], line[-8]]) | ||||||
|  | 
 | ||||||
|  |                 lines = [line for line in lines if int(line[-3]) >= 10] | ||||||
|  |                 lines = sorted(lines, key=lambda x: (-int(x[-3]), x[-5])) | ||||||
|  |                 formatted_output += lines | ||||||
|  |                 for line in formatted_output: | ||||||
|  |                     wf.write(','.join(line) + '\n') | ||||||
|  |             break | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     parser = argparse.ArgumentParser( |     parser = argparse.ArgumentParser( | ||||||
| @ -194,6 +228,9 @@ if __name__ == '__main__': | |||||||
|     parser.add_argument('--word_order_file', type=str, help='File that contains word order for DeltaP calculations.') |     parser.add_argument('--word_order_file', type=str, help='File that contains word order for DeltaP calculations.') | ||||||
|     parser.add_argument('--frequency_limit', type=int, default=1, help='File that contains word order for DeltaP calculations.') |     parser.add_argument('--frequency_limit', type=int, default=1, help='File that contains word order for DeltaP calculations.') | ||||||
|     parser.add_argument('--sorted', action='store_true', help='File that contains word order for DeltaP calculations.') |     parser.add_argument('--sorted', action='store_true', help='File that contains word order for DeltaP calculations.') | ||||||
|  |     parser.add_argument('--format_output', action='store_true', help='Format and cut data as specified in #1808 on redmine.') | ||||||
|  |     parser.add_argument('--ignore_recalculation', action='store_true', help='Ignore recalculation.') | ||||||
|  |     parser.add_argument('--formatted_output', default=None, help='Destination of final results.') | ||||||
| 
 | 
 | ||||||
|     args = parser.parse_args() |     args = parser.parse_args() | ||||||
|     logging.basicConfig(stream=sys.stderr) |     logging.basicConfig(stream=sys.stderr) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user