Extended recalculate statistics to filtered output
This commit is contained in:
parent
f1366548b6
commit
39692e839f
|
@ -1,4 +1,7 @@
|
|||
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
@ -166,8 +169,8 @@ def write_new_stats(wf, original_text, stats, file_name, word_order):
|
|||
wf.write(','.join(line) + '\n')
|
||||
|
||||
|
||||
|
||||
def main(args):
|
||||
if not args.ignore_recalculation:
|
||||
word_order = load_word_order(args.word_order_file)
|
||||
for file_name in os.listdir(args.input):
|
||||
read_file_path = os.path.join(args.input, file_name)
|
||||
|
@ -184,6 +187,37 @@ def main(args):
|
|||
original_text = [original_text[0]]
|
||||
write_new_stats(wf, original_text, stats, file_name, word_order)
|
||||
|
||||
if args.format_output:
|
||||
for file_name in os.listdir(args.output):
|
||||
read_file_path = os.path.join(args.output, file_name)
|
||||
write_file_path = os.path.join(args.formatted_output, file_name)
|
||||
with open(read_file_path, 'r', encoding="utf-8") as rf, open(write_file_path, 'w') as wf:
|
||||
first_line = True
|
||||
lines = []
|
||||
formatted_output = []
|
||||
for line in rf:
|
||||
line = line[:-1].split(',')
|
||||
if first_line:
|
||||
# sorting
|
||||
a = line[-17]
|
||||
b = line[-15]
|
||||
# post frequency
|
||||
c = line[-6]
|
||||
d = line[-8]
|
||||
formatted_output.append(line[:-14] + [line[-6], line[-8]])
|
||||
|
||||
first_line = False
|
||||
continue
|
||||
lines.append(line[:-14] + [line[-6], line[-8]])
|
||||
|
||||
lines = [line for line in lines if int(line[-3]) >= 10]
|
||||
lines = sorted(lines, key=lambda x: (-int(x[-3]), x[-5]))
|
||||
formatted_output += lines
|
||||
for line in formatted_output:
|
||||
wf.write(','.join(line) + '\n')
|
||||
break
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Extract structures from a parsed corpus.')
|
||||
|
@ -194,6 +228,9 @@ if __name__ == '__main__':
|
|||
parser.add_argument('--word_order_file', type=str, help='File that contains word order for DeltaP calculations.')
|
||||
parser.add_argument('--frequency_limit', type=int, default=1, help='File that contains word order for DeltaP calculations.')
|
||||
parser.add_argument('--sorted', action='store_true', help='File that contains word order for DeltaP calculations.')
|
||||
parser.add_argument('--format_output', action='store_true', help='Format and cut data as specified in #1808 on redmine.')
|
||||
parser.add_argument('--ignore_recalculation', action='store_true', help='Ignore recalculation.')
|
||||
parser.add_argument('--formatted_output', default=None, help='Destination of final results.')
|
||||
|
||||
args = parser.parse_args()
|
||||
logging.basicConfig(stream=sys.stderr)
|
||||
|
|
Loading…
Reference in New Issue
Block a user