"""Flask web front end that runs ``stark.run`` on a user-supplied input file.

The input is either uploaded or downloaded from a URL into the ``media``
directory. The tab-separated output of ``stark.run`` is written to the same
directory and shown as a sortable table or offered as a download.
"""
import configparser
import csv
import os
import random
import re
import string
import time

import requests
from flask import Flask, render_template, request, send_file, redirect, url_for
from werkzeug.utils import secure_filename

from stark import run

UPLOAD_FOLDER = 'uploads'
ALLOWED_EXTENSIONS = {'conllu'}
DAYS_BEFORE_DELETION = 1
# Upper bound (seconds) on waiting for a user-supplied URL, so that a stalled
# remote server cannot block a worker indefinitely.
DOWNLOAD_TIMEOUT_SECONDS = 30

# Maps column names found in the result file to the names shown in the table.
TABLE_COLUMNS2DISPLAYED_TABLE_COLUMNS = {
    'Tree': 'Tree',
    'Absolute frequency': 'Frequency',
    'Number of nodes': 'Number of nodes',
    'Head node': 'Head node',
    'Grew-match URL': 'Grew-match URL',
    'Order': 'Order',
    'MI': 'MI',
    'logDice': 'logDice',
    't-score': 't-score'
}

DISPLAYED_TABLE_COLUMNS2TABLE_COLUMNS = {v: k for k, v in TABLE_COLUMNS2DISPLAYED_TABLE_COLUMNS.items()}

# Plain decimal numbers with an optional sign and an optional fractional part.
_NUMBER_RE = re.compile(r'-?\d+(?:\.\d+)?')


def _sort_key(value):
    """Return a sort key for one table cell.

    Numeric cells sort by their negated value and text cells by the text
    itself. The leading tag keeps the two kinds apart so a column mixing
    numbers and text never compares ``float`` with ``str``.
    """
    if _NUMBER_RE.fullmatch(value):
        return 0, -float(value), ''
    return 1, 0.0, value


def _remove_expired_files(directory, max_age_seconds):
    """Delete regular files in ``directory`` last modified more than ``max_age_seconds`` ago."""
    now = time.time()
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        # Skip sub-directories and other non-files; os.remove would fail on them.
        if not os.path.isfile(file_path):
            continue
        if now - os.path.getmtime(file_path) > max_age_seconds:
            os.remove(file_path)


def create_app():
    """Build the Flask application and register all of its routes.

    Returns:
        The configured ``Flask`` instance.
    """
    app = Flask(__name__)
    app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

    def create_default_configs():
        """Return a configuration dict filled with default values.

        Not referenced by any route registered in this factory.
        """
        configs = {}
        # mandatory parameters
        configs['input_path'] = 'data/sl_ssj-ud_v2.4.conllu'
        configs['output'] = 'results/out_official.tsv'
        configs['tree_size'] = '2-4'
        configs['node_type'] = 'upos'

        # mandatory parameters with default value
        configs['internal_saves'] = './internal_saves'
        configs['cpu_cores'] = 12
        configs['complete_tree_type'] = True
        configs['dependency_type'] = True
        configs['node_order'] = True
        configs['association_measures'] = False

        configs['label_whitelist'] = []
        configs['root_whitelist'] = []

        configs['query'] = None

        configs['compare'] = None

        configs['frequency_threshold'] = 0
        configs['lines_threshold'] = None

        configs['continuation_processing'] = False

        configs['nodes_number'] = True
        configs['print_root'] = True

        # 'compare' is always None at this point, so this branch is inert.
        if configs['compare'] is not None:
            configs['other_input_path'] = configs['compare']
        return configs

    def allowed_file(filename):
        """Return True when ``filename`` has an extension listed in ALLOWED_EXTENSIONS.

        Not referenced by any route registered in this factory.
        """
        return '.' in filename and \
            filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

    @app.route('/upload')
    def upload_file2():
        """Render the upload form."""
        return render_template('upload.html')

    @app.route('/uploader', methods=['GET', 'POST'])
    def upload_file():
        """Store the posted file under its sanitised name; show the form on GET."""
        if request.method == 'POST':
            f = request.files['file']
            f.save(secure_filename(f.filename))
            return 'file uploaded successfully'
        # A view must always return a response; previously GET returned None.
        return render_template('upload.html')

    @app.route('/about', methods=['GET'])
    def about():
        """Render the about page."""
        return render_template('about.html')

    @app.route('/result/<result_id>', methods=['GET', 'POST'])
    def result(result_id):
        """Show a stored result as a table (GET) or send it as a download (POST).

        Query parameters (GET):
            order_by: displayed column name followed by one extra character,
                which is dropped before the lookup (presumably a sort marker
                added by the template - confirm against result.html).
            order_type: ``'asc'`` reverses the default ordering.
        """
        # result_id comes from the URL; reduce it to a bare file name so it
        # cannot point outside the media directory.
        result_path = os.path.join('media', secure_filename(result_id))

        if request.method == 'POST':
            _remove_expired_files('media', DAYS_BEFORE_DELETION * 86400)

            # TODO ADD LINKS
            # TODO TEST VARYING SIZES OF TEXT IN TABLE
            # The clean-up above may have removed the requested file itself.
            if not os.path.isfile(result_path):
                return 'Result not found.', 404
            return send_file(result_path, as_attachment=True, download_name='results.tsv')

        if not os.path.isfile(result_path):
            return 'Result not found.', 404

        order_by_display = request.args.get('order_by')
        order_by = None
        if order_by_display is not None:
            # Unknown column names simply leave the table unsorted.
            order_by = DISPLAYED_TABLE_COLUMNS2TABLE_COLUMNS.get(order_by_display[:-1])
        order_type = request.args.get('order_type')

        # newline='' is what the csv module expects from the files it reads.
        with open(result_path, 'r', newline='') as rf:
            content = list(csv.reader(rf, delimiter='\t'))

        # An empty result file yields an empty table instead of an IndexError.
        head = content[0] if content else []
        rows = content[1:]
        content_dict = {h: [] for h in head}

        if order_by is not None and order_by in head:
            sort_id = head.index(order_by)

            def row_key(row):
                # Rows shorter than the header sort as empty text.
                return _sort_key(row[sort_id] if sort_id < len(row) else '')

            # Numeric keys are negated, so the plain sort lists the largest
            # numbers first and 'asc' (reverse) lists the smallest first.
            ordered_content = sorted(rows, key=row_key, reverse=order_type == 'asc')
        else:
            ordered_content = rows

        for row in ordered_content:
            # zip ignores cells beyond the header width.
            for column, value in zip(head, row):
                content_dict[column].append(value)

        displayed_head = [TABLE_COLUMNS2DISPLAYED_TABLE_COLUMNS[col] for col in head
                          if col in TABLE_COLUMNS2DISPLAYED_TABLE_COLUMNS]
        displayed_content_dict = {
            TABLE_COLUMNS2DISPLAYED_TABLE_COLUMNS[column]: values
            for column, values in content_dict.items()
            if column in TABLE_COLUMNS2DISPLAYED_TABLE_COLUMNS
        }

        return render_template('result.html', head_row=displayed_head, content=displayed_content_dict)

    @app.route('/', methods=['GET', 'POST'])
    def index():
        """Render the settings form (GET) or validate it and run the extraction (POST).

        On POST the form is turned into the configuration dict passed to
        ``stark.run``. Validation problems are collected per field and shown
        on the form again; on success the client is redirected to the result.
        """
        if request.method != 'POST':
            return render_template('index.html')

        form = request.form
        configs = {}
        # mandatory parameters
        configs['input_path'] = ''
        validation = {}

        # handling input
        if 'file' in request.files and request.files['file']:
            # TODO ADD OPTION FOR MULTIPLE FILES - ZIP!
            # store file
            f = request.files['file']
            input_path = os.path.join('media', secure_filename(f.filename))
            f.save(input_path)
            configs['input_path'] = input_path
            if 'input_url' in form and form['input_url']:
                validation['file'] = 'Please insert either input url or file, not both of them.'
                validation['input_url'] = 'Please insert either input url or file, not both of them.'
            # TODO OPTIONALLY ADD conllu FILE CHECK
        elif 'input_url' in form and form['input_url']:
            # The file name is taken from user input, so sanitise it before
            # using it as a path component.
            download_name = secure_filename(form['input_url'].split('/')[-1])
            if not download_name:
                validation['input_url'] = 'Incorrect URL!'
            else:
                input_path = os.path.join('media', download_name)
                try:
                    response = requests.get(form['input_url'], timeout=DOWNLOAD_TIMEOUT_SECONDS)
                    # Do not store an HTTP error page as if it were input data.
                    response.raise_for_status()
                    with open(input_path, "wb") as wf:
                        wf.write(response.content)
                    configs['input_path'] = input_path
                except (requests.RequestException, OSError, ValueError):
                    validation['input_url'] = 'Incorrect URL!'
        else:
            validation['file'] = 'Please insert either input url or provide a file.'
            validation['input_url'] = 'Please insert either input url or provide a file.'

        tree_size_min = form.get('tree_size_min')
        tree_size_max = form.get('tree_size_max')

        def validate_tree_size(tree_size_min, tree_size_max):
            """Record a validation message and return False for unusable tree sizes."""
            if tree_size_min is None or tree_size_max is None:
                validation['tree_size'] = 'Please provide information about minimum and maximum tree size.'
                return False
            try:
                min_exceeds_max = int(tree_size_min) > int(tree_size_max)
            except ValueError:
                # Empty or non-integer input is reported like a missing value.
                validation['tree_size'] = 'Please provide information about minimum and maximum tree size.'
                return False
            if min_exceeds_max:
                validation['tree_size'] = 'Tree size minimum should be smaller than tree size maximum.'
                return False
            return True

        if validate_tree_size(tree_size_min, tree_size_max):
            configs['tree_size'] = f'{tree_size_min}-{tree_size_max}' if tree_size_min != tree_size_max else f'{tree_size_min}'

        def validate_node_type(node_type):
            """Record a validation message and return False for an empty or unsupported selection."""
            # TODO EXPAND NODE TYPE
            node_type_options = {'upos', 'form', 'lemma', 'xpos', 'feats', 'deprel'}
            if not node_type:
                validation['node_type'] = 'Please select at least one node type.'
                return False
            for el in node_type:
                if el not in node_type_options:
                    validation['node_type'] = f'Node option {el} is not supported. Please enter valid options.'
                    return False
            return True

        # One checkbox per node type; the order here fixes the order in the
        # joined configuration value.
        node_type = [kind for kind in ('upos', 'form', 'lemma') if f'node_type_{kind}' in form]

        if validate_node_type(node_type):
            configs['node_type'] = '+'.join(node_type)

        # mandatory parameters with default value
        configs['internal_saves'] = None

        # TODO depends on computer
        configs['cpu_cores'] = 12

        # TODO FINALIZE THIS!
        configs['complete_tree_type'] = True
        configs['dependency_type'] = 'labeled_trees' in form and form['labeled_trees'] == 'on'
        configs['node_order'] = 'fixed_order' in form and form['fixed_order'] == 'on'
        configs['association_measures'] = 'association_measures' in form and form['association_measures'] == 'on'

        configs['label_whitelist'] = []
        configs['root_whitelist'] = []
        if 'root_restriction' in form and form['root_restriction']:
            configs['root_whitelist'] = form['root_restriction'].split('|')

        configs['query'] = None

        configs['compare'] = None

        configs['frequency_threshold'] = 0
        configs['lines_threshold'] = None

        configs['continuation_processing'] = False

        configs['nodes_number'] = True
        configs['print_root'] = True

        # 'compare' is always None at this point, so this branch is inert.
        if configs['compare'] is not None:
            configs['other_input_path'] = configs['compare']

        configs['grew_match'] = True
        configs['depsearch'] = False

        # Random 60-character name; it is both the output file name and the
        # identifier used in the result URL.
        name = ''.join(random.choices(string.ascii_uppercase + string.digits, k=60))
        configs['output'] = os.path.join('media', name)

        if validation:
            return render_template('index.html', validation=validation)

        try:
            run(configs)
        except Exception:
            # Keep the user-facing message generic but record the traceback.
            app.logger.exception('Processing failed for input %s', configs['input_path'])
            validation['general'] = 'Processing failed! Please recheck your settings.'
            return render_template('index.html', validation=validation)

        return redirect(url_for('result', result_id=name))

    return app


if __name__ == '__main__':
    app = create_app()
    # NOTE(review): debug=True enables the interactive debugger; use this
    # entry point for local development only.
    app.run(debug=True)