diff --git a/Dockerfile b/Dockerfile index 41db1b8..d78ddb7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,4 +5,4 @@ RUN pip install --upgrade pip RUN pip install waitress RUN pip install . -CMD ["waitress-serve", "--call", "app:app"] \ No newline at end of file +CMD ["waitress-serve", "--call", "app:create_app"] diff --git a/app.py b/app.py index b97991d..0d32fbb 100755 --- a/app.py +++ b/app.py @@ -9,220 +9,35 @@ import time import requests from flask import Flask, render_template, request, send_file, redirect, url_for from werkzeug.utils import secure_filename - from stark import run -app = Flask(__name__) UPLOAD_FOLDER = 'uploads' ALLOWED_EXTENSIONS = {'conllu'} DAYS_BEFORE_DELETION = 1 -app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER -def create_default_configs(): - configs = {} - # mandatory parameters - configs['input_path'] = 'data/sl_ssj-ud_v2.4.conllu' - configs['output'] = 'results/out_official.tsv' - configs['tree_size'] = '2-4' - configs['node_type'] = 'upos' +def create_app(): + app = Flask(__name__) + app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER - # mandatory parameters with default value - configs['internal_saves'] = './internal_saves' - configs['cpu_cores'] = 12 - configs['complete_tree_type'] = True - configs['dependency_type'] = True - configs['node_order'] = True - configs['association_measures'] = False - - configs['label_whitelist'] = [] - configs['root_whitelist'] = [] - - configs['query'] = None - - configs['compare'] = None - - configs['frequency_threshold'] = 0 - configs['lines_threshold'] = None - - configs['continuation_processing'] = False - - configs['nodes_number'] = True - configs['print_root'] = True - - if configs['compare'] is not None: - configs['other_input_path'] = configs['compare'] - return configs - - -def allowed_file(filename): - return '.' in filename and \ - filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS - - -@app.route('/upload') -def upload_file2(): - return render_template('upload.html') - - -@app.route('/uploader', methods=['GET', 'POST']) -def upload_file(): - if request.method == 'POST': - f = request.files['file'] - f.save(secure_filename(f.filename)) - return 'file uploaded successfully' - - -@app.route('/about', methods=['GET']) -def about(): - return render_template('about.html') - - -# @app.route('/result//download', methods=['GET']) -# def download_result(result_id): -# return - - -@app.route('/result/', methods=['GET', 'POST']) -def result(result_id): - - if request.method == 'POST': - for filename in os.listdir('media'): - file_path = os.path.join('media', filename) - f_t = os.path.getmtime(file_path) - c_t = time.time() - file_age_seconds = c_t - f_t - if file_age_seconds > DAYS_BEFORE_DELETION * 86400: - os.remove(file_path) - # TODO ADD LINKS - # TODO TEST VARYING SIZES OF TEXT IN TABLE - return send_file(os.path.join('media', result_id), as_attachment=True, download_name='results.tsv') - - order_by = request.args.get('order_by') - order_type = request.args.get('order_type') - with open(os.path.join('media', result_id), 'r') as rf: - - content = list(csv.reader(rf, delimiter='\t')) - head = content[0] - content_dict = {h: [] for h in head} - if order_by is not None and order_by[:-1] in head: - sort_id = head.index(order_by[:-1]) - if order_type == 'asc': - # check if a number can be converted to float or int - ordered_content = sorted(content[1:], key=lambda x: -1 * float(x[sort_id]) if x[sort_id].isnumeric() or re.match(r'^-?\d+(?:\.\d+)$', x[sort_id]) is not None else x[sort_id], reverse=True) - else: - ordered_content = sorted(content[1:], key=lambda x: -1 * float(x[sort_id]) if x[sort_id].isnumeric() or re.match(r'^-?\d+(?:\.\d+)$', x[sort_id]) is not None else x[sort_id]) - else: - ordered_content = content[1:] - - for i, row in enumerate(ordered_content): - for j, v in enumerate(row): - content_dict[head[j]].append(v) - - # content.sort(key=lambda x: x[1]) - a = request - print(result_id) - return render_template('result.html', head_row=head, content=content_dict) - - -@app.route('/', methods=['GET', 'POST']) -def index(): - if request.method == 'POST': - form = request.form + def create_default_configs(): configs = {} # mandatory parameters - configs['input_path'] = '' - validation = {} - - - # handling input - if 'file' in request.files and request.files['file']: - # TODO ADD OPTION FOR MULTIPLE FILES - ZIP! - # store file - f = request.files['file'] - input_path = os.path.join('media', secure_filename(f.filename)) - f.save(input_path) - - configs['input_path'] = input_path - - if 'input_url' in form and form['input_url']: - validation['file'] = 'Please insert either input url or file, not both of them.' - validation['input_url'] = 'Please insert either input url or file, not both of them.' - # TODO OPTIONALLY ADD conllu FILE CHECK - elif 'input_url' in form and form['input_url']: - try: - name = form['input_url'].split('/')[-1] - input_path = os.path.join('media', name) - response = requests.get(form['input_url']) - open(input_path, "wb").write(response.content) - configs['input_path'] = input_path - except: - validation['input_url'] = 'Incorrect URL!' - else: - validation['file'] = 'Please insert either input url or provide a file.' - validation['input_url'] = 'Please insert either input url or provide a file.' - - tree_size_min = None - if 'tree_size_min' in form: - tree_size_min = form['tree_size_min'] - - tree_size_max = None - if 'tree_size_max' in form: - tree_size_max = form['tree_size_max'] - - def validate_tree_size(tree_size_min, tree_size_max): - if tree_size_min is None or tree_size_max is None: - validation['tree_size'] = 'Please provide information about minimum and maximum tree size.' - return False - - if int(tree_size_min) > int(tree_size_max): - validation['tree_size'] = 'Tree size minimum should be smaller than tree size maximum.' - return False - return True - - if validate_tree_size(tree_size_min, tree_size_max): - configs['tree_size'] = f'{tree_size_min}-{tree_size_max}' if tree_size_min != tree_size_max else f'{tree_size_min}' - - def validate_node_type(node_type): - # TODO EXPAND NODE TYPE - node_type_options = {'upos', 'form', 'lemma', 'upos', 'xpos', 'feats', 'deprel'} - if len(node_type) == 0: - validation['node_type'] = 'Please provide information about node type.' - return False - - for el in node_type: - if el not in node_type_options: - validation['node_type'] = f'Node option {el} is not supported. Please enter valid options.' - return False - - return True - - node_type = [] - if 'node_type_upos' in form: - node_type.append('upos') - if 'node_type_form' in form: - node_type.append('form') - if 'node_type_lemma' in form: - node_type.append('lemma') - - if validate_node_type(node_type): - configs['node_type'] = '+'.join(node_type) + configs['input_path'] = 'data/sl_ssj-ud_v2.4.conllu' + configs['output'] = 'results/out_official.tsv' + configs['tree_size'] = '2-4' + configs['node_type'] = 'upos' # mandatory parameters with default value - configs['internal_saves'] = None - - # TODO depends on computer + configs['internal_saves'] = './internal_saves' configs['cpu_cores'] = 12 - - # TODO FINALIZE THIS! configs['complete_tree_type'] = True - configs['dependency_type'] = 'labeled_trees' in form and form['labeled_trees'] == 'on' - configs['node_order'] = 'fixed_order' in form and form['fixed_order'] == 'on' + configs['dependency_type'] = True + configs['node_order'] = True + configs['association_measures'] = False configs['label_whitelist'] = [] configs['root_whitelist'] = [] - if 'root_restriction' in form and form['root_restriction']: - configs['root_whitelist'] = form['root_restriction'].split('|') configs['query'] = None @@ -238,26 +53,215 @@ def index(): if configs['compare'] is not None: configs['other_input_path'] = configs['compare'] + return configs - ######################################## - #config = configparser.ConfigParser() - #config.read('config.ini') - # configs = read_configs(config, args) + def allowed_file(filename): + return '.' in filename and \ + filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS - configs['association_measures'] = False - configs['grew_match'] = 'grewmatch_patterns' in form and form['grewmatch_patterns'] == 'on' - configs['depsearch'] = False - name = ''.join(random.choices(string.ascii_uppercase + string.digits, k=60)) - configs['output'] = os.path.join('media', name) - run(configs) - # TODO DELETE STORED FILE AFTER PROCESSING - return redirect(url_for('result', result_id=name)) - # return send_file(configs['output'], as_attachment=True) - # return render_template('index.html') - return render_template('index.html') + @app.route('/upload') + def upload_file2(): + return render_template('upload.html') + + + @app.route('/uploader', methods=['GET', 'POST']) + def upload_file(): + if request.method == 'POST': + f = request.files['file'] + f.save(secure_filename(f.filename)) + return 'file uploaded successfully' + + + @app.route('/about', methods=['GET']) + def about(): + return render_template('about.html') + + + # @app.route('/result//download', methods=['GET']) + # def download_result(result_id): + # return + + + @app.route('/result/', methods=['GET', 'POST']) + def result(result_id): + + if request.method == 'POST': + for filename in os.listdir('media'): + file_path = os.path.join('media', filename) + f_t = os.path.getmtime(file_path) + c_t = time.time() + file_age_seconds = c_t - f_t + if file_age_seconds > DAYS_BEFORE_DELETION * 86400: + os.remove(file_path) + # TODO ADD LINKS + # TODO TEST VARYING SIZES OF TEXT IN TABLE + return send_file(os.path.join('media', result_id), as_attachment=True, download_name='results.tsv') + + order_by = request.args.get('order_by') + order_type = request.args.get('order_type') + with open(os.path.join('media', result_id), 'r') as rf: + + content = list(csv.reader(rf, delimiter='\t')) + head = content[0] + content_dict = {h: [] for h in head} + if order_by is not None and order_by[:-1] in head: + sort_id = head.index(order_by[:-1]) + if order_type == 'asc': + # check if a number can be converted to float or int + ordered_content = sorted(content[1:], key=lambda x: -1 * float(x[sort_id]) if x[sort_id].isnumeric() or re.match(r'^-?\d+(?:\.\d+)$', x[sort_id]) is not None else x[sort_id], reverse=True) + else: + ordered_content = sorted(content[1:], key=lambda x: -1 * float(x[sort_id]) if x[sort_id].isnumeric() or re.match(r'^-?\d+(?:\.\d+)$', x[sort_id]) is not None else x[sort_id]) + else: + ordered_content = content[1:] + + for i, row in enumerate(ordered_content): + for j, v in enumerate(row): + content_dict[head[j]].append(v) + + # content.sort(key=lambda x: x[1]) + a = request + print(result_id) + return render_template('result.html', head_row=head, content=content_dict) + + + @app.route('/', methods=['GET', 'POST']) + def index(): + if request.method == 'POST': + form = request.form + configs = {} + # mandatory parameters + configs['input_path'] = '' + validation = {} + + + # handling input + if 'file' in request.files and request.files['file']: + # TODO ADD OPTION FOR MULTIPLE FILES - ZIP! + # store file + f = request.files['file'] + input_path = os.path.join('media', secure_filename(f.filename)) + f.save(input_path) + + configs['input_path'] = input_path + + if 'input_url' in form and form['input_url']: + validation['file'] = 'Please insert either input url or file, not both of them.' + validation['input_url'] = 'Please insert either input url or file, not both of them.' + # TODO OPTIONALLY ADD conllu FILE CHECK + elif 'input_url' in form and form['input_url']: + try: + name = form['input_url'].split('/')[-1] + input_path = os.path.join('media', name) + response = requests.get(form['input_url']) + open(input_path, "wb").write(response.content) + configs['input_path'] = input_path + except: + validation['input_url'] = 'Incorrect URL!' + else: + validation['file'] = 'Please insert either input url or provide a file.' + validation['input_url'] = 'Please insert either input url or provide a file.' + + tree_size_min = None + if 'tree_size_min' in form: + tree_size_min = form['tree_size_min'] + + tree_size_max = None + if 'tree_size_max' in form: + tree_size_max = form['tree_size_max'] + + def validate_tree_size(tree_size_min, tree_size_max): + if tree_size_min is None or tree_size_max is None: + validation['tree_size'] = 'Please provide information about minimum and maximum tree size.' + return False + + if int(tree_size_min) > int(tree_size_max): + validation['tree_size'] = 'Tree size minimum should be smaller than tree size maximum.' + return False + return True + + if validate_tree_size(tree_size_min, tree_size_max): + configs['tree_size'] = f'{tree_size_min}-{tree_size_max}' if tree_size_min != tree_size_max else f'{tree_size_min}' + + def validate_node_type(node_type): + # TODO EXPAND NODE TYPE + node_type_options = {'upos', 'form', 'lemma', 'upos', 'xpos', 'feats', 'deprel'} + if len(node_type) == 0: + validation['node_type'] = 'Please provide information about node type.' + return False + + for el in node_type: + if el not in node_type_options: + validation['node_type'] = f'Node option {el} is not supported. Please enter valid options.' + return False + + return True + + node_type = [] + if 'node_type_upos' in form: + node_type.append('upos') + if 'node_type_form' in form: + node_type.append('form') + if 'node_type_lemma' in form: + node_type.append('lemma') + + if validate_node_type(node_type): + configs['node_type'] = '+'.join(node_type) + + # mandatory parameters with default value + configs['internal_saves'] = None + + # TODO depends on computer + configs['cpu_cores'] = 12 + + # TODO FINALIZE THIS! + configs['complete_tree_type'] = True + configs['dependency_type'] = 'labeled_trees' in form and form['labeled_trees'] == 'on' + configs['node_order'] = 'fixed_order' in form and form['fixed_order'] == 'on' + + configs['label_whitelist'] = [] + configs['root_whitelist'] = [] + if 'root_restriction' in form and form['root_restriction']: + configs['root_whitelist'] = form['root_restriction'].split('|') + + configs['query'] = None + + configs['compare'] = None + + configs['frequency_threshold'] = 0 + configs['lines_threshold'] = None + + configs['continuation_processing'] = False + + configs['nodes_number'] = True + configs['print_root'] = True + + if configs['compare'] is not None: + configs['other_input_path'] = configs['compare'] + + ######################################## + #config = configparser.ConfigParser() + #config.read('config.ini') + + # configs = read_configs(config, args) + + configs['association_measures'] = False + configs['grew_match'] = 'grewmatch_patterns' in form and form['grewmatch_patterns'] == 'on' + configs['depsearch'] = False + name = ''.join(random.choices(string.ascii_uppercase + string.digits, k=60)) + configs['output'] = os.path.join('media', name) + + run(configs) + # TODO DELETE STORED FILE AFTER PROCESSING + return redirect(url_for('result', result_id=name)) + # return send_file(configs['output'], as_attachment=True) + # return render_template('index.html') + return render_template('index.html') + + return app if __name__ == '__main__': + app = create_app() app.run(debug=True)