You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

264 lines
8.9 KiB

import configparser
import csv
import os
import random
import re
import string
import time
import requests
from flask import Flask, render_template, request, send_file, redirect, url_for
from werkzeug.utils import secure_filename
from stark import run
# Flask application object; the route decorators in this file register views on it.
app = Flask(__name__)
# Folder name stored in the app config under the 'UPLOAD_FOLDER' key.
UPLOAD_FOLDER = 'uploads'
# File extensions that allowed_file() accepts (compared in lower case).
ALLOWED_EXTENSIONS = {'conllu'}
# Age in days after which the result view's cleanup removes files from 'media'.
DAYS_BEFORE_DELETION = 1
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
def create_default_configs():
    """Build a dictionary of default configuration values.

    Returns:
        dict: A fresh dictionary on every call, holding the mandatory
        parameters followed by the parameters that carry a default value.
    """
    configs = {
        # mandatory parameters
        'input_path': 'data/sl_ssj-ud_v2.4.conllu',
        'output': 'results/out_official.tsv',
        'tree_size': '2-4',
        'node_type': 'upos',
        # mandatory parameters with default value
        'internal_saves': './internal_saves',
        'cpu_cores': 12,
        'complete_tree_type': True,
        'dependency_type': True,
        'node_order': True,
        'association_measures': False,
        'label_whitelist': [],
        'root_whitelist': [],
        'query': None,
        'compare': None,
        'frequency_threshold': 0,
        'lines_threshold': None,
        'continuation_processing': False,
        'nodes_number': True,
        'print_root': True,
    }

    # 'other_input_path' mirrors 'compare' whenever a comparison value is set.
    compare_path = configs['compare']
    if compare_path is not None:
        configs['other_input_path'] = compare_path
    return configs
def allowed_file(filename):
    """Tell whether *filename* ends in one of the allowed extensions.

    Args:
        filename: File name to inspect.

    Returns:
        bool: False when the name contains no dot; otherwise whether the text
        after the last dot, lower-cased, is in ALLOWED_EXTENSIONS.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
@app.route('/upload')
def upload_file2():
    """Render the 'upload.html' template for the /upload route."""
    template_name = 'upload.html'
    return render_template(template_name)
@app.route('/uploader', methods=['GET', 'POST'])
def upload_file():
    """Store an uploaded file in the configured upload folder.

    GET requests are redirected to the /upload page. POST requests must carry
    a 'file' part whose sanitised name has an allowed extension.

    Returns:
        A redirect for GET, a (message, 400) tuple when the upload is missing
        or not allowed, or a confirmation string once the file is saved.
    """
    if request.method != 'POST':
        # A bare GET used to fall through and return None, which Flask rejects
        # as an invalid response; send the visitor to the upload page instead.
        return redirect(url_for('upload_file2'))

    uploaded = request.files.get('file')
    # secure_filename() may return '' (e.g. for names made only of separators).
    filename = secure_filename(uploaded.filename) if uploaded else ''
    if not filename:
        return 'No file selected.', 400
    if not allowed_file(filename):
        return 'File type is not allowed.', 400

    # Save under the configured folder rather than the current working directory.
    upload_dir = app.config['UPLOAD_FOLDER']
    os.makedirs(upload_dir, exist_ok=True)
    uploaded.save(os.path.join(upload_dir, filename))
    return 'file uploaded successfully'
@app.route('/about', methods=['GET'])
def about():
    """Render the 'about.html' template for the /about route."""
    template_name = 'about.html'
    return render_template(template_name)
# @app.route('/result/<result_id>/download', methods=['GET'])
# def download_result(result_id):
# return
# Cells made only of an optional minus sign, digits and an optional decimal part.
_NUMERIC_CELL = re.compile(r'-?\d+(?:\.\d+)?')


def _purge_expired_media(keep):
    """Delete files in 'media' older than DAYS_BEFORE_DELETION days, sparing *keep*."""
    cutoff = time.time() - DAYS_BEFORE_DELETION * 86400
    for filename in os.listdir('media'):
        if filename == keep:
            # Never remove the file that is about to be sent to the client.
            continue
        file_path = os.path.join('media', filename)
        # os.remove() fails on directories, so only regular files are purged.
        if os.path.isfile(file_path) and os.path.getmtime(file_path) < cutoff:
            os.remove(file_path)


def _cell_sort_key(row, column):
    """Return a key that orders numeric cells by value and before text cells."""
    cell = row[column] if column < len(row) else ''
    if _NUMERIC_CELL.fullmatch(cell):
        return (0, float(cell))
    # The leading tag keeps floats and strings from ever being compared.
    return (1, cell)


@app.route('/result/<result_id>', methods=['GET', 'POST'])
def result(result_id):
    """Show a stored result table (GET) or send it as a download (POST).

    Args:
        result_id: Name of the tab-separated result file inside 'media'.

    Query parameters (GET):
        order_by: Column name followed by one extra trailing character, which
            is dropped before the name is looked up in the header row.
        order_type: 'asc' sorts ascending; any other value sorts descending.

    Returns:
        The rendered 'result.html' page, the file as an attachment named
        'results.tsv', or a (message, 404) tuple when the result is unknown.
    """
    # Only plain file names may be looked up inside 'media'.
    if secure_filename(result_id) != result_id:
        return 'Result not found.', 404
    result_path = os.path.join('media', result_id)
    if not os.path.isfile(result_path):
        return 'Result not found.', 404

    if request.method == 'POST':
        _purge_expired_media(keep=result_id)
        # TODO ADD LINKS
        # TODO TEST VARYING SIZES OF TEXT IN TABLE
        return send_file(result_path, as_attachment=True, download_name='results.tsv')

    order_by = request.args.get('order_by')
    order_type = request.args.get('order_type')

    # newline='' lets the csv module handle line endings itself.
    with open(result_path, 'r', newline='') as rf:
        content = list(csv.reader(rf, delimiter='\t'))
    if not content:
        return render_template('result.html', head_row=[], content={})
    head, rows = content[0], content[1:]

    column_name = order_by[:-1] if order_by is not None else None
    if column_name in head:
        sort_id = head.index(column_name)
        rows = sorted(
            rows,
            key=lambda row: _cell_sort_key(row, sort_id),
            reverse=order_type != 'asc',
        )

    # Regroup the rows column-wise, keyed by header name.
    content_dict = {h: [] for h in head}
    for row in rows:
        # zip() ignores cells beyond the header width instead of failing.
        for column, value in zip(head, row):
            content_dict[column].append(value)
    return render_template('result.html', head_row=head, content=content_dict)
# Upper bound in seconds for fetching a corpus from a user-supplied URL.
_DOWNLOAD_TIMEOUT_SECONDS = 30

# Form checkbox name -> node type it selects, in output order.
_NODE_TYPE_FIELDS = (
    ('node_type_upos', 'upos'),
    ('node_type_form', 'form'),
    ('node_type_lemma', 'lemma'),
)


def _store_input_corpus(form, files, validation):
    """Save the uploaded or downloaded corpus under 'media'; return its path or ''."""
    upload = files.get('file')
    input_url = form.get('input_url')

    if upload:
        if input_url:
            message = 'Please insert either input url or file, not both of them.'
            validation['file'] = message
            validation['input_url'] = message
            return ''
        # TODO ADD OPTION FOR MULTIPLE FILES - ZIP!
        # TODO OPTIONALLY ADD conllu FILE CHECK
        filename = secure_filename(upload.filename)
        if not filename:
            validation['file'] = 'Please provide a file with a valid name.'
            return ''
        input_path = os.path.join('media', filename)
        upload.save(input_path)
        return input_path

    if input_url:
        # Keep only a sanitised last path segment so the download cannot be
        # written outside 'media'.
        filename = secure_filename(input_url.split('/')[-1])
        if not filename:
            validation['input_url'] = 'Incorrect URL!'
            return ''
        input_path = os.path.join('media', filename)
        try:
            response = requests.get(input_url, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
            # Do not store an HTTP error page as if it were the corpus.
            response.raise_for_status()
            with open(input_path, 'wb') as corpus_file:
                corpus_file.write(response.content)
        except (requests.RequestException, OSError):
            validation['input_url'] = 'Incorrect URL!'
            return ''
        return input_path

    message = 'Please insert either input url or provide a file.'
    validation['file'] = message
    validation['input_url'] = message
    return ''


def _parse_tree_size(form, validation):
    """Return the tree size as 'min-max' (or 'n' when equal), or None if invalid."""
    raw_min = form.get('tree_size_min')
    raw_max = form.get('tree_size_max')
    if raw_min is None or raw_max is None:
        validation['tree_size'] = 'Please provide information about minimum and maximum tree size.'
        return None
    try:
        smallest, largest = int(raw_min), int(raw_max)
    except ValueError:
        validation['tree_size'] = 'Tree size minimum and maximum must be whole numbers.'
        return None
    if smallest > largest:
        validation['tree_size'] = 'Tree size minimum should be smaller than tree size maximum.'
        return None
    return f'{smallest}-{largest}' if smallest != largest else f'{smallest}'


def _collect_node_type(form, validation):
    """Return the ticked node types joined by '+', or None when none is ticked."""
    # TODO EXPAND NODE TYPE
    selected = [node for field, node in _NODE_TYPE_FIELDS if field in form]
    if not selected:
        validation['node_type'] = 'Please provide information about node type.'
        return None
    return '+'.join(selected)


@app.route('/', methods=['GET', 'POST'])
def index():
    """Show the query form (GET) or run a query built from the form (POST).

    On POST the input corpus, tree size and node type are validated. If any
    check fails, the form is rendered again with the collected messages and
    run() is not called. Otherwise the configuration is passed to run() and
    the client is redirected to the result page.

    Returns:
        The rendered 'index.html' page or a redirect to the 'result' view.
    """
    if request.method != 'POST':
        return render_template('index.html')

    form = request.form
    validation = {}
    configs = {}

    # mandatory parameters
    configs['input_path'] = _store_input_corpus(form, request.files, validation)
    tree_size = _parse_tree_size(form, validation)
    if tree_size is not None:
        configs['tree_size'] = tree_size
    node_type = _collect_node_type(form, validation)
    if node_type is not None:
        configs['node_type'] = node_type

    if validation:
        # Never start processing with an incomplete configuration.
        # NOTE(review): assumes index.html can display 'validation' - confirm.
        return render_template('index.html', validation=validation)

    # mandatory parameters with default value
    configs['internal_saves'] = None
    # TODO depends on computer
    configs['cpu_cores'] = 12
    # TODO FINALIZE THIS!
    configs['complete_tree_type'] = True
    configs['dependency_type'] = form.get('labeled_trees') == 'on'
    configs['node_order'] = form.get('fixed_order') == 'on'
    configs['label_whitelist'] = []
    configs['root_whitelist'] = []
    root_restriction = form.get('root_restriction')
    if root_restriction:
        configs['root_whitelist'] = root_restriction.split('|')
    configs['query'] = None
    configs['compare'] = None
    configs['frequency_threshold'] = 0
    configs['lines_threshold'] = None
    configs['continuation_processing'] = False
    configs['nodes_number'] = True
    configs['print_root'] = True
    if configs['compare'] is not None:
        configs['other_input_path'] = configs['compare']

    configs['association_measures'] = False
    configs['grew_match'] = form.get('grewmatch_patterns') == 'on'
    configs['depsearch'] = False

    # The result name is the only thing guarding the result URL, so draw it
    # from the operating system's random source instead of the default PRNG.
    alphabet = string.ascii_uppercase + string.digits
    name = ''.join(random.SystemRandom().choices(alphabet, k=60))
    configs['output'] = os.path.join('media', name)

    run(configs)
    # TODO DELETE STORED FILE AFTER PROCESSING
    return redirect(url_for('result', result_id=name))
# Start the Flask server only when this file is executed directly.
# NOTE(review): debug=True turns on Flask's debug mode - confirm this entry
# point is never used for a publicly reachable deployment.
if __name__ == '__main__':
    app.run(debug=True)