STARK-web/app.py

340 lines
14 KiB
Python
Raw Normal View History

2023-10-04 15:24:40 +00:00
import configparser
import csv
2023-10-04 15:24:40 +00:00
import os
import random
import re
import string
import time
2023-10-04 15:24:40 +00:00
import requests
from flask import Flask, render_template, request, send_file, redirect, url_for
2024-02-19 14:33:19 +00:00
from flask_babel import Babel, gettext
2023-10-04 15:24:40 +00:00
from werkzeug.utils import secure_filename
from stark import run
# Stored in the Flask config as UPLOAD_FOLDER by create_app().
UPLOAD_FOLDER = 'uploads'

# Lower-case file extensions accepted by allowed_file().
ALLOWED_EXTENSIONS = {'conllu'}

# Files in ``media`` whose modification time is older than this many days are
# removed by the result handler when a download is requested.
DAYS_BEFORE_DELETION = 1

# Column name as written in the result TSV -> column name shown in the table.
# Only columns listed here are passed on to the result template.
TABLE_COLUMNS2DISPLAYED_TABLE_COLUMNS = {
    'Tree': 'Tree',
    'Absolute frequency': 'Frequency',
    'Number of nodes': 'Number of nodes',
    'Head node': 'Head node',
    'Grew-match URL': 'Grew-match URL',
    'Order': 'Order',
    'MI': 'MI',
    'logDice': 'logDice',
    't-score': 't-score',
}

# Reverse lookup: displayed column name -> column name in the TSV.
DISPLAYED_TABLE_COLUMNS2TABLE_COLUMNS = {
    displayed: stored
    for stored, displayed in TABLE_COLUMNS2DISPLAYED_TABLE_COLUMNS.items()
}
2024-02-19 14:33:19 +00:00
# Language used when the request does not name a supported one.
DEFAULT_LANGUAGE = 'en'

# Language codes accepted in the ``lang`` query parameter.
LANGUAGES = ['en', 'sl']

# Per-language values handed to the index template as ``translations``.
# ``code`` is the language itself; ``switch_code`` / ``switch_link`` describe
# the link that switches to the other language.
_translations = {
    'en': {
        'hello': 'Hello',
        'welcome': 'Welcome',
        'greeting': 'How are you?',
        'name': 'Your name:',
        'code': 'en',
        'switch_code': 'SL',
        'switch_link': '?lang=sl',
    },
    'sl': {
        # These four entries previously held Spanish placeholder text although
        # 'sl' is the Slovenian locale.
        'hello': 'Pozdravljeni',
        'welcome': 'Dobrodošli',
        'greeting': 'Kako ste?',
        'name': 'Vaše ime:',
        'code': 'sl',
        'switch_code': 'EN',
        'switch_link': '?lang=en',
    },
}
def get_locale():
    """Return the language requested via the ``lang`` query parameter.

    Falls back to ``DEFAULT_LANGUAGE`` when the parameter is missing or is not
    one of ``LANGUAGES``.
    """
    requested = request.args.get('lang')
    return requested if requested in LANGUAGES else DEFAULT_LANGUAGE
2023-10-04 15:24:40 +00:00
2023-12-06 15:35:15 +00:00
def create_app():
    """Create and configure the Flask application.

    Static files are served under ``/stark/static``, the upload folder is
    recorded in the app config, and Flask-Babel is attached with
    ``get_locale`` choosing the language of each request.
    """
    app = Flask(__name__, static_url_path='/stark/static')
    app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

    i18n = Babel(
        app,
        locale_selector=get_locale,
        default_translation_directories='translations',
    )
    # NOTE(review): this assigns a plain list to the ``list_translations``
    # attribute of the Babel instance -- confirm nothing calls it as a method.
    i18n.list_translations = ['en', 'sl']
2023-10-04 15:24:40 +00:00
2023-12-06 15:35:15 +00:00
def create_default_configs():
    """Return a fresh dictionary holding the default STARK settings.

    A new dictionary (with new whitelist lists) is built on every call, so
    callers may modify the result freely.

    ``other_input_path`` is only meaningful together with a ``compare`` file.
    The defaults never set one, so the key is not present in the result.

    Returns:
        dict: Setting name -> default value.
    """
    return {
        # mandatory parameters
        'input_path': 'data/sl_ssj-ud_v2.4.conllu',
        'output': 'results/out_official.tsv',
        'tree_size': '2-4',
        'node_type': 'upos',

        # mandatory parameters with default value
        'internal_saves': './internal_saves',
        'cpu_cores': 12,
        'complete_tree_type': True,
        'dependency_type': True,
        'node_order': True,
        'association_measures': False,

        'label_whitelist': [],
        'root_whitelist': [],

        'query': None,

        'compare': None,

        'frequency_threshold': 0,
        'lines_threshold': None,

        'continuation_processing': False,

        'nodes_number': True,
        'print_root': True,
    }
2023-10-04 15:24:40 +00:00
2023-12-06 15:35:15 +00:00
def allowed_file(filename):
    """Tell whether *filename* ends in one of ``ALLOWED_EXTENSIONS``.

    The extension is the text after the last dot, compared case-insensitively.
    A name without any dot is never allowed.
    """
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
2024-02-21 10:10:31 +00:00
@app.route('/stark/about', methods=['GET'])
def about():
    """Serve the ``about.html`` page."""
    page = render_template('about.html')
    return page
2024-02-21 10:10:31 +00:00
@app.route('/stark/result/<result_id>', methods=['GET', 'POST'])
def result(result_id):
    """Show a stored result table (GET) or send it as a download (POST).

    Args:
        result_id: Name of the result file inside ``media``, taken from the URL.

    Returns:
        POST: the result file as an attachment named ``results.tsv``. Before
        sending, files in ``media`` older than ``DAYS_BEFORE_DELETION`` days
        are removed.
        GET: the rendered ``result.html`` containing the columns listed in
        ``TABLE_COLUMNS2DISPLAYED_TABLE_COLUMNS``, optionally sorted by the
        ``order_by`` / ``order_type`` query arguments.

    Raises:
        werkzeug.exceptions.NotFound: If ``result_id`` is not a plain file
            name, or the result file does not exist or is empty.
    """
    from flask import abort  # function-scope import: only this handler needs it

    def sort_key(row, column_index):
        """Build a sort key that never mixes numbers and text.

        Numeric cells sort before text cells and carry their negated value, so
        a plain ascending sort yields the largest number first.
        """
        cell = row[column_index] if column_index < len(row) else ''
        try:
            number = float(cell)
        except ValueError:
            number = None
        # NaN compares unequal to itself and cannot be ordered; treat as text.
        if number is None or number != number:
            return (1, 0.0, cell)
        return (0, -number, '')

    # The id comes straight from the URL: accept it only when it is already a
    # safe bare file name, so it can never point outside ``media``.
    if secure_filename(result_id) != result_id:
        abort(404)
    result_path = os.path.join('media', result_id)

    is_download = request.method == 'POST'
    if is_download:
        # Opportunistic clean-up of expired files.
        now = time.time()
        max_age_seconds = DAYS_BEFORE_DELETION * 86400
        for filename in os.listdir('media'):
            file_path = os.path.join('media', filename)
            try:
                expired = (os.path.isfile(file_path)
                           and now - os.path.getmtime(file_path) > max_age_seconds)
                if expired:
                    os.remove(file_path)
            except OSError:
                # The file vanished or cannot be removed; clean-up is
                # best-effort and must not break the download.
                continue

    # The requested file may never have existed or may just have expired.
    if not os.path.isfile(result_path):
        abort(404)

    if is_download:
        return send_file(result_path, as_attachment=True, download_name='results.tsv')

    # The displayed column name arrives with one trailing character (the index
    # view sends 'Frequency '), which is dropped before the lookup. Unknown
    # names simply disable sorting.
    order_by_display = request.args.get('order_by')
    order_by = None
    if order_by_display:
        order_by = DISPLAYED_TABLE_COLUMNS2TABLE_COLUMNS.get(order_by_display[:-1])
    order_type = request.args.get('order_type')

    with open(result_path, 'r', newline='') as rf:
        content = list(csv.reader(rf, delimiter='\t'))
    if not content:
        abort(404)

    head = content[0]
    # Blank lines come back from csv.reader as empty rows; they hold no data.
    rows = [row for row in content[1:] if row]

    if order_by is not None and order_by in head:
        sort_id = head.index(order_by)
        # Without reversal numbers come out largest-first and text A-Z; 'asc'
        # reverses both. NOTE(review): text columns therefore run Z-A for
        # 'asc', as in the previous implementation -- confirm this is wanted.
        rows = sorted(rows, key=lambda row: sort_key(row, sort_id),
                      reverse=(order_type == 'asc'))

    # Transpose the rows into one list per column. Short rows are padded and
    # over-long rows truncated so every column keeps the same length.
    width = len(head)
    content_dict = {h: [] for h in head}
    for row in rows:
        padded = row[:width] + [''] * (width - len(row))
        for column, value in zip(head, padded):
            content_dict[column].append(value)

    displayed_head = [
        TABLE_COLUMNS2DISPLAYED_TABLE_COLUMNS[col]
        for col in head
        if col in TABLE_COLUMNS2DISPLAYED_TABLE_COLUMNS
    ]
    displayed_content_dict = {
        TABLE_COLUMNS2DISPLAYED_TABLE_COLUMNS[column]: values
        for column, values in content_dict.items()
        if column in TABLE_COLUMNS2DISPLAYED_TABLE_COLUMNS
    }
    return render_template('result.html', head_row=displayed_head, content=displayed_content_dict)
2024-02-21 10:10:31 +00:00
@app.route('/stark/', methods=['GET', 'POST'])
# @headers({'Cache-Control': 'no-store, no-cache, must-revalidate, post-check=0, pre-check=0'})
def index():
    """Render the settings form (GET) or process a submitted form (POST).

    On POST the form is validated and turned into a ``configs`` dictionary,
    which is handed to ``run``. The output is written to a randomly named
    file in ``media``.

    Returns:
        GET: the rendered ``index.html``.
        POST: ``index.html`` with a ``validation`` dictionary when the form is
        invalid, processing fails or no results were produced; otherwise a
        redirect to the ``result`` view for the new output file.
    """
    import secrets  # function-scope import: only needed for the result id

    translations = _translations[get_locale()]
    if request.method != 'POST':
        return render_template('index.html', translations=translations)

    form = request.form
    validation = {}
    remote_timeout = 30  # seconds to wait for a remote corpus before giving up

    def download(url):
        """Fetch *url* into ``media`` and return the stored path.

        Raises ``ValueError`` when no usable file name can be derived from the
        URL, ``requests.RequestException`` on network/HTTP errors and
        ``OSError`` when the file cannot be written.
        """
        # NOTE(review): the server fetches an arbitrary user-supplied URL;
        # consider restricting the allowed hosts.
        filename = secure_filename(url.split('/')[-1])
        if not filename:
            raise ValueError('URL does not end in a usable file name')
        response = requests.get(url, timeout=remote_timeout)
        # An error page must not be stored as if it were the corpus.
        response.raise_for_status()
        path = os.path.join('media', filename)
        with open(path, 'wb') as wf:
            wf.write(response.content)
        return path

    def store_source(file_key, url_key, both_message, missing_message=None):
        """Store the uploaded file or the file behind the URL.

        Returns the stored path, or ``None`` when nothing usable was provided.
        Problems are recorded in ``validation`` under *file_key* / *url_key*.
        *missing_message* is only recorded when given, i.e. when the source is
        mandatory.
        """
        upload = request.files.get(file_key)
        url = form.get(url_key)
        if upload and url:
            validation[file_key] = both_message
            validation[url_key] = both_message
            return None
        if upload:
            # secure_filename() may strip the whole name; fall back to a fixed
            # one instead of trying to write to the directory itself.
            filename = secure_filename(upload.filename) or 'upload.conllu'
            path = os.path.join('media', filename)
            upload.save(path)
            # TODO OPTIONALLY ADD conllu FILE CHECK
            return path
        if url:
            try:
                return download(url)
            except (requests.RequestException, OSError, ValueError):
                validation[url_key] = gettext('Incorrect URL!')
                return None
        if missing_message is not None:
            validation[file_key] = missing_message
            validation[url_key] = missing_message
        return None

    def parse_tree_size(raw_min, raw_max):
        """Return ``(minimum, maximum)`` as ints, or ``None`` if invalid."""
        try:
            minimum, maximum = int(raw_min), int(raw_max)
        except (TypeError, ValueError):
            # A field is missing (None) or does not hold an integer.
            validation['tree_size'] = gettext('Please provide information about minimum and maximum tree size.')
            return None
        if minimum > maximum:
            validation['tree_size'] = gettext('Tree size minimum should be smaller than tree size maximum.')
            return None
        return minimum, maximum

    def validate_node_type(node_type):
        """Check that at least one, and only supported, node types are chosen."""
        # TODO EXPAND NODE TYPE
        node_type_options = {'upos', 'form', 'lemma', 'xpos', 'feats', 'deprel'}
        if not node_type:
            validation['node_type'] = gettext('Please select at least one node type.')
            return False
        for el in node_type:
            if el not in node_type_options:
                validation['node_type'] = gettext('Node option') + f' {el} ' + gettext('is not supported. Please enter valid options.')
                return False
        return True

    configs = {}

    # mandatory parameters
    input_path = store_source(
        'file', 'input_url',
        gettext('Please insert either input url or file, not both of them.'),
        gettext('Please insert either input url or provide a file.'),
    )
    configs['input_path'] = input_path or ''

    tree_size = parse_tree_size(form.get('tree_size_min'), form.get('tree_size_max'))
    if tree_size is not None:
        tree_size_min, tree_size_max = tree_size
        configs['tree_size'] = f'{tree_size_min}-{tree_size_max}' if tree_size_min != tree_size_max else f'{tree_size_min}'

    node_type = [option for option in ('upos', 'form', 'lemma') if f'node_type_{option}' in form]
    if validate_node_type(node_type):
        configs['node_type'] = '+'.join(node_type)

    # mandatory parameters with default value
    configs['internal_saves'] = None

    # TODO depends on computer
    configs['cpu_cores'] = 12

    # TODO FINALIZE THIS!
    configs['complete_tree_type'] = True
    # Checkboxes are only present in the form, with value 'on', when ticked.
    configs['dependency_type'] = form.get('labeled_trees') == 'on'
    configs['node_order'] = form.get('fixed_order') == 'on'
    configs['association_measures'] = form.get('association_measures') == 'on'

    configs['label_whitelist'] = []
    root_restriction = form.get('root_restriction')
    configs['root_whitelist'] = root_restriction.split('|') if root_restriction else []

    if form.get('query'):
        configs['query'] = form['query']
        # A query replaces the tree size setting.
        configs['tree_size'] = '0'
    else:
        configs['query'] = None

    # Optional second corpus to compare against.
    configs['compare'] = store_source(
        'compare_file', 'compare_url',
        gettext('Please insert either compare url or file, not both of them.'),
    )

    configs['sentence_count_file'] = None
    configs['detailed_results_file'] = None

    configs['frequency_threshold'] = 0
    raw_threshold = form.get('frequency_threshold')
    if raw_threshold:
        try:
            configs['frequency_threshold'] = int(raw_threshold)
        except ValueError:
            validation['frequency_threshold'] = gettext('Please insert an Integer.')
    configs['lines_threshold'] = None

    configs['continuation_processing'] = False

    configs['label_subtypes'] = True
    configs['nodes_number'] = True
    configs['print_root'] = True

    if configs['compare'] is not None:
        configs['other_input_path'] = configs['compare']
    configs['grew_match'] = True
    configs['depsearch'] = False
    configs['example'] = False

    # The file name doubles as the id in the result URL, so it is drawn from a
    # cryptographically secure source to keep it unguessable.
    alphabet = string.ascii_uppercase + string.digits
    name = ''.join(secrets.choice(alphabet) for _ in range(60))
    configs['output'] = os.path.join('media', name)

    if validation:
        return render_template('index.html', validation=validation, translations=translations)

    try:
        run(configs)
        with open(configs['output'], 'r', newline='') as rf:
            content = list(csv.reader(rf, delimiter='\t'))
    except Exception:
        # Boundary of the request: log the details for the operator and show
        # the user a general message instead of a server error.
        app.logger.exception('STARK processing failed')
        validation['general'] = gettext('Processing failed! Please recheck your settings, e.g. input format or head node description.')
        return render_template('index.html', validation=validation, translations=translations)

    # Nothing beyond the header row means there are no results.
    if len(content) <= 1:
        validation['results'] = False
        return render_template('index.html', validation=validation, translations=translations)

    # ``translations['code']`` is the active language code; it does not depend
    # on a 'code' entry existing in the gettext catalogue.
    return redirect(url_for('result', result_id=name, order_by='Frequency ', order_type='desc', lang=translations['code']))
2023-10-04 15:24:40 +00:00
2023-12-06 15:35:15 +00:00
return app
2023-10-04 15:24:40 +00:00
if __name__ == "__main__":
    # Running the module directly starts Flask's built-in server.
    # NOTE(review): debug=True enables the interactive debugger and the
    # reloader -- suitable for local development only.
    app = create_app()
    app.run(debug=True)