diff --git a/.gitignore b/.gitignore index 9c6524a..417479f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,10 @@ internal_saves media *.sage.py -venv +venv* .idea __pycache__ static_old +build +data +*.egg-info diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..41db1b8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,8 @@ +FROM python:3.10.12 +ADD . /stark-web +WORKDIR /stark-web +RUN pip install --upgrade pip +RUN pip install waitress +RUN pip install . + +CMD ["waitress-serve", "--call", "app:app"] \ No newline at end of file diff --git a/app.py b/app.py index 54a18c7..b97991d 100755 --- a/app.py +++ b/app.py @@ -1,8 +1,13 @@ import configparser +import csv import os +import random +import re +import string +import time import requests -from flask import Flask, render_template, request, send_file +from flask import Flask, render_template, request, send_file, redirect, url_for from werkzeug.utils import secure_filename from stark import run @@ -10,6 +15,7 @@ from stark import run app = Flask(__name__) UPLOAD_FOLDER = 'uploads' ALLOWED_EXTENSIONS = {'conllu'} +DAYS_BEFORE_DELETION = 1 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER @@ -53,6 +59,7 @@ def allowed_file(filename): return '.' 
in filename and \ filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS + @app.route('/upload') def upload_file2(): return render_template('upload.html') @@ -66,14 +73,65 @@ def upload_file(): return 'file uploaded successfully' +@app.route('/about', methods=['GET']) +def about(): + return render_template('about.html') + + +# @app.route('/result//download', methods=['GET']) +# def download_result(result_id): +# return + + +@app.route('/result/', methods=['GET', 'POST']) +def result(result_id): + + if request.method == 'POST': + for filename in os.listdir('media'): + file_path = os.path.join('media', filename) + f_t = os.path.getmtime(file_path) + c_t = time.time() + file_age_seconds = c_t - f_t + if file_age_seconds > DAYS_BEFORE_DELETION * 86400: + os.remove(file_path) + # TODO ADD LINKS + # TODO TEST VARYING SIZES OF TEXT IN TABLE + return send_file(os.path.join('media', result_id), as_attachment=True, download_name='results.tsv') + + order_by = request.args.get('order_by') + order_type = request.args.get('order_type') + with open(os.path.join('media', result_id), 'r') as rf: + + content = list(csv.reader(rf, delimiter='\t')) + head = content[0] + content_dict = {h: [] for h in head} + if order_by is not None and order_by[:-1] in head: + sort_id = head.index(order_by[:-1]) + if order_type == 'asc': + # check if a number can be converted to float or int + ordered_content = sorted(content[1:], key=lambda x: -1 * float(x[sort_id]) if x[sort_id].isnumeric() or re.match(r'^-?\d+(?:\.\d+)$', x[sort_id]) is not None else x[sort_id], reverse=True) + else: + ordered_content = sorted(content[1:], key=lambda x: -1 * float(x[sort_id]) if x[sort_id].isnumeric() or re.match(r'^-?\d+(?:\.\d+)$', x[sort_id]) is not None else x[sort_id]) + else: + ordered_content = content[1:] + + for i, row in enumerate(ordered_content): + for j, v in enumerate(row): + content_dict[head[j]].append(v) + + # content.sort(key=lambda x: x[1]) + a = request + print(result_id) + return 
render_template('result.html', head_row=head, content=content_dict) + + @app.route('/', methods=['GET', 'POST']) def index(): if request.method == 'POST': form = request.form - a = request configs = {} # mandatory parameters - configs['input_path'] = 'data/sl_ssj-ud_v2.4.conllu' + configs['input_path'] = '' validation = {} @@ -93,7 +151,8 @@ def index(): # TODO OPTIONALLY ADD conllu FILE CHECK elif 'input_url' in form and form['input_url']: try: - input_path = os.path.join('media', 'input.conllu') + name = form['input_url'].split('/')[-1] + input_path = os.path.join('media', name) response = requests.get(form['input_url']) open(input_path, "wb").write(response.content) configs['input_path'] = input_path @@ -138,10 +197,13 @@ def index(): return True - # TODO radio button (maybe checkbutton) node_type = [] - if 'node_type' in form: - node_type = form['node_type'] + if 'node_type_upos' in form: + node_type.append('upos') + if 'node_type_form' in form: + node_type.append('form') + if 'node_type_lemma' in form: + node_type.append('lemma') if validate_node_type(node_type): configs['node_type'] = '+'.join(node_type) @@ -154,12 +216,13 @@ def index(): # TODO FINALIZE THIS! 
configs['complete_tree_type'] = True - configs['dependency_type'] = True - configs['node_order'] = True - configs['association_measures'] = False + configs['dependency_type'] = 'labeled_trees' in form and form['labeled_trees'] == 'on' + configs['node_order'] = 'fixed_order' in form and form['fixed_order'] == 'on' configs['label_whitelist'] = [] configs['root_whitelist'] = [] + if 'root_restriction' in form and form['root_restriction']: + configs['root_whitelist'] = form['root_restriction'].split('|') configs['query'] = None @@ -182,14 +245,16 @@ def index(): # configs = read_configs(config, args) - - - configs['output'] = os.path.join('media', 'result.tsv') - configs['complete_tree_type'] = form['complete'] == 'yes' + configs['association_measures'] = False + configs['grew_match'] = 'grewmatch_patterns' in form and form['grewmatch_patterns'] == 'on' + configs['depsearch'] = False + name = ''.join(random.choices(string.ascii_uppercase + string.digits, k=60)) + configs['output'] = os.path.join('media', name) run(configs) # TODO DELETE STORED FILE AFTER PROCESSING - return send_file(configs['output'], as_attachment=True) + return redirect(url_for('result', result_id=name)) + # return send_file(configs['output'], as_attachment=True) # return render_template('index.html') return render_template('index.html') diff --git a/requirements.txt b/requirements.txt index 7c8f4ac..defe1b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,3 @@ -blinker==1.6.2 -certifi==2023.7.22 -charset-normalizer==3.3.0 -click==8.1.7 Flask==3.0.0 -idna==3.4 -itsdangerous==2.1.2 -Jinja2==3.1.2 -MarkupSafe==2.1.3 -pyconll==3.2.0 requests==2.31.0 stark @ git+https://github.com/clarinsi/STARK@f6c7f810979c55f96b8dac111bf2017c0dd58429 -urllib3==2.0.6 -Werkzeug==3.0.0 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..3756e92 --- /dev/null +++ b/setup.py @@ -0,0 +1,23 @@ +import re +from os import path + +from setuptools import setup, find_packages + +here = 
path.abspath(path.dirname(__file__)) + +# read the version from classla/_version.py +VERSION = '0.0.1' + +setup(name='stark-api', + version=VERSION, + description=u"Stark web application", + author='CLARIN.SI', + author_email='info@clarin.si', + license='Apache 2', + packages=find_packages(), + install_requires=[ + 'Flask>=3.0.0', + 'requests>=2.31.0', + 'stark @ git+https://github.com/clarinsi/STARK@master' + ], +) \ No newline at end of file diff --git a/static/css/style.css b/static/css/style.css index eb5566b..258dd8f 100644 --- a/static/css/style.css +++ b/static/css/style.css @@ -20,4 +20,58 @@ .noUi-target.noUi-horizontal .noUi-tooltip { transform: scale(1) rotate(-45deg) translate(0px, 4px); +} + +h4 { + font-size: 1.8rem; +} + +label { + font-size: 1.2rem; +} + +.input-field > label { + font-size: 1.2rem; +} + +.table-wrapper { + overflow-x: scroll; + overflow-y: scroll; + height: 600px; + border: #555555; +} + +.table-wrapper thead th { + position: sticky; + top: 0; +} + +.table-wrapper thead th{ + cursor: pointer; +} + +.tr-link { + cursor: pointer; +} + +table { + table-layout: fixed; +} + +th { + width: 300px; + padding: 10px 0 10px; + background-color: #cccccc; + text-align: center; + border-left-radius: 2px; + border-right: solid 1px #bbbbbb; + border-left: solid 1px #bbbbbb; +} + +td { + width: 300px; + padding: 10px 0 10px; + text-align: center; + border-right: solid 1px #bbbbbb; + border-left: solid 1px #bbbbbb; } \ No newline at end of file diff --git a/static/js/init.js b/static/js/init.js index 53c49a9..3d53689 100644 --- a/static/js/init.js +++ b/static/js/init.js @@ -30,32 +30,43 @@ document.addEventListener("DOMContentLoaded", function(event) { }); // end of document ready $(document).ready(function(){ - // slider -// var valuesForSlider = [2,3,4,5]; -// var slider = document.getElementById('slider'); -// var format = { -// to: function(value) { -// return valuesForSlider[Math.round(value)]; -// }, -// from: function (value) { -// return 
valuesForSlider.indexOf(Number(value)); -// } -// }; -// -// noUiSlider.create(slider, { -// start: [3, 4], -// connect: true, -// step: 1, -// orientation: 'horizontal', // 'horizontal' or 'vertical' -// range: { -// 'min': 0, -// 'max': valuesForSlider.length - 1 -// }, -// format: wNumb({ -// decimals: 0 -// }) -// }); + $('#advanced-tree').hide(); + $('.input-field span.helper-text').hide(); + $('.input-field').bind('mouseenter', function(e) { + $(this).find('span.helper-text').show('fast') + }); + var advancedTreeExpanded = false; + $('#advanced-tree-expand').bind('click', function(e) { + if (!advancedTreeExpanded){ + advancedTreeExpanded = true; + $('#advanced-tree').show('fast'); + $('#advanced-tree-expand i').text('remove'); + } else { + advancedTreeExpanded = false; + $('#advanced-tree').hide('fast'); + $('#advanced-tree-expand i').text('add'); + } + $(this).find('span.helper-text').show('fast') + }); + $('.input-field').bind('mouseleave', function(e) { + $(this).find('span.helper-text').hide('fast') + }); + }); + $("#submit-form").submit( function(eventObj) { + var spans = $(".noUi-tooltip").find('span'); + var tree_size_min = spans[0].innerText; + var tree_size_max = spans[1].innerText; + console.log('amm'); + $("").attr("type", "hidden") + .attr("name", "tree_size_min") + .attr("value", tree_size_min) + .appendTo("#submit-form"); + $("").attr("type", "hidden") + .attr("name", "tree_size_max") + .attr("value", tree_size_max) + .appendTo("#submit-form"); + return true; }); })(jQuery); // end of jQuery name space diff --git a/static/js/result.js b/static/js/result.js new file mode 100644 index 0000000..c8f4709 --- /dev/null +++ b/static/js/result.js @@ -0,0 +1,52 @@ +function addOrUpdateUrlParameter(url, key, value) { + var urlObject = new URL(url); + urlObject.searchParams.set(key, value); + return urlObject.href; +} +function getUrlParameters() { + var searchParams = new URLSearchParams(window.location.search); + var params = {}; + + // Iterate over each 
parameter and add to the 'params' object + searchParams.forEach(function(value, key) { + params[key] = value; + }); + + return params; +} + +$(document).ready(function() { + var params = getUrlParameters(); + // Make table rows clickable + $(".table-wrapper tbody tr").click(function() { + var url = $(this).data("href"); + if (url) { + window.location.href = url; + } + }); + $(".th-desc").hide(); + $(".th-asc").hide(); + if ('order_by' in params) { + if ('order_type' in params && params.order_type == 'desc') { + $('th:contains(' + params.order_by + ') .th-desc').show(); + } else { + $('th:contains(' + params.order_by + ') .th-asc').show(); + } + } + // Make table rows clickable + $(".table-wrapper thead th").click(function(e) { + var column_name = $(this).find('span:first').text(); + + var newUrl = addOrUpdateUrlParameter(window.location.href, 'order_by', column_name); + if ('order_by' in params && 'order_type' in params && params.order_by == column_name && params.order_type == 'desc') { + newUrl = addOrUpdateUrlParameter(newUrl, 'order_type', 'asc'); + } else { + newUrl = addOrUpdateUrlParameter(newUrl, 'order_type', 'desc'); + } + + + + // Redirect to the modified URL + window.location.href = newUrl; + }); +}); \ No newline at end of file diff --git a/templates/about.html b/templates/about.html new file mode 100644 index 0000000..8ddd673 --- /dev/null +++ b/templates/about.html @@ -0,0 +1,137 @@ + + + + + + Starter Template - Materialize + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+

About

+

Welcome to the online demo interface for STARK - a highly customizable tool designed to extract various types of syntactic trees from dependency-parsed corpora (treebanks). Unlike the original command-line version, this user-friendly interface offers a streamlined set of settings, which are described in more detail here. Simply upload your treebank and click ‘SUBMIT’ to view the initial results!

+
+
+
+ +
+
+
+
+
Credits
+

Add some logos here?

+ + +
+ + + + + + + + + + + + + + + + + + +
+
+ +
+ + + + + + + + + + + diff --git a/templates/index.html b/templates/index.html index dae0842..41a1f8c 100644 --- a/templates/index.html +++ b/templates/index.html @@ -22,6 +22,10 @@ + @@ -79,159 +83,135 @@
- -
-

General settings

+

Welcome to the online demo interface for STARK - a highly customizable tool designed to extract various types of syntactic trees from dependency-parsed corpora (treebanks). Unlike the original command-line version, this user-friendly interface offers a streamlined set of settings, which are described in more detail here. Simply upload your treebank and click ‘SUBMIT’ to view the initial results!

+ +

Input data

- +
Browse - +
- Upload a treebank in CONLL-U format.
- - Insert a link to treebank in CONLL-U format. + +

-

Tree specifications

+

Tree specification

- +

- Specify the number of nodes in the trees to be extracted.
- +
-
-

+

+
-

-

+

+
-

-

+

+
-

- Should extracted trees be differentiated based on the surface word order? +

-

Advanced tree specifications

-
+

add Advanced settings

+
-
-
-
- +
+ +
+
+ +
- Should extracted trees be differentiated based on the surface word order?
-
- -
-
-
- +
+ +
+
+ +
- Should the extracted trees contain names of dependency relations?
- -
-
-
- -
- Should only full subtrees be extracted (rather than all possible subtrees)? -
-
- - Specify potential restrictions on the root of the trees to be extracted (e.g. ‘upos=NOUN’ if you are interested in nominal trees only) +
-
-
-

Output settings

-
-
-
-
-
- +
+ +
+
+ +
- Include measures of statistical association between nodes of the tree (MI, MI3, Dice, logDice, t-score, simple-LL) in the output?
-
- - -
-
-
- +
+ +
+
+ +
- Map the structure of the trees to the grew-match formalism used by https://universal.grew.fr?
diff --git a/templates/result.html b/templates/result.html new file mode 100644 index 0000000..f4f579f --- /dev/null +++ b/templates/result.html @@ -0,0 +1,146 @@ + + + + + + Starter Template - Materialize + + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+
+
+
+ + + +
+
+ + +
+ + + + {% for head in head_row %} + {% if not head == 'Grew-match URL' %} + + {% endif %} + {% endfor %} + + + + {% for i in range(content['Tree']|length) %} + + {% for col in content %} + {% if not col == 'Grew-match URL' %} + + {% endif %} + {% endfor %} + + {% endfor %} + +
{{ head }}
+
+
+ +
+
+
+
+
Credits
+

Add some logos here?

+ + +
+
+
+ +
+ + + + + + + + +