diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..429f144 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.idea/ +data/ +__pycache__/ +venv/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..1a5c8fa --- /dev/null +++ b/Dockerfile @@ -0,0 +1,6 @@ +FROM python:3.9.1 +ADD . /classla-api +WORKDIR /classla-api +RUN pip install -r requirements.txt + +CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"] \ No newline at end of file diff --git a/README.md b/README.md index 9a5e8e9..b0ebfd3 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,75 @@ # classla-api +## Description +This tool exposes the classla library through an HTTP API. It provides endpoints for several preset classla configurations, as well as one endpoint that accepts custom settings. + +## Standard UD +Preset classla settings: +```json +{ + "lang": "sl", + "pos_lemma_pretag": false +} +``` +Usage example: +```commandline +curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' http://127.0.0.1:5000/standard-ud +``` + +## Standard JOS +Preset classla settings: +```json +{ + "lang": "sl", + "pos_use_lexicon": true, + "type": "standard_jos" +} +``` +Usage example: +```commandline +curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' http://127.0.0.1:5000/standard-jos +``` + +## Nonstandard UD +Preset classla settings: +```json +{ + "lang": "sl", + "pos_use_lexicon": true, + "type": "nonstandard_jos" +} +``` +Usage example: +```commandline +curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' http://127.0.0.1:5000/nonstandard-ud +``` + +## Nonstandard JOS +Preset classla settings: +```json +{ + "lang": "sl", + "pos_use_lexicon": true, + "processors": { + "tokenize": "nonstandard", + "lemma": "nonstandard", + "pos": "nonstandard", + "depparse": "standard_jos", + "ner": "nonstandard" + } +} +``` +Usage example: +```commandline +curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' http://127.0.0.1:5000/nonstandard-jos +``` + +## Custom settings +Custom settings may 
import gc
import os
import secrets
from datetime import timedelta

import classla
import torch
from flask import Flask, request, jsonify, session

# Fetch the Slovenian resources for every model type the preset pipelines
# below rely on. This runs at import time, before the app starts serving.
classla.download('sl')
classla.download('sl', type='standard_jos')
classla.download('sl', type='nonstandard')

print(f'USE GPU = {torch.cuda.is_available()}')

# Preset pipelines are built once at start-up and shared by all requests.
# NOTE(review): the README documents different settings for three of these
# presets (Standard UD: `pos_lemma_pretag: false`; Nonstandard UD:
# `pos_use_lexicon: true` with type `nonstandard_jos`; Nonstandard JOS:
# `pos_use_lexicon: true`). Confirm which side is intended and align them.
nlp_standard_UD = classla.Pipeline('sl', pos_use_lexicon=True)
nlp_standard_JOS = classla.Pipeline('sl', pos_use_lexicon=True, type='standard_jos')
nlp_nonstandard_UD = classla.Pipeline('sl', type='nonstandard')
nlp_nonstandard_JOS = classla.Pipeline('sl', processors={
    "tokenize": "nonstandard",
    "lemma": "nonstandard",
    "pos": "nonstandard",
    "depparse": "standard_jos",
    "ner": "nonstandard",
})

app = Flask(__name__)
app.config['SESSION_TYPE'] = 'filesystem'
# The signing key must not live in source control. Take it from the
# environment; when it is not provided, fall back to a random per-process key.
# NOTE(review): with the random fallback, signed cookies do not survive a
# restart or span multiple workers; set SECRET_KEY if sessions are ever used.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY') or secrets.token_hex(32)


@app.route('/')
def hello_world():
    """Return a fixed greeting, usable as a quick check that the app responds."""
    return 'This is my first API call!'
# Response returned by every endpoint when the request carries no `text`.
_TEXT_MISSING = 'ERROR `text` was not given!'


def _json_payload():
    """Return the request body parsed as a JSON object.

    ``force=True`` makes Flask parse the body even when the client did not
    send a JSON Content-Type (as in the README's plain ``curl -d`` examples).
    A body that is valid JSON but not an object (a list, a string, ...) is
    mapped to an empty dict so callers can always use mapping access.
    """
    payload = request.get_json(force=True)
    return payload if isinstance(payload, dict) else {}


def _annotate(pipeline, payload):
    """Run *pipeline* on ``payload['text']`` and return the CoNLL output.

    Returns an ``(error message, 400)`` pair instead of raising ``KeyError``
    when the request did not include a ``text`` field.
    """
    if 'text' not in payload:
        return _TEXT_MISSING, 400
    return pipeline(payload['text']).to_conll()


@app.route('/custom-settings', methods=["POST"])
def custom_settings():
    """Annotate text with a pipeline built from client-supplied settings.

    Expects a JSON object with ``text`` and ``settings``; ``settings`` is
    forwarded as keyword arguments to ``classla.Pipeline``. Responds with
    HTTP 400 and an error message when either field is missing or when
    ``settings`` is not a JSON object.
    """
    payload = _json_payload()
    if 'settings' not in payload:
        return 'ERROR `settings` were not given!', 400
    settings = payload['settings']
    if not isinstance(settings, dict):
        return 'ERROR `settings` must be a JSON object!', 400
    # Validate `text` before building the pipeline: construction is the slow
    # part of this endpoint and would be wasted on a request that cannot run.
    if 'text' not in payload:
        return _TEXT_MISSING, 400

    # NOTE(review): `settings` is untrusted input passed unfiltered to the
    # library constructor; consider whitelisting the accepted keys.
    nlp = classla.Pipeline(**settings)
    try:
        return nlp(payload['text']).to_conll()
    finally:
        # Release the one-off pipeline even when annotation fails, so a bad
        # request does not keep its models in memory.
        del nlp
        gc.collect()


@app.route('/standard-ud', methods=["POST"])
def standard_ud():
    """Annotate the posted ``text`` with the preset standard UD pipeline."""
    return _annotate(nlp_standard_UD, _json_payload())


@app.route('/standard-jos', methods=["POST"])
def standard_jos():
    """Annotate the posted ``text`` with the preset standard JOS pipeline."""
    return _annotate(nlp_standard_JOS, _json_payload())


@app.route('/nonstandard-ud', methods=["POST"])
def nonstandard_ud():
    """Annotate the posted ``text`` with the preset nonstandard UD pipeline."""
    return _annotate(nlp_nonstandard_UD, _json_payload())


@app.route('/nonstandard-jos', methods=["POST"])
def nonstandard_jos():
    """Annotate the posted ``text`` with the preset nonstandard JOS pipeline."""
    return _annotate(nlp_nonstandard_JOS, _json_payload())


if __name__ == '__main__':
    # Debug mode is deliberately not forced on: the server listens on all
    # interfaces, and the interactive debugger must never be reachable from
    # the network. Set FLASK_DEBUG=1 in the environment for local debugging.
    app.run(host="0.0.0.0")