Browse Source

Initial setup

master
Luka 7 months ago
parent
commit
30961c7e1c
  1. 4
      .gitignore
  2. 6
      Dockerfile
  3. 73
      README.md
  4. 107
      app.py
  5. 23
      requirements.txt

4
.gitignore

@ -0,0 +1,4 @@
.idea/
data/
__pycache__/
venv/

6
Dockerfile

@ -0,0 +1,6 @@
# Base image: official CPython 3.9.1.
FROM python:3.9.1
# Add the whole build context (the repository checkout) to /classla-api.
ADD . /classla-api
WORKDIR /classla-api
# Install the pinned dependencies listed in requirements.txt.
RUN pip install -r requirements.txt
# Start Flask's built-in server, listening on all interfaces so the API is
# reachable from outside the container.
# NOTE(review): presumably relies on Flask discovering the application in
# app.py by default, since FLASK_APP is not set here - confirm.
CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0"]

73
README.md

@ -1,2 +1,75 @@
# classla-api
## Description
This tool exposes the classla library through an HTTP API. It provides endpoints for several preset classla configurations, as well as an endpoint that accepts custom settings.
## Standard UD
Preset classla settings:
```json
{
"lang": "sl",
"pos_lemma_pretag": false
}
```
Usage example:
```commandline
curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' http://127.0.0.1:5000/standard-ud
```
## Standard JOS
Preset classla settings:
```json
{
"lang": "sl",
"pos_use_lexicon": true,
"type": "standard_jos"
}
```
Usage example:
```commandline
curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' http://127.0.0.1:5000/standard-jos
```
## Nonstandard UD
Preset classla settings:
```json
{
"lang": "sl",
"pos_use_lexicon": true,
"type": "nonstandard"
}
```
Usage example:
```commandline
curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' http://127.0.0.1:5000/nonstandard-ud
```
## Nonstandard JOS
Preset classla settings:
```json
{
"lang": "sl",
"pos_use_lexicon": true,
"processors": {
"tokenize": "nonstandard",
"lemma": "nonstandard",
"pos": "nonstandard",
"depparse": "standard_jos",
"ner": "nonstandard"
}
}
```
Usage example:
```commandline
curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' http://127.0.0.1:5000/nonstandard-jos
```
## Custom settings
Custom settings may be used; however, they must comply with what the library allows (see https://github.com/clarinsi/classla for the supported options).
### Warning: Using custom settings is slow! It may take more than 30 seconds to get a result!
Usage example:
```commandline
curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi.", "settings": {"lang": "sl", "pos_lemma_pretag": false}}' http://127.0.0.1:5000/custom-settings
```

107
app.py

@ -0,0 +1,107 @@
import gc
import os
from datetime import timedelta

import classla
import torch
from flask import Flask, request, jsonify, session
# Fetch the Slovenian classla resources used by the preset pipelines below.
# These calls run every time the module is imported.
classla.download('sl')
classla.download('sl', type='standard_jos')
classla.download('sl', type='nonstandard')

print(f'USE GPU = {torch.cuda.is_available()}')

# The preset pipelines are built once at import time and reused by every
# request, so only the /custom-settings endpoint pays the construction cost
# per call.
# NOTE(review): the README documents the standard-ud preset as
# {"pos_lemma_pretag": false}, while the code passes pos_use_lexicon=True -
# confirm which one is intended.
nlp_standard_UD = classla.Pipeline('sl', pos_use_lexicon=True)
nlp_standard_JOS = classla.Pipeline('sl', pos_use_lexicon=True, type='standard_jos')
nlp_nonstandard_UD = classla.Pipeline('sl', type='nonstandard')
nlp_nonstandard_JOS = classla.Pipeline('sl', processors={
    "tokenize": "nonstandard",
    "lemma": "nonstandard",
    "pos": "nonstandard",
    "depparse": "standard_jos",
    "ner": "nonstandard",
})

app = Flask(__name__)
app.config['SESSION_TYPE'] = 'filesystem'
# Prefer a secret supplied through the environment so deployments do not have
# to share the key committed to the repository. The literal is kept only as a
# backward-compatible fallback and should not be relied on in production.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', "78df924389h138g01308ghj0d913")
# app.config['SESSION_PERMANENT']= False
# @app.before_request
# def make_session_permanent():
# session.permanent = True
# app.permanent_session_lifetime = timedelta(seconds=20)
# # app.permanent_session_lifetime = timedelta(minutes=1)
@app.route('/')
def hello_world():
    """Answer requests to the root URL with a fixed plain-text greeting."""
    greeting = 'This is my first API call!'
    return greeting
# @app.route('/run', methods=['GET', "POST"])
# def run():
# if 'settings' not in session:
# return jsonify({'status': 'ERROR', 'details': 'No settings uploaded!'})
#
# input_json = request.get_json(force=True)
# doc = session['nlp'](input_json['text'])
#
# return doc.to_conll()
#
# # return 'NOT IN SESSION!\n'
# @app.route('/upload-settings', methods=['POST'])
# def upload_settings():
# input_json = request.get_json(force=True)
# try:
# settings = input_json['settings']
# session['nlp_id'] = classla.Pipeline(**settings)
# except:
# return jsonify({'status': 'ERROR', 'details': 'Incorrect settings!'})
#
# return jsonify({'status': 'OK'})
@app.route('/custom-settings', methods=["POST"])
def custom_settings():
    """Process text with a pipeline built from caller-supplied settings.

    The request body is parsed as JSON regardless of its Content-Type and
    must be an object with two keys:

    * ``text`` - the text to process.
    * ``settings`` - an object whose entries are passed as keyword
      arguments to ``classla.Pipeline``.

    Returns:
        The ``to_conll()`` output of the processed document, or an error
        message with HTTP status 400 when ``settings`` or ``text`` is
        missing or ``settings`` is not a JSON object.
    """
    input_json = request.get_json(force=True)

    # Guard clauses: report malformed requests as client errors instead of
    # letting a KeyError/TypeError surface as an HTTP 500.
    if not isinstance(input_json, dict) or 'settings' not in input_json:
        return 'ERROR `settings` were not given!', 400
    if 'text' not in input_json:
        return 'ERROR `text` was not given!', 400

    settings = input_json['settings']
    if not isinstance(settings, dict):
        return 'ERROR `settings` must be a JSON object!', 400

    # NOTE(security): `settings` comes straight from the request and is
    # forwarded unfiltered to classla.Pipeline. Consider restricting it to an
    # allow-list of options before exposing this endpoint publicly.
    nlp = classla.Pipeline(**settings)
    try:
        result = nlp(input_json['text']).to_conll()
    finally:
        # A pipeline is built per request; drop it (best effort) even when
        # processing fails so its models do not linger between requests.
        del nlp
        gc.collect()
    return result
@app.route('/standard-ud', methods=["POST"])
def standard_ud():
    """Process text with the preset ``nlp_standard_UD`` pipeline.

    The request body is parsed as JSON regardless of its Content-Type and
    must be an object containing a ``text`` key.

    Returns:
        The ``to_conll()`` output of the processed document, or an error
        message with HTTP status 400 when ``text`` is missing.
    """
    input_json = request.get_json(force=True)
    # Report a missing `text` as a client error rather than an HTTP 500.
    if not isinstance(input_json, dict) or 'text' not in input_json:
        return 'ERROR `text` was not given!', 400
    doc = nlp_standard_UD(input_json['text'])
    return doc.to_conll()
@app.route('/standard-jos', methods=["POST"])
def standard_jos():
    """Process text with the preset ``nlp_standard_JOS`` pipeline.

    The request body is parsed as JSON regardless of its Content-Type and
    must be an object containing a ``text`` key.

    Returns:
        The ``to_conll()`` output of the processed document, or an error
        message with HTTP status 400 when ``text`` is missing.
    """
    input_json = request.get_json(force=True)
    # Report a missing `text` as a client error rather than an HTTP 500.
    if not isinstance(input_json, dict) or 'text' not in input_json:
        return 'ERROR `text` was not given!', 400
    doc = nlp_standard_JOS(input_json['text'])
    return doc.to_conll()
@app.route('/nonstandard-ud', methods=["POST"])
def nonstandard_ud():
    """Process text with the preset ``nlp_nonstandard_UD`` pipeline.

    The request body is parsed as JSON regardless of its Content-Type and
    must be an object containing a ``text`` key.

    Returns:
        The ``to_conll()`` output of the processed document, or an error
        message with HTTP status 400 when ``text`` is missing.
    """
    input_json = request.get_json(force=True)
    # Report a missing `text` as a client error rather than an HTTP 500.
    if not isinstance(input_json, dict) or 'text' not in input_json:
        return 'ERROR `text` was not given!', 400
    doc = nlp_nonstandard_UD(input_json['text'])
    return doc.to_conll()
@app.route('/nonstandard-jos', methods=["POST"])
def nonstandard_jos():
    """Process text with the preset ``nlp_nonstandard_JOS`` pipeline.

    The request body is parsed as JSON regardless of its Content-Type and
    must be an object containing a ``text`` key.

    Returns:
        The ``to_conll()`` output of the processed document, or an error
        message with HTTP status 400 when ``text`` is missing.
    """
    input_json = request.get_json(force=True)
    # Report a missing `text` as a client error rather than an HTTP 500.
    if not isinstance(input_json, dict) or 'text' not in input_json:
        return 'ERROR `text` was not given!', 400
    doc = nlp_nonstandard_JOS(input_json['text'])
    return doc.to_conll()
if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute code. Because the server binds to all
    # interfaces, debug is opt-in through FLASK_DEBUG instead of always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() not in ('', '0', 'false', 'no')
    app.run(host="0.0.0.0", debug=debug_enabled)

23
requirements.txt

@ -0,0 +1,23 @@
certifi==2021.10.8
charset-normalizer==2.0.8
classla==1.1.0
click==8.0.3
Flask==2.0.2
idna==3.3
importlib-metadata==4.8.2
itsdangerous==2.0.1
Jinja2==3.0.3
lxml==4.6.4
MarkupSafe==2.0.1
numpy==1.21.4
obeliks==1.1.3
protobuf==3.19.1
regex==2021.11.10
reldi-tokeniser==1.0.0
requests==2.26.0
torch==1.10.0
tqdm==4.62.3
typing_extensions==4.0.1
urllib3==1.26.7
Werkzeug==2.0.2
zipp==3.6.0
Loading…
Cancel
Save