Added other languages.
This commit is contained in:
parent
f406ccc9fc
commit
d81b8dd513
98
README.md
98
README.md
|
@ -3,20 +3,20 @@
|
||||||
## Description
|
## Description
|
||||||
This tool uses classla library as an API. It allows for calls on some preset classla settings, as well as a custom one.
|
This tool uses classla library as an API. It allows for calls on some preset classla settings, as well as a custom one.
|
||||||
|
|
||||||
## Standard UD
|
## Slovenian Standard UD
|
||||||
Preset classla settings:
|
Preset classla settings:
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"lang": "sl",
|
"lang": "sl",
|
||||||
"pos_lemma_pretag": false
|
"pos_use_lexicon": true
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
Usage example:
|
Usage example:
|
||||||
```commandline
|
```commandline
|
||||||
curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' http://127.0.0.1:5000/standard-ud
|
curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' https://orodja.cjvt.si/oznacevalnik/standard-ud
|
||||||
```
|
```
|
||||||
|
|
||||||
## Standard JOS
|
## Slovenian Standard JOS
|
||||||
Preset classla settings:
|
Preset classla settings:
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
|
@ -27,24 +27,24 @@ Preset classla settings:
|
||||||
```
|
```
|
||||||
Usage example:
|
Usage example:
|
||||||
```commandline
|
```commandline
|
||||||
curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' http://127.0.0.1:5000/standard-jos
|
curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' https://orodja.cjvt.si/oznacevalnik/standard-jos
|
||||||
```
|
```
|
||||||
|
|
||||||
## Nonstandard UD
|
## Slovenian Nonstandard UD
|
||||||
Preset classla settings:
|
Preset classla settings:
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"lang": "sl",
|
"lang": "sl",
|
||||||
"pos_use_lexicon": true,
|
"pos_use_lexicon": true,
|
||||||
"type": "nonstandard_jos"
|
"type": "nonstandard"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
Usage example:
|
Usage example:
|
||||||
```commandline
|
```commandline
|
||||||
curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' http://127.0.0.1:5000/nonstandard-ud
|
curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' https://orodja.cjvt.si/oznacevalnik/nonstandard-ud
|
||||||
```
|
```
|
||||||
|
|
||||||
## Nonstandard JOS
|
## Slovenian Nonstandard JOS
|
||||||
Preset classla settings:
|
Preset classla settings:
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
|
@ -61,9 +61,85 @@ Preset classla settings:
|
||||||
```
|
```
|
||||||
Usage example:
|
Usage example:
|
||||||
```commandline
|
```commandline
|
||||||
curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' http://127.0.0.1:5000/nonstandard-jos
|
curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' https://orodja.cjvt.si/oznacevalnik/nonstandard-jos
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Croatian Standard UD
|
||||||
|
Preset classla settings:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"lang": "hr"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
Usage example:
|
||||||
|
```commandline
|
||||||
|
curl -X POST -d '{"text": "Ante Starčević rođen je u Velikom Žitniku."}' https://orodja.cjvt.si/oznacevalnik/hr-standard-ud
|
||||||
|
```
|
||||||
|
|
||||||
|
## Croatian Nonstandard UD
|
||||||
|
Preset classla settings:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"lang": "hr",
|
||||||
|
"type": "nonstandard"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
Usage example:
|
||||||
|
```commandline
|
||||||
|
curl -X POST -d '{"text": "kaj sam ja tulumaril jucer u ljubljani..."}' https://orodja.cjvt.si/oznacevalnik/hr-nonstandard-ud
|
||||||
|
```
|
||||||
|
|
||||||
|
## Serbian Standard UD
|
||||||
|
Preset classla settings:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"lang": "sr"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
Usage example:
|
||||||
|
```commandline
|
||||||
|
curl -X POST -d '{"text": "Slobodan Jovanović rođen je u Novom Sadu."}' https://orodja.cjvt.si/oznacevalnik/sr-standard-ud
|
||||||
|
```
|
||||||
|
|
||||||
|
## Serbian Nonstandard UD
|
||||||
|
Preset classla settings:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"lang": "sr",
|
||||||
|
"type": "nonstandard"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
Usage example:
|
||||||
|
```commandline
|
||||||
|
curl -X POST -d '{"text": "ne mogu da verujem kakvo je zezanje bilo prosle godine u zagrebu..."}' https://orodja.cjvt.si/oznacevalnik/sr-nonstandard-ud
|
||||||
|
```
|
||||||
|
|
||||||
|
## Bulgarian Standard UD
|
||||||
|
Preset classla settings:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"lang": "bg"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
Usage example:
|
||||||
|
```commandline
|
||||||
|
curl -X POST -d '{"text": "Алеко Константинов е роден в Свищов."}' https://orodja.cjvt.si/oznacevalnik/bg-standard-ud
|
||||||
|
```
|
||||||
|
|
||||||
|
## Macedonian Standard UD
|
||||||
|
Preset classla settings:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"lang": "mk"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
Usage example:
|
||||||
|
```commandline
|
||||||
|
curl -X POST -d '{"text": "Крсте Петков Мисирков е роден во Постол."}' https://orodja.cjvt.si/oznacevalnik/mk-standard-ud
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
## Custom settings
|
## Custom settings
|
||||||
Custom settings may be used, however they have to be in compliance with what the library allows (you can check this on https://github.com/clarinsi/classla)
|
Custom settings may be used, however they have to be in compliance with what the library allows (you can check this on https://github.com/clarinsi/classla)
|
||||||
|
|
||||||
|
@ -71,5 +147,5 @@ Custom settings may be used, however they have to be in compliance with what the
|
||||||
|
|
||||||
Usage example:
|
Usage example:
|
||||||
```commandline
|
```commandline
|
||||||
curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi.", "settings": {"lang": "sl", "pos_lemma_pretag": false}}' http://127.0.0.1:5000/custom-settings
|
curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi.", "settings": {"lang": "sl", "pos_lemma_pretag": false}}' https://orodja.cjvt.si/oznacevalnik/custom-settings
|
||||||
```
|
```
|
58
app.py
58
app.py
|
@ -1,10 +1,17 @@
|
||||||
from flask import Flask, request
|
from flask import Flask, request
|
||||||
import classla
|
import classla
|
||||||
import gc
|
import gc
|
||||||
|
import torch
|
||||||
|
|
||||||
classla.download('sl')
|
classla.download('sl')
|
||||||
classla.download('sl', type='standard_jos')
|
classla.download('sl', type='standard_jos')
|
||||||
classla.download('sl', type='nonstandard')
|
classla.download('sl', type='nonstandard')
|
||||||
|
classla.download('hr')
|
||||||
|
classla.download('hr', type='nonstandard')
|
||||||
|
classla.download('sr')
|
||||||
|
classla.download('sr', type='nonstandard')
|
||||||
|
classla.download('mk')
|
||||||
|
classla.download('bg')
|
||||||
|
|
||||||
nlp_standard_UD = classla.Pipeline('sl', pos_use_lexicon=True)
|
nlp_standard_UD = classla.Pipeline('sl', pos_use_lexicon=True)
|
||||||
nlp_standard_JOS = classla.Pipeline('sl', pos_use_lexicon=True, type='standard_jos')
|
nlp_standard_JOS = classla.Pipeline('sl', pos_use_lexicon=True, type='standard_jos')
|
||||||
|
@ -16,6 +23,12 @@ nlp_nonstandard_JOS = classla.Pipeline('sl', processors={
|
||||||
"depparse": "standard_jos",
|
"depparse": "standard_jos",
|
||||||
"ner": "nonstandard"
|
"ner": "nonstandard"
|
||||||
})
|
})
|
||||||
|
nlp_hr_standard_UD = classla.Pipeline('hr')
|
||||||
|
nlp_hr_nonstandard_UD = classla.Pipeline('hr', type='nonstandard')
|
||||||
|
nlp_sr_standard_UD = classla.Pipeline('sr')
|
||||||
|
nlp_sr_nonstandard_UD = classla.Pipeline('sr', type='nonstandard')
|
||||||
|
nlp_bg_standard_UD = classla.Pipeline('bg')
|
||||||
|
nlp_mk_standard_UD = classla.Pipeline('mk')
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,10 +43,10 @@ def custom_settings():
|
||||||
if 'settings' in input_json:
|
if 'settings' in input_json:
|
||||||
settings = input_json['settings']
|
settings = input_json['settings']
|
||||||
nlp = classla.Pipeline(**settings)
|
nlp = classla.Pipeline(**settings)
|
||||||
# classla.Pipeline('sl', processors='tokenize,pos,lemma', pos_use_lexicon=True)
|
|
||||||
result = nlp(input_json['text']).to_conll()
|
result = nlp(input_json['text']).to_conll()
|
||||||
del(nlp)
|
del(nlp)
|
||||||
gc.collect()
|
gc.collect()
|
||||||
|
torch.cuda.empty_cache()
|
||||||
else:
|
else:
|
||||||
return f'ERROR `settings` were not given!'
|
return f'ERROR `settings` were not given!'
|
||||||
return result
|
return result
|
||||||
|
@ -67,5 +80,48 @@ def nonstandard_jos():
|
||||||
return doc.to_conll()
|
return doc.to_conll()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/hr-standard-ud', methods=["POST"])
|
||||||
|
def hr_standard_ud():
|
||||||
|
input_json = request.get_json(force=True)
|
||||||
|
doc = nlp_hr_standard_UD(input_json['text'])
|
||||||
|
return doc.to_conll()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/hr-nonstandard-ud', methods=["POST"])
|
||||||
|
def hr_nonstandard_ud():
|
||||||
|
input_json = request.get_json(force=True)
|
||||||
|
doc = nlp_hr_nonstandard_UD(input_json['text'])
|
||||||
|
return doc.to_conll()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/sr-standard-ud', methods=["POST"])
|
||||||
|
def sr_standard_ud():
|
||||||
|
input_json = request.get_json(force=True)
|
||||||
|
doc = nlp_sr_standard_UD(input_json['text'])
|
||||||
|
return doc.to_conll()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/sr-nonstandard-ud', methods=["POST"])
|
||||||
|
def sr_nonstandard_ud():
|
||||||
|
input_json = request.get_json(force=True)
|
||||||
|
doc = nlp_sr_nonstandard_UD(input_json['text'])
|
||||||
|
return doc.to_conll()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/bg-standard-ud', methods=["POST"])
|
||||||
|
def bg_standard_ud():
|
||||||
|
input_json = request.get_json(force=True)
|
||||||
|
doc = nlp_bg_standard_UD(input_json['text'])
|
||||||
|
return doc.to_conll()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/mk-standard-ud', methods=["POST"])
|
||||||
|
def mk_standard_ud():
|
||||||
|
input_json = request.get_json(force=True)
|
||||||
|
doc = nlp_mk_standard_UD(input_json['text'])
|
||||||
|
return doc.to_conll()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
app.run(host="0.0.0.0")
|
app.run(host="0.0.0.0")
|
||||||
|
|
Loading…
Reference in New Issue
Block a user