diff --git a/README.md b/README.md index b0ebfd3..4986790 100644 --- a/README.md +++ b/README.md @@ -3,20 +3,20 @@ ## Description This tool uses classla library as an API. It allows for calls on some preset classla settings, as well as a custom one. -## Standard UD +## Slovenian Standard UD Preset classla settings: ```json { "lang": "sl", - "pos_lemma_pretag": false + "pos_use_lexicon": true } ``` Usage example: ```commandline -curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' http://127.0.0.1:5000/standard-ud +curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' https://orodja.cjvt.si/oznacevalnik/standard-ud ``` -## Standard JOS +## Slovenian Standard JOS Preset classla settings: ```json { @@ -27,24 +27,24 @@ Preset classla settings: ``` Usage example: ```commandline -curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' http://127.0.0.1:5000/standard-jos +curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' https://orodja.cjvt.si/oznacevalnik/standard-jos ``` -## Nonstandard UD +## Slovenian Nonstandard UD Preset classla settings: ```json { "lang": "sl", "pos_use_lexicon": true, - "type": "nonstandard_jos" + "type": "nonstandard" } ``` Usage example: ```commandline -curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' http://127.0.0.1:5000/nonstandard-ud +curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' https://orodja.cjvt.si/oznacevalnik/nonstandard-ud ``` -## Nonstandard JOS +## Slovenian Nonstandard JOS Preset classla settings: ```json { @@ -61,9 +61,85 @@ Preset classla settings: ``` Usage example: ```commandline -curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' http://127.0.0.1:5000/nonstandard-jos +curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' https://orodja.cjvt.si/oznacevalnik/nonstandard-jos ``` + +## Croatian Standard UD +Preset classla settings: +```json +{ + "lang": "hr" +} +``` +Usage example: +```commandline 
+curl -X POST -d '{"text": "Ante Starčević rođen je u Velikom Žitniku."}' https://orodja.cjvt.si/oznacevalnik/hr-standard-ud +``` + +## Croatian Nonstandard UD +Preset classla settings: +```json +{ + "lang": "hr", + "type": "nonstandard" +} +``` +Usage example: +```commandline +curl -X POST -d '{"text": "kaj sam ja tulumaril jucer u ljubljani..."}' https://orodja.cjvt.si/oznacevalnik/hr-nonstandard-ud +``` + +## Serbian Standard UD +Preset classla settings: +```json +{ + "lang": "sr" +} +``` +Usage example: +```commandline +curl -X POST -d '{"text": "Slobodan Jovanović rođen je u Novom Sadu."}' https://orodja.cjvt.si/oznacevalnik/sr-standard-ud +``` + +## Serbian Nonstandard UD +Preset classla settings: +```json +{ + "lang": "sr", + "type": "nonstandard" +} +``` +Usage example: +```commandline +curl -X POST -d '{"text": "ne mogu da verujem kakvo je zezanje bilo prosle godine u zagrebu..."}' https://orodja.cjvt.si/oznacevalnik/sr-nonstandard-ud +``` + +## Bulgarian Standard UD +Preset classla settings: +```json +{ + "lang": "bg" +} +``` +Usage example: +```commandline +curl -X POST -d '{"text": "Алеко Константинов е роден в Свищов."}' https://orodja.cjvt.si/oznacevalnik/bg-standard-ud +``` + +## Macedonian Standard UD +Preset classla settings: +```json +{ + "lang": "mk" +} +``` +Usage example: +```commandline +curl -X POST -d '{"text": "Крсте Петков Мисирков е роден во Постол."}' https://orodja.cjvt.si/oznacevalnik/mk-standard-ud +``` + + ## Custom settings Custom settings may be used, however they have to be in compliance with what the library allows (you can check this on https://github.com/clarinsi/classla) @@ -71,5 +147,5 @@ Custom settings may be used, however they have to be in compliance with what the Usage example: ```commandline -curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi.", "settings": {"lang": "sl", "pos_lemma_pretag": false}}' http://127.0.0.1:5000/custom-settings +curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi.", 
"settings": {"lang": "sl", "pos_lemma_pretag": false}}' https://orodja.cjvt.si/oznacevalnik/custom-settings ``` \ No newline at end of file diff --git a/app.py b/app.py index ca60b2e..4cfcc3d 100644 --- a/app.py +++ b/app.py @@ -1,10 +1,17 @@ from flask import Flask, request import classla import gc +import torch classla.download('sl') classla.download('sl', type='standard_jos') classla.download('sl', type='nonstandard') +classla.download('hr') +classla.download('hr', type='nonstandard') +classla.download('sr') +classla.download('sr', type='nonstandard') +classla.download('mk') +classla.download('bg') nlp_standard_UD = classla.Pipeline('sl', pos_use_lexicon=True) nlp_standard_JOS = classla.Pipeline('sl', pos_use_lexicon=True, type='standard_jos') @@ -16,6 +23,12 @@ nlp_nonstandard_JOS = classla.Pipeline('sl', processors={ "depparse": "standard_jos", "ner": "nonstandard" }) +nlp_hr_standard_UD = classla.Pipeline('hr') +nlp_hr_nonstandard_UD = classla.Pipeline('hr', type='nonstandard') +nlp_sr_standard_UD = classla.Pipeline('sr') +nlp_sr_nonstandard_UD = classla.Pipeline('sr', type='nonstandard') +nlp_bg_standard_UD = classla.Pipeline('bg') +nlp_mk_standard_UD = classla.Pipeline('mk') app = Flask(__name__) @@ -30,10 +43,10 @@ def custom_settings(): if 'settings' in input_json: settings = input_json['settings'] nlp = classla.Pipeline(**settings) - # classla.Pipeline('sl', processors='tokenize,pos,lemma', pos_use_lexicon=True) result = nlp(input_json['text']).to_conll() del(nlp) gc.collect() + torch.cuda.empty_cache() else: return f'ERROR `settings` were not given!' 
return result @@ -67,5 +80,48 @@ def nonstandard_jos(): return doc.to_conll() +@app.route('/hr-standard-ud', methods=["POST"]) +def hr_standard_ud(): + input_json = request.get_json(force=True) + doc = nlp_hr_standard_UD(input_json['text']) + return doc.to_conll() + + +@app.route('/hr-nonstandard-ud', methods=["POST"]) +def hr_nonstandard_ud(): + input_json = request.get_json(force=True) + doc = nlp_hr_nonstandard_UD(input_json['text']) + return doc.to_conll() + + +@app.route('/sr-standard-ud', methods=["POST"]) +def sr_standard_ud(): + input_json = request.get_json(force=True) + doc = nlp_sr_standard_UD(input_json['text']) + return doc.to_conll() + + +@app.route('/sr-nonstandard-ud', methods=["POST"]) +def sr_nonstandard_ud(): + input_json = request.get_json(force=True) + doc = nlp_sr_nonstandard_UD(input_json['text']) + return doc.to_conll() + + +@app.route('/bg-standard-ud', methods=["POST"]) +def bg_standard_ud(): + input_json = request.get_json(force=True) + doc = nlp_bg_standard_UD(input_json['text']) + return doc.to_conll() + + +@app.route('/mk-standard-ud', methods=["POST"]) +def mk_standard_ud(): + input_json = request.get_json(force=True) + doc = nlp_mk_standard_UD(input_json['text']) + return doc.to_conll() + + + if __name__ == '__main__': app.run(host="0.0.0.0")