Added other languages.
This commit is contained in:
		
							parent
							
								
									f406ccc9fc
								
							
						
					
					
						commit
						d81b8dd513
					
				
							
								
								
									
										98
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										98
									
								
								README.md
									
									
									
									
									
								
							| @ -3,20 +3,20 @@ | |||||||
| ## Description | ## Description | ||||||
| This tool uses the classla library as an API. It allows for calls with some preset classla settings, as well as with custom ones. | This tool uses the classla library as an API. It allows for calls with some preset classla settings, as well as with custom ones. | ||||||
| 
 | 
 | ||||||
| ## Standard UD | ## Slovenian Standard UD | ||||||
| Preset classla settings: | Preset classla settings: | ||||||
| ```json | ```json | ||||||
| { | { | ||||||
|   "lang": "sl", |   "lang": "sl", | ||||||
|   "pos_lemma_pretag": false |   "pos_use_lexicon": true | ||||||
| } | } | ||||||
| ``` | ``` | ||||||
| Usage example: | Usage example: | ||||||
| ```commandline | ```commandline | ||||||
| curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' http://127.0.0.1:5000/standard-ud | curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' https://orodja.cjvt.si/oznacevalnik/standard-ud | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| ## Standard JOS | ## Slovenian Standard JOS | ||||||
| Preset classla settings: | Preset classla settings: | ||||||
| ```json | ```json | ||||||
| { | { | ||||||
| @ -27,24 +27,24 @@ Preset classla settings: | |||||||
| ``` | ``` | ||||||
| Usage example: | Usage example: | ||||||
| ```commandline | ```commandline | ||||||
| curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' http://127.0.0.1:5000/standard-jos | curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi."}' https://orodja.cjvt.si/oznacevalnik/standard-jos | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| ## Nonstandard UD | ## Slovenian Nonstandard UD | ||||||
| Preset classla settings: | Preset classla settings: | ||||||
| ```json | ```json | ||||||
| { | { | ||||||
|   "lang": "sl", |   "lang": "sl", | ||||||
|   "pos_use_lexicon": true, |   "pos_use_lexicon": true, | ||||||
|   "type": "nonstandard_jos" |   "type": "nonstandard" | ||||||
| } | } | ||||||
| ``` | ``` | ||||||
| Usage example: | Usage example: | ||||||
| ```commandline | ```commandline | ||||||
| curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' http://127.0.0.1:5000/nonstandard-ud | curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' https://orodja.cjvt.si/oznacevalnik/nonstandard-ud | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| ## Nonstandard JOS | ## Slovenian Nonstandard JOS | ||||||
| Preset classla settings: | Preset classla settings: | ||||||
| ```json | ```json | ||||||
| { | { | ||||||
| @ -61,9 +61,85 @@ Preset classla settings: | |||||||
| ``` | ``` | ||||||
| Usage example: | Usage example: | ||||||
| ```commandline | ```commandline | ||||||
| curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' http://127.0.0.1:5000/nonstandard-jos | curl -X POST -d '{"text": "kva smo mi zurali zadnje leto v zagrebu..."}' https://orodja.cjvt.si/oznacevalnik/nonstandard-jos | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  | ## Croatian Standard UD | ||||||
|  | Preset classla settings: | ||||||
|  | ```json | ||||||
|  | { | ||||||
|  |   "lang": "hr" | ||||||
|  | } | ||||||
|  | ``` | ||||||
|  | Usage example: | ||||||
|  | ```commandline | ||||||
|  | curl -X POST -d '{"text": "Ante Starčević rođen je u Velikom Žitniku."}' https://orodja.cjvt.si/oznacevalnik/hr-standard-ud | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | ## Croatian Nonstandard UD | ||||||
|  | Preset classla settings: | ||||||
|  | ```json | ||||||
|  | { | ||||||
|  |   "lang": "hr", | ||||||
|  |   "type": "nonstandard" | ||||||
|  | } | ||||||
|  | ``` | ||||||
|  | Usage example: | ||||||
|  | ```commandline | ||||||
|  | curl -X POST -d '{"text": "kaj sam ja tulumaril jucer u ljubljani..."}' https://orodja.cjvt.si/oznacevalnik/hr-nonstandard-ud | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | ## Serbian Standard UD | ||||||
|  | Preset classla settings: | ||||||
|  | ```json | ||||||
|  | { | ||||||
|  |   "lang": "sr" | ||||||
|  | } | ||||||
|  | ``` | ||||||
|  | Usage example: | ||||||
|  | ```commandline | ||||||
|  | curl -X POST -d '{"text": "Slobodan Jovanović rođen je u Novom Sadu."}' https://orodja.cjvt.si/oznacevalnik/sr-standard-ud | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | ## Serbian Nonstandard UD | ||||||
|  | Preset classla settings: | ||||||
|  | ```json | ||||||
|  | { | ||||||
|  |   "lang": "sr", | ||||||
|  |   "type": "nonstandard" | ||||||
|  | } | ||||||
|  | ``` | ||||||
|  | Usage example: | ||||||
|  | ```commandline | ||||||
|  | curl -X POST -d '{"text": "ne mogu da verujem kakvo je zezanje bilo prosle godine u zagrebu..."}' https://orodja.cjvt.si/oznacevalnik/sr-nonstandard-ud | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | ## Bulgarian Standard UD | ||||||
|  | Preset classla settings: | ||||||
|  | ```json | ||||||
|  | { | ||||||
|  |   "lang": "bg" | ||||||
|  | } | ||||||
|  | ``` | ||||||
|  | Usage example: | ||||||
|  | ```commandline | ||||||
|  | curl -X POST -d '{"text": "Алеко Константинов е роден в Свищов."}' https://orodja.cjvt.si/oznacevalnik/bg-standard-ud | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | ## Macedonian Standard UD | ||||||
|  | Preset classla settings: | ||||||
|  | ```json | ||||||
|  | { | ||||||
|  |   "lang": "mk" | ||||||
|  | } | ||||||
|  | ``` | ||||||
|  | Usage example: | ||||||
|  | ```commandline | ||||||
|  | curl -X POST -d '{"text": "Крсте Петков Мисирков е роден во Постол."}' https://orodja.cjvt.si/oznacevalnik/mk-standard-ud | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| ## Custom settings | ## Custom settings | ||||||
| Custom settings may be used; however, they have to comply with what the library allows (you can check this at https://github.com/clarinsi/classla) | Custom settings may be used; however, they have to comply with what the library allows (you can check this at https://github.com/clarinsi/classla) | ||||||
| 
 | 
 | ||||||
| @ -71,5 +147,5 @@ Custom settings may be used, however they have to be in compliance with what the | |||||||
| 
 | 
 | ||||||
| Usage example: | Usage example: | ||||||
| ```commandline | ```commandline | ||||||
| curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi.", "settings": {"lang": "sl", "pos_lemma_pretag": false}}' http://127.0.0.1:5000/custom-settings | curl -X POST -d '{"text": "France Prešeren je rojen v Vrbi.", "settings": {"lang": "sl", "pos_lemma_pretag": false}}' https://orodja.cjvt.si/oznacevalnik/custom-settings | ||||||
| ``` | ``` | ||||||
							
								
								
									
										58
									
								
								app.py
									
									
									
									
									
								
							
							
						
						
									
										58
									
								
								app.py
									
									
									
									
									
								
							| @ -1,10 +1,17 @@ | |||||||
| from flask import Flask, request | from flask import Flask, request | ||||||
| import classla | import classla | ||||||
| import gc | import gc | ||||||
|  | import torch | ||||||
| 
 | 
 | ||||||
| classla.download('sl') | classla.download('sl') | ||||||
| classla.download('sl', type='standard_jos') | classla.download('sl', type='standard_jos') | ||||||
| classla.download('sl', type='nonstandard') | classla.download('sl', type='nonstandard') | ||||||
|  | classla.download('hr') | ||||||
|  | classla.download('hr', type='nonstandard') | ||||||
|  | classla.download('sr') | ||||||
|  | classla.download('sr', type='nonstandard') | ||||||
|  | classla.download('mk') | ||||||
|  | classla.download('bg') | ||||||
| 
 | 
 | ||||||
| nlp_standard_UD = classla.Pipeline('sl', pos_use_lexicon=True) | nlp_standard_UD = classla.Pipeline('sl', pos_use_lexicon=True) | ||||||
| nlp_standard_JOS = classla.Pipeline('sl', pos_use_lexicon=True, type='standard_jos') | nlp_standard_JOS = classla.Pipeline('sl', pos_use_lexicon=True, type='standard_jos') | ||||||
| @ -16,6 +23,12 @@ nlp_nonstandard_JOS = classla.Pipeline('sl', processors={ | |||||||
|       "depparse": "standard_jos", |       "depparse": "standard_jos", | ||||||
|       "ner": "nonstandard" |       "ner": "nonstandard" | ||||||
|     }) |     }) | ||||||
|  | nlp_hr_standard_UD = classla.Pipeline('hr') | ||||||
|  | nlp_hr_nonstandard_UD = classla.Pipeline('hr', type='nonstandard') | ||||||
|  | nlp_sr_standard_UD = classla.Pipeline('sr') | ||||||
|  | nlp_sr_nonstandard_UD = classla.Pipeline('sr', type='nonstandard') | ||||||
|  | nlp_bg_standard_UD = classla.Pipeline('bg') | ||||||
|  | nlp_mk_standard_UD = classla.Pipeline('mk') | ||||||
| app = Flask(__name__) | app = Flask(__name__) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -30,10 +43,10 @@ def custom_settings(): | |||||||
|     if 'settings' in input_json: |     if 'settings' in input_json: | ||||||
|         settings = input_json['settings'] |         settings = input_json['settings'] | ||||||
|         nlp = classla.Pipeline(**settings) |         nlp = classla.Pipeline(**settings) | ||||||
|         # classla.Pipeline('sl', processors='tokenize,pos,lemma', pos_use_lexicon=True) |  | ||||||
|         result = nlp(input_json['text']).to_conll() |         result = nlp(input_json['text']).to_conll() | ||||||
|         del(nlp) |         del(nlp) | ||||||
|         gc.collect() |         gc.collect() | ||||||
|  |         torch.cuda.empty_cache() | ||||||
|     else: |     else: | ||||||
|         return f'ERROR `settings` were not given!' |         return f'ERROR `settings` were not given!' | ||||||
|     return result |     return result | ||||||
| @ -67,5 +80,48 @@ def nonstandard_jos(): | |||||||
|     return doc.to_conll() |     return doc.to_conll() | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @app.route('/hr-standard-ud', methods=["POST"]) | ||||||
|  | def hr_standard_ud(): | ||||||
|  |     input_json = request.get_json(force=True) | ||||||
|  |     doc = nlp_hr_standard_UD(input_json['text']) | ||||||
|  |     return doc.to_conll() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @app.route('/hr-nonstandard-ud', methods=["POST"]) | ||||||
|  | def hr_nonstandard_ud(): | ||||||
|  |     input_json = request.get_json(force=True) | ||||||
|  |     doc = nlp_hr_nonstandard_UD(input_json['text']) | ||||||
|  |     return doc.to_conll() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @app.route('/sr-standard-ud', methods=["POST"]) | ||||||
|  | def sr_standard_ud(): | ||||||
|  |     input_json = request.get_json(force=True) | ||||||
|  |     doc = nlp_sr_standard_UD(input_json['text']) | ||||||
|  |     return doc.to_conll() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @app.route('/sr-nonstandard-ud', methods=["POST"]) | ||||||
|  | def sr_nonstandard_ud(): | ||||||
|  |     input_json = request.get_json(force=True) | ||||||
|  |     doc = nlp_sr_nonstandard_UD(input_json['text']) | ||||||
|  |     return doc.to_conll() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @app.route('/bg-standard-ud', methods=["POST"]) | ||||||
|  | def bg_standard_ud(): | ||||||
|  |     input_json = request.get_json(force=True) | ||||||
|  |     doc = nlp_bg_standard_UD(input_json['text']) | ||||||
|  |     return doc.to_conll() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @app.route('/mk-standard-ud', methods=["POST"]) | ||||||
|  | def mk_standard_ud(): | ||||||
|  |     input_json = request.get_json(force=True) | ||||||
|  |     doc = nlp_mk_standard_UD(input_json['text']) | ||||||
|  |     return doc.to_conll() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     app.run(host="0.0.0.0") |     app.run(host="0.0.0.0") | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user