Added adjective handling (appending `_` to headwords)
This commit is contained in:
		
							parent
							
								
									af4f6045bb
								
							
						
					
					
						commit
						c6b8426fb3
					
				| @ -1 +1,2 @@ | |||||||
| from corpusparser.Parser import Parser | from corpusparser.Parser import Parser | ||||||
|  | from corpusparser.main import enriched_lemma | ||||||
| @ -15,6 +15,10 @@ logger = logging.getLogger(__name__) | |||||||
| n_kres_files = -1  # for logging | n_kres_files = -1  # for logging | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def enriched_lemma(token): | ||||||
|  |     return (token["lemma"] if token["msd"][0] == "G" else token["lemma"] + "_") | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def _helper_tid_to_token(tid, tokens): | def _helper_tid_to_token(tid, tokens): | ||||||
|     for t in tokens: |     for t in tokens: | ||||||
|         if t["tid"] == tid: |         if t["tid"] == tid: | ||||||
| @ -29,7 +33,7 @@ def _db_preprocess(e): | |||||||
|     else: |     else: | ||||||
|         hw_tids = list(set([x["from"] for x in e["srl_links"]])) |         hw_tids = list(set([x["from"] for x in e["srl_links"]])) | ||||||
|         hw_tokens = [_helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids] |         hw_tokens = [_helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids] | ||||||
|         headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens] |         headwords = [enriched_lemma(t) for t in hw_tokens] | ||||||
|         e["headwords"] = headwords |         e["headwords"] = headwords | ||||||
| 
 | 
 | ||||||
|         functors = list(set([x["afun"] for x in e["srl_links"]])) |         functors = list(set([x["afun"] for x in e["srl_links"]])) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user