Added adjective handling: extracted enriched_lemma(), which appends "_" to the lemma of every headword whose MSD does not start with "G"

This commit is contained in:
voje 2019-04-19 07:41:50 +02:00
parent af4f6045bb
commit c6b8426fb3
2 changed files with 7 additions and 2 deletions

View File

@ -1 +1,2 @@
from corpusparser.Parser import Parser
from corpusparser.Parser import Parser
from corpusparser.main import enriched_lemma

View File

@ -15,6 +15,10 @@ logger = logging.getLogger(__name__)
n_kres_files = -1 # for logging
def enriched_lemma(token):
    """Return the headword form of *token*'s lemma.

    The lemma is returned unchanged when the first character of
    ``token["msd"]`` is ``"G"``; for every other tag an underscore is
    appended to the lemma.

    Args:
        token: Mapping that provides at least the ``"lemma"`` and ``"msd"``
            keys. ``"msd"`` must be non-empty, because its first element is
            inspected.

    Returns:
        The lemma, optionally suffixed with ``"_"``.
    """
    # NOTE(review): "G" presumably marks verbs in the MSD tagset, so that
    # non-verb headwords (e.g. adjectives) are told apart by the trailing
    # underscore -- confirm against the tagset documentation.
    starts_with_g = token["msd"][0] == "G"
    base = token["lemma"]
    if starts_with_g:
        return base
    return base + "_"
def _helper_tid_to_token(tid, tokens):
for t in tokens:
if t["tid"] == tid:
@ -29,7 +33,7 @@ def _db_preprocess(e):
else:
hw_tids = list(set([x["from"] for x in e["srl_links"]]))
hw_tokens = [_helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
headwords = [enriched_lemma(t) for t in hw_tokens]
e["headwords"] = headwords
functors = list(set([x["afun"] for x in e["srl_links"]]))