added adjective handling (appending _ to headwords)
This commit is contained in:
parent
af4f6045bb
commit
c6b8426fb3
|
@ -1 +1,2 @@
|
|||
from corpusparser.Parser import Parser
|
||||
from corpusparser.main import enriched_lemma
|
|
@ -15,6 +15,10 @@ logger = logging.getLogger(__name__)
|
|||
n_kres_files = -1 # for logging
|
||||
|
||||
|
||||
def enriched_lemma(token):
    """Return the headword form of a token's lemma.

    The lemma is returned unchanged when the first element of the
    token's ``msd`` tag is ``"G"``; in every other case an underscore is
    appended to it.

    NOTE(review): presumably ``"G"`` marks verbs in the MSD tagset, so the
    suffix separates non-verb headwords (e.g. adjectives) from verbs with
    the same lemma -- confirm against the tagset in use.

    Args:
        token: Mapping with at least the keys ``"lemma"`` and ``"msd"``.

    Returns:
        The lemma, with ``"_"`` appended unless the tag starts with ``"G"``.
    """
    msd = token["msd"]
    # An empty tag cannot start with "G"; check for it explicitly instead of
    # letting msd[0] raise IndexError.
    if msd and msd[0] == "G":
        return token["lemma"]
    return token["lemma"] + "_"
|
||||
|
||||
|
||||
def _helper_tid_to_token(tid, tokens):
|
||||
for t in tokens:
|
||||
if t["tid"] == tid:
|
||||
|
@ -29,7 +33,7 @@ def _db_preprocess(e):
|
|||
else:
|
||||
hw_tids = list(set([x["from"] for x in e["srl_links"]]))
|
||||
hw_tokens = [_helper_tid_to_token(tid, e["tokens"]) for tid in hw_tids]
|
||||
headwords = [(t["lemma"] if t["msd"][0] == "G" else t["lemma"] + "_") for t in hw_tokens]
|
||||
headwords = [enriched_lemma(t) for t in hw_tokens]
|
||||
e["headwords"] = headwords
|
||||
|
||||
functors = list(set([x["afun"] for x in e["srl_links"]]))
|
||||
|
|
Loading…
Reference in New Issue
Block a user