forked from kristjan/cjvt-srl-tagging
srl tagging pipeline (output in .tsv)
This commit is contained in:
@@ -1,13 +0,0 @@
|
||||
# Pipeline entry points. None of these targets produce a file of the same
# name, so they are declared phony to make sure they always run.
.PHONY: all parse tag_srl tag_full

# Default target: run the parsing step, then the SRL tagging step.
all: parse tag_srl

# Parsing step (main.py).
parse:
	python3 main.py

# SRL-only tagging, run from inside the srl-20131216 directory.
# `&&` stops the recipe if the directory is missing; a trailing `cd -` is
# unnecessary because every recipe line runs in its own shell.
tag_srl:
	cd srl-20131216 && ./scripts/parse_srl_only_mod.sh

# Full parser pipeline, run from inside the srl-20131216 directory.
# The original recipe lacked the separator after `cd srl-20131216`, so the
# script path was handed to `cd` as an argument and never executed.
tag_full:
	cd srl-20131216 && ./scripts/parser_full_mod.sh

# Runs fpmodel.py inside fillpred_model.
fillpred_model/model:
	cd fillpred_model && python3 fpmodel.py
|
||||
@@ -4,4 +4,7 @@ data1.pickle:
|
||||
python3 step1.py
|
||||
|
||||
model.pickle: data1.pickle
|
||||
python3 step2.py
|
||||
python3 step2.py
|
||||
|
||||
clean:
|
||||
rm ./*.pickle
|
||||
0
tools/fillpred_model/__init__.py
Normal file
0
tools/fillpred_model/__init__.py
Normal file
@@ -2,39 +2,53 @@ from pathlib import Path
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
INFILE = "../../data/mate_train/sl.test.mate"
|
||||
# INFILE = "../../data/mate_train/sl.all.mate"
|
||||
OUTFILE = "data1.pickle"
|
||||
|
||||
ssj_mate = Path(INFILE)
|
||||
df = pd.read_csv(ssj_mate, sep='~', header=None)
|
||||
df = df.iloc[:,0].str.split('\t', n=14, expand=True)
|
||||
print(df.head())
|
||||
|
||||
"""
|
||||
msd_set = set()
|
||||
for i, r in df.iterrows():
|
||||
msd_set.update(r[6].split("|"))
|
||||
def gen_msdlabels(df):
|
||||
msd_set = set()
|
||||
for i, r in df.iterrows():
|
||||
msd_set.update(r[6].split("|"))
|
||||
return sorted(list(msd_set))
|
||||
|
||||
msdlabels = sorted(list(msd_set))
|
||||
"""
|
||||
msdlabels = ['!', '"', '#', '%', "'", '(', ')', '+Animate', '+Clitic', '+Definiteness', '+Negative', ',', '-', '-Animate', '-Definiteness', '-Negative', '.', '/', ':', ';', '?', 'Abbreviation', 'Adjective', 'Adposition', 'Adverb', 'Conjunction', 'Interjection', 'Noun', 'Numeral', 'Particle', 'Pronoun', 'Residual', 'Verb', 'accusative', 'auxiliary', 'biaspectual', 'bound', 'cardinal', 'common', 'comparative', 'conditional', 'coordinating', 'dative', 'demonstrative', 'digit', 'dual', 'feminine', 'first', 'foreign', 'future', 'general', 'genitive', 'imperative', 'indefinite', 'infinitive', 'instrumental', 'interrogative', 'letter', 'locative', 'main', 'masculine', 'negative', 'neuter', 'nominative', 'ordinal', 'participle', 'perfective', 'personal', 'plural', 'positive', 'possessive', 'present', 'progressive', 'pronominal', 'proper', 'reflexive', 'relative', 'roman', 'second', 'singular', 'special', 'subordinating', 'superlative', 'supine', 'third', '«', '°', '»', '‘', '’', '…']
|
||||
print("labels: \n", msdlabels)
|
||||
|
||||
labels = ["biti"] + msdlabels + ["fillpred"]
|
||||
ndf = pd.DataFrame(columns=labels, dtype=bool)
|
||||
for i, r in df.iterrows():
|
||||
lemma = r[2]
|
||||
msd = r[6]
|
||||
fillpred = r[12] # y
|
||||
def build_model_row(tsv_row):
|
||||
# input: tsv_row in conll_2009 format
|
||||
|
||||
# preprocessed -- all possible msd values, alphabetically sorted
|
||||
msdlabels = ['!', '"', '#', '%', "'", '(', ')', '+Animate', '+Clitic', '+Definiteness', '+Negative', ',', '-', '-Animate', '-Definiteness', '-Negative', '.', '/', ':', ';', '?', 'Abbreviation', 'Adjective', 'Adposition', 'Adverb', 'Conjunction', 'Interjection', 'Noun', 'Numeral', 'Particle', 'Pronoun', 'Residual', 'Verb', 'accusative', 'auxiliary', 'biaspectual', 'bound', 'cardinal', 'common', 'comparative', 'conditional', 'coordinating', 'dative', 'demonstrative', 'digit', 'dual', 'feminine', 'first', 'foreign', 'future', 'general', 'genitive', 'imperative', 'indefinite', 'infinitive', 'instrumental', 'interrogative', 'letter', 'locative', 'main', 'masculine', 'negative', 'neuter', 'nominative', 'ordinal', 'participle', 'perfective', 'personal', 'plural', 'positive', 'possessive', 'present', 'progressive', 'pronominal', 'proper', 'reflexive', 'relative', 'roman', 'second', 'singular', 'special', 'subordinating', 'superlative', 'supine', 'third', '«', '°', '»', '‘', '’', '…']
|
||||
|
||||
lemma = tsv_row[2]
|
||||
msd = tsv_row[6]
|
||||
fillpred = tsv_row[12] # Y
|
||||
|
||||
row = []
|
||||
row.append(lemma == "biti")
|
||||
row.extend([lb in msd.split("|") for lb in msdlabels])
|
||||
row.append(fillpred == "Y")
|
||||
if i % 1000 == 0:
|
||||
print(i, df.shape)
|
||||
ndf.loc[i] = row
|
||||
return row
|
||||
|
||||
print(ndf.head())
|
||||
ndf.to_pickle(OUTFILE)
|
||||
|
||||
if __name__ == "__main__":
|
||||
ssj_mate = Path(INFILE)
|
||||
df = pd.read_csv(ssj_mate, sep='~', header=None)
|
||||
df = df.iloc[:,0].str.split('\t', n=14, expand=True)
|
||||
print(df.head())
|
||||
|
||||
|
||||
# msdlabels = gen_msdlabels()
|
||||
msdlabels = ['!', '"', '#', '%', "'", '(', ')', '+Animate', '+Clitic', '+Definiteness', '+Negative', ',', '-', '-Animate', '-Definiteness', '-Negative', '.', '/', ':', ';', '?', 'Abbreviation', 'Adjective', 'Adposition', 'Adverb', 'Conjunction', 'Interjection', 'Noun', 'Numeral', 'Particle', 'Pronoun', 'Residual', 'Verb', 'accusative', 'auxiliary', 'biaspectual', 'bound', 'cardinal', 'common', 'comparative', 'conditional', 'coordinating', 'dative', 'demonstrative', 'digit', 'dual', 'feminine', 'first', 'foreign', 'future', 'general', 'genitive', 'imperative', 'indefinite', 'infinitive', 'instrumental', 'interrogative', 'letter', 'locative', 'main', 'masculine', 'negative', 'neuter', 'nominative', 'ordinal', 'participle', 'perfective', 'personal', 'plural', 'positive', 'possessive', 'present', 'progressive', 'pronominal', 'proper', 'reflexive', 'relative', 'roman', 'second', 'singular', 'special', 'subordinating', 'superlative', 'supine', 'third', '«', '°', '»', '‘', '’', '…']
|
||||
print("labels: \n", msdlabels)
|
||||
|
||||
labels = ["biti"] + msdlabels + ["fillpred"]
|
||||
ndf = pd.DataFrame(columns=labels, dtype=bool)
|
||||
for i, r in df.iterrows():
|
||||
ndf.loc[i] = build_model_row(r)
|
||||
if i % 1000 == 0:
|
||||
print(i, df.shape)
|
||||
|
||||
print(ndf.head())
|
||||
ndf.to_pickle(OUTFILE)
|
||||
|
||||
@@ -8,22 +8,23 @@ from sklearn.metrics import accuracy_score
|
||||
INFILE = "data1.pickle"
|
||||
OUTFILE = "model.pickle"
|
||||
|
||||
df = pd.read_pickle(INFILE)
|
||||
if __name__ == "__main__":
|
||||
df = pd.read_pickle(INFILE)
|
||||
|
||||
X = df.values[:,0:-1]
|
||||
y = df.values[:,-1]
|
||||
X = df.values[:,0:-1]
|
||||
y = df.values[:,-1]
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 100)
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 100)
|
||||
|
||||
clf = DecisionTreeClassifier()
|
||||
clf.fit(X_train, y_train)
|
||||
clf = DecisionTreeClassifier()
|
||||
clf.fit(X_train, y_train)
|
||||
|
||||
y_pred = clf.predict(X_test)
|
||||
y_pred = clf.predict(X_test)
|
||||
|
||||
print("Accuracy score: {:.4f}".format(accuracy_score(y_test, y_pred)*100))
|
||||
print("Accuracy score: {:.4f}".format(accuracy_score(y_test, y_pred)*100))
|
||||
|
||||
# above was a test, now fit the actual model using the entire data
|
||||
clf_full = DecisionTreeClassifier()
|
||||
clf_full.fit(X, y)
|
||||
# above was a test, now fit the actual model using the entire data
|
||||
clf_full = DecisionTreeClassifier()
|
||||
clf_full.fit(X, y)
|
||||
|
||||
pickle.dump(clf_full, open(OUTFILE, "wb"))
|
||||
pickle.dump(clf_full, open(OUTFILE, "wb"))
|
||||
|
||||
@@ -25,12 +25,9 @@ def main():
|
||||
# kres_file = "../data/kres_example/F0019343.xml.parsed.xml"
|
||||
kres_dir = Path("../data/kres_example/").resolve()
|
||||
|
||||
kres_out_dir = kres_dir.parent / (kres_dir.name + "_out")
|
||||
kres_out_dir = kres_dir.parent / (kres_dir.name + "_tsv")
|
||||
kres_out_dir.mkdir(exist_ok=True)
|
||||
|
||||
kres_full_out_dir = kres_dir.parent / (kres_dir.name + "_full_out")
|
||||
kres_full_out_dir.mkdir(exist_ok=True)
|
||||
|
||||
for kres_file in [x for x in kres_dir.iterdir() if x.is_file()]:
|
||||
|
||||
print("Processing file: " + str(kres_file))
|
||||
@@ -38,27 +35,13 @@ def main():
|
||||
longest_sent = max([len(e["tokens"]) for k, e in res_dict.items()])
|
||||
print("Longest sentence: ", longest_sent)
|
||||
kres_out_str = ""
|
||||
kres_full_out_str = ""
|
||||
|
||||
for _, sentence in res_dict.items():
|
||||
kres_out_str += par.to_conll_2009_SRL(sentence, longest_sent)
|
||||
kres_full_out_str += par.to_conll_2009_full(sentence)
|
||||
|
||||
# for SRL tagging
|
||||
try:
|
||||
with (kres_out_dir / kres_file.name).with_suffix(".tsv").open("wb+") as fp:
|
||||
fp.write(kres_out_str.encode("utf-8"))
|
||||
fp.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
try:
|
||||
# for full tokenization
|
||||
with (kres_full_out_dir / kres_file.name).with_suffix(".tsv").open("wb+") as fp:
|
||||
fp.write(kres_full_out_str.encode("utf-8"))
|
||||
fp.close()
|
||||
except:
|
||||
pass
|
||||
with (kres_out_dir / kres_file.name).with_suffix(".tsv").open("wb+") as fp:
|
||||
fp.write(kres_out_str.encode("utf-8"))
|
||||
fp.close()
|
||||
|
||||
print("end parsing kres")
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
from lxml import etree
|
||||
import re
|
||||
from parser.msd.msdmap import Msdmap
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
from fillpred_model.step1 import build_model_row
|
||||
|
||||
class Parser:
|
||||
# reads a TEI xml file and returns a dictionary:
|
||||
@@ -15,6 +18,8 @@ class Parser:
|
||||
self.W_TAGS = ['w']
|
||||
self.C_TAGS = ['c']
|
||||
self.S_TAGS = ['S', 'pc']
|
||||
with Path("./fillpred_model/model.pickle").open("rb") as fp:
|
||||
self.fillpred_model = pickle.load(fp)
|
||||
|
||||
def parse_tei(self, filepath):
|
||||
|
||||
@@ -111,11 +116,11 @@ class Parser:
|
||||
|
||||
def to_conll_2009_SRL(self, sentence_entry, napreds=9):
|
||||
|
||||
def fillpred(pos, feat):
|
||||
# TODO (decision tree or bayes on mate training data)
|
||||
if pos == "V" and "main" in feat.split("|"):
|
||||
return True
|
||||
return False
|
||||
def fillpred(tsv_row):
|
||||
mrow = build_model_row(tsv_row)
|
||||
x = mrow[:-1]
|
||||
y = self.fillpred_model.predict([x])
|
||||
return y[0] # bool
|
||||
|
||||
apreds_string = '\t'.join(["_" for x in range(napreds)])
|
||||
|
||||
@@ -137,7 +142,6 @@ class Parser:
|
||||
|
||||
pos = self.msdmap.slo_msd_to_eng_pos(token[4])
|
||||
feat = "|".join(self.msdmap.slo_msd_to_eng_long(token[4]).split(" "))
|
||||
fprd = fillpred(pos, feat)
|
||||
|
||||
"""
|
||||
print(t_id)
|
||||
@@ -146,10 +150,7 @@ class Parser:
|
||||
print(token)
|
||||
print(sentence_entry["links"])
|
||||
"""
|
||||
|
||||
# format: 14 + apreds
|
||||
out_str += '\t'.join(map(str,
|
||||
[
|
||||
row_list = [
|
||||
t_id,
|
||||
form,
|
||||
token[3], # lemma
|
||||
@@ -162,11 +163,18 @@ class Parser:
|
||||
sentence_entry["links"][t_id][2], # phead
|
||||
sentence_entry["links"][t_id][0], # deprel
|
||||
sentence_entry["links"][t_id][0], # pdeprel
|
||||
"Y" if fprd else "_", # fillpred
|
||||
token[3] if fprd else "_", # pred
|
||||
"_", # fillpred
|
||||
"_", # pred
|
||||
apreds_string,
|
||||
"\n",
|
||||
]
|
||||
]
|
||||
fprd = fillpred(row_list)
|
||||
row_list[12] = "Y" if fprd else "_"
|
||||
row_list[13] = token[3] if fprd else "_"
|
||||
|
||||
# format: 14 + apreds
|
||||
out_str += '\t'.join(map(str,
|
||||
row_list
|
||||
))
|
||||
out_str += "\n" # newline at the end of sentence
|
||||
return out_str
|
||||
|
||||
@@ -81,43 +81,6 @@
|
||||
81 akcijo akcija akcija N N _ Noun|common|feminine|singular|accusative 78 78 dol dol _ _ _ _ _ _ _ _
|
||||
82 . . . . . _ . 0 0 modra modra _ _ _ _ _ _ _ _
|
||||
|
||||
1 Ker ker ker C C _ Conjunction|subordinating 14 14 vez vez _ _ _ _ _ _
|
||||
2 se se se P P _ Pronoun|reflexive|+Clitic 14 14 del del _ _ _ _ _ _
|
||||
3 nas jaz jaz P P _ Pronoun|personal|first|plural|accusative 5 5 dve dve _ _ _ _ _ _
|
||||
4 v v v S S _ Adposition|locative 5 5 dol dol _ _ _ _ _ _
|
||||
5 preteklosti preteklost preteklost N N _ Noun|common|feminine|singular|locative 14 14 štiri štiri _ _ TIME _ _ _
|
||||
6 zaradi zaradi zaradi S S _ Adposition|genitive 8 8 dol dol _ _ _ _ _ _
|
||||
7 genialnega genialen genialen A A _ Adjective|general|positive|masculine|singular|genitive 8 8 dol dol _ _ _ _ _ _
|
||||
8 sistema sistem sistem N N _ Noun|common|masculine|singular|genitive 5 5 dol dol _ _ _ _ _ _
|
||||
9 splošne splošen splošen A A _ Adjective|general|positive|feminine|singular|genitive 11 11 dol dol _ _ _ _ _ _
|
||||
10 ljudske ljudski ljudski A A _ Adjective|general|positive|feminine|singular|genitive 11 11 dol dol _ _ _ _ _ _
|
||||
11 obrambe obramba obramba N N _ Noun|common|feminine|singular|genitive 8 8 dol dol _ _ _ _ _ _
|
||||
12 nihče nihče nihče P P _ Pronoun|negative|masculine|singular|nominative 14 14 ena ena _ _ ACT _ _ _
|
||||
13 ni biti biti V V _ Verb|auxiliary|present|third|singular|+Negative 14 14 del del _ _ _ _ _ _
|
||||
14 upal upati upati V V _ Verb|main|progressive|participle|singular|masculine 0 0 modra modra Y upati _ _ _ _
|
||||
15 lotiti lotiti lotiti V V _ Verb|main|perfective|infinitive 14 14 dol dol Y lotiti MWPRED _ _ _
|
||||
16 , , , , , _ , 0 0 modra modra _ _ _ _ _ _
|
||||
17 se se se P P _ Pronoun|reflexive|+Clitic 20 20 del del _ _ _ _ _ _
|
||||
18 sovrag sovrag sovrag N N _ Noun|common|masculine|singular|nominative 20 20 ena ena _ _ _ _ ACT _
|
||||
19 sedaj sedaj sedaj R R _ Adverb|general|positive 20 20 štiri štiri _ _ _ _ TIME _
|
||||
20 poslužuje posluževati posluževati V V _ Verb|main|progressive|present|third|singular 0 0 modra modra Y posluževati _ _ _ _
|
||||
21 bolj bolj bolj R R _ Adverb|general|comparative 22 22 dol dol _ _ _ _ _ _
|
||||
22 perfidnih perfiden perfiden A A _ Adjective|general|positive|feminine|plural|genitive 23 23 dol dol _ _ _ _ _ _
|
||||
23 strategij strategija strategija N N _ Noun|common|feminine|plural|genitive 20 20 dve dve _ _ _ _ PAT _
|
||||
24 , , , , , _ , 0 0 modra modra _ _ _ _ _ _
|
||||
25 saj saj saj C C _ Conjunction|coordinating 30 30 vez vez _ _ _ _ _ _
|
||||
26 nam jaz jaz P P _ Pronoun|personal|first|plural|dative 30 30 dve dve _ _ _ REC _ REC
|
||||
27 lahko lahko lahko R R _ Adverb|general|positive 30 30 del del _ _ _ _ _ _
|
||||
28 praktično praktično praktično R R _ Adverb|general|positive 29 29 dol dol _ _ _ _ _ _
|
||||
29 kadarkoli kadarkoli kadarkoli R R _ Adverb|general|positive 30 30 tri tri _ _ _ _ _ MANN
|
||||
30 odreže odrezati odrezati V V _ Verb|main|perfective|present|third|singular 0 0 modra modra Y odrezati _ _ _ _
|
||||
31 dostop dostop dostop N N _ Noun|common|masculine|singular|accusative|-Animate 30 30 dve dve _ _ _ _ _ PAT
|
||||
32 do do do S S _ Adposition|genitive 33 33 dol dol _ _ _ _ _ _
|
||||
33 hrane hrana hrana N N _ Noun|common|feminine|singular|genitive 31 31 dol dol _ _ _ _ _ _
|
||||
34 in in in C C _ Conjunction|coordinating 35 35 vez vez _ _ _ _ _ _
|
||||
35 pijače pijača pijača N N _ Noun|common|feminine|singular|genitive 33 33 prir prir _ _ _ _ _ _
|
||||
36 . . . . . _ . 0 0 modra modra _ _ _ _ _ _
|
||||
|
||||
1 Zadeva zadeva zadeva N N _ Noun|common|feminine|singular|nominative 2 2 ena ena _ _
|
||||
2 je biti biti V V _ Verb|auxiliary|present|third|singular|-Negative 0 0 modra modra _ _
|
||||
3 kristalno kristalno kristalno R R _ Adverb|general|positive 4 4 dol dol _ _
|
||||
@@ -160,3 +123,40 @@
|
||||
34 supersile supersila supersila N N _ Noun|common|feminine|singular|genitive 0 0 modra modra _ _ _ _
|
||||
35 . . . . . _ . 0 0 modra modra _ _ _ _
|
||||
|
||||
1 Ker ker ker C C _ Conjunction|subordinating 14 14 vez vez _ _ _ _ _ _ _
|
||||
2 se se se P P _ Pronoun|reflexive|+Clitic 14 14 del del _ _ _ _ _ _ _
|
||||
3 nas jaz jaz P P _ Pronoun|personal|first|plural|accusative 5 5 dve dve _ _ _ _ _ _ _
|
||||
4 v v v S S _ Adposition|locative 5 5 dol dol _ _ _ _ _ _ _
|
||||
5 preteklosti preteklost preteklost N N _ Noun|common|feminine|singular|locative 14 14 štiri štiri _ _ _ TIME _ _ _
|
||||
6 zaradi zaradi zaradi S S _ Adposition|genitive 8 8 dol dol _ _ _ _ _ _ _
|
||||
7 genialnega genialen genialen A A _ Adjective|general|positive|masculine|singular|genitive 8 8 dol dol _ _ _ _ _ _ _
|
||||
8 sistema sistem sistem N N _ Noun|common|masculine|singular|genitive 5 5 dol dol _ _ _ _ _ _ _
|
||||
9 splošne splošen splošen A A _ Adjective|general|positive|feminine|singular|genitive 11 11 dol dol _ _ _ _ _ _ _
|
||||
10 ljudske ljudski ljudski A A _ Adjective|general|positive|feminine|singular|genitive 11 11 dol dol _ _ _ _ _ _ _
|
||||
11 obrambe obramba obramba N N _ Noun|common|feminine|singular|genitive 8 8 dol dol _ _ _ _ _ _ _
|
||||
12 nihče nihče nihče P P _ Pronoun|negative|masculine|singular|nominative 14 14 ena ena _ _ _ ACT _ _ _
|
||||
13 ni biti biti V V _ Verb|auxiliary|present|third|singular|+Negative 14 14 del del Y biti _ _ _ _ _
|
||||
14 upal upati upati V V _ Verb|main|progressive|participle|singular|masculine 0 0 modra modra Y upati _ _ _ _ _
|
||||
15 lotiti lotiti lotiti V V _ Verb|main|perfective|infinitive 14 14 dol dol Y lotiti _ MWPRED _ _ _
|
||||
16 , , , , , _ , 0 0 modra modra _ _ _ _ _ _ _
|
||||
17 se se se P P _ Pronoun|reflexive|+Clitic 20 20 del del _ _ _ _ _ _ _
|
||||
18 sovrag sovrag sovrag N N _ Noun|common|masculine|singular|nominative 20 20 ena ena _ _ _ _ _ ACT _
|
||||
19 sedaj sedaj sedaj R R _ Adverb|general|positive 20 20 štiri štiri _ _ _ _ _ TIME _
|
||||
20 poslužuje posluževati posluževati V V _ Verb|main|progressive|present|third|singular 0 0 modra modra Y posluževati _ _ _ _ _
|
||||
21 bolj bolj bolj R R _ Adverb|general|comparative 22 22 dol dol _ _ _ _ _ _ _
|
||||
22 perfidnih perfiden perfiden A A _ Adjective|general|positive|feminine|plural|genitive 23 23 dol dol _ _ _ _ _ _ _
|
||||
23 strategij strategija strategija N N _ Noun|common|feminine|plural|genitive 20 20 dve dve _ _ _ _ _ PAT _
|
||||
24 , , , , , _ , 0 0 modra modra _ _ _ _ _ _ _
|
||||
25 saj saj saj C C _ Conjunction|coordinating 30 30 vez vez _ _ _ _ _ _ _
|
||||
26 nam jaz jaz P P _ Pronoun|personal|first|plural|dative 30 30 dve dve _ _ REC _ REC _ REC
|
||||
27 lahko lahko lahko R R _ Adverb|general|positive 30 30 del del _ _ _ _ _ _ _
|
||||
28 praktično praktično praktično R R _ Adverb|general|positive 29 29 dol dol _ _ _ _ _ _ _
|
||||
29 kadarkoli kadarkoli kadarkoli R R _ Adverb|general|positive 30 30 tri tri _ _ _ _ _ _ MANN
|
||||
30 odreže odrezati odrezati V V _ Verb|main|perfective|present|third|singular 0 0 modra modra Y odrezati _ _ _ _ _
|
||||
31 dostop dostop dostop N N _ Noun|common|masculine|singular|accusative|-Animate 30 30 dve dve _ _ _ _ _ _ PAT
|
||||
32 do do do S S _ Adposition|genitive 33 33 dol dol _ _ _ _ _ _ _
|
||||
33 hrane hrana hrana N N _ Noun|common|feminine|singular|genitive 31 31 dol dol _ _ _ _ _ _ _
|
||||
34 in in in C C _ Conjunction|coordinating 35 35 vez vez _ _ _ _ _ _ _
|
||||
35 pijače pijača pijača N N _ Noun|common|feminine|singular|genitive 33 33 prir prir _ _ _ _ _ _ _
|
||||
36 . . . . . _ . 0 0 modra modra _ _ _ _ _ _ _
|
||||
|
||||
|
||||
@@ -13,10 +13,11 @@
|
||||
##################################################
|
||||
|
||||
# INPUT=./../../data/mate_train/sl.test.mate
|
||||
INPUT=./../../data/kres_example_out/F0006347.xml.parsed.tsv
|
||||
INPUT="$1"
|
||||
Lang="ger"
|
||||
MODEL="./srl-ger.model"
|
||||
OUTPUT="${Lang}-eval.out"
|
||||
# OUTPUT="${Lang}-eval.out"
|
||||
OUTPUT="$2"
|
||||
|
||||
##################################################
|
||||
## (2) These ones may need to be changed
|
||||
|
||||
25
tools/srl-20131216/tag_all.sh
Executable file
25
tools/srl-20131216/tag_all.sh
Executable file
@@ -0,0 +1,25 @@
|
||||
#!/bin/bash
# Run the mate-tools SRL tagger on every entry in IN_FOLDER and write each
# result to OUT_FOLDER/<name-before-first-dot>.srl.tsv.
#
# Usage: tag_all.sh IN_FOLDER OUT_FOLDER
# Exits with status 1 on missing arguments or on the first tagger failure.

# Without both arguments the cleanup below would expand to `rm /*srl.tsv`.
if [ "$#" -lt 2 ]; then
    echo "Usage: $0 IN_FOLDER OUT_FOLDER" >&2
    exit 1
fi

IN_FOLDER="$1"
OUT_FOLDER="$2"
SUFFIX="srl.tsv"

mkdir -p "$OUT_FOLDER"
# Drop results of a previous run; -f keeps a first run (no matches) silent.
rm -f "$OUT_FOLDER"/*"${SUFFIX}"

for infile in "$IN_FOLDER"/*; do
    # An empty folder leaves the glob pattern unexpanded; skip that literal.
    [ -e "$infile" ] || continue

    echo "Tagging: ${infile}"
    # Keep only the part of the file name before the first dot.
    base=$(basename "$infile" | cut -d'.' -f1)
    outfile="${OUT_FOLDER}/${base}.${SUFFIX}"

    # mate-tools tagger
    if ./scripts/parse_srl_only_mod.sh "$infile" "$outfile"; then
        echo "Saved as ${outfile}"
    else
        echo "ERR"
        exit 1
    fi
done
|
||||
|
||||
Reference in New Issue
Block a user