diff --git a/dockerfiles/all/Dockerfile b/dockerfiles/all/Dockerfile
new file mode 100644
index 0000000..e6159e9
--- /dev/null
+++ b/dockerfiles/all/Dockerfile
@@ -0,0 +1,12 @@
+FROM ubuntu:16.04
+
+# Update the package index and install in the same layer, so a cached index is never reused with a changed package list.
+RUN apt-get update && apt-get install -y \
+    vim \
+    default-jdk \
+    python3 \
+    python3-pip
+# lxml is imported by tools/parser/parser.py.
+RUN pip3 install lxml
+# Force UTF-8 for Python's stdin/stdout/stderr regardless of the container locale.
+ENV PYTHONIOENCODING=UTF-8
diff --git a/dockerfiles/all/README.md b/dockerfiles/all/README.md
new file mode 100644
index 0000000..166f5b1
--- /dev/null
+++ b/dockerfiles/all/README.md
@@ -0,0 +1,12 @@
+You might want to mount this whole repo into the docker container.  
+Also mount data locations.  
+
+Example container:
+```bash
+$ docker build . -t python_java
+$ docker run \
+    -it \
+    -v $(echo $(cd ../..; pwd)):/cjvt-srl-tagging \
+    python_java \
+    /bin/bash
+```
diff --git a/tools/parser/README.md b/tools/parser/README.md
index ef6ae42..a52097a 100644
--- a/tools/parser/README.md
+++ b/tools/parser/README.md
@@ -1,3 +1,6 @@
 ## msdmap.py
 Help conversion between english and slovenian MSD.  
 Hardcoded values from online documentation (html tables).  
+
+## Sources
+[1] (conll09 data format) https://nlpado.de/~sebastian/pub/papers/conll09_hajic.pdf
\ No newline at end of file
diff --git a/tools/parser/__init__.pyc b/tools/parser/__init__.pyc
deleted file mode 100644
index d333b89..0000000
Binary files a/tools/parser/__init__.pyc and /dev/null differ
diff --git a/tools/parser/__pycache__/__init__.cpython-37.pyc b/tools/parser/__pycache__/__init__.cpython-37.pyc
deleted file mode 100644
index ecc16a3..0000000
Binary files a/tools/parser/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/tools/parser/__pycache__/parser.cpython-37.pyc b/tools/parser/__pycache__/parser.cpython-37.pyc
deleted file mode 100644
index d2e9eff..0000000
Binary files a/tools/parser/__pycache__/parser.cpython-37.pyc and /dev/null differ
diff --git a/tools/parser/msdmap.py b/tools/parser/msdmap.py
index 7404faa..929ed74 100644
--- a/tools/parser/msdmap.py
+++ b/tools/parser/msdmap.py
@@ -9,7 +9,7 @@ class Msdmap():
             ("prislov", "R", "Adverb", "R"),
             ("zaimek", "Z", "Pronoun", "P"),
             ("števnik", "K", "Numeral", "M"),
-            ("predlog", "D", "Preposition", "S"),
+            ("predlog", "D", "Adposition", "S"),
             ("veznik", "V", "Conjunction", "C"),
             ("členek", "L", "Particle", "Q"),
             ("medmet", "M", "Interjection", "I"),
@@ -290,6 +290,10 @@ class Msdmap():
         for pos in self.pos_val:
             if pos[7] == en_category and pos[col] == query:
                 return pos
+        print("---")
+        print(en_category)
+        print(col)
+        print(query)
         raise ValueError("Wrong part of speech value.")
 
     def msd_from_slo(self, msd):
diff --git a/tools/parser/parser.py b/tools/parser/parser.py
index 2a08c2b..0a91b5b 100644
--- a/tools/parser/parser.py
+++ b/tools/parser/parser.py
@@ -1,5 +1,6 @@
 from lxml import etree
 import re
+from parser.msdmap import Msdmap
 
 W_TAGS = ['w']
 C_TAGS = ['c']
@@ -16,13 +17,15 @@ S_TAGS = ['S', 'pc']
 def parse_tei(filepath):
     guess_corpus = None  # SSJ | KRES
     res_dict = {}
-    with open(filepath, "r") as fp:
+    with open(filepath, "rb") as fp:
         # remove namespaces
-        xmlstr = fp.read()
-        xmlstr = re.sub('\\sxmlns="[^"]+"', '', xmlstr, count=1)
-        xmlstr = re.sub(' xml:', ' ', xmlstr)
+        bstr = fp.read()
 
-        root = etree.XML(xmlstr.encode("utf-8"))
+        utf8str = bstr.decode("utf-8")
+        utf8str = re.sub('\\sxmlns="[^"]+"', '', utf8str, count=1)
+        utf8str = re.sub(' xml:', ' ', utf8str)
+
+        root = etree.XML(utf8str.encode("utf-8"))
 
         divs = []  # in ssj, there are divs, in Kres, there are separate files
         if "id" in root.keys():
@@ -104,16 +107,28 @@ def parse_links(s_el):
 
 
 def to_conll09(sentence_entry):
+
+    def fillpred(pos, feat):
+        if False:
+            # TODO: predicate detection is not implemented; this branch is never taken, so "_" is always returned
+            return "Y"
+        return "_"
+
+    msdm = Msdmap()
     # works with kres, with parsed links
     out_str = ""
     for token in sentence_entry["tokens"]:
         if token[0] != "w":
             continue
+        msd = msdm.msd_from_slo(token[4])
+        fprd = fillpred("todo", "todo")
+        print(msd)
         print(token)
         print(sentence_entry["links"])
         t_id = token[1]
         print(t_id)
-        out_str += "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
+        #            1   3   4   5   6   7   8   9  10  11  12  13  14
+        out_str += "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
             t_id,  # id
             token[2],  # form
             token[3],  # lemma
@@ -126,6 +141,9 @@ def to_conll09(sentence_entry):
             sentence_entry["links"][t_id][2],  # phead
             sentence_entry["links"][t_id][1],  # deprel
             sentence_entry["links"][t_id][1],  # pdeprel
+            fprd,  # fillpred
+            (token[3] if fprd == "Y" else "_"),  # pred
+            "todo"  # apredn...
         )
     out_str += "\n"
     return out_str
diff --git a/tools/parser/parser.pyc b/tools/parser/parser.pyc
deleted file mode 100644
index 5ddae18..0000000
Binary files a/tools/parser/parser.pyc and /dev/null differ