diff --git a/Makefile b/Makefile
index 69ae834..4ace65f 100644
--- a/Makefile
+++ b/Makefile
@@ -6,8 +6,9 @@ json_files: # srl_tagged_files
 	cd tools; python3 gen_json.py
 
 srl_tagged_files: # tsv_files
-	# cd tools/srl-20131216; ./scripts/parse_srl_only_mod.sh; cd -
+# 	# cd tools/srl-20131216; ./scripts/parse_srl_only_mod.sh; cd -
 	cd tools/srl-20131216; ./tag_all.sh
+# 	cd tools/srl-20131216; ./tag_ssj500k2.3.sh
 
 tsv_files: # tools/fillpred_model/model.pickle
 	cd tools; python3 parse_all.py
diff --git a/README.md b/README.md
index 51252c2..ce7d3e3 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,11 @@
+# Instructions
+To mine ssj500k, check out the `ssj500k` branch.
+For the run order, see the Makefile. In general the pipeline works like this:
+- tools/parse_all.py - creates the mate-tools input file required by the Java-based srl.jar
+- tools/srl-20131216/tag_all.sh - runs the SRL tagger over ssj500k
+- tools/gen_json.py - mines the SRL annotations into JSON
+- tools/gen_tei.py - mines the SRL annotations into TEI
+
 # cjvt-srl-tagging
 We'll be using mate-tools to perform SRL on Kres. 
 
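The README addition above lists the pipeline steps in order. The same order can also be driven from one script; this is a hypothetical sketch (the file name `run_ssj500k_pipeline.py` and the assumption that `tools.cfg.ssj500k2.3` is already configured are mine, not part of the repository), equivalent to running the Makefile targets by hand:

```python
# run_ssj500k_pipeline.py -- hypothetical driver mirroring the README/Makefile order
import subprocess

STEPS = [
    ("tools", ["python3", "parse_all.py"]),        # TEI -> mate/CoNLL-2009 input for srl.jar
    ("tools/srl-20131216", ["./tag_all.sh"]),      # run the Java SRL tagger over ssj500k
    ("tools", ["python3", "gen_json.py"]),         # SRL tagger output -> JSON
    ("tools", ["python3", "gen_tei.py"]),          # JSON -> TEI with SRL annotations
]

for cwd, cmd in STEPS:
    # check=True stops the pipeline as soon as one step fails
    subprocess.run(cmd, cwd=cwd, check=True)
```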
diff --git a/tools/gen_json.py b/tools/gen_json.py
index 1e8f821..fd16547 100644
--- a/tools/gen_json.py
+++ b/tools/gen_json.py
@@ -13,10 +13,10 @@ from multiprocessing import Pool
 
 # parse config
 config = configparser.ConfigParser()
-config.read("tools.cfg")
-ORIGPATH = Path(config["tools"]["giga"])
-INPATH = Path(config["tools"]["giga_srl"])
-OUTPATH = Path(config["tools"]["giga_json"])
+config.read("tools.cfg.ssj500k2.3")
+ORIGPATH = Path(config["tools"]["ssj500k_orig_folder"])
+INPATH = Path(config["tools"]["ssj500k_srl"])
+OUTPATH = Path(config["tools"]["ssj500k_json"])
 INTERNAL_DATA = Path(config["tools"]["internal_data"])
 DEBUG = config["tools"]["debug"] == "True"
 CPU_CORES = int(config["tools"]["cpu_cores"])
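The scripts in this diff now read `tools.cfg.ssj500k2.3` instead of `tools.cfg`. That file is not part of the diff; judging only from the keys accessed in gen_json.py, gen_tei.py and parse_all.py, it is an INI file shaped roughly like the sketch below. All paths are placeholders, not values from the repository.

```ini
# tools.cfg.ssj500k2.3 -- sketch inferred from the keys the scripts read; paths are placeholders
[tools]
# inputs
ssj500k = /data/ssj500k/ssj500k-sl.body.xml
ssj500k_orig = /data/ssj500k/ssj500k-orig.tab
ssj500k_jos = /data/ssj500k/ssj500k-jos.tsv
ssj500k_orig_folder = /data/ssj500k/tei
# outputs of the individual steps
ssj500k_tsv = /data/out/ssj500k.tsv
ssj500k_srl = /data/out/srl
ssj500k_json = /data/out/json
ssj500k_tei = /data/out/tei
# shared settings
internal_data = /data/internal
logfile = /data/logs/tools.log
debug = False
cpu_cores = 4
```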
@@ -143,44 +143,36 @@ def handle_file(whole_input):
             print('PAUSE')
 
         # look at neighbouring sentences if they are correct
-        for i in range(100):
-            sentence, sentence_arr = next(gen)
-            # orig_sentence = " ".join(token[2] for token in e["tokens"])
-            if sentence == orig_val["text"]:
-                # if i != 10 and i != 0:
-                #     print('OK!')
-                sid = orig_id
+        sentence, sentence_arr = next(gen)
+        # sanity check: the reconstructed sentence, with spaces stripped,
+        # must match the original sentence text
+        assert sentence.replace(' ', '') == orig_val['text']
+        sid = orig_id
 
-                outdata[sid] = []
+        outdata[sid] = []
 
-                # find all predicate indices in the sentence
-                predicates = []
-                for token in sentence_arr:
-                    if token[12] == "Y":
-                        predicates += [token[0]]  # idx
+        # find all predicate indices in the sentence
+        predicates = []
+        for token in sentence_arr:
+            if token[12] == "Y":
+                predicates += [token[0]]  # idx
 
-                    deprel = get_dep_rel(token)
-                    if deprel is not None:
-                        outdata[sid].append(deprel)
+            deprel = get_dep_rel(token)
+            if deprel is not None:
+                outdata[sid].append(deprel)
 
-                # deprel["from"] points to n-th predicate
-                # replace with predicate's token index
-                for deprel in outdata[sid]:
-                    deprel["from"] = predicates[deprel["from"]]
+        # deprel["from"] points to n-th predicate
+        # replace with predicate's token index
+        for deprel in outdata[sid]:
+            deprel["from"] = predicates[deprel["from"]]
 
-                if DEBUG:
-                    print(to_sentence(sentence_arr))
-                    print(outdata[sid])
-                    print(sid)
-                    print()
-                    print()
-                break
-            else:
-                if i == 99:
-                    mismatch_sentences += 1
-                    sid = orig_id
-                    outdata[sid] = []
-                    gen = srl_multiple_files_sentences_generator(sentence_id + sentence_i)
+        if DEBUG:
+            print(to_sentence(sentence_arr))
+            print(outdata[sid])
+            print(sid)
+            print()
+            print()
 
     if mismatch_sentences > 0:
         if mismatch_sentences / len(orig_dict.items()) < 0.1:
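One detail in `handle_file` above is easy to misread: in the SRL output, an argument's `from` field initially counts predicates (the n-th `Y` in column 12), not tokens, and the loop rewrites it into the predicate's token id. A toy illustration (the column values and the `arg` key of the `deprel` dict are invented; only `'from'` is taken from the code above):

```python
def row(tid, fillpred):
    # minimal 14-column stand-in for one CoNLL-2009 token line
    return [tid] + ["_"] * 11 + [fillpred] + ["_"]

sentence_arr = [row("1", "Y"), row("2", "_"), row("3", "Y")]

# collect predicate token ids in order of appearance, as handle_file does
predicates = [tok[0] for tok in sentence_arr if tok[12] == "Y"]   # -> ["1", "3"]

# an argument that points at "predicate number 1" (0-based) ...
deprel = {"from": 1, "arg": "A1"}
# ... ends up pointing at that predicate's token id instead
deprel["from"] = predicates[deprel["from"]]                       # -> "3"
```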
diff --git a/tools/gen_tei.py b/tools/gen_tei.py
new file mode 100644
index 0000000..f62642d
--- /dev/null
+++ b/tools/gen_tei.py
@@ -0,0 +1,47 @@
+# parse config
+import configparser
+import json
+import logging
+import os
+from pathlib import Path
+
+from tools.parser.parser import Parser
+
+config = configparser.ConfigParser()
+config.read("tools.cfg.ssj500k2.3")
+ORIGPATH = Path(config["tools"]["ssj500k_orig_folder"])
+JSONPATH = Path(config["tools"]["ssj500k_json"] + '/ssj500k-sl.body.json')
+OUTPATH = Path(config["tools"]["ssj500k_tei"])
+INTERNAL_DATA = Path(config["tools"]["internal_data"])
+DEBUG = config["tools"]["debug"] == "True"
+CPU_CORES = int(config["tools"]["cpu_cores"])
+
+LOGFILE = Path(config["tools"]["logfile"]).absolute()
+LOGFILE.touch(exist_ok=True)
+LOGFILE.resolve()
+
+logging.basicConfig(filename=str(LOGFILE), level=logging.INFO)
+
+
+par = Parser()
+OUTPATH.mkdir(exist_ok=True)
+
+jsondata = []
+with open(JSONPATH, 'r') as jf:
+    jsondata = json.load(jf)
+
+logging.info("Generating TEI with annotated SRL.")
+
+def handle_file(file, jsondata):
+    teifile = (ORIGPATH / file)
+    resfile = (OUTPATH / file)  # intended output path (not used yet)
+
+    # build the reduced TEI with the SRL annotations taken from the JSON data
+    orig_dict = par.minimize_tei(teifile, jsondata)
+
+origfiles = []
+for subdir, dirs, files in os.walk(ORIGPATH):
+    for file in files:
+        handle_file(file, jsondata)
diff --git a/tools/parse_all.py b/tools/parse_all.py
index 86c3caf..459935a 100644
--- a/tools/parse_all.py
+++ b/tools/parse_all.py
@@ -16,7 +16,8 @@ par = Parser()
 
 # path to data
 config = configparser.ConfigParser()
-config.read("tools.cfg")
+# config.read("tools.cfg")
+config.read("tools.cfg.ssj500k2.3")
 analysis = ''
 if 'kres_orig' in config["tools"]:
     analysis = 'kres'
@@ -31,6 +32,14 @@ elif 'giga_orig' in config["tools"]:
     OUTDIR = Path(config["tools"]["giga_tsv"])
     GIGA_PARTS = int(config["tools"]["giga_parts"])
     INTERNAL_DATA = config["tools"]["internal_data"]
+elif 'ssj500k_orig' in config["tools"]:
+    # analysis = 'gigafida'
+    analysis = 'ssj500k'
+    INDIR_SSJ500K_ORIG = Path(config["tools"]["ssj500k"])
+    INDIR_SSJ500K = Path(config["tools"]["ssj500k_orig"])
+    INDIR_JOS = Path(config["tools"]["ssj500k_jos"])
+    OUTDIR = Path(config["tools"]["ssj500k_tsv"])
+    INTERNAL_DATA = config["tools"]["internal_data"]
 
 CPU_CORES = int(config["tools"]["cpu_cores"])
 
@@ -49,40 +58,40 @@ print("end parsing ssj")
 """
 
 # kres_file = "../data/kres_example/F0019343.xml.parsed.xml"
-OUTDIR.mkdir(exist_ok=True)
+# OUTDIR.mkdir(exist_ok=True)
 
 if analysis == 'kres':
     infiles = list(enumerate([x for x in INDIR.iterdir() if x.is_file()]))
     logging.info("Parsing kres: {} files.".format(len(infiles)))
 
-def handle_file(infile):
-    i = infile[0]
-    kres_file = infile[1]
-    outfile = (OUTDIR / kres_file.name).with_suffix(".tsv")
+
+def handle_ssj500k_file():
+    kres_file = INDIR_SSJ500K_ORIG
+    outfile = OUTDIR
 
     if outfile.is_file():
         logging.info("Skipping existing file: {}.".format(str(kres_file)))
         return True
 
-    try:
-        res_dict = par.parse_tei(kres_file)
-        kres_out_str = ""
-        for _, sentence in res_dict.items():
-            kres_out_str += par.to_conll_2009_SRL(sentence)
-    except Exception as exc:
-        logging.info("Failed processing file: {}".format(str(kres_file)))
-        logging.error(exc)
-        return False
+    # try:
+    res_dict = par.parse_tei(kres_file)
+    kres_out_str = ""
+    for _, sentence in res_dict.items():
+        kres_out_str += par.to_conll_2009_SRL(sentence)
+    # except Exception as exc:
+    #     logging.info("Failed processing file: {}".format(str(kres_file)))
+    #     logging.error(exc)
+    #     return False
 
 
     with outfile.open("wb+") as fp:
         fp.write(kres_out_str.encode("utf-8"))
-        logging.info("Processed file ({}/{}): {}".format(i+1, len(infiles), str(kres_file)))
+        logging.info("Processed file: {}".format(str(kres_file)))
         return True
     return False
 
-def giga_orig_generator():
-    with open(INDIR_GIGA, 'r') as gof:
+def ssj500k_orig_generator():
+    with open(INDIR_SSJ500K, 'r') as gof:
         previous_new_line = False
         for l_gof in gof:
             if l_gof == '\n':
@@ -104,12 +113,6 @@ def handle_gigafida_file():
     #             pass
     #     num_lines = i + 1
     # print(num_lines)
-    num_lines = 1393184026
-    # 1393184026
-    # 1393184033
-    # return
-    num_lines_per_part = num_lines / GIGA_PARTS
-    curr_part = 0
     gof_generator = giga_orig_generator()
     # with open(INDIR_GIGA, 'r') as gof:
     with open(INDIR_JOS, 'r') as gjf:
@@ -168,6 +171,70 @@ def handle_gigafida_file():
         curr_part += 1
         wf.close()
 
+
+def handle_ssj500k_file2():
+    """
+    Split the big ssj500k text file into smaller part files; split only on empty lines
+    (mirrors handle_gigafida_file).
+    """
+    # assumed defaults: the gigafida version derived these from num_lines / GIGA_PARTS,
+    # which ssj500k does not configure, so keep everything in one part
+    curr_part = 0
+    num_lines_per_part = 10 ** 9
+    gof_generator = ssj500k_orig_generator()
+    # with open(INDIR_GIGA, 'r') as gof:
+    with open(INDIR_JOS, 'r') as gjf:
+        sentence = {}
+        sentence['tokens'] = []
+        sentence['links'] = {}
+        if os.path.exists(os.path.join(OUTDIR, 'giga%07d.tsv' % 0)):
+            ignore_lines = True
+            wf = False
+        else:
+            wf = open(os.path.join(OUTDIR, 'giga%07d.tsv' % curr_part), 'a')
+            ignore_lines = False
+        # for i, (l_gof, l_gjf) in enumerate(zip(gof, gjf)):
+        for i, l_gjf in enumerate(gjf):
+            l_gof = next(gof_generator)
+            if ignore_lines:
+                if i > num_lines_per_part * curr_part and l_gof == '\n':
+                    if not os.path.exists(os.path.join(OUTDIR, 'giga%07d.tsv' % (curr_part + 2))):
+                        ignore_lines = False
+                        # delete last file (probably not whole)
+                        os.remove(os.path.join(OUTDIR, 'giga%07d.tsv' % (curr_part + 1)))
+                    if ignore_lines:
+                        print(curr_part)
+                        curr_part += 1
+                        continue
+                else:
+                    continue
+            l_gof_split = l_gof.split('\t')
+            l_gjf_split = l_gjf.split('\t')
+
+            # if punctuation
+            if l_gof != '\n':
+                if l_gof_split[1][-1] == 'u':
+                    # print(l_gjf_split)
+                    sentence['tokens'].append(('c', l_gjf_split[0], l_gjf_split[1]))
+                else:
+                    sentence['tokens'].append(('w', l_gjf_split[0], l_gof_split[0], l_gof_split[1][:-2], l_gof_split[3][:-1]))
+
+                sentence['links'][l_gjf_split[0]] = (l_gjf_split[7], l_gjf_split[0], l_gjf_split[6])
+
+            # if l_gof == '\n':
+            else:
+                if wf:
+                    # print(i)
+                    wf.write(par.to_conll_2009_SRL(sentence))
+                sentence['tokens'] = []
+                sentence['links'] = {}
+            # wf.flush()
+            # if i > num_lines_per_part * (curr_part + 1) and l_gof == '\n':
+                if i > num_lines_per_part * (curr_part + 1):
+                    curr_part += 1
+                    # if wf doesn't exist (first one)
+                    if wf:
+                        wf.close()
+                    wf = open(os.path.join(OUTDIR, 'giga%07d.tsv' % curr_part), 'a')
+        curr_part += 1
+        wf.close()
+
+
 import  time
 def handle_giga_file(ran):
     """
@@ -347,31 +414,9 @@ def handle_giga_file_selected_sentences(error_sentences):
         # curr_part += 1
         wf.close()
 
-file_indices = set(range(0, 100000))
-with open(os.path.join(INTERNAL_DATA, 'diffs_updated_gigafida.pkl'), 'rb') as pkl_file:
-    file_indices = set(pickle.load(pkl_file))
 
-with Pool(CPU_CORES) as p:
-    if analysis == 'kres':
-        p.map(handle_file, infiles)
-    elif analysis == 'gigafida':
-        handle_gigafida_file()
-    elif analysis == 'giga':
-        final_range = [0, 100000]
-        size_per_proc = (final_range[1] - final_range[0]) / CPU_CORES
-        # splits = [int(final_range[0] + size_per_proc) for i in range(CPU_CORES)]
-        ranges = []
-        ps = None
-        for i in range(CPU_CORES):
-            s = int(final_range[0] + size_per_proc * i)
-            ns = int(final_range[0] + size_per_proc * (i + 1))
-            ranges.append([s, ns])
-        # ranges = [[0, 1]]
 
-        # p.map(handle_giga_file, ranges)
-        # p.map(handle_giga_file, ranges)
-        error_sentences = [line.rstrip('\n') for line in open(os.path.join(INTERNAL_DATA, 'sentences_with_less_than_token.txt'))]
-        handle_giga_file_selected_sentences(set(error_sentences))
+handle_ssj500k_file()
 
 
 logging.info("end parsing kres")
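Both the gigafida splitter and the new `handle_ssj500k_file2` feed the same intermediate structure to `par.to_conll_2009_SRL`: a dict with a `tokens` list (tuples starting with `'w'` for words, `'c'` for punctuation) and a `links` dict keyed by token id. A hand-built sketch of that shape, with the field order taken from the KRES-style tuples in `Parser.parse_tei` (the ssj500k branch appends uPosTag/uPosFeats) and invented forms, lemmas, MSD tags and relation labels:

```python
# the intermediate "sentence" structure consumed by par.to_conll_2009_SRL
# (field order follows Parser.parse_tei; all values here are invented)
sentence = {
    "tokens": [
        # ("w", token_id, form, lemma, msd, ...) for words; ("c", token_id, text) for punctuation
        ("w", 1, "Example", "example", "Ncmsn"),
        ("w", 2, "sentence", "sentence", "Ncfsn"),
        ("c", 3, "."),
    ],
    "links": {
        # token_id -> (relation, dependent_id, head_id); head_id 0 marks the sentence root
        1: ("Sb", 1, 2),
        2: ("Root", 2, 0),
        3: ("Root", 3, 0),
    },
}

# conll_block = par.to_conll_2009_SRL(sentence)   # returns one CoNLL-2009 block as a string
```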
diff --git a/tools/parser/parser.py b/tools/parser/parser.py
index 80e353f..f18b9b2 100644
--- a/tools/parser/parser.py
+++ b/tools/parser/parser.py
@@ -1,3 +1,5 @@
+import copy
+
 from lxml import etree
 import re
 from parser.msd.msdmap import Msdmap
@@ -5,6 +7,7 @@ import pickle
 from pathlib import Path
 from fillpred_model.step1 import build_model_row
 import sys
+import xml.etree.ElementTree as ET
 
 class Parser:
     # reads a TEI xml file and returns a dictionary:
@@ -29,17 +32,23 @@ class Parser:
     def parse_tei(self, filepath):
 
         def parse_links(s_el):
-            lgrps = s_el.findall(".//links")
+            sent_id = '#' + s_el.get('id')
+            lgrps = s_el.findall(".//linkGrp")
             if len(lgrps) < 1:
                 raise IOError("Can't find links.")
             res_links = {}
-            for link in lgrps[0]:
-                dep = int(link.get("dep").split(".")[-1])
-                res_links[dep] = (
-                    link.get("afun"),
-                    dep,
-                    int(link.get("from").split(".")[-1]),
-                )
+            for lgrp in lgrps:
+                if lgrp.get("type") == "JOS-SYN":
+                    for link in lgrp:
+                        jos_type = link.get("ana").split(":")[-1]
+                        link_data = link.get("target").split(" ")
+                        link_from = int(link_data[1].split('.')[-1][1:])
+                        link_to = int(link_data[0].split('.')[-1][1:]) if sent_id != link_data[0] else 0
+                        res_links[link_from] = (
+                            jos_type,
+                            link_from,
+                            link_to,
+                        )
             return res_links
 
         guess_corpus = None  # SSJ | KRES
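The rewritten `parse_links` reads JOS-SYN `<linkGrp>` elements in which each `<link>` carries the relation in `ana` and two targets: as the code reads them, the first is the head and the second the dependent, and a head equal to the sentence id means the token attaches to the root. A small standalone illustration with a made-up, namespace-free sentence fragment (real ssj500k TEI uses namespaces and `xml:id`, which the surrounding parser is assumed to handle):

```python
import xml.etree.ElementTree as ET

# made-up fragment in the shape parse_links expects (no namespaces, plain id attributes)
fragment = """
<s id="ssj1.1.1">
  <linkGrp type="JOS-SYN">
    <link ana="jos-syn:Root" target="#ssj1.1.1 #ssj1.1.1.t2"/>
    <link ana="jos-syn:Sb" target="#ssj1.1.1.t2 #ssj1.1.1.t1"/>
  </linkGrp>
</s>
"""
s_el = ET.fromstring(fragment)
sent_id = "#" + s_el.get("id")                  # "#ssj1.1.1"

res_links = {}
for link in s_el.find("linkGrp"):
    jos_type = link.get("ana").split(":")[-1]   # "Root", "Sb"
    head, dep = link.get("target").split(" ")
    link_from = int(dep.split(".")[-1][1:])     # dependent token index, "t2" -> 2
    link_to = int(head.split(".")[-1][1:]) if head != sent_id else 0   # 0 = sentence root
    res_links[link_from] = (jos_type, link_from, link_to)

print(res_links)   # {2: ('Root', 2, 0), 1: ('Sb', 1, 2)}
```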
@@ -79,6 +88,11 @@ class Parser:
 
                     # parse sentences
                     for s in p.findall(".//s"):
+                        # skip sentences that lack JOS-SYN or UD-SYN annotations, or that already have SRL
+                        sent_annot_type_list = [links.get('type') for links in s.findall(".//linkGrp")]
+                        if 'JOS-SYN' not in sent_annot_type_list or 'UD-SYN' not in sent_annot_type_list or 'SRL' in sent_annot_type_list:
+                            continue
+
                         s_id = s.get("id").split(".")[-1]
                         sentence_text = ""
                         sentence_list = []
@@ -87,21 +101,29 @@ class Parser:
                         # parse tokens
                         for el in s.iter():
                             if el.tag in self.W_TAGS:
-                                if guess_corpus != "GIGA":
-                                    el_id = el.get("id").split(".")[-1]
-                                    if el_id[0] == 't':
-                                        el_id = el_id[1:]  # ssj W_TAG ids start with t
-                                    sentence_text += el.text
-                                    sentence_tokens += [(
-                                        "w",
-                                        int(el_id),
-                                        el.text,
-                                        el.get("lemma"),
-                                        (el.get("msd") if guess_corpus == "KRES" or guess_corpus == "GIGA"
-                                         else el.get("ana").split(":")[-1]),
-                                    )]
-                                else:
-                                    sentence_list.append(el.text)
+                                el_id = el.get("id").split(".")[-1]
+                                if el_id[0] == 't':
+                                    el_id = el_id[1:]  # ssj W_TAG ids start with t
+                                sentence_text += el.text
+                                uPosTag = None
+                                uPosFeats = []
+                                for msd_el in el.get("msd").split('|'):
+                                    key, val = msd_el.split('=')
+                                    if key == 'UPosTag':
+                                        uPosTag = val
+                                    else:
+                                        uPosFeats.append(msd_el)
+                                uPosFeats = '|'.join(uPosFeats)
+                                sentence_tokens += [(
+                                    "w",
+                                    int(el_id),
+                                    el.text,
+                                    el.get("lemma"),
+                                    (el.get("msd") if guess_corpus == "KRES" or guess_corpus == "GIGA"
+                                     else el.get("ana").split(":")[-1]),
+                                    uPosTag,
+                                    uPosFeats
+                                )]
                             elif el.tag in self.C_TAGS:
                                 # only Kres' C_TAGS have ids
                                 if guess_corpus != "GIGA":
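The new token handling assumes the `msd` attribute packs the universal POS tag and its features into a single pipe-separated string. A tiny helper showing the split on an invented value (the helper name `split_upos` is mine, not from the repository):

```python
def split_upos(msd):
    """Split 'UPosTag=NOUN|Case=Nom|Number=Sing' into the tag and the remaining
    feature string, mirroring the loop above."""
    upos_tag, upos_feats = None, []
    for part in msd.split("|"):
        key, val = part.split("=")
        if key == "UPosTag":
            upos_tag = val
        else:
            upos_feats.append(part)          # keep the full "key=value" pair
    return upos_tag, "|".join(upos_feats)

print(split_upos("UPosTag=NOUN|Case=Nom|Number=Sing"))   # ('NOUN', 'Case=Nom|Number=Sing')
```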
@@ -110,33 +132,243 @@ class Parser:
                                     sentence_text += el.text
                                     sentence_tokens += [("c", el_id, el.text,)]
                             elif el.tag in self.S_TAGS:
-                                # Kres'