commit a18e52a599
94 changed files with 87092 additions and 0 deletions
.gitignore | 160
Corpus Analyzer.iml | 28
pom.xml | 122
src/main/java/META-INF/MANIFEST.MF | 3
src/main/java/alg/Common.java | 15
src/main/java/alg/XML_processing.java | 794
src/main/java/alg/inflectedJOS/ForkJoin.java | 67
src/main/java/alg/inflectedJOS/InflectedJOSCount.java | 170
src/main/java/alg/inflectedJOS/WordFormation.java | 131
src/main/java/alg/ngram/ForkJoin.java | 62
src/main/java/alg/ngram/Ngrams.java | 204
src/main/java/alg/word/ForkJoin.java | 62
src/main/java/alg/word/WordCount.java | 167
src/main/java/alg/word/WordLevel.java | 112
src/main/java/data/AnalysisLevel.java | 17
src/main/java/data/CalculateFor.java | 43
src/main/java/data/Corpus.java | 163
src/main/java/data/CorpusType.java | 25
src/main/java/data/Enums/InflectedJosTypes.java | 12
src/main/java/data/Enums/Msd.java | 68
src/main/java/data/Enums/WordLevelDefaultValues.java | 55
src/main/java/data/Enums/WordLevelType.java | 16
src/main/java/data/Enums/solar/SolarFilters.java | 57
src/main/java/data/Filter.java | 144
src/main/java/data/GigafidaJosWordType.java | 71
src/main/java/data/GigafidaTaxonomy.java | 76
src/main/java/data/GosTaxonomy.java | 85
src/main/java/data/Sentence.java | 56
src/main/java/data/Settings.java | 16
src/main/java/data/Statistics.java | 299
src/main/java/data/StatisticsNew.java | 409
src/main/java/data/Tax.java | 175
src/main/java/data/Taxonomy.java | 171
src/main/java/data/Validation.java | 53
src/main/java/data/Word.java | 141
src/main/java/gui/CharacterAnalysisTab.java | 454
src/main/java/gui/CorpusTab.java | 517
src/main/java/gui/FiltersForSolar.java | 187
src/main/java/gui/GUIController.java | 150
src/main/java/gui/Messages.java | 74
src/main/java/gui/OneWordAnalysisTab.java | 389
src/main/java/gui/SelectedFiltersPane.java | 18
src/main/java/gui/StringAnalysisTabNew2.java | 511
src/main/java/gui/ValidationUtil.java | 77
src/main/java/gui/WordFormationTab.java | 208
src/main/java/gui/WordLevelTab.java | 207
src/main/java/manifest/META-INF/MANIFEST.MF | 3
src/main/java/util/ByteUtils.java | 25
src/main/java/util/Combinations.java | 46
src/main/java/util/Export.java | 267
src/main/java/util/Key.java | 31
src/main/java/util/TimeWatch.java | 63
src/main/java/util/Util.java | 225
src/main/java/util/db/RDB.java | 132
src/main/resources/GOS_small/TEI_GOS_small.xml | 68720
src/main/resources/GOS_tax_test/GOS_tax_test.xml | 524
src/main/resources/GUI.fxml | 133
src/main/resources/Gigafida_minimal/gfmin.xml | 237
src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_0-gram_0-skip_14.05.2018_06.34.13.csv | 70
src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_0-gram_0-skip_14.05.2018_06.37.50.csv | 390
src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_0-gram_0-skip_14.05.2018_06.38.17.csv | 1147
src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_1-gram_0-skip_31.01.2018_05.11.26.csv | 455
src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_lema_2-gram_1-skip_31.01.2018_05.11.33.csv | 1160
src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_različnica_1-gram_0-skip_25.01.2018_06.27.41.csv | 512
src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_različnica_2-gram_0-skip_20.01.2018_01.27.csv | 623
src/main/resources/Gigafida_subset/Besedni_nizi_Gigafida_različnica_3-gram_0-skip_20.01.2018_01.27.csv | 572
src/main/resources/Gigafida_subset/F0012405.xml | 350
src/main/resources/Gigafida_subset/F0016316.xml | 367
src/main/resources/Gigafida_subset/F0018194.xml | 336
src/main/resources/Gigafida_subset/F0026709.xml | 367
src/main/resources/Gigafida_subset/F0030361.xml | 365
src/main/resources/Gigafida_subset/nested/F0036980.xml | 356
src/main/resources/Gigafida_subset/nested/F0037258.xml | 408
src/main/resources/Gigafida_subset/nested/F0037544.xml | 391
src/main/resources/Gigafida_subset/nested/F0038754.xml | 355
src/main/resources/Gigafida_subset/nested/F0038920.xml | 402
src/main/resources/Lists/prefixes.txt | 18
src/main/resources/Lists/suffixes.txt | 7
src/main/resources/gui/CharacterAnalysisTab.fxml | 54
src/main/resources/gui/CorpusTab.fxml | 32
src/main/resources/gui/FiltersForSolar.fxml | 30
src/main/resources/gui/OneWordAnalysisTab.fxml | 56
src/main/resources/gui/SelectedFiltersPane.fxml | 13
src/main/resources/gui/StringAnalysisTabNew2.fxml | 105
src/main/resources/gui/WordFormationTab.fxml | 25
src/main/resources/gui/WordLevelTab.fxml | 25
src/main/resources/log4j2.xml | 22
src/test/java/Common.java | 85
src/test/java/CorpusTests.java | 42
src/test/java/DBTest.java | 66
src/test/java/NgramTests.java | 334
src/test/java/WordFormationTest.java | 51
src/test/java/WordLevelTest.java | 15
src/test/java/WordTest.java | 39
.gitignore
@@ -0,0 +1,160 @@ |
|||
# Created by .ignore support plugin (hsz.mobi) |
|||
### Maven template |
|||
target/ |
|||
pom.xml.tag |
|||
pom.xml.releaseBackup |
|||
pom.xml.versionsBackup |
|||
pom.xml.next |
|||
release.properties |
|||
dependency-reduced-pom.xml |
|||
buildNumber.properties |
|||
.mvn/timing.properties |
|||
|
|||
# Avoid ignoring Maven wrapper jar file (.jar files are usually ignored) |
|||
!/.mvn/wrapper/maven-wrapper.jar |
|||
### JetBrains template |
|||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm |
|||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 |
|||
|
|||
# User-specific stuff: |
|||
.idea/**/workspace.xml |
|||
.idea/**/tasks.xml |
|||
.idea/dictionaries |
|||
.idea/ |
|||
|
|||
# Sensitive or high-churn files: |
|||
.idea/**/dataSources/ |
|||
.idea/**/dataSources.ids |
|||
.idea/**/dataSources.xml |
|||
.idea/**/dataSources.local.xml |
|||
.idea/**/sqlDataSources.xml |
|||
.idea/**/dynamic.xml |
|||
.idea/**/uiDesigner.xml |
|||
|
|||
# Gradle: |
|||
.idea/**/gradle.xml |
|||
.idea/**/libraries |
|||
|
|||
# Mongo Explorer plugin: |
|||
.idea/**/mongoSettings.xml |
|||
|
|||
## File-based project format: |
|||
*.iws |
|||
|
|||
## Plugin-specific files: |
|||
|
|||
# IntelliJ |
|||
/out/ |
|||
|
|||
# mpeltonen/sbt-idea plugin |
|||
.idea_modules/ |
|||
|
|||
# JIRA plugin |
|||
atlassian-ide-plugin.xml |
|||
|
|||
# Crashlytics plugin (for Android Studio and IntelliJ) |
|||
com_crashlytics_export_strings.xml |
|||
crashlytics.properties |
|||
crashlytics-build.properties |
|||
fabric.properties |
|||
### Java template |
|||
# Compiled class file |
|||
# Log file |
|||
*.log |
|||
|
|||
# BlueJ files |
|||
*.ctxt |
|||
|
|||
# Mobile Tools for Java (J2ME) |
|||
.mtj.tmp/ |
|||
|
|||
# Package Files # |
|||
*.war |
|||
*.ear |
|||
*.zip |
|||
*.tar.gz |
|||
*.rar |
|||
|
|||
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml |
|||
hs_err_pid* |
|||
### Eclipse template |
|||
|
|||
.metadata |
|||
bin/ |
|||
tmp/ |
|||
*.tmp |
|||
*.bak |
|||
*.swp |
|||
*~.nib |
|||
local.properties |
|||
.settings/ |
|||
.loadpath |
|||
.recommenders |
|||
|
|||
# Eclipse Core |
|||
.project |
|||
|
|||
# External tool builders |
|||
.externalToolBuilders/ |
|||
|
|||
# Locally stored "Eclipse launch configurations" |
|||
*.launch |
|||
|
|||
# PyDev specific (Python IDE for Eclipse) |
|||
*.pydevproject |
|||
|
|||
# CDT-specific (C/C++ Development Tooling) |
|||
.cproject |
|||
|
|||
# JDT-specific (Eclipse Java Development Tools) |
|||
.classpath |
|||
|
|||
# Java annotation processor (APT) |
|||
.factorypath |
|||
|
|||
# PDT-specific (PHP Development Tools) |
|||
.buildpath |
|||
|
|||
# sbteclipse plugin |
|||
.target |
|||
|
|||
# Tern plugin |
|||
.tern-project |
|||
|
|||
# TeXlipse plugin |
|||
.texlipse |
|||
|
|||
# STS (Spring Tool Suite) |
|||
.springBeans |
|||
|
|||
# Code Recommenders |
|||
.recommenders/ |
|||
|
|||
# Scala IDE specific (Scala & Java development for Eclipse) |
|||
.cache-main |
|||
.scala_dependencies |
|||
.worksheet |
|||
|
|||
|
|||
|
|||
|
|||
### Windows ### |
|||
# Windows thumbnail cache files |
|||
Thumbs.db |
|||
ehthumbs.db |
|||
ehthumbs_vista.db |
|||
|
|||
# Folder config file |
|||
Desktop.ini |
|||
|
|||
# Recycle Bin used on file shares |
|||
$RECYCLE.BIN/ |
|||
|
|||
# Windows Installer files |
|||
*.cab |
|||
*.msi |
|||
*.msm |
|||
*.msp |
|||
|
|||
# Windows shortcuts |
|||
*.lnk |
Corpus Analyzer.iml
@@ -0,0 +1,28 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4"> |
|||
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8"> |
|||
<output url="file://$MODULE_DIR$/target/classes" /> |
|||
<output-test url="file://$MODULE_DIR$/target/test-classes" /> |
|||
<content url="file://$MODULE_DIR$"> |
|||
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" /> |
|||
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" /> |
|||
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" /> |
|||
<excludeFolder url="file://$MODULE_DIR$/target" /> |
|||
</content> |
|||
<orderEntry type="inheritedJdk" /> |
|||
<orderEntry type="sourceFolder" forTests="false" /> |
|||
<orderEntry type="library" name="Maven: commons-io:commons-io:2.5" level="project" /> |
|||
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.6" level="project" /> |
|||
<orderEntry type="library" name="Maven: com.googlecode.json-simple:json-simple:1.1.1" level="project" /> |
|||
<orderEntry type="library" name="Maven: junit:junit:4.10" level="project" /> |
|||
<orderEntry type="library" name="Maven: org.hamcrest:hamcrest-core:1.1" level="project" /> |
|||
<orderEntry type="library" name="Maven: org.apache.commons:commons-csv:1.4" level="project" /> |
|||
<orderEntry type="library" name="Maven: org.controlsfx:controlsfx:8.40.13" level="project" /> |
|||
<orderEntry type="library" name="Maven: org.rocksdb:rocksdbjni:5.7.3" level="project" /> |
|||
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-api:2.9.0" level="project" /> |
|||
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-core:2.9.0" level="project" /> |
|||
<orderEntry type="library" name="Maven: org.kordamp.ikonli:ikonli-fontawesome-pack:1.9.0" level="project" /> |
|||
<orderEntry type="library" name="Maven: org.kordamp.ikonli:ikonli-core:1.9.0" level="project" /> |
|||
<orderEntry type="library" name="Maven: org.kordamp.ikonli:ikonli-javafx:1.9.0" level="project" /> |
|||
</component> |
|||
</module> |
pom.xml
@@ -0,0 +1,122 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" |
|||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
|||
<modelVersion>4.0.0</modelVersion> |
|||
|
|||
<groupId>thesis</groupId> |
|||
<artifactId>corpus-analyzer</artifactId> |
|||
<version>1.2</version> |
|||
|
|||
<dependencies> |
|||
<dependency> |
|||
<groupId>commons-io</groupId> |
|||
<artifactId>commons-io</artifactId> |
|||
<version>2.5</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.apache.commons</groupId> |
|||
<artifactId>commons-lang3</artifactId> |
|||
<version>3.6</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>com.googlecode.json-simple</groupId> |
|||
<artifactId>json-simple</artifactId> |
|||
<version>1.1.1</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.apache.commons</groupId> |
|||
<artifactId>commons-csv</artifactId> |
|||
<version>1.4</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.controlsfx</groupId> |
|||
<artifactId>controlsfx</artifactId> |
|||
<version>8.40.13</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.rocksdb</groupId> |
|||
<artifactId>rocksdbjni</artifactId> |
|||
<version>5.7.3</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.apache.logging.log4j</groupId> |
|||
<artifactId>log4j-api</artifactId> |
|||
<version>2.9.0</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.apache.logging.log4j</groupId> |
|||
<artifactId>log4j-core</artifactId> |
|||
<version>2.9.0</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.kordamp.ikonli</groupId> |
|||
<artifactId>ikonli-fontawesome-pack</artifactId> |
|||
<version>1.9.0</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.kordamp.ikonli</groupId> |
|||
<artifactId>ikonli-javafx</artifactId> |
|||
<version>1.9.0</version> |
|||
</dependency> |
|||
</dependencies> |
|||
|
|||
<build> |
|||
<plugins> |
|||
<plugin> |
|||
<!-- packages dependencies into the jar --> |
|||
<groupId>org.apache.maven.plugins</groupId> |
|||
<artifactId>maven-assembly-plugin</artifactId> |
|||
<executions> |
|||
<execution> |
|||
<phase>package</phase> |
|||
<goals> |
|||
<goal>single</goal> |
|||
</goals> |
|||
<configuration> |
|||
<archive> |
|||
<manifest> |
|||
<mainClass>gui.GUIController</mainClass> |
|||
</manifest> |
|||
</archive> |
|||
<descriptorRefs> |
|||
<descriptorRef>jar-with-dependencies</descriptorRef> |
|||
</descriptorRefs> |
|||
<appendAssemblyId>false</appendAssemblyId> |
|||
<outputDirectory>artifact</outputDirectory> |
|||
<finalName>Corpus_Analyzer_${version}</finalName> |
|||
</configuration> |
|||
</execution> |
|||
</executions> |
|||
</plugin> |
|||
<plugin> |
|||
<!-- JavaFX --> |
|||
<groupId>com.zenjava</groupId> |
|||
<artifactId>javafx-maven-plugin</artifactId> |
|||
<version>8.6.0</version> |
|||
<configuration> |
|||
<mainClass>gui.GUIController</mainClass> |
|||
<verbose>true</verbose> |
|||
</configuration> |
|||
<executions> |
|||
<execution> |
|||
<id>create-jfxjar</id> |
|||
<phase>package</phase> |
|||
<goals> |
|||
<goal>build-jar</goal> |
|||
</goals> |
|||
</execution> |
|||
</executions> |
|||
</plugin> |
|||
<plugin> |
|||
<groupId>org.apache.maven.plugins</groupId> |
|||
<artifactId>maven-compiler-plugin</artifactId> |
|||
<configuration> |
|||
<source>1.8</source> |
|||
<target>1.8</target> |
|||
</configuration> |
|||
</plugin> |
|||
</plugins> |
|||
</build> |
|||
|
|||
</project> |
src/main/java/META-INF/MANIFEST.MF
@@ -0,0 +1,3 @@ |
|||
Manifest-Version: 1.0 |
|||
Main-Class: gui.GUIController |
|||
|
src/main/java/alg/Common.java
@@ -0,0 +1,15 @@ |
|||
package alg; |
|||
|
|||
import java.util.Map; |
|||
import java.util.concurrent.atomic.AtomicLong; |
|||
|
|||
public class Common { |
|||
public static <K, V> void updateMap(Map<K, AtomicLong> map, K o) { |
|||
// if not in map |
|||
AtomicLong r = map.putIfAbsent(o, new AtomicLong(1)); |
|||
|
|||
// else |
|||
if (r != null) |
|||
map.get(o).incrementAndGet(); |
|||
} |
|||
} |
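Common.updateMap is the frequency-counting primitive used by the analyzers below: it inserts a counter of 1 for an unseen key and increments the existing counter otherwise. A minimal usage sketch follows (not part of the commit; the CommonDemo class and the ConcurrentHashMap are illustrative assumptions, since the real callers pass in the maps held by Statistics/StatisticsNew):

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

public class CommonDemo {
    public static void main(String[] args) {
        // a concurrent map is assumed because the counters are updated from ForkJoin worker threads
        Map<String, AtomicLong> counts = new ConcurrentHashMap<>();

        for (String token : new String[] { "je", "in", "je" }) {
            alg.Common.updateMap(counts, token);
        }

        counts.forEach((k, v) -> System.out.println(k + " = " + v.get()));
        // prints: je = 2, in = 1 (iteration order not guaranteed)
    }
}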
src/main/java/alg/XML_processing.java
@@ -0,0 +1,794 @@ |
|||
package alg; |
|||
|
|||
import static data.Enums.solar.SolarFilters.*; |
|||
|
|||
import java.io.FileInputStream; |
|||
import java.io.FileNotFoundException; |
|||
import java.util.*; |
|||
import java.util.concurrent.ForkJoinPool; |
|||
|
|||
import javax.xml.namespace.QName; |
|||
import javax.xml.stream.XMLEventReader; |
|||
import javax.xml.stream.XMLInputFactory; |
|||
import javax.xml.stream.XMLStreamConstants; |
|||
import javax.xml.stream.XMLStreamException; |
|||
import javax.xml.stream.events.*; |
|||
|
|||
import org.apache.logging.log4j.LogManager; |
|||
|
|||
import data.*; |
|||
import gui.ValidationUtil; |
|||
|
|||
public class XML_processing { |
|||
public final static org.apache.logging.log4j.Logger logger = LogManager.getLogger(XML_processing.class); |
|||
|
|||
// public static void processCorpus(Statistics stats) { |
|||
// // we can preset the list's size, so there won't be a need to resize it |
|||
// List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); |
|||
// |
|||
// int i = 0; |
|||
// for (File f : Settings.corpus) { |
|||
// i++; |
|||
// readXML(f.toString(), stats); |
|||
// } |
|||
// } |
|||
|
|||
// public static void readXML(String path, Statistics stats) { |
|||
// if (stats.getCorpusType() == CorpusType.GIGAFIDA) { |
|||
// readXMLGigafida(path, stats); |
|||
// } else if (stats.getCorpusType() == CorpusType.GOS) { |
|||
// readXMLGos(path, stats); |
|||
// } else if (stats.getCorpusType() == CorpusType.SOLAR) { |
|||
// readXMLSolar(path, stats); |
|||
// } |
|||
// } |
|||
|
|||
public static void readXML(String path, StatisticsNew stats) { |
|||
if (stats.getCorpus().getCorpusType() == CorpusType.GIGAFIDA |
|||
|| stats.getCorpus().getCorpusType() == CorpusType.CCKRES) { |
|||
readXMLGigafida(path, stats); |
|||
} else if (stats.getCorpus().getCorpusType() == CorpusType.GOS) { |
|||
readXMLGos(path, stats); |
|||
} else if (stats.getCorpus().getCorpusType() == CorpusType.SOLAR) { |
|||
readXMLSolar(path, stats); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* Reads and returns the value of the given header tag, or an empty string if the tag is not found. |
|||
* E.g. the title tag, which is used to discern the corpus type. |
|||
* Note: only the value of the first occurrence of the given tag name is returned. |
|||
*/ |
|||
public static String readXMLHeaderTag(String path, String tag) { |
|||
XMLInputFactory factory = XMLInputFactory.newInstance(); |
|||
XMLEventReader eventReader = null; |
|||
|
|||
try { |
|||
eventReader = factory.createXMLEventReader(new FileInputStream(path)); |
|||
while (eventReader.hasNext()) { |
|||
XMLEvent xmlEvent = eventReader.nextEvent(); |
|||
if (xmlEvent.isStartElement()) { |
|||
StartElement startElement = xmlEvent.asStartElement(); |
|||
String var = startElement.getName().getLocalPart(); |
|||
|
|||
if (var.equalsIgnoreCase(tag)) { |
|||
return eventReader.nextEvent().asCharacters().getData(); |
|||
} |
|||
} |
|||
} |
|||
} catch (FileNotFoundException | XMLStreamException e) { |
|||
e.printStackTrace(); |
|||
} finally { |
|||
if (eventReader != null) { |
|||
try { |
|||
eventReader.close(); |
|||
} catch (XMLStreamException e) { |
|||
logger.error("closing stream", e); |
|||
} |
|||
} |
|||
} |
|||
return ""; |
|||
} |
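	// Usage sketch (comment added for illustration, not in the original source):
	// the corpus type can be guessed from the first <title> value in a file's header, e.g.
	//   String title = XML_processing.readXMLHeaderTag(path, "title");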
|||
|
|||
private static void fj(List<Sentence> corpus, StatisticsNew stats) { |
|||
ForkJoinPool pool = new ForkJoinPool(); |
|||
|
|||
if (stats.getFilter().getAl() == AnalysisLevel.STRING_LEVEL) { |
|||
alg.ngram.ForkJoin wc = new alg.ngram.ForkJoin(corpus, stats); |
|||
pool.invoke(wc); |
|||
} else if (stats.getFilter().getAl() == AnalysisLevel.WORD_LEVEL) { |
|||
alg.word.ForkJoin wc = new alg.word.ForkJoin(corpus, stats); |
|||
pool.invoke(wc); |
|||
} else { |
|||
// TODO: |
|||
// alg.inflectedJOS.ForkJoin wc = new alg.inflectedJOS.ForkJoin(corpus, stats); |
|||
// pool.invoke(wc); |
|||
} |
|||
} |
|||
|
|||
// public static void readXMLGos(String path, Statistics stats) { |
|||
// boolean in_word = false; |
|||
// String taksonomija = ""; |
|||
// String lemma = ""; |
|||
// String msd = ""; |
|||
// String type = stats.isGosOrthMode() ? "orth" : "norm"; // orth & norm |
|||
// |
|||
// List<Word> stavek = new ArrayList<>(); |
|||
// List<Sentence> corpus = new ArrayList<>(); |
|||
// String sentenceDelimiter = "seg"; |
|||
// String taxonomyPrefix = "gos."; |
|||
// |
|||
// try { |
|||
// XMLInputFactory factory = XMLInputFactory.newInstance(); |
|||
// XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path)); |
|||
// |
|||
// while (eventReader.hasNext()) { |
|||
// XMLEvent event = eventReader.nextEvent(); |
|||
// |
|||
// switch (event.getEventType()) { |
|||
// case XMLStreamConstants.START_ELEMENT: |
|||
// |
|||
// StartElement startElement = event.asStartElement(); |
|||
// String qName = startElement.getName().getLocalPart(); |
|||
// |
|||
// // "word" node |
|||
// if (qName.equals("w")) { |
|||
// in_word = true; |
|||
// |
|||
// if (type.equals("norm")) { |
|||
// // make sure we're looking at <w lemma...> and not <w type...> |
|||
// Iterator var = startElement.getAttributes(); |
|||
// ArrayList<Object> attributes = new ArrayList<>(); |
|||
// while (var.hasNext()) { |
|||
// attributes.add(var.next()); |
|||
// } |
|||
// |
|||
// if (attributes.contains("msd")) { |
|||
// msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); |
|||
// } else { |
|||
// msd = null; |
|||
// } |
|||
// |
|||
// if (attributes.contains("lemma")) { |
|||
// lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); |
|||
// } |
|||
// } |
|||
// } |
|||
// // taxonomy node |
|||
// else if (qName.equalsIgnoreCase("catRef")) { |
|||
// // there are some term nodes at the beginning that are of no interest to us |
|||
// // they differ by not having the attribute "ref", so test will equal null |
|||
// Attribute test = startElement.getAttributeByName(QName.valueOf("target")); |
|||
// |
|||
// if (test != null) { |
|||
// // keep only taxonomy properties |
|||
// taksonomija = String.valueOf(test.getValue()).replace(taxonomyPrefix, ""); |
|||
// } |
|||
// } else if (qName.equalsIgnoreCase("div")) { |
|||
// type = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue()); |
|||
// |
|||
// } |
|||
// break; |
|||
// |
|||
// case XMLStreamConstants.CHARACTERS: |
|||
// Characters characters = event.asCharacters(); |
|||
// |
|||
// // "word" node value |
|||
// if (in_word) { |
|||
// if (type.equals("norm") && msd != null) { |
|||
// stavek.add(new Word(characters.getData(), lemma, msd)); |
|||
// } else { |
|||
// stavek.add(new Word(characters.getData())); |
|||
// } |
|||
// |
|||
// in_word = false; |
|||
// } |
|||
// break; |
|||
// |
|||
// case XMLStreamConstants.END_ELEMENT: |
|||
// EndElement endElement = event.asEndElement(); |
|||
// |
|||
// // parser reached end of the current sentence |
|||
// if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { |
|||
// // add sentence to corpus |
|||
// corpus.add(new Sentence(stavek, taksonomija, type)); |
|||
// // and start a new one |
|||
// stavek = new ArrayList<>(); |
|||
// |
|||
// /* Invoke Fork-Join when we reach maximum limit of |
|||
// * sentences (because we can't read everything to |
|||
// * memory) or we reach the end of the file. |
|||
// */ |
|||
// if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) { |
|||
// fj(corpus, stats); |
|||
// // empty the current corpus, since we don't need |
|||
// // the data anymore |
|||
// corpus.clear(); |
|||
// } |
|||
// } |
|||
// |
|||
// // backup |
|||
// if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) { |
|||
// fj(corpus, stats); |
|||
// corpus.clear(); |
|||
// } |
|||
// |
|||
// break; |
|||
// } |
|||
// } |
|||
// } catch (FileNotFoundException | XMLStreamException e) { |
|||
// e.printStackTrace(); |
|||
// } |
|||
// } |
|||
|
|||
@SuppressWarnings("unused") |
|||
public static void readXMLSolar(String path, StatisticsNew stats) { |
|||
boolean in_word = false; |
|||
String lemma = ""; |
|||
String msd = ""; |
|||
|
|||
List<Word> stavek = new ArrayList<>(); |
|||
List<Sentence> corpus = new ArrayList<>(); |
|||
|
|||
// used for filter |
|||
Set<String> headTags = new HashSet<>(Arrays.asList("sola", "predmet", "razred", "regija", "tip", "leto")); |
|||
Map<String, String> headBlock = null; |
|||
boolean includeThisBlock = false; |
|||
|
|||
try { |
|||
XMLInputFactory factory = XMLInputFactory.newInstance(); |
|||
XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path)); |
|||
|
|||
while (eventReader.hasNext()) { |
|||
XMLEvent event = eventReader.nextEvent(); |
|||
|
|||
switch (event.getEventType()) { |
|||
case XMLStreamConstants.START_ELEMENT: |
|||
|
|||
StartElement startElement = event.asStartElement(); |
|||
// System.out.println(String.format("%s", startElement.toString())); |
|||
String qName = startElement.getName().getLocalPart(); |
|||
|
|||
// "word" node |
|||
if (qName.equals("w3")) { |
|||
in_word = true; |
|||
|
|||
msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); |
|||
lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); |
|||
} else if (qName.equals("c3")) { |
|||
String c3Content = eventReader.nextEvent().asCharacters().getData(); |
|||
|
|||
if (c3Content.equals(".") && includeThisBlock) { |
|||
// add sentence to corpus |
|||
corpus.add(new Sentence(stavek)); |
|||
// and start a new one |
|||
stavek = new ArrayList<>(); |
|||
|
|||
/* Invoke Fork-Join when we reach maximum limit of |
|||
* sentences (because we can't read everything to |
|||
* memory) or we reach the end of the file. |
|||
*/ |
|||
if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) { |
|||
fj(corpus, stats); |
|||
// empty the current corpus, since we don't need |
|||
// the data anymore |
|||
corpus.clear(); |
|||
} |
|||
} |
|||
} else if (headTags.contains(qName)) { |
|||
String tagContent = eventReader.nextEvent().asCharacters().getData(); |
|||
headBlock.put(qName, tagContent); |
|||
} else if (qName.equals("head")) { |
|||
headBlock = new HashMap<>(); |
|||
} |
|||
|
|||
break; |
|||
|
|||
case XMLStreamConstants.CHARACTERS: |
|||
Characters characters = event.asCharacters(); |
|||
|
|||
// "word" node value |
|||
if (in_word) { |
|||
stavek.add(new Word(characters.getData(), lemma, msd)); |
|||
in_word = false; |
|||
} |
|||
break; |
|||
|
|||
case XMLStreamConstants.END_ELEMENT: |
|||
EndElement endElement = event.asEndElement(); |
|||
String qNameEnd = endElement.getName().getLocalPart(); |
|||
|
|||
if (qNameEnd.equals("head")) { |
|||
// validate and set boolean |
|||
if (validateHeadBlock(headBlock, stats.getFilter().getSolarFilters())) { |
|||
includeThisBlock = true; |
|||
} |
|||
} else if (qNameEnd.equals("body")) { |
|||
// new block, reset filter status |
|||
includeThisBlock = false; |
|||
} |
|||
|
|||
// backup |
|||
if (endElement.getName().getLocalPart().equalsIgnoreCase("korpus")) { |
|||
fj(corpus, stats); |
|||
corpus.clear(); |
|||
} |
|||
|
|||
break; |
|||
} |
|||
} |
|||
} catch (FileNotFoundException | XMLStreamException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* @param readHeadBlock block of tags read from the corpus |
|||
* @param userSetFilter tags with values set by the user |
|||
* |
|||
* @return true if the head block satisfies every user-set filter (or if no filter is set), false otherwise |
|||
*/ |
|||
private static boolean validateHeadBlock(Map<String, String> readHeadBlock, HashMap<String, HashSet<String>> userSetFilter) { |
|||
boolean pass = true; |
|||
|
|||
if (userSetFilter == null) { |
|||
return true; |
|||
} |
|||
|
|||
for (Map.Entry<String, HashSet<String>> filterEntry : userSetFilter.entrySet()) { |
|||
String key = filterEntry.getKey(); |
|||
HashSet<String> valueObject = filterEntry.getValue(); |
|||
|
|||
// if (valueObject instanceof String) { |
|||
// pass = validateHeadBlockEntry(readHeadBlock, key, (String) valueObject); |
|||
// } else |
|||
if (valueObject != null) { |
|||
//noinspection unchecked |
|||
for (String value : valueObject) { |
|||
pass = validateHeadBlockEntry(readHeadBlock, key, value); |
|||
} |
|||
} |
|||
|
|||
if (!pass) { |
|||
// current head block does not include one of the set filters - not likely, but an edge case anyway |
|||
return false; |
|||
} |
|||
} |
|||
|
|||
// if it gets to this point, it passed all the filters |
|||
return true; |
|||
} |
|||
|
|||
private static boolean validateHeadBlockEntry(Map<String, String> readHeadBlock, String userSetKey, String userSetValue) { |
|||
if (!readHeadBlock.keySet().contains(userSetKey)) { |
|||
// current head block does not include one of the set filters - not likely, but an edge case anyway |
|||
return false; |
|||
} else if (!readHeadBlock.get(userSetKey).equals(userSetValue)) { |
|||
// different values -> doesn't pass the filter |
|||
return false; |
|||
} |
|||
|
|||
return true; |
|||
} |
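	// Example (comment added for illustration; the values are made up): with a user filter
	// {"leto" -> {"2009"}}, a head block containing leto=2009 passes, a block with leto=2010
	// fails validateHeadBlockEntry, and a block with no "leto" tag at all is rejected as well.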
|||
|
|||
/** |
|||
* Parses an XML header for taxonomy information (if the corpus type supports it) or for the Solar filters. |
|||
* |
|||
* @param filepath path to the XML file whose header is read |
|||
* @param corpusIsSplit whether the corpus is split into multiple XML files or grouped into one large XML file |
|||
* @param corpusType type of the corpus being read |
|||
*/ |
|||
public static Object readXmlHeaderTaxonomyAndFilters(String filepath, boolean corpusIsSplit, CorpusType corpusType) { |
|||
boolean parseTaxonomy = Tax.getCorpusTypesWithTaxonomy().contains(corpusType); |
|||
// solar |
|||
Set<String> headTags = null; |
|||
HashMap<String, HashSet<String>> resultFilters = new HashMap<>(); |
|||
// taxonomy corpora |
|||
HashSet<String> resultTaxonomy = new HashSet<>(); |
|||
|
|||
String headTagName; |
|||
|
|||
if (corpusType == CorpusType.SOLAR) { |
|||
headTagName = "head"; |
|||
// used for filter |
|||
headTags = new HashSet<>(Arrays.asList(SOLA, PREDMET, RAZRED, REGIJA, TIP, LETO)); |
|||
|
|||
// init results now to avoid null pointers |
|||
headTags.forEach(f -> resultFilters.put(f, new HashSet<>())); |
|||
} else { |
|||
headTagName = "teiHeader"; |
|||
} |
|||
|
|||
XMLInputFactory factory = XMLInputFactory.newInstance(); |
|||
XMLEventReader xmlEventReader = null; |
|||
try { |
|||
xmlEventReader = factory.createXMLEventReader(new FileInputStream(filepath)); |
|||
boolean insideHeader = false; |
|||
|
|||
while (xmlEventReader.hasNext()) { |
|||
XMLEvent xmlEvent = xmlEventReader.nextEvent(); |
|||
|
|||
if (xmlEvent.isStartElement()) { |
|||
StartElement startElement = xmlEvent.asStartElement(); |
|||
String elementName = startElement.getName().getLocalPart(); |
|||
|
|||
if (elementName.equalsIgnoreCase(headTagName)) { |
|||
// if the corpus is split into files, we skip bodies |
|||
// this toggle is true when we're inside a header (next block of code executes) |
|||
// and false when we're not (skip reading unnecessary attributes) |
|||
insideHeader = true; |
|||
} |
|||
|
|||
if (insideHeader) { |
|||
if (parseTaxonomy && elementName.equalsIgnoreCase("catRef")) { |
|||
HashMap<String, String> atts = extractAttributes(startElement); |
|||
String debug = ""; |
|||
|
|||
String tax = startElement.getAttributeByName(QName.valueOf("target")) |
|||
.getValue() |
|||
.replace("#", ""); |
|||
|
|||
resultTaxonomy.add(tax); |
|||
} else if (!parseTaxonomy && headTags.contains(elementName)) { |
|||
String tagContent = xmlEventReader.nextEvent().asCharacters().getData(); |
|||
resultFilters.get(elementName).add(tagContent); |
|||
} |
|||
} |
|||
} else if (xmlEvent.isEndElement() && corpusIsSplit && isEndElementEndOfHeader(xmlEvent, headTagName)) { |
|||
// if the corpus is split into multiple files, each with only one header block per file |
|||
// that means we should stop after we reach the end of the header |
|||
return parseTaxonomy ? resultTaxonomy : resultFilters; |
|||
} else if (xmlEvent.isEndElement() && !corpusIsSplit && isEndElementEndOfHeader(xmlEvent, headTagName)) { |
|||
// whole corpus in one file, so we have to continue reading in order to find all header blocks |
|||
insideHeader = false; |
|||
} |
|||
} |
|||
} catch (XMLStreamException e) { |
|||
logger.error("Streaming error", e); |
|||
return parseTaxonomy ? resultTaxonomy : resultFilters; |
|||
} catch (FileNotFoundException e) { |
|||
logger.error("File not found", e); |
|||
return parseTaxonomy ? resultTaxonomy : resultFilters; |
|||
// TODO: keep a list of files that threw this error and a dirty boolean marker -> if true, alert user |
|||
} finally { |
|||
if (xmlEventReader != null) { |
|||
try { |
|||
xmlEventReader.close(); |
|||
} catch (XMLStreamException e) { |
|||
logger.error("closing stream", e); |
|||
} |
|||
} |
|||
} |
|||
return parseTaxonomy ? resultTaxonomy : resultFilters; |
|||
} |
|||
|
|||
private static boolean isEndElementEndOfHeader(XMLEvent event, String headerTag) { |
|||
return event.asEndElement() |
|||
.getName() |
|||
.getLocalPart() |
|||
.equalsIgnoreCase(headerTag); |
|||
} |
|||
|
|||
@SuppressWarnings("Duplicates") |
|||
public static boolean readXMLGigafida(String path, StatisticsNew stats) { |
|||
boolean inWord = false; |
|||
ArrayList<String> currentFiletaxonomy = new ArrayList<>(); |
|||
String lemma = ""; |
|||
String msd = ""; |
|||
|
|||
List<Word> sentence = new ArrayList<>(); |
|||
List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it |
|||
String sentenceDelimiter = "s"; |
|||
|
|||
XMLEventReader eventReader = null; |
|||
try { |
|||
XMLInputFactory factory = XMLInputFactory.newInstance(); |
|||
eventReader = factory.createXMLEventReader(new FileInputStream(path)); |
|||
|
|||
while (eventReader.hasNext()) { |
|||
XMLEvent event = eventReader.nextEvent(); |
|||
|
|||
switch (event.getEventType()) { |
|||
case XMLStreamConstants.START_ELEMENT: |
|||
StartElement startElement = event.asStartElement(); |
|||
String qName = startElement.getName().getLocalPart(); |
|||
|
|||
// "word" node |
|||
if (qName.equals("w")) { |
|||
inWord = true; |
|||
|
|||
msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); |
|||
lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); |
|||
} |
|||
// taxonomy node |
|||
else if (qName.equalsIgnoreCase("catRef")) { |
|||
// there are some term nodes at the beginning that are of no interest to us |
|||
// they differ by not having the "target" attribute, so tax will be null for them |
|||
Attribute tax = startElement.getAttributeByName(QName.valueOf("target")); |
|||
|
|||
if (tax != null) { |
|||
// keep only taxonomy properties |
|||
currentFiletaxonomy.add(String.valueOf(tax.getValue()).replace("#", "")); |
|||
} |
|||
} |
|||
break; |
|||
|
|||
case XMLStreamConstants.CHARACTERS: |
|||
Characters characters = event.asCharacters(); |
|||
|
|||
// "word" node value |
|||
if (inWord) { |
|||
String word = characters.getData(); |
|||
sentence.add(new Word(word, lemma, msd)); |
|||
inWord = false; |
|||
} |
|||
break; |
|||
|
|||
case XMLStreamConstants.END_ELEMENT: |
|||
EndElement endElement = event.asEndElement(); |
|||
|
|||
String var = endElement.getName().getLocalPart(); |
|||
String debug = ""; |
|||
|
|||
// parser reached end of the current sentence |
|||
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { |
|||
// add sentence to corpus if it passes filters |
|||
sentence = runFilters(sentence, stats.getFilter()); |
|||
|
|||
if (!ValidationUtil.isEmpty(sentence)) { |
|||
corpus.add(new Sentence(sentence)); |
|||
} |
|||
|
|||
// and start a new one |
|||
sentence = new ArrayList<>(); |
|||
|
|||
/* Invoke Fork-Join when we reach maximum limit of |
|||
* sentences (because we can't read everything to |
|||
* memory) or we reach the end of the file. |
|||
*/ |
|||
if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) { |
|||
fj(corpus, stats); |
|||
// empty the current corpus, since we don't need the data anymore |
|||
corpus.clear(); |
|||
|
|||
// TODO: if (stats.isUseDB()) { |
|||
// stats.storeTmpResultsToDB(); |
|||
// } |
|||
} |
|||
} else if (endElement.getName().getLocalPart().equals("teiHeader")) { |
|||
// before proceeding to read this file, make sure that taxonomy filters are a match |
|||
|
|||
if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) { |
|||
currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection |
|||
|
|||
if (currentFiletaxonomy.isEmpty()) { |
|||
// taxonomies don't match so stop |
|||
return false; |
|||
} |
|||
} |
|||
} |
|||
|
|||
// fallback |
|||
else if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) { |
|||
fj(corpus, stats); |
|||
corpus.clear(); |
|||
|
|||
// TODO: if (stats.isUseDB()) { |
|||
// stats.storeTmpResultsToDB(); |
|||
// } |
|||
} |
|||
|
|||
break; |
|||
} |
|||
} |
|||
} catch (FileNotFoundException | XMLStreamException e) { |
|||
e.printStackTrace(); |
|||
} finally { |
|||
if (eventReader != null) { |
|||
try { |
|||
eventReader.close(); |
|||
} catch (XMLStreamException e) { |
|||
logger.error("closing stream", e); |
|||
} |
|||
} |
|||
} |
|||
|
|||
return true; |
|||
} |
|||
|
|||
@SuppressWarnings("Duplicates") |
|||
public static boolean readXMLGos(String path, StatisticsNew stats) { |
|||
boolean inWord = false; |
|||
boolean inOrthDiv = false; |
|||
boolean computeForOrth = stats.getCorpus().isGosOrthMode(); |
|||
ArrayList<String> currentFiletaxonomy = new ArrayList<>(); |
|||
String lemma = ""; |
|||
String msd = ""; |
|||
|
|||
List<Word> sentence = new ArrayList<>(); |
|||
List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it |
|||
String sentenceDelimiter = "seg"; |
|||
|
|||
String gosType = stats.getFilter().hasMsd() ? "norm" : "orth"; // orth & norm |
|||
|
|||
XMLEventReader eventReader = null; |
|||
|
|||
boolean includeFile = true; |
|||
|
|||
try { |
|||
XMLInputFactory factory = XMLInputFactory.newInstance(); |
|||
eventReader = factory.createXMLEventReader(new FileInputStream(path)); |
|||
|
|||
while (eventReader.hasNext()) { |
|||
XMLEvent event = eventReader.nextEvent(); |
|||
// System.out.print(String.format("%s", event.toString().replaceAll("\\['http://www.tei-c.org/ns/1.0'\\]::", ""))); |
|||
|
|||
switch (event.getEventType()) { |
|||
case XMLStreamConstants.START_ELEMENT: |
|||
StartElement startElement = event.asStartElement(); |
|||
String qName = startElement.getName().getLocalPart(); |
|||
|
|||
if (qName.equals("div")) { |
|||
HashMap<String, String> atts = extractAttributes(startElement); |
|||
|
|||
if (atts.keySet().contains("type")) { |
|||
inOrthDiv = atts.get("type").equals("orth"); |
|||
} |
|||
} |
|||
|
|||
// "word" node |
|||
if (qName.equals("w")) { |
|||
// check that it's not a type |
|||
HashMap<String, String> atts = extractAttributes(startElement); |
|||
|
|||
if (!atts.containsKey("type")) { |
|||
inWord = true; |
|||
|
|||
if (atts.containsKey("msd")) { |
|||
msd = atts.get("msd"); |
|||
|
|||
} |
|||
if (atts.containsKey("lemma")) { |
|||
lemma = atts.get("lemma"); |
|||
} |
|||
// |
|||
// if (!inOrthDiv) { |
|||
// msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue()); |
|||
// lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue()); |
|||
// } |
|||
} |
|||
|
|||
// } |
|||
} |
|||
// taxonomy node |
|||
else if (qName.equalsIgnoreCase("catRef")) { |
|||
// there are some term nodes at the beginning that are of no interest to us |
|||
// they differ by not having the "target" attribute, so tax will be null for them |
|||
Attribute tax = startElement.getAttributeByName(QName.valueOf("target")); |
|||
|
|||
if (tax != null) { |
|||
// keep only taxonomy properties |
|||
currentFiletaxonomy.add(String.valueOf(tax.getValue())); |
|||
} |
|||
} else if (qName.equalsIgnoreCase("div")) { |
|||
gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue()); |
|||
} |
|||
break; |
|||
|
|||
case XMLStreamConstants.CHARACTERS: |
|||
// "word" node value |
|||
if (inWord) { |
|||
Characters characters = event.asCharacters(); |
|||
if (gosType.equals("norm") && msd != null) { |
|||
sentence.add(new Word(characters.getData(), lemma, msd)); |
|||
} else { |
|||
sentence.add(new Word(characters.getData())); |
|||
} |
|||
|
|||
inWord = false; |
|||
} |
|||
break; |
|||
|
|||
case XMLStreamConstants.END_ELEMENT: |
|||
EndElement endElement = event.asEndElement(); |
|||
|
|||
// parser reached end of the current sentence |
|||
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) { |
|||
// add sentence to corpus if it passes filters |
|||
boolean saveSentence = computeForOrth == inOrthDiv; |
|||
|
|||
if (includeFile && saveSentence && !ValidationUtil.isEmpty(sentence)) { |
|||
sentence = runFilters(sentence, stats.getFilter()); |
|||
corpus.add(new Sentence(sentence)); |
|||
} |
|||
|
|||
// and start a new one |
|||
sentence = new ArrayList<>(); |
|||
|
|||
/* Invoke Fork-Join when we reach maximum limit of |
|||
* sentences (because we can't read everything to |
|||
* memory) or we reach the end of the file. |
|||
*/ |
|||
if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) { |
|||
fj(corpus, stats); |
|||
// empty the current corpus, since we don't need |
|||
// the data anymore |
|||
corpus.clear(); |
|||
} |
|||
} else if (endElement.getName().getLocalPart().equals("teiHeader")) { |
|||
// before proceeding to read this file, make sure that taxonomy filters are a match |
|||
if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) { |
|||
currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection |
|||
|
|||
// disregard this entry if taxonomies don't match |
|||
includeFile = !currentFiletaxonomy.isEmpty(); |
|||
|
|||
currentFiletaxonomy = new ArrayList<>(); |
|||
} |
|||
} |
|||
|
|||
// backup |
|||
else if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) { |
|||
fj(corpus, stats); |
|||
corpus.clear(); |
|||
} |
|||
|
|||
break; |
|||
} |
|||
} |
|||
} catch (FileNotFoundException | XMLStreamException e) { |
|||
e.printStackTrace(); |
|||
} finally { |
|||
if (eventReader != null) { |
|||
try { |
|||
eventReader.close(); |
|||
} catch (XMLStreamException e) { |
|||
logger.error("closing stream", e); |
|||
} catch (Exception e) { |
|||
logger.error("general error", e); |
|||
} |
|||
} |
|||
} |
|||
|
|||
return true; |
|||
} |
|||
|
|||
/** |
|||
* Runs the sentence through some filters, so we don't do calculations when unnecessary. |
|||
* Filters: |
|||
* <ol> |
|||
* <li><b>Ngrams:</b> omit sentences that are shorter than the ngram value (e.g. 3 gram of a single word sentence)</li> |
|||
* <li><b>Letter ngrams:</b> omit words that are shorter than the specified string length (e.g. combinations of 3 letters when the word consists of only 2 letters)</li> |
|||
* </ol> |
|||
* |
|||
* @return null (if the sentence fails 1.), otherwise the sentence, possibly with some words removed (2.) |
|||
*/ |
|||
private static List<Word> runFilters(List<Word> sentence, Filter filter) { |
|||
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) { |
|||
// ngram level: if not 0 must be less than or equal to number of words in this sentence. |
|||
if (filter.getNgramValue() > 0 && filter.getNgramValue() > sentence.size()) { |
|||
return null; |
|||
} |
|||
|
|||
// if we're calculating values for letters, omit words that are shorter than string length |
|||
if (filter.getNgramValue() == 0) { |
|||
sentence.removeIf(w -> (filter.getCalculateFor() == CalculateFor.WORD && w.getWord().length() < filter.getStringLength()) |
|||
|| (filter.getCalculateFor() == CalculateFor.LEMMA && w.getLemma().length() < filter.getStringLength())); |
|||
} |
|||
} |
|||
|
|||
return sentence; |
|||
} |
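	// Example (comment added for illustration): with ngramValue = 3 a two-word sentence is
	// dropped (runFilters returns null and the caller discards it), while with ngramValue = 0
	// and stringLength = 4 any word (or lemma, depending on calculateFor) shorter than four
	// characters is removed before letter n-grams are counted.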
|||
|
|||
private static HashMap<String, String> extractAttributes(StartElement se) { |
|||
Iterator attributesIt = se.getAttributes(); |
|||
HashMap<String, String> atts = new HashMap<>(); |
|||
|
|||
while (attributesIt.hasNext()) { |
|||
Attribute a = (Attribute) attributesIt.next(); |
|||
atts.put(a.getName().getLocalPart(), a.getValue()); |
|||
} |
|||
|
|||
return atts; |
|||
} |
|||
} |
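All three readers above follow the same StAX pattern: walk the event stream, remember the attributes when a start element of interest appears, attach the following character data to the current word, and flush the accumulated sentences to the fork-join step at sentence or document boundaries. A self-contained sketch of that pattern (not part of the commit; the StaxSketch class, the inline XML snippet and the MSD value are illustrative):

import java.io.StringReader;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;

public class StaxSketch {
    public static void main(String[] args) throws Exception {
        String xml = "<s><w lemma=\"biti\" msd=\"Gp-ste-n\">je</w></s>";
        XMLEventReader reader = XMLInputFactory.newInstance()
                .createXMLEventReader(new StringReader(xml));

        boolean inWord = false;
        String lemma = "", msd = "";

        while (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();
            if (event.isStartElement()) {
                StartElement se = event.asStartElement();
                // remember the attributes of the <w> element; its text follows as a CHARACTERS event
                if (se.getName().getLocalPart().equals("w")) {
                    inWord = true;
                    lemma = se.getAttributeByName(QName.valueOf("lemma")).getValue();
                    msd = se.getAttributeByName(QName.valueOf("msd")).getValue();
                }
            } else if (event.isCharacters() && inWord) {
                // prints: je / biti / Gp-ste-n
                System.out.println(event.asCharacters().getData() + " / " + lemma + " / " + msd);
                inWord = false;
            }
        }
        reader.close();
    }
}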
src/main/java/alg/inflectedJOS/ForkJoin.java
@@ -0,0 +1,67 @@ |
|||
package alg.inflectedJOS; |
|||
|
|||
import java.util.List; |
|||
import java.util.concurrent.RecursiveAction; |
|||
|
|||
import data.Sentence; |
|||
import data.Statistics; |
|||
|
|||
public class ForkJoin extends RecursiveAction { |
|||
private static final long serialVersionUID = -1260951004477299634L; |
|||
|
|||
private static final int ACCEPTABLE_SIZE = 1000; |
|||
private List<Sentence> corpus; |
|||
private Statistics stats; |
|||
private int start; |
|||
private int end; |
|||
|
|||
|
|||
/** |
|||
* Constructor for subproblems. |
|||
*/ |
|||
private ForkJoin(List<Sentence> corpus, int start, int end, Statistics stats) { |
|||
this.corpus = corpus; |
|||
this.start = start; |
|||
this.end = end; |
|||
this.stats = stats; |
|||
} |
|||
|
|||
/** |
|||
* Default constructor for the initial problem |
|||
*/ |
|||
public ForkJoin(List<Sentence> corpus, Statistics stats) { |
|||
this.corpus = corpus; |
|||
this.start = 0; |
|||
this.end = corpus.size(); |
|||
this.stats = stats; |
|||
} |
|||
|
|||
private void computeDirectly() { |
|||
List<Sentence> subCorpus = corpus.subList(start, end); |
|||
|
|||
if (stats.isTaxonomySet()) { |
|||
InflectedJOSCount.calculateForAll(subCorpus, stats, stats.getInflectedJosTaxonomy()); |
|||
} else { |
|||
InflectedJOSCount.calculateForAll(subCorpus, stats, null); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
protected void compute() { |
|||
int subCorpusSize = end - start; |
|||
|
|||
if (subCorpusSize < ACCEPTABLE_SIZE) { |
|||
computeDirectly(); |
|||
} else { |
|||
int mid = start + subCorpusSize / 2; |
|||
ForkJoin left = new ForkJoin(corpus, start, mid, stats); |
|||
ForkJoin right = new ForkJoin(corpus, mid, end, stats); |
|||
|
|||
// fork (push to queue)-> compute -> join |
|||
left.fork(); |
|||
right.fork(); |
|||
left.join(); |
|||
right.join(); |
|||
} |
|||
} |
|||
} |
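This RecursiveAction splits the sentence list in half until a chunk falls under ACCEPTABLE_SIZE and then processes that chunk directly. A generic, self-contained sketch of the same divide-and-conquer pattern (not part of the commit; SplitTask and its integer-summing payload are illustrative, and invokeAll stands in for the explicit fork/fork/join/join sequence above):

import java.util.List;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveAction;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class SplitTask extends RecursiveAction {
    private static final int ACCEPTABLE_SIZE = 1000;
    private final List<Integer> data;
    private final int start, end;
    private final AtomicLong result;

    SplitTask(List<Integer> data, int start, int end, AtomicLong result) {
        this.data = data; this.start = start; this.end = end; this.result = result;
    }

    @Override
    protected void compute() {
        if (end - start < ACCEPTABLE_SIZE) {
            // "computeDirectly": process the sublist in this worker thread
            data.subList(start, end).forEach(result::addAndGet);
        } else {
            // split in half and let the pool schedule both halves
            int mid = start + (end - start) / 2;
            invokeAll(new SplitTask(data, start, mid, result),
                      new SplitTask(data, mid, end, result));
        }
    }

    public static void main(String[] args) {
        List<Integer> data = IntStream.range(0, 5000).boxed().collect(Collectors.toList());
        AtomicLong sum = new AtomicLong();
        new ForkJoinPool().invoke(new SplitTask(data, 0, data.size(), sum));
        System.out.println(sum.get()); // 12497500
    }
}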
src/main/java/alg/inflectedJOS/InflectedJOSCount.java
@@ -0,0 +1,170 @@ |
|||
package alg.inflectedJOS; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
|
|||
import org.apache.commons.lang3.StringUtils; |
|||
|
|||
import alg.Common; |
|||
import data.Sentence; |
|||
import data.Statistics; |
|||
import data.StatisticsNew; |
|||
import data.Word; |
|||
|
|||
public class InflectedJOSCount { |
|||
|
|||
public static HashMap<Integer, ArrayList<ArrayList<Integer>>> indices; |
|||
|
|||
// static { |
|||
// // calculate all possible combinations of indices we will substitute with a '-' for substring statistics |
|||
// indices = new HashMap<>(); |
|||
// for (int i = 5; i <= 8; i++) { |
|||
// indices.put(i, calculateCombinations(i)); |
|||
// } |
|||
// } |
|||
// |
|||
// private static List<Integer> calculateCombinations(int i) { |
|||
// int arr[] = {1, 2, 3, 4, 5}; |
|||
// int r = 3; |
|||
// int n = arr.length; |
|||
// ArrayList<ArrayList<Integer>> result = new ArrayList<>(); |
|||
// |
|||
// return printCombination(arr, n, r); |
|||
// } |
|||
// |
|||
// /* arr[] ---> Input Array |
|||
// data[] ---> Temporary array to store current combination |
|||
// start & end ---> Staring and Ending indexes in arr[] |
|||
// index ---> Current index in data[] |
|||
// r ---> Size of a combination to be printed */ |
|||
// static void combinationUtil(int arr[], int data[], int start, |
|||
// int end, int index, int r, ArrayList<ArrayList<Integer>> result) { |
|||
// // Current combination is ready to be printed, print it |
|||
// ArrayList<Integer> tmpResult = new ArrayList<>(); |
|||
// |
|||
// if (index == r) { |
|||
// ArrayList<Integer> tmpResult = new ArrayList<>(); |
|||
// for (int j = 0; j < r; j++) |
|||
// System.out.print(data[j] + " "); |
|||
// System.out.println(""); |
|||
// return; |
|||
// } |
|||
// |
|||
// // replace index with all possible elements. The condition |
|||
// // "end-i+1 >= r-index" makes sure that including one element |
|||
// // at index will make a combination with remaining elements |
|||
// // at remaining positions |
|||
// for (int i = start; i <= end && end - i + 1 >= r - index; i++) { |
|||
// data[index] = arr[i]; |
|||
// combinationUtil(arr, data, i + 1, end, index + 1, r); |
|||
// } |
|||
// } |
|||
// |
|||
// // The main function that prints all combinations of size r |
|||
// // in arr[] of size n. This function mainly uses combinationUtil() |
|||
// static void printCombination(int arr[], int n, int r) { |
|||
// // A temporary array to store all combination one by one |
|||
// int data[] = new int[r]; |
|||
// |
|||
// // Print all combination using temprary array 'data[]' |
|||
// combinationUtil(arr, data, 0, n - 1, 0, r); |
|||
// } |
|||
|
|||
// public static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) { |
|||
// for (Sentence s : corpus) { |
|||
// // disregard if wrong taxonomy |
|||
// if (!(s.getTaxonomy().startsWith(taxonomy))) { |
|||
// continue; |
|||
// } |
|||
// |
|||
// calculateCommon(s, stats.result); |
|||
// |
|||
// for (Word word : s.getWords()) { |
|||
// // skip if current word is not inflected |
|||
// if (!(word.getMsd().length() > 0)) { |
|||
// continue; |
|||
// } |
|||
// |
|||
// String msd = word.getMsd(); |
|||
// |
|||
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1))); |
|||
// |
|||
// for (int i = 1; i < msd.length(); i++) { |
|||
// entry.setCharAt(i, msd.charAt(i)); |
|||
// Common.updateMap(stats.result, entry.toString()); |
|||
// entry.setCharAt(i, '-'); |
|||
// } |
|||
// } |
|||
// } |
|||
// } |
|||
|
|||
// public static void calculateForAll(List<Sentence> corpus, Statistics stats) { |
|||
// for (Sentence s : corpus) { |
|||
// for (Word word : s.getWords()) { |
|||
// if (!(word.getMsd().length() > 0)) { |
|||
// continue; |
|||
// } |
|||
// |
|||
// String msd = word.getMsd(); |
|||
// |
|||
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1))); |
|||
// |
|||
// for (int i = 1; i < msd.length(); i++) { |
|||
// entry.setCharAt(i, msd.charAt(i)); |
|||
// Common.updateMap(stats.result, entry.toString()); |
|||
// entry.setCharAt(i, '-'); |
|||
// } |
|||
// } |
|||
// } |
|||
// } |
|||
|
|||
static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) { |
|||
for (Sentence s : corpus) { |
|||
// disregard if wrong taxonomy |
|||
if (taxonomy != null && !(s.getTaxonomy().startsWith(taxonomy))) { |
|||
continue; |
|||
} |
|||
|
|||
for (Word word : s.getWords()) { |
|||
// skip if current word is not inflected |
|||
if (!(word.getMsd().length() > 0)) { |
|||
continue; |
|||
} |
|||
|
|||
String msd = word.getMsd(); |
|||
|
|||
StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1))); |
|||
|
|||
for (int i = 1; i < msd.length(); i++) { |
|||
entry.setCharAt(i, msd.charAt(i)); |
|||
Common.updateMap(stats.result, entry.toString()); |
|||
entry.setCharAt(i, '-'); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats, String taxonomy) { |
|||
for (Sentence s : corpus) { |
|||
|
|||
for (Word word : s.getWords()) { |
|||
// skip if current word is not inflected |
|||
// // TODO: if has defined msd and is of correct type (create a set) |
|||
// if (!(word.getMsd().length() > 0)) { |
|||
// continue; |
|||
// } |
|||
|
|||
String msd = word.getMsd(); |
|||
|
|||
StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1))); |
|||
|
|||
for (int i = 1; i < msd.length(); i++) { |
|||
entry.setCharAt(i, msd.charAt(i)); |
|||
stats.updateResults(entry.toString()); |
|||
entry.setCharAt(i, '-'); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
} |
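Both calculateForAll variants record, for every inflected word, one hit per single-attribute mask of its MSD: the category letter is kept and exactly one further position is revealed at a time. A small sketch of what one MSD contributes (not part of the commit; MsdMaskDemo and the example MSD string are illustrative):

import org.apache.commons.lang3.StringUtils;

public class MsdMaskDemo {
    public static void main(String[] args) {
        String msd = "Ggnste";
        // start from the category letter followed by dashes, exactly as in calculateForAll
        StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', msd.length() - 1));

        for (int i = 1; i < msd.length(); i++) {
            entry.setCharAt(i, msd.charAt(i));
            System.out.println(entry);   // the key passed to stats.updateResults(...)
            entry.setCharAt(i, '-');     // hide the position again before revealing the next one
        }
        // prints: Gg----, G-n---, G--s--, G---t-, G----e
    }
}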
src/main/java/alg/inflectedJOS/WordFormation.java
@@ -0,0 +1,131 @@ |
|||
package alg.inflectedJOS; |
|||
|
|||
import java.util.HashMap; |
|||
import java.util.HashSet; |
|||
import java.util.Map; |
|||
import java.util.concurrent.atomic.AtomicLong; |
|||
import java.util.stream.Collectors; |
|||
|
|||
import data.Enums.InflectedJosTypes; |
|||
import data.StatisticsNew; |
|||
import gui.ValidationUtil; |
|||
import util.Combinations; |
|||
|
|||
// adapted from http://www.geeksforgeeks.org/print-all-possible-combinations-of-r-elements-in-a-given-array-of-size-n/ |
|||
public class WordFormation { |
|||
private static HashMap<String, Long> josTypeResult; |
|||
private static Object[][] tmpResults; |
|||
|
|||
private static HashMap<Integer, HashSet<HashSet<Integer>>> indices; |
|||
|
|||
static { |
|||
indices = new HashMap<>(); |
|||
|
|||
for (int i = 4; i <= 8; i++) { |
|||
indices.put(i, Combinations.generateIndices(i)); |
|||
} |
|||
} |
|||
|
|||
public static void calculateStatistics(StatisticsNew stat) { |
|||
Map<String, AtomicLong> result = stat.getResult(); |
|||
|
|||
// 1. filter - keep only inflected types |
|||
result.keySet().removeIf(x -> !InflectedJosTypes.inflectedJosTypes.contains(x.charAt(0))); |
|||
|
|||
// 2. for each inflected type get all possible subcombinations |
|||
for (Character josChar : InflectedJosTypes.inflectedJosTypes) { |
|||
josTypeResult = new HashMap<>(); |
|||
|
|||
// filter out results for a single word type |
|||
Map<String, AtomicLong> singleTypeResults = result.entrySet().stream() |
|||
.filter(x -> x.getKey().charAt(0) == josChar) |
|||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); |
|||
|
|||
if (ValidationUtil.isEmpty(singleTypeResults)) { |
|||
continue; |
|||
} |
|||
|
|||
// get all possible indices combos for a msd of this length |
|||
// HashSet<HashSet<Integer>> indicesCombos = indices.get() |
|||
//Combinations.generateIndices(singleTypeResults.keySet().stream().findFirst().get().length()); |
|||
|
|||
for (Map.Entry<String, AtomicLong> e : singleTypeResults.entrySet()) { |
|||
int l = e.getKey().length(); |
|||
|
|||
for (HashSet<Integer> indicesCombo : indices.get(e.getKey().length())) { |
|||
updateResults(mask(e.getKey(), indicesCombo), e.getValue().longValue()); |
|||
} |
|||
} |
|||
|
|||
resultsMapToArray(singleTypeResults.values().stream().mapToLong(Number::longValue).sum()); |
|||
} |
|||
|
|||
stat.setResultCustom(tmpResults); |
|||
} |
|||
|
|||
private static String mask(String word, HashSet<Integer> indicesCombo) { |
|||
StringBuilder sb = new StringBuilder(); |
|||
|
|||
sb.append(word.charAt(0)); |
|||
for (int i = 1; i < word.length(); i++) { |
|||
sb.append(indicesCombo.contains(i) ? word.charAt(i) : "."); |
|||
} |
|||
|
|||
return sb.toString(); |
|||
} |
|||
|
|||
|
|||
private static void updateResults(String s, Long nOfOccurences) { |
|||
// if not in map add |
|||
Long r = josTypeResult.putIfAbsent(s, nOfOccurences); |
|||
|
|||
// else update |
|||
if (r != null) { |
|||
josTypeResult.put(s, josTypeResult.get(s) + nOfOccurences); |
|||
} |
|||
} |
|||
|
|||
private static void resultsMapToArray(Long totalValue) { |
|||
Double total = totalValue * 1.0; |
|||
Object[][] josTypeResultArray = new Object[josTypeResult.size()][3]; |
|||
|
|||
int i = 0; |
|||
for (Map.Entry<String, Long> e : josTypeResult.entrySet()) { |
|||
josTypeResultArray[i][0] = e.getKey(); |
|||
josTypeResultArray[i][1] = e.getValue(); |
|||
josTypeResultArray[i][2] = e.getValue() / total; |
|||
|
|||
if (e.getValue() > total) { |
|||
|
|||
String debug = ""; |
|||
|
|||
} |
|||
|
|||
i++; |
|||
} |
|||
|
|||
if (tmpResults == null) { |
|||
tmpResults = josTypeResultArray; |
|||
} else { |
|||
int firstLength = tmpResults.length; |
|||
int secondLength = josTypeResultArray |