Project copied

This commit is contained in:
2018-06-19 09:15:37 +02:00
commit a18e52a599
94 changed files with 87092 additions and 0 deletions

160
.gitignore vendored Normal file
View File

@@ -0,0 +1,160 @@
# Created by .ignore support plugin (hsz.mobi)
### Maven template
target/
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionsBackup
pom.xml.next
release.properties
dependency-reduced-pom.xml
buildNumber.properties
.mvn/timing.properties
# Avoid ignoring Maven wrapper jar file (.jar files are usually ignored)
!/.mvn/wrapper/maven-wrapper.jar
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff:
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
.idea/
# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.xml
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
# Gradle:
.idea/**/gradle.xml
.idea/**/libraries
# Mongo Explorer plugin:
.idea/**/mongoSettings.xml
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
/out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
### Java template
# Compiled class file
# Log file
*.log
# BlueJ files
*.ctxt
# Mobile Tools for Java (J2ME)
.mtj.tmp/
# Package Files #
*.war
*.ear
*.zip
*.tar.gz
*.rar
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
### Eclipse template
.metadata
bin/
tmp/
*.tmp
*.bak
*.swp
*~.nib
local.properties
.settings/
.loadpath
.recommenders
# Eclipse Core
.project
# External tool builders
.externalToolBuilders/
# Locally stored "Eclipse launch configurations"
*.launch
# PyDev specific (Python IDE for Eclipse)
*.pydevproject
# CDT-specific (C/C++ Development Tooling)
.cproject
# JDT-specific (Eclipse Java Development Tools)
.classpath
# Java annotation processor (APT)
.factorypath
# PDT-specific (PHP Development Tools)
.buildpath
# sbteclipse plugin
.target
# Tern plugin
.tern-project
# TeXlipse plugin
.texlipse
# STS (Spring Tool Suite)
.springBeans
# Code Recommenders
.recommenders/
# Scala IDE specific (Scala & Java development for Eclipse)
.cache-main
.scala_dependencies
.worksheet
### Windows ###
# Windows thumbnail cache files
Thumbs.db
ehthumbs.db
ehthumbs_vista.db
# Folder config file
Desktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msm
*.msp
# Windows shortcuts
*.lnk

28
Corpus Analyzer.iml Normal file
View File

@@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="Maven: commons-io:commons-io:2.5" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.6" level="project" />
<orderEntry type="library" name="Maven: com.googlecode.json-simple:json-simple:1.1.1" level="project" />
<orderEntry type="library" name="Maven: junit:junit:4.10" level="project" />
<orderEntry type="library" name="Maven: org.hamcrest:hamcrest-core:1.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-csv:1.4" level="project" />
<orderEntry type="library" name="Maven: org.controlsfx:controlsfx:8.40.13" level="project" />
<orderEntry type="library" name="Maven: org.rocksdb:rocksdbjni:5.7.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-api:2.9.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-core:2.9.0" level="project" />
<orderEntry type="library" name="Maven: org.kordamp.ikonli:ikonli-fontawesome-pack:1.9.0" level="project" />
<orderEntry type="library" name="Maven: org.kordamp.ikonli:ikonli-core:1.9.0" level="project" />
<orderEntry type="library" name="Maven: org.kordamp.ikonli:ikonli-javafx:1.9.0" level="project" />
</component>
</module>

122
pom.xml Normal file
View File

@@ -0,0 +1,122 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>thesis</groupId>
<artifactId>corpus-analyzer</artifactId>
<version>1.2</version>
<dependencies>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.6</version>
</dependency>
<dependency>
<groupId>com.googlecode.json-simple</groupId>
<artifactId>json-simple</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>org.controlsfx</groupId>
<artifactId>controlsfx</artifactId>
<version>8.40.13</version>
</dependency>
<dependency>
<groupId>org.rocksdb</groupId>
<artifactId>rocksdbjni</artifactId>
<version>5.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.9.0</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.9.0</version>
</dependency>
<dependency>
<groupId>org.kordamp.ikonli</groupId>
<artifactId>ikonli-fontawesome-pack</artifactId>
<version>1.9.0</version>
</dependency>
<dependency>
<groupId>org.kordamp.ikonli</groupId>
<artifactId>ikonli-javafx</artifactId>
<version>1.9.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<!-- packages dependencies into the jar -->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<archive>
<manifest>
<mainClass>gui.GUIController</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<appendAssemblyId>false</appendAssemblyId>
<outputDirectory>artifact</outputDirectory>
<finalName>Corpus_Analyzer_${version}</finalName>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- JavaFX -->
<groupId>com.zenjava</groupId>
<artifactId>javafx-maven-plugin</artifactId>
<version>8.6.0</version>
<configuration>
<mainClass>gui.GUIController</mainClass>
<verbose>true</verbose>
</configuration>
<executions>
<execution>
<id>create-jfxjar</id>
<phase>package</phase>
<goals>
<goal>build-jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,3 @@
Manifest-Version: 1.0
Main-Class: gui.GUIController

View File

@@ -0,0 +1,15 @@
package alg;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
/**
 * Small helpers shared by the counting algorithms.
 */
public class Common {
    /**
     * Increments the occurrence counter stored under {@code o}, creating the
     * counter with a value of 1 when the key has no counter yet.
     *
     * @param map counters keyed by the counted object
     * @param o   key whose counter is incremented
     */
    public static <K, V> void updateMap(Map<K, AtomicLong> map, K o) {
        // fast path: the key is already counted, no allocation needed
        AtomicLong counter = map.get(o);
        if (counter == null) {
            // putIfAbsent returns the existing counter if someone else inserted it first
            counter = map.putIfAbsent(o, new AtomicLong(1));
            if (counter == null) {
                // our fresh counter (already at 1) was stored
                return;
            }
        }
        // increment the counter we already hold instead of looking the key up again
        counter.incrementAndGet();
    }
}

View File

@@ -0,0 +1,794 @@
package alg;
import static data.Enums.solar.SolarFilters.*;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.*;
import java.util.concurrent.ForkJoinPool;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.*;
import org.apache.logging.log4j.LogManager;
import data.*;
import gui.ValidationUtil;
public class XML_processing {
/** Logger used by the static XML reading methods of this class. */
public final static org.apache.logging.log4j.Logger logger = LogManager.getLogger(XML_processing.class);
// public static void processCorpus(Statistics stats) {
// // we can preset the list's size, so there won't be a need to resize it
// List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT);
//
// int i = 0;
// for (File f : Settings.corpus) {
// i++;
// readXML(f.toString(), stats);
// }
// }
// public static void readXML(String path, Statistics stats) {
// if (stats.getCorpusType() == CorpusType.GIGAFIDA) {
// readXMLGigafida(path, stats);
// } else if (stats.getCorpusType() == CorpusType.GOS) {
// readXMLGos(path, stats);
// } else if (stats.getCorpusType() == CorpusType.SOLAR) {
// readXMLSolar(path, stats);
// }
// }
/**
 * Hands the file at {@code path} to the reader that matches the corpus type
 * stored in {@code stats}. Gigafida and ccKres share one reader; a corpus
 * type without a matching reader is ignored.
 *
 * @param path  path of the XML file to read
 * @param stats statistics object that carries the corpus description
 */
public static void readXML(String path, StatisticsNew stats) {
    CorpusType corpusType = stats.getCorpus().getCorpusType();

    boolean gigafidaLike = corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES;
    if (gigafidaLike) {
        readXMLGigafida(path, stats);
        return;
    }
    if (corpusType == CorpusType.GOS) {
        readXMLGos(path, stats);
        return;
    }
    if (corpusType == CorpusType.SOLAR) {
        readXMLSolar(path, stats);
    }
}
/**
 * Reads and returns the text of the first element whose local name equals
 * {@code tag} (case-insensitive), e.g. the title tag used for discerning the
 * corpus' type.
 * Notice: only the first occurrence of the tag is considered.
 *
 * @param path path of the XML file to read
 * @param tag  local name of the element to look for
 *
 * @return the element's text, or an empty string when the tag is not found,
 * the element has no text directly after its start tag, or the file cannot be read
 */
public static String readXMLHeaderTag(String path, String tag) {
    XMLInputFactory factory = XMLInputFactory.newInstance();
    // closing the event reader does not close the underlying stream, so the stream is managed here
    try (FileInputStream input = new FileInputStream(path)) {
        XMLEventReader eventReader = factory.createXMLEventReader(input);
        try {
            while (eventReader.hasNext()) {
                XMLEvent xmlEvent = eventReader.nextEvent();
                if (!xmlEvent.isStartElement()) {
                    continue;
                }
                String name = xmlEvent.asStartElement().getName().getLocalPart();
                if (name.equalsIgnoreCase(tag)) {
                    // an empty element is followed by its end tag rather than by text
                    XMLEvent content = eventReader.nextEvent();
                    return content.isCharacters() ? content.asCharacters().getData() : "";
                }
            }
        } finally {
            try {
                eventReader.close();
            } catch (XMLStreamException e) {
                logger.error("closing stream", e);
            }
        }
    } catch (java.io.IOException | XMLStreamException e) {
        logger.error("reading header tag", e);
    }
    return "";
}
/**
 * Runs the fork-join computation that matches the analysis level of the
 * filter over the given batch of sentences and waits for it to finish.
 *
 * @param corpus batch of sentences to process
 * @param stats  statistics object passed on to the fork-join task
 */
private static void fj(List<Sentence> corpus, StatisticsNew stats) {
    ForkJoinPool pool = new ForkJoinPool();
    try {
        if (stats.getFilter().getAl() == AnalysisLevel.STRING_LEVEL) {
            alg.ngram.ForkJoin wc = new alg.ngram.ForkJoin(corpus, stats);
            pool.invoke(wc);
        } else if (stats.getFilter().getAl() == AnalysisLevel.WORD_LEVEL) {
            alg.word.ForkJoin wc = new alg.word.ForkJoin(corpus, stats);
            pool.invoke(wc);
        } else {
            // TODO:
            // alg.inflectedJOS.ForkJoin wc = new alg.inflectedJOS.ForkJoin(corpus, stats);
            // pool.invoke(wc);
        }
    } finally {
        // invoke() has returned (or thrown) by now, so the per-call pool can release its workers
        pool.shutdown();
    }
}
// public static void readXMLGos(String path, Statistics stats) {
// boolean in_word = false;
// String taksonomija = "";
// String lemma = "";
// String msd = "";
// String type = stats.isGosOrthMode() ? "orth" : "norm"; // orth & norm
//
// List<Word> stavek = new ArrayList<>();
// List<Sentence> corpus = new ArrayList<>();
// String sentenceDelimiter = "seg";
// String taxonomyPrefix = "gos.";
//
// try {
// XMLInputFactory factory = XMLInputFactory.newInstance();
// XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path));
//
// while (eventReader.hasNext()) {
// XMLEvent event = eventReader.nextEvent();
//
// switch (event.getEventType()) {
// case XMLStreamConstants.START_ELEMENT:
//
// StartElement startElement = event.asStartElement();
// String qName = startElement.getName().getLocalPart();
//
// // "word" node
// if (qName.equals("w")) {
// in_word = true;
//
// if (type.equals("norm")) {
// // make sure we're looking at <w lemma...> and not <w type...>
// Iterator var = startElement.getAttributes();
// ArrayList<Object> attributes = new ArrayList<>();
// while (var.hasNext()) {
// attributes.add(var.next());
// }
//
// if (attributes.contains("msd")) {
// msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
// } else {
// msd = null;
// }
//
// if (attributes.contains("lemma")) {
// lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
// }
// }
// }
// // taxonomy node
// else if (qName.equalsIgnoreCase("catRef")) {
// // there are some term nodes at the beginning that are of no interest to us
// // they differ by not having the attribute "ref", so test will equal null
// Attribute test = startElement.getAttributeByName(QName.valueOf("target"));
//
// if (test != null) {
// // keep only taxonomy properties
// taksonomija = String.valueOf(test.getValue()).replace(taxonomyPrefix, "");
// }
// } else if (qName.equalsIgnoreCase("div")) {
// type = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
//
// }
// break;
//
// case XMLStreamConstants.CHARACTERS:
// Characters characters = event.asCharacters();
//
// // "word" node value
// if (in_word) {
// if (type.equals("norm") && msd != null) {
// stavek.add(new Word(characters.getData(), lemma, msd));
// } else {
// stavek.add(new Word(characters.getData()));
// }
//
// in_word = false;
// }
// break;
//
// case XMLStreamConstants.END_ELEMENT:
// EndElement endElement = event.asEndElement();
//
// // parser reached end of the current sentence
// if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
// // add sentence to corpus
// corpus.add(new Sentence(stavek, taksonomija, type));
// // and start a new one
// stavek = new ArrayList<>();
//
// /* Invoke Fork-Join when we reach maximum limit of
// * sentences (because we can't read everything to
// * memory) or we reach the end of the file.
// */
// if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
// fj(corpus, stats);
// // empty the current corpus, since we don't need
// // the data anymore
// corpus.clear();
// }
// }
//
// // backup
// if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) {
// fj(corpus, stats);
// corpus.clear();
// }
//
// break;
// }
// }
// } catch (FileNotFoundException | XMLStreamException e) {
// e.printStackTrace();
// }
// }
/**
 * Reads a Solar corpus file and feeds its sentences to the fork-join
 * computation in batches of at most {@code Settings.CORPUS_SENTENCE_LIMIT}.
 * <p>
 * Words come from {@code w3} elements; a {@code c3} element containing "."
 * ends the current sentence. The tags collected from each {@code head}
 * element are checked against the Solar filters of {@code stats}, and only
 * text that follows a passing head (up to the end of {@code body}) is used.
 *
 * @param path  path of the XML file to read
 * @param stats statistics object that carries the filter and receives the results
 */
@SuppressWarnings("unused")
public static void readXMLSolar(String path, StatisticsNew stats) {
    boolean in_word = false;
    String lemma = "";
    String msd = "";
    List<Word> stavek = new ArrayList<>();
    List<Sentence> corpus = new ArrayList<>();
    // used for filter
    Set<String> headTags = new HashSet<>(Arrays.asList("sola", "predmet", "razred", "regija", "tip", "leto"));
    Map<String, String> headBlock = null;
    boolean includeThisBlock = false;
    // closing the event reader does not close the underlying stream, so the stream is managed here
    try (FileInputStream input = new FileInputStream(path)) {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        XMLEventReader eventReader = factory.createXMLEventReader(input);
        try {
            while (eventReader.hasNext()) {
                XMLEvent event = eventReader.nextEvent();
                switch (event.getEventType()) {
                    case XMLStreamConstants.START_ELEMENT:
                        StartElement startElement = event.asStartElement();
                        String qName = startElement.getName().getLocalPart();
                        // "word" node
                        if (qName.equals("w3")) {
                            in_word = true;
                            msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
                            lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
                        } else if (qName.equals("c3")) {
                            String c3Content = readSolarElementText(eventReader);
                            if (c3Content.equals(".") && includeThisBlock) {
                                // add sentence to corpus
                                corpus.add(new Sentence(stavek));
                                // and start a new one
                                stavek = new ArrayList<>();
                                /* Invoke Fork-Join when we reach maximum limit of
                                 * sentences (because we can't read everything to
                                 * memory) or we reach the end of the file.
                                 */
                                if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
                                    fj(corpus, stats);
                                    // empty the current corpus, since we don't need
                                    // the data anymore
                                    corpus.clear();
                                }
                            }
                        } else if (headTags.contains(qName)) {
                            String tagContent = readSolarElementText(eventReader);
                            // a filter tag outside of any <head> has no block to belong to
                            if (headBlock != null) {
                                headBlock.put(qName, tagContent);
                            }
                        } else if (qName.equals("head")) {
                            headBlock = new HashMap<>();
                        }
                        break;
                    case XMLStreamConstants.CHARACTERS:
                        // "word" node value
                        if (in_word) {
                            // words of blocks that failed the filter must not end up
                            // in the first sentence of the next included block
                            if (includeThisBlock) {
                                stavek.add(new Word(event.asCharacters().getData(), lemma, msd));
                            }
                            in_word = false;
                        }
                        break;
                    case XMLStreamConstants.END_ELEMENT:
                        String qNameEnd = event.asEndElement().getName().getLocalPart();
                        if (qNameEnd.equals("head")) {
                            // validate and set boolean
                            if (validateHeadBlock(headBlock, stats.getFilter().getSolarFilters())) {
                                includeThisBlock = true;
                            }
                        } else if (qNameEnd.equals("body")) {
                            // new block, reset filter status
                            includeThisBlock = false;
                        }
                        // backup: flush whatever is left when the corpus element closes
                        if (qNameEnd.equalsIgnoreCase("korpus")) {
                            fj(corpus, stats);
                            corpus.clear();
                        }
                        break;
                }
            }
        } finally {
            try {
                eventReader.close();
            } catch (XMLStreamException e) {
                logger.error("closing stream", e);
            }
        }
    } catch (java.io.IOException | XMLStreamException e) {
        logger.error("reading solar corpus file", e);
    }
}

/**
 * Consumes and returns the text that directly follows the start tag just
 * read, or returns an empty string (consuming nothing) when the next event
 * is not character data, e.g. for an empty element.
 */
private static String readSolarElementText(XMLEventReader reader) throws XMLStreamException {
    XMLEvent next = reader.peek();
    if (next == null || !next.isCharacters()) {
        return "";
    }
    return reader.nextEvent().asCharacters().getData();
}
/**
 * Checks a head block read from the corpus against the filters set by the user.
 * Every filter key that has at least one selected value must be satisfied;
 * a key is satisfied when the head block's value for it equals any of the
 * selected values. Keys with a null or empty value set impose no constraint.
 *
 * @param readHeadBlock block of tags read from the corpus
 * @param userSetFilter tags with values set by the user, or null for no filtering
 *
 * @return true if there is no filter or the head block satisfies every filter key
 */
private static boolean validateHeadBlock(Map<String, String> readHeadBlock, HashMap<String, HashSet<String>> userSetFilter) {
    if (userSetFilter == null) {
        return true;
    }
    for (Map.Entry<String, HashSet<String>> filterEntry : userSetFilter.entrySet()) {
        String key = filterEntry.getKey();
        HashSet<String> allowedValues = filterEntry.getValue();
        if (allowedValues == null || allowedValues.isEmpty()) {
            // nothing selected for this key
            continue;
        }
        // a head block holds one value per key, so matching any selected value is enough;
        // the result must not depend on which value the set happens to iterate last
        boolean matched = false;
        for (String value : allowedValues) {
            if (validateHeadBlockEntry(readHeadBlock, key, value)) {
                matched = true;
                break;
            }
        }
        if (!matched) {
            return false;
        }
    }
    // if it gets to this point, it passed all the filters
    return true;
}
/**
 * Tells whether the head block contains {@code userSetKey} and maps it to
 * exactly {@code userSetValue}.
 *
 * @param readHeadBlock block of tags read from the corpus
 * @param userSetKey    filter key to look up
 * @param userSetValue  value the key is expected to have
 *
 * @return false when the key is missing or its value differs, true otherwise
 */
private static boolean validateHeadBlockEntry(Map<String, String> readHeadBlock, String userSetKey, String userSetValue) {
    // a missing key short-circuits, so the value is only compared when the key is present
    return readHeadBlock.keySet().contains(userSetKey)
            && readHeadBlock.get(userSetKey).equals(userSetValue);
}
/**
 * Parses XML headers for information about its taxonomy (if supported) or filters (solar).
 * <p>
 * For corpus types listed by {@code Tax.getCorpusTypesWithTaxonomy()} the
 * {@code target} values of {@code catRef} elements inside the header are
 * collected (with "#" removed). Otherwise the text of the Solar head tags is
 * collected per tag name.
 *
 * @param filepath      path of the XML file to read
 * @param corpusIsSplit is corpus split into multiple xml files, or are all entries grouped into one large xml file;
 *                      when true, reading stops at the end of the first header
 * @param corpusType    type of the corpus the file belongs to
 *
 * @return a {@code HashSet<String>} of taxonomy values for taxonomy corpora, otherwise a
 * {@code HashMap<String, HashSet<String>>} of filter values; whatever was collected so far
 * is returned when the file cannot be read completely
 */
public static Object readXmlHeaderTaxonomyAndFilters(String filepath, boolean corpusIsSplit, CorpusType corpusType) {
    boolean parseTaxonomy = Tax.getCorpusTypesWithTaxonomy().contains(corpusType);
    // solar
    Set<String> headTags = null;
    HashMap<String, HashSet<String>> resultFilters = new HashMap<>();
    // taxonomy corpora
    HashSet<String> resultTaxonomy = new HashSet<>();
    String headTagName;
    if (corpusType == CorpusType.SOLAR) {
        headTagName = "head";
        // used for filter
        headTags = new HashSet<>(Arrays.asList(SOLA, PREDMET, RAZRED, REGIJA, TIP, LETO));
        // init results now to avoid null pointers
        headTags.forEach(f -> resultFilters.put(f, new HashSet<>()));
    } else {
        headTagName = "teiHeader";
    }
    Object result = parseTaxonomy ? resultTaxonomy : resultFilters;
    XMLInputFactory factory = XMLInputFactory.newInstance();
    // closing the event reader does not close the underlying stream, so the stream is managed here
    try (FileInputStream input = new FileInputStream(filepath)) {
        XMLEventReader xmlEventReader = factory.createXMLEventReader(input);
        try {
            boolean insideHeader = false;
            while (xmlEventReader.hasNext()) {
                XMLEvent xmlEvent = xmlEventReader.nextEvent();
                if (xmlEvent.isStartElement()) {
                    StartElement startElement = xmlEvent.asStartElement();
                    String elementName = startElement.getName().getLocalPart();
                    if (elementName.equalsIgnoreCase(headTagName)) {
                        // if the corpus is split into files, we skip bodies
                        // this toggle is true when we're inside a header (next block of code executes)
                        // and false when we're not (skip reading unnecessary attributes)
                        insideHeader = true;
                    }
                    if (insideHeader) {
                        if (parseTaxonomy && elementName.equalsIgnoreCase("catRef")) {
                            // catRef elements without a target carry no taxonomy value
                            Attribute target = startElement.getAttributeByName(QName.valueOf("target"));
                            if (target != null) {
                                resultTaxonomy.add(target.getValue().replace("#", ""));
                            }
                        } else if (!parseTaxonomy && headTags != null && headTags.contains(elementName)) {
                            // an empty tag is followed by its end tag rather than by text
                            XMLEvent next = xmlEventReader.peek();
                            if (next != null && next.isCharacters()) {
                                String tagContent = xmlEventReader.nextEvent().asCharacters().getData();
                                resultFilters.get(elementName).add(tagContent);
                            }
                        }
                    }
                } else if (xmlEvent.isEndElement() && isEndElementEndOfHeader(xmlEvent, headTagName)) {
                    if (corpusIsSplit) {
                        // if the corpus is split into multiple files, each with only one header block per file
                        // that means we should stop after we reach the end of the header
                        return result;
                    }
                    // whole corpus in one file, so we have to continue reading in order to find all header blocks
                    insideHeader = false;
                }
            }
        } finally {
            try {
                xmlEventReader.close();
            } catch (XMLStreamException e) {
                logger.error("closing stream", e);
            }
        }
    } catch (XMLStreamException e) {
        logger.error("Streaming error", e);
    } catch (FileNotFoundException e) {
        logger.error("File not found", e);
        // TODO: keep a list of files that threw this error and a dirty boolean marker -> if true, alert user
    } catch (java.io.IOException e) {
        logger.error("closing file", e);
    }
    return result;
}
/**
 * Tells whether the given end-element event closes the header tag.
 *
 * @param event     event that must be an end element
 * @param headerTag local name of the header element, compared case-insensitively
 */
private static boolean isEndElementEndOfHeader(XMLEvent event, String headerTag) {
    EndElement closing = event.asEndElement();
    String closedName = closing.getName().getLocalPart();
    return closedName.equalsIgnoreCase(headerTag);
}
/**
 * Reads a Gigafida-style corpus file (also used for ccKres) and feeds its
 * sentences to the fork-join computation in batches of at most
 * {@code Settings.CORPUS_SENTENCE_LIMIT}.
 * <p>
 * Words come from {@code w} elements, a closing {@code s} element ends a
 * sentence, and the {@code target} values of {@code catRef} elements form the
 * file's taxonomy. Each sentence is passed through {@code runFilters} first.
 *
 * @param path  path of the XML file to read
 * @param stats statistics object that carries the filter and receives the results
 *
 * @return false when a taxonomy filter is set and none of the file's taxonomy
 * values match it (reading stops at the end of the header), true otherwise
 */
@SuppressWarnings("Duplicates")
public static boolean readXMLGigafida(String path, StatisticsNew stats) {
    boolean inWord = false;
    ArrayList<String> currentFiletaxonomy = new ArrayList<>();
    String lemma = "";
    String msd = "";
    List<Word> sentence = new ArrayList<>();
    List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it
    String sentenceDelimiter = "s";
    // closing the event reader does not close the underlying stream, so the stream is managed here
    // (this also covers the early return below)
    try (FileInputStream input = new FileInputStream(path)) {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        XMLEventReader eventReader = factory.createXMLEventReader(input);
        try {
            while (eventReader.hasNext()) {
                XMLEvent event = eventReader.nextEvent();
                switch (event.getEventType()) {
                    case XMLStreamConstants.START_ELEMENT:
                        StartElement startElement = event.asStartElement();
                        String qName = startElement.getName().getLocalPart();
                        // "word" node
                        if (qName.equals("w")) {
                            inWord = true;
                            msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
                            lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
                        }
                        // taxonomy node
                        else if (qName.equalsIgnoreCase("catRef")) {
                            // catRef elements without a "target" attribute are of no interest to us
                            Attribute tax = startElement.getAttributeByName(QName.valueOf("target"));
                            if (tax != null) {
                                // keep only taxonomy properties
                                currentFiletaxonomy.add(String.valueOf(tax.getValue()).replace("#", ""));
                            }
                        }
                        break;
                    case XMLStreamConstants.CHARACTERS:
                        // "word" node value
                        if (inWord) {
                            String word = event.asCharacters().getData();
                            sentence.add(new Word(word, lemma, msd));
                            inWord = false;
                        }
                        break;
                    case XMLStreamConstants.END_ELEMENT:
                        String closedName = event.asEndElement().getName().getLocalPart();
                        // parser reached end of the current sentence
                        if (closedName.equals(sentenceDelimiter)) {
                            // add sentence to corpus if it passes filters
                            sentence = runFilters(sentence, stats.getFilter());
                            if (!ValidationUtil.isEmpty(sentence)) {
                                corpus.add(new Sentence(sentence));
                            }
                            // and start a new one
                            sentence = new ArrayList<>();
                            /* Invoke Fork-Join when we reach maximum limit of
                             * sentences (because we can't read everything to
                             * memory) or we reach the end of the file.
                             */
                            if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
                                fj(corpus, stats);
                                // empty the current corpus, since we don't need the data anymore
                                corpus.clear();
                                // TODO: if (stats.isUseDB()) {
                                // stats.storeTmpResultsToDB();
                                // }
                            }
                        } else if (closedName.equals("teiHeader")) {
                            // before proceeding to read this file, make sure that taxonomy filters are a match
                            if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
                                currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
                                if (currentFiletaxonomy.isEmpty()) {
                                    // taxonomies don't match so stop
                                    return false;
                                }
                            }
                        }
                        // fallback: flush whatever is left when the document element closes
                        else if (closedName.equalsIgnoreCase("tei")) {
                            fj(corpus, stats);
                            corpus.clear();
                            // TODO: if (stats.isUseDB()) {
                            // stats.storeTmpResultsToDB();
                            // }
                        }
                        break;
                }
            }
        } finally {
            try {
                eventReader.close();
            } catch (XMLStreamException e) {
                logger.error("closing stream", e);
            }
        }
    } catch (java.io.IOException | XMLStreamException e) {
        logger.error("reading gigafida corpus file", e);
    }
    return true;
}
/**
 * Reads a GOS corpus file and feeds its sentences to the fork-join
 * computation in batches of at most {@code Settings.CORPUS_SENTENCE_LIMIT}.
 * <p>
 * Words come from {@code w} elements that have no {@code type} attribute, a
 * closing {@code seg} element ends a sentence, and the {@code type} attribute
 * of the enclosing {@code div} tells whether the text is "orth" or "norm".
 * A sentence is kept only when its div kind matches the corpus' orth mode
 * and the file passed the taxonomy filter.
 *
 * @param path  path of the XML file to read
 * @param stats statistics object that carries the corpus settings and filter and receives the results
 *
 * @return always true
 */
@SuppressWarnings("Duplicates")
public static boolean readXMLGos(String path, StatisticsNew stats) {
    boolean inWord = false;
    boolean inOrthDiv = false;
    boolean computeForOrth = stats.getCorpus().isGosOrthMode();
    ArrayList<String> currentFiletaxonomy = new ArrayList<>();
    String lemma = "";
    String msd = "";
    List<Word> sentence = new ArrayList<>();
    List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it
    String sentenceDelimiter = "seg";
    String gosType = stats.getFilter().hasMsd() ? "norm" : "orth"; // orth & norm
    boolean includeFile = true;
    // closing the event reader does not close the underlying stream, so the stream is managed here
    try (FileInputStream input = new FileInputStream(path)) {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        XMLEventReader eventReader = factory.createXMLEventReader(input);
        try {
            while (eventReader.hasNext()) {
                XMLEvent event = eventReader.nextEvent();
                switch (event.getEventType()) {
                    case XMLStreamConstants.START_ELEMENT:
                        StartElement startElement = event.asStartElement();
                        String qName = startElement.getName().getLocalPart();
                        if (qName.equals("div")) {
                            HashMap<String, String> atts = extractAttributes(startElement);
                            if (atts.keySet().contains("type")) {
                                inOrthDiv = atts.get("type").equals("orth");
                            }
                        }
                        // "word" node
                        if (qName.equals("w")) {
                            // check that it's not a type
                            HashMap<String, String> atts = extractAttributes(startElement);
                            if (!atts.containsKey("type")) {
                                inWord = true;
                                // reset per word so a word without these attributes
                                // does not inherit the previous word's values
                                msd = atts.get("msd");
                                lemma = atts.containsKey("lemma") ? atts.get("lemma") : "";
                            }
                        }
                        // taxonomy node
                        else if (qName.equalsIgnoreCase("catRef")) {
                            // catRef elements without a "target" attribute are of no interest to us
                            Attribute tax = startElement.getAttributeByName(QName.valueOf("target"));
                            if (tax != null) {
                                // keep only taxonomy properties
                                // NOTE(review): unlike the Gigafida reader, "#" is not stripped here - confirm this is intended
                                currentFiletaxonomy.add(String.valueOf(tax.getValue()));
                            }
                        } else if (qName.equalsIgnoreCase("div")) {
                            // a div without a type attribute keeps the current type
                            Attribute divType = startElement.getAttributeByName(QName.valueOf("type"));
                            if (divType != null) {
                                gosType = String.valueOf(divType.getValue());
                            }
                        }
                        break;
                    case XMLStreamConstants.CHARACTERS:
                        // "word" node value
                        if (inWord) {
                            Characters characters = event.asCharacters();
                            if (gosType.equals("norm") && msd != null) {
                                sentence.add(new Word(characters.getData(), lemma, msd));
                            } else {
                                sentence.add(new Word(characters.getData()));
                            }
                            inWord = false;
                        }
                        break;
                    case XMLStreamConstants.END_ELEMENT:
                        String closedName = event.asEndElement().getName().getLocalPart();
                        // parser reached end of the current sentence
                        if (closedName.equals(sentenceDelimiter)) {
                            // add sentence to corpus if it passes filters
                            boolean saveSentence = computeForOrth == inOrthDiv;
                            if (includeFile && saveSentence && !ValidationUtil.isEmpty(sentence)) {
                                List<Word> filtered = runFilters(sentence, stats.getFilter());
                                // the filters may reject the sentence or remove all of its words
                                if (filtered != null && !filtered.isEmpty()) {
                                    corpus.add(new Sentence(filtered));
                                }
                            }
                            // and start a new one
                            sentence = new ArrayList<>();
                            /* Invoke Fork-Join when we reach maximum limit of
                             * sentences (because we can't read everything to
                             * memory) or we reach the end of the file.
                             */
                            if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
                                fj(corpus, stats);
                                // empty the current corpus, since we don't need
                                // the data anymore
                                corpus.clear();
                            }
                        } else if (closedName.equals("teiHeader")) {
                            // before proceeding to read this file, make sure that taxonomy filters are a match
                            if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
                                currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
                                // disregard this entry if taxonomies don't match
                                includeFile = !currentFiletaxonomy.isEmpty();
                                currentFiletaxonomy = new ArrayList<>();
                            }
                        }
                        // backup: flush whatever is left when the document element closes
                        else if (closedName.equalsIgnoreCase("tei")) {
                            fj(corpus, stats);
                            corpus.clear();
                        }
                        break;
                }
            }
        } finally {
            try {
                eventReader.close();
            } catch (XMLStreamException e) {
                logger.error("closing stream", e);
            } catch (Exception e) {
                logger.error("general error", e);
            }
        }
    } catch (java.io.IOException | XMLStreamException e) {
        logger.error("reading gos corpus file", e);
    }
    return true;
}
/**
 * Runs the sentence through some filters, so we don't do calculations when unnecessary.
 * Filters (applied only at string analysis level):
 * <ol>
 * <li><b>Ngrams:</b> omit sentences that are shorter than the ngram value (e.g. 3 gram of a single word sentence)</li>
 * <li><b>Letter ngrams:</b> omit words that are shorter than the specified string length (e.g. combinations of 3 letters when the word consists of only 2 letters)</li>
 * </ol>
 *
 * @param sentence words of the sentence; words may be removed from this list in place
 * @param filter   filter settings to apply
 *
 * @return Empty sentence (if fails 1.) or the passed sentence with some words removed (2.); never null
 */
private static List<Word> runFilters(List<Word> sentence, Filter filter) {
    if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
        // ngram level: if not 0 must be less than or equal to number of words in this sentence.
        if (filter.getNgramValue() > 0 && filter.getNgramValue() > sentence.size()) {
            // an empty list instead of null, so callers can wrap or test the result safely
            return new ArrayList<>();
        }
        // if we're calculating values for letters, omit words that are shorter than string length
        if (filter.getNgramValue() == 0) {
            sentence.removeIf(w -> (filter.getCalculateFor() == CalculateFor.WORD && w.getWord().length() < filter.getStringLength())
                    || (filter.getCalculateFor() == CalculateFor.LEMMA && w.getLemma().length() < filter.getStringLength()));
        }
    }
    return sentence;
}
/**
 * Collects the attributes of a start element into a map from the
 * attribute's local name to its value.
 *
 * @param se start element whose attributes are read
 *
 * @return map of attribute local names to values; empty when the element has no attributes
 */
private static HashMap<String, String> extractAttributes(StartElement se) {
    HashMap<String, String> atts = new HashMap<>();
    // wildcard instead of a raw type; the cast stays because older API versions return an untyped iterator
    Iterator<?> attributesIt = se.getAttributes();
    while (attributesIt.hasNext()) {
        Attribute a = (Attribute) attributesIt.next();
        atts.put(a.getName().getLocalPart(), a.getValue());
    }
    return atts;
}
}

View File

@@ -0,0 +1,67 @@
package alg.inflectedJOS;
import java.util.List;
import java.util.concurrent.RecursiveAction;
import data.Sentence;
import data.Statistics;
/**
 * Fork-join task that splits a list of sentences in halves until a part is
 * smaller than {@link #ACCEPTABLE_SIZE} and then runs
 * {@code InflectedJOSCount.calculateForAll} on that part.
 */
public class ForkJoin extends RecursiveAction {
    private static final long serialVersionUID = -1260951004477299634L;
    /** Parts with fewer sentences than this are computed directly instead of being split. */
    private static final int ACCEPTABLE_SIZE = 1000;
    private final List<Sentence> corpus;
    private final Statistics stats;
    /** Index of the first sentence of this part (inclusive). */
    private final int start;
    /** Index just past the last sentence of this part (exclusive). */
    private final int end;

    /**
     * Constructor for subproblems.
     */
    private ForkJoin(List<Sentence> corpus, int start, int end, Statistics stats) {
        this.corpus = corpus;
        this.start = start;
        this.end = end;
        this.stats = stats;
    }

    /**
     * Default constructor for the initial problem, covering the whole corpus.
     */
    public ForkJoin(List<Sentence> corpus, Statistics stats) {
        this(corpus, 0, corpus.size(), stats);
    }

    /**
     * Runs the calculation on this task's part of the corpus, passing the
     * taxonomy only when one is set.
     */
    private void computeDirectly() {
        List<Sentence> subCorpus = corpus.subList(start, end);
        if (stats.isTaxonomySet()) {
            InflectedJOSCount.calculateForAll(subCorpus, stats, stats.getInflectedJosTaxonomy());
        } else {
            InflectedJOSCount.calculateForAll(subCorpus, stats, null);
        }
    }

    @Override
    protected void compute() {
        int subCorpusSize = end - start;
        if (subCorpusSize < ACCEPTABLE_SIZE) {
            computeDirectly();
            return;
        }
        int mid = start + subCorpusSize / 2;
        ForkJoin left = new ForkJoin(corpus, start, mid, stats);
        ForkJoin right = new ForkJoin(corpus, mid, end, stats);
        // invokeAll forks the halves and returns once both are done
        invokeAll(left, right);
    }
}

View File

@@ -0,0 +1,170 @@
package alg.inflectedJOS;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import alg.Common;
import data.Sentence;
import data.Statistics;
import data.StatisticsNew;
import data.Word;
public class InflectedJOSCount {
public static HashMap<Integer, ArrayList<ArrayList<Integer>>> indices;
// static {
// // calculate all possible combinations of indices we will substitute with a '-' for substring statistics
// indices = new HashMap<>();
// for (int i = 5; i <= 8; i++) {
// indices.put(i, calculateCombinations(i));
// }
// }
//
// private static List<Integer> calculateCombinations(int i) {
// int arr[] = {1, 2, 3, 4, 5};
// int r = 3;
// int n = arr.length;
// ArrayList<ArrayList<Integer>> result = new ArrayList<>();
//
// return printCombination(arr, n, r);
// }
//
// /* arr[] ---> Input Array
// data[] ---> Temporary array to store current combination
// start & end ---> Staring and Ending indexes in arr[]
// index ---> Current index in data[]
// r ---> Size of a combination to be printed */
// static void combinationUtil(int arr[], int data[], int start,
// int end, int index, int r, ArrayList<ArrayList<Integer>> result) {
// // Current combination is ready to be printed, print it
// ArrayList<Integer> tmpResult = new ArrayList<>();
//
// if (index == r) {
// ArrayList<Integer> tmpResult = new ArrayList<>();
// for (int j = 0; j < r; j++)
// System.out.print(data[j] + " ");
// System.out.println("");
// return;
// }
//
// // replace index with all possible elements. The condition
// // "end-i+1 >= r-index" makes sure that including one element
// // at index will make a combination with remaining elements
// // at remaining positions
// for (int i = start; i <= end && end - i + 1 >= r - index; i++) {
// data[index] = arr[i];
// combinationUtil(arr, data, i + 1, end, index + 1, r);
// }
// }
//
// // The main function that prints all combinations of size r
// // in arr[] of size n. This function mainly uses combinationUtil()
// static void printCombination(int arr[], int n, int r) {
// // A temporary array to store all combination one by one
// int data[] = new int[r];
//
// // Print all combination using temprary array 'data[]'
// combinationUtil(arr, data, 0, n - 1, 0, r);
// }
// public static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
// for (Sentence s : corpus) {
// // disregard if wrong taxonomy
// if (!(s.getTaxonomy().startsWith(taxonomy))) {
// continue;
// }
//
// calculateCommon(s, stats.result);
//
// for (Word word : s.getWords()) {
// // skip if current word is not inflected
// if (!(word.getMsd().length() > 0)) {
// continue;
// }
//
// String msd = word.getMsd();
//
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
//
// for (int i = 1; i < msd.length(); i++) {
// entry.setCharAt(i, msd.charAt(i));
// Common.updateMap(stats.result, entry.toString());
// entry.setCharAt(i, '-');
// }
// }
// }
// }
// public static void calculateForAll(List<Sentence> corpus, Statistics stats) {
// for (Sentence s : corpus) {
// for (Word word : s.getWords()) {
// if (!(word.getMsd().length() > 0)) {
// continue;
// }
//
// String msd = word.getMsd();
//
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
//
// for (int i = 1; i < msd.length(); i++) {
// entry.setCharAt(i, msd.charAt(i));
// Common.updateMap(stats.result, entry.toString());
// entry.setCharAt(i, '-');
// }
// }
// }
// }
/**
 * For every word with a non-empty msd, counts one masked msd variant per position:
 * the first character plus exactly one of the remaining characters is kept, all
 * other positions are replaced by '-'. Counts are accumulated in {@code stats.result}.
 *
 * @param corpus   sentences to process
 * @param stats    statistics object whose result map is updated
 * @param taxonomy if not {@code null}, only sentences whose taxonomy starts with
 *                 this value are processed
 */
static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
    for (Sentence sentence : corpus) {
        // disregard sentences that belong to a different taxonomy
        boolean wrongTaxonomy = taxonomy != null && !sentence.getTaxonomy().startsWith(taxonomy);
        if (wrongTaxonomy) {
            continue;
        }

        for (Word word : sentence.getWords()) {
            String msd = word.getMsd();

            // skip words without msd (not inflected)
            if (msd.length() <= 0) {
                continue;
            }

            // start with "<first char>----…" and reveal one position at a time
            StringBuilder masked = new StringBuilder(msd.length());
            masked.append(msd.charAt(0));
            for (int filler = 1; filler < msd.length(); filler++) {
                masked.append('-');
            }

            for (int position = 1; position < msd.length(); position++) {
                masked.setCharAt(position, msd.charAt(position));
                Common.updateMap(stats.result, masked.toString());
                masked.setCharAt(position, '-');
            }
        }
    }
}
/**
 * For every word with an msd, counts one masked msd variant per position: the
 * first character plus exactly one of the remaining characters is kept, all other
 * positions are replaced by '-'. Each variant is reported via
 * {@link StatisticsNew#updateResults}.
 * <p>
 * Words whose msd is {@code null} or empty are skipped; without this guard
 * {@code msd.charAt(0)} would throw for such words.
 *
 * @param corpus   sentences to process
 * @param stats    statistics object that receives the masked msd strings
 * @param taxonomy currently not used by this overload (no taxonomy filtering is applied)
 */
public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats, String taxonomy) {
    for (Sentence s : corpus) {
        for (Word word : s.getWords()) {
            String msd = word.getMsd();

            // skip if current word has no msd
            // TODO: if has defined msd and is of correct type (create a set)
            if (msd == null || msd.isEmpty()) {
                continue;
            }

            StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));

            for (int i = 1; i < msd.length(); i++) {
                entry.setCharAt(i, msd.charAt(i));
                stats.updateResults(entry.toString());
                entry.setCharAt(i, '-');
            }
        }
    }
}
}

View File

@@ -0,0 +1,131 @@
package alg.inflectedJOS;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import data.Enums.InflectedJosTypes;
import data.StatisticsNew;
import gui.ValidationUtil;
import util.Combinations;
// adapted from http://www.geeksforgeeks.org/print-all-possible-combinations-of-r-elements-in-a-given-array-of-size-n/
/**
 * Derives word-formation statistics from already counted (masked) msd strings:
 * for each inflected word type every entry is re-masked with all precomputed
 * index combinations, the occurrences are summed per mask and the result is
 * stored on the statistics object as rows of {mask, count, share}.
 */
public class WordFormation {
    /** Index combinations (positions to keep) precomputed for key lengths 4 to 8. */
    private static final HashMap<Integer, HashSet<HashSet<Integer>>> indices = new HashMap<>();

    static {
        for (int i = 4; i <= 8; i++) {
            indices.put(i, Combinations.generateIndices(i));
        }
    }

    /**
     * Computes the rows and hands them to {@code stat.setResultCustom}. The result
     * map of {@code stat} is modified: entries of non-inflected types are removed.
     * <p>
     * All intermediate state is local to the call, so rows from a previous
     * invocation are never carried over into the next one. If no inflected type
     * has any results, {@code null} is passed to {@code setResultCustom}.
     */
    public static void calculateStatistics(StatisticsNew stat) {
        Map<String, AtomicLong> result = stat.getResult();

        // 1. filter - keep only inflected types
        result.keySet().removeIf(x -> !InflectedJosTypes.inflectedJosTypes.contains(x.charAt(0)));

        Object[][] rows = null;

        // 2. for each inflected type get all possible subcombinations
        for (Character josChar : InflectedJosTypes.inflectedJosTypes) {
            // results for a single word type
            Map<String, AtomicLong> singleTypeResults = result.entrySet().stream()
                    .filter(x -> x.getKey().charAt(0) == josChar)
                    .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

            if (ValidationUtil.isEmpty(singleTypeResults)) {
                continue;
            }

            HashMap<String, Long> maskedCounts = new HashMap<>();

            for (Map.Entry<String, AtomicLong> e : singleTypeResults.entrySet()) {
                // NOTE(review): combinations exist only for lengths 4..8; a key of any
                // other length yields null here and fails with a NullPointerException
                for (HashSet<Integer> indicesCombo : indices.get(e.getKey().length())) {
                    addCount(maskedCounts, mask(e.getKey(), indicesCombo), e.getValue().longValue());
                }
            }

            long total = singleTypeResults.values().stream().mapToLong(Number::longValue).sum();
            rows = concat(rows, toRows(maskedCounts, total));
        }

        stat.setResultCustom(rows);
    }

    /**
     * Keeps the first character and every position contained in {@code indicesCombo};
     * all other positions are replaced by '.'.
     */
    private static String mask(String word, HashSet<Integer> indicesCombo) {
        StringBuilder sb = new StringBuilder(word.length());
        sb.append(word.charAt(0));

        for (int i = 1; i < word.length(); i++) {
            sb.append(indicesCombo.contains(i) ? word.charAt(i) : '.');
        }

        return sb.toString();
    }

    /** Adds {@code nOfOccurences} to the count stored under {@code key} (starting from zero). */
    private static void addCount(HashMap<String, Long> counts, String key, long nOfOccurences) {
        Long previous = counts.get(key);
        counts.put(key, previous == null ? nOfOccurences : previous + nOfOccurences);
    }

    /** Converts the counts into rows of {key (String), count (Long), count / total (Double)}. */
    private static Object[][] toRows(HashMap<String, Long> counts, long total) {
        Object[][] rows = new Object[counts.size()][3];
        int i = 0;

        for (Map.Entry<String, Long> e : counts.entrySet()) {
            rows[i][0] = e.getKey();
            rows[i][1] = e.getValue();
            rows[i][2] = e.getValue() / (double) total;
            i++;
        }

        return rows;
    }

    /** Appends {@code second} to {@code first}; {@code first} may be {@code null}. */
    private static Object[][] concat(Object[][] first, Object[][] second) {
        if (first == null) {
            return second;
        }

        Object[][] joined = new Object[first.length + second.length][3];
        System.arraycopy(first, 0, joined, 0, first.length);
        System.arraycopy(second, 0, joined, first.length, second.length);

        return joined;
    }
}

View File

@@ -0,0 +1,62 @@
package alg.ngram;
import java.util.List;
import java.util.concurrent.RecursiveAction;
import data.Sentence;
import data.StatisticsNew;
/**
 * Fork/join task that runs {@link Ngrams#calculateForAll} over a corpus by
 * recursively halving the index range until a chunk is small enough to be
 * processed directly. All subtasks share the same {@link StatisticsNew} instance.
 */
public class ForkJoin extends RecursiveAction {
    private static final long serialVersionUID = 5074814035083362355L;

    /** Ranges with fewer sentences than this are processed directly instead of being split. */
    private static final int ACCEPTABLE_SIZE = 1000;

    private final List<Sentence> corpus;
    private final StatisticsNew stats;
    /** First index (inclusive) of the range handled by this task. */
    private final int start;
    /** Last index (exclusive) of the range handled by this task. */
    private final int end;

    /**
     * Constructor for subproblems.
     */
    private ForkJoin(List<Sentence> corpus, int start, int end, StatisticsNew stats) {
        this.corpus = corpus;
        this.start = start;
        this.end = end;
        this.stats = stats;
    }

    /**
     * Default constructor for the initial problem (covers the whole corpus).
     */
    public ForkJoin(List<Sentence> corpus, StatisticsNew stats) {
        this(corpus, 0, corpus.size(), stats);
    }

    /** Processes this task's range without further splitting. */
    private void computeDirectly() {
        List<Sentence> subCorpus = corpus.subList(start, end);
        Ngrams.calculateForAll(subCorpus, stats);
    }

    @Override
    protected void compute() {
        int subCorpusSize = end - start;

        if (subCorpusSize < ACCEPTABLE_SIZE) {
            computeDirectly();
            return;
        }

        int mid = start + subCorpusSize / 2;
        ForkJoin left = new ForkJoin(corpus, start, mid, stats);
        ForkJoin right = new ForkJoin(corpus, mid, end, stats);

        // invokeAll forks one half and runs the other in the current worker,
        // instead of forking both and idling while waiting on the joins
        invokeAll(left, right);
    }
}

View File

@@ -0,0 +1,204 @@
package alg.ngram;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import data.CalculateFor;
import data.Sentence;
import data.StatisticsNew;
import data.Word;
import gui.ValidationUtil;
/**
 * Generates word ngrams, skipgrams and letter ngrams from sentences and reports
 * every candidate to a {@link StatisticsNew} instance.
 */
public class Ngrams {
    public final static Logger logger = LogManager.getLogger(Ngrams.class);

    /**
     * Dispatches to the matching generator based on the filter of {@code stats}:
     * ngram value 0 means letter ngrams, a positive skip value means skipgrams,
     * everything else plain word ngrams.
     */
    public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats) {
        if (stats.getFilter().getNgramValue() == 0) { // letter ngram
            generateNgramLetterCandidates(corpus, stats);
        } else if (!ValidationUtil.isEmpty(stats.getFilter().getSkipValue()) && stats.getFilter().getSkipValue() > 0) {
            generateSkipgramCandidates(corpus, stats);
        } else {
            generateNgramCandidates(corpus, stats);
        }
    }

    /**
     * Counts every run of n consecutive words (n = ngram value of the filter).
     * Candidates that fail the msd regex filter, if one is set, are not counted.
     */
    public static void generateNgramCandidates(List<Sentence> corpus, StatisticsNew stats) {
        for (Sentence s : corpus) {
            int ngram = stats.getFilter().getNgramValue();
            int wordCount = s.getWords().size();

            // skip sentences shorter than specified ngram length
            if (wordCount < ngram) {
                continue;
            }

            for (int i = 0; i <= wordCount - ngram; i++) {
                List<Word> ngramCandidate = s.getSublist(i, i + ngram);

                // if msd regex is set and this candidate doesn't pass it, skip this iteration
                if (stats.getFilter().hasMsd() && !passesRegex(ngramCandidate, stats.getFilter().getMsd())) {
                    continue;
                }

                stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
            }
        }
    }

    /**
     * Checks whether an ngram candidate passes the specified regex filter: the
     * msd of the i-th word must fully match the i-th pattern.
     */
    private static boolean passesRegex(List<Word> ngramCandidate, ArrayList<Pattern> regex) {
        if (ngramCandidate.size() != regex.size()) {
            logger.error("ngramCandidate.size() & msd.size() mismatch"); // should not occur anyway
            return false;
        }

        for (int i = 0; i < regex.size(); i++) {
            // use the already compiled pattern instead of recompiling it for every word
            if (!regex.get(i).matcher(ngramCandidate.get(i).getMsd()).matches()) {
                return false;
            }
        }

        return true;
    }

    /**
     * Joins the requested representation of each word with single spaces. For
     * {@code calculateFor} values not handled below the result is an empty string.
     */
    private static String wordToString(List<Word> ngramCandidate, CalculateFor calculateFor) {
        ArrayList<String> candidate = new ArrayList<>(ngramCandidate.size());

        switch (calculateFor) {
            case LEMMA:
                candidate.addAll(ngramCandidate.stream().map(Word::getLemma).collect(Collectors.toList()));
                break;
            case WORD:
                candidate.addAll(ngramCandidate.stream().map(Word::getWord).collect(Collectors.toList()));
                break;
            case MORPHOSYNTACTIC_SPECS:
            case MORPHOSYNTACTIC_PROPERTY:
                candidate.addAll(ngramCandidate.stream().map(Word::getMsd).collect(Collectors.toList()));
                break;
            case WORD_TYPE:
                // word type is the first character of the msd
                candidate.addAll(ngramCandidate
                        .stream()
                        .map(w -> Character.toString(w.getMsd().charAt(0)))
                        .collect(Collectors.toList()));
                break;
            default:
                break;
        }

        return StringUtils.join(candidate, " ");
    }

    /**
     * Counts every substring of the configured string length for each word.
     *
     * @param corpus sentences to process
     * @param stats  statistics object providing the filter and receiving the results
     */
    private static void generateNgramLetterCandidates(List<Sentence> corpus, StatisticsNew stats) {
        for (Sentence s : corpus) {
            for (Word w : s.getWords()) {
                String word = w.getForCf(stats.getFilter().getCalculateFor(), stats.getFilter().isCvv());

                // skip this iteration if:
                // - word doesn't contain a proper version (missing lemma for example)
                if (ValidationUtil.isEmpty(word)) {
                    continue;
                }
                // - msd regex is given but this word's msd doesn't match it
                if (stats.getFilter().hasMsd()
                        && !stats.getFilter().getMsd().get(0).matcher(w.getMsd()).matches()) {
                    continue;
                }
                // - given substring length is larger than the word length
                int stringLength = stats.getFilter().getStringLength();
                if (word.length() < stringLength) {
                    continue;
                }

                for (int i = 0; i <= word.length() - stringLength; i++) {
                    // TODO: locila?
                    stats.updateResults(word.substring(i, i + stringLength));
                }
            }
        }
    }

    /**
     * Generates and counts skipgram candidates: sequences of n words in sentence
     * order where up to {@code skip} words may be skipped between two
     * neighbouring members. Candidates are emitted in lexicographic order of
     * their word indices. Nothing is produced for an ngram value below 2.
     *
     * @param corpus sentences to process
     * @param stats  statistics object providing the filter (ngram and skip value)
     *               and receiving the results
     */
    public static void generateSkipgramCandidates(List<Sentence> corpus, StatisticsNew stats) {
        int ngram = stats.getFilter().getNgramValue();
        int skip = stats.getFilter().getSkipValue();

        if (ngram < 2) {
            return;
        }

        for (Sentence s : corpus) {
            List<Word> sentence = s.getWords();
            ArrayList<Word> current = new ArrayList<>(ngram);

            for (int i = 0; i <= sentence.size() - ngram; i++) {
                current.add(sentence.get(i));
                extendSkipgram(sentence, i, ngram, skip, current, stats);
                current.remove(current.size() - 1);
            }
        }
    }

    /**
     * Recursively appends the next word to a partial skipgram. The next index lies
     * within {@code skip + 1} positions after {@code lastIndex} and inside the
     * sentence; once {@code ngram} words are collected the candidate is counted.
     */
    private static void extendSkipgram(List<Word> sentence, int lastIndex, int ngram, int skip,
                                       ArrayList<Word> current, StatisticsNew stats) {
        if (current.size() == ngram) {
            validateAndCountSkipgramCandidate(current, stats);
            return;
        }

        for (int next = lastIndex + 1; next <= lastIndex + 1 + skip && next < sentence.size(); next++) {
            current.add(sentence.get(next));
            extendSkipgram(sentence, next, ngram, skip, current, stats);
            current.remove(current.size() - 1);
        }
    }

    private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats) {
        // count if no regex is set or if it is & candidate passes it
        if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd())) {
            stats.updateResults(wordToString(skipgramCandidate, stats.getFilter().getCalculateFor()));
        }
    }
}

View File

@@ -0,0 +1,62 @@
package alg.word;
import java.util.List;
import java.util.concurrent.RecursiveAction;
import data.Sentence;
import data.StatisticsNew;
/**
 * Fork/join task that runs {@link WordLevel#calculateForAll} over a corpus by
 * recursively halving the index range until a chunk is small enough to be
 * processed directly. All subtasks share the same {@link StatisticsNew} instance.
 */
public class ForkJoin extends RecursiveAction {
    private static final long serialVersionUID = 7711587510996456040L;

    /** Ranges with fewer sentences than this are processed directly instead of being split. */
    private static final int ACCEPTABLE_SIZE = 1000;

    private final List<Sentence> corpus;
    private final StatisticsNew stats;
    /** First index (inclusive) of the range handled by this task. */
    private final int start;
    /** Last index (exclusive) of the range handled by this task. */
    private final int end;

    /**
     * Constructor for subproblems.
     */
    private ForkJoin(List<Sentence> corpus, int start, int end, StatisticsNew stats) {
        this.corpus = corpus;
        this.start = start;
        this.end = end;
        this.stats = stats;
    }

    /**
     * Default constructor for the initial problem (covers the whole corpus).
     */
    public ForkJoin(List<Sentence> corpus, StatisticsNew stats) {
        this(corpus, 0, corpus.size(), stats);
    }

    /** Processes this task's range without further splitting. */
    private void computeDirectly() {
        List<Sentence> subCorpus = corpus.subList(start, end);
        WordLevel.calculateForAll(subCorpus, stats);
    }

    @Override
    protected void compute() {
        int subCorpusSize = end - start;

        if (subCorpusSize < ACCEPTABLE_SIZE) {
            computeDirectly();
            return;
        }

        int mid = start + subCorpusSize / 2;
        ForkJoin left = new ForkJoin(corpus, start, mid, stats);
        ForkJoin right = new ForkJoin(corpus, mid, end, stats);

        // invokeAll forks one half and runs the other in the current worker,
        // instead of forking both and idling while waiting on the joins
        invokeAll(left, right);
    }
}

View File

@@ -0,0 +1,167 @@
package alg.word;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import alg.Common;
import data.CalculateFor;
import data.Sentence;
import data.Statistics;
import data.Word;
/**
 * Word/lemma frequency counting on top of {@link Statistics}. Counts are
 * accumulated in {@code stats.result}; nothing is counted unless
 * {@code stats.getCf()} is {@link CalculateFor#LEMMA} or {@link CalculateFor#WORD}.
 */
class WordCount {
    /**
     * Extracts the string to count for each word.
     *
     * @param words words to convert
     * @param cf    LEMMA or WORD; for any other value an empty list is returned
     * @param cvv   if {@code true} the CVV variant of the lemma/word is used
     */
    private static List<String> extractTexts(List<Word> words, CalculateFor cf, boolean cvv) {
        List<String> texts = new ArrayList<>(words.size());

        if (cf == CalculateFor.LEMMA) {
            for (Word w : words) {
                texts.add(cvv ? w.getCVVLemma() : w.getLemma());
            }
        } else if (cf == CalculateFor.WORD) {
            for (Word w : words) {
                texts.add(cvv ? w.getCVVWord() : w.getWord());
            }
        }

        return texts;
    }

    /** Counts the lemma or word form (depending on {@code stats.getCf()}) of each given word. */
    private static void countWords(List<Word> words, Statistics stats) {
        for (String text : extractTexts(words, stats.getCf(), false)) {
            Common.updateMap(stats.result, text);
        }
    }

    /**
     * Returns the words whose msd starts with the JOS word type configured in
     * {@code stats}. Words with a {@code null} or empty msd never match.
     */
    private static List<Word> filterByJosType(List<Word> words, Statistics stats) {
        List<Word> filteredWords = new ArrayList<>();

        for (Word word : words) {
            String msd = word.getMsd();
            if (msd != null && !msd.isEmpty() && msd.charAt(0) == stats.getDistributionJosWordType()) {
                filteredWords.add(word);
            }
        }

        return filteredWords;
    }

    /** Tells whether the sentence's taxonomy equals (ignoring case) the one configured in {@code stats}. */
    private static boolean hasRequestedTaxonomy(Sentence s, Statistics stats) {
        return s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy());
    }

    private static void calculateNoFilter(List<Sentence> corpus, Statistics stats) {
        for (Sentence s : corpus) {
            countWords(s.getWords(), stats);
        }
    }

    /**
     * Counts all substrings of length {@code stats.getSubstringLength()} of the
     * CVV representation of each word.
     */
    private static void calculateVCC(List<Sentence> corpus, Statistics stats) {
        for (Sentence s : corpus) {
            for (String word : extractTexts(s.getWords(), stats.getCf(), true)) {
                // NOTE(review): strict '>' skips words whose length equals the substring
                // length although they contain exactly one such substring - confirm intent
                if (word.length() > stats.getSubstringLength()) {
                    for (int i = 0; i <= word.length() - stats.getSubstringLength(); i++) {
                        String substring = word.substring(i, i + stats.getSubstringLength());
                        Common.updateMap(stats.result, substring);
                    }
                }
            }
        }
    }

    private static void calculateForJosType(List<Sentence> corpus, Statistics stats) {
        for (Sentence s : corpus) {
            countWords(filterByJosType(s.getWords(), stats), stats);
        }
    }

    private static void calculateForTaxonomyAndJosType(List<Sentence> corpus, Statistics stats) {
        for (Sentence s : corpus) {
            if (hasRequestedTaxonomy(s, stats)) {
                countWords(filterByJosType(s.getWords(), stats), stats);
            }
        }
    }

    private static void calculateForTaxonomy(List<Sentence> corpus, Statistics stats) {
        for (Sentence s : corpus) {
            if (hasRequestedTaxonomy(s, stats)) {
                countWords(s.getWords(), stats);
            }
        }
    }

    /**
     * Entry point: picks the counting variant based on which filters are set in
     * {@code stats}. The CVV substring variant is used only when neither a
     * taxonomy nor a JOS type filter is set.
     */
    static void calculateForAll(List<Sentence> corpus, Statistics stats) {
        boolean taxonomyIsSet = stats.isTaxonomySet();
        boolean josTypeIsSet = stats.isJOSTypeSet();

        // branching up front so the filter checks are not repeated for every word
        if (taxonomyIsSet && josTypeIsSet) {
            calculateForTaxonomyAndJosType(corpus, stats);
        } else if (taxonomyIsSet) {
            calculateForTaxonomy(corpus, stats);
        } else if (josTypeIsSet) {
            calculateForJosType(corpus, stats);
        } else if (stats.isVcc()) {
            calculateVCC(corpus, stats);
        } else {
            calculateNoFilter(corpus, stats);
        }
    }
}

View File

@@ -0,0 +1,112 @@
package alg.word;
import static data.Enums.WordLevelDefaultValues.*;
import java.util.HashSet;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import data.Enums.WordLevelDefaultValues;
import data.Enums.WordLevelType;
import data.Sentence;
import data.StatisticsNew;
import data.Word;
/**
 * Word-level analysis: finds known prefixes and suffixes in word forms and
 * reports them, together with the full word, to a {@link StatisticsNew} instance.
 * The sets of known affixes are loaded once from {@link WordLevelDefaultValues}.
 */
@SuppressWarnings("Duplicates")
public class WordLevel {
    private static HashSet<String> suffixes;
    private static int minSuffixLength;
    private static int maxSuffixLength;

    private static HashSet<String> prefixes;
    private static int minPrefixLength;
    private static int maxPrefixLength;

    static {
        suffixes = WordLevelDefaultValues.getSuffixes();
        calculateSuffixesLengths();

        prefixes = WordLevelDefaultValues.getPrefixes();
        calculatePrefixesLengths();
    }

    /** Looks for a suffix and a prefix in every word form of the corpus. */
    public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats) {
        for (Sentence s : corpus) {
            for (Word word : s.getWords()) {
                calculateForSuffixes(word.getWord(), stats);
                calculateForPrefixes(word.getWord(), stats);
            }
        }
    }

    /**
     * Reports the longest known prefix of {@code word} that leaves at least
     * {@code MIN_N_OF_CHARACTERS_LEFT_PREFIX} characters of the word uncovered.
     * At most one prefix is reported per word.
     */
    private static void calculateForPrefixes(String word, StatisticsNew stats) {
        // try candidate lengths from the longest known prefix down to the shortest
        for (int tmpPrefixLength = maxPrefixLength; tmpPrefixLength >= minPrefixLength; tmpPrefixLength--) {
            // too little of the word would remain for this length; a shorter prefix may still fit
            if (word.length() - tmpPrefixLength < MIN_N_OF_CHARACTERS_LEFT_PREFIX) {
                continue;
            }

            String extractedPrefix = StringUtils.left(word, tmpPrefixLength);

            if (prefixes.contains(extractedPrefix)) {
                // save prefix and full word
                stats.updateResultsNested(WordLevelType.PREFIX, extractedPrefix, word);
                return;
            }
        }
    }

    /**
     * Reports the longest known suffix of {@code word} that leaves at least
     * {@code MIN_N_OF_CHARACTERS_LEFT_SUFFIX} characters of the word uncovered.
     * At most one suffix is reported per word.
     */
    public static void calculateForSuffixes(String word, StatisticsNew stats) {
        // try candidate lengths from the longest known suffix down to the shortest
        for (int tmpSuffixLength = maxSuffixLength; tmpSuffixLength >= minSuffixLength; tmpSuffixLength--) {
            // make sure that the word without the cut-off suffix is still at least as long as
            // the preset minimum; if not, a shorter suffix may still fit, so keep looking
            if (word.length() - tmpSuffixLength < MIN_N_OF_CHARACTERS_LEFT_SUFFIX) {
                continue;
            }

            String extractedSuffix = StringUtils.right(word, tmpSuffixLength);

            if (suffixes.contains(extractedSuffix)) {
                // save suffix and full word
                stats.updateResultsNested(WordLevelType.SUFFIX, extractedSuffix, word);
                return;
            }
        }
    }

    /**
     * Finds the lengths of the shortest and longest suffix for quicker calculations.
     * Both are -1 when there are no suffixes.
     */
    public static void calculateSuffixesLengths() {
        int shortest = -1;
        int longest = -1;

        for (String suffix : suffixes) {
            int length = suffix.length();
            if (longest < 0 || length > longest) {
                longest = length;
            }
            if (shortest < 0 || length < shortest) {
                shortest = length;
            }
        }

        minSuffixLength = shortest;
        maxSuffixLength = longest;
    }

    /**
     * Finds the lengths of the shortest and longest prefix for quicker calculations.
     * Both are -1 when there are no prefixes.
     */
    public static void calculatePrefixesLengths() {
        int shortest = -1;
        int longest = -1;

        for (String prefix : prefixes) {
            int length = prefix.length();
            if (longest < 0 || length > longest) {
                longest = length;
            }
            if (shortest < 0 || length < shortest) {
                shortest = length;
            }
        }

        minPrefixLength = shortest;
        maxPrefixLength = longest;
    }
}

View File

@@ -0,0 +1,17 @@
package data;
/**
 * Available levels of analysis. Each constant carries a human-readable label
 * which is what {@link #toString()} returns.
 */
public enum AnalysisLevel {
    STRING_LEVEL("Besedni nizi"),
    WORD_LEVEL("Nivo besed in delov besed"),
    WORD_FORMATION("Besedotvorni procesi");

    /** Label returned by {@link #toString()}. */
    private final String label;

    AnalysisLevel(String label) {
        this.label = label;
    }

    /** @return the label of this level (not the constant name) */
    @Override
    public String toString() {
        return label;
    }
}

View File

@@ -0,0 +1,43 @@
package data;
/**
 * What a statistic is calculated for. Each constant carries a label which is
 * what {@link #toString()} returns; note that DIST_WORDS / DIST_LEMMAS share
 * their labels with WORD / LEMMA.
 */
public enum CalculateFor {
    WORD("različnica"),
    LEMMA("lema"),
    MORPHOSYNTACTIC_SPECS("oblikoskladenjska oznaka"),
    MORPHOSYNTACTIC_PROPERTY("oblikoskladenjska lastnost"),
    WORD_TYPE("besedna vrsta"),
    DIST_WORDS("različnica"),
    DIST_LEMMAS("lema");

    /** Label returned by {@link #toString()}. */
    private final String label;

    CalculateFor(String label) {
        this.label = label;
    }

    /** @return the label of this constant (not the constant name) */
    @Override
    public String toString() {
        return label;
    }

    /**
     * Looks up a constant by its label. Only WORD, LEMMA, MORPHOSYNTACTIC_SPECS,
     * MORPHOSYNTACTIC_PROPERTY and WORD_TYPE are considered, in that order; the
     * DIST_* constants are never returned.
     *
     * @param cf label to look up, may be {@code null}
     *
     * @return the matching constant, or {@code null} if {@code cf} is {@code null} or matches none
     */
    public static CalculateFor factory(String cf) {
        if (cf == null) {
            return null;
        }

        CalculateFor[] candidates = {WORD, LEMMA, MORPHOSYNTACTIC_SPECS, MORPHOSYNTACTIC_PROPERTY, WORD_TYPE};

        for (CalculateFor candidate : candidates) {
            if (candidate.toString().equals(cf)) {
                return candidate;
            }
        }

        return null;
    }
}

View File

@@ -0,0 +1,163 @@
package data;
import static gui.Messages.*;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import data.Enums.solar.SolarFilters;
import gui.ValidationUtil;
import javafx.collections.ObservableList;
public class Corpus {
public final static Logger logger = LogManager.getLogger(Corpus.class);
private CorpusType corpusType;
private File chosenResultsLocation;
private File chosenCorpusLocation;
private Collection<File> detectedCorpusFiles;
boolean headerRead;
private ObservableList<String> taxonomy; // if gigafida or gos
private HashMap<String, ObservableList<String>> solarFilters; // if solar
private HashMap<String, HashSet<String>> solarFiltersForXML; // if solar - used while parsing xml
private boolean gosOrthMode;
boolean hasMsdData;
private ArrayList<String> validationErrors;
public Corpus() {
validationErrors = new ArrayList<>();
}
public CorpusType getCorpusType() {
return corpusType;
}
public void setCorpusType(CorpusType corpusType) {
this.corpusType = corpusType;
logger.info("Corpus.set: ", corpusType);
}
public File getChosenResultsLocation() {
return chosenResultsLocation;
}
public void setChosenResultsLocation(File chosenResultsLocation) {
this.chosenResultsLocation = chosenResultsLocation;
logger.info("Corpus.set: ", chosenResultsLocation);
}
public File getChosenCorpusLocation() {
return chosenCorpusLocation;
}
public void setChosenCorpusLocation(File chosenCorpusLocation) {
this.chosenCorpusLocation = chosenCorpusLocation;
logger.info("Corpus.set: ", chosenCorpusLocation);
}
public Collection<File> getDetectedCorpusFiles() {
return detectedCorpusFiles;
}
public void setDetectedCorpusFiles(Collection<File> detectedCorpusFiles) {
this.detectedCorpusFiles = detectedCorpusFiles;
logger.info("Corpus.set: ", detectedCorpusFiles);
}
public boolean isHeaderRead() {
return headerRead;
}
public void setHeaderRead(boolean headerRead) {
this.headerRead = headerRead;
}
public ObservableList<String> getTaxonomy() {
return taxonomy;
}
public void setTaxonomy(ObservableList<String> taxonomy) {
this.taxonomy = taxonomy;
logger.info("Corpus.set: ", taxonomy);
}
public HashMap<String, ObservableList<String>> getSolarFilters() {
return solarFilters;
}
public void setSolarFilters(HashMap<String, ObservableList<String>> solarFilters) {
this.solarFilters = solarFilters;
logger.info("Corpus.set: ", solarFilters);
}
public HashMap<String, HashSet<String>> getSolarFiltersForXML() {
return solarFiltersForXML;
}
public void setSolarFiltersForXML(HashMap<String, HashSet<String>> solarFiltersForXML) {
this.solarFiltersForXML = solarFiltersForXML;
logger.info("Corpus.set: ", solarFiltersForXML);
}
public boolean isGosOrthMode() {
return gosOrthMode;
}
public void setGosOrthMode(boolean gosOrthMode) {
this.gosOrthMode = gosOrthMode;
logger.info("Corpus.set: ", gosOrthMode);
}
public ArrayList<String> getValidationErrors() {
return validationErrors;
}
public String getValidationErrorsToString() {
return StringUtils.join(validationErrors, "\n - ");
}
public void setValidationErrors(ArrayList<String> validationErrors) {
this.validationErrors = validationErrors;
}
public boolean validate() {
if (corpusType == null) {
validationErrors.add(LABEL_RESULTS_CORPUS_TYPE_NOT_SET);
}
if (chosenCorpusLocation == null) {
validationErrors.add(LABEL_CORPUS_LOCATION_NOT_SET);
}
if (chosenResultsLocation == null) {
validationErrors.add(LABEL_RESULTS_LOCATION_NOT_SET);
}
if (!headerRead && corpusType != null) {
// if user didn't opt into reading the headers, set default taxonomy or solar filters
if (Tax.getCorpusTypesWithTaxonomy().contains(corpusType)) {
taxonomy = Tax.getTaxonomyForComboBox(corpusType);
} else if (corpusType == CorpusType.SOLAR && solarFilters == null) {
setSolarFilters(SolarFilters.getFiltersForComboBoxes());
}
}
if (headerRead && ValidationUtil.isEmpty(taxonomy)) {
// mustn't happen, intercept at gui level
}
if (!ValidationUtil.isEmpty(validationErrors)) {
logger.error("Corpus validation error: ", StringUtils.join(validationErrors, "\n - "));
return false;
} else {
return true;
}
}
}

View File

@@ -0,0 +1,25 @@
package data;
/**
 * Supported corpus types. Each constant carries a display name, returned by
 * {@link #toString()}, and a lower-case variant of that name.
 */
public enum CorpusType {
    GIGAFIDA("Gigafida", "gigafida"),
    CCKRES("ccKres ", "cckres"),
    SOLAR("Šolar", "šolar"),
    GOS("GOS", "gos");

    /** Display name returned by {@link #toString()}. */
    private final String displayName;
    /** Lower-case name returned by {@link #getNameLowerCase()}. */
    private final String lowerCaseName;

    CorpusType(String displayName, String lowerCaseName) {
        this.displayName = displayName;
        this.lowerCaseName = lowerCaseName;
    }

    /** @return the display name of this corpus type (not the constant name) */
    @Override
    public String toString() {
        return displayName;
    }

    /** @return the lower-case name of this corpus type */
    public String getNameLowerCase() {
        return lowerCaseName;
    }
}

View File

@@ -0,0 +1,12 @@
package data.Enums;
import java.util.Arrays;
import java.util.HashSet;
/**
 * Holder for the set of word type codes (first character of an msd) that are
 * treated as inflected: 'S', 'G' and 'P'.
 */
public class InflectedJosTypes {
    /** Codes of the inflected word types. */
    public static final HashSet<Character> inflectedJosTypes = new HashSet<>(Arrays.asList('S', 'G', 'P'));
}

View File

@@ -0,0 +1,68 @@
package data.Enums;
import java.util.HashMap;
/**
 * Word types with their Slovenian and English names, their one-character Slovenian and
 * English codes, and the number of attributes registered for each type.
 */
public enum Msd {
    NOUN("samostalnik", 'S', "Noun", 'N', 5),
    VERB("glagol", 'G', "Verb", 'V', 7),
    ADJECTIVE("pridevnik", 'P', "Adjective", 'A', 6),
    ADVERB("prislov", 'R', "Adverb", 'R', 2),
    PRONOUN("zaimek", 'Z', "Pronoun", 'P', 8),
    NUMERAL("števnik", 'K', "Numeral", 'M', 6),
    PREPOSITION("predlog", 'D', "Preposition", 'S', 1),
    CONJUNCTION("veznik", 'V', "Conjunction", 'C', 1),
    PARTICLE("členek", 'L', "Particle", 'Q', 0),
    INTERJECTION("medmet", 'M', "Interjection", 'I', 0),
    ABBREVIATION("okrajšava", 'O', "Abbreviation", 'Y', 0),
    RESIDUAL("neuvrščeno", 'N', "Residual", 'X', 1);

    /** Lookup from Slovenian type code to the number of attributes of that type. */
    private static final HashMap<Character, Integer> siCodeNOfAttributes = new HashMap<>();

    static {
        for (Msd type : values()) {
            siCodeNOfAttributes.put(type.siCode, type.nOfAttributes);
        }
    }

    private final String siName;
    private final Character siCode;
    private final String enName;
    private final Character enCode;
    private final Integer nOfAttributes;

    Msd(String siName, Character siCode, String enName, Character enCode, int nOfAttributes) {
        this.siName = siName;
        this.siCode = siCode;
        this.enName = enName;
        this.enCode = enCode;
        this.nOfAttributes = nOfAttributes;
    }

    /** @return the Slovenian name of the word type */
    public String getSiName() {
        return siName;
    }

    /** @return the one-character Slovenian code of the word type */
    public Character getSiCode() {
        return siCode;
    }

    /** @return the English name of the word type */
    public String getEnName() {
        return enName;
    }

    /** @return the one-character English code of the word type */
    public Character getEnCode() {
        return enCode;
    }

    /**
     * Returns the number of attributes registered for the word type whose Slovenian code is
     * the first character of {@code msd}, plus one.
     *
     * @param msd a tag whose first character is a Slovenian word-type code
     *
     * @return number of attributes of that type + 1
     *
     * @throws NullPointerException            if {@code msd} is null or its first character is not a known code
     * @throws StringIndexOutOfBoundsException if {@code msd} is empty
     */
    public static int getMsdLengthForType(String msd) {
        char typeCode = msd.charAt(0);
        Integer attributeCount = siCodeNOfAttributes.get(typeCode);
        return attributeCount + 1;
    }
}

View File

@@ -0,0 +1,55 @@
package data.Enums;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
 * Default suffix and prefix lists, loaded once from classpath resources when the class is initialised.
 */
public class WordLevelDefaultValues {
    // Must stay the first static member: readFromFile(), called from the static block below, logs through it.
    public final static Logger logger = LogManager.getLogger(WordLevelDefaultValues.class);

    private static HashSet<String> suffixes;
    private static final String SUFFIXES_FILE = "/Lists/suffixes.txt";
    public static final int MIN_N_OF_CHARACTERS_LEFT_SUFFIX = 2;

    private static HashSet<String> prefixes;
    private static final String PREFIXES_FILE = "/Lists/prefixes.txt";
    public static final int MIN_N_OF_CHARACTERS_LEFT_PREFIX = 2;

    static {
        suffixes = readFromFile(SUFFIXES_FILE);
        prefixes = readFromFile(PREFIXES_FILE);
    }

    /**
     * Reads a classpath resource line by line into a set.
     *
     * @param fileName absolute classpath resource name (leading {@code /})
     *
     * @return the lines of the resource, or an empty set if the resource is missing or cannot be read
     */
    private static HashSet<String> readFromFile(String fileName) {
        // Resolve against this class. The previous WordLevelDefaultValues.class.getClass() evaluated to
        // java.lang.Class, so the lookup went through the wrong class loader / module and could return null.
        try (InputStream is = WordLevelDefaultValues.class.getResourceAsStream(fileName)) {
            if (is == null) {
                logger.warn("Resource {} not found, using an empty list", fileName);
                return new HashSet<>();
            }

            // NOTE(review): the reader uses the platform default charset, as before - confirm the
            // encoding of the list files and pass it explicitly if they contain non-ASCII characters.
            BufferedReader reader = new BufferedReader(new InputStreamReader(is));

            // Collect straight into a HashSet; Collectors.toSet() gives no guarantee about the concrete type.
            return reader.lines().collect(Collectors.toCollection(HashSet::new));
        } catch (IOException e) {
            logger.error("Problem reading init dictionary", e);
            return new HashSet<>();
        }
    }

    /** @return the suffixes loaded at class initialisation (the internal set, not a copy) */
    public static HashSet<String> getSuffixes() {
        return suffixes;
    }

    /** @return the prefixes loaded at class initialisation (the internal set, not a copy) */
    public static HashSet<String> getPrefixes() {
        return prefixes;
    }
}

View File

@@ -0,0 +1,16 @@
package data.Enums;
/**
 * The two kinds of word-level analysis, each carrying a label string.
 */
public enum WordLevelType {
    SUFFIX("pripona"),
    PREFIX("predpona");

    /** Label returned by {@link #getName()}. */
    private final String label;

    WordLevelType(String label) {
        this.label = label;
    }

    /**
     * @return the label of this type
     */
    public String getName() {
        return this.label;
    }
}

View File

@@ -0,0 +1,57 @@
package data.Enums.solar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
/**
 * Filter names and their predefined values, plus the value lists offered for n-gram computation.
 */
public class SolarFilters {
    private static HashMap<String, ObservableList<String>> SOLAR_FILTERS;

    public static final String SOLA = "sola";
    public static final String PREDMET = "predmet";
    public static final String RAZRED = "razred";
    public static final String REGIJA = "regija";
    public static final String TIP = "tip";
    public static final String LETO = "leto";

    static {
        SOLAR_FILTERS = new HashMap<>();
        SOLAR_FILTERS.put(REGIJA, FXCollections.observableArrayList("Celje", "Gorica", "Koper", "Kranj", "Krško", "Ljubljana", "Maribor", "Murska Sobota", "Novo mesto", "Postojna", "Slovenj Gradec"));
        SOLAR_FILTERS.put(PREDMET, FXCollections.observableArrayList("državljanska vzgoja in etika", "ekonomija", "filozofija", "geografija", "kemija", "podjetništvo", "psihologija", "slovenščina", "sociologija", "umetnostna vzgoja", "zgodovina"));
        SOLAR_FILTERS.put(RAZRED, FXCollections.observableArrayList("6. razred", "7. razred", "8. razred", "9. razred", "1. letnik", "2. letnik", "3. letnik", "4. letnik", "5. letnik", "maturitetni tečaj"));
        SOLAR_FILTERS.put(LETO, FXCollections.observableArrayList("2007", "2008", "2009", "2009/2010", "2010"));
        SOLAR_FILTERS.put(SOLA, FXCollections.observableArrayList("gimnazija", "osnovna šola", "poklicna šola", "strokovna šola"));
        SOLAR_FILTERS.put(TIP, FXCollections.observableArrayList("esej/spis", "pisni izdelek (učna ura)", "test (daljše besedilo)", "test (odgovori na vprašanja)"));
    }

    public static final ObservableList<String> N_GRAM_COMPUTE_FOR_FULL = FXCollections.observableArrayList("različnica", "lema", "oblikoskladenjska oznaka", "oblikoskladenjska lastnost", "besedna vrsta");
    public static final ObservableList<String> N_GRAM_COMPUTE_FOR_LIMITED = FXCollections.observableArrayList("različnica", "lema");

    /**
     * Returns the predefined filters with all their possible values.
     * <p>
     * The shared static map is returned directly, not a copy, so changes made by a caller are
     * visible to every later caller.
     *
     * @return map from filter name to its predefined values
     */
    public static HashMap<String, ObservableList<String>> getFiltersForComboBoxes() {
        return SOLAR_FILTERS;
    }

    /**
     * Returns, for every predefined filter name, the sorted values that were actually found.
     * <p>
     * A filter that has no entry in {@code foundFilters} gets a blank list. The same applies when
     * {@code foundFilters} itself, or the value stored for a filter, is {@code null}.
     *
     * @param foundFilters map from filter name to the values found for it; may be {@code null}
     *
     * @return a new map with one entry per predefined filter name
     */
    public static HashMap<String, ObservableList<String>> getFiltersForComboBoxes(HashMap<String, HashSet<String>> foundFilters) {
        HashMap<String, ObservableList<String>> filtersForComboBoxes = new HashMap<>();

        for (String filterName : SOLAR_FILTERS.keySet()) {
            HashSet<String> found = foundFilters != null ? foundFilters.get(filterName) : null;

            if (found == null) {
                // if, for some reason, a specific filter wasn't in the corpus, return a blank list for that filter
                filtersForComboBoxes.put(filterName, FXCollections.observableArrayList());
            } else {
                filtersForComboBoxes.put(filterName, FXCollections.observableArrayList(found).sorted());
            }
        }

        return filtersForComboBoxes;
    }
}

View File

@@ -0,0 +1,144 @@
package data;
import static data.Filter.filterName.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.regex.Pattern;
import gui.ValidationUtil;
@SuppressWarnings("unchecked")
/**
 * Bag of analysis settings. Every setting is stored in one map under its {@link filterName} key;
 * the typed accessors below cast the stored value back.
 */
public class Filter {
    private HashMap<filterName, Object> filter;

    /** Keys under which the individual settings are stored. */
    public enum filterName {
        ANALYSIS_LEVEL,
        CALCULATE_FOR,
        NGRAM_VALUE,
        SKIP_VALUE,
        IS_CVV,
        STRING_LENGTH,
        TAXONOMY,
        MSD,
        HAS_MSD,
        SOLAR_FILTERS
    }

    /** Creates a filter with no settings stored. */
    public Filter() {
        filter = new HashMap<>();
    }

    /**
     * Creates a filter with the analysis level and the "calculate for" setting stored.
     *
     * @param al analysis level
     * @param cf calculate-for setting
     */
    public Filter(AnalysisLevel al, CalculateFor cf) {
        this();
        filter.put(ANALYSIS_LEVEL, al);
        filter.put(CALCULATE_FOR, cf);
    }

    // ---- analysis level / calculate for ----

    public AnalysisLevel getAl() {
        Object stored = filter.get(ANALYSIS_LEVEL);
        return (AnalysisLevel) stored;
    }

    public void setAl(AnalysisLevel al) {
        filter.put(ANALYSIS_LEVEL, al);
    }

    public CalculateFor getCalculateFor() {
        Object stored = filter.get(CALCULATE_FOR);
        return (CalculateFor) stored;
    }

    public void setCalculateFor(CalculateFor cf) {
        filter.put(CALCULATE_FOR, cf);
    }

    // ---- n-gram and skip values (null when never set) ----

    public Integer getNgramValue() {
        Object stored = filter.get(NGRAM_VALUE);
        return (Integer) stored;
    }

    public void setNgramValue(Integer ngramValue) {
        filter.put(NGRAM_VALUE, ngramValue);
    }

    public Integer getSkipValue() {
        Object stored = filter.get(SKIP_VALUE);
        return (Integer) stored;
    }

    public void setSkipValue(Integer skipValue) {
        filter.put(SKIP_VALUE, skipValue);
    }

    // ---- cvv flag and string length ----

    /** @return the stored cvv flag, or {@code false} when it was never set */
    public boolean isCvv() {
        return Boolean.TRUE.equals(filter.get(IS_CVV));
    }

    public void setIsCvv(boolean isCvv) {
        filter.put(IS_CVV, isCvv);
    }

    public Integer getStringLength() {
        Object stored = filter.get(STRING_LENGTH);
        return (Integer) stored;
    }

    public void setStringLength(int stringLength) {
        filter.put(STRING_LENGTH, stringLength);
    }

    // ---- taxonomy ----

    /** @return the stored taxonomy list, or a new empty list when none (or null) is stored */
    public ArrayList<String> getTaxonomy() {
        Object stored = filter.get(TAXONOMY);
        if (stored == null) {
            return new ArrayList<>();
        }
        return (ArrayList<String>) stored;
    }

    public void setTaxonomy(ArrayList<String> taxonomy) {
        filter.put(TAXONOMY, taxonomy);
    }

    // ---- msd ----

    /** @return the stored msd patterns, or {@code null} when none are stored */
    public ArrayList<Pattern> getMsd() {
        Object stored = filter.get(MSD);
        return (ArrayList<Pattern>) stored;
    }

    /**
     * Stores the msd patterns and updates the has-msd flag to reflect whether the list is non-empty.
     *
     * @param msd patterns to store
     */
    public void setMsd(ArrayList<Pattern> msd) {
        filter.put(MSD, msd);
        setHasMsd(!ValidationUtil.isEmpty(msd));
    }

    /** @return the stored has-msd flag, or {@code false} when it was never set */
    public boolean hasMsd() {
        return Boolean.TRUE.equals(filter.get(HAS_MSD));
    }

    public void setHasMsd(boolean hasMsd) {
        filter.put(HAS_MSD, hasMsd);
    }

    // ---- solar filters ----

    public HashMap<String, HashSet<String>> getSolarFilters() {
        Object stored = filter.get(SOLAR_FILTERS);
        return (HashMap<String, HashSet<String>>) stored;
    }

    public void setSolarFilters(HashMap<String, HashSet<String>> filters) {
        filter.put(SOLAR_FILTERS, filters);
    }

    /**
     * @return a multi-line listing of every stored setting, one "key: value" pair per line
     */
    @Override
    public String toString() {
        final String lineStart = "\n\t- ";
        StringBuilder out = new StringBuilder();

        out.append(lineStart).append("Filter:");
        for (Map.Entry<filterName, Object> setting : filter.entrySet()) {
            Object value = setting.getValue();
            out.append(lineStart);
            out.append(setting.getKey().toString());
            out.append(": ");
            out.append(value == null ? "null" : value.toString());
        }

        return out.toString();
    }
}

View File

@@ -0,0 +1,71 @@
package data;
/**
 * Word types, each with a name and a one-character word-type code.
 */
public enum GigafidaJosWordType {
    SAMOSTALNIK("samostalnik", 'S'),
    GLAGOL("glagol", 'G'),
    PRIDEVNIK("pridevnik", 'P'),
    PRISLOV("prislov", 'R'),
    ZAIMEK("zaimek", 'Z'),
    STEVNIK("stevnik", 'K'),
    PREDLOG("predlog", 'D'),
    VEZNIK("veznik", 'V'),
    CLENEK("clenek", 'L'),
    MEDMET("medmet", 'M'),
    OKRAJSAVA("okrajsava", 'O');

    private final String name;
    private final char wordType;

    GigafidaJosWordType(String name, char wordType) {
        this.name = name;
        this.wordType = wordType;
    }

    /** @return the name of this word type */
    @Override
    public String toString() {
        return this.name;
    }

    /** @return the one-character code of this word type */
    public char getWordType() {
        return wordType;
    }

    /**
     * Looks up the constant whose name (as returned by {@link #toString()}) equals the given string.
     *
     * @param wType name to look up; may be {@code null}
     *
     * @return the matching constant, or {@code null} if {@code wType} is null or matches no constant
     */
    public static GigafidaJosWordType factory(String wType) {
        if (wType == null) {
            return null;
        }

        for (GigafidaJosWordType candidate : values()) {
            if (candidate.toString().equals(wType)) {
                return candidate;
            }
        }

        return null;
    }
}

View File

@@ -0,0 +1,76 @@
package data;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.stream.Collectors;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
/**
 * Taxonomy entries, each with a display name and a dotted taxonomy code.
 */
public enum GigafidaTaxonomy {
    TISK("tisk", "T"),
    KNJIZNO("knjižno", "T.K"),
    LEPOSLOVNO("leposlovno", "T.K.L"),
    STROKOVNO("strokovno", "T.K.S"),
    PERIODICNO("periodično", "T.P"),
    CASOPIS("časopis", "T.P.C"),
    REVIJA("revija", "T.P.R"),
    INTERNET("internet", "I");

    private final String name;
    private final String taxonomy;

    /** Display names of all constants, in declaration order. */
    private static final ObservableList<String> FOR_COMBO_BOX;

    static {
        ArrayList<String> labels = new ArrayList<>();
        for (GigafidaTaxonomy entry : GigafidaTaxonomy.values()) {
            labels.add(entry.name);
        }
        FOR_COMBO_BOX = FXCollections.observableArrayList(labels);
    }

    GigafidaTaxonomy(String name, String taxonomy) {
        this.name = name;
        this.taxonomy = taxonomy;
    }

    /** @return the display name of this entry */
    @Override
    public String toString() {
        return this.name;
    }

    /** @return the taxonomy code of this entry */
    public String getTaxonomnyString() {
        return this.taxonomy;
    }

    /**
     * Looks up the constant whose display name equals the given string.
     *
     * @param tax display name to look up; may be {@code null}
     *
     * @return the matching constant, or {@code null} if {@code tax} is null or matches no constant
     */
    public static GigafidaTaxonomy factory(String tax) {
        if (tax == null) {
            return null;
        }

        for (GigafidaTaxonomy entry : values()) {
            if (entry.toString().equals(tax)) {
                return entry;
            }
        }

        return null;
    }

    /** @return the shared list of display names (the same list instance on every call) */
    public static ObservableList<String> getForComboBox() {
        return FOR_COMBO_BOX;
    }
}

View File

@@ -0,0 +1,85 @@
package data;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.stream.Collectors;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
/**
 * Taxonomy entries, each with a display name and a dotted taxonomy code prefixed with "gos.".
 */
public enum GosTaxonomy {
    JAVNI("javni", "gos.T.J"),
    INFORMATIVNO_IZOBRAZEVALNI("informativno-izobraževalni", "gos.T.J.I"),
    RAZVEDRILNI("razvedrilni", "gos.T.J.R"),
    NEJAVNI("nejavni", "gos.T.N"),
    NEZASEBNI("nezasebni", "gos.T.N.N"),
    ZASEBNI("zasebni", "gos.T.N.Z"),
    OSEBNI_STIK("osebni stik", "gos.K.O"),
    TELEFON("telefon", "gos.K.P"),
    RADIO("radio", "gos.K.R"),
    TELEVIZIJA("televizija", "gos.K.T");

    private final String name;
    private final String taxonomy;

    /** Display names of all constants, in declaration order. */
    private static final ObservableList<String> FOR_COMBO_BOX;

    static {
        ArrayList<String> labels = new ArrayList<>();
        for (GosTaxonomy entry : GosTaxonomy.values()) {
            labels.add(entry.name);
        }
        FOR_COMBO_BOX = FXCollections.observableArrayList(labels);
    }

    GosTaxonomy(String name, String taxonomy) {
        this.name = name;
        this.taxonomy = taxonomy;
    }

    /** @return the display name of this entry */
    @Override
    public String toString() {
        return this.name;
    }

    /** @return the taxonomy code of this entry */
    public String getTaxonomnyString() {
        return this.taxonomy;
    }

    /**
     * Looks up the constant whose display name equals the given string.
     *
     * @param tax display name to look up; may be {@code null}
     *
     * @return the matching constant, or {@code null} if {@code tax} is null or matches no constant
     */
    public static GosTaxonomy factory(String tax) {
        if (tax == null) {
            return null;
        }

        for (GosTaxonomy entry : values()) {
            if (entry.toString().equals(tax)) {
                return entry;
            }
        }

        return null;
    }

    /** @return the shared list of display names (the same list instance on every call) */
    public static ObservableList<String> getForComboBox() {
        return FOR_COMBO_BOX;
    }
}

View File

@@ -0,0 +1,56 @@
package data;
import java.util.List;
import java.util.Map;
/**
 * A list of words together with an optional taxonomy string, an optional type and an
 * optional property map.
 */
public class Sentence {
    private List<Word> words;
    private String taksonomija;

    // GOS
    private String type;
    private Map<String, String> properties;

    /**
     * @param words the words of the sentence; taxonomy, type and properties stay {@code null}
     */
    public Sentence(List<Word> words) {
        this(words, null);
    }

    /**
     * @param words       the words of the sentence
     * @param taksonomija taxonomy string of the sentence
     */
    public Sentence(List<Word> words, String taksonomija) {
        this.words = words;
        this.taksonomija = taksonomija;
    }

    /**
     * @param words       the words of the sentence
     * @param taksonomija taxonomy string of the sentence
     * @param properties  property map stored with the sentence
     */
    public Sentence(List<Word> words, String taksonomija, Map<String, String> properties) {
        this(words, taksonomija);
        this.properties = properties;
    }

    /**
     * @param words       the words of the sentence
     * @param taksonomija taxonomy string of the sentence
     * @param type        type of the sentence
     */
    public Sentence(List<Word> words, String taksonomija, String type) {
        this(words, taksonomija);
        this.type = type;
    }

    /** @return the word list passed to the constructor (not a copy) */
    public List<Word> getWords() {
        return this.words;
    }

    /**
     * Returns a view of the words between the two indices, as produced by {@link List#subList(int, int)}.
     *
     * @param indexFrom start index, inclusive
     * @param indexTo   end index, exclusive
     *
     * @return the sub-list view
     */
    public List<Word> getSublist(int indexFrom, int indexTo) {
        List<Word> all = this.words;
        return all.subList(indexFrom, indexTo);
    }

    /** @return the taxonomy string, or {@code null} when none was given */
    public String getTaxonomy() {
        return this.taksonomija;
    }

    /** @return the type, or {@code null} when none was set */
    public String getType() {
        return this.type;
    }

    public void setType(String type) {
        this.type = type;
    }
}

View File

@@ -0,0 +1,16 @@
package data;
import java.io.File;
import java.util.Collection;
/**
 * Application-wide constants and two shared, mutable static settings.
 */
public class Settings {
// NOTE(review): presumably the maximum number of sentences handled in one batch - confirm against the callers
public static final int CORPUS_SENTENCE_LIMIT = 50000;
public static final boolean PRINT_LOG = false;
// CSS snippets that set the -fx-accent colour: green for the OK state, red for the not-OK state
public static final String FX_ACCENT_OK = "-fx-accent: forestgreen;";
public static final String FX_ACCENT_NOK = "-fx-accent: red;";
// Mutable global state: neither field is initialised here, so both are null until assigned elsewhere
public static Collection<File> corpus;
public static File resultsFilePath;
}

View File

@@ -0,0 +1,299 @@
package data;
import java.io.UnsupportedEncodingException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import util.Util;
import util.db.RDB;
/**
 * Settings and result counters of a single analysis run.
 * <p>
 * Each constructor configures one kind of analysis and builds a result title that ends in a
 * creation timestamp.
 */
public class Statistics {
    /**
     * Timestamp format used in result titles. Uses the 24-hour clock ({@code HH}); the 12-hour
     * {@code hh} without an AM/PM marker made morning and afternoon titles indistinguishable.
     */
    private static final DateTimeFormatter TITLE_TIMESTAMP = DateTimeFormatter.ofPattern("dd.MM.yyyy_HH.mm");

    private CorpusType corpusType;
    private AnalysisLevel analysisLevel;
    private boolean useDB;
    private RDB db;

    private boolean analysisProducedResults;

    private String taxonomy;
    private boolean taxonomyIsSet;

    private char JOSType;
    private boolean JOSTypeIsSet;

    private String resultTitle;
    public Map<String, AtomicLong> result = new ConcurrentHashMap<>();

    // nGrams
    private int nGramLevel;
    private Integer skip;
    private CalculateFor cf;
    private List<Pattern> morphosyntacticFilter;

    // distributions
    private String distributionTaxonomy;
    private char distributionJosWordType;
    private boolean vcc;
    private Integer substringLength;

    // inflected JOS
    private String inflectedJosTaxonomy;

    // GOS
    boolean gosOrthMode;

    // solar
    Map<String, Object> solarHeadBlockFilter;

    /**
     * Creates statistics for an n-gram analysis.
     *
     * @param al         analysis level
     * @param nGramLevel n-gram level
     * @param skip       skip value; {@code null} and {@code 0} are both stored as {@code null}
     * @param cf         calculate-for setting; its string form becomes part of the title
     */
    public Statistics(AnalysisLevel al, int nGramLevel, Integer skip, CalculateFor cf) {
        String dateTime = LocalDateTime.now().format(TITLE_TIMESTAMP);

        this.cf = cf;
        this.analysisLevel = al;
        this.nGramLevel = nGramLevel;
        this.skip = skip == null || skip == 0 ? null : skip;

        this.resultTitle = String.format("%s%d-gram_%s_%s",
                this.skip != null ? String.format("%d-%s-", skip, "skip") : "",
                nGramLevel,
                cf.toString(),
                dateTime);
    }

    /**
     * Creates statistics for a word-distribution analysis.
     *
     * @param al                      analysis level
     * @param distributionTaxonomy    taxonomy to restrict to, or {@code null} for none
     * @param distributionJosWordType word type to restrict to, or {@code null} for none
     * @param cf                      calculate-for setting
     */
    public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) {
        String dateTime = LocalDateTime.now().format(TITLE_TIMESTAMP);

        this.resultTitle = String.format("%s_%s_%s",
                distributionTaxonomy != null ? distributionTaxonomy.toString() : "",
                distributionJosWordType != null ? distributionJosWordType.toString() : "",
                dateTime);

        this.analysisLevel = al;
        this.cf = cf;
        this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null;
        this.taxonomyIsSet = distributionTaxonomy != null;

        this.JOSTypeIsSet = distributionJosWordType != null;
        this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' ';
    }

    /**
     * Creates statistics for the distribution of vowel/consonant sequences; sets the vcc flag.
     *
     * @param al              analysis level
     * @param cf              calculate-for setting
     * @param substringLength substring length; becomes part of the title
     */
    public Statistics(AnalysisLevel al, CalculateFor cf, Integer substringLength) {
        String dateTime = LocalDateTime.now().format(TITLE_TIMESTAMP);

        this.resultTitle = String.format("%s_%d_%s",
                "Distribucija zaporedij samoglasnikov in soglasnikov",
                substringLength,
                dateTime);

        this.analysisLevel = al;
        this.cf = cf;
        this.substringLength = substringLength;
        this.vcc = true;
    }

    /**
     * Creates statistics for an inflected-JOS analysis.
     *
     * @param al                   analysis level
     * @param inflectedJosTaxonomy taxonomy to restrict to, or {@code null} for none
     */
    public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) {
        String dateTime = LocalDateTime.now().format(TITLE_TIMESTAMP);

        // Use the constructor argument here. The field distributionTaxonomy is never assigned in this
        // constructor, so reading it always produced an empty taxonomy part in the title.
        this.resultTitle = String.format("InflectedJOS_%s_%s",
                inflectedJosTaxonomy != null ? inflectedJosTaxonomy.toString() : "",
                dateTime);

        this.analysisLevel = al;
        this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null;
        this.taxonomyIsSet = inflectedJosTaxonomy != null;
    }

    /** @return the skip value, or {@code null} when no (or a zero) skip was given */
    public Integer getSkip() {
        return skip;
    }

    public Integer getSubstringLength() {
        return substringLength;
    }

    public String getInflectedJosTaxonomy() {
        return inflectedJosTaxonomy;
    }

    public void setSubstringLength(Integer substringLength) {
        this.substringLength = substringLength;
    }

    public boolean isVcc() {
        return vcc;
    }

    public void setVcc(boolean vcc) {
        this.vcc = vcc;
    }

    public String getDistributionTaxonomy() {
        return distributionTaxonomy;
    }

    public void setDistributionTaxonomy(String distributionTaxonomy) {
        this.distributionTaxonomy = distributionTaxonomy;
    }

    public char getDistributionJosWordType() {
        return distributionJosWordType;
    }

    public void setDistributionJosWordType(char distributionJosWordType) {
        this.distributionJosWordType = distributionJosWordType;
    }

    /**
     * Converts the given filter strings to regex patterns and stores them.
     * Every {@code *} in a filter string is turned into the regex wildcard {@code .}.
     *
     * @param morphosyntacticFilter filter strings; must not be {@code null}
     */
    public void setMorphosyntacticFilter(List<String> morphosyntacticFilter) {
        // change filter strings to regex patterns
        this.morphosyntacticFilter = new ArrayList<>();
        for (String s : morphosyntacticFilter) {
            this.morphosyntacticFilter.add(Pattern.compile(s.replaceAll("\\*", ".")));
        }
    }

    /** @return the compiled morphosyntactic filter patterns, or {@code null} when none were set */
    public List<Pattern> getMsd() {
        return morphosyntacticFilter;
    }

    public Map<String, AtomicLong> getResult() {
        return result;
    }

    public void setTaxonomy(String taxonomy) {
        this.taxonomy = taxonomy;
    }

    public void setTaxonomyIsSet(boolean taxonomyIsSet) {
        this.taxonomyIsSet = taxonomyIsSet;
    }

    public char getJOSType() {
        return JOSType;
    }

    public void setJOSType(char JOSType) {
        this.JOSType = JOSType;
    }

    public boolean isJOSTypeSet() {
        return JOSTypeIsSet;
    }

    /**
     * Sets the "JOS type is set" flag. Despite sharing its name with {@link #setJOSType(char)},
     * this overload does not change the JOS type itself.
     *
     * @param JOSTypeIsSet new value of the flag
     */
    public void setJOSType(boolean JOSTypeIsSet) {
        this.JOSTypeIsSet = JOSTypeIsSet;
    }

    /**
     * Currently does nothing: the whole implementation is commented out.
     *
     * @param limit unused
     */
    public void saveResultToDisk(int... limit) throws UnsupportedEncodingException {
        // Set<Pair<String, Map<String, Long>>> stats = new HashSet<>();
        //
        // if (useDB) {
        // result = db.getDump();
        // db.delete();
        // }
        //
        // // if no results and nothing to save, return false
        // if (!(result.size() > 0)) {
        // analysisProducedResults = false;
        // return;
        // } else {
        // analysisProducedResults = true;
        // }
        //
        // stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
        // Export.SetToCSV(stats);
    }

    // private Map<String, Integer> getSortedResultInflected(Map map) {
    // // first convert to <String, Integer>
    // Map<String, Integer> m = Util.sortByValue(Util.atomicInt2StringAndInt(map), 0);
    //
    // Map<String, Integer> sortedM = new TreeMap<>();
    //
    // sortedM.putAll(m);
    //
    // return sortedM;
    // }

    private Map<String, Long> getSortedResult(Map<String, AtomicLong> map, int limit) {
        return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
    }

    public String getTaxonomy() {
        return taxonomy;
    }

    public boolean isTaxonomySet() {
        return taxonomyIsSet;
    }

    public int getnGramLevel() {
        return nGramLevel;
    }

    public CalculateFor getCf() {
        return cf;
    }

    public AnalysisLevel getAnalysisLevel() {
        return analysisLevel;
    }

    public CorpusType getCorpusType() {
        return corpusType;
    }

    public void setCorpusType(CorpusType corpusType) {
        this.corpusType = corpusType;
    }

    public boolean isGosOrthMode() {
        return gosOrthMode;
    }

    public void setGosOrthMode(boolean gosOrthMode) {
        this.gosOrthMode = gosOrthMode;
    }

    public Map<String, Object> getSolarHeadBlockFilter() {
        return solarHeadBlockFilter;
    }

    public void setSolarHeadBlockFilter(Map<String, Object> solarHeadBlockFilter) {
        this.solarHeadBlockFilter = solarHeadBlockFilter;
    }

    public boolean isUseDB() {
        return useDB;
    }

    /**
     * Switches database usage on or off. The database object is created the first time
     * usage is switched on.
     *
     * @param useDB whether to use the database
     */
    public void setUseDB(boolean useDB) {
        if (useDB && db == null) {
            db = new RDB();
        }
        this.useDB = useDB;
    }

    /**
     * Stores results from this batch to a database and clears results map.
     * Requires the database to have been created via {@link #setUseDB(boolean)}.
     */
    public void storeTmpResultsToDB() {
        try {
            db.writeBatch(result);
            result = new ConcurrentHashMap<>();
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }

    public boolean isAnalysisProducedResults() {
        return analysisProducedResults;
    }
}

View File

@@ -0,0 +1,409 @@
package data;
import static gui.ValidationUtil.*;
import java.io.UnsupportedEncodingException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import alg.inflectedJOS.WordFormation;
import data.Enums.WordLevelType;
import javafx.collections.ObservableList;
import util.Export;
import util.Util;
import util.db.RDB;
@SuppressWarnings("Duplicates")
/**
 * Result counters of one analysis run over a corpus with a given filter, together with the
 * logic that names the result and writes it to disk.
 */
public class StatisticsNew {
    public final static Logger logger = LogManager.getLogger(StatisticsNew.class);

    /**
     * Timestamp formats for the result title and the header block. Both use the 24-hour clock
     * ({@code HH}); the 12-hour {@code hh} without an AM/PM marker is ambiguous.
     */
    private static final DateTimeFormatter TITLE_TIMESTAMP = DateTimeFormatter.ofPattern("dd.MM.yyyy_HH.mm.ss");
    private static final DateTimeFormatter HEADER_TIMESTAMP = DateTimeFormatter.ofPattern("dd.MM.yyyy HH:mm");

    private Corpus corpus;
    private Filter filter;

    private String resultTitle;
    private Map<String, AtomicLong> result;
    private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
    private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedSuffix;
    private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedPrefix;
    private boolean useDB;
    private RDB db;
    private boolean analysisProducedResults;
    private LocalDateTime time;

    /**
     * Creates the statistics. For word-level analysis the nested suffix/prefix maps are created and
     * the flat result map stays {@code null}; for every other level it is the other way around.
     *
     * @param corpus corpus being analysed
     * @param filter analysis settings
     * @param useDB  whether intermediate results are stored in a database
     */
    public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
        this.corpus = corpus;
        this.filter = filter;

        if (useDB) {
            this.useDB = true;
            db = new RDB();
        }

        if (filter.getAl() == AnalysisLevel.WORD_LEVEL) {
            resultNestedSuffix = new ConcurrentHashMap<>();
            resultNestedPrefix = new ConcurrentHashMap<>();
        } else {
            result = new ConcurrentHashMap<>();
        }

        resultTitle = generateResultTitle();

        logger.debug(toString());
    }

    /**
     * Builds the result title. Its parts are joined with {@code "_"}:
     * <ul>
     * <li>string level, n-gram value 0: "Crke", corpus type</li>
     * <li>string level, n-gram value 1: "Besede", corpus type</li>
     * <li>string level, any other n-gram value: analysis level, corpus type, calculate-for,
     * "&lt;n&gt;-gram", "&lt;skip&gt;-preskok" (an unset skip is written as 0)</li>
     * <li>any other analysis level: analysis level, corpus type</li>
     * <li>always last: the timestamp, taken once on the first call and reused afterwards</li>
     * </ul>
     *
     * @return the generated title
     */
    private String generateResultTitle() {
        String separator = "_";
        StringBuilder sb = new StringBuilder();

        if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
            Integer ngramLevel = filter.getNgramValue();
            if (ngramLevel == 0) {
                sb.append("Crke")
                        .append(separator)
                        .append(corpus.getCorpusType().toString())
                        .append(separator);
            } else if (ngramLevel == 1) {
                sb.append("Besede")
                        .append(separator)
                        .append(corpus.getCorpusType().toString())
                        .append(separator);
            } else {
                sb.append(filter.getAl().toString())
                        .append(separator)
                        .append(corpus.getCorpusType().toString())
                        .append(separator);
                sb.append(filter.getCalculateFor().toString())
                        .append(separator);
                // ngram value
                sb.append(filter.getNgramValue()).append("-gram")
                        .append(separator);
                // an unset skip value means "no skip": write 0 instead of the text "null"
                Integer skip = filter.getSkipValue();
                sb.append(skip != null ? skip : 0).append("-preskok")
                        .append(separator);
            }
        } else {
            sb.append(filter.getAl().toString()) // analysis level
                    .append(separator)
                    .append(corpus.getCorpusType().toString())
                    .append(separator);
        }

        // not part of the title yet:
        // msd ?
        // if taxonomy -> taxonomy
        // if cvv -> cvv + length

        this.time = this.time != null ? this.time : LocalDateTime.now();

        sb.append(time.format(TITLE_TIMESTAMP));

        return sb.toString();
    }

    public boolean isAnalysisProducedResults() {
        return analysisProducedResults;
    }

    public void setAnalysisProducedResults(boolean analysisProducedResults) {
        this.analysisProducedResults = analysisProducedResults;
    }

    /**
     * @return a multi-line description: corpus type with the number of detected files, the storage
     *         mode and the filter settings
     */
    @Override
    public String toString() {
        String newLine = "\n\t- ";
        StringBuilder sb = new StringBuilder();

        sb.append(newLine).append("Statistic properties:");
        sb.append(newLine).append(corpus.getCorpusType().toString()).append(String.format(" (%d files)", corpus.getDetectedCorpusFiles().size()));
        sb.append(newLine).append(useDB ? "use DB" : "run in memory");
        sb.append(newLine).append(filter.toString());

        return sb.toString();
    }

    public String getResultTitle() {
        return resultTitle;
    }

    // ****************************************
    // ***************** util *****************
    // ****************************************

    /**
     * Stores results from this batch to a database and clears results map.
     * Requires that this object was created with {@code useDB == true}.
     */
    public void storeTmpResultsToDB() {
        try {
            db.writeBatch(result);
            result = new ConcurrentHashMap<>();
        } catch (UnsupportedEncodingException e) {
            logger.error("Store tmp results to DB", e);
        }
    }

    public Filter getFilter() {
        return filter;
    }

    public Corpus getCorpus() {
        return corpus;
    }

    /**
     * Sorts the flat result map and exports it as CSV to the corpus' results location.
     *
     * @param limit optional limit, passed through {@code Util.getValidInt} to the sorting step
     *
     * @return {@code false} if there were no results (nothing is written), {@code true} otherwise
     */
    public boolean saveResultToDisk(int... limit) throws UnsupportedEncodingException {
        Set<Pair<String, Map<String, Long>>> stats = new HashSet<>();

        loadResultFromDbIfUsed();

        // if no results and nothing to save, return false
        analysisProducedResults = !result.isEmpty();
        if (!analysisProducedResults) {
            return false;
        }

        stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
        Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock());
        return true;
    }

    /**
     * Sorts the nested suffix/prefix results and exports them as CSV to the corpus' results location.
     * The result title is regenerated first.
     *
     * @param limit optional limit, passed through {@code Util.getValidInt} to the sorting step
     *
     * @return {@code false} if both nested maps were empty (nothing is written), {@code true} otherwise
     */
    public boolean saveResultNestedToDisk(int... limit) throws UnsupportedEncodingException {
        resultTitle = generateResultTitle();

        loadResultFromDbIfUsed();

        Map<WordLevelType, Map<String, Map<String, Long>>> results = new HashMap<>();

        if (!isEmpty(resultNestedSuffix)) {
            results.put(WordLevelType.SUFFIX, sortNestedMap(resultNestedSuffix, Util.getValidInt(limit)));
        }

        if (!isEmpty(resultNestedPrefix)) {
            results.put(WordLevelType.PREFIX, sortNestedMap(resultNestedPrefix, Util.getValidInt(limit)));
        }

        // if no results and nothing to save, return false
        analysisProducedResults = !results.isEmpty();
        if (!analysisProducedResults) {
            return false;
        }

        Export.nestedMapToCSV(resultTitle, results, corpus.getChosenResultsLocation(), headerInfoBlock());
        return true;
    }

    /**
     * Switches the filter's analysis level to word formation, regenerates the title, lets
     * {@code WordFormation} compute its statistics from this object and exports the custom result table.
     *
     * @return {@code false} if there were no results (nothing is computed or written), {@code true} otherwise
     */
    public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException {
        filter.setAl(AnalysisLevel.WORD_FORMATION);
        resultTitle = generateResultTitle();

        loadResultFromDbIfUsed();

        // if no results and nothing to save, return false
        analysisProducedResults = !result.isEmpty();
        if (!analysisProducedResults) {
            return false;
        }

        WordFormation.calculateStatistics(this);

        Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock());
        return true;
    }

    /** When the database is in use, replaces the result map with the database dump and deletes the database. */
    private void loadResultFromDbIfUsed() throws UnsupportedEncodingException {
        if (useDB) {
            result = db.getDump();
            db.delete();
        }
    }

    /** Sorts every inner map of the nested map with {@link #getSortedResult(Map, int)}. */
    private Map<String, Map<String, Long>> sortNestedMap(Map<String, ConcurrentHashMap<String, AtomicLong>> nestedMap, int limit) {
        Map<String, Map<String, Long>> sorted = new HashMap<>();

        for (Map.Entry<String, ConcurrentHashMap<String, AtomicLong>> entry : nestedMap.entrySet()) {
            sorted.put(entry.getKey(), getSortedResult(entry.getValue(), Util.getValidInt(limit)));
        }

        return sorted;
    }

    private Map<String, Long> getSortedResult(Map<String, AtomicLong> map, int limit) {
        return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
    }

    /**
     * Counts one more occurrence of the given key in the flat result map.
     *
     * @param o key to count
     */
    public void updateResults(String o) {
        increment(result, o);
    }

    public Map<String, AtomicLong> getResult() {
        return result;
    }

    public Object[][] getResultCustom() {
        return resultCustom;
    }

    public void setResultCustom(Object[][] resultCustom) {
        this.resultCustom = resultCustom;
    }

    /**
     * Counts one more occurrence of {@code stringValue} under {@code key} in the nested map
     * that belongs to the given type. Any other type is ignored.
     *
     * @param type        suffix or prefix
     * @param key         outer key
     * @param stringValue inner key whose counter is incremented
     */
    public void updateResultsNested(WordLevelType type, String key, String stringValue) {
        if (type == WordLevelType.SUFFIX) {
            updateResultsNestedSuffix(key, stringValue);
        } else if (type == WordLevelType.PREFIX) {
            updateResultsNestedPrefix(key, stringValue);
        }
    }

    /** Counts one more occurrence of {@code stringValue} under {@code key} in the suffix results. */
    public void updateResultsNestedSuffix(String key, String stringValue) {
        incrementNested(resultNestedSuffix, key, stringValue);
    }

    /** Counts one more occurrence of {@code stringValue} under {@code key} in the prefix results. */
    public void updateResultsNestedPrefix(String key, String stringValue) {
        incrementNested(resultNestedPrefix, key, stringValue);
    }

    /** Inserts a counter starting at 1 for a new key, otherwise increments the existing counter. */
    private static void increment(Map<String, AtomicLong> counters, String key) {
        AtomicLong existing = counters.putIfAbsent(key, new AtomicLong(1));
        if (existing != null) {
            // increment the counter we were handed back instead of looking it up a second time
            existing.incrementAndGet();
        }
    }

    /** Creates the inner map for {@code key} when missing, then counts {@code stringValue} in it. */
    private static void incrementNested(Map<String, ConcurrentHashMap<String, AtomicLong>> nested, String key, String stringValue) {
        increment(nested.computeIfAbsent(key, k -> new ConcurrentHashMap<>()), stringValue);
    }

    /**
     * Builds the ordered key/value block that is passed to the export as header information:
     * corpus, date, analysis, and - depending on the settings - n-gram level, skip,
     * calculate-for, msd patterns, taxonomy and the additional solar filters.
     *
     * @return the header information in insertion order
     */
    private LinkedHashMap<String, String> headerInfoBlock() {
        LinkedHashMap<String, String> info = new LinkedHashMap<>();

        info.put("Korpus:", corpus.getCorpusType().toString());
        info.put("Datum:", time.format(HEADER_TIMESTAMP));

        if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
            Integer ngramLevel = filter.getNgramValue();

            // every branch uses the same "Analiza:" key; the word-level branch used to miss the colon
            if (ngramLevel == 0) {
                info.put("Analiza:", "Črke");
            } else if (ngramLevel == 1) {
                info.put("Analiza:", "Besede");
            } else {
                info.put("Analiza:", filter.getAl().toString());
            }

            // n-gram level
            if (ngramLevel > 1) {
                info.put("n-gram nivo:", String.valueOf(ngramLevel));
            } else if (ngramLevel == 1) {
                info.put("n-gram nivo:", "nivo besed");
            } else {
                info.put("n-gram nivo:", "nivo črk");
            }

            // skip
            if (ngramLevel > 1) {
                info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
            }

            // calculate for
            info.put("Izračunaj za:", filter.getCalculateFor().toString());

            // msd
            if (!isEmpty(filter.getMsd())) {
                StringBuilder msdPattern = new StringBuilder();
                for (Pattern pattern : filter.getMsd()) {
                    msdPattern.append(pattern.toString()).append(" ");
                }

                info.put("MSD:", msdPattern.toString());
            }

            // taxonomy
            if (!isEmpty(filter.getTaxonomy())) {
                info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
            }
        } else {
            info.put("Analiza:", filter.getAl().toString());
        }

        if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
            ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());

            info.put("Taksonomija: ", "");
            // each taxonomy line gets a key of growing whitespace so that the keys stay unique
            String sep = "";
            for (String s : tax) {
                sep = sep + " ";
                info.put(sep, s);
            }
        }

        if (corpus.getCorpusType() == CorpusType.SOLAR) {
            HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();

            if (!isEmpty(filters)) {
                info.put("Dodatni filtri: ", "");

                for (Map.Entry<String, ObservableList<String>> f : filters.entrySet()) {
                    info.put(f.getKey(), StringUtils.join(f.getValue(), ", "));
                }
            }
        }

        return info;
    }
}

175
src/main/java/data/Tax.java Normal file
View File

@@ -0,0 +1,175 @@
package data;
import java.util.*;
import java.util.stream.Collectors;
import gui.ValidationUtil;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
/**
 * Static lookup tables that map taxonomy codes to their display names, plus helpers that
 * convert between the two. Gigafida and ccKres share one table, GOS has its own; every
 * other corpus type is treated as having no taxonomy.
 */
public class Tax {
    /** Code -> display name, in display order; also used for ccKres. */
    private static final LinkedHashMap<String, String> GIGAFIDA_TAXONOMY = new LinkedHashMap<>();
    /** Code -> display name, in display order. */
    private static final LinkedHashMap<String, String> GOS_TAXONOMY = new LinkedHashMap<>();

    private static final HashSet<CorpusType> corpusTypesWithTaxonomy = new HashSet<>(Arrays.asList(CorpusType.GIGAFIDA, CorpusType.GOS, CorpusType.CCKRES));

    static {
        // GIGAFIDA ----------------------------
        GIGAFIDA_TAXONOMY.put("SSJ.T", "tisk");
        GIGAFIDA_TAXONOMY.put("SSJ.T.K", "tisk-knjižno");
        GIGAFIDA_TAXONOMY.put("SSJ.T.K.L", "tisk-knjižno-leposlovno");
        GIGAFIDA_TAXONOMY.put("SSJ.T.K.S", "tisk-knjižno-strokovno");
        GIGAFIDA_TAXONOMY.put("SSJ.T.P", "tisk-periodično");
        GIGAFIDA_TAXONOMY.put("SSJ.T.P.C", "tisk-periodično-časopis");
        GIGAFIDA_TAXONOMY.put("SSJ.T.P.R", "tisk-periodično-revija");
        GIGAFIDA_TAXONOMY.put("SSJ.T.D", "tisk-drugo");
        GIGAFIDA_TAXONOMY.put("SSJ.I", "internet");

        GIGAFIDA_TAXONOMY.put("Ft.P", "prenosnik");
        GIGAFIDA_TAXONOMY.put("Ft.P.G", "prenosnik-govorni");
        GIGAFIDA_TAXONOMY.put("Ft.P.E", "prenosnik-elektronski");
        GIGAFIDA_TAXONOMY.put("Ft.P.P", "prenosnik-pisni");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.O", "prenosnik-pisni-objavljeno");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.O.K", "prenosnik-pisni-objavljeno-knjižno");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P", "prenosnik-pisni-objavljeno-periodično");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C", "prenosnik-pisni-objavljeno-periodično-časopisno");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.D", "prenosnik-pisni-objavljeno-periodično-časopisno-dnevno");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.V", "prenosnik-pisni-objavljeno-periodično-časopisno-večkrat tedensko");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.T", "prenosnik-pisni-objavljeno-periodično-časopisno-tedensko");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R", "prenosnik-pisni-objavljeno-periodično-revialno");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.T", "prenosnik-pisni-objavljeno-periodično-revialno-tedensko");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.S", "prenosnik-pisni-objavljeno-periodično-revialno-štirinajstdnevno");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.M", "prenosnik-pisni-objavljeno-periodično-revialno-mesečno");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.D", "prenosnik-pisni-objavljeno-periodično-revialno-redkeje kot na mesec");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.O", "prenosnik-pisni-objavljeno-periodično-revialno-občasno");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.N", "prenosnik-pisni-neobjavljeno");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.N.J", "prenosnik-pisni-neobjavljeno-javno");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.N.I", "prenosnik-pisni-neobjavljeno-interno");
        GIGAFIDA_TAXONOMY.put("Ft.P.P.N.Z", "prenosnik-pisni-neobjavljeno-zasebno");

        GIGAFIDA_TAXONOMY.put("Ft.Z", "zvrst");
        GIGAFIDA_TAXONOMY.put("Ft.Z.U", "zvrst-umetnostna");
        GIGAFIDA_TAXONOMY.put("Ft.Z.U.P", "zvrst-umetnostna-pesniška");
        GIGAFIDA_TAXONOMY.put("Ft.Z.U.R", "zvrst-umetnostna-prozna");
        GIGAFIDA_TAXONOMY.put("Ft.Z.U.D", "zvrst-umetnostna-dramska");
        GIGAFIDA_TAXONOMY.put("Ft.Z.N", "zvrst-neumetnostna");
        GIGAFIDA_TAXONOMY.put("Ft.Z.N.S", "zvrst-neumetnostna-strokovna");
        GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.H", "zvrst-neumetnostna-strokovna-humanistična in družboslovna");
        GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.N", "zvrst-neumetnostna-strokovna-naravoslovna in tehnična");
        GIGAFIDA_TAXONOMY.put("Ft.Z.N.N", "zvrst-neumetnostna-nestrokovna");
        GIGAFIDA_TAXONOMY.put("Ft.Z.N.P", "zvrst-neumetnostna-pravna");

        GIGAFIDA_TAXONOMY.put("Ft.L", "zvrst-lektorirano");
        GIGAFIDA_TAXONOMY.put("Ft.L.D", "zvrst-lektorirano-da");
        GIGAFIDA_TAXONOMY.put("Ft.L.N", "zvrst-lektorirano-ne");

        // GOS ----------------------------------
        GOS_TAXONOMY.put("gos.T", "diskurz");
        GOS_TAXONOMY.put("gos.T.J", "diskurz-javni");
        GOS_TAXONOMY.put("gos.T.J.I", "diskurz-javni-informativno-izobraževalni");
        GOS_TAXONOMY.put("gos.T.J.R", "diskurz-javni-razvedrilni");
        GOS_TAXONOMY.put("gos.T.N", "diskurz-nejavni");
        GOS_TAXONOMY.put("gos.T.N.N", "diskurz-nejavni-nezasebni");
        GOS_TAXONOMY.put("gos.T.N.Z", "diskurz-nejavni-zasebni");

        GOS_TAXONOMY.put("gos.S", "situacija");
        GOS_TAXONOMY.put("gos.S.R", "situacija-radio");
        GOS_TAXONOMY.put("gos.S.T", "situacija-televizija");
    }

    /**
     * Single place that decides which table belongs to a corpus type (this dispatch used
     * to be repeated in every public method).
     *
     * @return the code -> name table for the corpus type, or an empty map when the type
     *         has no taxonomy
     */
    private static Map<String, String> taxonomyFor(CorpusType corpusType) {
        if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
            return GIGAFIDA_TAXONOMY;
        }
        if (corpusType == CorpusType.GOS) {
            return GOS_TAXONOMY;
        }
        return Collections.emptyMap();
    }

    /**
     * Returns the whole default taxonomy (display names) for the specified corpus type;
     * empty for corpus types without a taxonomy.
     */
    public static ObservableList<String> getTaxonomyForComboBox(CorpusType corpusType) {
        return FXCollections.observableArrayList(taxonomyFor(corpusType).values());
    }

    /**
     * Returns taxonomy names only for items found in headers.
     *
     * @param corpusType corpus type that selects the table
     * @param foundTax   taxonomy codes that were found
     *
     * @return display names of the found codes, in the table's own order
     */
    public static ObservableList<String> getTaxonomyForComboBox(CorpusType corpusType, HashSet<String> foundTax) {
        ArrayList<String> taxForCombo = new ArrayList<>();

        // iterating the table (not foundTax) assures the same relative order
        for (Map.Entry<String, String> entry : taxonomyFor(corpusType).entrySet()) {
            if (foundTax.contains(entry.getKey())) {
                taxForCombo.add(entry.getValue());
            }
        }

        return FXCollections.observableArrayList(taxForCombo);
    }

    /**
     * @return the corpus types for which a taxonomy table exists (the internal set itself,
     *         not a copy)
     */
    public static HashSet<CorpusType> getCorpusTypesWithTaxonomy() {
        return corpusTypesWithTaxonomy;
    }

    /**
     * Translates display names back to taxonomy codes.
     *
     * @param taxonomyNames display names; may be null or empty
     * @param corpusType    corpus type that selects the table
     *
     * @return one entry per input name, in input order; a name that is not in the table
     *         yields a {@code null} entry
     */
    public static ArrayList<String> getTaxonomyCodes(ArrayList<String> taxonomyNames, CorpusType corpusType) {
        ArrayList<String> result = new ArrayList<>();

        if (ValidationUtil.isEmpty(taxonomyNames)) {
            return result;
        }

        // inverse (name -> code) table for easier lookup
        Map<String, String> codeByName = new HashMap<>();
        for (Map.Entry<String, String> entry : taxonomyFor(corpusType).entrySet()) {
            codeByName.put(entry.getValue(), entry.getKey());
        }

        for (String taxonomyName : taxonomyNames) {
            result.add(codeByName.get(taxonomyName));
        }

        return result;
    }

    /**
     * Returns a list of proper names for codes.
     *
     * @param corpusType corpus type that selects the table
     * @param taxonomy   taxonomy codes; null or empty gives an empty result
     *
     * @return one entry per input code, in input order; a code that is not in the table
     *         yields a {@code null} entry
     */
    public static ArrayList<String> getTaxonomyForInfo(CorpusType corpusType, ArrayList<String> taxonomy) {
        ArrayList<String> result = new ArrayList<>();

        // same null tolerance as getTaxonomyCodes; iterating a null list used to throw
        if (ValidationUtil.isEmpty(taxonomy)) {
            return result;
        }

        Map<String, String> tax = taxonomyFor(corpusType);
        for (String t : taxonomy) {
            result.add(tax.get(t));
        }

        return result;
    }
}

View File

@@ -0,0 +1,171 @@
package data;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.stream.Collectors;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
/**
 * Taxonomy entries, each carrying a display name, a taxonomy identifier and the name of
 * the corpus it belongs to. Many Gigafida entries still carry the placeholder values
 * "opis" / "identifikator".
 */
public enum Taxonomy {
    // GOS
    JAVNI("javni", "T.J", "gos"),
    INFORMATIVNO_IZOBRAZEVALNI("informativno-izobraževalni", "T.J.I", "gos"),
    RAZVEDRILNI("razvedrilni", "T.J.R", "gos"),
    NEJAVNI("nejavni", "T.N", "gos"),
    NEZASEBNI("nezasebni", "T.N.N", "gos"),
    ZASEBNI("zasebni", "T.N.Z", "gos"),
    OSEBNI_STIK("osebni stik", "K.O", "gos"),
    TELEFON("telefon", "K.P", "gos"),
    RADIO("radio", "K.R", "gos"),
    TELEVIZIJA("televizija", "K.T", "gos"),

    // Gigafida
    KNJIZNO("knjižno", "T.K", "gigafida"),
    LEPOSLOVNO("leposlovno", "T.K.L", "gigafida"),
    STROKOVNO("strokovno", "T.K.S", "gigafida"),
    PERIODICNO("periodično", "T.P", "gigafida"),
    CASOPIS("časopis", "T.P.C", "gigafida"),
    REVIJA("revija", "T.P.R", "gigafida"),
    INTERNET("internet", "I", "gigafida"),

    SSJ_TISK("tisk", "SSJ.T", "gigafida"),
    SSJ_KNJIZNO("opis", "identifikator", "gigafida"),
    SSJ_LEPOSLOVNO("opis", "identifikator", "gigafida"),
    SSJ_STROKOVNO("opis", "identifikator", "gigafida"),
    SSJ_PERIODICNO("opis", "identifikator", "gigafida"),
    SSJ_CASOPIS("opis", "identifikator", "gigafida"),
    SSJ_REVIJA("opis", "identifikator", "gigafida"),
    SSJ_DRUGO("opis", "identifikator", "gigafida"),
    SSJ_INTERNET("opis", "identifikator", "gigafida"),
    FT_P_PRENOSNIK("opis", "identifikator", "gigafida"),
    FT_P_GOVORNI("opis", "identifikator", "gigafida"),
    FT_P_ELEKTRONSKI("opis", "identifikator", "gigafida"),
    FT_P_PISNI("opis", "identifikator", "gigafida"),
    FT_P_OBJAVLJENO("opis", "identifikator", "gigafida"),
    FT_P_KNJIZNO("opis", "identifikator", "gigafida"),
    FT_P_PERIODICNO("opis", "identifikator", "gigafida"),
    FT_P_CASOPISNO("opis", "identifikator", "gigafida"),
    FT_P_DNEVNO("opis", "identifikator", "gigafida"),
    FT_P_VECKRAT_TEDENSKO("opis", "identifikator", "gigafida"),
    FT_P_REVIALNO("opis", "identifikator", "gigafida"),
    FT_P_TEDENSKO("opis", "identifikator", "gigafida"),
    FT_P_STIRINAJSTDNEVNO("opis", "identifikator", "gigafida"),
    FT_P_MESECNO("opis", "identifikator", "gigafida"),
    FT_P_REDKEJE_KOT_MESECNO("opis", "identifikator", "gigafida"),
    FT_P_OBCASNO("opis", "identifikator", "gigafida"),
    FT_P_NEOBJAVLJENO("opis", "identifikator", "gigafida"),
    FT_P_JAVNO("opis", "identifikator", "gigafida"),
    FT_P_INTERNO("opis", "identifikator", "gigafida"),
    FT_P_ZASEBNO("opis", "identifikator", "gigafida"),
    FT_ZVRST("opis", "identifikator", "gigafida"),
    FT_UMETNOSTNA("opis", "identifikator", "gigafida"),
    FT_PESNISKA("opis", "identifikator", "gigafida"),
    FT_PROZNA("opis", "identifikator", "gigafida"),
    FT_DRAMSKA("opis", "identifikator", "gigafida"),
    FT_NEUMETNOSTNA("opis", "identifikator", "gigafida"),
    FT_STROKOVNA("opis", "identifikator", "gigafida"),
    FT_HID("opis", "identifikator", "gigafida"),
    FT_NIT("opis", "identifikator", "gigafida"),
    FT_NESTROKOVNA("opis", "identifikator", "gigafida"),
    FT_PRAVNA("opis", "identifikator", "gigafida"),
    FT_LEKTORIRANO("opis", "identifikator", "gigafida"),
    FT_DA("opis", "identifikator", "gigafida"),
    FT_NE("opis", "identifikator", "gigafida");

    /**
     * The only constants {@link #factory(String)} resolves. SSJ_TISK and the placeholder
     * constants are deliberately absent, exactly as in the former if-chain.
     */
    private static final Taxonomy[] FACTORY_CANDIDATES = {
            // GOS
            JAVNI, INFORMATIVNO_IZOBRAZEVALNI, RAZVEDRILNI, NEJAVNI, NEZASEBNI, ZASEBNI,
            OSEBNI_STIK, TELEFON, RADIO, TELEVIZIJA,
            // Gigafida
            KNJIZNO, LEPOSLOVNO, STROKOVNO, PERIODICNO, CASOPIS, REVIJA, INTERNET
    };

    private final String name;
    private final String taxonomy;
    private final String corpus;

    Taxonomy(String name, String taxonomy, String corpusType) {
        this.name = name;
        this.taxonomy = taxonomy;
        this.corpus = corpusType;
    }

    /** @return the display name */
    @Override
    public String toString() {
        return this.name;
    }

    /** @return the taxonomy identifier */
    public String getTaxonomnyString() {
        return this.taxonomy;
    }

    /**
     * Resolves a display name to its constant.
     *
     * @param tax display name; may be null
     *
     * @return the matching constant, or {@code null} when {@code tax} is null or matches
     *         none of the resolvable constants
     */
    public static Taxonomy factory(String tax) {
        if (tax == null) {
            return null;
        }

        for (Taxonomy candidate : FACTORY_CANDIDATES) {
            if (candidate.toString().equals(tax)) {
                return candidate;
            }
        }

        return null;
    }

    /**
     * Collects the display names of all constants whose corpus name equals
     * {@code corpusType}, in declaration order.
     */
    public static ObservableList<String> getDefaultForComboBox(String corpusType) {
        ArrayList<String> names = new ArrayList<>();

        for (Taxonomy entry : Taxonomy.values()) {
            if (entry.corpus.equals(corpusType)) {
                names.add(entry.name);
            }
        }

        return FXCollections.observableArrayList(names);
    }

    /** Same as {@link #getDefaultForComboBox(String)}, keyed by {@code corpusType.toString()}. */
    public static ObservableList<String> getDefaultForComboBox(CorpusType corpusType) {
        return getDefaultForComboBox(corpusType.toString());
    }
}

View File

@@ -0,0 +1,53 @@
package data;
import static gui.ValidationUtil.*;
import java.util.ArrayList;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import gui.Messages;
import gui.ValidationUtil;
/**
 * Validation of {@link Filter} settings before an analysis is started.
 */
public class Validation {

    /**
     * Checks a filter that is about to be used for a string-level analysis.
     * <p>
     * Side effect: a missing skip value is replaced by 0 on the passed filter.
     *
     * @param filter filter to check
     *
     * @return {@code null} when nothing is wrong, otherwise all messages joined by
     *         {@code ", \n"}
     */
    public static String validateForStringLevel(Filter filter) {
        ArrayList<String> errors = new ArrayList<>();

        Integer ngramValue = filter.getNgramValue();
        ArrayList<Pattern> msd = filter.getMsd();

        // should not be null, error if null, because init failed
        if (ngramValue == null) {
            errors.add(Messages.MISSING_NGRAM_LEVEL);
        }

        // should not be null, error if null, because init failed
        if (filter.getCalculateFor() == null) {
            errors.add(Messages.MISSING_CALCULATE_FOR);
        }

        if (filter.getSkipValue() == null) {
            filter.setSkipValue(0);
        }

        // A former check here required the msd list to be empty and non-empty at the same
        // time, so it could never report anything, yet it called size() on a possibly null
        // list. The check below is the effective one.

        // ngramValue is null-guarded: a missing value was already reported above and must
        // not end in a NullPointerException from unboxing.
        if (ngramValue != null && ngramValue > 0 && !ValidationUtil.isEmpty(msd) && ngramValue != msd.size()) {
            errors.add(String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, ngramValue, msd.size()));
        }

        if (ngramValue != null && ngramValue == 0 && isEmpty(filter.getStringLength())) {
            // if count letters, make sure that the length is given
            // TODO: check that words we're adding in xml reader are longer than this value
            errors.add(Messages.MISSING_STRING_LENGTH);
        }

        return isEmpty(errors) ? null : StringUtils.join(errors, ", \n");
    }
}

View File

@@ -0,0 +1,141 @@
package data;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import org.apache.commons.lang3.StringUtils;
import data.Enums.Msd;
import gui.ValidationUtil;
/**
 * A single token: surface form, lemma and msd tag.
 */
public class Word implements Serializable {
    public static final char PAD_CHARACTER = '-';

    private String word;
    private String lemma;
    private String msd;

    // NOTE(review): this set is identical for every instance and could be static; it is
    // left as an instance field here because turning it static alters the class's default
    // serialized form - confirm no serialized Word objects are persisted before changing.
    private final HashSet<Character> VOWELS = new HashSet<>(Arrays.asList('a', 'e', 'i', 'o', 'u'));

    /*
     * Word-class letters listed by the original note next to a (removed) word-class field:
     * S = noun, G = verb, P = adjective, R = adverb, Z = pronoun, K = numeral,
     * D = preposition, V = conjunction, L = particle, M = interjection,
     * O = abbreviation, N = unclassified.
     */

    /**
     * Creates a word with a normalized msd. The word is lower-cased when the msd is known
     * and does not start with "Sl" (per the original comment: the capital letter is kept
     * only for proper nouns); with an empty msd the word is stored unchanged.
     *
     * @param word  surface form
     * @param lemma lemma
     * @param msd   msd tag; may be null or empty
     */
    public Word(String word, String lemma, String msd) {
        this.lemma = lemma;
        this.msd = normalizeMsd(msd);

        // keep the capital letter only for proper nouns
        if (!ValidationUtil.isEmpty(this.msd) && !(this.msd.charAt(0) == 'S'
                && this.msd.length() >= 2
                && this.msd.charAt(1) == 'l')) {
            this.word = word.toLowerCase();
        } else {
            this.word = word;
        }
    }

    public Word() {
    }

    /** Creates a word with only the surface form set; lemma and msd stay null. */
    public Word(String word) {
        this.word = word;
    }

    /**
     * Appends a number of '-' to msds which are not properly sized.
     * E.g. nouns should have 5 attributes, yet the last one isn't always defined (Somei vs. Sometd)
     *
     * @param msdInput raw msd tag; may be null or empty
     *
     * @return the msd right-padded to the length given by {@code Msd.getMsdLengthForType},
     *         or an empty string for empty input
     */
    private String normalizeMsd(String msdInput) {
        if (ValidationUtil.isEmpty(msdInput)) {
            return "";
        } else {
            return StringUtils.rightPad(msdInput, Msd.getMsdLengthForType(msdInput), PAD_CHARACTER);
        }
    }

    public String getWord() {
        return word;
    }

    /** @return the word with each vowel replaced by 'V' and every other character by 'C' */
    public String getCVVWord() {
        return covertToCvv(word);
    }

    /** @return the lemma with each vowel replaced by 'V' and every other character by 'C' */
    public String getCVVLemma() {
        return covertToCvv(lemma);
    }

    /**
     * Replaces every vowel (a, e, i, o, u in either case) with 'V' and every other
     * character with 'C'.
     *
     * @param s text to convert; may be null
     *
     * @return converted text of the same length, or {@code null} for null input
     */
    private String covertToCvv(String s) {
        // lemma/word may be unset (no-arg and single-arg constructors); mirror the plain
        // getters, which return null in that case, instead of throwing
        if (s == null) {
            return null;
        }

        char[] chars = s.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            // lower-case before the lookup: lemmas and proper nouns keep their capitals,
            // and an upper-case vowel used to be counted as a consonant
            chars[i] = VOWELS.contains(Character.toLowerCase(chars[i])) ? 'V' : 'C';
        }

        return new String(chars);
    }

    public void setWord(String word) {
        this.word = word;
    }

    public String getLemma() {
        return lemma;
    }

    public void setLemma(String lemma) {
        this.lemma = lemma;
    }

    public String getMsd() {
        return msd;
    }

    /** @return a three-line, tab-separated dump of word, lemma and msd */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();

        sb.append("beseda:\t")
                .append(getWord())
                .append("\n")
                .append("lema:\t")
                .append(getLemma())
                .append("\n")
                .append("msd:\t")
                .append(getMsd())
                .append("\n");

        return sb.toString();
    }

    /**
     * Picks the representation an analysis should count.
     *
     * @param calculateFor {@code CalculateFor.WORD} selects the word; any other value
     *                     selects the lemma
     * @param cvv          when true, the CVV form is returned instead of the plain text
     */
    public String getForCf(CalculateFor calculateFor, boolean cvv) {
        boolean useWord = calculateFor == CalculateFor.WORD;

        if (cvv) {
            return useWord ? getCVVWord() : getCVVLemma();
        }
        return useWord ? getWord() : getLemma();
    }
}

View File

@@ -0,0 +1,454 @@
package gui;
import data.*;
import javafx.application.HostServices;
import javafx.beans.value.ChangeListener;
import javafx.beans.value.ObservableValue;
import javafx.collections.FXCollections;
import javafx.collections.ListChangeListener;
import javafx.collections.ObservableList;
import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.control.*;
import javafx.scene.layout.Pane;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.controlsfx.control.CheckComboBox;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.util.regex.Pattern;
import static alg.XML_processing.readXML;
import static gui.GUIController.showAlert;
import static gui.Messages.*;
/**
 * Controller of the character (letter) analysis tab: collects the filter settings from
 * the form, validates them and runs the analysis over the corpus files on a background
 * task.
 */
@SuppressWarnings("Duplicates")
public class CharacterAnalysisTab {
    public final static Logger logger = LogManager.getLogger(CharacterAnalysisTab.class);

    @FXML
    public Label selectedFiltersLabel;
    @FXML
    public Label solarFilters;

    @FXML
    private TextField msdTF;
    private ArrayList<Pattern> msd;
    private ArrayList<String> msdStrings;

    @FXML
    private CheckComboBox<String> taxonomyCCB;
    private ArrayList<String> taxonomy;

    @FXML
    private CheckBox calculatecvvCB;
    private boolean calculateCvv;

    @FXML
    private TextField stringLengthTF;
    private Integer stringLength;

    @FXML
    private ToggleGroup calculateForRB;
    private CalculateFor calculateFor;

    @FXML
    private RadioButton lemmaRB;

    @FXML
    private RadioButton varietyRB;

    @FXML
    private Pane paneLetters;

    @FXML
    private Button computeNgramsB;

    @FXML
    public ProgressBar ngramProgressBar;
    @FXML
    public Label progressLabel;

    @FXML
    private Hyperlink helpH;

    private enum MODE {
        LETTER
    }

    private MODE currentMode;

    private Corpus corpus;
    private HashMap<String, HashSet<String>> solarFiltersMap;
    // NOTE(review): nothing in this class assigns this field, yet populateFields() reads
    // it - confirm where it is expected to come from.
    private Filter filter;
    private boolean useDb;
    private HostServices hostService;

    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("različnica", "lema");
    private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");

    // TODO: pass observables for taxonomy based on header scan
    // after header scan
    private ObservableList<String> taxonomyCCBValues;
    private CorpusType currentCorpusType;

    /**
     * Wires the listeners of all form controls. Reads {@code corpus}, so the corpus has
     * to be set before this is called.
     */
    public void init() {
        currentMode = MODE.LETTER;
        toggleMode(currentMode);

        calculateForRB.selectedToggleProperty().addListener(new ChangeListener<Toggle>() {
            @Override
            public void changed(ObservableValue<? extends Toggle> observable, Toggle oldValue, Toggle newValue) {
                // the property reports null when the group loses its selection
                if (newValue == null) {
                    return;
                }
                RadioButton chk = (RadioButton) newValue.getToggleGroup().getSelectedToggle(); // Cast object to radio button
                calculateFor = CalculateFor.factory(chk.getText());
                logger.info("calculateForRB: {}", chk.getText());
            }
        });

        // msd
        msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
            if (!newValue) {
                // focus lost
                String value = msdTF.getText();
                logger.info("msdTf: {}", value);

                if (!ValidationUtil.isEmpty(value)) {
                    ArrayList<String> msdTmp = new ArrayList<>(Arrays.asList(value.split(" ")));

                    // letters are analysed per single token, so exactly one msd is expected
                    int nOfRequiredMsdTokens = 1;
                    if (msdTmp.size() != nOfRequiredMsdTokens) {
                        String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size());
                        logAlert(msg);
                        showAlert(Alert.AlertType.ERROR, msg);
                    }
                    msd = new ArrayList<>();
                    msdStrings = new ArrayList<>();
                    for (String msdToken : msdTmp) {
                        msd.add(Pattern.compile(msdToken));
                        msdStrings.add(msdToken);
                    }
                    logger.info(String.format("msd accepted (%d)", msd.size()));

                } else if (!ValidationUtil.isEmpty(newValue)) {
                    // NOTE(review): newValue is the (false) focus flag here, not the text;
                    // presumably this was meant to clear msd when the field is emptied -
                    // confirm against ValidationUtil.isEmpty before changing.
                    msd = new ArrayList<>();
                    msdStrings = new ArrayList<>();
                }
            }
        });

        msdTF.setText("");
        msd = new ArrayList<>();

        // taxonomy
        if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
            // setAll already replaces the old items; the former no-argument removeAll()
            // removed nothing
            taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
            taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
                taxonomy = new ArrayList<>();
                ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
                taxonomy.addAll(checkedItems);
                logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
            });
            taxonomyCCB.getCheckModel().clearChecks();
        } else {
            taxonomyCCB.setDisable(true);
        }

        // cvv
        calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
            calculateCvv = newValue;
            logger.info("calculate cvv: " + calculateCvv);
        });

        // string length
        stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
            if (!newValue) {
                // focus lost
                String value = stringLengthTF.getText();
                if (!ValidationUtil.isEmpty(value)) {
                    Integer parsed = parseStringLength(value);
                    if (parsed == null) {
                        logAlert("stringlengthTf: " + WARNING_ONLY_NUMBERS_ALLOWED);
                        GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
                        // show the value that is actually in effect again
                        if (stringLength != null) {
                            stringLengthTF.setText(String.valueOf(stringLength));
                        }
                    } else {
                        stringLength = parsed;
                    }
                } else {
                    GUIController.showAlert(Alert.AlertType.ERROR, WARNING_MISSING_STRING_LENGTH);
                    stringLengthTF.setText("1");
                    // keep the stored value in step with what the field now shows
                    stringLength = 1;
                    logAlert(WARNING_MISSING_STRING_LENGTH);
                }
            }
        });

        computeNgramsB.setOnAction(e -> {
            compute();
            logger.info("compute button");
        });

        helpH.setOnAction(e -> openHelpWebsite());
    }

    /**
     * Parses the string-length field.
     *
     * @return the parsed value, or {@code null} when the text is not accepted by
     *         {@code ValidationUtil.isNumber} or cannot be parsed as an int
     */
    private static Integer parseStringLength(String value) {
        if (!ValidationUtil.isNumber(value)) {
            return null;
        }
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException ex) {
            // e.g. a digit string that overflows int
            return null;
        }
    }

    /**
     * Refreshes the form after a corpus change or a header scan.
     * <p>
     * case a: values for combo boxes can change after a corpus change
     * <ul>
     * <li>different corpus type - reset all fields so no old values remain</li>
     * <li>same corpus type, different subset - keep</li>
     * </ul>
     * <p>
     * case b: values for combo boxes can change after a header scan
     * <ul>
     * <li>at first, fields are populated by corpus type defaults</li>
     * <li>after, with gathered data</li>
     * </ul>
     * <p>
     * Defaults as listed by the original author: ngrams 1, calculateFor word, msd empty,
     * taxonomy empty, skip 0, iscvv false, string length 1.
     */
    public void populateFields() {
        // corpus changed if: current one is null (this is first run of the app)
        // or if currentCorpus != gui's corpus
        boolean corpusChanged = currentCorpusType == null
                || currentCorpusType != corpus.getCorpusType();

        // TODO: check for GOS, GIGAFIDA, SOLAR...
        // refresh and:
        // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
        if (calculateFor == null) {
            calculateForRB.selectToggle(lemmaRB);
            // same pairing as toggleMode(); the value used to be derived from the toggle's
            // toString(), which is a description of the control rather than its label
            calculateFor = CalculateFor.LEMMA;
        }

        if (!filter.hasMsd()) {
            // if current corpus doesn't have msd data, disable this field
            msd = new ArrayList<>();
            msdTF.setText("");
            msdTF.setDisable(true);
            logger.info("no msd data");
        } else if (ValidationUtil.isEmpty(msd) || corpusChanged) {
            // msd has not been set previously
            // or msd has been set but the corpus changed -> reset
            msd = new ArrayList<>();
            msdTF.setText("");
            msdTF.setDisable(false);
            logger.info("msd reset");
        } else {
            // if msd has been set, but corpus type remained the same, we can keep any set msd value
            msdTF.setText(StringUtils.join(msdStrings, " "));
            msdTF.setDisable(false);
            logger.info("msd kept");
        }

        // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)

        // keep calculateCvv
        calculatecvvCB.setSelected(calculateCvv);

        // keep string length if set
        if (stringLength != null) {
            stringLengthTF.setText(String.valueOf(stringLength));
        } else {
            stringLengthTF.setText("1");
            stringLength = 1;
        }

        // TODO: trigger on rescan
        // see if we read taxonomy from headers, otherwise use default values for given corpus
        ObservableList<String> tax = corpus.getTaxonomy();
        taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
        // Replace rather than append, otherwise every refresh duplicates the entries; skip
        // the replacement when nothing changed so the list is not rebuilt needlessly.
        if (!taxonomyCCB.getItems().equals(taxonomyCCBValues)) {
            taxonomyCCB.getItems().setAll(taxonomyCCBValues);
        }

        // Remember the type for the next call. This was never stored before, so every call
        // looked like a corpus change and the "msd kept" branch could not be reached.
        currentCorpusType = corpus.getCorpusType();
    }

    /**
     * Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
     * sets combobox values to what is applicable ...
     *
     * @param mode mode to switch to; {@code null} re-applies the current mode
     */
    public void toggleMode(MODE mode) {
        if (mode == null) {
            mode = currentMode;
        }

        logger.info("mode: {}", mode);

        if (mode == MODE.LETTER) {
            paneLetters.setVisible(true);

            // populate with default cvv length value
            if (stringLength == null) {
                stringLengthTF.setText("1");
                stringLength = 1;
            } else {
                stringLengthTF.setText(String.valueOf(stringLength));
            }

            // if calculateFor was selected for something other than a word or a lemma -> reset
            if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
                // if the user selected something else before selecting ngram for letters, reset that choice
                calculateFor = CalculateFor.LEMMA;
                calculateForRB.selectToggle(lemmaRB);
            }
        }

        // override if orth mode, allow only word
        if (corpus.isGosOrthMode()) {
            // TODO change to
            varietyRB.setDisable(true);
            msdTF.setDisable(true);
        } else {
            msdTF.setDisable(false);
            varietyRB.setDisable(false);
        }
    }

    /**
     * Builds a filter from the current form values, validates it and either starts the
     * analysis or shows the validation messages.
     */
    private void compute() {
        // named differently from the field so the field is not shadowed
        Filter computeFilter = new Filter();
        computeFilter.setNgramValue(0);
        computeFilter.setCalculateFor(calculateFor);
        computeFilter.setMsd(msd);
        computeFilter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
        computeFilter.setAl(AnalysisLevel.STRING_LEVEL);
        computeFilter.setSkipValue(0);
        computeFilter.setIsCvv(calculateCvv);
        computeFilter.setSolarFilters(solarFiltersMap);
        computeFilter.setStringLength(stringLength);

        String message = Validation.validateForStringLevel(computeFilter);
        if (message == null) {
            // no errors
            logger.info("Executing: {}", computeFilter);
            StatisticsNew statistic = new StatisticsNew(corpus, computeFilter, useDb);
            execute(statistic);
        } else {
            logAlert(message);
            showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
        }
    }

    private void openHelpWebsite() {
        hostService.showDocument(Messages.HELP_URL);
    }

    private void logAlert(String alert) {
        logger.info("alert: " + alert);
    }

    public Corpus getCorpus() {
        return corpus;
    }

    /**
     * Stores the corpus and shows the selected-filters labels only for the SOLAR corpus
     * type.
     */
    public void setCorpus(Corpus corpus) {
        this.corpus = corpus;

        if (corpus.getCorpusType() != CorpusType.SOLAR) {
            setSelectedFiltersLabel(null);
        } else {
            setSelectedFiltersLabel("/");
        }
    }

    /**
     * @param content text to show in the selected-filters label; {@code null} hides both
     *                filter labels
     */
    public void setSelectedFiltersLabel(String content) {
        if (content != null) {
            solarFilters.setVisible(true);
            selectedFiltersLabel.setVisible(true);
            selectedFiltersLabel.setText(content);
        } else {
            solarFilters.setVisible(false);
            selectedFiltersLabel.setVisible(false);
        }
    }

    /**
     * Reads every detected corpus file into {@code statistic} on a daemon thread, mirrors
     * the progress in the progress bar and label, and saves the result once all files have
     * been read.
     */
    private void execute(StatisticsNew statistic) {
        logger.info("Started execution: {}", statistic.getFilter());

        Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();

        final Task<Void> task = new Task<Void>() {
            @SuppressWarnings("Duplicates")
            @Override
            protected Void call() throws Exception {
                long i = 0;
                for (File f : corpusFiles) {
                    readXML(f.toString(), statistic);
                    i++;
                    this.updateProgress(i, corpusFiles.size());
                    this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
                }

                return null;
            }
        };

        ngramProgressBar.progressProperty().bind(task.progressProperty());
        progressLabel.textProperty().bind(task.messageProperty());

        task.setOnSucceeded(e -> {
            try {
                boolean successullySaved = statistic.saveResultToDisk();
                if (successullySaved) {
                    showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
                } else {
                    showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
                }
            } catch (UnsupportedEncodingException e1) {
                showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
                logger.error("Error while saving", e1);
            }

            ngramProgressBar.progressProperty().unbind();
            ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
            progressLabel.textProperty().unbind();
            progressLabel.setText("");
        });

        task.setOnFailed(e -> {
            showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
            // log what actually went wrong: the handler argument is only the state-change
            // event, the failure itself is held by the task
            logger.error("Error while executing", task.getException());
            ngramProgressBar.progressProperty().unbind();
            ngramProgressBar.setProgress(0.0);
            ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
            progressLabel.textProperty().unbind();
            progressLabel.setText("");
        });

        final Thread thread = new Thread(task, "task");
        thread.setDaemon(true);
        thread.start();
    }

    public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
        this.solarFiltersMap = solarFiltersMap;
    }

    public void setHostServices(HostServices hostServices) {
        this.hostService = hostServices;
    }
}

View File

@@ -0,0 +1,517 @@
package gui;
import static data.CorpusType.*;
import static gui.GUIController.*;
import static gui.Messages.*;
import static util.Util.*;
import java.io.File;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOCase;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import alg.XML_processing;
import data.Corpus;
import data.CorpusType;
import data.Enums.solar.SolarFilters;
import data.Tax;
import javafx.collections.ObservableList;
import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.control.*;
import javafx.scene.layout.Pane;
import javafx.stage.DirectoryChooser;
import javafx.stage.Stage;
import javafx.application.HostServices;
public class CorpusTab {
// Logger for this controller.
public final static Logger logger = LogManager.getLogger(CorpusTab.class);
// Pane whose x-offset is changed in togglePiAndSetCorpusWrapper(...) while the scan indicator is visible.
public Pane setCorpusWrapperP;
// Owner window handed to the directory chooser dialog; created in initialize().
private Stage stage;
@FXML
private Button chooseCorpusLocationB;
// NOTE(review): not read or written anywhere in this class.
private File chosenCorpusLocation;
@FXML
private CheckBox readHeaderInfoChB;
// Mirrors the selected state of readHeaderInfoChB (kept in sync by a listener).
private boolean readHeaderInfo;
@FXML
private CheckBox gosUseOrthChB;
// Mirrors the selected state of gosUseOrthChB (kept in sync by a listener).
private boolean gosUseOrth;
@FXML
private Button chooseResultsLocationB;
@FXML
private Label chooseCorpusL;
// Text shown in chooseCorpusL once a corpus has been accepted.
private String chooseCorpusLabelContent;
@FXML
private Label chooseResultsL;
// Text shown in chooseResultsL (absolute path of the results directory once one is set).
private String chooseResultsLabelContent;
@FXML
private ProgressIndicator locationScanPI;
@FXML
private Hyperlink helpH;
// *** shared ***
private Corpus corpus;
// Corpus type found by the most recent detectCorpusType(...) call; null when none was recognized.
private CorpusType corpusType;
// tabs - used to enable/disable
private Tab stringLevelTabNew2;
private Tab oneWordAnalysisTab;
private Tab characterLevelTab;
// Only assigned through setWordFormationTab(...); stays null if that setter is never called.
private Tab wordFormationTab;
private Tab wordLevelTab;
private Tab filterTab;
private TabPane tabPane;
// Controllers of the other tabs; assigned through the setters at the bottom of this class.
private StringAnalysisTabNew2 satNew2Controller;
private OneWordAnalysisTab oneWordTabController;
private CharacterAnalysisTab catController;
private FiltersForSolar ffsController;
//private WordFormationTab wfController;
private WordLevelTab wlController;
// Used to open the help page in openHelpWebsite().
private HostServices hostService;
/**
 * Wires up the controls of the corpus tab: button actions, tooltips, checkbox listeners and the
 * initial "not set" label texts. Also hides the GOS checkbox and the scan progress indicator.
 */
public void initialize() {
    stage = new Stage();

    // add listeners
    chooseCorpusLocationB.setOnAction(e -> chooseCorpusLocation());
    chooseCorpusLocationB.setTooltip(new Tooltip(TOOLTIP_chooseCorpusLocationB));
    helpH.setOnAction(e -> openHelpWebsite());

    readHeaderInfoChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
        readHeaderInfo = newValue;
        // "{}" is required, otherwise log4j drops the argument
        logger.info("read headers: {}", readHeaderInfo);
    });
    readHeaderInfoChB.setTooltip(new Tooltip(TOOLTIP_readHeaderInfoChB));

    gosUseOrthChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
        gosUseOrth = newValue;
        corpus.setGosOrthMode(gosUseOrth);
        // the word formation tab is only present if setWordFormationTab(...) was called
        if (wordFormationTab != null) {
            wordFormationTab.setDisable(gosUseOrth);
        }
        satNew2Controller.toggleMode(null);
        oneWordTabController.toggleMode(null);
        catController.toggleMode(null);
        logger.info("gosUseOrth: {}", gosUseOrth);
    });

    chooseResultsLocationB.setOnAction(e -> chooseResultsLocation(null));

    // set labels and toggle visibility
    toggleGosChBVisibility();
    chooseCorpusLabelContent = Messages.LABEL_CORPUS_LOCATION_NOT_SET;
    chooseCorpusL.setText(chooseCorpusLabelContent);
    chooseResultsLabelContent = Messages.LABEL_RESULTS_LOCATION_NOT_SET;
    chooseResultsL.setText(chooseResultsLabelContent);
    togglePiAndSetCorpusWrapper(false);
}
/**
 * Shows or hides the location-scan progress indicator and shifts the corpus wrapper pane.
 *
 * @param piIsActive {@code true} shows the indicator and places the wrapper at x = 100,
 *                   {@code false} hides it and places the wrapper at x = 10
 */
private void togglePiAndSetCorpusWrapper(boolean piIsActive) {
    final double wrapperOffsetX;
    if (piIsActive) {
        wrapperOffsetX = 100.0;
    } else {
        wrapperOffsetX = 10.0;
    }
    locationScanPI.setVisible(piIsActive);
    setCorpusWrapperP.setLayoutX(wrapperOffsetX);
}
/** Asks the host services to show the document at {@code Messages.HELP_URL}. */
private void openHelpWebsite() {
    final String helpUrl = Messages.HELP_URL;
    hostService.showDocument(helpUrl);
}
/**
 * Lets the user pick a corpus directory and, if it qualifies, makes it the active corpus.
 * <p>
 * For a directory to pass as a valid corpus location, the following criteria have to be met:
 * <ul>
 * <li>it can't be null</li>
 * <li>it has to be readable</li>
 * <li>it has to contain xml files</li>
 * <li>xml files have to contain valid headers from which we can infer the corpus type</li>
 * <li>corpus type must be one of the expected corpus types, see {@link data.CorpusType}</li>
 * </ul>
 * <p>
 * Additionally, if the user chose to read taxonomy/filters from the corpus files, that read
 * has to produce a non-empty result (checked in {@code readHeaderInfo()}).
 */
private void chooseCorpusLocation() {
    File selectedDirectory = directoryChooser();
    if (selectedDirectory == null || !ValidationUtil.isReadableDirectory(selectedDirectory)) {
        return;
    }

    String selectedPath = selectedDirectory.getAbsolutePath();
    // "{}" is required, otherwise log4j drops the argument
    logger.info("selected corpus dir: {}", selectedPath);

    // scan for xml files (sub-directories included)
    Collection<File> corpusFiles = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("xml", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);

    // "no xml files" and "corpus type not recognized" are reported to the user the same way
    String detectedLabel = corpusFiles.isEmpty() ? null : detectCorpusType(corpusFiles, selectedPath);
    if (detectedLabel == null) {
        logger.info("alert: {}", WARNING_CORPUS_NOT_FOUND);
        showAlert(Alert.AlertType.ERROR, WARNING_CORPUS_NOT_FOUND, null);
        return;
    }

    // initNewCorpus stores the directory and the detected files on the new corpus
    initNewCorpus(selectedDirectory, corpusFiles);
    chooseCorpusLabelContent = detectedLabel;
    logger.info("corpus dir: {}", corpus.getChosenCorpusLocation().getAbsolutePath());

    if (readHeaderInfo) {
        logger.info("reading header info...");
        readHeaderInfo();
    } else {
        setResults();
        setCorpusForAnalysis();
    }
}
/**
 * Replaces the current corpus with a freshly created one, so that nothing from a previously
 * selected corpus is carried over, and uses the corpus directory as the results location.
 *
 * @param selectedDirectory directory chosen as the corpus location
 * @param corpusFiles       corpus files detected in that directory
 */
private void initNewCorpus(File selectedDirectory, Collection<File> corpusFiles) {
    Corpus freshCorpus = new Corpus();
    freshCorpus.setCorpusType(corpusType);
    freshCorpus.setDetectedCorpusFiles(corpusFiles);
    freshCorpus.setChosenCorpusLocation(selectedDirectory);

    // the field has to point at the new corpus before the results location is stored on it
    corpus = freshCorpus;
    chooseResultsLocation(selectedDirectory);
}
/**
 * Sets the directory the results are written to.
 *
 * @param dir directory to use; when {@code null} the user is asked to pick one with a
 *            directory chooser (nothing happens if no directory is returned by the dialog)
 */
private void chooseResultsLocation(File dir) {
    // results location can be set either to a default value (after selecting a valid corpus
    // location) - the dir argument - or to a dir picked via directoryChooser (when dir == null)
    File selectedDirectory = dir == null ? directoryChooser() : dir;
    if (selectedDirectory == null) {
        return;
    }

    String resultsLocationPath = selectedDirectory.getAbsolutePath().concat(File.separator);
    File chosenResultsLocationTmp = new File(resultsLocationPath);
    if (!ValidationUtil.isValidDirectory(chosenResultsLocationTmp)) {
        showAlert(Alert.AlertType.ERROR, WARNING_RESULTS_DIR_NOT_VALID);
        // "{}" is required, otherwise log4j drops the argument
        logger.info("alert: {}", WARNING_RESULTS_DIR_NOT_VALID);
        return;
    }

    corpus.setChosenResultsLocation(chosenResultsLocationTmp);
    chooseResultsLabelContent = corpus.getChosenResultsLocation().getAbsolutePath();
    chooseResultsL.setText(chooseResultsLabelContent);
    logger.info("results dir: {}", chooseResultsLabelContent);
}
/**
 * Refreshes the corpus tab once a corpus has been accepted: updates the visibility of the GOS
 * checkbox and shows the corpus summary in the corpus label.
 */
private void setResults() {
    // if everything is ok
    // check and enable checkbox if GOS
    toggleGosChBVisibility();

    // the results location is not changed here (initNewCorpus hands the corpus directory to
    // chooseResultsLocation); the corpus directory is only logged
    String defaultResultsLocationPath = corpus.getChosenCorpusLocation().getAbsolutePath();
    // "{}" is required, otherwise log4j drops the argument
    logger.info("setting default results location to: {}", defaultResultsLocationPath);

    chooseCorpusL.setText(chooseCorpusLabelContent);
}
/**
 * Reads taxonomy (GIGAFIDA, GOS, CCKRES) or filter values (SOLAR) from the headers of the
 * detected corpus files. The read runs in a {@link Task} on a daemon thread; meanwhile the
 * progress indicator is visible and the corpus label shows {@code LABEL_SCANNING_CORPUS}.
 * <p>
 * On success with a non-empty result the data is stored on the corpus and the analysis tabs
 * are set up. On an empty result the user is alerted and the tabs are left as they are. On
 * failure the exception is logged and the user is alerted.
 */
private void readHeaderInfo() {
    CorpusType corpusType = corpus.getCorpusType();
    Collection<File> corpusFiles = corpus.getDetectedCorpusFiles();

    togglePiAndSetCorpusWrapper(true);
    chooseCorpusL.setText(LABEL_SCANNING_CORPUS);
    // "{}" is required, otherwise log4j drops the argument
    logger.info("reading header data for {}", corpusType);

    // progress is reported per file, so a single-file corpus gets an indeterminate indicator
    boolean corpusIsSplit = corpusFiles.size() > 1;

    if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.GOS || corpusType == CorpusType.CCKRES) {
        final Task<HashSet<String>> task = new Task<HashSet<String>>() {
            @Override
            protected HashSet<String> call() throws Exception {
                HashSet<String> values = new HashSet<>();
                long filesRead = 0;

                if (!corpusIsSplit) {
                    updateProgress(-1.0f, -1.0f);
                }

                for (File file : corpusFiles) {
                    values.addAll((Collection<? extends String>) XML_processing.readXmlHeaderTaxonomyAndFilters(file.getAbsolutePath(), corpusIsSplit, corpusType));
                    filesRead++;
                    if (corpusIsSplit) {
                        updateProgress(filesRead, corpusFiles.size());
                    }
                }

                updateProgress(1.0f, 1.0f);
                return values;
            }
        };

        task.setOnSucceeded(e -> {
            ObservableList<String> readTaxonomy = Tax.getTaxonomyForComboBox(corpusType, task.getValue());
            if (ValidationUtil.isEmpty(readTaxonomy)) {
                // if no taxonomy found alert the user and keep other tabs disabled
                logger.info("No taxonomy found in headers.");
                GUIController.showAlert(Alert.AlertType.ERROR, WARNING_NO_TAXONOMY_FOUND);
            } else {
                // set taxonomy, update label
                corpus.setTaxonomy(readTaxonomy);
                corpus.setHeaderRead(true);
                chooseCorpusL.setText(chooseCorpusLabelContent);
                setResults();
                setCorpusForAnalysis();
            }
            togglePiAndSetCorpusWrapper(false);
        });

        startHeaderScan(task);
    } else if (corpusType == CorpusType.SOLAR) {
        // many many fields
        final Task<HashMap<String, HashSet<String>>> task = new Task<HashMap<String, HashSet<String>>>() {
            @Override
            protected HashMap<String, HashSet<String>> call() throws Exception {
                HashMap<String, HashSet<String>> values = new HashMap<>();
                long filesRead = 0;

                if (!corpusIsSplit) {
                    updateProgress(-1.0f, -1.0f);
                }

                for (File file : corpusFiles) {
                    HashMap<String, HashSet<String>> fileValues = (HashMap<String, HashSet<String>>) XML_processing.readXmlHeaderTaxonomyAndFilters(file.getAbsolutePath(), corpusIsSplit, corpusType);

                    // merge this file's values into the final result
                    for (Map.Entry<String, HashSet<String>> entry : fileValues.entrySet()) {
                        values.computeIfAbsent(entry.getKey(), key -> new HashSet<>()).addAll(entry.getValue());
                    }

                    filesRead++;
                    if (corpusIsSplit) {
                        updateProgress(filesRead, corpusFiles.size());
                    }
                }

                updateProgress(1.0f, 1.0f);
                return values;
            }
        };

        task.setOnSucceeded(e -> {
            HashMap<String, HashSet<String>> values = task.getValue();
            if (ValidationUtil.isEmpty(values)) {
                // if no filters found alert the user and keep other tabs disabled
                logger.info("No solar filters found in headers.");
                GUIController.showAlert(Alert.AlertType.ERROR, WARNING_NO_SOLAR_FILTERS_FOUND);
            } else {
                HashMap<String, ObservableList<String>> filtersForComboBoxes = SolarFilters.getFiltersForComboBoxes(values);
                // set filters, update label
                corpus.setSolarFiltersForXML(values);
                corpus.setSolarFilters(filtersForComboBoxes);
                corpus.setHeaderRead(true);
                chooseCorpusL.setText(chooseCorpusLabelContent);
                setResults();
                setCorpusForAnalysis();
            }
            togglePiAndSetCorpusWrapper(false);
        });

        startHeaderScan(task);
    }
}

/**
 * Binds the progress indicator to the given header-scan task, installs the cancel/failure
 * handlers and starts the task on a daemon thread.
 *
 * @param task header-scan task whose success handler has already been set by the caller
 */
private void startHeaderScan(Task<?> task) {
    locationScanPI.progressProperty().bind(task.progressProperty());
    task.setOnCancelled(e -> togglePiAndSetCorpusWrapper(false));
    task.setOnFailed(e -> {
        // do not swallow the failure: keep the cause in the log and tell the user
        logger.error("reading header info failed", task.getException());
        togglePiAndSetCorpusWrapper(false);
        GUIController.showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
    });

    final Thread thread = new Thread(task, "task");
    thread.setDaemon(true);
    thread.start();
}
/**
 * Hands the current corpus to the analysis tabs and enables them, provided the corpus passes
 * {@code corpus.validate()}; otherwise the validation errors are shown in an alert.
 * The filter tab is shown only for SOLAR corpora.
 */
private void setCorpusForAnalysis() {
    if (!corpus.validate()) {
        GUIController.showAlert(Alert.AlertType.ERROR, corpus.getValidationErrorsToString());
        return;
    }

    // new statistic, enable tabs...
    stringLevelTabNew2.setDisable(false);
    satNew2Controller.setCorpus(corpus);
    satNew2Controller.init();

    oneWordAnalysisTab.setDisable(false);
    oneWordTabController.setCorpus(corpus);
    oneWordTabController.init();

    characterLevelTab.setDisable(false);
    catController.setCorpus(corpus);
    catController.init();

    // the word formation tab is only present if setWordFormationTab(...) was called;
    // without this guard a missing tab aborts the setup of everything below
    if (wordFormationTab != null) {
        wordFormationTab.setDisable(false);
    }
    //wfController.setCorpus(corpus);
    //wfController.init();

    wordLevelTab.setDisable(false);
    wlController.setCorpus(corpus);
    wlController.init();

    ObservableList<Tab> tabs = tabPane.getTabs();
    if (corpus.getCorpusType() == CorpusType.SOLAR) {
        filterTab.setDisable(false);
        // the tab may still be in the pane from a previously chosen SOLAR corpus
        if (!tabs.contains(filterTab)) {
            tabs.add(1, filterTab);
        }
        ffsController.setCorpus(corpus);
        ffsController.initFilters();
    } else {
        filterTab.setDisable(true);
        tabs.removeAll(filterTab);
    }
}
/**
 * Opens a directory chooser dialog owned by {@code stage}.
 *
 * @return whatever {@code DirectoryChooser.showDialog} returns for the user's choice
 */
private File directoryChooser() {
    DirectoryChooser chooser = new DirectoryChooser();

    // start in the working directory (where the jar is located) if it could be determined
    File startDirectory = getWorkingDirectory();
    if (startDirectory != null) {
        chooser.setInitialDirectory(startDirectory);
    }

    File pickedDirectory = chooser.showDialog(stage);
    return pickedDirectory;
}
/**
 * Shows the GOS related checkbox only while a corpus of type GOS is set; hides it otherwise.
 */
private void toggleGosChBVisibility() {
    // a null corpus type never equals GOS, so no separate null check on the type is needed
    boolean corpusIsGos = corpus != null && corpus.getCorpusType() == CorpusType.GOS;
    gosUseOrthChB.setVisible(corpusIsGos);
}
/**
 * Tries to recognize the corpus by the {@code title} header tag of the first corpus file and
 * stores the outcome in the {@code corpusType} field (and on the current corpus when found).
 *
 * @param corpusFiles    detected corpus files; only the first one is inspected
 * @param corpusLocation path of the corpus directory, used in the returned summary
 * @return summary text for the corpus label (location, number of files, corpus type), or
 *         {@code null} if the corpus type could not be recognized
 */
private String detectCorpusType(Collection<File> corpusFiles, String corpusLocation) {
    corpusType = null;
    if (corpusFiles == null || corpusFiles.isEmpty()) {
        return null;
    }

    // check that we recognize this corpus
    // read first file only, maybe later do all, if toll on resources is acceptable
    File firstFile = corpusFiles.iterator().next();
    String rawTitle = XML_processing.readXMLHeaderTag(firstFile.getAbsolutePath(), "title");
    if (rawTitle == null) {
        // no title to compare against - treat as an unrecognized corpus
        return null;
    }
    String title = rawTitle.toLowerCase();

    // check if XML file's title contains any of recognized corpus titles
    if (title.contains(SOLAR.getNameLowerCase())) {
        corpusType = SOLAR;
    } else if (title.contains(GIGAFIDA.getNameLowerCase())) {
        corpusType = GIGAFIDA;
    } else if (title.contains(CCKRES.getNameLowerCase())) {
        corpusType = CCKRES;
    } else if (title.contains(GOS.getNameLowerCase())) {
        corpusType = GOS;
    }

    if (corpusType == null) {
        return null;
    }

    corpus.setCorpusType(corpusType);

    StringBuilder sb = new StringBuilder();
    sb.append(corpusLocation)
            .append("\n")
            .append(String.format(NOTIFICATION_FOUND_X_FILES, corpusFiles.size()))
            .append("\n")
            .append(String.format("Korpus: %s", corpusType.toString()));
    String result = sb.toString();
    logger.debug(result);
    return result;
}
// ---- accessors used to wire this controller to the rest of the GUI ----

/** @return the corpus currently held by this tab */
public Corpus getCorpus() {
    return this.corpus;
}

/** @param corpus corpus to be held by this tab */
public void setCorpus(Corpus corpus) {
    this.corpus = corpus;
}

public void setStringLevelTabNew2(Tab stringLevelTabNew2) {
    this.stringLevelTabNew2 = stringLevelTabNew2;
}

public void setOneWordAnalysisTab(Tab oneWordAnalysisTab) {
    this.oneWordAnalysisTab = oneWordAnalysisTab;
}

public void setCharacterLevelTab(Tab characterLevelTab) {
    this.characterLevelTab = characterLevelTab;
}

public void setWordLevelTab(Tab wordLevelTab) { this.wordLevelTab = wordLevelTab; }

public void setFilterTab(Tab filterTab) { this.filterTab = filterTab; }

public void setFfsController(FiltersForSolar ffsController) { this.ffsController = ffsController; }

public void setTabPane(TabPane tabPane) { this.tabPane = tabPane; }

public void setSatNew2Controller(StringAnalysisTabNew2 satNew2Controller) {
    this.satNew2Controller = satNew2Controller;
}

public void setOneWordTabController(OneWordAnalysisTab oneWordTabController) {
    this.oneWordTabController = oneWordTabController;
}

public void setCatController(CharacterAnalysisTab catController) {
    this.catController = catController;
}

/*public void setWfController(WordFormationTab wfController) {
this.wfController = wfController;
}*/

public void setWlController(WordLevelTab wlController) { this.wlController = wlController; }

public void setWordFormationTab(Tab wordFormationTab) { this.wordFormationTab = wordFormationTab; }

public void setHostServices(HostServices hostServices) { this.hostService = hostServices; }
}

View File

@@ -0,0 +1,187 @@
package gui;
import static data.Enums.solar.SolarFilters.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import javafx.application.HostServices;
import javafx.scene.control.Hyperlink;
import org.controlsfx.control.CheckComboBox;
import data.Corpus;
import javafx.collections.ListChangeListener;
import javafx.collections.ObservableList;
import javafx.fxml.FXML;
import javafx.scene.control.Label;
import javafx.scene.layout.AnchorPane;
import util.Util;
public class FiltersForSolar {
@FXML
public AnchorPane solarFiltersTabPane;
// One check-combo-box per filter; initialize() stores their checked items in selectedFilters
// under the keys REGIJA, PREDMET, RAZRED, LETO, SOLA and TIP respectively.
@FXML
public CheckComboBox<String> solarRegijaCCB;
@FXML
public CheckComboBox<String> solarPredmetCCB;
@FXML
public CheckComboBox<String> solarRazredCCB;
@FXML
public CheckComboBox<String> solarLetoCCB;
@FXML
public CheckComboBox<String> solarSolaCCB;
@FXML
public CheckComboBox<String> solarVrstaBesedilaCCB;
// Shows the textual summary built in updateSolarFilterLabel().
@FXML
public Label selectedFiltersLabel;
@FXML
private Hyperlink helpH;
// Checked items per filter key; created in initialize() and filled by the combo box listeners.
private HashMap<String, ObservableList<String>> selectedFilters;
// Source of the available filter values (see initFilters()).
private Corpus corpus;
// Controllers that receive the filter selection and its label text.
private StringAnalysisTabNew2 satNew2Controller;
private OneWordAnalysisTab oneWordTabController;
private CharacterAnalysisTab catController;
//private WordFormationTab wfController;
private WordLevelTab wlController;
// Used to open the help page in openHelpWebsite().
private HostServices hostService;
/**
 * Creates the (initially empty) selection map and registers a listener on every filter combo
 * box, so that each change of checked items is recorded and propagated.
 */
public void initialize() {
    selectedFilters = new HashMap<>();

    trackCheckedItems(REGIJA, solarRegijaCCB);
    trackCheckedItems(PREDMET, solarPredmetCCB);
    trackCheckedItems(RAZRED, solarRazredCCB);
    trackCheckedItems(LETO, solarLetoCCB);
    trackCheckedItems(SOLA, solarSolaCCB);
    trackCheckedItems(TIP, solarVrstaBesedilaCCB);

    helpH.setOnAction(e -> openHelpWebsite());
}

/**
 * Stores the combo box's checked items under {@code filterKey} whenever they change and
 * refreshes the filter summary afterwards.
 *
 * @param filterKey key used in {@code selectedFilters}
 * @param comboBox  combo box whose checked items are tracked
 */
private void trackCheckedItems(String filterKey, CheckComboBox<String> comboBox) {
    // typed listener instead of a raw ListChangeListener cast - no unchecked warning to suppress
    comboBox.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) change -> {
        selectedFilters.put(filterKey, comboBox.getCheckModel().getCheckedItems());
        updateSolarFilterLabel();
    });
}
/**
 * Fills every filter combo box with the values the current corpus offers for it, sorted in
 * natural (String) order. Previous items are replaced.
 */
public void initFilters() {
    fillSorted(solarRegijaCCB, REGIJA);
    fillSorted(solarPredmetCCB, PREDMET);
    fillSorted(solarRazredCCB, RAZRED);
    fillSorted(solarLetoCCB, LETO);
    fillSorted(solarSolaCCB, SOLA);
    fillSorted(solarVrstaBesedilaCCB, TIP);
}

/**
 * Replaces the items of {@code comboBox} with a sorted copy of the corpus' values for
 * {@code filterKey}.
 * <p>
 * The copy is sorted before it is handed over: {@code ObservableList.sorted()} returns a new
 * list and leaves the receiver untouched, so calling it on the items and dropping the result
 * does not sort anything. {@code setAll} already replaces the old content, so no separate
 * clearing step is needed.
 *
 * @param comboBox  combo box to fill
 * @param filterKey key into {@code corpus.getSolarFilters()}
 */
private void fillSorted(CheckComboBox<String> comboBox, String filterKey) {
    ArrayList<String> sortedValues = new ArrayList<>(corpus.getSolarFilters().get(filterKey));
    // a null comparator makes List.sort use the natural ordering of the elements
    sortedValues.sort(null);
    comboBox.getItems().setAll(sortedValues);
}
/**
 * Rebuilds the textual summary of the selected filters ("/" when the selection map is empty),
 * publishes it, and passes a copy of the selection to the analysis tab controllers.
 */
private void updateSolarFilterLabel() {
    final String labelText;
    if (Util.isMapEmpty(selectedFilters)) {
        labelText = "/";
    } else {
        StringBuilder summary = new StringBuilder();
        for (Map.Entry<String, ObservableList<String>> filter : selectedFilters.entrySet()) {
            ObservableList<String> checked = filter.getValue();
            if (checked.isEmpty()) {
                // filters without a checked value are left out of the summary
                continue;
            }
            summary.append(filter.getKey())
                    .append(": ")
                    .append(String.join(", ", checked))
                    .append("\n\n");
        }
        labelText = summary.toString();
    }
    setSOlarFIlterLabelText(labelText);

    // controllers get plain sets instead of the observable lists
    HashMap<String, HashSet<String>> solarFiltersMap = new HashMap<>();
    for (Map.Entry<String, ObservableList<String>> filter : selectedFilters.entrySet()) {
        solarFiltersMap.put(filter.getKey(), new HashSet<>(filter.getValue()));
    }

    satNew2Controller.setSolarFiltersMap(solarFiltersMap);
    oneWordTabController.setSolarFiltersMap(solarFiltersMap);
    catController.setSolarFiltersMap(solarFiltersMap);
    //wfController.setSolarFiltersMap(solarFiltersMap);
    wlController.setSolarFiltersMap(solarFiltersMap);
}
/** Asks the host services to show the document at {@code Messages.HELP_URL}. */
private void openHelpWebsite() {
    final String helpUrl = Messages.HELP_URL;
    hostService.showDocument(helpUrl);
}
/**
 * Shows the filter summary on this tab and forwards the same text to the analysis tabs.
 *
 * @param content text to display
 */
private void setSOlarFIlterLabelText(String content) {
    // own label first, then the other tabs
    selectedFiltersLabel.setText(content);

    satNew2Controller.setSelectedFiltersLabel(content);
    oneWordTabController.setSelectedFiltersLabel(content);
    catController.setSelectedFiltersLabel(content);
    //wfController.setSelectedFiltersLabel(content);
    wlController.setSelectedFiltersLabel(content);
}
// ---- setters used to wire this controller to the rest of the GUI ----

/** @param corpus corpus whose Solar filters are read in {@code initFilters()} */
public void setCorpus(Corpus corpus) { this.corpus = corpus; }

public void setSatNew2Controller(StringAnalysisTabNew2 satNew2Controller) {
    this.satNew2Controller = satNew2Controller;
}

public void setOneWordTabController(OneWordAnalysisTab oneWordTabController) {
    this.oneWordTabController = oneWordTabController;
}

public void setCatController(CharacterAnalysisTab catController) {
    this.catController = catController;
}

/*public void setWfController(WordFormationTab wfController) {
this.wfController = wfController;
}*/

public void setWlController(WordLevelTab wlController) { this.wlController = wlController; }

public void setHostServices(HostServices hostServices) { this.hostService = hostServices; }
}

View File

@@ -0,0 +1,150 @@
package gui;
import java.io.IOException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.kordamp.ikonli.fontawesome.FontAwesome;
import org.kordamp.ikonli.javafx.FontIcon;
import data.Corpus;
import javafx.application.Application;
import javafx.fxml.FXML;
import javafx.fxml.FXMLLoader;
import javafx.scene.Parent;
import javafx.scene.Scene;
import javafx.scene.control.Alert;
import javafx.scene.control.Tab;
import javafx.scene.control.TabPane;
import javafx.stage.Stage;
public class GUIController extends Application {
// Logger for this class.
public final static Logger logger = LogManager.getLogger(GUIController.class);
// Tabs declared in the FXML; handed over to the corpus tab controller in initialize().
@FXML
public Tab StringLevelTabNew2;
@FXML
public Tab OneWordAnalysisTab;
@FXML
public Tab CharacterLevelTabNew;
@FXML
public Tab corpusTab;
public TabPane tabPane;
// Controller / root-node pairs of the individual tabs.
// NOTE(review): some of the Parent fields below are static - presumably the FXML loader does
// not inject static fields; confirm whether they are needed at all.
@FXML
private CharacterAnalysisTab catController;
@FXML
private static Parent sat;
@FXML
private StringAnalysisTabNew2 satNew2Controller;
@FXML
private static Parent satNew2;
@FXML
private OneWordAnalysisTab oneWordTabController;
@FXML
private static Parent oneWordTab;
@FXML
private CorpusTab ctController;
@FXML
private Parent ct;
//@FXML
//private WordFormationTab wfController;
@FXML
private Parent wf;
@FXML
private WordLevelTab wlController;
@FXML
private Parent wl;
@FXML
private FiltersForSolar ffsController;
@FXML
private Parent ffs;
@FXML
private SelectedFiltersPane sfpController;
@FXML
private Parent sfp;
@FXML
public Tab stringLevelTab;
@FXML
public Tab wordLevelTab;
/*@FXML
public Tab wordFormationTab;*/
@FXML
public Tab filterTab;
// Primary stage, stored in start().
public Stage stage;
// Corpus instance created in initialize() and shared with the tab controllers.
private Corpus corpus;
/**
 * Loads the GUI from {@code /GUI.fxml}, puts it into an 800x600 scene on the primary stage,
 * remembers the stage and shows it.
 *
 * @param primaryStage stage supplied by the JavaFX runtime
 * @throws IOException if the FXML cannot be loaded
 */
@Override
public void start(Stage primaryStage) throws IOException {
    final Parent guiRoot = FXMLLoader.load(getClass().getResource("/GUI.fxml"));
    primaryStage.setTitle("GUI");

    // https://github.com/dicolar/jbootx
    // scene.getStylesheets().add(GUIController.class.getResource("bootstrap3.css").toExternalForm())
    primaryStage.setScene(new Scene(guiRoot, 800, 600));

    this.stage = primaryStage;
    primaryStage.show();
}
/**
 * Application entry point; launches the JavaFX application.
 *
 * @param args command line arguments, passed on to the JavaFX launcher
 */
public static void main(String[] args) {
    Application.launch(GUIController.class, args);
}
/**
 * Creates the shared corpus object and wires the tab controllers together: every controller
 * gets the corpus and the host services, the corpus tab and the filter tab additionally get
 * references to the tabs/controllers they drive. Finally the tab icons are set and the
 * filter tab is removed from the tab pane.
 */
public void initialize() {
    corpus = new Corpus();

    // corpus tab: corpus, tabs it enables/disables, controllers it initializes
    ctController.setCorpus(corpus);
    ctController.setTabPane(tabPane);
    ctController.setFilterTab(filterTab);
    ctController.setStringLevelTabNew2(StringLevelTabNew2);
    ctController.setOneWordAnalysisTab(OneWordAnalysisTab);
    ctController.setCharacterLevelTab(CharacterLevelTabNew);
    //ctController.setWordFormationTab(wordFormationTab);
    ctController.setWordLevelTab(wordLevelTab);
    ctController.setSatNew2Controller(satNew2Controller);
    ctController.setOneWordTabController(oneWordTabController);
    ctController.setCatController(catController);
    //ctController.setWfController(wfController);
    ctController.setWlController(wlController);
    ctController.setFfsController(ffsController);
    ctController.setHostServices(getHostServices());

    // analysis tabs: corpus + host services
    satNew2Controller.setCorpus(corpus);
    satNew2Controller.setHostServices(getHostServices());

    oneWordTabController.setCorpus(corpus);
    oneWordTabController.setHostServices(getHostServices());

    catController.setCorpus(corpus);
    catController.setHostServices(getHostServices());

    //wfController.setCorpus(corpus);
    //wfController.setHostServices(getHostServices());

    wlController.setCorpus(corpus);
    wlController.setHostServices(getHostServices());

    // filter tab: controllers that receive the filter selection
    ffsController.setSatNew2Controller(satNew2Controller);
    ffsController.setOneWordTabController(oneWordTabController);
    ffsController.setCatController(catController);
    //ffsController.setWfController(wfController);
    ffsController.setWlController(wlController);
    ffsController.setHostServices(getHostServices());

    // set tab icons
    corpusTab.setGraphic(new FontIcon(FontAwesome.COG));
    filterTab.setGraphic(new FontIcon(FontAwesome.FILTER));

    // hide filter tab
    tabPane.getTabs().removeAll(filterTab);
}
/**
 * Shows a modal alert and waits until it is closed. The window title is looked up in
 * {@code Messages.windowTitles} by alert type.
 *
 * @param alertType   type of the alert
 * @param headerText  header text; {@code null} is shown as an empty string
 * @param contentText content text; {@code null} is shown as an empty string
 */
static void showAlert(Alert.AlertType alertType, String headerText, String contentText) {
    String header = headerText;
    if (header == null) {
        header = "";
    }
    String content = contentText;
    if (content == null) {
        content = "";
    }

    Alert dialog = new Alert(alertType);
    dialog.setTitle(Messages.windowTitles.get(alertType));
    dialog.setHeaderText(header);
    dialog.setContentText(content);
    dialog.showAndWait();
}

/**
 * Shows a modal alert that has a header text only.
 *
 * @param alertType  type of the alert
 * @param headerText header text; {@code null} is shown as an empty string
 */
static void showAlert(Alert.AlertType alertType, String headerText) {
    final String noContent = null;
    showAlert(alertType, headerText, noContent);
}
}

View File

@@ -0,0 +1,74 @@
package gui;
import static javafx.scene.control.Alert.AlertType.*;
import java.util.HashMap;
import javafx.scene.control.Alert;
/**
 * Central place for the (Slovenian) texts shown in the GUI: alert messages, notifications,
 * labels and tooltips, plus the window titles used for alerts.
 */
public class Messages {
// warnings & errors
public static final String WARNING_CORPUS_NOT_FOUND = "V izbranem direktoriju ni ustreznih korpusnih datotek.";
public static final String WARNING_RESULTS_DIR_NOT_VALID = "Za dostop do izbranega direktorija nimate potrebnih pravic.";
public static final String WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS = "Izbran nivo ngramov in vpisano št. besed v filtru se ne ujemata.";
public static final String WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS_INFO = "Izberite drugo število ali popravite filter.";
public static final String WARNING_WORD_OR_LEMMA = "Izberite, če želite statistiko izračunati za besede ali leme.";
public static final String WARNING_ONLY_NUMBERS_ALLOWED = "Prosim vnesite veljavno število.";
// format string: takes two integers (%d, %d)
public static final String WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES = "Število za ngram (%d) in število msd oznak (%d) se morata ujemati.";
public static final String WARNING_MISSING_STRING_LENGTH = "Dolžina niza mora biti večja od 0. Vstavljena je privzeta vrednost (1).";
public static final String WARNING_NO_TAXONOMY_FOUND = "Iz korpusnih datotek ni bilo moč razbrati taksonomije. Prosim izberite drugo lokacijo ali korpus.";
public static final String WARNING_NO_SOLAR_FILTERS_FOUND = "Iz korpusnih datotek ni bilo moč razbrati filtrov. Prosim izberite drugo lokacijo ali korpus.";
public static final String ERROR_WHILE_EXECUTING = "Prišlo je do napake med izvajanjem.";
public static final String ERROR_WHILE_SAVING_RESULTS_TO_CSV = "Prišlo je do napake med shranjevanje rezultatov.";
// missing
public static final String MISSING_NGRAM_LEVEL = "N-gram nivo";
public static final String MISSING_CALCULATE_FOR = "Izračunaj za";
public static final String MISSING_SKIP = "";
public static final String MISSING_STRING_LENGTH = "Dolžina niza";
public static final String MISMATCHED_STRING_LENGTH_AND_MSD_REGEX = "Neujemajoča dolžina niza in regex filter";
// general notifications - static content/set only once
// format string: takes one integer (%d)
public static final String NOTIFICATION_FOUND_X_FILES = "Št. najdenih datotek: %d";
public static final String NOTIFICATION_ANALYSIS_COMPLETED = "Analiza je zaključena, rezultati so shranjeni.";
public static final String NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS = "Analiza je zaključena, vendar ni bilo moč izračunati statistike, ki bi ustrezala vsem navedenim pogojem.";
public static final String RESULTS_PATH_SET_TO_DEFAULT = "Lokacija za shranjevanje rezultatov je nastavljena na lokacijo korpusa.";
// ongoing notifications - displayed while processing, dynamically changing
// format string: takes two integers and a string (%d, %d, %s)
public static final String ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y = "Analiziram datoteko %d od %d (%s)";
// Labels
public static final String LABEL_CORPUS_LOCATION_NOT_SET = "Lokacija korpusa ni nastavljena";
public static final String LABEL_RESULTS_LOCATION_NOT_SET = "Lokacija za shranjevanje rezultatov ni nastavljena";
public static final String LABEL_RESULTS_CORPUS_TYPE_NOT_SET = "Vrsta korpusa ni nastavljena";
public static final String LABEL_SCANNING_CORPUS = "Iskanje in analiza korpusnih datotek...";
public static final String LABEL_SCANNING_SINGLE_FILE_CORPUS = "Analiza vnosa ";
public static final String COMPLETED = "končano";
// Tooltips (constant names end with the name of the control they belong to)
public static final String TOOLTIP_chooseCorpusLocationB = "Izberite mapo v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa.";
public static final String TOOLTIP_readHeaderInfoChB = "Če izberete to opcijo, se bo iz headerjev korpusa prebrala razpoložljiva taksonomija oz. filtri (korpus Šolar). Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki.";
// Does not really belong here. TODO: move somewhere else in the future
public static final String HELP_URL = "http://slovnica.ijs.si/";
// helper maps
/**
 * Window titles per alert type:
 * ERROR = "Napaka"
 * WARNING = "Opozorilo"
 * CONFIRMATION = "Potrdilo"
 * <p>
 * Alert types other than these three have no entry, so a lookup for them yields {@code null}.
 */
static HashMap<Alert.AlertType, String> windowTitles = new HashMap<>();
static {
// titles that alert windows get automatically, keyed by alert type
windowTitles.put(ERROR, "Napaka");
windowTitles.put(WARNING, "Opozorilo");
windowTitles.put(CONFIRMATION, "Potrdilo");
}
}

View File

@@ -0,0 +1,389 @@
package gui;
import data.*;
import javafx.application.HostServices;
import javafx.collections.FXCollections;
import javafx.collections.ListChangeListener;
import javafx.collections.ObservableList;
import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.control.*;
import javafx.scene.layout.Pane;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.controlsfx.control.CheckComboBox;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.util.regex.Pattern;
import static alg.XML_processing.readXML;
import static gui.GUIController.showAlert;
import static gui.Messages.*;
@SuppressWarnings("Duplicates")
public class OneWordAnalysisTab {
public final static Logger logger = LogManager.getLogger(OneWordAnalysisTab.class);
@FXML
public Label selectedFiltersLabel;
@FXML
public Label solarFilters;
@FXML
private TextField msdTF;
private ArrayList<Pattern> msd;
private ArrayList<String> msdStrings;
@FXML
private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy;
@FXML
private ComboBox<String> calculateForCB;
private CalculateFor calculateFor;
@FXML
private Button computeNgramsB;
@FXML
public ProgressBar ngramProgressBar;
@FXML
public Label progressLabel;
@FXML
private Hyperlink helpH;
/** Modes this tab can be switched between (see {@code toggleMode}). */
private enum MODE { LETTER, WORD }
private MODE currentMode;
private Corpus corpus;
private HashMap<String, HashSet<String>> solarFiltersMap;
private Filter filter;
private boolean useDb;
private HostServices hostService;
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
// TODO: pass observables for taxonomy based on header scan
// after header scan
private ObservableList<String> taxonomyCCBValues;
private CorpusType currentCorpusType;
/**
 * Sets the tab to word mode and (re)initializes its controls for the current corpus: the
 * "calculate for" combo box, the msd filter field, the taxonomy selection and the compute
 * button.
 * <p>
 * NOTE(review): every call registers new listeners on the controls; the corpus tab calls this
 * method each time a corpus is accepted, so listeners accumulate - confirm whether that is
 * intended.
 */
public void init() {
    currentMode = MODE.WORD;
    toggleMode(currentMode);

    // calculateForCB
    calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
        calculateFor = CalculateFor.factory(newValue);
        // "{}" is required, otherwise log4j drops the argument; passing the object itself
        // (instead of toString()) also avoids an exception if the factory returned null
        logger.info("calculateForCB: {}", calculateFor);
    });
    calculateForCB.getSelectionModel().select(0);

    // msd
    msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
        if (!newValue) {
            // focus lost
            String value = msdTF.getText();
            logger.info("msdTf: {}", value);

            if (!ValidationUtil.isEmpty(value)) {
                ArrayList<String> msdTmp = new ArrayList<>(Arrays.asList(value.split(" ")));

                // exactly one msd token is expected on this tab
                int nOfRequiredMsdTokens = 1;
                if (msdTmp.size() != nOfRequiredMsdTokens) {
                    String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size());
                    logAlert(msg);
                    showAlert(Alert.AlertType.ERROR, msg);
                }

                // the entered tokens are stored even after a mismatch alert
                msd = new ArrayList<>();
                msdStrings = new ArrayList<>();
                for (String msdToken : msdTmp) {
                    msd.add(Pattern.compile(msdToken));
                    msdStrings.add(msdToken);
                }
                logger.info(String.format("msd accepted (%d)", msd.size()));
            } else if (!ValidationUtil.isEmpty(newValue)) {
                // NOTE(review): newValue is the focus flag here, not the text - confirm the
                // intended condition for resetting the msd filter
                msd = new ArrayList<>();
                msdStrings = new ArrayList<>();
            }
        }
    });
    msdTF.setText("");
    msd = new ArrayList<>();

    // taxonomy
    if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
        // setAll replaces the previous items, no separate clearing needed
        taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
        taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
            taxonomy = new ArrayList<>();
            ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
            taxonomy.addAll(checkedItems);
            logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
        });
        taxonomyCCB.getCheckModel().clearChecks();
    } else {
        taxonomyCCB.setDisable(true);
    }

    computeNgramsB.setOnAction(e -> {
        compute();
        logger.info("compute button");
    });

    helpH.setOnAction(e -> openHelpWebsite());
}
/**
 * Refreshes the form after the corpus, or the data gathered from its headers, changed.
 * <p>
 * case a: values for combo boxes can change after a corpus change
 * <ul>
 * <li>different corpus type - reset all fields so no old values remain</li>
 * <li>same corpus type, different subset - keep</li>
 * </ul>
 * <p>
 * case b: values for combo boxes can change after a header scan
 * <ul>
 * <li>at first, fields are populated by corpus type defaults</li>
 * <li>after, with gathered data</li>
 * </ul>
 * <p>
 * Defaults: ngrams 1, calculateFor word, msd empty, taxonomy empty, skip 0,
 * iscvv false, string length 1.
 */
public void populateFields() {
    CorpusType newCorpusType = corpus.getCorpusType();
    // corpus changed if this is the first run (nothing remembered yet)
    // or if the remembered type differs from the gui's corpus
    boolean corpusChanged = currentCorpusType == null || currentCorpusType != newCorpusType;

    // TODO: check for GOS, GIGAFIDA, SOLAR...
    // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
    if (calculateFor == null) {
        String firstChoice = calculateForCB.getItems().get(0);
        calculateForCB.getSelectionModel().select(firstChoice);
        calculateFor = CalculateFor.factory(firstChoice);
    }

    // NOTE(review): the filter field is not assigned anywhere in the visible code, so the
    // unguarded call used to throw a NullPointerException. A missing filter is treated as
    // "msd available" here - confirm that this is the intended default.
    boolean msdAvailable = filter == null || filter.hasMsd();
    if (!msdAvailable) {
        // current corpus doesn't have msd data -> disable the field
        msd = new ArrayList<>();
        msdStrings = new ArrayList<>();
        msdTF.setText("");
        msdTF.setDisable(true);
        logger.info("no msd data");
    } else if (ValidationUtil.isEmpty(msd) || corpusChanged) {
        // msd has not been set previously, or it was set but the corpus changed -> reset
        msd = new ArrayList<>();
        msdStrings = new ArrayList<>();
        msdTF.setText("");
        msdTF.setDisable(false);
        logger.info("msd reset");
    } else {
        // msd is set and the corpus type stayed the same -> keep the value
        msdTF.setText(StringUtils.join(msdStrings, " "));
        msdTF.setDisable(false);
        logger.info("msd kept");
    }

    // TODO: trigger on rescan
    // see if we read taxonomy from headers, otherwise use default values for given corpus
    ObservableList<String> tax = corpus.getTaxonomy();
    taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(newCorpusType);
    // setAll instead of addAll: repeated calls must not append duplicate entries
    taxonomyCCB.getItems().setAll(taxonomyCCBValues);

    // remember the type unconditionally; previously it was only updated when already
    // non-null, so it stayed null forever and every call looked like a corpus change
    currentCorpusType = newCorpusType;
}
/**
 * Sets the calculateFor choices to what is applicable for the given mode and
 * enables or disables the msd field depending on the corpus' orth mode.
 *
 * @param mode mode to apply; {@code null} re-applies the most recently applied mode
 */
public void toggleMode(MODE mode) {
    if (mode == null) {
        mode = currentMode;
    }
    // remember the mode so that a later toggleMode(null) does not fall back to a stale one
    currentMode = mode;

    // "{}" is required, otherwise log4j silently drops the argument
    logger.info("mode: {}", mode);

    if (mode == MODE.WORD) {
        calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
    } else if (mode == MODE.LETTER) {
        calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_LETTERS);

        // letters can only be computed for a word or a lemma; reset any other earlier choice
        boolean allowedForLetters = calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA;
        if (!allowedForLetters) {
            calculateFor = CalculateFor.WORD;
            calculateForCB.getSelectionModel().select("različnica");
        }
    }

    // override if orth mode, allow only word
    if (corpus.isGosOrthMode()) {
        calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH);
        msdTF.setDisable(true);
    } else {
        msdTF.setDisable(false);
    }
}
/**
 * Builds a filter from the current form state (ngram value, skip value, cvv and
 * string length are fixed for this tab), validates it and starts the analysis.
 * Validation problems are logged and shown to the user instead.
 */
private void compute() {
    Filter filter = new Filter();
    filter.setNgramValue(1);
    filter.setCalculateFor(calculateFor);
    filter.setMsd(msd);
    filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
    filter.setAl(AnalysisLevel.STRING_LEVEL);
    filter.setSkipValue(0);
    filter.setIsCvv(false);
    filter.setSolarFilters(solarFiltersMap);
    filter.setStringLength(1);

    String message = Validation.validateForStringLevel(filter);
    if (message != null) {
        logAlert(message);
        showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
        return;
    }

    // no errors; "{}" is required, otherwise log4j silently drops the argument
    logger.info("Executing: {}", filter);
    StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
    execute(statistic);
}
/** Writes the text of an alert to the log at info level. */
private void logAlert(String alert) {
    String line = "alert: " + alert;
    logger.info(line);
}
/** Asks the host services to show the help URL. */
private void openHelpWebsite() {
    String helpUrl = Messages.HELP_URL;
    hostService.showDocument(helpUrl);
}
/**
 * @return the corpus this tab currently works on
 */
public Corpus getCorpus() {
    return this.corpus;
}
/**
 * Stores the corpus and shows the selected-filters label only for SOLAR corpora,
 * where it starts out as "/".
 *
 * @param corpus corpus to analyse
 */
public void setCorpus(Corpus corpus) {
    this.corpus = corpus;
    boolean isSolar = corpus.getCorpusType() == CorpusType.SOLAR;
    setSelectedFiltersLabel(isSolar ? "/" : null);
}
/**
 * Shows the filter labels with the given text, or hides both labels.
 *
 * @param content text to display; {@code null} hides the labels
 */
public void setSelectedFiltersLabel(String content) {
    boolean hasContent = content != null;
    solarFilters.setVisible(hasContent);
    selectedFiltersLabel.setVisible(hasContent);
    if (hasContent) {
        selectedFiltersLabel.setText(content);
    }
}
/**
 * Reads every detected corpus file on a background daemon thread, reporting progress
 * through the progress bar and label, and saves the result once reading has finished.
 *
 * @param statistic holds the corpus and filter to run and collects the results
 */
private void execute(StatisticsNew statistic) {
    // "{}" is required, otherwise log4j silently drops the argument
    logger.info("Started execution: {}", statistic.getFilter());

    Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();

    final Task<Void> task = new Task<Void>() {
        @SuppressWarnings("Duplicates")
        @Override
        protected Void call() throws Exception {
            long i = 0;
            for (File f : corpusFiles) {
                readXML(f.toString(), statistic);
                i++;
                this.updateProgress(i, corpusFiles.size());
                this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
            }
            return null;
        }
    };

    ngramProgressBar.progressProperty().bind(task.progressProperty());
    progressLabel.textProperty().bind(task.messageProperty());

    task.setOnSucceeded(e -> {
        try {
            boolean successullySaved = statistic.saveResultToDisk();
            if (successullySaved) {
                showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
            } else {
                showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
            }
        } catch (UnsupportedEncodingException e1) {
            showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
            logger.error("Error while saving", e1);
        }
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
    });

    task.setOnFailed(e -> {
        showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
        // log the actual failure cause; the event object itself is not a Throwable
        // and would be discarded by the logger
        logger.error("Error while executing", task.getException());
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setProgress(0.0);
        ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
    });

    final Thread thread = new Thread(task, "task");
    thread.setDaemon(true);
    thread.start();
}
/**
 * @param solarFiltersMap filters to pass on to the analysis filter when computing
 */
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
    this.solarFiltersMap = solarFiltersMap;
}
/**
 * @param hostServices host services used for opening the help URL
 */
public void setHostServices(HostServices hostServices) {
    this.hostService = hostServices;
}
}

View File

@@ -0,0 +1,18 @@
package gui;
import javafx.scene.control.Label;
/**
 * Holder for the label that displays the currently selected filters.
 */
public class SelectedFiltersPane {
    public Label selectedFiltersLabel;

    /**
     * @return the current label, or {@code null} if none has been set yet
     */
    public Label getSelectedFiltersLabel() {
        return selectedFiltersLabel;
    }

    /**
     * Replaces the label with a new one showing the given text.
     *
     * @param filters text to display
     */
    public void setSelectedFiltersLabel(String filters) {
        // the supplied text used to be overwritten by a leftover debug placeholder ("test?")
        this.selectedFiltersLabel = new Label(filters);
    }
}

View File

@@ -0,0 +1,511 @@
package gui;
import static alg.XML_processing.*;
import static gui.GUIController.*;
import static gui.Messages.*;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.util.regex.Pattern;
import javafx.application.HostServices;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.controlsfx.control.CheckComboBox;
import data.*;
import javafx.collections.FXCollections;
import javafx.collections.ListChangeListener;
import javafx.collections.ObservableList;
import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.control.*;
import javafx.scene.layout.Pane;
/**
 * Controller of the string analysis tab: n-grams of words or letters, with
 * optional msd, taxonomy, skip value, cvv and string length settings.
 */
@SuppressWarnings("Duplicates")
public class StringAnalysisTabNew2 {
public final static Logger logger = LogManager.getLogger(StringAnalysisTabNew2.class);
@FXML
public Label selectedFiltersLabel;
@FXML
public Label solarFilters;
// msd text field and the patterns / raw tokens parsed from it
@FXML
private TextField msdTF;
private ArrayList<Pattern> msd;
private ArrayList<String> msdStrings;
// taxonomy check-combo and the items currently checked in it
@FXML
private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy;
@FXML
private CheckBox calculatecvvCB;
private boolean calculateCvv;
@FXML
private TextField stringLengthTF;
private Integer stringLength;
@FXML
private ComboBox<String> calculateForCB;
private CalculateFor calculateFor;
// selected n-gram size; 0 stands for letter level
@FXML
private ComboBox<String> ngramValueCB;
private Integer ngramValue;
@FXML
private ComboBox<String> skipValueCB;
private Integer skipValue;
// panes toggled by toggleMode: exactly one of them is visible at a time
@FXML
private Pane paneWords;
@FXML
private Pane paneLetters;
@FXML
private Button computeNgramsB;
@FXML
public ProgressBar ngramProgressBar;
@FXML
public Label progressLabel;
@FXML
private Hyperlink helpH;
private enum MODE {
LETTER,
WORD
}
// Mode set by init(); toggleMode(null) falls back to it.
private MODE currentMode;
private Corpus corpus;
private HashMap<String, HashSet<String>> solarFiltersMap;
// NOTE(review): read by populateFields() but never assigned anywhere in this class.
private Filter filter;
// Forwarded unchanged to the StatisticsNew constructor; never assigned in this class.
private boolean useDb;
private HostServices hostService;
// Choices offered by calculateForCB in word mode, letter mode and GOS orth mode.
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
// TODO: pass observables for taxonomy based on header scan
// after header scan
private ObservableList<String> taxonomyCCBValues;
// Corpus type seen by the previous populateFields() run.
private CorpusType currentCorpusType;
/**
 * Wires the controls of this tab to their backing fields and resets them to defaults.
 * <p>
 * Reads {@code corpus}, so {@link #setCorpus(Corpus)} has to be called first.
 */
public void init() {
    currentMode = MODE.WORD;
    toggleMode(currentMode);

    // ngram value CB
    ngramValueCB.valueProperty().addListener((observable, oldValue, newValue) -> {
        if (newValue == null) {
            // selection was cleared; keep the previous n-gram value
            return;
        }

        if (newValue.equals("nivo črk")) {
            ngramValue = 0;
            toggleMode(MODE.LETTER);
        } else {
            ngramValue = Integer.valueOf(newValue);
            toggleMode(MODE.WORD);
        }

        // skip only on ngrams of more than one word
        if (ngramValue > 1) {
            skipValueCB.setDisable(false);
        } else {
            skipValueCB.getSelectionModel().select(0);
            skipValue = 0;
            skipValueCB.setDisable(true);
        }

        // "{}" is required, otherwise log4j silently drops the argument
        logger.info("ngramValueCB: {}", ngramValue);
    });

    // set first n-gram value to 2 at index 0
    ngramValueCB.getSelectionModel().select(0); // selected index
    ngramValue = 2; // actual value at that index

    // calculateForCB
    calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
        calculateFor = CalculateFor.factory(newValue);
        logger.info("calculateForCB: {}", calculateFor);
    });
    calculateForCB.getSelectionModel().select(0);

    // msd: parse the text field whenever it loses focus
    msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
        if (newValue) {
            // focus gained, nothing to validate yet
            return;
        }

        String value = msdTF.getText();
        logger.info("msdTf: {}", value);

        if (ValidationUtil.isEmpty(value)) {
            // field was cleared -> forget any previously accepted msd
            msd = new ArrayList<>();
            msdStrings = new ArrayList<>();
            return;
        }

        ArrayList<String> msdTmp = new ArrayList<>(Arrays.asList(value.split(" ")));
        // letter level (0) needs a single token, otherwise one token per word of the n-gram
        int nOfRequiredMsdTokens = ngramValue == 0 ? 1 : ngramValue;
        if (msdTmp.size() != nOfRequiredMsdTokens) {
            String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size());
            logAlert(msg);
            showAlert(Alert.AlertType.ERROR, msg);
        }

        // compile into a local list first so that an invalid regex cannot leave
        // msd and msdStrings half-updated and out of sync
        ArrayList<Pattern> compiled = new ArrayList<>();
        for (String msdToken : msdTmp) {
            compiled.add(Pattern.compile(msdToken));
        }
        msd = compiled;
        msdStrings = msdTmp;
        logger.info(String.format("msd accepted (%d)", msd.size()));
    });
    msdTF.setText("");
    msd = new ArrayList<>();
    msdStrings = new ArrayList<>();

    // taxonomy
    if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
        // setAll replaces the previous items, no separate clearing is needed
        taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
        taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
            ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
            taxonomy = new ArrayList<>(checkedItems);
            logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
        });
        taxonomyCCB.getCheckModel().clearChecks();
    } else {
        taxonomyCCB.setDisable(true);
    }

    // skip
    skipValueCB.valueProperty().addListener((observable, oldValue, newValue) -> {
        if (newValue == null) {
            // selection was cleared; keep the previous skip value
            return;
        }
        skipValue = Integer.valueOf(newValue);
        logger.info("Skip " + skipValue);
    });
    skipValueCB.getSelectionModel().select(0);
    skipValue = 0;

    // cvv
    calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
        calculateCvv = newValue;
        logger.info("calculate cvv: " + calculateCvv);
    });
    calculatecvvCB.setSelected(false);

    // string length: validate whenever the field loses focus
    stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
        if (newValue) {
            // focus gained, nothing to validate yet
            return;
        }

        String value = stringLengthTF.getText();
        if (ValidationUtil.isEmpty(value)) {
            GUIController.showAlert(Alert.AlertType.ERROR, WARNING_MISSING_STRING_LENGTH);
            stringLengthTF.setText("1");
            // keep the backing value in sync with the default shown in the field
            stringLength = 1;
            logAlert(WARNING_MISSING_STRING_LENGTH);
            return;
        }

        try {
            stringLength = Integer.parseInt(value);
        } catch (NumberFormatException ex) {
            // not an integer: warn and keep the previous value instead of letting
            // the exception escape from the listener
            logAlert("stringlengthTf: " + WARNING_ONLY_NUMBERS_ALLOWED);
            GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
        }
    });

    computeNgramsB.setOnAction(e -> {
        compute();
        logger.info("compute button");
    });

    helpH.setOnAction(e -> openHelpWebsite());
}
/**
 * Refreshes the form after the corpus, or the data gathered from its headers, changed.
 * <p>
 * case a: values for combo boxes can change after a corpus change
 * <ul>
 * <li>different corpus type - reset all fields so no old values remain</li>
 * <li>same corpus type, different subset - keep</li>
 * </ul>
 * <p>
 * case b: values for combo boxes can change after a header scan
 * <ul>
 * <li>at first, fields are populated by corpus type defaults</li>
 * <li>after, with gathered data</li>
 * </ul>
 * <p>
 * Defaults: ngrams 1, calculateFor word, msd empty, taxonomy empty, skip 0,
 * iscvv false, string length 1.
 */
public void populateFields() {
    CorpusType newCorpusType = corpus.getCorpusType();
    // corpus changed if this is the first run (nothing remembered yet)
    // or if the remembered type differs from the gui's corpus
    boolean corpusChanged = currentCorpusType == null || currentCorpusType != newCorpusType;

    // keep ngram value if set
    if (ngramValue == null) {
        ngramValueCB.getSelectionModel().select("1");
        ngramValue = 1;
    }

    // TODO: check for GOS, GIGAFIDA, SOLAR...
    // TODO if current value != null && is in new calculateFor ? keep : otherwise reset
    if (calculateFor == null) {
        String firstChoice = calculateForCB.getItems().get(0);
        calculateForCB.getSelectionModel().select(firstChoice);
        calculateFor = CalculateFor.factory(firstChoice);
    }

    // NOTE(review): the filter field is never assigned in this class, so the unguarded
    // call used to throw a NullPointerException. A missing filter is treated as
    // "msd available" here - confirm that this is the intended default.
    boolean msdAvailable = filter == null || filter.hasMsd();
    if (!msdAvailable) {
        // current corpus doesn't have msd data -> disable the field
        msd = new ArrayList<>();
        msdStrings = new ArrayList<>();
        msdTF.setText("");
        msdTF.setDisable(true);
        logger.info("no msd data");
    } else if (ValidationUtil.isEmpty(msd) || corpusChanged) {
        // msd has not been set previously, or it was set but the corpus changed -> reset
        msd = new ArrayList<>();
        msdStrings = new ArrayList<>();
        msdTF.setText("");
        msdTF.setDisable(false);
        logger.info("msd reset");
    } else {
        // msd is set and the corpus type stayed the same -> keep the value
        msdTF.setText(StringUtils.join(msdStrings, " "));
        msdTF.setDisable(false);
        logger.info("msd kept");
    }

    // TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)

    // keep skip value
    if (skipValue == null) {
        skipValueCB.getSelectionModel().select("0");
        skipValue = 0;
    }

    // keep calculateCvv
    calculatecvvCB.setSelected(calculateCvv);

    // keep string length if set
    if (stringLength == null) {
        stringLength = 1;
    }
    stringLengthTF.setText(String.valueOf(stringLength));

    // TODO: trigger on rescan
    // see if we read taxonomy from headers, otherwise use default values for given corpus
    ObservableList<String> tax = corpus.getTaxonomy();
    taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(newCorpusType);
    // setAll instead of addAll: repeated calls must not append duplicate entries
    taxonomyCCB.getItems().setAll(taxonomyCCBValues);

    // remember the type unconditionally; previously it was only updated when already
    // non-null, so it stayed null forever and every call looked like a corpus change
    currentCorpusType = newCorpusType;
}
/**
 * Toggles visibility of the word and letter panes, sets the calculateFor choices to
 * what is applicable for the given mode and enables or disables the msd field
 * depending on the corpus' orth mode.
 *
 * @param mode mode to apply; {@code null} re-applies the most recently applied mode
 */
public void toggleMode(MODE mode) {
    if (mode == null) {
        mode = currentMode;
    }
    // remember the mode so that a later toggleMode(null) does not fall back to a stale one
    currentMode = mode;

    // "{}" is required, otherwise log4j silently drops the argument
    logger.info("mode: {}", mode);

    if (mode == MODE.WORD) {
        paneWords.setVisible(true);
        paneLetters.setVisible(false);
        calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
    } else if (mode == MODE.LETTER) {
        paneWords.setVisible(false);
        paneLetters.setVisible(true);
        calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_LETTERS);

        // populate with default cvv length value
        if (stringLength == null) {
            stringLength = 1;
        }
        stringLengthTF.setText(String.valueOf(stringLength));

        // letters can only be computed for a word or a lemma; reset any other earlier choice
        boolean allowedForLetters = calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA;
        if (!allowedForLetters) {
            calculateFor = CalculateFor.WORD;
            calculateForCB.getSelectionModel().select("različnica");
        }
    }

    // override if orth mode, allow only word
    if (corpus.isGosOrthMode()) {
        calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH);
        msdTF.setDisable(true);
    } else {
        msdTF.setDisable(false);
    }
}
/**
 * Builds a filter from the current form state, validates it and starts the analysis.
 * Validation problems are logged and shown to the user instead.
 */
private void compute() {
    Filter filter = new Filter();
    filter.setNgramValue(ngramValue);
    filter.setCalculateFor(calculateFor);
    filter.setMsd(msd);
    filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
    filter.setAl(AnalysisLevel.STRING_LEVEL);
    filter.setSkipValue(skipValue);
    filter.setIsCvv(calculateCvv);
    filter.setSolarFilters(solarFiltersMap);

    // string length only applies at letter level (ngram value 0)
    if (ngramValue != null && ngramValue == 0) {
        filter.setStringLength(stringLength);
    }

    String message = Validation.validateForStringLevel(filter);
    if (message != null) {
        logAlert(message);
        showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
        return;
    }

    // no errors; "{}" is required, otherwise log4j silently drops the argument
    logger.info("Executing: {}", filter);
    StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
    execute(statistic);
}
/** Writes the text of an alert to the log at info level. */
private void logAlert(String alert) {
    String line = "alert: " + alert;
    logger.info(line);
}
/** Asks the host services to show the help URL. */
private void openHelpWebsite() {
    String helpUrl = Messages.HELP_URL;
    hostService.showDocument(helpUrl);
}
/**
 * @return the corpus this tab currently works on
 */
public Corpus getCorpus() {
    return this.corpus;
}
/**
 * Stores the corpus and shows the selected-filters label only for SOLAR corpora,
 * where it starts out as "/".
 *
 * @param corpus corpus to analyse
 */
public void setCorpus(Corpus corpus) {
    this.corpus = corpus;
    boolean isSolar = corpus.getCorpusType() == CorpusType.SOLAR;
    setSelectedFiltersLabel(isSolar ? "/" : null);
}
/**
 * Shows the filter labels with the given text, or hides both labels.
 *
 * @param content text to display; {@code null} hides the labels
 */
public void setSelectedFiltersLabel(String content) {
    boolean hasContent = content != null;
    solarFilters.setVisible(hasContent);
    selectedFiltersLabel.setVisible(hasContent);
    if (hasContent) {
        selectedFiltersLabel.setText(content);
    }
}
/**
 * Reads every detected corpus file on a background daemon thread, reporting progress
 * through the progress bar and label, and saves the result once reading has finished.
 *
 * @param statistic holds the corpus and filter to run and collects the results
 */
private void execute(StatisticsNew statistic) {
    // "{}" is required, otherwise log4j silently drops the argument
    logger.info("Started execution: {}", statistic.getFilter());

    Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();

    final Task<Void> task = new Task<Void>() {
        @SuppressWarnings("Duplicates")
        @Override
        protected Void call() throws Exception {
            long i = 0;
            for (File f : corpusFiles) {
                readXML(f.toString(), statistic);
                i++;
                this.updateProgress(i, corpusFiles.size());
                this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
            }
            return null;
        }
    };

    ngramProgressBar.progressProperty().bind(task.progressProperty());
    progressLabel.textProperty().bind(task.messageProperty());

    task.setOnSucceeded(e -> {
        try {
            boolean successullySaved = statistic.saveResultToDisk();
            if (successullySaved) {
                showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
            } else {
                showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
            }
        } catch (UnsupportedEncodingException e1) {
            showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
            logger.error("Error while saving", e1);
        }
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
    });

    task.setOnFailed(e -> {
        showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
        // log the actual failure cause; the event object itself is not a Throwable
        // and would be discarded by the logger
        logger.error("Error while executing", task.getException());
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setProgress(0.0);
        ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
    });

    final Thread thread = new Thread(task, "task");
    thread.setDaemon(true);
    thread.start();
}
/**
 * @param solarFiltersMap filters to pass on to the analysis filter when computing
 */
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
    this.solarFiltersMap = solarFiltersMap;
}
/**
 * @param hostServices host services used for opening the help URL
 */
public void setHostServices(HostServices hostServices) {
    this.hostService = hostServices;
}
}

View File

@@ -0,0 +1,77 @@
package gui;
import java.io.File;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.math.NumberUtils;
/**
 * Small static helpers for validating user input and file system locations.
 */
public class ValidationUtil {

    /**
     * @param value text to check
     * @return whether {@code NumberUtils.isCreatable} accepts the value; note that this
     *         is broader than "parsable as an int"
     */
    public static boolean isNumber(String value) {
        return NumberUtils.isCreatable(value);
    }

    /**
     * Checks if an object is empty or null. Null part is especially important,
     * since Java's built-in isEmpty() methods don't check for this condition
     * and throw a nullPointerException as a result.
     * <p>
     * Supported structures:
     * <ul>
     * <li>String: empty if null or length is zero</li>
     * <li>List: empty if null or size() == 0</li>
     * <li>Map: empty if null or if it contains no keys, or if all keys map to an empty value </li>
     * </ul>
     * Any other non-null object is considered not empty.
     *
     * @param o object to inspect, may be {@code null}
     * @return {@code true} if the object is empty according to the rules above
     */
    public static boolean isEmpty(Object o) {
        if (o == null) {
            return true;
        }

        if (o instanceof String) {
            return ((String) o).isEmpty();
        }

        if (o instanceof List) {
            return ((List<?>) o).isEmpty();
        }

        if (o instanceof Map) {
            for (Object val : ((Map<?, ?>) o).values()) {
                if (!isEmpty(val)) {
                    // if map contains any value that isn't empty, the map isn't considered empty
                    return false;
                }
            }
            // no keys at all, or every key maps to an empty value
            // (the second case used to fall through and be reported as not empty)
            return true;
        }

        return false;
    }

    /**
     * @param o object to inspect, may be {@code null}
     * @return negation of {@link #isEmpty(Object)}
     */
    public static boolean isNotEmpty(Object o) {
        return !isEmpty(o);
    }

    /**
     * Checks whether a given File is a folder we can both read from and write to.
     */
    public static boolean isValidDirectory(File f) {
        return f.isDirectory() && f.canRead() && f.canWrite();
    }

    /**
     * Checks whether a given File is a folder we can read from.
     */
    public static boolean isReadableDirectory(File f) {
        return f.isDirectory() && f.canRead();
    }
}

View File

@@ -0,0 +1,208 @@
package gui;
import static alg.XML_processing.*;
import static gui.GUIController.*;
import static gui.Messages.*;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import javafx.application.HostServices;
import javafx.scene.control.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.controlsfx.control.CheckComboBox;
import data.*;
import javafx.collections.ListChangeListener;
import javafx.collections.ObservableList;
import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.layout.AnchorPane;
/**
 * Controller of the word formation tab; runs a fixed string-level analysis of
 * morphosyntactic properties, optionally limited by taxonomy.
 */
@SuppressWarnings("Duplicates")
public class WordFormationTab {
public final static Logger logger = LogManager.getLogger(WordFormationTab.class);
public AnchorPane wordAnalysisTabPane;
@FXML
public Label selectedFiltersLabel;
@FXML
public Label solarFilters;
// taxonomy check-combo and the items currently checked in it
@FXML
private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy;
@FXML
private Button computeB;
@FXML
public ProgressBar ngramProgressBar;
@FXML
public Label progressLabel;
@FXML
private Hyperlink helpH;
// Corpus under analysis; set through setCorpus(Corpus).
private Corpus corpus;
private HashMap<String, HashSet<String>> solarFiltersMap;
private HostServices hostService;
// after header scan
// NOTE(review): the two fields below are not used anywhere in this class.
private ObservableList<String> taxonomyCCBValues;
private CorpusType currentCorpusType;
// Forwarded unchanged to the StatisticsNew constructor; never assigned in this class.
private boolean useDb;
/**
 * Wires the taxonomy selector, the compute button and the help link.
 * <p>
 * Reads {@code corpus}, so {@link #setCorpus(Corpus)} has to be called first.
 */
public void init() {
    // taxonomy
    if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
        // setAll replaces the previous items, no separate clearing is needed
        taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
        taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
            ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
            taxonomy = new ArrayList<>(checkedItems);
            logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
        });
        taxonomyCCB.getCheckModel().clearChecks();
    } else {
        taxonomyCCB.setDisable(true);
    }

    computeB.setOnAction(e -> {
        compute();
        logger.info("compute button");
    });

    helpH.setOnAction(e -> openHelpWebsite());
}
/**
 * Builds the fixed filter of this tab (unigrams of morphosyntactic properties at
 * string level) with the selected taxonomy, validates it and starts the analysis.
 * Validation problems are logged and shown to the user instead.
 */
private void compute() {
    Filter filter = new Filter();
    filter.setNgramValue(1);
    filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
    filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
    filter.setAl(AnalysisLevel.STRING_LEVEL);
    filter.setSkipValue(0);
    filter.setMsd(new ArrayList<>());
    filter.setIsCvv(false);
    filter.setSolarFilters(solarFiltersMap);

    String message = Validation.validateForStringLevel(filter);
    if (message != null) {
        logAlert(message);
        showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
        return;
    }

    // no errors; "{}" is required, otherwise log4j silently drops the argument
    logger.info("Executing: {}", filter);
    StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
    execute(statistic);
}
/** Asks the host services to show the help URL. */
private void openHelpWebsite() {
    String helpUrl = Messages.HELP_URL;
    hostService.showDocument(helpUrl);
}
/**
 * Reads every detected corpus file on a background daemon thread, reporting progress
 * through the progress bar and label, then recalculates and saves the result.
 *
 * @param statistic holds the corpus and filter to run and collects the results
 */
private void execute(StatisticsNew statistic) {
    // "{}" is required, otherwise log4j silently drops the argument
    logger.info("Started execution: {}", statistic.getFilter());

    Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();

    final Task<Void> task = new Task<Void>() {
        @SuppressWarnings("Duplicates")
        @Override
        protected Void call() throws Exception {
            long i = 0;
            for (File f : corpusFiles) {
                readXML(f.toString(), statistic);
                i++;
                this.updateProgress(i, corpusFiles.size());
                this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
            }
            return null;
        }
    };

    ngramProgressBar.progressProperty().bind(task.progressProperty());
    progressLabel.textProperty().bind(task.messageProperty());

    task.setOnSucceeded(e -> {
        try {
            // first, we have to recalculate all occurrences to detailed statistics
            boolean successullySaved = statistic.recalculateAndSaveResultToDisk();
            if (successullySaved) {
                showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
            } else {
                showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
            }
        } catch (UnsupportedEncodingException e1) {
            showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
            logger.error("Error while saving", e1);
        }
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
    });

    task.setOnFailed(e -> {
        showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
        // log the actual failure cause; the event object itself is not a Throwable
        // and would be discarded by the logger
        logger.error("Error while executing", task.getException());
        ngramProgressBar.progressProperty().unbind();
        ngramProgressBar.setProgress(0.0);
        ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
        progressLabel.textProperty().unbind();
        progressLabel.setText("");
    });

    final Thread thread = new Thread(task, "task");
    thread.setDaemon(true);
    thread.start();
}
/** Writes the text of an alert to the log at info level. */
private void logAlert(String alert) {
    String line = "alert: " + alert;
    logger.info(line);
}
/**
 * Stores the corpus and shows the selected-filters label only for SOLAR corpora,
 * where it starts out as "/".
 *
 * @param corpus corpus to analyse
 */
public void setCorpus(Corpus corpus) {
    this.corpus = corpus;
    boolean isSolar = corpus.getCorpusType() == CorpusType.SOLAR;
    setSelectedFiltersLabel(isSolar ? "/" : null);
}
/**
 * Shows the filter labels with the given text, or hides both labels.
 *
 * @param content text to display; {@code null} hides the labels
 */
public void setSelectedFiltersLabel(String content) {
    boolean hasContent = content != null;
    solarFilters.setVisible(hasContent);
    selectedFiltersLabel.setVisible(hasContent);
    if (hasContent) {
        selectedFiltersLabel.setText(content);
    }
}
/**
 * @param solarFiltersMap filters to pass on to the analysis filter when computing
 */
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
    this.solarFiltersMap = solarFiltersMap;
}
/**
 * @param hostServices host services used for opening the help URL
 */
public void setHostServices(HostServices hostServices) {
    this.hostService = hostServices;
}
}

View File

@@ -0,0 +1,207 @@
package gui;
import static alg.XML_processing.*;
import static gui.GUIController.*;
import static gui.Messages.*;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import javafx.application.HostServices;
import javafx.scene.control.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.controlsfx.control.CheckComboBox;
import data.*;
import javafx.collections.ListChangeListener;
import javafx.collections.ObservableList;
import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.layout.AnchorPane;
/**
 * Controller of the word level tab; runs a fixed word-level analysis of words,
 * optionally limited by taxonomy.
 */
@SuppressWarnings("Duplicates")
public class WordLevelTab {
public final static Logger logger = LogManager.getLogger(WordLevelTab.class);
public AnchorPane wordLevelAnalysisTabPane;
@FXML
public Label selectedFiltersLabel;
@FXML
public Label solarFilters;
// taxonomy check-combo and the items currently checked in it
@FXML
private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy;
@FXML
private Button computeB;
@FXML
public ProgressBar ngramProgressBar;
@FXML
public Label progressLabel;
@FXML
private Hyperlink helpH;
private Corpus corpus;
private HashMap<String, HashSet<String>> solarFiltersMap;
private HostServices hostService;
// after header scan
private ObservableList<String> taxonomyCCBValues;
private CorpusType currentCorpusType;
// Forwarded unchanged to the StatisticsNew constructor.
private boolean useDb;
/**
 * Wires the taxonomy selector, the compute button and the help link.
 * <p>
 * Reads {@code corpus}, so it has to be set before this method is called.
 */
public void init() {
    // taxonomy
    if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
        // setAll replaces the previous items, no separate clearing is needed
        taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
        taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
            ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
            taxonomy = new ArrayList<>(checkedItems);
            logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
        });
        taxonomyCCB.getCheckModel().clearChecks();
    } else {
        taxonomyCCB.setDisable(true);
    }

    computeB.setOnAction(e -> {
        compute();
        logger.info("compute button");
    });

    helpH.setOnAction(e -> openHelpWebsite());
}
/** Asks the host services to show the help URL. */
private void openHelpWebsite() {
    String helpUrl = Messages.HELP_URL;
    hostService.showDocument(helpUrl);
}
/**
 * Builds the fixed filter of this tab (unigrams of words at word level) with the
 * selected taxonomy, validates it and starts the analysis.
 * Validation problems are logged and shown to the user instead.
 */
private void compute() {
    Filter filter = new Filter();
    filter.setNgramValue(1);
    filter.setCalculateFor(CalculateFor.WORD);
    filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
    filter.setAl(AnalysisLevel.WORD_LEVEL);
    filter.setSkipValue(0);
    filter.setMsd(new ArrayList<>());
    filter.setIsCvv(false);
    filter.setSolarFilters(solarFiltersMap);

    // NOTE(review): the string-level validator is used although the analysis level is
    // WORD_LEVEL - confirm this is intended.
    String message = Validation.validateForStringLevel(filter);
    if (message != null) {
        logAlert(message);
        showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
        return;
    }

    // no errors; "{}" is required, otherwise log4j silently drops the argument
    logger.info("Executing: {}", filter);
    StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
    execute(statistic);
}
private void execute(StatisticsNew statistic) {
logger.info("Started execution: ", statistic.getFilter());
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
final Task<Void> task = new Task<Void>() {
@SuppressWarnings("Duplicates")
@Override
protected Void call() throws Exception {
long i = 0;
for (File f : corpusFiles) {
readXML(f.toString(), statistic);
i++;
this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
}
return null;
}
};
ngramProgressBar.progressProperty().bind(task.progressProperty());
progressLabel.textProperty().bind(task.messageProperty());
task.setOnSucceeded(e -> {
try {
// first, we have to recalculate all occurrences to detailed statistics
boolean successullySaved = statistic.saveResultNestedToDisk();
if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
} else {
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
}
} catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
logger.error("Error while saving", e1);
}
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
});
task.setOnFailed(e -> {
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
logger.error("Error while executing", e);
ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0);
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
progressLabel.textProperty().unbind();
progressLabel.setText("");
});
final Thread thread = new Thread(task, "task");
thread.setDaemon(true);
thread.start();
}
private void logAlert(String alert) {
logger.info("alert: " + alert);
}
public void setCorpus(Corpus corpus) {
this.corpus = corpus;
if (corpus.getCorpusType() != CorpusType.SOLAR) {
setSelectedFiltersLabel(null);
} else {
setSelectedFiltersLabel("/");
}
}
public void setSelectedFiltersLabel(String content) {
if (content != null) {
solarFilters.setVisible(true);
selectedFiltersLabel.setVisible(true);
selectedFiltersLabel.setText(content);
} else {
solarFilters.setVisible(false);
selectedFiltersLabel.setVisible(false);
}
}
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
this.solarFiltersMap = solarFiltersMap;
}
public void setHostServices(HostServices hostServices){
this.hostService = hostServices;
}
}

View File

@@ -0,0 +1,3 @@
Manifest-Version: 1.0
Main-Class: gui.GUIController

View File

@@ -0,0 +1,25 @@
package util;
import java.nio.ByteBuffer;
public class ByteUtils {

    /**
     * Encodes a {@code long} as its 8-byte big-endian representation.
     * Taken from <a href="https://stackoverflow.com/a/4485196">StackOverflow</a>.
     *
     * @param x value to encode
     * @return array of {@link Long#BYTES} bytes, most significant byte first
     */
    public static byte[] longToBytes(long x) {
        return ByteBuffer.allocate(Long.BYTES).putLong(x).array();
    }

    /**
     * Decodes a {@code long} from the bytes produced by {@link #longToBytes(long)}.
     * Taken from <a href="https://stackoverflow.com/a/4485196">StackOverflow</a>.
     *
     * @param bytes the encoded value; expected to hold exactly {@link Long#BYTES} bytes
     * @return the decoded value
     */
    public static long bytesToLong(byte[] bytes) {
        final ByteBuffer scratch = ByteBuffer.allocate(Long.BYTES);
        scratch.put(bytes, 0, bytes.length);
        // switch the buffer from writing to reading before extracting the value
        scratch.flip();
        return scratch.getLong();
    }
}

View File

@@ -0,0 +1,46 @@
package util;
import java.util.Arrays;
import java.util.HashSet;
import java.util.stream.IntStream;
public class Combinations {
    // accumulator shared by combinationUtil and generateIndices
    private static HashSet<HashSet<Integer>> result = new HashSet<>();

    /**
     * Recursively fills {@code data} with every combination of the requested length
     * and stores each finished combination in the shared result set.
     *
     * @param arr               input array
     * @param data              temporary array holding the combination being built
     * @param start             first index in {@code arr} that may still be used
     * @param end               last index in {@code arr} that may be used
     * @param index             position in {@code data} to fill next
     * @param combinationLength size of a finished combination
     */
    static void combinationUtil(int arr[], Integer data[], int start, int end, int index, int combinationLength) {
        if (index != combinationLength) {
            // only start from positions that leave enough elements to finish the combination
            final int stillNeeded = combinationLength - index;
            for (int pos = start; pos <= end && end - pos + 1 >= stillNeeded; pos++) {
                data[index] = arr[pos];
                combinationUtil(arr, data, pos + 1, end, index + 1, combinationLength);
            }
            return;
        }

        // combination is complete, record a copy of it
        result.add(new HashSet<>(Arrays.asList(data)));
    }

    /**
     * Generates all index combinations of sizes {@code 1 .. maxNOfIndices - 2} drawn
     * from the values {@code 1 .. maxNOfIndices - 1}, plus the empty combination.
     *
     * @param maxNOfIndices exclusive upper bound of the index values
     * @return set of index sets; always contains the empty set
     */
    public static HashSet<HashSet<Integer>> generateIndices(int maxNOfIndices) {
        result = new HashSet<>();

        final int[] candidates = IntStream.range(1, maxNOfIndices).toArray();
        final int lastPosition = candidates.length - 1;

        int size = 1;
        while (size < maxNOfIndices - 1) {
            // fresh scratch array for every combination size
            combinationUtil(candidates, new Integer[size], 0, lastPosition, 0, size);
            size++;
        }

        // also add an empty one for X.... (all of this type)
        result.add(new HashSet<>());

        return result;
    }
}

View File

@@ -0,0 +1,267 @@
package util;
import static util.Util.*;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.lang3.tuple.Pair;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import data.Enums.WordLevelType;
/**
 * Writes analysis results to disk, either as a single JSON file or as
 * semicolon-delimited, UTF-8 encoded CSV files.
 *
 * <p>I/O failures are reported on the console and do not propagate to the caller.
 */
public class Export {
    /** Record separator used for every CSV file. */
    private static final String NEW_LINE_SEPARATOR = "\n";

    /** Column names of the flat frequency tables. */
    private static final Object[] FREQUENCY_HEADER = {"word", "frequency", "percent"};

    /** Column names of the nested (word level) result table. */
    private static final Object[] NESTED_HEADER = {"type", "key", "word", "frequency"};

    /** Callback that prints the data rows of one CSV file. */
    @FunctionalInterface
    private interface CsvBody {
        void writeTo(CSVPrinter printer) throws IOException;
    }

    /**
     * Serialises all non-empty result maps into {@code statistics.json} in the
     * current working directory.
     *
     * @param set pairs of (title, word -> frequency)
     */
    @SuppressWarnings("unchecked") // json-simple containers are raw collections
    public static void SetToJSON(Set<Pair<String, Map<String, Long>>> set) {
        JSONArray wrapper = new JSONArray();

        for (Pair<String, Map<String, Long>> p : set) {
            JSONArray data_wrapper = new JSONArray();
            JSONObject metric = new JSONObject();

            String title = p.getLeft();
            Map<String, Long> map = p.getRight();

            if (map.isEmpty())
                continue;

            long total = Util.mapSumFrequencies(map);

            for (Map.Entry<String, Long> e : map.entrySet()) {
                JSONObject data_entry = new JSONObject();
                data_entry.put("word", e.getKey());
                data_entry.put("frequency", e.getValue());
                data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total));

                data_wrapper.add(data_entry);
            }

            metric.put("Title", title);
            metric.put("data", data_wrapper);
            wrapper.add(metric);
        }

        // write explicitly as UTF-8 (like the CSV exports) instead of the platform default charset
        try (Writer file = new OutputStreamWriter(new FileOutputStream("statistics.json"), StandardCharsets.UTF_8)) {
            file.write(wrapper.toJSONString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes one CSV file per non-empty result map.
     *
     * @param set             pairs of (title, word -> frequency); the title determines the file name
     * @param resultsPath     directory the files are written to
     * @param headerInfoBlock key/value lines printed before the table
     * @return path derived from the last processed title (even if its map was empty and
     *         therefore not written), or an empty string when {@code set} is empty
     */
    public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
        String fileName = "";

        for (Pair<String, Map<String, Long>> p : set) {
            fileName = csvPathFor(p.getLeft(), resultsPath);

            Map<String, Long> map = p.getRight();
            if (map.isEmpty())
                continue;

            long total = Util.mapSumFrequencies(map);

            writeCsv(fileName, FREQUENCY_HEADER, headerInfoBlock, printer -> {
                for (Map.Entry<String, Long> e : map.entrySet()) {
                    printer.printRecord(
                            e.getKey(),
                            e.getValue().toString(),
                            formatNumberAsPercent((double) e.getValue() / total));
                }
            });
        }

        return fileName;
    }

    /**
     * Writes a precomputed result table to a CSV file.
     *
     * @param title           used to derive the file name
     * @param result          rows of (word, frequency, fraction); the fraction is formatted as a percentage
     * @param resultsPath     directory the file is written to
     * @param headerInfoBlock key/value lines printed before the table
     * @return path of the CSV file
     */
    public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
        String fileName = csvPathFor(title, resultsPath);

        writeCsv(fileName, FREQUENCY_HEADER, headerInfoBlock, printer -> {
            for (Object[] resultEntry : result) {
                printer.printRecord(
                        resultEntry[0],
                        resultEntry[1],
                        formatNumberAsPercent(resultEntry[2]));
            }
        });

        return fileName;
    }

    /**
     * Flattens a nested result map into a four-column CSV file, one row per
     * (type, key, word) combination.
     *
     * @param title           used to derive the file name
     * @param result          type -> key -> word -> frequency
     * @param resultsPath     directory the file is written to
     * @param headerInfoBlock key/value lines printed before the table
     * @return path of the CSV file
     */
    public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
        String fileName = csvPathFor(title, resultsPath);

        writeCsv(fileName, NESTED_HEADER, headerInfoBlock, printer -> {
            for (Map.Entry<WordLevelType, Map<String, Map<String, Long>>> typeEntry : result.entrySet()) {
                for (Map.Entry<String, Map<String, Long>> keyWordEntry : typeEntry.getValue().entrySet()) {
                    for (Map.Entry<String, Long> calculationResults : keyWordEntry.getValue().entrySet()) {
                        printer.printRecord(
                                typeEntry.getKey().getName(),
                                keyWordEntry.getKey(),
                                calculationResults.getKey(),
                                calculationResults.getValue());
                    }
                }
            }
        });

        return fileName;
    }

    /** Turns a result title into a file path inside {@code resultsPath} (": " becomes "-", " " becomes "_"). */
    private static String csvPathFor(String title, File resultsPath) {
        String name = title.replace(": ", "-").replace(" ", "_").concat(".csv");
        return resultsPath.toString().concat(File.separator).concat(name);
    }

    /**
     * Opens {@code fileName} as a UTF-8, ';'-delimited CSV file, prints the info block
     * and the column names, lets {@code body} print the data rows and closes the file.
     * Any failure is reported on the console, as before.
     */
    private static void writeCsv(String fileName, Object[] columnNames, LinkedHashMap<String, String> headerInfoBlock, CsvBody body) {
        CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');

        // resources are closed in reverse order: printer first, then the underlying writer
        try (Writer fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
             CSVPrinter csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat)) {
            // write info block
            printHeaderInfo(csvFilePrinter, headerInfoBlock);

            // column names
            csvFilePrinter.printRecord(columnNames);

            body.writeTo(csvFilePrinter);
        } catch (Exception e) {
            System.out.println("Error in CsvFileWriter!");
            e.printStackTrace();
        }
    }

    /** Prints every entry of the info block as a two-column record, followed by two empty lines. */
    private static void printHeaderInfo(CSVPrinter csvFilePrinter, LinkedHashMap<String, String> headerInfoBlock) throws IOException {
        for (Map.Entry<String, String> entry : headerInfoBlock.entrySet()) {
            csvFilePrinter.printRecord(entry.getKey(), entry.getValue());
        }

        // 2 empty lines
        List<Object> emptyRecord = Collections.emptyList();
        csvFilePrinter.printRecord(emptyRecord);
        csvFilePrinter.printRecord(emptyRecord);
    }
}

View File

@@ -0,0 +1,31 @@
package util;
/**
 * Empty placeholder class. The only content is a commented-out draft of a
 * comparable, String-backed key; none of it is compiled.
 *
 * NOTE(review): the draft should not be re-enabled as is. Its equals() starts by
 * calling this.equals(o), which would recurse, and its hashCode() returns the
 * constant 0.
 */
public class Key /*implements Comparable<Key> */ {
// private final String value;
//
// Key(String value) {
// this.value = value;
// }
//
// @Override
// public int compareTo(Key o) {
// return Objects.compare(this.value, o.value);
// }
//
// @Override
// public boolean equals(Object o) {
// if (this.equals(o)) {
// return true;
// }
// if (o == null || getClass() != o.getClass()) {
// return false;
// }
// Key key = (Key) o;
// return Objects.equals(value, key.value);
// }
//
// @Override
// public int hashCode() {
// return 0;
// }
}

View File

@@ -0,0 +1,63 @@
package util;
import java.util.concurrent.TimeUnit;
/**
 * Minimal stopwatch based on {@link System#nanoTime()}.
 * Adapted from http://memorynotfound.com/calculating-elapsed-time-java/
 */
public class TimeWatch {
    // System.nanoTime() reading taken when the watch was (re)started
    private long starts;

    private TimeWatch() {
        reset();
    }

    /**
     * Creates a watch that starts measuring immediately.
     *
     * @return a running watch
     */
    public static TimeWatch start() {
        return new TimeWatch();
    }

    /** Restarts the measurement from now. */
    private TimeWatch reset() {
        starts = System.nanoTime();
        return this;
    }

    /** @return nanoseconds elapsed since the watch was (re)started */
    private long time() {
        long ends = System.nanoTime();
        return ends - starts;
    }

    /** @return elapsed time converted (truncated) to the given unit */
    private long time(TimeUnit unit) {
        return unit.convert(time(), TimeUnit.NANOSECONDS);
    }

    /** @return elapsed time as whole minutes plus the remaining seconds */
    private String toMinuteSeconds() {
        // sample the clock once so both components describe the same instant
        long elapsed = time();
        long minutes = TimeUnit.NANOSECONDS.toMinutes(elapsed);
        // subtract the minutes expressed in seconds, not the raw minute count
        long seconds = TimeUnit.NANOSECONDS.toSeconds(elapsed) - TimeUnit.MINUTES.toSeconds(minutes);

        return String.format("%d min, %d sec", minutes, seconds);
    }

    /**
     * Formats the elapsed time as hours, minutes, seconds and milliseconds.
     *
     * @return text of the form {@code "H h, M min, S s, MS ms"}
     */
    public String toFullTime() {
        // a single clock reading keeps the components consistent with each other
        long elapsed = time();

        long hours = TimeUnit.NANOSECONDS.toHours(elapsed);
        long minutes = TimeUnit.NANOSECONDS.toMinutes(elapsed) - TimeUnit.HOURS.toMinutes(hours);
        long seconds = TimeUnit.NANOSECONDS.toSeconds(elapsed) - TimeUnit.HOURS.toSeconds(hours) - TimeUnit.MINUTES.toSeconds(minutes);
        long milliseconds = TimeUnit.NANOSECONDS.toMillis(elapsed) - TimeUnit.HOURS.toMillis(hours) - TimeUnit.MINUTES.toMillis(minutes) - TimeUnit.SECONDS.toMillis(seconds);

        return String.format("%d h, %d min, %d s, %d ms", hours, minutes, seconds, milliseconds);
    }

    /** @return the elapsed time in nanoseconds, prefixed with a short label */
    @Override
    public String toString() {
        return "Elapsed Time in nano seconds: " + time();
    }

    private void exampleUsage() {
        TimeWatch watch = TimeWatch.start();

        // do something...

        System.out.println("Elapsed Time custom format: " + watch.toMinuteSeconds());
        System.out.println("Elapsed Time in seconds: " + watch.time(TimeUnit.SECONDS));
        System.out.println("Elapsed Time in nano seconds: " + watch.time());
    }
}

View File

@@ -0,0 +1,225 @@
package util;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.text.MessageFormat;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Stream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import data.Settings;
import gui.GUIController;
import gui.ValidationUtil;
/**
 * Assorted static helpers: time and number formatting, map conversion/sorting/printing
 * and discovery of the directory the application runs from.
 */
public class Util {
    public final static Logger logger = LogManager.getLogger(Util.class);

    /**
     * Splits a duration into hours, minutes, seconds, milli-, micro- and nanoseconds.
     *
     * @param time duration in nanoseconds
     * @return text of the form {@code "H h, M min, S s, MS ms, US µs, NS ns"}
     */
    public static String toReadableTime(long time) {
        long hours = time(TimeUnit.HOURS, time);
        long minutes = time(TimeUnit.MINUTES, time) - TimeUnit.HOURS.toMinutes(hours);
        long seconds = time(TimeUnit.SECONDS, time) - TimeUnit.HOURS.toSeconds(hours) - TimeUnit.MINUTES.toSeconds(minutes);
        long milliseconds = time(TimeUnit.MILLISECONDS, time) - TimeUnit.HOURS.toMillis(hours) - TimeUnit.MINUTES.toMillis(minutes) - TimeUnit.SECONDS.toMillis(seconds);
        long microseconds = time(TimeUnit.MICROSECONDS, time) - TimeUnit.HOURS.toMicros(hours) - TimeUnit.MINUTES.toMicros(minutes) - TimeUnit.SECONDS.toMicros(seconds) - TimeUnit.MILLISECONDS.toMicros(milliseconds);
        long nanoseconds = time(TimeUnit.NANOSECONDS, time) - TimeUnit.HOURS.toNanos(hours) - TimeUnit.MINUTES.toNanos(minutes) - TimeUnit.SECONDS.toNanos(seconds) - TimeUnit.MILLISECONDS.toNanos(milliseconds) - TimeUnit.MICROSECONDS.toNanos(microseconds);

        return String.format("%d h, %d min, %d s, %d ms, %d µs, %d ns", hours, minutes, seconds, milliseconds, microseconds, nanoseconds);
    }

    /** Converts {@code t} nanoseconds to the given unit (truncating). */
    private static long time(TimeUnit unit, long t) {
        return unit.convert(t, TimeUnit.NANOSECONDS);
    }

    /**
     * Converts a number to a more readable format using the default locale's
     * grouping and decimal separators, e.g. 12345 -> 12.345 and
     * 12345,678 -> 12.345,68 in a locale that groups with '.'.
     *
     * @param o byte, double, float, int, long, short
     *
     * @return number formatted with thousands separator and 2 decimal places (floats),
     *         or a fixed "invalid input" text for any other argument (including null)
     */
    private static String formatNumberReadable(Object o) {
        if (isInstanceOfInteger(o))
            return String.format("%,d", o);
        else if (isInstanceOfFloat(o))
            return String.format("%,.2f", o);
        else
            return "- invalid input format -";
    }

    /**
     * Formats a fraction as a percentage with up to three decimal places.
     *
     * @param o a number, where 1 corresponds to 100%
     */
    public static String formatNumberAsPercent(Object o) {
        return MessageFormat.format("{0,number,#.###%}", o);
    }

    /** @return true for boxed byte, short, int and long values; false otherwise (null included) */
    private static boolean isInstanceOfInteger(Object o) {
        return o instanceof Byte || o instanceof Short || o instanceof Integer || o instanceof Long;
    }

    /** @return true for boxed float and double values; false otherwise (null included) */
    private static boolean isInstanceOfFloat(Object o) {
        return o instanceof Float || o instanceof Double;
    }

    /**
     * Prints all entries of a map to standard output, one per line.
     * Integral values are right-aligned with a thousands separator; any other
     * value is printed via its string representation.
     */
    public static <K, V> void printMap(Map<K, V> map) {
        System.out.println("\nkey: value");
        map.forEach((k, v) -> {
            // "%d" is only valid for integral types; fall back to "%s" for everything else
            boolean integral = isInstanceOfInteger(v) || v instanceof java.math.BigInteger;
            String value = integral ? String.format("%,8d", v) : String.format("%8s", v);
            System.out.print(String.format("%s:\t %s%n", k, value));
        });
        System.out.println();
    }

    /**
     * Generic map converter -> since AtomicLongs aren't as comparable.
     * Converts a map with {@link AtomicLong} (or any other {@link Number}) values
     * to a {@code HashMap<String, Long>} keyed by the original keys' string form.
     */
    public static <K, V> Map<String, Long> atomicInt2StringAndInt(Map<K, V> map) {
        Map<String, Long> converted = new HashMap<>();

        for (Map.Entry<K, V> e : map.entrySet()) {
            // AtomicLong is a Number, so this accepts everything the old cast did
            converted.put(e.getKey().toString(), ((Number) e.getValue()).longValue());
        }

        return converted;
    }

    /**
     * Sorts a map in a descending order by value.
     *
     * @param map   entries to sort
     * @param limit maximum number of entries to keep; 0 or less means no limit
     * @return a new insertion-ordered map holding the largest entries first
     */
    public static <K, V extends Comparable<? super V>> Map<K, V> sortByValue(Map<K, V> map, int limit) {
        /*
        sorted() is an intermediate operation; the actual sort happens when the
        terminal operation consumes the stream. For a sequential stream this is
        TimSort: O(n log n) in the worst case, close to O(n) for (partially)
        presorted data.
        */

        // if limit is set to 0 or less, we take that to mean no limit at all
        if (limit <= 0) {
            limit = map.size();
        }

        Map<K, V> result = new LinkedHashMap<>();
        TimeWatch watch = TimeWatch.start();

        Stream<Map.Entry<K, V>> st = map.entrySet().stream();
        st.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).limit(limit)
                .forEachOrdered(e -> result.put(e.getKey(), e.getValue()));

        if (Settings.PRINT_LOG) {
            System.out.println(String.format("Elapsed time for sorting %s items: %s",
                    formatNumberReadable(result.size()),
                    watch.toFullTime()));
        }

        return result;
    }

    /**
     * Prints a titled frequency table with each value's share of {@code number_of_words}.
     * (The type parameter {@code V} is unused; it is kept so the signature stays unchanged.)
     */
    public static <K, V> void printMap(Map<K, Integer> map, String title, int number_of_words) {
        System.out.println(String.format("\n%s\n------------\nkey: value\tpercent", title));
        map.forEach((k, v) ->
                System.out.println(String.format("%s:\t %s\t %s%%",
                        k,
                        Util.formatNumberReadable(v),
                        Util.formatNumberReadable((double) v / number_of_words * 100))));
        System.out.println();
    }

    /** @return sum of all values of the map */
    static long mapSumFrequencies(Map<String, Long> map) {
        long sum = 0;

        for (long value : map.values()) {
            sum += value;
        }

        return sum;
    }

    /**
     * Used for passing optional integer values for sorting.
     *
     * @return the first argument if it is positive, otherwise 0
     */
    public static int getValidInt(int... i) {
        if (i == null || i.length < 1 || i[0] <= 0) {
            return 0;
        } else {
            return i[0];
        }
    }

    /**
     * Check whether a map is empty. It also considers an edge case where the map's
     * values are collections (or maps): such a map counts as empty when every one
     * of them is empty. {@code null} values count as empty; any other kind of value
     * makes the map non-empty.
     */
    public static <K, V> boolean isMapEmpty(Map<K, V> map) {
        if (map.isEmpty()) {
            // default
            return true;
        }

        // otherwise check if keys map to values that are empty
        for (V v : map.values()) {
            if (v == null) {
                continue;
            }

            if (v instanceof Collection) {
                if (!((Collection<?>) v).isEmpty()) {
                    return false;
                }
            } else if (v instanceof Map) {
                if (!((Map<?, ?>) v).isEmpty()) {
                    return false;
                }
            } else {
                // a plain value is content
                return false;
            }
        }

        return true;
    }

    /**
     * Returns the location of the main class if possible, otherwise null
     */
    public static File getWorkingDirectory() {
        // get location of the currently executing class; the code source may be unavailable
        java.security.CodeSource codeSource = GUIController.class.getProtectionDomain().getCodeSource();
        if (codeSource == null || codeSource.getLocation() == null) {
            logger.info("working dir returing null");
            return null;
        }

        String path = codeSource.getLocation().getPath();
        logger.info("working dir path: {}", path);

        String decodedPath = null;

        try {
            decodedPath = URLDecoder.decode(path, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            logger.error("decoding: ", e);
        }

        if (decodedPath != null) {
            File workingDirectory = new File(decodedPath);

            // in case it's a file (class is packaged inside a jar), select its parent folder
            workingDirectory = workingDirectory.isFile() ? workingDirectory.getParentFile() : workingDirectory;
            if (ValidationUtil.isReadableDirectory(workingDirectory)) {
                logger.info("working dir is ok: {}", workingDirectory.getAbsolutePath());

                return workingDirectory;
            }
        }

        logger.info("working dir returing null");
        return null;
    }
}

View File

@@ -0,0 +1,132 @@
package util.db;
import static util.ByteUtils.*;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.io.FileUtils;
import org.rocksdb.*;
import util.TimeWatch;
/**
 * Thin wrapper around a temporary RocksDB database used to accumulate
 * {@code String -> long} counters on disk.
 *
 * <p>The database is created in the constructor; every operation opens it by
 * path, does its work and closes it again, so no native handle is kept open
 * between calls.
 */
public class RDB {
    // always null: no handle is kept open between operations (see getDb())
    private RocksDB db;
    private String path;
    private static final String UTF_8 = "UTF-8";

    /** Creates a fresh database in a uniquely named folder under the system temp directory. */
    public RDB() {
        // different dbs in case of concurrent calculations
        this.path = System.getProperty("java.io.tmpdir")
                .concat(File.separator)
                .concat(String.format("corpusAnalyzer_db%d", LocalDateTime.now().toString().hashCode()));
        this.db = createDB();
    }

    /**
     * Creates the database files at {@link #path} by opening the database once with
     * {@code createIfMissing} and closing it again.
     *
     * @return always {@code null}; the handle is closed before this method returns,
     *         so there is no usable instance to hand out
     */
    private RocksDB createDB() {
        RocksDB.loadLibrary();

        // the Options class contains a set of configurable DB options
        // that determines the behaviour of the database.
        try (final Options options = new Options()) {
            options.setCreateIfMissing(true);

            // opening is enough to create the files; the handle is closed right away
            try (final RocksDB rdb = RocksDB.open(options, path)) {
                // nothing to do
            }
        } catch (RocksDBException e) {
            // report instead of hiding the failure; later operations will fail as well
            e.printStackTrace();
        }

        return null;
    }

    /**
     * Adds the given counters to the values already stored in the database.
     *
     * @param results key -> amount to add to the stored value (or to store, if the key is new)
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    public void writeBatch(Map<String, AtomicLong> results) throws UnsupportedEncodingException {
        RocksDB.loadLibrary();

        // all native objects are closed when the block exits
        try (final Options options = new Options();
             final RocksDB rdb = RocksDB.open(options, path);
             final WriteBatch wb = new WriteBatch();
             final WriteOptions writeOptions = new WriteOptions()) {

            for (Map.Entry<String, AtomicLong> entry : results.entrySet()) {
                byte[] key = entry.getKey().getBytes(UTF_8);
                long resultValue = entry.getValue().longValue();

                try {
                    final byte[] dbValue = rdb.get(key);
                    // dbValue == null if key does not exist in db.
                    long stored = dbValue != null ? bytesToLong(dbValue) : 0L;
                    wb.put(key, longToBytes(stored + resultValue));
                } catch (RocksDBException e) {
                    // this entry is skipped; make the loss visible instead of hiding it
                    e.printStackTrace();
                }
            }

            TimeWatch watch = TimeWatch.start();
            rdb.write(writeOptions, wb);
            System.out.println(String.format("Writing %d entries took: %s", wb.count(), watch.toFullTime()));
        } catch (RocksDBException e) {
            e.printStackTrace();
        }
    }

    /**
     * @return always {@code null}, because the database is opened per operation and
     *         no handle is kept; the method is retained so existing callers keep compiling
     */
    public RocksDB getDb() {
        return db;
    }

    /**
     * Reads the whole database into memory.
     *
     * @return all stored counters; empty if the database cannot be opened
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    public Map<String, AtomicLong> getDump() throws UnsupportedEncodingException {
        Map<String, AtomicLong> dump = new HashMap<>();
        RocksDB.loadLibrary();

        try (final Options options = new Options();
             final RocksDB rdb = RocksDB.open(options, path);
             final RocksIterator it = rdb.newIterator()) {

            for (it.seekToFirst(); it.isValid(); it.next()) {
                byte[] key = it.key();
                byte[] value = it.value();
                dump.put(new String(key, UTF_8), new AtomicLong(bytesToLong(value)));
            }
        } catch (RocksDBException e) {
            e.printStackTrace();
        }

        return dump;
    }

    /** Deletes the database folder from disk. */
    public void delete() {
        try {
            FileUtils.deleteDirectory(new File(path));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,524 @@
<?oxygen RNGSchema="http://nl.ijs.si/ssj/gos/schema/tei_gos.rnc" type="compact"?>
<!--DOCTYPE TEI SYSTEM "http://nl.ijs.si/ssj/gos/schema/tei_gos.dtd"-->
<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xml:id="gos" xml:lang="slv">
<teiHeader>
<fileDesc>
<titleStmt>
<title xml:lang="slv">Korpus GOS</title>
<title xml:lang="eng">GOS Corpus</title>
<funder xml:lang="slv">Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za
šolstvo in šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje
2007/2013, razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve:
izboljšanje kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007/2013.
</funder>
<funder xml:lang="eng">The operation is partly financed by the European Union, the European Social Fund, and the
Ministry of Education and Sport of the Republic of Slovenia. The operation is being carried out within the
operational programme Human Resources Development for the period 2007/2013, developmental priorities:
improvement of the quality and efficiency of educational and training systems 2007/2013.
</funder>
<respStmt>
<name xml:id="MIRO">Miro Romih, Amebis</name>
<resp xml:lang="slv">Vodja projekta "Sporazumevanje v slovenskem jeziku.</resp>
<resp xml:lang="eng">"Communication in Slovene" project leader.</resp>
</respStmt>
<respStmt>
<name xml:id="SIMON">Simon Krek, Amebis, JSI</name>
<resp xml:lang="slv">Koordinator projekta "Sporazumevanje v slovenskem jeziku.</resp>
<resp xml:lang="eng">"Communication in Slovene" project coordinator.</resp>
</respStmt>
<respStmt>
<name xml:id="ANA">Ana Zwitter Vitez, Trojina</name>
<resp xml:lang="slv">Koordinatorica gradnje korpusa GOS.</resp>
<resp xml:lang="eng">Coordinator of the GOS corpus compilation project.</resp>
</respStmt>
<respStmt>
<name xml:id="DARINKA">Darinka Verdonik, FERI</name>
<resp xml:lang="slv">Koordinatorica izdelave spletnega konkordančnika GOS.</resp>
<resp xml:lang="eng">Coordinator of the GOS corpus web concordancer project.</resp>
</respStmt>
<respStmt>
<name xml:id="ET">Tomaž Erjavec, JSI</name>
<resp xml:lang="slv">Redakcija zapisa TEI / XML.</resp>
<resp xml:lang="eng">TEI / XML corpus encoding.</resp>
</respStmt>
</titleStmt>
<editionStmt>
<edition>1.0</edition>
</editionStmt>
<publicationStmt>
<distributor>
<address xml:lang="en">
<addrLine>Amebis, d.o.o., Kamnik</addrLine>
<addrLine>Bakovnik 3</addrLine>
<addrLine>SI-1241 Kamnik</addrLine>
<addrLine>Slovenia</addrLine>
</address>
<address xml:lang="sl">
<addrLine>Amebis, d.o.o., Kamnik</addrLine>
<addrLine>Bakovnik 3</addrLine>
<addrLine>1241 Kamnik</addrLine>
</address>
</distributor>
<pubPlace>
<ref target="http://www.slovenscina.eu/">http://www.slovenscina.eu/</ref>
<ref target="http://www.korpus-gos.net/">http://www.korpus-gos.net/</ref>
</pubPlace>
<availability>
<p xml:lang="sl">Avtorske pravice za to izdajo ureja licenca <ref
target="http://creativecommons.org/licenses/by-nc-sa/3.0/deed.sl">Priznanje
avtorstva-Nekomercialno-Deljenje pod enakimi pogoji 3.0</ref>.
</p>
<p xml:lang="sl">Dovoljeno vam je:
<list>
<item>reproduciranje, distribuiranje, dajanje v najem in priobčevanje dela javnosti</item>
<item>predelati delo</item>
</list>
Pod naslednjimi pogoji:
<list>
<item>Priznanje avtorstva — Pri uporabi dela morate navesti izvirnega avtorja na način, ki ga določi
izvirni avtor oziroma dajalec licence. V znanstvenih publikacijah to pomeni citiranje ustreznega
dela ali del, dostopnih na domači strani projekta, <ref target="http://www.slovenscina.eu/">
http://www.slovenscina.eu/</ref>.
</item>
<item>Nekomercialno. Tega dela ne smete uporabiti v komercialne namene.</item>
<item>Deljenje pod enakimi pogoji — Če spremenite, preoblikujete ali uporabite to delo v svojem delu,
lahko distribuirate predelavo dela le pod licenco, ki je enaka tej.
</item>
</list>
</p>
<p xml:lang="en">This work is licenced under the <ref
target="http://creativecommons.org/licenses/by-nc-sa/3.0/deed.en">Attribution-NonCommercial-ShareAlike
3.0</ref>.
</p>
<p xml:lang="en">You are free:
<list>
<item>to Share — to copy, distribute and transmit the work</item>
<item>to Remix — to adapt the work</item>
</list>
Under the following conditions:
<list>
<item>Attribution — You must attribute the work in the manner specified by the author or licensor. In
scientific publications this means citing the relevant publication or publications, referred to on
the home page of the project: <ref target="http://www.slovenscina.eu/">
http://www.slovenscina.eu/</ref>.
</item>
<item>Noncommercial. You may not use this work for commercial purposes.</item>
<item>Share Alike. If you alter, transform, or build upon this work, you may distribute the resulting
work only under the same or similar license to this one.
</item>
</list>
</p>
</availability>
<date>2012-03-14</date>
</publicationStmt>
<sourceDesc>
<p xml:lang="slv">Besedila so pretvorjena v TEI XML iz datotek programa Transcriber.</p>
<p xml:lang="eng">Texts are transformed to TEI XML from Transcriber files.</p>
</sourceDesc>
</fileDesc>
<encodingDesc>
<projectDesc>
<p xml:lang="slv">Projekt
<ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>
</p>
<p xml:lang="eng">Project
<ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>
</p>
</projectDesc>
<classDecl>
<taxonomy xml:id="gosTaxons">
<!-- TIP DISKURZA -->
<category xml:id="gos.T">
<catDesc>tip diskurza</catDesc>
<category xml:id="gos.T.J">
<catDesc>javni</catDesc>
<category xml:id="gos.T.J.I">
<catDesc>informativno-izobraževalni</catDesc>
</category>
<category xml:id="gos.T.J.R">
<catDesc>razvedrilni</catDesc>
</category>
</category>
<category xml:id="gos.T.N">
<catDesc>nejavni</catDesc>
<category xml:id="gos.T.N.N">
<catDesc>nezasebni</catDesc>
</category>
<category xml:id="gos.T.N.Z">
<catDesc>zasebni</catDesc>
</category>
</category>
</category>
<!-- KANAL -->
<category xml:id="gos.K">
<catDesc>kanal</catDesc>
<category xml:id="gos.K.O">
<catDesc>osebni stik</catDesc>
</category>
<category xml:id="gos.K.P">
<catDesc>telefon</catDesc>
</category>
<category xml:id="gos.K.R">
<catDesc>radio</catDesc>
</category>
<category xml:id="gos.K.T">
<catDesc>televizija</catDesc>
</category>
</category>
</taxonomy>
</classDecl>
</encodingDesc>
<profileDesc>
<langUsage>
<language ident="slv">slovenščina</language>
<language ident="eng">angleščina</language>
<language ident="deu">nemščina</language>
<language ident="ita">italijanščina</language>
<language ident="zls">južnoslovanski jeziki</language>
<language ident="sla">drugi slovanski jeziki</language>
<language ident="roa">drugi romanski jeziki</language>
</langUsage>
</profileDesc>
</teiHeader>
<TEI xml:id="gos.001">
<teiHeader>
<fileDesc>
<titleStmt>
<title xml:id="JIfajzakhu-np0911061839_s2">Splošno predavanje za prvi letnik prevajalstva.</title>
<respStmt>
<resp>snemanje</resp>
<name>Neža Pahovnik</name>
</respStmt>
<respStmt>
<resp>transkripcija</resp>
<name>MatejaS</name>
</respStmt>
</titleStmt>
<publicationStmt>
<date>2009-11-05</date>
<pubPlace></pubPlace>
</publicationStmt>
<sourceDesc>
<recordingStmt>
<recording type="audio" dur="PT28M56S">
<broadcast>
<bibl>
<title>terenski posnetek</title>
</bibl>
</broadcast>
<date>2009-11-05</date>
</recording>
</recordingStmt>
</sourceDesc>
</fileDesc>
<profileDesc>
<textClass>
<catRef target="gos.T.J.I"/>
<catRef target="gos.K.O"/>
</textClass>
<textDesc>
<channel/>
<constitution/>
<derivation/>
<domain>akademski, družboslovje</domain>
<factuality/>
<interaction/>
<preparedness/>
<purpose/>
</textDesc>
<particDesc>
<listPerson n="1">
<person n="Af-pred-02166">
<sex value="2">ženski</sex>
<age atLeast="35" atMost="59"/>
<residence>LJ</residence>
<education>fakulteta ali več</education>
<langKnowledge>
<langKnown tag="slv" level="first"/>
</langKnowledge>
</person>
</listPerson>
</particDesc>
<settingDesc>
<place>
<region>LJ</region>
<settlement>Ljubljana</settlement>
</place>
<setting>
<date>2009-10-22</date>
<time>14:40</time>
</setting>
</settingDesc>
</profileDesc>
</teiHeader>
<text>
<body>
<div type="norm">
<u who="Af-pred-02166">
<seg xml:id="gos.001-0001.norm" corresp="#gos.001-0001" synch="JIfajzakhu-np0911061839_s2_0">
<w lemma="n3" msd="L">n3</w>
</seg>
</u>
</div>
</body>
</text>
</TEI>
<TEI xml:id="gos.002">
<teiHeader>
<fileDesc>
<titleStmt>
<title xml:id="JIfajzakhu-np1003120917_s2">Ura filozofije, pri kateri predavatelj študentom razlaga nemško
klasično filozofijo.
</title>
<respStmt>
<resp>snemanje</resp>
<name>Neža Pahovnik</name>
</respStmt>
<respStmt>
<resp>transkripcija</resp>
<name>Alenka Mirkac</name>
</respStmt>
</titleStmt>
<publicationStmt>
<date>2010-03-12</date>
<pubPlace>Ljubljana</pubPlace>
</publicationStmt>
<sourceDesc>
<recordingStmt>
<recording type="audio" dur="PT34M12S">
<broadcast>
<bibl>
<title>terenski posnetek</title>
</bibl>
</broadcast>
<date>2010-03-12</date>
</recording>
</recordingStmt>
</sourceDesc>
</fileDesc>
<profileDesc>
<textClass>
<catRef target="gos.T.J.R"/>
<catRef target="gos.K.O"/>
</textClass>
<textDesc>
<channel/>
<constitution/>
<derivation/>
<domain>akademski, humanistika</domain>
<factuality/>
<interaction/>
<preparedness/>
<purpose/>
</textDesc>
<particDesc>
<listPerson n="1">
<person n="Zm-prof-01084">
<sex value="1">moški</sex>
<age atLeast="35" atMost="59"/>
<residence>LJ, NM</residence>
<education>fakulteta ali več</education>
<langKnowledge>
<langKnown tag="slv" level="first"/>
</langKnowledge>
</person>
</listPerson>
</particDesc>
<settingDesc>
<place>
<region>LJ</region>
<settlement>Ljubljana</settlement>
</place>
<setting>
<date>2010-01-06</date>
<time>19:40</time>
</setting>
</settingDesc>
</profileDesc>
</teiHeader>
<text>
<body>
<div type="norm">
<u who="Af-pred-02166">
<seg xml:id="gos.001-0001.norm" corresp="#gos.001-0001" synch="JIfajzakhu-np0911061839_s2_0">
<w lemma="n4" msd="L">n4</w>
</seg>
</u>
</div>
</body>
</text>
</TEI>
<TEI xml:id="gos.001">
<teiHeader>
<fileDesc>
<titleStmt>
<title xml:id="JIfajzakhu-np0911061839_s2">Splošno predavanje za prvi letnik prevajalstva.</title>
<respStmt>
<resp>snemanje</resp>
<name>Neža Pahovnik</name>
</respStmt>
<respStmt>
<resp>transkripcija</resp>
<name>MatejaS</name>
</respStmt>
</titleStmt>
<publicationStmt>
<date>2009-11-05</date>
<pubPlace></pubPlace>
</publicationStmt>
<sourceDesc>
<recordingStmt>
<recording type="audio" dur="PT28M56S">
<broadcast>
<bibl>
<title>terenski posnetek</title>
</bibl>
</broadcast>
<date>2009-11-05</date>
</recording>
</recordingStmt>
</sourceDesc>
</fileDesc>
<profileDesc>
<textClass>
<catRef target="gos.T.J.I"/>
<catRef target="gos.K.O"/>
</textClass>
<textDesc>
<channel/>
<constitution/>
<derivation/>
<domain>akademski, družboslovje</domain>
<factuality/>
<interaction/>
<preparedness/>
<purpose/>
</textDesc>
<particDesc>
<listPerson n="1">
<person n="Af-pred-02166">
<sex value="2">ženski</sex>
<age atLeast="35" atMost="59"/>
<residence>LJ</residence>
<education>fakulteta ali več</education>
<langKnowledge>
<langKnown tag="slv" level="first"/>
</langKnowledge>
</person>
</listPerson>
</particDesc>
<settingDesc>
<place>
<region>LJ</region>
<settlement>Ljubljana</settlement>
</place>
<setting>
<date>2009-10-22</date>
<time>14:40</time>
</setting>
</settingDesc>
</profileDesc>
</teiHeader>
<text>
<body>
<div type="norm">
<u who="Af-pred-02166">
<seg xml:id="gos.001-0001.norm" corresp="#gos.001-0001" synch="JIfajzakhu-np0911061839_s2_0">
<w lemma="n3" msd="L">n3</w>
</seg>
</u>
</div>
</body>
</text>
</TEI>
<TEI xml:id="gos.002">
<teiHeader>
<fileDesc>
<titleStmt>
<title xml:id="JIfajzakhu-np1003120917_s2">Ura filozofije, pri kateri predavatelj študentom razlaga nemško
klasično filozofijo.
</title>
<respStmt>
<resp>snemanje</resp>
<name>Neža Pahovnik</name>
</respStmt>
<respStmt>
<resp>transkripcija</resp>
<name>Alenka Mirkac</name>
</respStmt>
</titleStmt>
<publicationStmt>
<date>2010-03-12</date>
<pubPlace>Ljubljana</pubPlace>
</publicationStmt>
<sourceDesc>
<recordingStmt>
<recording type="audio" dur="PT34M12S">
<broadcast>
<bibl>
<title>terenski posnetek</title>
</bibl>
</broadcast>
<date>2010-03-12</date>
</recording>
</recordingStmt>
</sourceDesc>
</fileDesc>
<profileDesc>
<textClass>
<catRef target="gos.T.J.R"/>
<catRef target="gos.K.O"/>
</textClass>
<textDesc>
<channel/>
<constitution/>
<derivation/>
<domain>akademski, humanistika</domain>
<factuality/>
<interaction/>
<preparedness/>
<purpose/>
</textDesc>
<particDesc>
<listPerson n="1">
<person n="Zm-prof-01084">
<sex value="1">moški</sex>
<age atLeast="35" atMost="59"/>
<residence>LJ, NM</residence>
<education>fakulteta ali več</education>
<langKnowledge>
<langKnown tag="slv" level="first"/>
</langKnowledge>
</person>
</listPerson>
</particDesc>
<settingDesc>
<place>
<region>LJ</region>
<settlement>Ljubljana</settlement>
</place>
<setting>
<date>2010-01-06</date>
<time>19:40</time>
</setting>
</settingDesc>
</profileDesc>
</teiHeader>
<text>
<body>
<div type="norm">
<u who="Af-pred-02166">
<seg xml:id="gos.001-0001.norm" corresp="#gos.001-0001" synch="JIfajzakhu-np0911061839_s2_0">
<w lemma="n4" msd="L">n4</w>
</seg>
</u>
</div>
</body>
</text>
</TEI>
</teiCorpus>

133
src/main/resources/GUI.fxml Normal file
View File

@@ -0,0 +1,133 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--<?import gui.CorpusTab ?>-->
<!--<?import gui.StringAnalysisTab ?>-->
<?import java.lang.*?>
<?import javafx.collections.FXCollections?>
<?import javafx.scene.control.*?>
<?import javafx.scene.layout.*?>
<AnchorPane prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111" xmlns:fx="http://javafx.com/fxml/1"
fx:controller="gui.GUIController">
<children>
<TabPane fx:id="tabPane" prefHeight="600.0" prefWidth="800.0" tabClosingPolicy="UNAVAILABLE" AnchorPane.bottomAnchor="0.0"
AnchorPane.leftAnchor="0.0" AnchorPane.rightAnchor="0.0" AnchorPane.topAnchor="0.0">
<tabs>
<Tab fx:id="corpusTab" closable="false" text="Korpus">
<fx:include fx:id="ct" source="gui/CorpusTab.fxml"/>
</Tab>
<Tab fx:id="filterTab" closable="false" disable="true" text="Filter">
<fx:include fx:id="ffs" source="gui/FiltersForSolar.fxml"/>
</Tab>
<Tab fx:id="CharacterLevelTabNew" closable="false" disable="true" text="Črke">
<fx:include fx:id="cat" source="gui/CharacterAnalysisTab.fxml"/>
</Tab>
<Tab fx:id="wordLevelTab" closable="false" disable="true" text="Besedni deli">
<fx:include fx:id="wl" source="gui/WordLevelTab.fxml"/>
</Tab>
<Tab fx:id="OneWordAnalysisTab" closable="false" disable="true" text="Besede">
<fx:include fx:id="oneWordTab" source="gui/OneWordAnalysisTab.fxml"/>
</Tab>
<Tab fx:id="StringLevelTabNew2" closable="false" disable="true" text="Besedni nizi">
<fx:include fx:id="satNew2" source="gui/StringAnalysisTabNew2.fxml"/>
</Tab>
<!--<Tab fx:id="wordFormationTab" closable="false" disable="true" text="Besedotvorni procesi">
<fx:include fx:id="wf" source="gui/WordFormationTab.fxml"/>
</Tab>-->
<!--<Tab fx:id="wordLevelTab" closable="false" disable="true" text="Nivo besed in delov besed">-->
<!--<content>-->
<!--<AnchorPane minHeight="0.0" minWidth="0.0" prefHeight="180.0" prefWidth="200.0">-->
<!--<children>-->
<!--<Label fx:id="izbraniFiltriLabelB" layoutX="400.0" layoutY="14.0" text="Izbrani filtri:"/>-->
<!--<Label fx:id="selectedFiltersLabelB" layoutX="399.0" layoutY="45.0" text="/"/>-->
<!--<Label layoutX="35.0" layoutY="30.0" text="Različnica/lema"/>-->
<!--<ComboBox fx:id="distributionWordOrLemmaCombo" layoutX="146.0" layoutY="26.0"-->
<!--prefWidth="150.0" promptText="izberi">-->
<!--<items>-->
<!--<FXCollections fx:factory="observableArrayList">-->
<!--<String fx:value="različnica"/>-->
<!--<String fx:value="lema"/>-->
<!--</FXCollections>-->
<!--</items>-->
<!--</ComboBox>-->
<!--<Label layoutX="35.0" layoutY="75.0" text="JOS:"/>-->
<!--<ComboBox fx:id="distributionJosCombo" layoutX="146.0" layoutY="71.0"-->
<!--prefWidth="150.0" promptText="izberi">-->
<!--<items>-->
<!--<FXCollections fx:factory="observableArrayList">-->
<!--<String fx:value="- brez -"/>-->
<!--<String fx:value="samostalnik"/>-->
<!--<String fx:value="glagol"/>-->
<!--<String fx:value="pridevnik"/>-->
<!--<String fx:value="prislov"/>-->
<!--<String fx:value="zaimek"/>-->
<!--<String fx:value="stevnik"/>-->
<!--<String fx:value="predlog"/>-->
<!--<String fx:value="veznik"/>-->
<!--<String fx:value="clenek"/>-->
<!--<String fx:value="medmet"/>-->
<!--<String fx:value="okrajsava"/>-->
<!--</FXCollections>-->
<!--</items>-->
<!--</ComboBox>-->
<!--<Label layoutX="35.0" layoutY="120.0" text="Taksonomija:"/>-->
<!--<ComboBox fx:id="distributionTaxonomyCombo" layoutX="146.0" layoutY="116.0"-->
<!--prefWidth="150.0" promptText="izberi"-->
<!--visibleRowCount="5">-->
<!--</ComboBox>-->
<!--<Button fx:id="distributionCalculateButton" layoutX="32.0" layoutY="180.0" mnemonicParsing="false"-->
<!--prefHeight="25.0" prefWidth="243.0" text="Izračunaj"/>-->
<!--&lt;!&ndash;<TitledPane animated="false" layoutX="-2.0" layoutY="315.0" prefHeight="256.0" prefWidth="806.0" text="Distribucija zaporedij samoglasnikov in soglasnikov">&ndash;&gt;-->
<!--&lt;!&ndash;<content>&ndash;&gt;-->
<!--&lt;!&ndash;<AnchorPane minHeight="0.0" minWidth="0.0" prefHeight="180.0" prefWidth="457.0">&ndash;&gt;-->
<!--&lt;!&ndash;<children>&ndash;&gt;-->
<!--&lt;!&ndash;<Label layoutX="21.0" layoutY="18.0" text="Samostalnik/lema:" />&ndash;&gt;-->
<!--&lt;!&ndash;<ComboBox fx:id="distributionCVVWordOrLemmaCombo" layoutX="135.0" layoutY="14.0" onAction="#distributionCVVWOrdOrLemma" prefWidth="150.0" promptText="izberi">&ndash;&gt;-->
<!--&lt;!&ndash;<items>&ndash;&gt;-->
<!--&lt;!&ndash;<FXCollections fx:factory="observableArrayList">&ndash;&gt;-->
<!--&lt;!&ndash;<String fx:value="različnica" />&ndash;&gt;-->
<!--&lt;!&ndash;<String fx:value="lema" />&ndash;&gt;-->
<!--&lt;!&ndash;</FXCollections>&ndash;&gt;-->
<!--&lt;!&ndash;</items>&ndash;&gt;-->
<!--&lt;!&ndash;</ComboBox>&ndash;&gt;-->
<!--&lt;!&ndash;<Label layoutX="21.0" layoutY="69.0" text="Dolžina zaporedja:" />&ndash;&gt;-->
<!--&lt;!&ndash;&lt;!&ndash;<TextField fx:id="CVVLengthTA" layoutX="136.0" layoutY="65.0" onAction="#CVVLength" prefHeight="25.0" prefWidth="214.0" promptText="vnesi dolžino zaporedja (celo število)" />&ndash;&gt;&ndash;&gt;-->
<!--&lt;!&ndash;<Button fx:id="distributionCalculateCVVButton" layoutX="22.0" layoutY="103.0" mnemonicParsing="false" onAction="#distributionCVVCalculate" prefHeight="25.0" prefWidth="243.0" text="Izračunaj" />&ndash;&gt;-->
<!--&lt;!&ndash;<TextField fx:id="morphosyntacticFilterTextField" layoutX="22.0" layoutY="158.0" onAction="#morphosyntacticFilterTextArea" prefHeight="25.0" prefWidth="766.0" />&ndash;&gt;-->
<!--&lt;!&ndash;<ProgressBar fx:id="distributionProgressBar" layoutX="20.0" layoutY="174.0" prefHeight="18.0" prefWidth="770.0" progress="0.0" />&ndash;&gt;-->
<!--&lt;!&ndash;<Label fx:id="distributionProgressLabel" layoutX="20.0" layoutY="199.0" prefHeight="17.0" prefWidth="769.0" text="Label" />&ndash;&gt;-->
<!--&lt;!&ndash;</children>&ndash;&gt;-->
<!--&lt;!&ndash;</AnchorPane>&ndash;&gt;-->
<!--&lt;!&ndash;</content>&ndash;&gt;-->
<!--&lt;!&ndash;</TitledPane>&ndash;&gt;-->
<!--</children>-->
<!--</AnchorPane>-->
<!--</content>-->
<!--</Tab>-->
<!--<Tab fx:id="wordFormationTab" disable="false" text="Oblikoslovne kategorije">-->
<!--<content>-->
<!--<AnchorPane minHeight="0.0" minWidth="0.0" prefHeight="180.0" prefWidth="200.0">-->
<!--<children>-->
<!--<Label fx:id="izbraniFiltriLabelC" layoutX="400.0" layoutY="14.0" text="Izbrani filtri:"/>-->
<!--<Label fx:id="selectedFiltersLabelC" layoutX="399.0" layoutY="45.0" text="/"/>-->
<!--<Label layoutX="18.0" layoutY="27.0" text="Taksonomija:"/>-->
<!--<ComboBox fx:id="inflectedJosTaxonomyCombo" layoutX="129.0" layoutY="23.0"-->
<!--prefWidth="150.0" promptText="izberi"-->
<!--visibleRowCount="5">-->
<!--</ComboBox>-->
<!--<Button fx:id="inflectedJosCalculateButton" layoutX="15.0" layoutY="87.0" mnemonicParsing="false"-->
<!--prefHeight="25.0" prefWidth="243.0" text="Izračunaj"/>-->
<!--<ProgressBar fx:id="inflectedJOSProgressBar" layoutX="15.0" layoutY="499.0" prefHeight="18.0"-->
<!--prefWidth="770.0" progress="0.0"/>-->
<!--<Label fx:id="inflectedJOSProgressLabel" layoutX="15.0" layoutY="524.0" prefHeight="17.0"-->
<!--prefWidth="769.0" text="Label"/>-->
<!--</children>-->
<!--</AnchorPane>-->
<!--</content>-->
<!--</Tab>-->
</tabs>
</TabPane>
</children>
</AnchorPane>

View File

@@ -0,0 +1,237 @@
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0012405" xml:lang="sl">
<teiHeader>
<fileDesc>
<titleStmt>
<title>Gigafida: Branko Gradišnik. ANTI2(1999)</title>
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
</funder>
</titleStmt>
<editionStmt>
<edition>1.0</edition>
</editionStmt>
<extent>52 besed</extent>
<publicationStmt>
<idno>ANTI2</idno>
<availability status="restricted">
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
</p>
</availability>
<date>2012-04-15</date>
</publicationStmt>
<sourceDesc>
<bibl>
<title n="???">neznani naslov</title>
<author>Branko Gradišnik</author>
<date>1999</date>
<publisher n="drugo">neznani založnik</publisher>
<note type="sourceLang"/>
</bibl>
</sourceDesc>
</fileDesc>
<encodingDesc>
<projectDesc>
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
</p>
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
</p>
</projectDesc>
<tagsDecl>
<namespace name="http://www.tei-c.org/ns/1.0">
<tagUsage gi="S" occurs="50"/>
<tagUsage gi="body" occurs="1"/>
<tagUsage gi="c" occurs="11"/>
<tagUsage gi="p" occurs="2"/>
<tagUsage gi="s" occurs="5"/>
<tagUsage gi="text" occurs="1"/>
<tagUsage gi="w" occurs="52"/>
</namespace>
</tagsDecl>
<appInfo>
<application ident="Amebis_pretvornik" version="1.0">
<label>[ZDRUZEVANJE] 1:1</label>
<label>[IME] D:\FIDA\KORPUS\VNOS\2_ZDR\ANTI2.ZDR</label>
<label>[1] **********</label>
<label>[IZVOR] D:\FIDA\KORPUS\Vhod\Brane\IZVIRNO\LITERAT\IDEJE\Anti2.doc</label>
<label>[FORMAT] MS Word for Windows 6.0/7.0</label>
<label>[DATUM] 2.12.1999</label>
<label>[IZVOR_RTF] D:\FIDA\KORPUS\Vhod\Brane\IZVIRNO\LITERAT\IDEJE\Anti2.RTF</label>
<label>[PRETVORBA] RTF</label>
<label>[KONEC] **********</label>
</application>
</appInfo>
<classDecl>
<taxonomy xml:id="SSJ">
<category xml:id="SSJ.T">
<catDesc>tisk</catDesc>
<category xml:id="SSJ.T.K">
<catDesc>knjižno</catDesc>
<category xml:id="SSJ.T.K.L">
<catDesc>leposlovno</catDesc>
</category>
<category xml:id="SSJ.T.K.S">
<catDesc>strokovno</catDesc>
</category>
</category>
<category xml:id="SSJ.T.P">
<catDesc>periodično</catDesc>
<category xml:id="SSJ.T.P.C">
<catDesc>časopis</catDesc>
</category>
<category xml:id="SSJ.T.P.R">
<catDesc>revija</catDesc>
</category>
</category>
<category xml:id="SSJ.T.D">
<catDesc>drugo</catDesc>
</category>
</category>
<category xml:id="SSJ.I">
<catDesc>internet</catDesc>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.P">
<catDesc>prenosnik</catDesc>
<category xml:id="Ft.P.G">
<catDesc>govorni</catDesc>
</category>
<category xml:id="Ft.P.E">
<catDesc>elektronski</catDesc>
</category>
<category xml:id="Ft.P.P">
<catDesc>pisni</catDesc>
<category xml:id="Ft.P.P.O">
<catDesc>objavljeno</catDesc>
<category xml:id="Ft.P.P.O.K">
<catDesc>knjižno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P">
<catDesc>periodično</catDesc>
<category xml:id="Ft.P.P.O.P.C">
<catDesc>časopisno</catDesc>
<category xml:id="Ft.P.P.O.P.C.D">
<catDesc>dnevno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.C.V">
<catDesc>večkrat tedensko</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.C.T">
<catDesc>tedensko</catDesc>
</category>
</category>
<category xml:id="Ft.P.P.O.P.R">
<catDesc>revialno</catDesc>
<category xml:id="Ft.P.P.O.P.R.T">
<catDesc>tedensko</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.S">
<catDesc>štirinajstdnevno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.M">
<catDesc>mesečno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.D">
<catDesc>redkeje kot na mesec</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.O">
<catDesc>občasno</catDesc>
</category>
</category>
</category>
</category>
<category xml:id="Ft.P.P.N">
<catDesc>neobjavljeno</catDesc>
<category xml:id="Ft.P.P.N.J">
<catDesc>javno</catDesc>
</category>
<category xml:id="Ft.P.P.N.I">
<catDesc>interno</catDesc>
</category>
<category xml:id="Ft.P.P.N.Z">
<catDesc>zasebno</catDesc>
</category>
</category>
</category>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.Z">
<catDesc>zvrst</catDesc>
<category xml:id="Ft.Z.U">
<catDesc>umetnostna</catDesc>
<category xml:id="Ft.Z.U.P">
<catDesc>pesniška</catDesc>
</category>
<category xml:id="Ft.Z.U.R">
<catDesc>prozna</catDesc>
</category>
<category xml:id="Ft.Z.U.D">
<catDesc>dramska</catDesc>
</category>
</category>
<category xml:id="Ft.Z.N">
<catDesc>neumetnostna</catDesc>
<category xml:id="Ft.Z.N.S">
<catDesc>strokovna</catDesc>
<category xml:id="Ft.Z.N.S.H">
<catDesc>humanistična in družboslovna</catDesc>
</category>
<category xml:id="Ft.Z.N.S.N">
<catDesc>naravoslovna in tehnična</catDesc>
</category>
</category>
<category xml:id="Ft.Z.N.N">
<catDesc>nestrokovna</catDesc>
</category>
<category xml:id="Ft.Z.N.P">
<catDesc>pravna</catDesc>
</category>
</category>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.L">
<catDesc>lektorirano</catDesc>
<category xml:id="Ft.L.D">
<catDesc>da</catDesc>
</category>
<category xml:id="Ft.L.N">
<catDesc>ne</catDesc>
</category>
</category>
</taxonomy>
</classDecl>
</encodingDesc>
<profileDesc>
<textClass>
<catRef target="#SSJ.T.K.S"/>
<catRef target="#Ft.P.P.N.Z"/>
<catRef target="#Ft.Z.N.N"/>
</textClass>
</profileDesc>
</teiHeader>
<text xml:id="F0012405." xml:lang="sl">
<body>
<p>
<s>
<w msd="Vd" lemma="ker">Ker</w>
<S/>
<w msd="Ggnste-n" lemma="imeti">ima</w>
<S/>
<w msd="Somei" lemma="junak">junak</w>
<S/>
<w msd="Dm" lemma="v">v</w>
<S/>
<w msd="Sozem" lemma="posest">posesti</w>
<c>.</c>
</s>
</p>
</body>
</text>
</TEI>

View File

@@ -0,0 +1,70 @@
"Korpus: ";Gigafida
"Datum: ";14.05.2018 06:34
"Analiza: ";Besedni nizi
"n-gram nivo: ";nivo črk
"Skip: ";0
"Izračunaj za: ";lema
"Izračunaj za kombinacije samoglasnikov in soglasnikov: ";ne
"Dolžina niza: ";1
word;frequency;percent
a;438;11.086%
i;390;9.871%
e;341;8.631%
o;328;8.302%
t;262;6.631%
n;261;6.606%
r;229;5.796%
k;174;4.404%
d;144;3.645%
s;141;3.569%
v;133;3.366%
l;123;3.113%
j;120;3.037%
p;120;3.037%
z;81;2.05%
b;75;1.898%
u;71;1.797%
"č";65;1.645%
m;58;1.468%
g;53;1.341%
c;44;1.114%
"š";32;0.81%
"ž";32;0.81%
1;28;0.709%
h;20;0.506%
0;19;0.481%
2;18;0.456%
".";17;0.43%
M;13;0.329%
6;12;0.304%
f;11;0.278%
9;10;0.253%
3;8;0.202%
A;7;0.177%
J;7;0.177%
T;6;0.152%
B;5;0.127%
K;5;0.127%
P;5;0.127%
5;4;0.101%
8;4;0.101%
R;4;0.101%
S;4;0.101%
4;3;0.076%
":";3;0.076%
D;3;0.076%
F;3;0.076%
I;3;0.076%
7;2;0.051%
G;2;0.051%
w;2;0.051%
"'";1;0.025%
C;1;0.025%
E;1;0.025%
L;1;0.025%
N;1;0.025%
V;1;0.025%
Z;1;0.025%
"Š";1;0.025%
1 Korpus: Gigafida
2 Datum: 14.05.2018 06:34
3 Analiza: Besedni nizi
4 n-gram nivo: nivo črk
5 Skip: 0
6 Izračunaj za: lema
7 Izračunaj za kombinacije samoglasnikov in soglasnikov: ne
8 Dolžina niza: 1
9 word frequency percent
10 a 438 11.086%
11 i 390 9.871%
12 e 341 8.631%
13 o 328 8.302%
14 t 262 6.631%
15 n 261 6.606%
16 r 229 5.796%
17 k 174 4.404%
18 d 144 3.645%
19 s 141 3.569%
20 v 133 3.366%
21 l 123 3.113%
22 j 120 3.037%
23 p 120 3.037%
24 z 81 2.05%
25 b 75 1.898%
26 u 71 1.797%
27 č 65 1.645%
28 m 58 1.468%
29 g 53 1.341%
30 c 44 1.114%
31 š 32 0.81%
32 ž 32 0.81%
33 1 28 0.709%
34 h 20 0.506%
35 0 19 0.481%
36 2 18 0.456%
37 . 17 0.43%
38 M 13 0.329%
39 6 12 0.304%
40 f 11 0.278%
41 9 10 0.253%
42 3 8 0.202%
43 A 7 0.177%
44 J 7 0.177%
45 T 6 0.152%
46 B 5 0.127%
47 K 5 0.127%
48 P 5 0.127%
49 5 4 0.101%
50 8 4 0.101%
51 R 4 0.101%
52 S 4 0.101%
53 4 3 0.076%
54 : 3 0.076%
55 D 3 0.076%
56 F 3 0.076%
57 I 3 0.076%
58 7 2 0.051%
59 G 2 0.051%
60 w 2 0.051%
61 ' 1 0.025%
62 C 1 0.025%
63 E 1 0.025%
64 L 1 0.025%
65 N 1 0.025%
66 V 1 0.025%
67 Z 1 0.025%
68 Š 1 0.025%

View File

@@ -0,0 +1,390 @@
"Korpus: ";Gigafida
"Datum: ";14.05.2018 06:37
"Analiza: ";Besedni nizi
"n-gram nivo: ";nivo črk
"Skip: ";0
"Izračunaj za: ";lema
"Izračunaj za kombinacije samoglasnikov in soglasnikov: ";ne
"Dolžina niza: ";2
word;frequency;percent
ti;122;3.835%
en;70;2.201%
at;59;1.855%
it;56;1.76%
in;54;1.698%
ko;54;1.698%
st;48;1.509%
na;48;1.509%
po;46;1.446%
ar;45;1.415%
ka;45;1.415%
ra;44;1.383%
an;42;1.32%
pr;40;1.257%
bi;40;1.257%
je;39;1.226%
re;38;1.195%
te;37;1.163%
ja;37;1.163%
od;36;1.132%
ov;36;1.132%
ta;33;1.037%
ri;31;0.975%
el;31;0.975%
er;30;0.943%
da;28;0.88%
se;27;0.849%
za;27;0.849%
ni;26;0.817%
av;24;0.754%
do;24;0.754%
vi;24;0.754%
ro;23;0.723%
ed;23;0.723%
ek;23;0.723%
le;23;0.723%
li;22;0.692%
nj;22;0.692%
os;22;0.692%
de;21;0.66%
la;21;0.66%
lo;21;0.66%
ve;20;0.629%
lj;20;0.629%
no;20;0.629%
ol;20;0.629%
aj;19;0.597%
or;19;0.597%
rt;18;0.566%
to;18;0.566%
va;18;0.566%
es;18;0.566%
me;18;0.566%
on;18;0.566%
ki;17;0.534%
pe;17;0.534%
ak;16;0.503%
ce;16;0.503%
dr;16;0.503%
et;15;0.472%
ic;15;0.472%
ik;15;0.472%
is;15;0.472%
ič;15;0.472%
ob;15;0.472%
sk;14;0.44%
ca;14;0.44%
ga;14;0.44%
ot;14;0.44%
as;13;0.409%
rk;13;0.409%
ru;13;0.409%
ev;13;0.409%
"ča";13;0.409%
"če";13;0.409%
ij;13;0.409%
ir;13;0.409%
kr;13;0.409%
ma;13;0.409%
ne;13;0.409%
og;13;0.409%
ur;12;0.377%
"ža";12;0.377%
vo;12;0.377%
go;12;0.377%
zd;12;0.377%
iz;12;0.377%
ju;12;0.377%
op;12;0.377%
ad;11;0.346%
iž;11;0.346%
"či";11;0.346%
Ma;11;0.346%
oz;11;0.346%
al;10;0.314%
di;10;0.314%
us;10;0.314%
em;10;0.314%
eč;10;0.314%
om;10;0.314%
pa;10;0.314%
so;9;0.283%
ug;9;0.283%
"ša";9;0.283%
iv;9;0.283%
mi;9;0.283%
ok;9;0.283%
be;8;0.251%
bl;8;0.251%
nč;8;0.251%
oč;8;0.251%
tr;8;0.251%
ec;8;0.251%
ze;8;0.251%
ns;8;0.251%
sp;7;0.22%
dj;7;0.22%
un;7;0.22%
aš;7;0.22%
il;7;0.22%
"še";7;0.22%
ke;7;0.22%
eš;7;0.22%
1.;6;0.189%
10;6;0.189%
ah;6;0.189%
rj;6;0.189%
ba;6;0.189%
uh;6;0.189%
eb;6;0.189%
"že";6;0.189%
ep;6;0.189%
ji;6;0.189%
ml;6;0.189%
nb;6;0.189%
nk;6;0.189%
am;5;0.157%
ap;5;0.157%
az;5;0.157%
20;5;0.157%
sn;5;0.157%
sr;5;0.157%
dn;5;0.157%
ej;5;0.157%
ez;5;0.157%
ač;5;0.157%
ge;5;0.157%
gl;5;0.157%
gr;5;0.157%
ha;5;0.157%
"čk";5;0.157%
"čl";5;0.157%
"št";5;0.157%
uč;5;0.157%
jd;5;0.157%
kl;5;0.157%
ku;5;0.157%
Ju;5;0.157%
Ko;5;0.157%
oj;5;0.157%
01;4;0.126%
11;4;0.126%
rb;4;0.126%
rm;4;0.126%
bo;4;0.126%
sa;4;0.126%
si;4;0.126%
ci;4;0.126%
tj;4;0.126%
tv;4;0.126%
To;4;0.126%
eh;4;0.126%
"ži";4;0.126%
"žn";4;0.126%
vl;4;0.126%
oš;4;0.126%
ož;4;0.126%
ib;4;0.126%
id;4;0.126%
"šk";4;0.126%
zg;4;0.126%
zi;4;0.126%
mo;4;0.126%
".1";4;0.126%
nt;4;0.126%
oc;4;0.126%
of;4;0.126%
ac;3;0.094%
13;3;0.094%
19;3;0.094%
Ag;3;0.094%
br;3;0.094%
Ro;3;0.094%
sv;3;0.094%
ck;3;0.094%
Br;3;0.094%
dl;3;0.094%
ud;3;0.094%
du;3;0.094%
um;3;0.094%
up;3;0.094%
ut;3;0.094%
vn;3;0.094%
62;3;0.094%
vs;3;0.094%
66;3;0.094%
fi;3;0.094%
he;3;0.094%
hk;3;0.094%
ho;3;0.094%
9.;3;0.094%
ig;3;0.094%
im;3;0.094%
"šn";3;0.094%
až;3;0.094%
nd;3;0.094%
".2";3;0.094%
00;2;0.063%
pt;2;0.063%
09;2;0.063%
12;2;0.063%
ag;2;0.063%
rc;2;0.063%
rd;2;0.063%
rg;2;0.063%
rn;2;0.063%
21;2;0.063%
rs;2;0.063%
2:;2;0.063%
Al;2;0.063%
An;2;0.063%
sl;2;0.063%
3.;2;0.063%
su;2;0.063%
1s;2;0.063%
th;2;0.063%
tn;2;0.063%
db;2;0.063%
Sr;2;0.063%
tu;2;0.063%
46;2;0.063%
dg;2;0.063%
dk;2;0.063%
ub;2;0.063%
dt;2;0.063%
Da;2;0.063%
vd;2;0.063%
fa;2;0.063%
vr;2;0.063%
ff;2;0.063%
vz;2;0.063%
fo;2;0.063%
Fi;2;0.063%
bč;2;0.063%
gu;2;0.063%
8.;2;0.063%
"čn";2;0.063%
Go;2;0.063%
98;2;0.063%
99;2;0.063%
"šp";2;0.063%
zm;2;0.063%
zn;2;0.063%
jc;2;0.063%
Ja;2;0.063%
ll;2;0.063%
ln;2;0.063%
uš;2;0.063%
už;2;0.063%
vš;2;0.063%
ež;2;0.063%
nu;2;0.063%
vž;2;0.063%
03;1;0.031%
08;1;0.031%
Pa;1;0.031%
Pe;1;0.031%
iš;1;0.031%
Pl;1;0.031%
Po;1;0.031%
ab;1;0.031%
Pr;1;0.031%
rf;1;0.031%
rh;1;0.031%
t.;1;0.031%
2.;1;0.031%
22;1;0.031%
24;1;0.031%
25;1;0.031%
29;1;0.031%
bn;1;0.031%
SC;1;0.031%
sm;1;0.031%
30;1;0.031%
31;1;0.031%
Ba;1;0.031%
cc;1;0.031%
35;1;0.031%
Ru;1;0.031%
Be;1;0.031%
co;1;0.031%
ct;1;0.031%
4.;1;0.031%
St;1;0.031%
dp;1;0.031%
Ta;1;0.031%
uc;1;0.031%
ds;1;0.031%
uf;1;0.031%
dv;1;0.031%
uk;1;0.031%
ea;1;0.031%
56;1;0.031%
Tu;1;0.031%
ef;1;0.031%
De;1;0.031%
eg;1;0.031%
ei;1;0.031%
"žm";1;0.031%
nš;1;0.031%
vk;1;0.031%
60;1;0.031%
fe;1;0.031%
El;1;0.031%
Va;1;0.031%
fu;1;0.031%
nž;1;0.031%
wi;1;0.031%
i';1;0.031%
gi;1;0.031%
Fr;1;0.031%
"čb";1;0.031%
hi;1;0.031%
I.;1;0.031%
"ču";1;0.031%
hr;1;0.031%
"Šm";1;0.031%
ie;1;0.031%
97;1;0.031%
9:;1;0.031%
io;1;0.031%
zb;1;0.031%
"'s";1;0.031%
zo;1;0.031%
":2";1;0.031%
zr;1;0.031%
zs;1;0.031%
":3";1;0.031%
zu;1;0.031%
":5";1;0.031%
zv;1;0.031%
jn;1;0.031%
In;1;0.031%
jo;1;0.031%
js;1;0.031%
Iv;1;0.031%
kd;1;0.031%
Zu;1;0.031%
ld;1;0.031%
lm;1;0.031%
lu;1;0.031%
Lj;1;0.031%
mp;1;0.031%
ms;1;0.031%
MS;1;0.031%
nc;1;0.031%
ng;1;0.031%
".0";1;0.031%
Mo;1;0.031%
nr;1;0.031%
".7";1;0.031%
".9";1;0.031%
"šč";1;0.031%
Ne;1;0.031%
oh;1;0.031%
oi;1;0.031%
ow;1;0.031%
pi;1;0.031%
pl;1;0.031%
1 Korpus: Gigafida
2 Datum: 14.05.2018 06:37
3 Analiza: Besedni nizi
4 n-gram nivo: nivo črk
5 Skip: 0
6 Izračunaj za: lema
7 Izračunaj za kombinacije samoglasnikov in soglasnikov: ne
8 Dolžina niza: 2
9 word frequency percent
10 ti 122 3.835%
11 en 70 2.201%
12 at 59 1.855%
13 it 56 1.76%
14 in 54 1.698%
15 ko 54 1.698%
16 st 48 1.509%
17 na 48 1.509%
18 po 46 1.446%
19 ar 45 1.415%
20 ka 45 1.415%
21 ra 44 1.383%
22 an 42 1.32%
23 pr 40 1.257%
24 bi 40 1.257%
25 je 39 1.226%
26 re 38 1.195%
27 te 37 1.163%
28 ja 37 1.163%
29 od 36 1.132%
30 ov 36 1.132%
31 ta 33 1.037%
32 ri 31 0.975%
33 el 31 0.975%
34 er 30 0.943%
35 da 28 0.88%
36 se 27 0.849%
37 za 27 0.849%
38 ni 26 0.817%
39 av 24 0.754%
40 do 24 0.754%
41 vi 24 0.754%
42 ro 23 0.723%
43 ed 23 0.723%
44 ek 23 0.723%
45 le 23 0.723%
46 li 22 0.692%
47 nj 22 0.692%
48 os 22 0.692%
49 de 21 0.66%
50 la 21 0.66%
51 lo 21 0.66%
52 ve 20 0.629%
53 lj 20 0.629%
54 no 20 0.629%
55 ol 20 0.629%
56 aj 19 0.597%
57 or 19 0.597%
58 rt 18 0.566%
59 to 18 0.566%
60 va 18 0.566%
61 es 18 0.566%
62 me 18 0.566%
63 on 18 0.566%
64 ki 17 0.534%
65 pe 17 0.534%
66 ak 16 0.503%
67 ce 16 0.503%
68 dr 16 0.503%
69 et 15 0.472%
70 ic 15 0.472%
71 ik 15 0.472%
72 is 15 0.472%
73 15 0.472%
74 ob 15 0.472%
75 sk 14 0.44%
76 ca 14 0.44%
77 ga 14 0.44%
78 ot 14 0.44%
79 as 13 0.409%
80 rk 13 0.409%
81 ru 13 0.409%
82 ev 13 0.409%
83 ča 13 0.409%
84 če 13 0.409%
85 ij 13 0.409%
86 ir 13 0.409%
87 kr 13 0.409%
88 ma 13 0.409%
89 ne 13 0.409%
90 og 13 0.409%
91 ur 12 0.377%
92 ža 12 0.377%
93 vo 12 0.377%
94 go 12 0.377%
95 zd 12 0.377%
96 iz 12 0.377%
97 ju 12 0.377%
98 op 12 0.377%
99 ad 11 0.346%
100 11 0.346%
101 či 11 0.346%
102 Ma 11 0.346%
103 oz 11 0.346%
104 al 10 0.314%
105 di 10 0.314%
106 us 10 0.314%
107 em 10 0.314%
108 10 0.314%
109 om 10 0.314%
110 pa 10 0.314%
111 so 9 0.283%
112 ug 9 0.283%
113 ša 9 0.283%
114 iv 9 0.283%
115 mi 9 0.283%
116 ok 9 0.283%
117 be 8 0.251%
118 bl 8 0.251%
119 8 0.251%
120 8 0.251%
121 tr 8 0.251%
122 ec 8 0.251%
123 ze 8 0.251%
124 ns 8 0.251%
125 sp 7 0.22%
126 dj 7 0.22%
127 un 7 0.22%
128 7 0.22%
129 il 7 0.22%
130 še 7 0.22%
131 ke 7 0.22%
132 7 0.22%
133 1. 6 0.189%
134 10 6 0.189%
135 ah 6 0.189%
136 rj 6 0.189%
137 ba 6 0.189%
138 uh 6 0.189%
139 eb 6 0.189%
140 že 6 0.189%
141 ep 6 0.189%
142 ji 6 0.189%
143 ml 6 0.189%
144 nb 6 0.189%
145 nk 6 0.189%
146 am 5 0.157%
147 ap 5 0.157%
148 az 5 0.157%
149 20 5 0.157%
150 sn 5 0.157%
151 sr 5 0.157%
152 dn 5 0.157%
153 ej 5 0.157%
154 ez 5 0.157%
155 5 0.157%
156 ge 5 0.157%
157 gl 5 0.157%
158 gr 5 0.157%
159 ha 5 0.157%
160 čk 5 0.157%
161 čl 5 0.157%
162 št 5 0.157%
163 5 0.157%
164 jd 5 0.157%
165 kl 5 0.157%
166 ku 5 0.157%
167 Ju 5 0.157%
168 Ko 5 0.157%
169 oj 5 0.157%
170 01 4 0.126%
171 11 4 0.126%
172 rb 4 0.126%
173 rm 4 0.126%
174 bo 4 0.126%
175 sa 4 0.126%
176 si 4 0.126%
177 ci 4 0.126%
178 tj 4 0.126%
179 tv 4 0.126%
180 To 4 0.126%
181 eh 4 0.126%
182 ži 4 0.126%
183 žn 4 0.126%
184 vl 4 0.126%
185 4 0.126%
186 4 0.126%
187 ib 4 0.126%
188 id 4 0.126%
189 šk 4 0.126%
190 zg 4 0.126%
191 zi 4 0.126%
192 mo 4 0.126%
193 .1 4 0.126%
194 nt 4 0.126%
195 oc 4 0.126%
196 of 4 0.126%
197 ac 3 0.094%
198 13 3 0.094%
199 19 3 0.094%
200 Ag 3 0.094%
201 br 3 0.094%
202 Ro 3 0.094%
203 sv 3 0.094%
204 ck 3 0.094%
205 Br 3 0.094%
206 dl 3 0.094%
207 ud 3 0.094%
208 du 3 0.094%
209 um 3 0.094%
210 up 3 0.094%
211 ut 3 0.094%
212 vn 3 0.094%
213 62 3 0.094%
214 vs 3 0.094%
215 66 3 0.094%
216 fi 3 0.094%
217 he 3 0.094%
218 hk 3 0.094%
219 ho 3 0.094%
220 9. 3 0.094%
221 ig 3 0.094%
222 im 3 0.094%
223 šn 3 0.094%
224 3 0.094%
225 nd 3 0.094%
226 .2 3 0.094%
227 00 2 0.063%
228 pt 2 0.063%
229 09 2 0.063%
230 12 2 0.063%
231 ag 2 0.063%
232 rc 2 0.063%
233 rd 2 0.063%
234 rg 2 0.063%
235 rn 2 0.063%
236 21 2 0.063%
237 rs 2 0.063%
238 2: 2 0.063%
239 Al 2 0.063%
240 An 2 0.063%
241 sl 2 0.063%
242 3. 2 0.063%
243 su 2 0.063%
244 1s 2 0.063%
245 th 2 0.063%
246 tn 2 0.063%
247 db 2 0.063%
248 Sr 2 0.063%
249 tu 2 0.063%
250 46 2 0.063%
251 dg 2 0.063%
252 dk 2 0.063%
253 ub 2 0.063%
254 dt 2 0.063%
255 Da 2 0.063%
256 vd 2 0.063%
257 fa 2 0.063%
258 vr 2 0.063%
259 ff 2 0.063%
260 vz 2 0.063%
261 fo 2 0.063%
262 Fi 2 0.063%
263 2 0.063%
264 gu 2 0.063%
265 8. 2 0.063%
266 čn 2 0.063%
267 Go 2 0.063%
268 98 2 0.063%
269 99 2 0.063%
270 šp 2 0.063%
271 zm 2 0.063%
272 zn 2 0.063%
273 jc 2 0.063%
274 Ja 2 0.063%
275 ll 2 0.063%
276 ln 2 0.063%
277 2 0.063%
278 2 0.063%
279 2 0.063%
280 2 0.063%
281 nu 2 0.063%
282 2 0.063%
283 03 1 0.031%
284 08 1 0.031%
285 Pa 1 0.031%
286 Pe 1 0.031%
287 1 0.031%
288 Pl 1 0.031%
289 Po 1 0.031%
290 ab 1 0.031%
291 Pr 1 0.031%
292 rf 1 0.031%
293 rh 1 0.031%
294 t. 1 0.031%
295 2. 1 0.031%
296 22 1 0.031%
297 24 1 0.031%
298 25 1 0.031%
299 29 1 0.031%
300 bn 1 0.031%
301 SC 1 0.031%
302 sm 1 0.031%
303 30 1 0.031%
304 31 1 0.031%
305 Ba 1 0.031%
306 cc 1 0.031%
307 35 1 0.031%
308 Ru 1 0.031%
309 Be 1 0.031%
310 co 1 0.031%
311 ct 1 0.031%
312 4. 1 0.031%
313 St 1 0.031%
314 dp 1 0.031%
315 Ta 1 0.031%
316 uc 1 0.031%
317 ds 1 0.031%
318 uf 1 0.031%
319 dv 1 0.031%
320 uk 1 0.031%
321 ea 1 0.031%
322 56 1 0.031%
323 Tu 1 0.031%
324 ef 1 0.031%
325 De 1 0.031%
326 eg 1 0.031%
327 ei 1 0.031%
328 žm 1 0.031%
329 1 0.031%
330 vk 1 0.031%
331 60 1 0.031%
332 fe 1 0.031%
333 El 1 0.031%
334 Va 1 0.031%
335 fu 1 0.031%
336 1 0.031%
337 wi 1 0.031%
338 i' 1 0.031%
339 gi 1 0.031%
340 Fr 1 0.031%
341 čb 1 0.031%
342 hi 1 0.031%
343 I. 1 0.031%
344 ču 1 0.031%
345 hr 1 0.031%
346 Šm 1 0.031%
347 ie 1 0.031%
348 97 1 0.031%
349 9: 1 0.031%
350 io 1 0.031%
351 zb 1 0.031%
352 's 1 0.031%
353 zo 1 0.031%
354 :2 1 0.031%
355 zr 1 0.031%
356 zs 1 0.031%
357 :3 1 0.031%
358 zu 1 0.031%
359 :5 1 0.031%
360 zv 1 0.031%
361 jn 1 0.031%
362 In 1 0.031%
363 jo 1 0.031%
364 js 1 0.031%
365 Iv 1 0.031%
366 kd 1 0.031%
367 Zu 1 0.031%
368 ld 1 0.031%
369 lm 1 0.031%
370 lu 1 0.031%
371 Lj 1 0.031%
372 mp 1 0.031%
373 ms 1 0.031%
374 MS 1 0.031%
375 nc 1 0.031%
376 ng 1 0.031%
377 .0 1 0.031%
378 Mo 1 0.031%
379 nr 1 0.031%
380 .7 1 0.031%
381 .9 1 0.031%
382 šč 1 0.031%
383 Ne 1 0.031%
384 oh 1 0.031%
385 oi 1 0.031%
386 ow 1 0.031%
387 pi 1 0.031%
388 pl 1 0.031%

View File

@@ -0,0 +1,455 @@
"Korpus: ";Gigafida
"Datum: ";31.01.2018 05:11
"Analiza: ";Besedni nizi
"n-gram nivo: ";1
"Skip: ";0
"Izračunaj za: ";lema
word;frequency;percent
biti;29;3.766%
in;29;3.766%
v;16;2.078%
z;12;1.558%
se;10;1.299%
on;9;1.169%
za;9;1.169%
ki;8;1.039%
na;8;1.039%
da;7;0.909%
kako;7;0.909%
o;6;0.779%
ta;5;0.649%
elina;4;0.519%
ajdov;4;0.519%
zadruga;4;0.519%
postati;4;0.519%
grozdje;4;0.519%
ne;4;0.519%
pol;4;0.519%
dodati;4;0.519%
ti;4;0.519%
cerkev;4;0.519%
kaša;4;0.519%
totenbirt;4;0.519%
približno;4;0.519%
drug;4;0.519%
sestra;4;0.519%
korenje;3;0.39%
Jurkovič;3;0.39%
do;3;0.39%
srbeč;3;0.39%
"če";3;0.39%
narod;3;0.39%
Matjaž;3;0.39%
"član";3;0.39%
Koper;3;0.39%
ura;3;0.39%
gost;3;0.39%
ob;3;0.39%
od;3;0.39%
oreh;3;0.39%
po;3;0.39%
križarjenje;3;0.39%
jaz;3;0.39%
mlad;3;0.39%
izdelovati;3;0.39%
62;3;0.39%
ogledalo;3;0.39%
kocka;3;0.39%
"še";3;0.39%
kovinski;3;0.39%
koža;3;0.39%
Agata;3;0.39%
vino;3;0.39%
dati;3;0.39%
zelenjaven;3;0.39%
juha;3;0.39%
pomaranča;3;0.39%
dobro;2;0.26%
imeti;2;0.26%
ter;2;0.26%
jesenski;2;0.26%
lahko;2;0.26%
1;2;0.26%
3;2;0.26%
korenčkov;2;0.26%
več;2;0.26%
Marta;2;0.26%
gepard;2;0.26%
ustanovitev;2;0.26%
a;2;0.26%
the;2;0.26%
tiskarna;2;0.26%
Roblek;2;0.26%
učiteljica;2;0.26%
eko;2;0.26%
torta;2;0.26%
Totenbirt;2;0.26%
ideja;2;0.26%
kuhati;2;0.26%
Javšnik;2;0.26%
"špasen";2;0.26%
voda;2;0.26%
društvo;2;0.26%
"življenje";2;0.26%
pečica;2;0.26%
ladja;2;0.26%
praven;2;0.26%
oseba;2;0.26%
medtem;2;0.26%
namen;2;0.26%
Jurkovička;2;0.26%
Martika;2;0.26%
oprati;2;0.26%
resničen;2;0.26%
kar;2;0.26%
junak;2;0.26%
Godec;2;0.26%
pa;2;0.26%
"čas";2;0.26%
"žena";2;0.26%
pekač;2;0.26%
težava;2;0.26%
1st;2;0.26%
pot;2;0.26%
ker;2;0.26%
star;2;0.26%
sodnica;2;0.26%
nekaj;2;0.26%
46;2;0.26%
officer;2;0.26%
lata;2;0.26%
pri;2;0.26%
nov;2;0.26%
Tomijev;2;0.26%
znebiti;2;0.26%
april;2;0.26%
pozdrav;2;0.26%
posoda;2;0.26%
vdova;2;0.26%
Sredozemlje;2;0.26%
svoj;2;0.26%
občina;2;0.26%
1998;2;0.26%
Alenka;2;0.26%
zgodba;2;0.26%
mesto;2;0.26%
pravi;2;0.26%
Fijavž;2;0.26%
velik;2;0.26%
potem;2;0.26%
veličasten;2;0.26%
zahoden;2;0.26%
organizacija;1;0.13%
odvisno;1;0.13%
dekan;1;0.13%
viroza;1;0.13%
drunk;1;0.13%
pričati;1;0.13%
Brolo;1;0.13%
Končar;1;0.13%
tek;1;0.13%
sister;1;0.13%
okusen;1;0.13%
dokler;1;0.13%
izgubiti;1;0.13%
pospeševati;1;0.13%
zvezdniški;1;0.13%
vključno;1;0.13%
spoštovan;1;0.13%
5;1;0.13%
cek;1;0.13%
1113;1;0.13%
roka;1;0.13%
g;1;0.13%
nedoločen;1;0.13%
izumirati;1;0.13%
uporabiti;1;0.13%
pomarančen;1;0.13%
Darko;1;0.13%
polica;1;0.13%
Frenk;1;0.13%
križarjanje;1;0.13%
de;1;0.13%
gospodarski;1;0.13%
Marseille;1;0.13%
dl;1;0.13%
torinski;1;0.13%
12:35;1;0.13%
strah;1;0.13%
Danijel;1;0.13%
vliti;1;0.13%
"ženska";1;0.13%
kompas;1;0.13%
iti;1;0.13%
test;1;0.13%
ustaviti;1;0.13%
Barcelona;1;0.13%
tako;1;0.13%
en;1;0.13%
premešati;1;0.13%
upravljanje;1;0.13%
sutano;1;0.13%
Tanja;1;0.13%
naročiti;1;0.13%
09.11.2010;1;0.13%
intermarketing;1;0.13%
nakazovati;1;0.13%
križariti;1;0.13%
2010;1;0.13%
2130;1;0.13%
zaprt;1;0.13%
prezgodaj;1;0.13%
zdeti;1;0.13%
arhivo;1;0.13%
sin;1;0.13%
akreditacija;1;0.13%
Performs;1;0.13%
paličen;1;0.13%
Marijana;1;0.13%
sladkor;1;0.13%
potekati;1;0.13%
istospolno;1;0.13%
12:25;1;0.13%
I.;1;0.13%
tisti;1;0.13%
jesti;1;0.13%
vnaprej;1;0.13%
naj;1;0.13%
mehko;1;0.13%
judge;1;0.13%
tukaj;1;0.13%
iz;1;0.13%
foto;1;0.13%
palma;1;0.13%
Mojca;1;0.13%
nizek;1;0.13%
blagajna;1;0.13%
mešalnik;1;0.13%
"želeti";1;0.13%
vse;1;0.13%
31.10;1;0.13%
okus;1;0.13%
dragocen;1;0.13%
pojasnjevati;1;0.13%
optimist;1;0.13%
jogurt;1;0.13%
vsebovati;1;0.13%
skorajda;1;0.13%
operacija;1;0.13%
ko;1;0.13%
podjetje;1;0.13%
teden;1;0.13%
ustanoviti;1;0.13%
Kofu;1;0.13%
666;1;0.13%
druga;1;0.13%
motnja;1;0.13%
košček;1;0.13%
izbrati;1;0.13%
prav;1;0.13%
ogret;1;0.13%
rezina;1;0.13%
odgovoren;1;0.13%
vsota;1;0.13%
Planinšek;1;0.13%
pridružiti;1;0.13%
sok;1;0.13%
Indija;1;0.13%
fantastica;1;0.13%
Palermo;1;0.13%
dober;1;0.13%
"člen";1;0.13%
29.03.2010;1;0.13%
splošen;1;0.13%
pojav;1;0.13%
ali;1;0.13%
poslednji;1;0.13%
priokus;1;0.13%
račun;1;0.13%
trg;1;0.13%
proklamirati;1;0.13%
nazaj;1;0.13%
Anand;1;0.13%
pecilen;1;0.13%
vame;1;0.13%
peč;1;0.13%
edinstven;1;0.13%
1.7;1;0.13%
cena;1;0.13%
usta;1;0.13%
med;1;0.13%
veliko;1;0.13%
zmešati;1;0.13%
ogledati;1;0.13%
srbečica;1;0.13%
Maja;1;0.13%
21.;1;0.13%
kaj;1;0.13%
Branko;1;0.13%
zelo;1;0.13%
Mallorca;1;0.13%
polovica;1;0.13%
zakon;1;0.13%
aranžma;1;0.13%
antikrist;1;0.13%
bert;1;0.13%
minuta;1;0.13%
urednik;1;0.13%
poleg;1;0.13%
volilen;1;0.13%
priloga;1;0.13%
mareziga;1;0.13%
unikaten;1;0.13%
križati;1;0.13%
dunajski;1;0.13%
Detela;1;0.13%
jurkovička;1;0.13%
naročnik;1;0.13%
naš;1;0.13%
pred;1;0.13%
lep;1;0.13%
bogastvo;1;0.13%
1.;1;0.13%
ključen;1;0.13%
6000;1;0.13%
penast;1;0.13%
"čast";1;0.13%
2.;1;0.13%
20;1;0.13%
peška;1;0.13%
22;1;0.13%
moka;1;0.13%
narezati;1;0.13%
mik;1;0.13%
danes;1;0.13%
"članica";1;0.13%
ravno;1;0.13%
odpraviti;1;0.13%
sprejemljiv;1;0.13%
uresničevati;1;0.13%
pristop;1;0.13%
oni;1;0.13%
ponuditi;1;0.13%
obiskati;1;0.13%
mogoč;1;0.13%
določba;1;0.13%
jed;1;0.13%
umešati;1;0.13%
tekoč;1;0.13%
Ivek;1;0.13%
Neapelj;1;0.13%
povzročati;1;0.13%
kateri;1;0.13%
pogost;1;0.13%
izdelan;1;0.13%
izstop;1;0.13%
prt;1;0.13%
referendum;1;0.13%
66;1;0.13%
preprost;1;0.13%
komedija;1;0.13%
lupinica;1;0.13%
Eli's;1;0.13%
masa;1;0.13%
korist;1;0.13%
recept;1;0.13%
požig;1;0.13%
vzeti;1;0.13%
komisija;1;0.13%
Ankaran;1;0.13%
prositi;1;0.13%
tudi;1;0.13%
posebej;1;0.13%
8.;1;0.13%
svoboden;1;0.13%
sanjati;1;0.13%
Tunis;1;0.13%
ohraniti;1;0.13%
kolobar;1;0.13%
dieten;1;0.13%
19.;1;0.13%
Valentinrozman;1;0.13%
09:56;1;0.13%
kloniranje;1;0.13%
začeti;1;0.13%
anatemizirati;1;0.13%
streti;1;0.13%
97;1;0.13%
zaradi;1;0.13%
nekdo;1;0.13%
sodelovanje;1;0.13%
križarka;1;0.13%
prostovoljen;1;0.13%
počitnice;1;0.13%
"število";1;0.13%
jesen;1;0.13%
koncert;1;0.13%
Prison;1;0.13%
prošnja;1;0.13%
"želodec";1;0.13%
older;1;0.13%
MSC;1;0.13%
prašek;1;0.13%
"št.";1;0.13%
Stepančič;1;0.13%
zavreti;1;0.13%
"škofija";1;0.13%
lahek;1;0.13%
prispevek;1;0.13%
južek;1;0.13%
temeljiti;1;0.13%
novinar;1;0.13%
popeljati;1;0.13%
"Šmarje";1;0.13%
zavračati;1;0.13%
oziroma;1;0.13%
ustanoiveti;1;0.13%
Beljan;1;0.13%
dermatologinja;1;0.13%
goljufati;1;0.13%
okrogel;1;0.13%
Brecelj;1;0.13%
Podobnik;1;0.13%
13.9;1;0.13%
prinašati;1;0.13%
soliti;1;0.13%
neškodljiv;1;0.13%
widow;1;0.13%
skrivati;1;0.13%
08.11.2010;1;0.13%
usmerjen;1;0.13%
genova;1;0.13%
dan;1;0.13%
24.10;1;0.13%
akreditirati;1;0.13%
narediti;1;0.13%
peder;1;0.13%
ves;1;0.13%
liter;1;0.13%
posuti;1;0.13%
zakaj;1;0.13%
odkrivati;1;0.13%
Roberto;1;0.13%
detective;1;0.13%
Ručigaj;1;0.13%
bolan;1;0.13%
odstraniti;1;0.13%
jajce;1;0.13%
odličen;1;0.13%
konec;1;0.13%
posest;1;0.13%
nared;1;0.13%
duhovnik;1;0.13%
pogledati;1;0.13%
sreča;1;0.13%
zato;1;0.13%
cesta;1;0.13%
saj;1;0.13%
sam;1;0.13%
opreka;1;0.13%
enakopraven;1;0.13%
olje;1;0.13%
Ljubljana;1;0.13%
Zucco;1;0.13%
1 Korpus: Gigafida
2 Datum: 31.01.2018 05:11
3 Analiza: Besedni nizi
4 n-gram nivo: 1
5 Skip: 0
6 Izračunaj za: lema
7 word frequency percent
8 biti 29 3.766%
9 in 29 3.766%
10 v 16 2.078%
11 z 12 1.558%
12 se 10 1.299%
13 on 9 1.169%
14 za 9 1.169%
15 ki 8 1.039%
16 na 8 1.039%
17 da 7 0.909%
18 kako 7 0.909%
19 o 6 0.779%
20 ta 5 0.649%
21 elina 4 0.519%
22 ajdov 4 0.519%
23 zadruga 4 0.519%
24 postati 4 0.519%
25 grozdje 4 0.519%
26 ne 4 0.519%
27 pol 4 0.519%
28 dodati 4 0.519%
29 ti 4 0.519%
30 cerkev 4 0.519%
31 kaša 4 0.519%
32 totenbirt 4 0.519%
33 približno 4 0.519%
34 drug 4 0.519%
35 sestra 4 0.519%
36 korenje 3 0.39%
37 Jurkovič 3 0.39%
38 do 3 0.39%
39 srbeč 3 0.39%
40 če 3 0.39%
41 narod 3 0.39%
42 Matjaž 3 0.39%
43 član 3 0.39%
44 Koper 3 0.39%
45 ura 3 0.39%
46 gost 3 0.39%
47 ob 3 0.39%
48 od 3 0.39%
49 oreh 3 0.39%
50 po 3 0.39%
51 križarjenje 3 0.39%
52 jaz 3 0.39%
53 mlad 3 0.39%
54 izdelovati 3 0.39%
55 62 3 0.39%
56 ogledalo 3 0.39%
57 kocka 3 0.39%
58 še 3 0.39%
59 kovinski 3 0.39%
60 koža 3 0.39%
61 Agata 3 0.39%
62 vino 3 0.39%
63 dati 3 0.39%
64 zelenjaven 3 0.39%
65 juha 3 0.39%
66 pomaranča 3 0.39%
67 dobro 2 0.26%
68 imeti 2 0.26%
69 ter 2 0.26%
70 jesenski 2 0.26%
71 lahko 2 0.26%
72 1 2 0.26%
73 3 2 0.26%
74 korenčkov 2 0.26%
75 več 2 0.26%
76 Marta 2 0.26%
77 gepard 2 0.26%
78 ustanovitev 2 0.26%
79 a 2 0.26%
80 the 2 0.26%
81 tiskarna 2 0.26%
82 Roblek 2 0.26%
83 učiteljica 2 0.26%
84 eko 2 0.26%
85 torta 2 0.26%
86 Totenbirt 2 0.26%
87 ideja 2 0.26%
88 kuhati 2 0.26%
89 Javšnik 2 0.26%
90 špasen 2 0.26%
91 voda 2 0.26%
92 društvo 2 0.26%
93 življenje 2 0.26%
94 pečica 2 0.26%
95 ladja 2 0.26%
96 praven 2 0.26%
97 oseba 2 0.26%
98 medtem 2 0.26%
99 namen 2 0.26%
100 Jurkovička 2 0.26%
101 Martika 2 0.26%
102 oprati 2 0.26%
103 resničen 2 0.26%
104 kar 2 0.26%
105 junak 2 0.26%
106 Godec 2 0.26%
107 pa 2 0.26%
108 čas 2 0.26%
109 žena 2 0.26%
110 pekač 2 0.26%
111 težava 2 0.26%
112 1st 2 0.26%
113 pot 2 0.26%
114 ker 2 0.26%
115 star 2 0.26%
116 sodnica 2 0.26%
117 nekaj 2 0.26%
118 46 2 0.26%
119 officer 2 0.26%
120 lata 2 0.26%
121 pri 2 0.26%
122 nov 2 0.26%
123 Tomijev 2 0.26%
124 znebiti 2 0.26%
125 april 2 0.26%
126 pozdrav 2 0.26%
127 posoda 2 0.26%
128 vdova 2 0.26%
129 Sredozemlje 2 0.26%
130 svoj 2 0.26%
131 občina 2 0.26%
132 1998 2 0.26%
133 Alenka 2 0.26%
134 zgodba 2 0.26%
135 mesto 2 0.26%
136 pravi 2 0.26%
137 Fijavž 2 0.26%
138 velik 2 0.26%
139 potem 2 0.26%
140 veličasten 2 0.26%
141 zahoden 2 0.26%
142 organizacija 1 0.13%
143 odvisno 1 0.13%
144 dekan 1 0.13%
145 viroza 1 0.13%
146 drunk 1 0.13%
147 pričati 1 0.13%
148 Brolo 1 0.13%
149 Končar 1 0.13%
150 tek 1 0.13%
151 sister 1 0.13%
152 okusen 1 0.13%
153 dokler 1 0.13%
154 izgubiti 1 0.13%
155 pospeševati 1 0.13%
156 zvezdniški 1 0.13%
157 vključno 1 0.13%
158 spoštovan 1 0.13%
159 5 1 0.13%
160 cek 1 0.13%
161 1113 1 0.13%
162 roka 1 0.13%
163 g 1 0.13%
164 nedoločen 1 0.13%
165 izumirati 1 0.13%
166 uporabiti 1 0.13%
167 pomarančen 1 0.13%
168 Darko 1 0.13%
169 polica 1 0.13%
170 Frenk 1 0.13%
171 križarjanje 1 0.13%
172 de 1 0.13%
173 gospodarski 1 0.13%
174 Marseille 1 0.13%
175 dl 1 0.13%
176 torinski 1 0.13%
177 12:35 1 0.13%
178 strah 1 0.13%
179 Danijel 1 0.13%
180 vliti 1 0.13%
181 ženska 1 0.13%
182 kompas 1 0.13%
183 iti 1 0.13%
184 test 1 0.13%
185 ustaviti 1 0.13%
186 Barcelona 1 0.13%
187 tako 1 0.13%
188 en 1 0.13%
189 premešati 1 0.13%
190 upravljanje 1 0.13%
191 sutano 1 0.13%
192 Tanja 1 0.13%
193 naročiti 1 0.13%
194 09.11.2010 1 0.13%
195 intermarketing 1 0.13%
196 nakazovati 1 0.13%
197 križariti 1 0.13%
198 2010 1 0.13%
199 2130 1 0.13%
200 zaprt 1 0.13%
201 prezgodaj 1 0.13%
202 zdeti 1 0.13%
203 arhivo 1 0.13%
204 sin 1 0.13%
205 akreditacija 1 0.13%
206 Performs 1 0.13%
207 paličen 1 0.13%
208 Marijana 1 0.13%
209 sladkor 1 0.13%
210 potekati 1 0.13%
211 istospolno 1 0.13%
212 12:25 1 0.13%
213 I. 1 0.13%
214 tisti 1 0.13%
215 jesti 1 0.13%
216 vnaprej 1 0.13%
217 naj 1 0.13%
218 mehko 1 0.13%
219 judge 1 0.13%
220 tukaj 1 0.13%
221 iz 1 0.13%
222 foto 1 0.13%
223 palma 1 0.13%
224 Mojca 1 0.13%
225 nizek 1 0.13%
226 blagajna 1 0.13%
227 mešalnik 1 0.13%
228 želeti 1 0.13%
229 vse 1 0.13%
230 31.10 1 0.13%
231 okus 1 0.13%
232 dragocen 1 0.13%
233 pojasnjevati 1 0.13%
234 optimist 1 0.13%
235 jogurt 1 0.13%
236 vsebovati 1 0.13%
237 skorajda 1 0.13%
238 operacija 1 0.13%
239 ko 1 0.13%
240 podjetje 1 0.13%
241 teden 1 0.13%
242 ustanoviti 1 0.13%
243 Kofu 1 0.13%
244 666 1 0.13%
245 druga 1 0.13%
246 motnja 1 0.13%
247 košček 1 0.13%
248 izbrati 1 0.13%
249 prav 1 0.13%
250 ogret 1 0.13%
251 rezina 1 0.13%
252 odgovoren 1 0.13%
253 vsota 1 0.13%
254 Planinšek 1 0.13%
255 pridružiti 1 0.13%
256 sok 1 0.13%
257 Indija 1 0.13%
258 fantastica 1 0.13%
259 Palermo 1 0.13%
260 dober 1 0.13%
261 člen 1 0.13%
262 29.03.2010 1 0.13%
263 splošen 1 0.13%
264 pojav 1 0.13%
265 ali 1 0.13%
266 poslednji 1 0.13%
267 priokus 1 0.13%
268 račun 1 0.13%
269 trg 1 0.13%
270 proklamirati 1 0.13%
271 nazaj 1 0.13%
272 Anand 1 0.13%
273 pecilen 1 0.13%
274 vame 1 0.13%
275 peč 1 0.13%
276 edinstven 1 0.13%
277 1.7 1 0.13%
278 cena 1 0.13%
279 usta 1 0.13%
280 med 1 0.13%
281 veliko 1 0.13%
282 zmešati 1 0.13%
283 ogledati 1 0.13%
284 srbečica 1 0.13%
285 Maja 1 0.13%
286 21. 1 0.13%
287 kaj 1 0.13%
288 Branko 1 0.13%
289 zelo 1 0.13%
290 Mallorca 1 0.13%
291 polovica 1 0.13%
292 zakon 1 0.13%
293 aranžma 1 0.13%
294 antikrist 1 0.13%
295 bert 1 0.13%
296 minuta 1 0.13%
297 urednik 1 0.13%
298 poleg 1 0.13%
299 volilen 1 0.13%
300 priloga 1 0.13%
301 mareziga 1 0.13%
302 unikaten 1 0.13%
303 križati 1 0.13%
304 dunajski 1 0.13%
305 Detela 1 0.13%
306 jurkovička 1 0.13%
307 naročnik 1 0.13%
308 naš 1 0.13%
309 pred 1 0.13%
310 lep 1 0.13%
311 bogastvo 1 0.13%
312 1. 1 0.13%
313 ključen 1 0.13%
314 6000 1 0.13%
315 penast 1 0.13%
316 čast 1 0.13%
317 2. 1 0.13%
318 20 1 0.13%
319 peška 1 0.13%
320 22 1 0.13%
321 moka 1 0.13%
322 narezati 1 0.13%
323 mik 1 0.13%
324 danes 1 0.13%
325 članica 1 0.13%
326 ravno 1 0.13%
327 odpraviti 1 0.13%
328 sprejemljiv 1 0.13%
329 uresničevati 1 0.13%
330 pristop 1 0.13%
331 oni 1 0.13%
332 ponuditi 1 0.13%
333 obiskati 1 0.13%
334 mogoč 1 0.13%
335 določba 1 0.13%
336 jed 1 0.13%
337 umešati 1 0.13%
338 tekoč 1 0.13%
339 Ivek 1 0.13%
340 Neapelj 1 0.13%
341 povzročati 1 0.13%
342 kateri 1 0.13%
343 pogost 1 0.13%
344 izdelan 1 0.13%
345 izstop 1 0.13%
346 prt 1 0.13%
347 referendum 1 0.13%
348 66 1 0.13%
349 preprost 1 0.13%
350 komedija 1 0.13%
351 lupinica 1 0.13%
352 Eli's 1 0.13%
353 masa 1 0.13%
354 korist 1 0.13%
355 recept 1 0.13%
356 požig 1 0.13%
357 vzeti 1 0.13%
358 komisija 1 0.13%
359 Ankaran 1 0.13%
360 prositi 1 0.13%
361 tudi 1 0.13%
362 posebej 1 0.13%
363 8. 1 0.13%
364 svoboden 1 0.13%
365 sanjati 1 0.13%
366 Tunis 1 0.13%
367 ohraniti 1 0.13%
368 kolobar 1 0.13%
369 dieten 1 0.13%
370 19. 1 0.13%
371 Valentinrozman 1 0.13%
372 09:56 1 0.13%
373 kloniranje 1 0.13%
374 začeti 1 0.13%
375 anatemizirati 1 0.13%
376 streti 1 0.13%
377 97 1 0.13%
378 zaradi 1 0.13%
379 nekdo 1 0.13%
380 sodelovanje 1 0.13%
381 križarka 1 0.13%
382 prostovoljen 1 0.13%
383 počitnice 1 0.13%
384 število 1 0.13%
385 jesen 1 0.13%
386 koncert 1 0.13%
387 Prison 1 0.13%
388 prošnja 1 0.13%
389 želodec 1 0.13%
390 older 1 0.13%
391 MSC 1 0.13%
392 prašek 1 0.13%
393 št. 1 0.13%
394 Stepančič 1 0.13%
395 zavreti 1 0.13%
396 škofija 1 0.13%
397 lahek 1 0.13%
398 prispevek 1 0.13%
399 južek 1 0.13%
400 temeljiti 1 0.13%
401 novinar 1 0.13%
402 popeljati 1 0.13%
403 Šmarje 1 0.13%
404 zavračati 1 0.13%
405 oziroma 1 0.13%
406 ustanoiveti 1 0.13%
407 Beljan 1 0.13%
408 dermatologinja 1 0.13%
409 goljufati 1 0.13%
410 okrogel 1 0.13%
411 Brecelj 1 0.13%
412 Podobnik 1 0.13%
413 13.9 1 0.13%
414 prinašati 1 0.13%
415 soliti 1 0.13%
416 neškodljiv 1 0.13%
417 widow 1 0.13%
418 skrivati 1 0.13%
419 08.11.2010 1 0.13%
420 usmerjen 1 0.13%
421 genova 1 0.13%
422 dan 1 0.13%
423 24.10 1 0.13%
424 akreditirati 1 0.13%
425 narediti 1 0.13%
426 peder 1 0.13%
427 ves 1 0.13%
428 liter 1 0.13%
429 posuti 1 0.13%
430 zakaj 1 0.13%
431 odkrivati 1 0.13%
432 Roberto 1 0.13%
433 detective 1 0.13%
434 Ručigaj 1 0.13%
435 bolan 1 0.13%
436 odstraniti 1 0.13%
437 jajce 1 0.13%
438 odličen 1 0.13%
439 konec 1 0.13%
440 posest 1 0.13%
441 nared 1 0.13%
442 duhovnik 1 0.13%
443 pogledati 1 0.13%
444 sreča 1 0.13%
445 zato 1 0.13%
446 cesta 1 0.13%
447 saj 1 0.13%
448 sam 1 0.13%
449 opreka 1 0.13%
450 enakopraven 1 0.13%
451 olje 1 0.13%
452 Ljubljana 1 0.13%
453 Zucco 1 0.13%

View File

@@ -0,0 +1,512 @@
"Korpus: ";Gigafida
"Datum: ";25.01.2018 06:27
"Analiza: ";Besedni nizi
"n-gram nivo: ";1
"Skip: ";0
"Izračunaj za: ";različnica
word;frequency;percent
in;29;3.766%
v;16;2.078%
je;14;1.818%
za;9;1.169%
ki;8;1.039%
na;8;1.039%
se;8;1.039%
da;7;0.909%
kako;7;0.909%
bi;6;0.779%
o;6;0.779%
s;6;0.779%
z;6;0.779%
elina;4;0.519%
dodamo;4;0.519%
ne;4;0.519%
pol;4;0.519%
ogledala;4;0.519%
totenbirt;4;0.519%
kašo;4;0.519%
približno;4;0.519%
sestra;4;0.519%
ajdovo;4;0.519%
korenje;3;0.39%
Jurkovič;3;0.39%
do;3;0.39%
izdelujejo;3;0.39%
"če";3;0.39%
Koper;3;0.39%
ure;3;0.39%
"članov";3;0.39%
drugo;3;0.39%
postane;3;0.39%
mu;3;0.39%
grozdje;3;0.39%
ob;3;0.39%
od;3;0.39%
po;3;0.39%
cerkev;3;0.39%
62;3;0.39%
"še";3;0.39%
kovinska;3;0.39%
Agata;3;0.39%
juho;3;0.39%
zahodnega;2;0.26%
tem;2;0.26%
ter;2;0.26%
lahko;2;0.26%
1;2;0.26%
3;2;0.26%
več;2;0.26%
Marta;2;0.26%
gepard;2;0.26%
bo;2;0.26%
ustanovitev;2;0.26%
a;2;0.26%
the;2;0.26%
tiskarna;2;0.26%
Roblek;2;0.26%
učiteljica;2;0.26%
eko;2;0.26%
Totenbirt;2;0.26%
idejo;2;0.26%
težav;2;0.26%
vode;2;0.26%
resnična;2;0.26%
novih;2;0.26%
orehe;2;0.26%
zadruga;2;0.26%
Matjaž;2;0.26%
nam;2;0.26%
jo;2;0.26%
vse;2;0.26%
medtem;2;0.26%
namen;2;0.26%
Jurkovička;2;0.26%
aprila;2;0.26%
Martika;2;0.26%
starejša;2;0.26%
srbečo;2;0.26%
junak;2;0.26%
Godec;2;0.26%
gosta;2;0.26%
pa;2;0.26%
kuhamo;2;0.26%
križarjenju;2;0.26%
"času";2;0.26%
"žena";2;0.26%
pekač;2;0.26%
1st;2;0.26%
pot;2;0.26%
si;2;0.26%
ker;2;0.26%
sodnica;2;0.26%
občin;2;0.26%
nekaj;2;0.26%
46;2;0.26%
officer;2;0.26%
late;2;0.26%
pri;2;0.26%
zelenjavne;2;0.26%
damo;2;0.26%
znebiti;2;0.26%
jih;2;0.26%
kocke;2;0.26%
operemo;2;0.26%
posodi;2;0.26%
kožo;2;0.26%
tomijeva;2;0.26%
vas;2;0.26%
bosta;2;0.26%
mlajša;2;0.26%
tega;2;0.26%
vdova;2;0.26%
Sredozemlja;2;0.26%
1998;2;0.26%
korenčkovo;2;0.26%
vino;2;0.26%
zgodba;2;0.26%
ima;2;0.26%
Fijavž;2;0.26%
potem;2;0.26%
organizacija;1;0.13%
dobro;1;0.13%
zadrugah;1;0.13%
odvisno;1;0.13%
požigom;1;0.13%
svobodnem;1;0.13%
drunk;1;0.13%
Brolo;1;0.13%
priokusom;1;0.13%
Končar;1;0.13%
začne;1;0.13%
tek;1;0.13%
sister;1;0.13%
naša;1;0.13%
zvezdniška;1;0.13%
dokler;1;0.13%
pravno;1;0.13%
pospeševati;1;0.13%
vključno;1;0.13%
5;1;0.13%
zavremo;1;0.13%
jesenska;1;0.13%
dietna;1;0.13%
pravne;1;0.13%
ValentinRozman;1;0.13%
postali;1;0.13%
roko;1;0.13%
1113;1;0.13%
izberemo;1;0.13%
kolobarje;1;0.13%
zavračamo;1;0.13%
sami;1;0.13%
g;1;0.13%
narežemo;1;0.13%
popeljala;1;0.13%
uporabite;1;0.13%
police;1;0.13%
okusu;1;0.13%
Darko;1;0.13%
"špasnem";1;0.13%
gospodarske;1;0.13%
de;1;0.13%
upravljanju;1;0.13%
torto;1;0.13%
Marseille;1;0.13%
dl;1;0.13%
križarjanju;1;0.13%
12:35;1;0.13%
torta;1;0.13%
mladi;1;0.13%
Neaplja;1;0.13%
temelji;1;0.13%
proklamirali;1;0.13%
odličnimi;1;0.13%
tako;1;0.13%
pristopu;1;0.13%
testu;1;0.13%
sutano;1;0.13%
Tanja;1;0.13%
poslednjih;1;0.13%
Barcelono;1;0.13%
cerkvi;1;0.13%
Javšnik;1;0.13%
09.11.2010;1;0.13%
intermarketing;1;0.13%
onemu;1;0.13%
volilna;1;0.13%
nakazovati;1;0.13%
"števila";1;0.13%
2010;1;0.13%
2130;1;0.13%
akreditacijo;1;0.13%
mogoče;1;0.13%
društvu;1;0.13%
prezgodaj;1;0.13%
arhivo;1;0.13%
svojega;1;0.13%
društvo;1;0.13%
gre;1;0.13%
Performs;1;0.13%
odstranimo;1;0.13%
"življenje";1;0.13%
i.;1;0.13%
veličastni;1;0.13%
ustanovi;1;0.13%
koščkov;1;0.13%
Marijana;1;0.13%
sladkor;1;0.13%
veličastna;1;0.13%
istospolno;1;0.13%
"življenju";1;0.13%
pečico;1;0.13%
12:25;1;0.13%
viroz;1;0.13%
tekoča;1;0.13%
pečice;1;0.13%
vanjo;1;0.13%
nedoločenega;1;0.13%
posujemo;1;0.13%
"ženskami";1;0.13%
jeste;1;0.13%
narod;1;0.13%
vnaprej;1;0.13%
uresničuje;1;0.13%
ladji;1;0.13%
naj;1;0.13%
ceka;1;0.13%
ladja;1;0.13%
mehko;1;0.13%
judge;1;0.13%
dni;1;0.13%
tista;1;0.13%
palmo;1;0.13%
Mojco;1;0.13%
tukaj;1;0.13%
iz;1;0.13%
foto;1;0.13%
duhovnike;1;0.13%
ji;1;0.13%
blagajna;1;0.13%
ponudimo;1;0.13%
31.10;1;0.13%
ju;1;0.13%
edinstvena;1;0.13%
pojasnjuje;1;0.13%
smo;1;0.13%
osebe;1;0.13%
ustanoivi;1;0.13%
prostovoljnem;1;0.13%
optimist;1;0.13%
jogurt;1;0.13%
osebo;1;0.13%
skorajda;1;0.13%
ko;1;0.13%
obiskali;1;0.13%
operaciji;1;0.13%
večjih;1;0.13%
podjetje;1;0.13%
izumira;1;0.13%
novinarja;1;0.13%
druge;1;0.13%
666;1;0.13%
drugi;1;0.13%
premešamo;1;0.13%
motnja;1;0.13%
prav;1;0.13%
določbe;1;0.13%
peči;1;0.13%
Indiji;1;0.13%
penasto;1;0.13%
Palermu;1;0.13%
Planinšek;1;0.13%
sok;1;0.13%
fantastica;1;0.13%
dober;1;0.13%
"člen";1;0.13%
počitnic;1;0.13%
ust;1;0.13%
gosto;1;0.13%
mi;1;0.13%
29.03.2010;1;0.13%
pojav;1;0.13%
ali;1;0.13%
račun;1;0.13%
torinskim;1;0.13%
grozdju;1;0.13%
trg;1;0.13%
Mallorco;1;0.13%
nazaj;1;0.13%
vami;1;0.13%
koristi;1;0.13%
rezino;1;0.13%
"špasnega";1;0.13%
naročili;1;0.13%
srbečico;1;0.13%
1.7;1;0.13%
cena;1;0.13%
Javšnika;1;0.13%
med;1;0.13%
veliko;1;0.13%
Maja;1;0.13%
21.;1;0.13%
kaj;1;0.13%
Branko;1;0.13%
zelo;1;0.13%
polovico;1;0.13%
nižja;1;0.13%
velike;1;0.13%
kar;1;0.13%
pedri;1;0.13%
strli;1;0.13%
zakon;1;0.13%
aranžma;1;0.13%
bert;1;0.13%
srbeče;1;0.13%
povzroča;1;0.13%
urednik;1;0.13%
jeseni;1;0.13%
prilogi;1;0.13%
poleg;1;0.13%
dekani;1;0.13%
vsote;1;0.13%
marezige;1;0.13%
Matjaža;1;0.13%
križati;1;0.13%
Detela;1;0.13%
jurkovička;1;0.13%
vsebuje;1;0.13%
naročnik;1;0.13%
dunajska;1;0.13%
odkrivajte;1;0.13%
pred;1;0.13%
lep;1;0.13%
anatemizirala;1;0.13%
bogastvo;1;0.13%
1.;1;0.13%
spoštovani;1;0.13%
antikrista;1;0.13%
bolni;1;0.13%
6000;1;0.13%
ste;1;0.13%
goljufal;1;0.13%
zaprta;1;0.13%
"čast";1;0.13%
mešalnikom;1;0.13%
pozdravi;1;0.13%
sinov;1;0.13%
križarjenja;1;0.13%
peške;1;0.13%
2.;1;0.13%
20;1;0.13%
minut;1;0.13%
22;1;0.13%
prtom;1;0.13%
danes;1;0.13%
ohranimo;1;0.13%
sprejemljivi;1;0.13%
"članica";1;0.13%
paličnim;1;0.13%
ravno;1;0.13%
odpraviti;1;0.13%
Anandm;1;0.13%
umešamo;1;0.13%
ta;1;0.13%
pridružite;1;0.13%
prinašala;1;0.13%
zdi;1;0.13%
Tunisu;1;0.13%
jed;1;0.13%
splošne;1;0.13%
ogreto;1;0.13%
Ivek;1;0.13%
odgovorni;1;0.13%
"želimo";1;0.13%
pecilni;1;0.13%
dala;1;0.13%
skrivali;1;0.13%
bolje;1;0.13%
moko;1;0.13%
solimo;1;0.13%
izgubil;1;0.13%
orehih;1;0.13%
zmešamo;1;0.13%
referendum;1;0.13%
66;1;0.13%
poglejte;1;0.13%
maso;1;0.13%
zelenjavna;1;0.13%
preprost;1;0.13%
komedija;1;0.13%
Eli's;1;0.13%
recept;1;0.13%
komisija;1;0.13%
Ankaran;1;0.13%
naroda;1;0.13%
Kofujem;1;0.13%
tudi;1;0.13%
posebej;1;0.13%
usmerjeni;1;0.13%
8.;1;0.13%
lupinico;1;0.13%
zadrugo;1;0.13%
narodom;1;0.13%
kocka;1;0.13%
katerega;1;0.13%
19.;1;0.13%
izstopu;1;0.13%
09:56;1;0.13%
vzamemo;1;0.13%
pozdrav;1;0.13%
"škofije";1;0.13%
"čemer";1;0.13%
97;1;0.13%
zaradi;1;0.13%
izdelana;1;0.13%
nekdo;1;0.13%
kloniranju;1;0.13%
vam;1;0.13%
okusna;1;0.13%
boste;1;0.13%
križarki;1;0.13%
prosimo;1;0.13%
unikatna;1;0.13%
ključno;1;0.13%
kože;1;0.13%
enem;1;0.13%
naredimo;1;0.13%
koncert;1;0.13%
Prison;1;0.13%
prošnja;1;0.13%
"želodec";1;0.13%
Frenki;1;0.13%
older;1;0.13%
MSC;1;0.13%
prašek;1;0.13%
kompasom;1;0.13%
"št.";1;0.13%
posesti;1;0.13%
Stepančič;1;0.13%
pomarančni;1;0.13%
lahek;1;0.13%
prispevek;1;0.13%
južek;1;0.13%
koncu;1;0.13%
"Šmarje";1;0.13%
oziroma;1;0.13%
Beljan;1;0.13%
dermatologinja;1;0.13%
okrogel;1;0.13%
vlijemo;1;0.13%
Brecelj;1;0.13%
Podobnik;1;0.13%
13.9;1;0.13%
Alenke;1;0.13%
priča;1;0.13%
neškodljiv;1;0.13%
widow;1;0.13%
nismo;1;0.13%
Alenka;1;0.13%
08.11.2010;1;0.13%
strahu;1;0.13%
genove;1;0.13%
tednu;1;0.13%
vinu;1;0.13%
potekal;1;0.13%
24.10;1;0.13%
sanja;1;0.13%
dragoceno;1;0.13%
akreditirate;1;0.13%
liter;1;0.13%
mesta;1;0.13%
zakaj;1;0.13%
ustavili;1;0.13%
Roberto;1;0.13%
detective;1;0.13%
Danijela;1;0.13%
Ručigaj;1;0.13%
jajci;1;0.13%
mesti;1;0.13%
mika;1;0.13%
nared;1;0.13%
pravo;1;0.13%
križarili;1;0.13%
bila;1;0.13%
sodelovanju;1;0.13%
prava;1;0.13%
zato;1;0.13%
cesta;1;0.13%
saj;1;0.13%
srečo;1;0.13%
olje;1;0.13%
svojih;1;0.13%
Ljubljana;1;0.13%
pomaranče;1;0.13%
jesenskem;1;0.13%
pomarančo;1;0.13%
opreki;1;0.13%
najpogostejša;1;0.13%
pomaranči;1;0.13%
Zucco;1;0.13%
enakopravnem;1;0.13%
1 Korpus: Gigafida
2 Datum: 25.01.2018 06:27
3 Analiza: Besedni nizi
4 n-gram nivo: 1
5 Skip: 0
6 Izračunaj za: različnica
7 word frequency percent
8 in 29 3.766%
9 v 16 2.078%
10 je 14 1.818%
11 za 9 1.169%
12 ki 8 1.039%
13 na 8 1.039%
14 se 8 1.039%
15 da 7 0.909%
16 kako 7 0.909%
17 bi 6 0.779%
18 o 6 0.779%
19 s 6 0.779%
20 z 6 0.779%
21 elina 4 0.519%
22 dodamo 4 0.519%
23 ne 4 0.519%
24 pol 4 0.519%
25 ogledala 4 0.519%
26 totenbirt 4 0.519%
27 kašo 4 0.519%
28 približno 4 0.519%
29 sestra 4 0.519%
30 ajdovo 4 0.519%
31 korenje 3 0.39%
32 Jurkovič 3 0.39%
33 do 3 0.39%
34 izdelujejo 3 0.39%
35 če 3 0.39%
36 Koper 3 0.39%
37 ure 3 0.39%
38 članov 3 0.39%
39 drugo 3 0.39%
40 postane 3 0.39%
41 mu 3 0.39%
42 grozdje 3 0.39%
43 ob 3 0.39%
44 od 3 0.39%
45 po 3 0.39%
46 cerkev 3 0.39%
47 62 3 0.39%
48 še 3 0.39%
49 kovinska 3 0.39%
50 Agata 3 0.39%
51 juho 3 0.39%
52 zahodnega 2 0.26%
53 tem 2 0.26%
54 ter 2 0.26%
55 lahko 2 0.26%
56 1 2 0.26%
57 3 2 0.26%
58 več 2 0.26%
59 Marta 2 0.26%
60 gepard 2 0.26%
61 bo 2 0.26%
62 ustanovitev 2 0.26%
63 a 2 0.26%
64 the 2 0.26%
65 tiskarna 2 0.26%
66 Roblek 2 0.26%
67 učiteljica 2 0.26%
68 eko 2 0.26%
69 Totenbirt 2 0.26%
70 idejo 2 0.26%
71 težav 2 0.26%
72 vode 2 0.26%
73 resnična 2 0.26%
74 novih 2 0.26%
75 orehe 2 0.26%
76 zadruga 2 0.26%
77 Matjaž 2 0.26%
78 nam 2 0.26%
79 jo 2 0.26%
80 vse 2 0.26%
81 medtem 2 0.26%
82 namen 2 0.26%
83 Jurkovička 2 0.26%
84 aprila 2 0.26%
85 Martika 2 0.26%
86 starejša 2 0.26%
87 srbečo 2 0.26%
88 junak 2 0.26%
89 Godec 2 0.26%
90 gosta 2 0.26%
91 pa 2 0.26%
92 kuhamo 2 0.26%
93 križarjenju 2 0.26%
94 času 2 0.26%
95 žena 2 0.26%
96 pekač 2 0.26%
97 1st 2 0.26%
98 pot 2 0.26%
99 si 2 0.26%
100 ker 2 0.26%
101 sodnica 2 0.26%
102 občin 2 0.26%
103 nekaj 2 0.26%
104 46 2 0.26%
105 officer 2 0.26%
106 late 2 0.26%
107 pri 2 0.26%
108 zelenjavne 2 0.26%
109 damo 2 0.26%
110 znebiti 2 0.26%
111 jih 2 0.26%
112 kocke 2 0.26%
113 operemo 2 0.26%
114 posodi 2 0.26%
115 kožo 2 0.26%
116 tomijeva 2 0.26%
117 vas 2 0.26%
118 bosta 2 0.26%
119 mlajša 2 0.26%
120 tega 2 0.26%
121 vdova 2 0.26%
122 Sredozemlja 2 0.26%
123 1998 2 0.26%
124 korenčkovo 2 0.26%
125 vino 2 0.26%
126 zgodba 2 0.26%
127 ima 2 0.26%
128 Fijavž 2 0.26%
129 potem 2 0.26%
130 organizacija 1 0.13%
131 dobro 1 0.13%
132 zadrugah 1 0.13%
133 odvisno 1 0.13%
134 požigom 1 0.13%
135 svobodnem 1 0.13%
136 drunk 1 0.13%
137 Brolo 1 0.13%
138 priokusom 1 0.13%
139 Končar 1 0.13%
140 začne 1 0.13%
141 tek 1 0.13%
142 sister 1 0.13%
143 naša 1 0.13%
144 zvezdniška 1 0.13%
145 dokler 1 0.13%
146 pravno 1 0.13%
147 pospeševati 1 0.13%
148 vključno 1 0.13%
149 5 1 0.13%
150 zavremo 1 0.13%
151 jesenska 1 0.13%
152 dietna 1 0.13%
153 pravne 1 0.13%
154 ValentinRozman 1 0.13%
155 postali 1 0.13%
156 roko 1 0.13%
157 1113 1 0.13%
158 izberemo 1 0.13%
159 kolobarje 1 0.13%
160 zavračamo 1 0.13%
161 sami 1 0.13%
162 g 1 0.13%
163 narežemo 1 0.13%
164 popeljala 1 0.13%
165 uporabite 1 0.13%
166 police 1 0.13%
167 okusu 1 0.13%
168 Darko 1 0.13%
169 špasnem 1 0.13%
170 gospodarske 1 0.13%
171 de 1 0.13%
172 upravljanju 1 0.13%
173 torto 1 0.13%
174 Marseille 1 0.13%
175 dl 1 0.13%
176 križarjanju 1 0.13%
177 12:35 1 0.13%
178 torta 1 0.13%
179 mladi 1 0.13%
180 Neaplja 1 0.13%
181 temelji 1 0.13%
182 proklamirali 1 0.13%
183 odličnimi 1 0.13%
184 tako 1 0.13%
185 pristopu 1 0.13%
186 testu 1 0.13%
187 sutano 1 0.13%
188 Tanja 1 0.13%
189 poslednjih 1 0.13%
190 Barcelono 1 0.13%
191 cerkvi 1 0.13%
192 Javšnik 1 0.13%
193 09.11.2010 1 0.13%
194 intermarketing 1 0.13%
195 onemu 1 0.13%
196 volilna 1 0.13%
197 nakazovati 1 0.13%
198 števila 1 0.13%
199 2010 1 0.13%
200 2130 1 0.13%
201 akreditacijo 1 0.13%
202 mogoče 1 0.13%
203 društvu 1 0.13%
204 prezgodaj 1 0.13%
205 arhivo 1 0.13%
206 svojega 1 0.13%
207 društvo 1 0.13%
208 gre 1 0.13%
209 Performs 1 0.13%
210 odstranimo 1 0.13%
211 življenje 1 0.13%
212 i. 1 0.13%
213 veličastni 1 0.13%
214 ustanovi 1 0.13%
215 koščkov 1 0.13%
216 Marijana 1 0.13%
217 sladkor 1 0.13%
218 veličastna 1 0.13%
219 istospolno 1 0.13%
220 življenju 1 0.13%
221 pečico 1 0.13%
222 12:25 1 0.13%
223 viroz 1 0.13%
224 tekoča 1 0.13%
225 pečice 1 0.13%
226 vanjo 1 0.13%
227 nedoločenega 1 0.13%
228 posujemo 1 0.13%
229 ženskami 1 0.13%
230 jeste 1 0.13%
231 narod 1 0.13%
232 vnaprej 1 0.13%
233 uresničuje 1 0.13%
234 ladji 1 0.13%
235 naj 1 0.13%
236 ceka 1 0.13%
237 ladja 1 0.13%
238 mehko 1 0.13%
239 judge 1 0.13%
240 dni 1 0.13%
241 tista 1 0.13%
242 palmo 1 0.13%
243 Mojco 1 0.13%
244 tukaj 1 0.13%
245 iz 1 0.13%
246 foto 1 0.13%
247 duhovnike 1 0.13%
248 ji 1 0.13%
249 blagajna 1 0.13%
250 ponudimo 1 0.13%
251 31.10 1 0.13%
252 ju 1 0.13%
253 edinstvena 1 0.13%
254 pojasnjuje 1 0.13%
255 smo 1 0.13%
256 osebe 1 0.13%
257 ustanoivi 1 0.13%
258 prostovoljnem 1 0.13%
259 optimist 1 0.13%
260 jogurt 1 0.13%
261 osebo 1 0.13%
262 skorajda 1 0.13%
263 ko 1 0.13%
264 obiskali 1 0.13%
265 operaciji 1 0.13%
266 večjih 1 0.13%
267 podjetje 1 0.13%
268 izumira 1 0.13%
269 novinarja 1 0.13%
270 druge 1 0.13%
271 666 1 0.13%
272 drugi 1 0.13%
273 premešamo 1 0.13%
274 motnja 1 0.13%
275 prav 1 0.13%
276 določbe 1 0.13%
277 peči 1 0.13%
278 Indiji 1 0.13%
279 penasto 1 0.13%
280 Palermu 1 0.13%
281 Planinšek 1 0.13%
282 sok 1 0.13%
283 fantastica 1 0.13%
284 dober 1 0.13%
285 člen 1 0.13%
286 počitnic 1 0.13%
287 ust 1 0.13%
288 gosto 1 0.13%
289 mi 1 0.13%
290 29.03.2010 1 0.13%
291 pojav 1 0.13%
292 ali 1 0.13%
293 račun 1 0.13%
294 torinskim 1 0.13%
295 grozdju 1 0.13%
296 trg 1 0.13%
297 Mallorco 1 0.13%
298 nazaj 1 0.13%
299 vami 1 0.13%
300 koristi 1 0.13%
301 rezino 1 0.13%
302 špasnega 1 0.13%
303 naročili 1 0.13%
304 srbečico 1 0.13%
305 1.7 1 0.13%
306 cena 1 0.13%
307 Javšnika 1 0.13%
308 med 1 0.13%
309 veliko 1 0.13%
310 Maja 1 0.13%
311 21. 1 0.13%
312 kaj 1 0.13%
313 Branko 1 0.13%
314 zelo 1 0.13%
315 polovico 1 0.13%
316 nižja 1 0.13%
317 velike 1 0.13%
318 kar 1 0.13%
319 pedri 1 0.13%
320 strli 1 0.13%
321 zakon 1 0.13%
322 aranžma 1 0.13%
323 bert 1 0.13%
324 srbeče 1 0.13%
325 povzroča 1 0.13%
326 urednik 1 0.13%
327 jeseni 1 0.13%
328 prilogi 1 0.13%
329 poleg 1 0.13%
330 dekani 1 0.13%
331 vsote 1 0.13%
332 marezige 1 0.13%
333 Matjaža 1 0.13%
334 križati 1 0.13%
335 Detela 1 0.13%
336 jurkovička 1 0.13%
337 vsebuje 1 0.13%
338 naročnik 1 0.13%
339 dunajska 1 0.13%
340 odkrivajte 1 0.13%
341 pred 1 0.13%
342 lep 1 0.13%
343 anatemizirala 1 0.13%
344 bogastvo 1 0.13%
345 1. 1 0.13%
346 spoštovani 1 0.13%
347 antikrista 1 0.13%
348 bolni 1 0.13%
349 6000 1 0.13%
350 ste 1 0.13%
351 goljufal 1 0.13%
352 zaprta 1 0.13%
353 čast 1 0.13%
354 mešalnikom 1 0.13%
355 pozdravi 1 0.13%
356 sinov 1 0.13%
357 križarjenja 1 0.13%
358 peške 1 0.13%
359 2. 1 0.13%
360 20 1 0.13%
361 minut 1 0.13%
362 22 1 0.13%
363 prtom 1 0.13%
364 danes 1 0.13%
365 ohranimo 1 0.13%
366 sprejemljivi 1 0.13%
367 članica 1 0.13%
368 paličnim 1 0.13%
369 ravno 1 0.13%
370 odpraviti 1 0.13%
371 Anandm 1 0.13%
372 umešamo 1 0.13%
373 ta 1 0.13%
374 pridružite 1 0.13%
375 prinašala 1 0.13%
376 zdi 1 0.13%
377 Tunisu 1 0.13%
378 jed 1 0.13%
379 splošne 1 0.13%
380 ogreto 1 0.13%
381 Ivek 1 0.13%
382 odgovorni 1 0.13%
383 želimo 1 0.13%
384 pecilni 1 0.13%
385 dala 1 0.13%
386 skrivali 1 0.13%
387 bolje 1 0.13%
388 moko 1 0.13%
389 solimo 1 0.13%
390 izgubil 1 0.13%
391 orehih 1 0.13%
392 zmešamo 1 0.13%
393 referendum 1 0.13%
394 66 1 0.13%
395 poglejte 1 0.13%
396 maso 1 0.13%
397 zelenjavna 1 0.13%
398 preprost 1 0.13%
399 komedija 1 0.13%
400 Eli's 1 0.13%
401 recept 1 0.13%
402 komisija 1 0.13%
403 Ankaran 1 0.13%
404 naroda 1 0.13%
405 Kofujem 1 0.13%
406 tudi 1 0.13%
407 posebej 1 0.13%
408 usmerjeni 1 0.13%
409 8. 1 0.13%
410 lupinico 1 0.13%
411 zadrugo 1 0.13%
412 narodom 1 0.13%
413 kocka 1 0.13%
414 katerega 1 0.13%
415 19. 1 0.13%
416 izstopu 1 0.13%
417 09:56 1 0.13%
418 vzamemo 1 0.13%
419 pozdrav 1 0.13%
420 škofije 1 0.13%
421 čemer 1 0.13%
422 97 1 0.13%
423 zaradi 1 0.13%
424 izdelana 1 0.13%
425 nekdo 1 0.13%
426 kloniranju 1 0.13%
427 vam 1 0.13%
428 okusna 1 0.13%
429 boste 1 0.13%
430 križarki 1 0.13%
431 prosimo 1 0.13%
432 unikatna 1 0.13%
433 ključno 1 0.13%
434 kože 1 0.13%
435 enem 1 0.13%
436 naredimo 1 0.13%
437 koncert 1 0.13%
438 Prison 1 0.13%
439 prošnja 1 0.13%
440 želodec 1 0.13%
441 Frenki 1 0.13%
442 older 1 0.13%
443 MSC 1 0.13%
444 prašek 1 0.13%
445 kompasom 1 0.13%
446 št. 1 0.13%
447 posesti 1 0.13%
448 Stepančič 1 0.13%
449 pomarančni 1 0.13%
450 lahek 1 0.13%
451 prispevek 1 0.13%
452 južek 1 0.13%
453 koncu 1 0.13%
454 Šmarje 1 0.13%
455 oziroma 1 0.13%
456 Beljan 1 0.13%
457 dermatologinja 1 0.13%
458 okrogel 1 0.13%
459 vlijemo 1 0.13%
460 Brecelj 1 0.13%
461 Podobnik 1 0.13%
462 13.9 1 0.13%
463 Alenke 1 0.13%
464 priča 1 0.13%
465 neškodljiv 1 0.13%
466 widow 1 0.13%
467 nismo 1 0.13%
468 Alenka 1 0.13%
469 08.11.2010 1 0.13%
470 strahu 1 0.13%
471 genove 1 0.13%
472 tednu 1 0.13%
473 vinu 1 0.13%
474 potekal 1 0.13%
475 24.10 1 0.13%
476 sanja 1 0.13%
477 dragoceno 1 0.13%
478 akreditirate 1 0.13%
479 liter 1 0.13%
480 mesta 1 0.13%
481 zakaj 1 0.13%
482 ustavili 1 0.13%
483 Roberto 1 0.13%
484 detective 1 0.13%
485 Danijela 1 0.13%
486 Ručigaj 1 0.13%
487 jajci 1 0.13%
488 mesti 1 0.13%
489 mika 1 0.13%
490 nared 1 0.13%
491 pravo 1 0.13%
492 križarili 1 0.13%
493 bila 1 0.13%
494 sodelovanju 1 0.13%
495 prava 1 0.13%
496 zato 1 0.13%
497 cesta 1 0.13%
498 saj 1 0.13%
499 srečo 1 0.13%
500 olje 1 0.13%
501 svojih 1 0.13%
502 Ljubljana 1 0.13%
503 pomaranče 1 0.13%
504 jesenskem 1 0.13%
505 pomarančo 1 0.13%
506 opreki 1 0.13%
507 najpogostejša 1 0.13%
508 pomaranči 1 0.13%
509 Zucco 1 0.13%
510 enakopravnem 1 0.13%

View File

@@ -0,0 +1,623 @@
word;frequency;percent
ajdovo kašo;4;0.586%
in ajdovo;3;0.439%
kovinska ogledala;3;0.439%
kako izdelujejo;3;0.439%
pol ure;3;0.439%
Agata Jurkovič;3;0.439%
težav s;2;0.293%
za pol;2;0.293%
62 vdova;2;0.293%
resnična zgodba;2;0.293%
v času;2;0.293%
srbečo kožo;2;0.293%
novih občin;2;0.293%
Roblek Martika;2;0.293%
znebiti težav;2;0.293%
korenje in;2;0.293%
in damo;2;0.293%
tiskarna gepard;2;0.293%
elina starejša;2;0.293%
vse v;2;0.293%
the late;2;0.293%
kako se;2;0.293%
korenčkovo juho;2;0.293%
totenbirt the;2;0.293%
Marta Fijavž;2;0.293%
za ustanovitev;2;0.293%
ne bi;2;0.293%
elina mlajša;2;0.293%
vdova sodnica;2;0.293%
46 učiteljica;2;0.293%
učiteljica tomijeva;2;0.293%
s srbečo;2;0.293%
da je;2;0.293%
dodamo še;2;0.293%
"žena elina";2;0.293%
zelenjavne kocke;2;0.293%
zahodnega Sredozemlja;2;0.293%
sodnica elina;2;0.293%
gepard 1;2;0.293%
Godec in;2;0.293%
grozdje in;2;0.293%
mlajša sestra;2;0.293%
Martika 46;2;0.293%
starejša sestra;2;0.293%
tomijeva žena;2;0.293%
se znebiti;2;0.293%
idejo o;2;0.293%
Fijavž Roblek;2;0.293%
Jurkovič Jurkovička;2;0.293%
da bi;2;0.293%
in orehe;2;0.293%
Jurkovička 62;2;0.293%
izdelujejo kovinska;2;0.293%
ustanovitev novih;2;0.293%
z idejo;1;0.146%
Jurkovič jurkovička;1;0.146%
da za;1;0.146%
gospodarske koristi;1;0.146%
na križarjenju;1;0.146%
in Marseille;1;0.146%
Neaplja se;1;0.146%
vode odvisno;1;0.146%
polovico zelenjavne;1;0.146%
"ženskami v";1;0.146%
Planinšek Ručigaj;1;0.146%
jajci in;1;0.146%
pol eko;1;0.146%
ravno prav;1;0.146%
pojasnjuje dermatologinja;1;0.146%
pot tista;1;0.146%
mogoče da;1;0.146%
približno pol;1;0.146%
ohranimo nekaj;1;0.146%
je zelo;1;0.146%
recept za;1;0.146%
temelji na;1;0.146%
okusu in;1;0.146%
v čast;1;0.146%
"špasnega križarjenja";1;0.146%
o zadrugah;1;0.146%
posujemo grozdje;1;0.146%
je nared;1;0.146%
več sinov;1;0.146%
officer detective;1;0.146%
junak v;1;0.146%
gosta in;1;0.146%
in dietna;1;0.146%
referendum za;1;0.146%
palmo de;1;0.146%
prošnja za;1;0.146%
dekani ki;1;0.146%
pri operaciji;1;0.146%
enakopravnem sodelovanju;1;0.146%
posesti nekaj;1;0.146%
si kako;1;0.146%
Brecelj Agata;1;0.146%
paličnim mešalnikom;1;0.146%
v sutano;1;0.146%
lahko jeste;1;0.146%
Koper 21.;1;0.146%
rezino pomaranče;1;0.146%
blagajna zaprta;1;0.146%
09.11.2010 ob;1;0.146%
cena ki;1;0.146%
tega mi;1;0.146%
oziroma postane;1;0.146%
ustanoivi društvo;1;0.146%
nedoločenega števila;1;0.146%
ki bo;1;0.146%
lahek in;1;0.146%
"članov ki";1;0.146%
ali drugo;1;0.146%
1 veliko;1;0.146%
namen pospeševati;1;0.146%
pekač in;1;0.146%
bi goljufal;1;0.146%
zgodba sami;1;0.146%
jeseni naredimo;1;0.146%
osebe če;1;0.146%
in požigom;1;0.146%
Mallorco Barcelono;1;0.146%
za korenčkovo;1;0.146%
jesenska torta;1;0.146%
tudi posebej;1;0.146%
testu posujemo;1;0.146%
poslednjih dni;1;0.146%
Beljan in;1;0.146%
prezgodaj je;1;0.146%
jih cerkev;1;0.146%
sodelovanju in;1;0.146%
junak ustanoivi;1;0.146%
tem uresničuje;1;0.146%
drugi posodi;1;0.146%
kocke in;1;0.146%
danes skorajda;1;0.146%
strli na;1;0.146%
srbečico in;1;0.146%
pospeševati gospodarske;1;0.146%
do Neaplja;1;0.146%
najpogostejša motnja;1;0.146%
Totenbirt Marta;1;0.146%
"če se";1;0.146%
prava pot;1;0.146%
s tem;1;0.146%
mu odstranimo;1;0.146%
narod pa;1;0.146%
prilogi vam;1;0.146%
se ustavili;1;0.146%
komisija za;1;0.146%
nekaj o;1;0.146%
svobodnem izstopu;1;0.146%
uresničuje namen;1;0.146%
cerkev in;1;0.146%
Prison officer;1;0.146%
viroz saj;1;0.146%
Totenbirt Agata;1;0.146%
juho in;1;0.146%
19. aprila;1;0.146%
zmešamo s;1;0.146%
nazaj peči;1;0.146%
Roberto Zucco;1;0.146%
foto arhivo;1;0.146%
zato da;1;0.146%
sanja a;1;0.146%
kože je;1;0.146%
"št. 2130";1;0.146%
Tunisu obiskali;1;0.146%
jed je;1;0.146%
ne postane;1;0.146%
jo odpraviti;1;0.146%
olje vino;1;0.146%
postane ravno;1;0.146%
tem da;1;0.146%
in neškodljiv;1;0.146%
aprila 1998;1;0.146%
se mu;1;0.146%
late Frenki;1;0.146%
Indiji naj;1;0.146%
potem gre;1;0.146%
ko dobro;1;0.146%
s priokusom;1;0.146%
vinu in;1;0.146%
na roko;1;0.146%
je cena;1;0.146%
ogreto pečico;1;0.146%
na veličastni;1;0.146%
"števila članov";1;0.146%
grozdje vino;1;0.146%
jogurt olje;1;0.146%
po grozdju;1;0.146%
pečico za;1;0.146%
do 13.9;1;0.146%
pravno osebo;1;0.146%
"še jogurt";1;0.146%
akreditirate naša;1;0.146%
bi prinašala;1;0.146%
na ladji;1;0.146%
je od;1;0.146%
preprost recept;1;0.146%
kocke ter;1;0.146%
prostovoljnem pristopu;1;0.146%
dobro premešamo;1;0.146%
Alenka Godec;1;0.146%
in dekani;1;0.146%
iz pečice;1;0.146%
Koper 8.;1;0.146%
nismo naročili;1;0.146%
odgovorni urednik;1;0.146%
bo potekal;1;0.146%
vlijemo v;1;0.146%
roko zato;1;0.146%
proklamirali in;1;0.146%
velike vsote;1;0.146%
približno liter;1;0.146%
62 widow;1;0.146%
in Danijela;1;0.146%
prav gosta;1;0.146%
je tiskarna;1;0.146%
boste od;1;0.146%
late južek;1;0.146%
bi skrivali;1;0.146%
račun št.;1;0.146%
mladi istospolno;1;0.146%
Danijela ceka;1;0.146%
mu ne;1;0.146%
Zucco 1st;1;0.146%
povzroča srbečico;1;0.146%
8. aprila;1;0.146%
kar v;1;0.146%
okrogel pekač;1;0.146%
damo nazaj;1;0.146%
ter kuhamo;1;0.146%
dodamo drugo;1;0.146%
5 popeljala;1;0.146%
in kar;1;0.146%
bogastvo in;1;0.146%
o križarjenju;1;0.146%
torinskim prtom;1;0.146%
za referendum;1;0.146%
zadruga lahko;1;0.146%
križati idejo;1;0.146%
pravo pot;1;0.146%
Tanja Planinšek;1;0.146%
ob 09:56;1;0.146%
ustanovi podjetje;1;0.146%
vino pomarančo;1;0.146%
katerega je;1;0.146%
skorajda najpogostejša;1;0.146%
in sladkor;1;0.146%
od 24.10;1;0.146%
v opreki;1;0.146%
torta vsebuje;1;0.146%
v posodi;1;0.146%
zavračamo račun;1;0.146%
zelo lahek;1;0.146%
izstopu enakopravnem;1;0.146%
Barcelono in;1;0.146%
tista ki;1;0.146%
bosta zvezdniška;1;0.146%
ki ji;1;0.146%
od genove;1;0.146%
Ankaran škofije;1;0.146%
ki ju;1;0.146%
mesta zahodnega;1;0.146%
križarjenja je;1;0.146%
bi se;1;0.146%
na kolobarje;1;0.146%
ponudimo z;1;0.146%
je prava;1;0.146%
pecilni prašek;1;0.146%
splošne določbe;1;0.146%
v ogreto;1;0.146%
operaciji 666;1;0.146%
ste bolni;1;0.146%
v življenju;1;0.146%
zdi ključno;1;0.146%
"želimo na";1;0.146%
mika 08.11.2010;1;0.146%
za duhovnike;1;0.146%
v okrogel;1;0.146%
dala tiskarna;1;0.146%
Performs in;1;0.146%
lep pozdrav;1;0.146%
Matjaž Javšnik;1;0.146%
poleg tega;1;0.146%
tako da;1;0.146%
ob 12:25;1;0.146%
sinov naroda;1;0.146%
24.10 do;1;0.146%
skrivali svojega;1;0.146%
poglejte si;1;0.146%
in komedija;1;0.146%
sprejemljivi potem;1;0.146%
odstranimo peške;1;0.146%
potekal 19.;1;0.146%
med mesti;1;0.146%
podjetje drugo;1;0.146%
"škofije Šmarje";1;0.146%
ki ima;1;0.146%
in pol;1;0.146%
moko pecilni;1;0.146%
maso vlijemo;1;0.146%
prispevek ki;1;0.146%
tega pa;1;0.146%
srbeče kože;1;0.146%
pedri če;1;0.146%
dl vode;1;0.146%
naročnik je;1;0.146%
začne nakazovati;1;0.146%
večjih koščkov;1;0.146%
ker tega;1;0.146%
nekdo mu;1;0.146%
operemo grozdje;1;0.146%
veličastna mesta;1;0.146%
1st Prison;1;0.146%
bo ladja;1;0.146%
vsebuje grozdje;1;0.146%
genove do;1;0.146%
priokusom po;1;0.146%
in srečo;1;0.146%
detective 1st;1;0.146%
dunajska cesta;1;0.146%
pred ženskami;1;0.146%
totenbirt Agata;1;0.146%
pojav srbeče;1;0.146%
de Mallorco;1;0.146%
jih strli;1;0.146%
widow judge;1;0.146%
zadruga je;1;0.146%
medtem v;1;0.146%
older sister;1;0.146%
izgubil dragoceno;1;0.146%
in Matjaž;1;0.146%
naredimo torto;1;0.146%
29.03.2010 ob;1;0.146%
volilna komisija;1;0.146%
prosimo da;1;0.146%
a okusna;1;0.146%
zavremo približno;1;0.146%
torto s;1;0.146%
z narodom;1;0.146%
antikrista vključno;1;0.146%
požigom z;1;0.146%
postane mehko;1;0.146%
počitnic od;1;0.146%
ob 12:35;1;0.146%
in tekoča;1;0.146%
Darko Stepančič;1;0.146%
kako gosto;1;0.146%
penasto umešamo;1;0.146%
veličastni križarki;1;0.146%
s kompasom;1;0.146%
bert 09.11.2010;1;0.146%
o društvu;1;0.146%
ustavili v;1;0.146%
zgodba prezgodaj;1;0.146%
mi pri;1;0.146%
20 minut;1;0.146%
08.11.2010 ob;1;0.146%
narežemo korenje;1;0.146%
križarili boste;1;0.146%
pomarančni sok;1;0.146%
97 ker;1;0.146%
se s;1;0.146%
društvu poslednjih;1;0.146%
ju lahko;1;0.146%
zvezdniška gosta;1;0.146%
odvisno kako;1;0.146%
v posesti;1;0.146%
gre za;1;0.146%
od 1.7;1;0.146%
organizacija vnaprej;1;0.146%
bolje potem;1;0.146%
anatemizirala tako;1;0.146%
ki priča;1;0.146%
aprila akreditirate;1;0.146%
Matjaž Koper;1;0.146%
ima namen;1;0.146%
66 drunk;1;0.146%
smo jih;1;0.146%
enem tednu;1;0.146%
onemu zdi;1;0.146%
ladja MSC;1;0.146%
eko zelenjavne;1;0.146%
z rezino;1;0.146%
dober tek;1;0.146%
"še ajdovo";1;0.146%
mesti zahodnega;1;0.146%
"življenju mogoče";1;0.146%
21. 2.;1;0.146%
lahko ustanovi;1;0.146%
saj je;1;0.146%
popeljala med;1;0.146%
kako jo;1;0.146%
gosto juho;1;0.146%
je blagajna;1;0.146%
"še za";1;0.146%
za želodec;1;0.146%
juho želimo;1;0.146%
vzamemo iz;1;0.146%
Palermu in;1;0.146%
dermatologinja Tanja;1;0.146%
Alenke Godec;1;0.146%
eko zelenjavna;1;0.146%
nekaj večjih;1;0.146%
Anandm Kofujem;1;0.146%
bosta z;1;0.146%
postali sprejemljivi;1;0.146%
a se;1;0.146%
je bila;1;0.146%
križarjenju tukaj;1;0.146%
križarjenju bosta;1;0.146%
je vse;1;0.146%
naroda narod;1;0.146%
ogledala izdelana;1;0.146%
pridružite se;1;0.146%
nakazovati velike;1;0.146%
okusna jed;1;0.146%
Maja Končar;1;0.146%
pa izumira;1;0.146%
liter vode;1;0.146%
dietna a;1;0.146%
uporabite v;1;0.146%
prtom in;1;0.146%
na koncu;1;0.146%
ValentinRozman 29.03.2010;1;0.146%
je danes;1;0.146%
obiskali palmo;1;0.146%
ji dodamo;1;0.146%
"članica druge";1;0.146%
juho solimo;1;0.146%
vino pomarančni;1;0.146%
koristi svojih;1;0.146%
se mladi;1;0.146%
do 31.10;1;0.146%
"Šmarje marezige";1;0.146%
z Anandm;1;0.146%
drugo zadrugo;1;0.146%
cesta 22;1;0.146%
kašo ki;1;0.146%
namen zaradi;1;0.146%
v približno;1;0.146%
vas bo;1;0.146%
kašo in;1;0.146%
pekač vzamemo;1;0.146%
istospolno usmerjeni;1;0.146%
se onemu;1;0.146%
bi anatemizirala;1;0.146%
posodi zavremo;1;0.146%
svojega strahu;1;0.146%
zaradi katerega;1;0.146%
veliko nižja;1;0.146%
Marijana Brecelj;1;0.146%
vključno z;1;0.146%
i. splošne;1;0.146%
unikatna ogledala;1;0.146%
Branko Podobnik;1;0.146%
gosta Alenka;1;0.146%
mehko približno;1;0.146%
odličnimi pozdravi;1;0.146%
in upravljanju;1;0.146%
1. člen;1;0.146%
izdelujejo ta;1;0.146%
grozdju pomaranči;1;0.146%
o tem;1;0.146%
naj bi;1;0.146%
orehe ki;1;0.146%
Končar Marta;1;0.146%
trg Brolo;1;0.146%
koncert Alenke;1;0.146%
prinašala bogastvo;1;0.146%
Marseille vse;1;0.146%
judge Eli's;1;0.146%
vas nismo;1;0.146%
cerkev v;1;0.146%
pomaranči vinu;1;0.146%
je izgubil;1;0.146%
sami si;1;0.146%
v drugi;1;0.146%
priča o;1;0.146%
prašek in;1;0.146%
strahu pred;1;0.146%
vami koncert;1;0.146%
MSC fantastica;1;0.146%
občin Ankaran;1;0.146%
je organizacija;1;0.146%
s paličnim;1;0.146%
Kofujem torinskim;1;0.146%
približno 20;1;0.146%
jesenskem špasnem;1;0.146%
"špasnem križarjanju";1;0.146%
cerkvi bolje;1;0.146%
duhovnike več;1;0.146%
izberemo pravo;1;0.146%
1113 Ljubljana;1;0.146%
Detela Matjaž;1;0.146%
dokler korenje;1;0.146%
posodi zmešamo;1;0.146%
postane članica;1;0.146%
6000 Koper;1;0.146%
Mojco Beljan;1;0.146%
vam zavračamo;1;0.146%
mu začne;1;0.146%
ima junak;1;0.146%
ker ima;1;0.146%
bila ust;1;0.146%
bi postali;1;0.146%
"čast jeseni";1;0.146%
v Palermu;1;0.146%
z odličnimi;1;0.146%
pot prispevek;1;0.146%
približno 3;1;0.146%
v enem;1;0.146%
pri vas;1;0.146%
nam jo;1;0.146%
ki nam;1;0.146%
je cerkvi;1;0.146%
kaj povzroča;1;0.146%
premešamo dodamo;1;0.146%
odpraviti pojasnjuje;1;0.146%
društvo da;1;0.146%
z vami;1;0.146%
"če jih";1;0.146%
po testu;1;0.146%
nam na;1;0.146%
jeste tudi;1;0.146%
in Tunisu;1;0.146%
pristopu svobodnem;1;0.146%
in kako;1;0.146%
narodom če;1;0.146%
zelenjavna kocka;1;0.146%
v Indiji;1;0.146%
ter temelji;1;0.146%
korenje ne;1;0.146%
pomarančo in;1;0.146%
naša novinarja;1;0.146%
upravljanju članov;1;0.146%
kolobarje narežemo;1;0.146%
damo v;1;0.146%
da ohranimo;1;0.146%
novinarja Mojco;1;0.146%
dodamo moko;1;0.146%
pravne osebe;1;0.146%
na prostovoljnem;1;0.146%
za akreditacijo;1;0.146%
cerkev ne;1;0.146%
"čemer se";1;0.146%
Brolo 3;1;0.146%
in ne;1;0.146%
izdelana v;1;0.146%
ta unikatna;1;0.146%
"času počitnic";1;0.146%
o kloniranju;1;0.146%
koncu vanjo;1;0.146%
marezige in;1;0.146%
medtem operemo;1;0.146%
in mu;1;0.146%
drugo pravno;1;0.146%
zadrugo ali;1;0.146%
svojih članov;1;0.146%
vode ki;1;0.146%
in lupinico;1;0.146%
potem bi;1;0.146%
3 dl;1;0.146%
dragoceno življenje;1;0.146%
Ivek 66;1;0.146%
jurkovička 62;1;0.146%
se nam;1;0.146%
Matjaža Javšnika;1;0.146%
"članov ter";1;0.146%
2130 97;1;0.146%
osebo oziroma;1;0.146%
po okusu;1;0.146%
o čemer;1;0.146%
pa je;1;0.146%
"če je";1;0.146%
mešalnikom da;1;0.146%
križarjanju vas;1;0.146%
kuhamo dokler;1;0.146%
1st police;1;0.146%
opreki z;1;0.146%
vanjo dodamo;1;0.146%
totenbirt Ivek;1;0.146%
si izberemo;1;0.146%
operemo korenje;1;0.146%
aranžma špasnega;1;0.146%
ključno pri;1;0.146%
in orehih;1;0.146%
več o;1;0.146%
Javšnika optimist;1;0.146%
zakon o;1;0.146%
"času viroz";1;0.146%
ladji bosta;1;0.146%
kuhamo približno;1;0.146%
v prilogi;1;0.146%
neškodljiv za;1;0.146%
kompasom odkrivajte;1;0.146%
ne sanja;1;0.146%
sok in;1;0.146%
drugo polovico;1;0.146%
usmerjeni proklamirali;1;0.146%
peči še;1;0.146%
kloniranju antikrista;1;0.146%
edinstvena kovinska;1;0.146%
solimo po;1;0.146%
da postane;1;0.146%
police officer;1;0.146%
je dala;1;0.146%
pečice po;1;0.146%
druge pravne;1;0.146%
sladkor penasto;1;0.146%
odkrivajte veličastna;1;0.146%
komedija Matjaža;1;0.146%
vnaprej nedoločenega;1;0.146%
na jesenskem;1;0.146%
in pedri;1;0.146%
Eli's older;1;0.146%
2. 1998;1;0.146%
fantastica 5;1;0.146%
posebej uporabite;1;0.146%
ki smo;1;0.146%
jo je;1;0.146%
1 word frequency percent
2 ajdovo kašo 4 0.586%
3 in ajdovo 3 0.439%
4 kovinska ogledala 3 0.439%
5 kako izdelujejo 3 0.439%
6 pol ure 3 0.439%
7 Agata Jurkovič 3 0.439%
8 težav s 2 0.293%
9 za pol 2 0.293%
10 62 vdova 2 0.293%
11 resnična zgodba 2 0.293%
12 v času 2 0.293%
13 srbečo kožo 2 0.293%
14 novih občin 2 0.293%
15 Roblek Martika 2 0.293%
16 znebiti težav 2 0.293%
17 korenje in 2 0.293%
18 in damo 2 0.293%
19 tiskarna gepard 2 0.293%
20 elina starejša 2 0.293%
21 vse v 2 0.293%
22 the late 2 0.293%
23 kako se 2 0.293%
24 korenčkovo juho 2 0.293%
25 totenbirt the 2 0.293%
26 Marta Fijavž 2 0.293%
27 za ustanovitev 2 0.293%
28 ne bi 2 0.293%
29 elina mlajša 2 0.293%
30 vdova sodnica 2 0.293%
31 46 učiteljica 2 0.293%
32 učiteljica tomijeva 2 0.293%
33 s srbečo 2 0.293%
34 da je 2 0.293%
35 dodamo še 2 0.293%
36 žena elina 2 0.293%
37 zelenjavne kocke 2 0.293%
38 zahodnega Sredozemlja 2 0.293%
39 sodnica elina 2 0.293%
40 gepard 1 2 0.293%
41 Godec in 2 0.293%
42 grozdje in 2 0.293%
43 mlajša sestra 2 0.293%
44 Martika 46 2 0.293%
45 starejša sestra 2 0.293%
46 tomijeva žena 2 0.293%
47 se znebiti 2 0.293%
48 idejo o 2 0.293%
49 Fijavž Roblek 2 0.293%
50 Jurkovič Jurkovička 2 0.293%
51 da bi 2 0.293%
52 in orehe 2 0.293%
53 Jurkovička 62 2 0.293%
54 izdelujejo kovinska 2 0.293%
55 ustanovitev novih 2 0.293%
56 z idejo 1 0.146%
57 Jurkovič jurkovička 1 0.146%
58 da za 1 0.146%
59 gospodarske koristi 1 0.146%
60 na križarjenju 1 0.146%
61 in Marseille 1 0.146%
62 Neaplja se 1 0.146%
63 vode odvisno 1 0.146%
64 polovico zelenjavne 1 0.146%
65 ženskami v 1 0.146%
66 Planinšek Ručigaj 1 0.146%
67 jajci in 1 0.146%
68 pol eko 1 0.146%
69 ravno prav 1 0.146%
70 pojasnjuje dermatologinja 1 0.146%
71 pot tista 1 0.146%
72 mogoče da 1 0.146%
73 približno pol 1 0.146%
74 ohranimo nekaj 1 0.146%
75 je zelo 1 0.146%
76 recept za 1 0.146%
77 temelji na 1 0.146%
78 okusu in 1 0.146%
79 v čast 1 0.146%
80 špasnega križarjenja 1 0.146%
81 o zadrugah 1 0.146%
82 posujemo grozdje 1 0.146%
83 je nared 1 0.146%
84 več sinov 1 0.146%
85 officer detective 1 0.146%
86 junak v 1 0.146%
87 gosta in 1 0.146%
88 in dietna 1 0.146%
89 referendum za 1 0.146%
90 palmo de 1 0.146%
91 prošnja za 1 0.146%
92 dekani ki 1 0.146%
93 pri operaciji 1 0.146%
94 enakopravnem sodelovanju 1 0.146%
95 posesti nekaj 1 0.146%
96 si kako 1 0.146%
97 Brecelj Agata 1 0.146%
98 paličnim mešalnikom 1 0.146%
99 v sutano 1 0.146%
100 lahko jeste 1 0.146%
101 Koper 21. 1 0.146%
102 rezino pomaranče 1 0.146%
103 blagajna zaprta 1 0.146%
104 09.11.2010 ob 1 0.146%
105 cena ki 1 0.146%
106 tega mi 1 0.146%
107 oziroma postane 1 0.146%
108 ustanoivi društvo 1 0.146%
109 nedoločenega števila 1 0.146%
110 ki bo 1 0.146%
111 lahek in 1 0.146%
112 članov ki 1 0.146%
113 ali drugo 1 0.146%
114 1 veliko 1 0.146%
115 namen pospeševati 1 0.146%
116 pekač in 1 0.146%
117 bi goljufal 1 0.146%
118 zgodba sami 1 0.146%
119 jeseni naredimo 1 0.146%
120 osebe če 1 0.146%
121 in požigom 1 0.146%
122 Mallorco Barcelono 1 0.146%
123 za korenčkovo 1 0.146%
124 jesenska torta 1 0.146%
125 tudi posebej 1 0.146%
126 testu posujemo 1 0.146%
127 poslednjih dni 1 0.146%
128 Beljan in 1 0.146%
129 prezgodaj je 1 0.146%
130 jih cerkev 1 0.146%
131 sodelovanju in 1 0.146%
132 junak ustanoivi 1 0.146%
133 tem uresničuje 1 0.146%
134 drugi posodi 1 0.146%
135 kocke in 1 0.146%
136 danes skorajda 1 0.146%
137 strli na 1 0.146%
138 srbečico in 1 0.146%
139 pospeševati gospodarske 1 0.146%
140 do Neaplja 1 0.146%
141 najpogostejša motnja 1 0.146%
142 Totenbirt Marta 1 0.146%
143 če se 1 0.146%
144 prava pot 1 0.146%
145 s tem 1 0.146%
146 mu odstranimo 1 0.146%
147 narod pa 1 0.146%
148 prilogi vam 1 0.146%
149 se ustavili 1 0.146%
150 komisija za 1 0.146%
151 nekaj o 1 0.146%
152 svobodnem izstopu 1 0.146%
153 uresničuje namen 1 0.146%
154 cerkev in 1 0.146%
155 Prison officer 1 0.146%
156 viroz saj 1 0.146%
157 Totenbirt Agata 1 0.146%
158 juho in 1 0.146%
159 19. aprila 1 0.146%
160 zmešamo s 1 0.146%
161 nazaj peči 1 0.146%
162 Roberto Zucco 1 0.146%
163 foto arhivo 1 0.146%
164 zato da 1 0.146%
165 sanja a 1 0.146%
166 kože je 1 0.146%
167 št. 2130 1 0.146%
168 Tunisu obiskali 1 0.146%
169 jed je 1 0.146%
170 ne postane 1 0.146%
171 jo odpraviti 1 0.146%
172 olje vino 1 0.146%
173 postane ravno 1 0.146%
174 tem da 1 0.146%
175 in neškodljiv 1 0.146%
176 aprila 1998 1 0.146%
177 se mu 1 0.146%
178 late Frenki 1 0.146%
179 Indiji naj 1 0.146%
180 potem gre 1 0.146%
181 ko dobro 1 0.146%
182 s priokusom 1 0.146%
183 vinu in 1 0.146%
184 na roko 1 0.146%
185 je cena 1 0.146%
186 ogreto pečico 1 0.146%
187 na veličastni 1 0.146%
188 števila članov 1 0.146%
189 grozdje vino 1 0.146%
190 jogurt olje 1 0.146%
191 po grozdju 1 0.146%
192 pečico za 1 0.146%
193 do 13.9 1 0.146%
194 pravno osebo 1 0.146%
195 še jogurt 1 0.146%
196 akreditirate naša 1 0.146%
197 bi prinašala 1 0.146%
198 na ladji 1 0.146%
199 je od 1 0.146%
200 preprost recept 1 0.146%
201 kocke ter 1 0.146%
202 prostovoljnem pristopu 1 0.146%
203 dobro premešamo 1 0.146%
204 Alenka Godec 1 0.146%
205 in dekani 1 0.146%
206 iz pečice 1 0.146%
207 Koper 8. 1 0.146%
208 nismo naročili 1 0.146%
209 odgovorni urednik 1 0.146%
210 bo potekal 1 0.146%
211 vlijemo v 1 0.146%
212 roko zato 1 0.146%
213 proklamirali in 1 0.146%
214 velike vsote 1 0.146%
215 približno liter 1 0.146%
216 62 widow 1 0.146%
217 in Danijela 1 0.146%
218 prav gosta 1 0.146%
219 je tiskarna 1 0.146%
220 boste od 1 0.146%
221 late južek 1 0.146%
222 bi skrivali 1 0.146%
223 račun št. 1 0.146%
224 mladi istospolno 1 0.146%
225 Danijela ceka 1 0.146%
226 mu ne 1 0.146%
227 Zucco 1st 1 0.146%
228 povzroča srbečico 1 0.146%
229 8. aprila 1 0.146%
230 kar v 1 0.146%
231 okrogel pekač 1 0.146%
232 damo nazaj 1 0.146%
233 ter kuhamo 1 0.146%
234 dodamo drugo 1 0.146%
235 5 popeljala 1 0.146%
236 in kar 1 0.146%
237 bogastvo in 1 0.146%
238 o križarjenju 1 0.146%
239 torinskim prtom 1 0.146%
240 za referendum 1 0.146%
241 zadruga lahko 1 0.146%
242 križati idejo 1 0.146%
243 pravo pot 1 0.146%
244 Tanja Planinšek 1 0.146%
245 ob 09:56 1 0.146%
246 ustanovi podjetje 1 0.146%
247 vino pomarančo 1 0.146%
248 katerega je 1 0.146%
249 skorajda najpogostejša 1 0.146%
250 in sladkor 1 0.146%
251 od 24.10 1 0.146%
252 v opreki 1 0.146%
253 torta vsebuje 1 0.146%
254 v posodi 1 0.146%
255 zavračamo račun 1 0.146%
256 zelo lahek 1 0.146%
257 izstopu enakopravnem 1 0.146%
258 Barcelono in 1 0.146%
259 tista ki 1 0.146%
260 bosta zvezdniška 1 0.146%
261 ki ji 1 0.146%
262 od genove 1 0.146%
263 Ankaran škofije 1 0.146%
264 ki ju 1 0.146%
265 mesta zahodnega 1 0.146%
266 križarjenja je 1 0.146%
267 bi se 1 0.146%
268 na kolobarje 1 0.146%
269 ponudimo z 1 0.146%
270 je prava 1 0.146%
271 pecilni prašek 1 0.146%
272 splošne določbe 1 0.146%
273 v ogreto 1 0.146%
274 operaciji 666 1 0.146%
275 ste bolni 1 0.146%
276 v življenju 1 0.146%
277 zdi ključno 1 0.146%
278 želimo na 1 0.146%
279 mika 08.11.2010 1 0.146%
280 za duhovnike 1 0.146%
281 v okrogel 1 0.146%
282 dala tiskarna 1 0.146%
283 Performs in 1 0.146%
284 lep pozdrav 1 0.146%
285 Matjaž Javšnik 1 0.146%
286 poleg tega 1 0.146%
287 tako da 1 0.146%
288 ob 12:25 1 0.146%
289 sinov naroda 1 0.146%
290 24.10 do 1 0.146%
291 skrivali svojega 1 0.146%
292 poglejte si 1 0.146%
293 in komedija 1 0.146%
294 sprejemljivi potem 1 0.146%
295 odstranimo peške 1 0.146%
296 potekal 19. 1 0.146%
297 med mesti 1 0.146%
298 podjetje drugo 1 0.146%
299 škofije Šmarje 1 0.146%
300 ki ima 1 0.146%
301 in pol 1 0.146%
302 moko pecilni 1 0.146%
303 maso vlijemo 1 0.146%
304 prispevek ki 1 0.146%
305 tega pa 1 0.146%
306 srbeče kože 1 0.146%
307 pedri če 1 0.146%
308 dl vode 1 0.146%
309 naročnik je 1 0.146%
310 začne nakazovati 1 0.146%
311 večjih koščkov 1 0.146%
312 ker tega 1 0.146%
313 nekdo mu 1 0.146%
314 operemo grozdje 1 0.146%
315 veličastna mesta 1 0.146%
316 1st Prison 1 0.146%
317 bo ladja 1 0.146%
318 vsebuje grozdje 1 0.146%
319 genove do 1 0.146%
320 priokusom po 1 0.146%
321 in srečo 1 0.146%
322 detective 1st 1 0.146%
323 dunajska cesta 1 0.146%
324 pred ženskami 1 0.146%
325 totenbirt Agata 1 0.146%
326 pojav srbeče 1 0.146%
327 de Mallorco 1 0.146%
328 jih strli 1 0.146%
329 widow judge 1 0.146%
330 zadruga je 1 0.146%
331 medtem v 1 0.146%
332 older sister 1 0.146%
333 izgubil dragoceno 1 0.146%
334 in Matjaž 1 0.146%
335 naredimo torto 1 0.146%
336 29.03.2010 ob 1 0.146%
337 volilna komisija 1 0.146%
338 prosimo da 1 0.146%
339 a okusna 1 0.146%
340 zavremo približno 1 0.146%
341 torto s 1 0.146%
342 z narodom 1 0.146%
343 antikrista vključno 1 0.146%
344 požigom z 1 0.146%
345 postane mehko 1 0.146%
346 počitnic od 1 0.146%
347 ob 12:35 1 0.146%
348 in tekoča 1 0.146%
349 Darko Stepančič 1 0.146%
350 kako gosto 1 0.146%
351 penasto umešamo 1 0.146%
352 veličastni križarki 1 0.146%
353 s kompasom 1 0.146%
354 bert 09.11.2010 1 0.146%
355 o društvu 1 0.146%
356 ustavili v 1 0.146%
357 zgodba prezgodaj 1 0.146%
358 mi pri 1 0.146%
359 20 minut 1 0.146%
360 08.11.2010 ob 1 0.146%
361 narežemo korenje 1 0.146%
362 križarili boste 1 0.146%
363 pomarančni sok 1 0.146%
364 97 ker 1 0.146%
365 se s 1 0.146%
366 društvu poslednjih 1 0.146%
367 ju lahko 1 0.146%
368 zvezdniška gosta 1 0.146%
369 odvisno kako 1 0.146%
370 v posesti 1 0.146%
371 gre za 1 0.146%
372 od 1.7 1 0.146%
373 organizacija vnaprej 1 0.146%
374 bolje potem 1 0.146%
375 anatemizirala tako 1 0.146%
376 ki priča 1 0.146%
377 aprila akreditirate 1 0.146%
378 Matjaž Koper 1 0.146%
379 ima namen 1 0.146%
380 66 drunk 1 0.146%
381 smo jih 1 0.146%
382 enem tednu 1 0.146%
383 onemu zdi 1 0.146%
384 ladja MSC 1 0.146%
385 eko zelenjavne 1 0.146%
386 z rezino 1 0.146%
387 dober tek 1 0.146%
388 še ajdovo 1 0.146%
389 mesti zahodnega 1 0.146%
390 življenju mogoče 1 0.146%
391 21. 2. 1 0.146%
392 lahko ustanovi 1 0.146%
393 saj je 1 0.146%
394 popeljala med 1 0.146%
395 kako jo 1 0.146%
396 gosto juho 1 0.146%
397 je blagajna 1 0.146%
398 še za 1 0.146%
399 za želodec 1 0.146%
400 juho želimo 1 0.146%
401 vzamemo iz 1 0.146%
402 Palermu in 1 0.146%
403 dermatologinja Tanja 1 0.146%
404 Alenke Godec 1 0.146%
405 eko zelenjavna 1 0.146%
406 nekaj večjih 1 0.146%
407 Anandm Kofujem 1 0.146%
408 bosta z 1 0.146%
409 postali sprejemljivi 1 0.146%
410 a se 1 0.146%
411 je bila 1 0.146%
412 križarjenju tukaj 1 0.146%
413 križarjenju bosta 1 0.146%
414 je vse 1 0.146%
415 naroda narod 1 0.146%
416 ogledala izdelana 1 0.146%
417 pridružite se 1 0.146%
418 nakazovati velike 1 0.146%
419 okusna jed 1 0.146%
420 Maja Končar 1 0.146%
421 pa izumira 1 0.146%
422 liter vode 1 0.146%
423 dietna a 1 0.146%
424 uporabite v 1 0.146%
425 prtom in 1 0.146%
426 na koncu 1 0.146%
427 ValentinRozman 29.03.2010 1 0.146%
428 je danes 1 0.146%
429 obiskali palmo 1 0.146%
430 ji dodamo 1 0.146%
431 članica druge 1 0.146%
432 juho solimo 1 0.146%
433 vino pomarančni 1 0.146%
434 koristi svojih 1 0.146%
435 se mladi 1 0.146%
436 do 31.10 1 0.146%
437 Šmarje marezige 1 0.146%
438 z Anandm 1 0.146%
439 drugo zadrugo 1 0.146%
440 cesta 22 1 0.146%
441 kašo ki 1 0.146%
442 namen zaradi 1 0.146%
443 v približno 1 0.146%
444 vas bo 1 0.146%
445 kašo in 1 0.146%
446 pekač vzamemo 1 0.146%
447 istospolno usmerjeni 1 0.146%
448 se onemu 1 0.146%
449 bi anatemizirala 1 0.146%
450 posodi zavremo 1 0.146%
451 svojega strahu 1 0.146%
452 zaradi katerega 1 0.146%
453 veliko nižja 1 0.146%
454 Marijana Brecelj 1 0.146%
455 vključno z 1 0.146%
456 i. splošne 1 0.146%
457 unikatna ogledala 1 0.146%
458 Branko Podobnik 1 0.146%
459 gosta Alenka 1 0.146%
460 mehko približno 1 0.146%
461 odličnimi pozdravi 1 0.146%
462 in upravljanju 1 0.146%
463 1. člen 1 0.146%
464 izdelujejo ta 1 0.146%
465 grozdju pomaranči 1 0.146%
466 o tem 1 0.146%
467 naj bi 1 0.146%
468 orehe ki 1 0.146%
469 Končar Marta 1 0.146%
470 trg Brolo 1 0.146%
471 koncert Alenke 1 0.146%
472 prinašala bogastvo 1 0.146%
473 Marseille vse 1 0.146%
474 judge Eli's 1 0.146%
475 vas nismo 1 0.146%
476 cerkev v 1 0.146%
477 pomaranči vinu 1 0.146%
478 je izgubil 1 0.146%
479 sami si 1 0.146%
480 v drugi 1 0.146%
481 priča o 1 0.146%
482 prašek in 1 0.146%
483 strahu pred 1 0.146%
484 vami koncert 1 0.146%
485 MSC fantastica 1 0.146%
486 občin Ankaran 1 0.146%
487 je organizacija 1 0.146%
488 s paličnim 1 0.146%
489 Kofujem torinskim 1 0.146%
490 približno 20 1 0.146%
491 jesenskem špasnem 1 0.146%
492 špasnem križarjanju 1 0.146%
493 cerkvi bolje 1 0.146%
494 duhovnike več 1 0.146%
495 izberemo pravo 1 0.146%
496 1113 Ljubljana 1 0.146%
497 Detela Matjaž 1 0.146%
498 dokler korenje 1 0.146%
499 posodi zmešamo 1 0.146%
500 postane članica 1 0.146%
501 6000 Koper 1 0.146%
502 Mojco Beljan 1 0.146%
503 vam zavračamo 1 0.146%
504 mu začne 1 0.146%
505 ima junak 1 0.146%
506 ker ima 1 0.146%
507 bila ust 1 0.146%
508 bi postali 1 0.146%
509 čast jeseni 1 0.146%
510 v Palermu 1 0.146%
511 z odličnimi 1 0.146%
512 pot prispevek 1 0.146%
513 približno 3 1 0.146%
514 v enem 1 0.146%
515 pri vas 1 0.146%
516 nam jo 1 0.146%
517 ki nam 1 0.146%
518 je cerkvi 1 0.146%
519 kaj povzroča 1 0.146%
520 premešamo dodamo 1 0.146%
521 odpraviti pojasnjuje 1 0.146%
522 društvo da 1 0.146%
523 z vami 1 0.146%
524 če jih 1 0.146%
525 po testu 1 0.146%
526 nam na 1 0.146%
527 jeste tudi 1 0.146%
528 in Tunisu 1 0.146%
529 pristopu svobodnem 1 0.146%
530 in kako 1 0.146%
531 narodom če 1 0.146%
532 zelenjavna kocka 1 0.146%
533 v Indiji 1 0.146%
534 ter temelji 1 0.146%
535 korenje ne 1 0.146%
536 pomarančo in 1 0.146%
537 naša novinarja 1 0.146%
538 upravljanju članov 1 0.146%
539 kolobarje narežemo 1 0.146%
540 damo v 1 0.146%
541 da ohranimo 1 0.146%
542 novinarja Mojco 1 0.146%
543 dodamo moko 1 0.146%
544 pravne osebe 1 0.146%
545 na prostovoljnem 1 0.146%
546 za akreditacijo 1 0.146%
547 cerkev ne 1 0.146%
548 čemer se 1 0.146%
549 Brolo 3 1 0.146%
550 in ne 1 0.146%
551 izdelana v 1 0.146%
552 ta unikatna 1 0.146%
553 času počitnic 1 0.146%
554 o kloniranju 1 0.146%
555 koncu vanjo 1 0.146%
556 marezige in 1 0.146%
557 medtem operemo 1 0.146%
558 in mu 1 0.146%
559 drugo pravno 1 0.146%
560 zadrugo ali 1 0.146%
561 svojih članov 1 0.146%
562 vode ki 1 0.146%
563 in lupinico 1 0.146%
564 potem bi 1 0.146%
565 3 dl 1 0.146%
566 dragoceno življenje 1 0.146%
567 Ivek 66 1 0.146%
568 jurkovička 62 1 0.146%
569 se nam 1 0.146%
570 Matjaža Javšnika 1 0.146%
571 članov ter 1 0.146%
572 2130 97 1 0.146%
573 osebo oziroma 1 0.146%
574 po okusu 1 0.146%
575 o čemer 1 0.146%
576 pa je 1 0.146%
577 če je 1 0.146%
578 mešalnikom da 1 0.146%
579 križarjanju vas 1 0.146%
580 kuhamo dokler 1 0.146%
581 1st police 1 0.146%
582 opreki z 1 0.146%
583 vanjo dodamo 1 0.146%
584 totenbirt Ivek 1 0.146%
585 si izberemo 1 0.146%
586 operemo korenje 1 0.146%
587 aranžma špasnega 1 0.146%
588 ključno pri 1 0.146%
589 in orehih 1 0.146%
590 več o 1 0.146%
591 Javšnika optimist 1 0.146%
592 zakon o 1 0.146%
593 času viroz 1 0.146%
594 ladji bosta 1 0.146%
595 kuhamo približno 1 0.146%
596 v prilogi 1 0.146%
597 neškodljiv za 1 0.146%
598 kompasom odkrivajte 1 0.146%
599 ne sanja 1 0.146%
600 sok in 1 0.146%
601 drugo polovico 1 0.146%
602 usmerjeni proklamirali 1 0.146%
603 peči še 1 0.146%
604 kloniranju antikrista 1 0.146%
605 edinstvena kovinska 1 0.146%
606 solimo po 1 0.146%
607 da postane 1 0.146%
608 police officer 1 0.146%
609 je dala 1 0.146%
610 pečice po 1 0.146%
611 druge pravne 1 0.146%
612 sladkor penasto 1 0.146%
613 odkrivajte veličastna 1 0.146%
614 komedija Matjaža 1 0.146%
615 vnaprej nedoločenega 1 0.146%
616 na jesenskem 1 0.146%
617 in pedri 1 0.146%
618 Eli's older 1 0.146%
619 2. 1998 1 0.146%
620 fantastica 5 1 0.146%
621 posebej uporabite 1 0.146%
622 ki smo 1 0.146%
623 jo je 1 0.146%

View File

@@ -0,0 +1,572 @@
word;frequency;percent
in ajdovo kašo;3;0.499%
težav s srbečo;2;0.333%
za pol ure;2;0.333%
46 učiteljica tomijeva;2;0.333%
elina starejša sestra;2;0.333%
izdelujejo kovinska ogledala;2;0.333%
62 vdova sodnica;2;0.333%
učiteljica tomijeva žena;2;0.333%
Jurkovič Jurkovička 62;2;0.333%
tomijeva žena elina;2;0.333%
kako se znebiti;2;0.333%
vdova sodnica elina;2;0.333%
Roblek Martika 46;2;0.333%
kako izdelujejo kovinska;2;0.333%
elina mlajša sestra;2;0.333%
Marta Fijavž Roblek;2;0.333%
znebiti težav s;2;0.333%
ustanovitev novih občin;2;0.333%
za ustanovitev novih;2;0.333%
sodnica elina starejša;2;0.333%
Martika 46 učiteljica;2;0.333%
Fijavž Roblek Martika;2;0.333%
se znebiti težav;2;0.333%
"žena elina mlajša";2;0.333%
totenbirt the late;2;0.333%
tiskarna gepard 1;2;0.333%
Agata Jurkovič Jurkovička;2;0.333%
Jurkovička 62 vdova;2;0.333%
s srbečo kožo;2;0.333%
ne postane mehko;1;0.166%
ko dobro premešamo;1;0.166%
v življenju mogoče;1;0.166%
da bi goljufal;1;0.166%
Ivek 66 drunk;1;0.166%
vas nismo naročili;1;0.166%
posesti nekaj o;1;0.166%
posebej uporabite v;1;0.166%
ki ji dodamo;1;0.166%
okusu in kar;1;0.166%
lahko ustanovi podjetje;1;0.166%
Alenka Godec in;1;0.166%
junak ustanoivi društvo;1;0.166%
tako da bi;1;0.166%
novinarja Mojco Beljan;1;0.166%
torinskim prtom in;1;0.166%
da postane ravno;1;0.166%
cena ki nam;1;0.166%
kar v posodi;1;0.166%
začne nakazovati velike;1;0.166%
s paličnim mešalnikom;1;0.166%
na ladji bosta;1;0.166%
neškodljiv za želodec;1;0.166%
damo nazaj peči;1;0.166%
prošnja za akreditacijo;1;0.166%
bolje potem gre;1;0.166%
edinstvena kovinska ogledala;1;0.166%
da ohranimo nekaj;1;0.166%
MSC fantastica 5;1;0.166%
med mesti zahodnega;1;0.166%
izberemo pravo pot;1;0.166%
po testu posujemo;1;0.166%
Prison officer detective;1;0.166%
duhovnike več sinov;1;0.166%
in pedri če;1;0.166%
več sinov naroda;1;0.166%
Agata Jurkovič jurkovička;1;0.166%
judge Eli's older;1;0.166%
jurkovička 62 widow;1;0.166%
nekaj o čemer;1;0.166%
v čast jeseni;1;0.166%
jih strli na;1;0.166%
in damo nazaj;1;0.166%
izdelana v Indiji;1;0.166%
kloniranju antikrista vključno;1;0.166%
je bila ust;1;0.166%
cerkev v opreki;1;0.166%
dermatologinja Tanja Planinšek;1;0.166%
dl vode ki;1;0.166%
približno pol ure;1;0.166%
ravno prav gosta;1;0.166%
račun št. 2130;1;0.166%
Mallorco Barcelono in;1;0.166%
ima namen pospeševati;1;0.166%
gre za duhovnike;1;0.166%
zmešamo s paličnim;1;0.166%
zakon o zadrugah;1;0.166%
o kloniranju antikrista;1;0.166%
potekal 19. aprila;1;0.166%
naročnik je tiskarna;1;0.166%
postali sprejemljivi potem;1;0.166%
potem bi se;1;0.166%
Maja Končar Marta;1;0.166%
dekani ki bo;1;0.166%
jeseni naredimo torto;1;0.166%
mu začne nakazovati;1;0.166%
mladi istospolno usmerjeni;1;0.166%
ustavili v Palermu;1;0.166%
bert 09.11.2010 ob;1;0.166%
jeste tudi posebej;1;0.166%
dodamo moko pecilni;1;0.166%
totenbirt Ivek 66;1;0.166%
prtom in požigom;1;0.166%
druge pravne osebe;1;0.166%
posujemo grozdje in;1;0.166%
"če se s";1;0.166%
pomarančo in orehe;1;0.166%
zato da ohranimo;1;0.166%
tega pa je;1;0.166%
ustanoivi društvo da;1;0.166%
Anandm Kofujem torinskim;1;0.166%
preprost recept za;1;0.166%
a okusna jed;1;0.166%
mesti zahodnega Sredozemlja;1;0.166%
Končar Marta Fijavž;1;0.166%
pri operaciji 666;1;0.166%
bi prinašala bogastvo;1;0.166%
komisija za ustanovitev;1;0.166%
medtem operemo grozdje;1;0.166%
in Danijela ceka;1;0.166%
jih cerkev ne;1;0.166%
in dekani ki;1;0.166%
si kako izdelujejo;1;0.166%
"špasnega križarjenja je";1;0.166%
Marijana Brecelj Agata;1;0.166%
nazaj peči še;1;0.166%
zadruga je organizacija;1;0.166%
Jurkovič jurkovička 62;1;0.166%
se nam na;1;0.166%
sprejemljivi potem bi;1;0.166%
in Tunisu obiskali;1;0.166%
pridružite se nam;1;0.166%
vinu in orehih;1;0.166%
za duhovnike več;1;0.166%
namen zaradi katerega;1;0.166%
danes skorajda najpogostejša;1;0.166%
in upravljanju članov;1;0.166%
koncert Alenke Godec;1;0.166%
poleg tega pa;1;0.166%
komedija Matjaža Javšnika;1;0.166%
gosto juho želimo;1;0.166%
Brecelj Agata Jurkovič;1;0.166%
narod pa izumira;1;0.166%
smo jih strli;1;0.166%
ker tega mi;1;0.166%
aprila akreditirate naša;1;0.166%
s tem uresničuje;1;0.166%
palmo de Mallorco;1;0.166%
vključno z Anandm;1;0.166%
prispevek ki priča;1;0.166%
sinov naroda narod;1;0.166%
se mladi istospolno;1;0.166%
zdi ključno pri;1;0.166%
o tem da;1;0.166%
je cena ki;1;0.166%
cerkvi bolje potem;1;0.166%
ki ju lahko;1;0.166%
v enem tednu;1;0.166%
v posesti nekaj;1;0.166%
i. splošne določbe;1;0.166%
da za referendum;1;0.166%
nakazovati velike vsote;1;0.166%
prava pot tista;1;0.166%
grozdje in damo;1;0.166%
za korenčkovo juho;1;0.166%
drugo polovico zelenjavne;1;0.166%
8. aprila 1998;1;0.166%
kuhamo dokler korenje;1;0.166%
z narodom če;1;0.166%
Godec in komedija;1;0.166%
je dala tiskarna;1;0.166%
juho solimo po;1;0.166%
se ustavili v;1;0.166%
ter kuhamo dokler;1;0.166%
bo ladja MSC;1;0.166%
novih občin Ankaran;1;0.166%
na roko zato;1;0.166%
v Palermu in;1;0.166%
v približno liter;1;0.166%
posodi zavremo približno;1;0.166%
pristopu svobodnem izstopu;1;0.166%
grozdje vino pomarančo;1;0.166%
potem gre za;1;0.166%
Totenbirt Marta Fijavž;1;0.166%
jajci in sladkor;1;0.166%
kolobarje narežemo korenje;1;0.166%
in dietna a;1;0.166%
vino pomarančni sok;1;0.166%
pred ženskami v;1;0.166%
z vami koncert;1;0.166%
vami koncert Alenke;1;0.166%
pri vas nismo;1;0.166%
prezgodaj je izgubil;1;0.166%
tem uresničuje namen;1;0.166%
ju lahko jeste;1;0.166%
ne bi skrivali;1;0.166%
po grozdju pomaranči;1;0.166%
o križarjenju tukaj;1;0.166%
mehko približno 20;1;0.166%
s kompasom odkrivajte;1;0.166%
peči še za;1;0.166%
1 veliko nižja;1;0.166%
kocke in ajdovo;1;0.166%
in damo v;1;0.166%
je izgubil dragoceno;1;0.166%
mogoče da je;1;0.166%
21. 2. 1998;1;0.166%
z Anandm Kofujem;1;0.166%
koncu vanjo dodamo;1;0.166%
jesenska torta vsebuje;1;0.166%
Zucco 1st Prison;1;0.166%
ohranimo nekaj večjih;1;0.166%
widow judge Eli's;1;0.166%
5 popeljala med;1;0.166%
uresničuje namen zaradi;1;0.166%
vse v življenju;1;0.166%
zgodba sami si;1;0.166%
v posodi zmešamo;1;0.166%
vlijemo v okrogel;1;0.166%
pečico za pol;1;0.166%
oziroma postane članica;1;0.166%
zadruga lahko ustanovi;1;0.166%
Koper 8. aprila;1;0.166%
društvo da bi;1;0.166%
zaradi katerega je;1;0.166%
okrogel pekač in;1;0.166%
Alenke Godec in;1;0.166%
mika 08.11.2010 ob;1;0.166%
boste od genove;1;0.166%
Matjaža Javšnika optimist;1;0.166%
juho želimo na;1;0.166%
je danes skorajda;1;0.166%
kaj povzroča srbečico;1;0.166%
opreki z narodom;1;0.166%
gepard 1 veliko;1;0.166%
polovico zelenjavne kocke;1;0.166%
officer detective 1st;1;0.166%
je blagajna zaprta;1;0.166%
drugo zadrugo ali;1;0.166%
pospeševati gospodarske koristi;1;0.166%
Koper 21. 2.;1;0.166%
pomarančni sok in;1;0.166%
pojav srbeče kože;1;0.166%
operemo korenje in;1;0.166%
dokler korenje ne;1;0.166%
namen pospeševati gospodarske;1;0.166%
viroz saj je;1;0.166%
ter temelji na;1;0.166%
postane članica druge;1;0.166%
ki bo potekal;1;0.166%
"št. 2130 97";1;0.166%
pravo pot prispevek;1;0.166%
kašo ki ju;1;0.166%
in Marseille vse;1;0.166%
veličastna mesta zahodnega;1;0.166%
medtem v drugi;1;0.166%
za referendum za;1;0.166%
osebe če se;1;0.166%
ki ima namen;1;0.166%
je tiskarna gepard;1;0.166%
zgodba prezgodaj je;1;0.166%
08.11.2010 ob 09:56;1;0.166%
zadrugo ali drugo;1;0.166%
naj bi prinašala;1;0.166%
"Šmarje marezige in";1;0.166%
odpraviti pojasnjuje dermatologinja;1;0.166%
"članov ter temelji";1;0.166%
osebo oziroma postane;1;0.166%
2130 97 ker;1;0.166%
križarjenju bosta z;1;0.166%
genove do Neaplja;1;0.166%
je zelo lahek;1;0.166%
okusna jed je;1;0.166%
ker ima junak;1;0.166%
iz pečice po;1;0.166%
korenčkovo juho in;1;0.166%
organizacija vnaprej nedoločenega;1;0.166%
zelenjavne kocke in;1;0.166%
postane ravno prav;1;0.166%
od genove do;1;0.166%
aranžma špasnega križarjenja;1;0.166%
korenje ne postane;1;0.166%
kože je danes;1;0.166%
Roberto Zucco 1st;1;0.166%
uporabite v času;1;0.166%
vanjo dodamo še;1;0.166%
Tunisu obiskali palmo;1;0.166%
vode odvisno kako;1;0.166%
prosimo da za;1;0.166%
tega mi pri;1;0.166%
bosta zvezdniška gosta;1;0.166%
pravne osebe če;1;0.166%
v času viroz;1;0.166%
Matjaž Koper 21.;1;0.166%
bo potekal 19.;1;0.166%
saj je zelo;1;0.166%
sok in lupinico;1;0.166%
testu posujemo grozdje;1;0.166%
zelenjavne kocke ter;1;0.166%
križarjanju vas bo;1;0.166%
kako gosto juho;1;0.166%
maso vlijemo v;1;0.166%
dobro premešamo dodamo;1;0.166%
pečice po testu;1;0.166%
in sladkor penasto;1;0.166%
do Neaplja se;1;0.166%
damo v ogreto;1;0.166%
nekdo mu začne;1;0.166%
jogurt olje vino;1;0.166%
skorajda najpogostejša motnja;1;0.166%
v okrogel pekač;1;0.166%
pa je cena;1;0.166%
je cerkvi bolje;1;0.166%
eko zelenjavna kocka;1;0.166%
priča o tem;1;0.166%
občin Ankaran škofije;1;0.166%
naredimo torto s;1;0.166%
"članica druge pravne";1;0.166%
resnična zgodba prezgodaj;1;0.166%
prinašala bogastvo in;1;0.166%
detective 1st police;1;0.166%
roko zato da;1;0.166%
ajdovo kašo in;1;0.166%
se mu ne;1;0.166%
dunajska cesta 22;1;0.166%
naša novinarja Mojco;1;0.166%
istospolno usmerjeni proklamirali;1;0.166%
24.10 do 31.10;1;0.166%
približno liter vode;1;0.166%
korenčkovo juho solimo;1;0.166%
podjetje drugo zadrugo;1;0.166%
cerkev ne bi;1;0.166%
"življenju mogoče da";1;0.166%
pojasnjuje dermatologinja Tanja;1;0.166%
koristi svojih članov;1;0.166%
odvisno kako gosto;1;0.166%
kašo in dietna;1;0.166%
v času počitnic;1;0.166%
v Indiji naj;1;0.166%
09.11.2010 ob 12:35;1;0.166%
gosta in tekoča;1;0.166%
nedoločenega števila članov;1;0.166%
pecilni prašek in;1;0.166%
katerega je bila;1;0.166%
a se onemu;1;0.166%
1st police officer;1;0.166%
1st Prison officer;1;0.166%
lahko jeste tudi;1;0.166%
se s tem;1;0.166%
pedri če jih;1;0.166%
križati idejo o;1;0.166%
in komedija Matjaža;1;0.166%
se onemu zdi;1;0.166%
na prostovoljnem pristopu;1;0.166%
da je prava;1;0.166%
postane mehko približno;1;0.166%
ustanovi podjetje drugo;1;0.166%
"času viroz saj";1;0.166%
z odličnimi pozdravi;1;0.166%
"želimo na kolobarje";1;0.166%
gosta Alenka Godec;1;0.166%
in kako jo;1;0.166%
proklamirali in ne;1;0.166%
v opreki z;1;0.166%
mu odstranimo peške;1;0.166%
Palermu in Tunisu;1;0.166%
Kofujem torinskim prtom;1;0.166%
Indiji naj bi;1;0.166%
kompasom odkrivajte veličastna;1;0.166%
ki smo jih;1;0.166%
in kar v;1;0.166%
"še jogurt olje";1;0.166%
pot tista ki;1;0.166%
Detela Matjaž Koper;1;0.166%
obiskali palmo de;1;0.166%
recept za korenčkovo;1;0.166%
ta unikatna ogledala;1;0.166%
na jesenskem špasnem;1;0.166%
pot prispevek ki;1;0.166%
narodom če je;1;0.166%
si izberemo pravo;1;0.166%
bogastvo in srečo;1;0.166%
ladji bosta zvezdniška;1;0.166%
Beljan in Danijela;1;0.166%
vnaprej nedoločenega števila;1;0.166%
sodelovanju in upravljanju;1;0.166%
približno 3 dl;1;0.166%
pol eko zelenjavne;1;0.166%
totenbirt Agata Jurkovič;1;0.166%
počitnic od 1.7;1;0.166%
jo odpraviti pojasnjuje;1;0.166%
zavračamo račun št.;1;0.166%
dodamo še jogurt;1;0.166%
Barcelono in Marseille;1;0.166%
križarili boste od;1;0.166%
ključno pri operaciji;1;0.166%
paličnim mešalnikom da;1;0.166%
Totenbirt Agata Jurkovič;1;0.166%
anatemizirala tako da;1;0.166%
je od 24.10;1;0.166%
torta vsebuje grozdje;1;0.166%
zvezdniška gosta Alenka;1;0.166%
"še ajdovo kašo";1;0.166%
da bi postali;1;0.166%
"še za pol";1;0.166%
zelo lahek in;1;0.166%
orehe ki smo;1;0.166%
nekaj večjih koščkov;1;0.166%
ki priča o;1;0.166%
korenje in ajdovo;1;0.166%
"čast jeseni naredimo";1;0.166%
temelji na prostovoljnem;1;0.166%
izdelujejo ta unikatna;1;0.166%
pekač in damo;1;0.166%
strahu pred ženskami;1;0.166%
antikrista vključno z;1;0.166%
več o križarjenju;1;0.166%
je organizacija vnaprej;1;0.166%
mu ne sanja;1;0.166%
svobodnem izstopu enakopravnem;1;0.166%
kocke ter kuhamo;1;0.166%
vse v enem;1;0.166%
Mojco Beljan in;1;0.166%
premešamo dodamo še;1;0.166%
Eli's older sister;1;0.166%
posodi zmešamo s;1;0.166%
bi anatemizirala tako;1;0.166%
prostovoljnem pristopu svobodnem;1;0.166%
in neškodljiv za;1;0.166%
svojih članov ter;1;0.166%
29.03.2010 ob 12:25;1;0.166%
skrivali svojega strahu;1;0.166%
vsebuje grozdje vino;1;0.166%
sladkor penasto umešamo;1;0.166%
ki nam jo;1;0.166%
Tanja Planinšek Ručigaj;1;0.166%
vzamemo iz pečice;1;0.166%
olje vino pomarančni;1;0.166%
ogledala izdelana v;1;0.166%
enakopravnem sodelovanju in;1;0.166%
referendum za ustanovitev;1;0.166%
v prilogi vam;1;0.166%
dodamo drugo polovico;1;0.166%
od 24.10 do;1;0.166%
izstopu enakopravnem sodelovanju;1;0.166%
bi skrivali svojega;1;0.166%
odkrivajte veličastna mesta;1;0.166%
mešalnikom da postane;1;0.166%
Ankaran škofije Šmarje;1;0.166%
in orehe ki;1;0.166%
zavremo približno 3;1;0.166%
bi se mladi;1;0.166%
dodamo še ajdovo;1;0.166%
"škofije Šmarje marezige";1;0.166%
nam jo je;1;0.166%
povzroča srbečico in;1;0.166%
bosta z vami;1;0.166%
je vse v;1;0.166%
jesenskem špasnem križarjanju;1;0.166%
akreditirate naša novinarja;1;0.166%
97 ker tega;1;0.166%
juho in ajdovo;1;0.166%
korenje in pol;1;0.166%
ogreto pečico za;1;0.166%
o društvu poslednjih;1;0.166%
junak v posesti;1;0.166%
liter vode odvisno;1;0.166%
strli na roko;1;0.166%
in Matjaž Javšnik;1;0.166%
grozdje in mu;1;0.166%
marezige in dekani;1;0.166%
nam na veličastni;1;0.166%
je prava pot;1;0.166%
usmerjeni proklamirali in;1;0.166%
mi pri vas;1;0.166%
drugi posodi zavremo;1;0.166%
ji dodamo drugo;1;0.166%
mesta zahodnega Sredozemlja;1;0.166%
torto s priokusom;1;0.166%
62 widow judge;1;0.166%
vino pomarančo in;1;0.166%
jo je dala;1;0.166%
prilogi vam zavračamo;1;0.166%
naroda narod pa;1;0.166%
s priokusom po;1;0.166%
ne bi anatemizirala;1;0.166%
svojega strahu pred;1;0.166%
the late južek;1;0.166%
3 dl vode;1;0.166%
jed je nared;1;0.166%
lahek in neškodljiv;1;0.166%
pravno osebo oziroma;1;0.166%
operemo grozdje in;1;0.166%
resnična zgodba sami;1;0.166%
eko zelenjavne kocke;1;0.166%
volilna komisija za;1;0.166%
dala tiskarna gepard;1;0.166%
na križarjenju bosta;1;0.166%
ponudimo z rezino;1;0.166%
"števila članov ki";1;0.166%
pomaranči vinu in;1;0.166%
"ženskami v sutano";1;0.166%
o čemer se;1;0.166%
the late Frenki;1;0.166%
požigom z idejo;1;0.166%
19. aprila akreditirate;1;0.166%
na kolobarje narežemo;1;0.166%
de Mallorco Barcelono;1;0.166%
v drugi posodi;1;0.166%
na koncu vanjo;1;0.166%
Neaplja se ustavili;1;0.166%
dietna a okusna;1;0.166%
fantastica 5 popeljala;1;0.166%
na veličastni križarki;1;0.166%
onemu zdi ključno;1;0.166%
cerkev in pedri;1;0.166%
tem da je;1;0.166%
kovinska ogledala izdelana;1;0.166%
gospodarske koristi svojih;1;0.166%
društvu poslednjih dni;1;0.166%
sami si izberemo;1;0.166%
vode ki ji;1;0.166%
"čemer se mu";1;0.166%
izgubil dragoceno življenje;1;0.166%
idejo o društvu;1;0.166%
poglejte si kako;1;0.166%
po okusu in;1;0.166%
sanja a se;1;0.166%
bi postali sprejemljivi;1;0.166%
da je vse;1;0.166%
in ne bi;1;0.166%
z rezino pomaranče;1;0.166%
"času počitnic od";1;0.166%
ladja MSC fantastica;1;0.166%
vas bo ladja;1;0.166%
priokusom po grozdju;1;0.166%
kako jo odpraviti;1;0.166%
pekač vzamemo iz;1;0.166%
in požigom z;1;0.166%
v ogreto pečico;1;0.166%
idejo o kloniranju;1;0.166%
križarjenja je od;1;0.166%
ne sanja a;1;0.166%
srbeče kože je;1;0.166%
z idejo o;1;0.166%
"če je cerkvi";1;0.166%
kako izdelujejo ta;1;0.166%
narežemo korenje in;1;0.166%
grozdju pomaranči vinu;1;0.166%
drugo pravno osebo;1;0.166%
moko pecilni prašek;1;0.166%
"če jih cerkev";1;0.166%
ValentinRozman 29.03.2010 ob;1;0.166%
ajdovo kašo ki;1;0.166%
prav gosta in;1;0.166%
Marseille vse v;1;0.166%
srbečico in kako;1;0.166%
približno 20 minut;1;0.166%
kuhamo približno pol;1;0.166%
trg Brolo 3;1;0.166%
in pol eko;1;0.166%
Godec in Matjaž;1;0.166%
"članov ki ima";1;0.166%
ali drugo pravno;1;0.166%
vam zavračamo račun;1;0.166%
prašek in orehe;1;0.166%
popeljala med mesti;1;0.166%
"špasnem križarjanju vas";1;0.166%
tudi posebej uporabite;1;0.166%
solimo po okusu;1;0.166%
in mu odstranimo;1;0.166%
ima junak v;1;0.166%
1 word frequency percent
2 in ajdovo kašo 3 0.499%
3 težav s srbečo 2 0.333%
4 za pol ure 2 0.333%
5 46 učiteljica tomijeva 2 0.333%
6 elina starejša sestra 2 0.333%
7 izdelujejo kovinska ogledala 2 0.333%
8 62 vdova sodnica 2 0.333%
9 učiteljica tomijeva žena 2 0.333%
10 Jurkovič Jurkovička 62 2 0.333%
11 tomijeva žena elina 2 0.333%
12 kako se znebiti 2 0.333%
13 vdova sodnica elina 2 0.333%
14 Roblek Martika 46 2 0.333%
15 kako izdelujejo kovinska 2 0.333%
16 elina mlajša sestra 2 0.333%
17 Marta Fijavž Roblek 2 0.333%
18 znebiti težav s 2 0.333%
19 ustanovitev novih občin 2 0.333%
20 za ustanovitev novih 2 0.333%
21 sodnica elina starejša 2 0.333%
22 Martika 46 učiteljica 2 0.333%
23 Fijavž Roblek Martika 2 0.333%
24 se znebiti težav 2 0.333%
25 žena elina mlajša 2 0.333%
26 totenbirt the late 2 0.333%
27 tiskarna gepard 1 2 0.333%
28 Agata Jurkovič Jurkovička 2 0.333%
29 Jurkovička 62 vdova 2 0.333%
30 s srbečo kožo 2 0.333%
31 ne postane mehko 1 0.166%
32 ko dobro premešamo 1 0.166%
33 v življenju mogoče 1 0.166%
34 da bi goljufal 1 0.166%
35 Ivek 66 drunk 1 0.166%
36 vas nismo naročili 1 0.166%
37 posesti nekaj o 1 0.166%
38 posebej uporabite v 1 0.166%
39 ki ji dodamo 1 0.166%
40 okusu in kar 1 0.166%
41 lahko ustanovi podjetje 1 0.166%
42 Alenka Godec in 1 0.166%
43 junak ustanoivi društvo 1 0.166%
44 tako da bi 1 0.166%
45 novinarja Mojco Beljan 1 0.166%
46 torinskim prtom in 1 0.166%
47 da postane ravno 1 0.166%
48 cena ki nam 1 0.166%
49 kar v posodi 1 0.166%
50 začne nakazovati velike 1 0.166%
51 s paličnim mešalnikom 1 0.166%
52 na ladji bosta 1 0.166%
53 neškodljiv za želodec 1 0.166%
54 damo nazaj peči 1 0.166%
55 prošnja za akreditacijo 1 0.166%
56 bolje potem gre 1 0.166%
57 edinstvena kovinska ogledala 1 0.166%
58 da ohranimo nekaj 1 0.166%
59 MSC fantastica 5 1 0.166%
60 med mesti zahodnega 1 0.166%
61 izberemo pravo pot 1 0.166%
62 po testu posujemo 1 0.166%
63 Prison officer detective 1 0.166%
64 duhovnike več sinov 1 0.166%
65 in pedri če 1 0.166%
66 več sinov naroda 1 0.166%
67 Agata Jurkovič jurkovička 1 0.166%
68 judge Eli's older 1 0.166%
69 jurkovička 62 widow 1 0.166%
70 nekaj o čemer 1 0.166%
71 v čast jeseni 1 0.166%
72 jih strli na 1 0.166%
73 in damo nazaj 1 0.166%
74 izdelana v Indiji 1 0.166%
75 kloniranju antikrista vključno 1 0.166%
76 je bila ust 1 0.166%
77 cerkev v opreki 1 0.166%
78 dermatologinja Tanja Planinšek 1 0.166%
79 dl vode ki 1 0.166%
80 približno pol ure 1 0.166%
81 ravno prav gosta 1 0.166%
82 račun št. 2130 1 0.166%
83 Mallorco Barcelono in 1 0.166%
84 ima namen pospeševati 1 0.166%
85 gre za duhovnike 1 0.166%
86 zmešamo s paličnim 1 0.166%
87 zakon o zadrugah 1 0.166%
88 o kloniranju antikrista 1 0.166%
89 potekal 19. aprila 1 0.166%
90 naročnik je tiskarna 1 0.166%
91 postali sprejemljivi potem 1 0.166%
92 potem bi se 1 0.166%
93 Maja Končar Marta 1 0.166%
94 dekani ki bo 1 0.166%
95 jeseni naredimo torto 1 0.166%
96 mu začne nakazovati 1 0.166%
97 mladi istospolno usmerjeni 1 0.166%
98 ustavili v Palermu 1 0.166%
99 bert 09.11.2010 ob 1 0.166%
100 jeste tudi posebej 1 0.166%
101 dodamo moko pecilni 1 0.166%
102 totenbirt Ivek 66 1 0.166%
103 prtom in požigom 1 0.166%
104 druge pravne osebe 1 0.166%
105 posujemo grozdje in 1 0.166%
106 če se s 1 0.166%
107 pomarančo in orehe 1 0.166%
108 zato da ohranimo 1 0.166%
109 tega pa je 1 0.166%
110 ustanoivi društvo da 1 0.166%
111 Anandm Kofujem torinskim 1 0.166%
112 preprost recept za 1 0.166%
113 a okusna jed 1 0.166%
114 mesti zahodnega Sredozemlja 1 0.166%
115 Končar Marta Fijavž 1 0.166%
116 pri operaciji 666 1 0.166%
117 bi prinašala bogastvo 1 0.166%
118 komisija za ustanovitev 1 0.166%
119 medtem operemo grozdje 1 0.166%
120 in Danijela ceka 1 0.166%
121 jih cerkev ne 1 0.166%
122 in dekani ki 1 0.166%
123 si kako izdelujejo 1 0.166%
124 špasnega križarjenja je 1 0.166%
125 Marijana Brecelj Agata 1 0.166%
126 nazaj peči še 1 0.166%
127 zadruga je organizacija 1 0.166%
128 Jurkovič jurkovička 62 1 0.166%
129 se nam na 1 0.166%
130 sprejemljivi potem bi 1 0.166%
131 in Tunisu obiskali 1 0.166%
132 pridružite se nam 1 0.166%
133 vinu in orehih 1 0.166%
134 za duhovnike več 1 0.166%
135 namen zaradi katerega 1 0.166%
136 danes skorajda najpogostejša 1 0.166%
137 in upravljanju članov 1 0.166%
138 koncert Alenke Godec 1 0.166%
139 poleg tega pa 1 0.166%
140 komedija Matjaža Javšnika 1 0.166%
141 gosto juho želimo 1 0.166%
142 Brecelj Agata Jurkovič 1 0.166%
143 narod pa izumira 1 0.166%
144 smo jih strli 1 0.166%
145 ker tega mi 1 0.166%
146 aprila akreditirate naša 1 0.166%
147 s tem uresničuje 1 0.166%
148 palmo de Mallorco 1 0.166%
149 vključno z Anandm 1 0.166%
150 prispevek ki priča 1 0.166%
151 sinov naroda narod 1 0.166%
152 se mladi istospolno 1 0.166%
153 zdi ključno pri 1 0.166%
154 o tem da 1 0.166%
155 je cena ki 1 0.166%
156 cerkvi bolje potem 1 0.166%
157 ki ju lahko 1 0.166%
158 v enem tednu 1 0.166%
159 v posesti nekaj 1 0.166%
160 i. splošne določbe 1 0.166%
161 da za referendum 1 0.166%
162 nakazovati velike vsote 1 0.166%
163 prava pot tista 1 0.166%
164 grozdje in damo 1 0.166%
165 za korenčkovo juho 1 0.166%
166 drugo polovico zelenjavne 1 0.166%
167 8. aprila 1998 1 0.166%
168 kuhamo dokler korenje 1 0.166%
169 z narodom če 1 0.166%
170 Godec in komedija 1 0.166%
171 je dala tiskarna 1 0.166%
172 juho solimo po 1 0.166%
173 se ustavili v 1 0.166%
174 ter kuhamo dokler 1 0.166%
175 bo ladja MSC 1 0.166%
176 novih občin Ankaran 1 0.166%
177 na roko zato 1 0.166%
178 v Palermu in 1 0.166%
179 v približno liter 1 0.166%
180 posodi zavremo približno 1 0.166%
181 pristopu svobodnem izstopu 1 0.166%
182 grozdje vino pomarančo 1 0.166%
183 potem gre za 1 0.166%
184 Totenbirt Marta Fijavž 1 0.166%
185 jajci in sladkor 1 0.166%
186 kolobarje narežemo korenje 1 0.166%
187 in dietna a 1 0.166%
188 vino pomarančni sok 1 0.166%
189 pred ženskami v 1 0.166%
190 z vami koncert 1 0.166%
191 vami koncert Alenke 1 0.166%
192 pri vas nismo 1 0.166%
193 prezgodaj je izgubil 1 0.166%
194 tem uresničuje namen 1 0.166%
195 ju lahko jeste 1 0.166%
196 ne bi skrivali 1 0.166%
197 po grozdju pomaranči 1 0.166%
198 o križarjenju tukaj 1 0.166%
199 mehko približno 20 1 0.166%
200 s kompasom odkrivajte 1 0.166%
201 peči še za 1 0.166%
202 1 veliko nižja 1 0.166%
203 kocke in ajdovo 1 0.166%
204 in damo v 1 0.166%
205 je izgubil dragoceno 1 0.166%
206 mogoče da je 1 0.166%
207 21. 2. 1998 1 0.166%
208 z Anandm Kofujem 1 0.166%
209 koncu vanjo dodamo 1 0.166%
210 jesenska torta vsebuje 1 0.166%
211 Zucco 1st Prison 1 0.166%
212 ohranimo nekaj večjih 1 0.166%
213 widow judge Eli's 1 0.166%
214 5 popeljala med 1 0.166%
215 uresničuje namen zaradi 1 0.166%
216 vse v življenju 1 0.166%
217 zgodba sami si 1 0.166%
218 v posodi zmešamo 1 0.166%
219 vlijemo v okrogel 1 0.166%
220 pečico za pol 1 0.166%
221 oziroma postane članica 1 0.166%
222 zadruga lahko ustanovi 1 0.166%
223 Koper 8. aprila 1 0.166%
224 društvo da bi 1 0.166%
225 zaradi katerega je 1 0.166%
226 okrogel pekač in 1 0.166%
227 Alenke Godec in 1 0.166%
228 mika 08.11.2010 ob 1 0.166%
229 boste od genove 1 0.166%
230 Matjaža Javšnika optimist 1 0.166%
231 juho želimo na 1 0.166%
232 je danes skorajda 1 0.166%
233 kaj povzroča srbečico 1 0.166%
234 opreki z narodom 1 0.166%
235 gepard 1 veliko 1 0.166%
236 polovico zelenjavne kocke 1 0.166%
237 officer detective 1st 1 0.166%
238 je blagajna zaprta 1 0.166%
239 drugo zadrugo ali 1 0.166%
240 pospeševati gospodarske koristi 1 0.166%
241 Koper 21. 2. 1 0.166%
242 pomarančni sok in 1 0.166%
243 pojav srbeče kože 1 0.166%
244 operemo korenje in 1 0.166%
245 dokler korenje ne 1 0.166%
246 namen pospeševati gospodarske 1 0.166%
247 viroz saj je 1 0.166%
248 ter temelji na 1 0.166%
249 postane članica druge 1 0.166%
250 ki bo potekal 1 0.166%
251 št. 2130 97 1 0.166%
252 pravo pot prispevek 1 0.166%
253 kašo ki ju 1 0.166%
254 in Marseille vse 1 0.166%
255 veličastna mesta zahodnega 1 0.166%
256 medtem v drugi 1 0.166%
257 za referendum za 1 0.166%
258 osebe če se 1 0.166%
259 ki ima namen 1 0.166%
260 je tiskarna gepard 1 0.166%
261 zgodba prezgodaj je 1 0.166%
262 08.11.2010 ob 09:56 1 0.166%
263 zadrugo ali drugo 1 0.166%
264 naj bi prinašala 1 0.166%
265 Šmarje marezige in 1 0.166%
266 odpraviti pojasnjuje dermatologinja 1 0.166%
267 članov ter temelji 1 0.166%
268 osebo oziroma postane 1 0.166%
269 2130 97 ker 1 0.166%
270 križarjenju bosta z 1 0.166%
271 genove do Neaplja 1 0.166%
272 je zelo lahek 1 0.166%
273 okusna jed je 1 0.166%
274 ker ima junak 1 0.166%
275 iz pečice po 1 0.166%
276 korenčkovo juho in 1 0.166%
277 organizacija vnaprej nedoločenega 1 0.166%
278 zelenjavne kocke in 1 0.166%
279 postane ravno prav 1 0.166%
280 od genove do 1 0.166%
281 aranžma špasnega križarjenja 1 0.166%
282 korenje ne postane 1 0.166%
283 kože je danes 1 0.166%
284 Roberto Zucco 1st 1 0.166%
285 uporabite v času 1 0.166%
286 vanjo dodamo še 1 0.166%
287 Tunisu obiskali palmo 1 0.166%
288 vode odvisno kako 1 0.166%
289 prosimo da za 1 0.166%
290 tega mi pri 1 0.166%
291 bosta zvezdniška gosta 1 0.166%
292 pravne osebe če 1 0.166%
293 v času viroz 1 0.166%
294 Matjaž Koper 21. 1 0.166%
295 bo potekal 19. 1 0.166%
296 saj je zelo 1 0.166%
297 sok in lupinico 1 0.166%
298 testu posujemo grozdje 1 0.166%
299 zelenjavne kocke ter 1 0.166%
300 križarjanju vas bo 1 0.166%
301 kako gosto juho 1 0.166%
302 maso vlijemo v 1 0.166%
303 dobro premešamo dodamo 1 0.166%
304 pečice po testu 1 0.166%
305 in sladkor penasto 1 0.166%
306 do Neaplja se 1 0.166%
307 damo v ogreto 1 0.166%
308 nekdo mu začne 1 0.166%
309 jogurt olje vino 1 0.166%
310 skorajda najpogostejša motnja 1 0.166%
311 v okrogel pekač 1 0.166%
312 pa je cena 1 0.166%
313 je cerkvi bolje 1 0.166%
314 eko zelenjavna kocka 1 0.166%
315 priča o tem 1 0.166%
316 občin Ankaran škofije 1 0.166%
317 naredimo torto s 1 0.166%
318 članica druge pravne 1 0.166%
319 resnična zgodba prezgodaj 1 0.166%
320 prinašala bogastvo in 1 0.166%
321 detective 1st police 1 0.166%
322 roko zato da 1 0.166%
323 ajdovo kašo in 1 0.166%
324 se mu ne 1 0.166%
325 dunajska cesta 22 1 0.166%
326 naša novinarja Mojco 1 0.166%
327 istospolno usmerjeni proklamirali 1 0.166%
328 24.10 do 31.10 1 0.166%
329 približno liter vode 1 0.166%
330 korenčkovo juho solimo 1 0.166%
331 podjetje drugo zadrugo 1 0.166%
332 cerkev ne bi 1 0.166%
333 življenju mogoče da 1 0.166%
334 pojasnjuje dermatologinja Tanja 1 0.166%
335 koristi svojih članov 1 0.166%
336 odvisno kako gosto 1 0.166%
337 kašo in dietna 1 0.166%
338 v času počitnic 1 0.166%
339 v Indiji naj 1 0.166%
340 09.11.2010 ob 12:35 1 0.166%
341 gosta in tekoča 1 0.166%
342 nedoločenega števila članov 1 0.166%
343 pecilni prašek in 1 0.166%
344 katerega je bila 1 0.166%
345 a se onemu 1 0.166%
346 1st police officer 1 0.166%
347 1st Prison officer 1 0.166%
348 lahko jeste tudi 1 0.166%
349 se s tem 1 0.166%
350 pedri če jih 1 0.166%
351 križati idejo o 1 0.166%
352 in komedija Matjaža 1 0.166%
353 se onemu zdi 1 0.166%
354 na prostovoljnem pristopu 1 0.166%
355 da je prava 1 0.166%
356 postane mehko približno 1 0.166%
357 ustanovi podjetje drugo 1 0.166%
358 času viroz saj 1 0.166%
359 z odličnimi pozdravi 1 0.166%
360 želimo na kolobarje 1 0.166%
361 gosta Alenka Godec 1 0.166%
362 in kako jo 1 0.166%
363 proklamirali in ne 1 0.166%
364 v opreki z 1 0.166%
365 mu odstranimo peške 1 0.166%
366 Palermu in Tunisu 1 0.166%
367 Kofujem torinskim prtom 1 0.166%
368 Indiji naj bi 1 0.166%
369 kompasom odkrivajte veličastna 1 0.166%
370 ki smo jih 1 0.166%
371 in kar v 1 0.166%
372 še jogurt olje 1 0.166%
373 pot tista ki 1 0.166%
374 Detela Matjaž Koper 1 0.166%
375 obiskali palmo de 1 0.166%
376 recept za korenčkovo 1 0.166%
377 ta unikatna ogledala 1 0.166%
378 na jesenskem špasnem 1 0.166%
379 pot prispevek ki 1 0.166%
380 narodom če je 1 0.166%
381 si izberemo pravo 1 0.166%
382 bogastvo in srečo 1 0.166%
383 ladji bosta zvezdniška 1 0.166%
384 Beljan in Danijela 1 0.166%
385 vnaprej nedoločenega števila 1 0.166%
386 sodelovanju in upravljanju 1 0.166%
387 približno 3 dl 1 0.166%
388 pol eko zelenjavne 1 0.166%
389 totenbirt Agata Jurkovič 1 0.166%
390 počitnic od 1.7 1 0.166%
391 jo odpraviti pojasnjuje 1 0.166%
392 zavračamo račun št. 1 0.166%
393 dodamo še jogurt 1 0.166%
394 Barcelono in Marseille 1 0.166%
395 križarili boste od 1 0.166%
396 ključno pri operaciji 1 0.166%
397 paličnim mešalnikom da 1 0.166%
398 Totenbirt Agata Jurkovič 1 0.166%
399 anatemizirala tako da 1 0.166%
400 je od 24.10 1 0.166%
401 torta vsebuje grozdje 1 0.166%
402 zvezdniška gosta Alenka 1 0.166%
403 še ajdovo kašo 1 0.166%
404 da bi postali 1 0.166%
405 še za pol 1 0.166%
406 zelo lahek in 1 0.166%
407 orehe ki smo 1 0.166%
408 nekaj večjih koščkov 1 0.166%
409 ki priča o 1 0.166%
410 korenje in ajdovo 1 0.166%
411 čast jeseni naredimo 1 0.166%
412 temelji na prostovoljnem 1 0.166%
413 izdelujejo ta unikatna 1 0.166%
414 pekač in damo 1 0.166%
415 strahu pred ženskami 1 0.166%
416 antikrista vključno z 1 0.166%
417 več o križarjenju 1 0.166%
418 je organizacija vnaprej 1 0.166%
419 mu ne sanja 1 0.166%
420 svobodnem izstopu enakopravnem 1 0.166%
421 kocke ter kuhamo 1 0.166%
422 vse v enem 1 0.166%
423 Mojco Beljan in 1 0.166%
424 premešamo dodamo še 1 0.166%
425 Eli's older sister 1 0.166%
426 posodi zmešamo s 1 0.166%
427 bi anatemizirala tako 1 0.166%
428 prostovoljnem pristopu svobodnem 1 0.166%
429 in neškodljiv za 1 0.166%
430 svojih članov ter 1 0.166%
431 29.03.2010 ob 12:25 1 0.166%
432 skrivali svojega strahu 1 0.166%
433 vsebuje grozdje vino 1 0.166%
434 sladkor penasto umešamo 1 0.166%
435 ki nam jo 1 0.166%
436 Tanja Planinšek Ručigaj 1 0.166%
437 vzamemo iz pečice 1 0.166%
438 olje vino pomarančni 1 0.166%
439 ogledala izdelana v 1 0.166%
440 enakopravnem sodelovanju in 1 0.166%
441 referendum za ustanovitev 1 0.166%
442 v prilogi vam 1 0.166%
443 dodamo drugo polovico 1 0.166%
444 od 24.10 do 1 0.166%
445 izstopu enakopravnem sodelovanju 1 0.166%
446 bi skrivali svojega 1 0.166%
447 odkrivajte veličastna mesta 1 0.166%
448 mešalnikom da postane 1 0.166%
449 Ankaran škofije Šmarje 1 0.166%
450 in orehe ki 1 0.166%
451 zavremo približno 3 1 0.166%
452 bi se mladi 1 0.166%
453 dodamo še ajdovo 1 0.166%
454 škofije Šmarje marezige 1 0.166%
455 nam jo je 1 0.166%
456 povzroča srbečico in 1 0.166%
457 bosta z vami 1 0.166%
458 je vse v 1 0.166%
459 jesenskem špasnem križarjanju 1 0.166%
460 akreditirate naša novinarja 1 0.166%
461 97 ker tega 1 0.166%
462 juho in ajdovo 1 0.166%
463 korenje in pol 1 0.166%
464 ogreto pečico za 1 0.166%
465 o društvu poslednjih 1 0.166%
466 junak v posesti 1 0.166%
467 liter vode odvisno 1 0.166%
468 strli na roko 1 0.166%
469 in Matjaž Javšnik 1 0.166%
470 grozdje in mu 1 0.166%
471 marezige in dekani 1 0.166%
472 nam na veličastni 1 0.166%
473 je prava pot 1 0.166%
474 usmerjeni proklamirali in 1 0.166%
475 mi pri vas 1 0.166%
476 drugi posodi zavremo 1 0.166%
477 ji dodamo drugo 1 0.166%
478 mesta zahodnega Sredozemlja 1 0.166%
479 torto s priokusom 1 0.166%
480 62 widow judge 1 0.166%
481 vino pomarančo in 1 0.166%
482 jo je dala 1 0.166%
483 prilogi vam zavračamo 1 0.166%
484 naroda narod pa 1 0.166%
485 s priokusom po 1 0.166%
486 ne bi anatemizirala 1 0.166%
487 svojega strahu pred 1 0.166%
488 the late južek 1 0.166%
489 3 dl vode 1 0.166%
490 jed je nared 1 0.166%
491 lahek in neškodljiv 1 0.166%
492 pravno osebo oziroma 1 0.166%
493 operemo grozdje in 1 0.166%
494 resnična zgodba sami 1 0.166%
495 eko zelenjavne kocke 1 0.166%
496 volilna komisija za 1 0.166%
497 dala tiskarna gepard 1 0.166%
498 na križarjenju bosta 1 0.166%
499 ponudimo z rezino 1 0.166%
500 števila članov ki 1 0.166%
501 pomaranči vinu in 1 0.166%
502 ženskami v sutano 1 0.166%
503 o čemer se 1 0.166%
504 the late Frenki 1 0.166%
505 požigom z idejo 1 0.166%
506 19. aprila akreditirate 1 0.166%
507 na kolobarje narežemo 1 0.166%
508 de Mallorco Barcelono 1 0.166%
509 v drugi posodi 1 0.166%
510 na koncu vanjo 1 0.166%
511 Neaplja se ustavili 1 0.166%
512 dietna a okusna 1 0.166%
513 fantastica 5 popeljala 1 0.166%
514 na veličastni križarki 1 0.166%
515 onemu zdi ključno 1 0.166%
516 cerkev in pedri 1 0.166%
517 tem da je 1 0.166%
518 kovinska ogledala izdelana 1 0.166%
519 gospodarske koristi svojih 1 0.166%
520 društvu poslednjih dni 1 0.166%
521 sami si izberemo 1 0.166%
522 vode ki ji 1 0.166%
523 čemer se mu 1 0.166%
524 izgubil dragoceno življenje 1 0.166%
525 idejo o društvu 1 0.166%
526 poglejte si kako 1 0.166%
527 po okusu in 1 0.166%
528 sanja a se 1 0.166%
529 bi postali sprejemljivi 1 0.166%
530 da je vse 1 0.166%
531 in ne bi 1 0.166%
532 z rezino pomaranče 1 0.166%
533 času počitnic od 1 0.166%
534 ladja MSC fantastica 1 0.166%
535 vas bo ladja 1 0.166%
536 priokusom po grozdju 1 0.166%
537 kako jo odpraviti 1 0.166%
538 pekač vzamemo iz 1 0.166%
539 in požigom z 1 0.166%
540 v ogreto pečico 1 0.166%
541 idejo o kloniranju 1 0.166%
542 križarjenja je od 1 0.166%
543 ne sanja a 1 0.166%
544 srbeče kože je 1 0.166%
545 z idejo o 1 0.166%
546 če je cerkvi 1 0.166%
547 kako izdelujejo ta 1 0.166%
548 narežemo korenje in 1 0.166%
549 grozdju pomaranči vinu 1 0.166%
550 drugo pravno osebo 1 0.166%
551 moko pecilni prašek 1 0.166%
552 če jih cerkev 1 0.166%
553 ValentinRozman 29.03.2010 ob 1 0.166%
554 ajdovo kašo ki 1 0.166%
555 prav gosta in 1 0.166%
556 Marseille vse v 1 0.166%
557 srbečico in kako 1 0.166%
558 približno 20 minut 1 0.166%
559 kuhamo približno pol 1 0.166%
560 trg Brolo 3 1 0.166%
561 in pol eko 1 0.166%
562 Godec in Matjaž 1 0.166%
563 članov ki ima 1 0.166%
564 ali drugo pravno 1 0.166%
565 vam zavračamo račun 1 0.166%
566 prašek in orehe 1 0.166%
567 popeljala med mesti 1 0.166%
568 špasnem križarjanju vas 1 0.166%
569 tudi posebej uporabite 1 0.166%
570 solimo po okusu 1 0.166%
571 in mu odstranimo 1 0.166%
572 ima junak v 1 0.166%

View File

@@ -0,0 +1,350 @@
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0012405" xml:lang="sl">
<teiHeader>
<fileDesc>
<titleStmt>
<title>Gigafida: Branko Gradišnik. ANTI2(1999)</title>
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
</funder>
</titleStmt>
<editionStmt>
<edition>1.0</edition>
</editionStmt>
<extent>52 besed</extent>
<publicationStmt>
<idno>ANTI2</idno>
<availability status="restricted">
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
</p>
</availability>
<date>2012-04-15</date>
</publicationStmt>
<sourceDesc>
<bibl>
<title n="???">neznani naslov</title>
<author>Branko Gradišnik</author>
<date>1999</date>
<publisher n="drugo">neznani založnik</publisher>
<note type="sourceLang"/>
</bibl>
</sourceDesc>
</fileDesc>
<encodingDesc>
<projectDesc>
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
</p>
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
</p>
</projectDesc>
<tagsDecl>
<namespace name="http://www.tei-c.org/ns/1.0">
<tagUsage gi="S" occurs="50"/>
<tagUsage gi="body" occurs="1"/>
<tagUsage gi="c" occurs="11"/>
<tagUsage gi="p" occurs="2"/>
<tagUsage gi="s" occurs="5"/>
<tagUsage gi="text" occurs="1"/>
<tagUsage gi="w" occurs="52"/>
</namespace>
</tagsDecl>
<appInfo>
<application ident="Amebis_pretvornik" version="1.0">
<label>[ZDRUZEVANJE] 1:1</label>
<label>[IME] D:\FIDA\KORPUS\VNOS\2_ZDR\ANTI2.ZDR</label>
<label>[1] **********</label>
<label>[IZVOR] D:\FIDA\KORPUS\Vhod\Brane\IZVIRNO\LITERAT\IDEJE\Anti2.doc</label>
<label>[FORMAT] MS Word for Windows 6.0/7.0</label>
<label>[DATUM] 2.12.1999</label>
<label>[IZVOR_RTF] D:\FIDA\KORPUS\Vhod\Brane\IZVIRNO\LITERAT\IDEJE\Anti2.RTF</label>
<label>[PRETVORBA] RTF</label>
<label>[KONEC] **********</label>
</application>
</appInfo>
<classDecl>
<taxonomy xml:id="SSJ">
<category xml:id="SSJ.T">
<catDesc>tisk</catDesc>
<category xml:id="SSJ.T.K">
<catDesc>knjižno</catDesc>
<category xml:id="SSJ.T.K.L">
<catDesc>leposlovno</catDesc>
</category>
<category xml:id="SSJ.T.K.S">
<catDesc>strokovno</catDesc>
</category>
</category>
<category xml:id="SSJ.T.P">
<catDesc>periodično</catDesc>
<category xml:id="SSJ.T.P.C">
<catDesc>časopis</catDesc>
</category>
<category xml:id="SSJ.T.P.R">
<catDesc>revija</catDesc>
</category>
</category>
<category xml:id="SSJ.T.D">
<catDesc>drugo</catDesc>
</category>
</category>
<category xml:id="SSJ.I">
<catDesc>internet</catDesc>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.P">
<catDesc>prenosnik</catDesc>
<category xml:id="Ft.P.G">
<catDesc>govorni</catDesc>
</category>
<category xml:id="Ft.P.E">
<catDesc>elektronski</catDesc>
</category>
<category xml:id="Ft.P.P">
<catDesc>pisni</catDesc>
<category xml:id="Ft.P.P.O">
<catDesc>objavljeno</catDesc>
<category xml:id="Ft.P.P.O.K">
<catDesc>knjižno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P">
<catDesc>periodično</catDesc>
<category xml:id="Ft.P.P.O.P.C">
<catDesc>časopisno</catDesc>
<category xml:id="Ft.P.P.O.P.C.D">
<catDesc>dnevno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.C.V">
<catDesc>večkrat tedensko</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.C.T">
<catDesc>tedensko</catDesc>
</category>
</category>
<category xml:id="Ft.P.P.O.P.R">
<catDesc>revialno</catDesc>
<category xml:id="Ft.P.P.O.P.R.T">
<catDesc>tedensko</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.S">
<catDesc>štirinajstdnevno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.M">
<catDesc>mesečno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.D">
<catDesc>redkeje kot na mesec</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.O">
<catDesc>občasno</catDesc>
</category>
</category>
</category>
</category>
<category xml:id="Ft.P.P.N">
<catDesc>neobjavljeno</catDesc>
<category xml:id="Ft.P.P.N.J">
<catDesc>javno</catDesc>
</category>
<category xml:id="Ft.P.P.N.I">
<catDesc>interno</catDesc>
</category>
<category xml:id="Ft.P.P.N.Z">
<catDesc>zasebno</catDesc>
</category>
</category>
</category>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.Z">
<catDesc>zvrst</catDesc>
<category xml:id="Ft.Z.U">
<catDesc>umetnostna</catDesc>
<category xml:id="Ft.Z.U.P">
<catDesc>pesniška</catDesc>
</category>
<category xml:id="Ft.Z.U.R">
<catDesc>prozna</catDesc>
</category>
<category xml:id="Ft.Z.U.D">
<catDesc>dramska</catDesc>
</category>
</category>
<category xml:id="Ft.Z.N">
<catDesc>neumetnostna</catDesc>
<category xml:id="Ft.Z.N.S">
<catDesc>strokovna</catDesc>
<category xml:id="Ft.Z.N.S.H">
<catDesc>humanistična in družboslovna</catDesc>
</category>
<category xml:id="Ft.Z.N.S.N">
<catDesc>naravoslovna in tehnična</catDesc>
</category>
</category>
<category xml:id="Ft.Z.N.N">
<catDesc>nestrokovna</catDesc>
</category>
<category xml:id="Ft.Z.N.P">
<catDesc>pravna</catDesc>
</category>
</category>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.L">
<catDesc>lektorirano</catDesc>
<category xml:id="Ft.L.D">
<catDesc>da</catDesc>
</category>
<category xml:id="Ft.L.N">
<catDesc>ne</catDesc>
</category>
</category>
</taxonomy>
</classDecl>
</encodingDesc>
<profileDesc>
<textClass>
<catRef target="#SSJ.T.K.S"/>
<catRef target="#Ft.P.P.N.Z"/>
<catRef target="#Ft.Z.N.N"/>
</textClass>
</profileDesc>
</teiHeader>
<text xml:id="F0012405." xml:lang="sl">
<body>
<p>
<s>
<w msd="Somei" lemma="junak">Junak</w>
<S/>
<w msd="Ggnste" lemma="ustanoiveti">ustanoivi</w>
<S/>
<w msd="Sosei" lemma="društvo">društvo</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="da">da</w>
<S/>
<w msd="Gp-g" lemma="biti">bi</w>
<S/>
<w msd="Ggnd-em" lemma="goljufati">goljufal</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Zn-mei" lemma="nekdo">Nekdo</w>
<S/>
<w msd="Zotmed--k" lemma="on">mu</w>
<S/>
<w msd="Ggdste" lemma="začeti">začne</w>
<S/>
<w msd="Ggnn" lemma="nakazovati">nakazovati</w>
<S/>
<w msd="Ppnzmt" lemma="velik">velike</w>
<S/>
<w msd="Sozmt" lemma="vsota">vsote</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Rsn" lemma="zakaj">Zakaj</w>
<c>?</c>
<S/>
</s>
<s>
<w msd="Vd" lemma="ker">Ker</w>
<S/>
<w msd="Ggnste-n" lemma="imeti">ima</w>
<S/>
<w msd="Somei" lemma="junak">junak</w>
<S/>
<w msd="Dm" lemma="v">v</w>
<S/>
<w msd="Sozem" lemma="posest">posesti</w>
<S/>
<w msd="Rsn" lemma="nekaj">nekaj</w>
<c>,</c>
<S/>
<w msd="Dm" lemma="o">o</w>
<S/>
<w msd="Zz-sem" lemma="kar">čemer</w>
<S/>
<w msd="Zp------k" lemma="se">se</w>
<S/>
<w msd="Zotmed--k" lemma="on">mu</w>
<S/>
<w msd="L" lemma="ne">ne</w>
<S/>
<w msd="Ggnste" lemma="sanjati">sanja</w>
<c>,</c>
<S/>
<w msd="Vp" lemma="a">a</w>
<S/>
<w msd="Zp------k" lemma="se">se</w>
<S/>
<w msd="Zk-sed" lemma="oni">onemu</w>
<S/>
<w msd="Ggnste" lemma="zdeti">zdi</w>
<S/>
<w msd="Ppnsei" lemma="ključen">ključno</w>
<S/>
<w msd="Dm" lemma="pri">pri</w>
<S/>
<w msd="Sozem" lemma="operacija">operaciji</w>
<S/>
<w msd="Kag" lemma="666">666</w>
<c>.</c>
</s>
</p>
<p>
<s>
<w msd="Ggvn" lemma="križati">Križati</w>
<S/>
<w msd="Sozet" lemma="ideja">idejo</w>
<S/>
<w msd="Dm" lemma="o">o</w>
<S/>
<w msd="Sosem" lemma="kloniranje">kloniranju</w>
<S/>
<w msd="Somer" lemma="antikrist">Antikrista</w>
<S/>
<c>(</c>
<w msd="Rsn" lemma="vključno">vključno</w>
<S/>
<w msd="Do" lemma="z">z</w>
<S/>
<w msd="Slmeo" lemma="Anand">Anandm</w>
<S/>
<w msd="Slmeo" lemma="Kofu">Kofujem</w>
<c>,</c>
<S/>
<w msd="Ppnmeo" lemma="torinski">torinskim</w>
<S/>
<w msd="Someo" lemma="prt">prtom</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Someo" lemma="požig">požigom</w>
<c>)</c>
<S/>
<w msd="Do" lemma="z">z</w>
<S/>
<w msd="Sozeo" lemma="ideja">idejo</w>
<S/>
<w msd="Dm" lemma="o">o</w>
<S/>
<w msd="Sosem" lemma="društvo">Društvu</w>
<S/>
<w msd="Ppnmmr" lemma="poslednji">poslednjih</w>
<S/>
<w msd="Sommr" lemma="dan">dni</w>
<c>.</c>
</s>
</p>
</body>
</text>
</TEI>

View File

@@ -0,0 +1,367 @@
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0016316" xml:lang="sl">
<teiHeader>
<fileDesc>
<titleStmt>
<title>Gigafida: Državni zbor Republike Slovenije. ZZad. (1992)</title>
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
</funder>
</titleStmt>
<editionStmt>
<edition>1.0</edition>
</editionStmt>
<extent>62 besed</extent>
<publicationStmt>
<idno>A0050230</idno>
<availability status="restricted">
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
</p>
</availability>
<date>2012-04-15</date>
</publicationStmt>
<sourceDesc>
<bibl>
<pubPlace>Ljubljana</pubPlace>
<title>ZZad</title>
<author>Državni zbor Republike Slovenije</author>
<date>1992</date>
<publisher n="Državni zbor Republike Slovenije">Državni zbor Republike Slovenije</publisher>
<note type="sourceLang"/>
</bibl>
</sourceDesc>
</fileDesc>
<encodingDesc>
<projectDesc>
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
</p>
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
</p>
</projectDesc>
<tagsDecl>
<namespace name="http://www.tei-c.org/ns/1.0">
<tagUsage gi="S" occurs="58"/>
<tagUsage gi="body" occurs="1"/>
<tagUsage gi="c" occurs="7"/>
<tagUsage gi="p" occurs="5"/>
<tagUsage gi="s" occurs="5"/>
<tagUsage gi="text" occurs="1"/>
<tagUsage gi="w" occurs="62"/>
</namespace>
</tagsDecl>
<appInfo>
<application ident="Amebis_pretvornik" version="1.0">
<label>[AVTOMATSKO] DZZAK</label>
<label>[IME] A0050230</label>
<label>[IZVOR] d:\fida\korpus\vhod\dzzak\0118.txt</label>
<label>[DATUM] 7.4.2000</label>
</application>
</appInfo>
<classDecl>
<taxonomy xml:id="SSJ">
<category xml:id="SSJ.T">
<catDesc>tisk</catDesc>
<category xml:id="SSJ.T.K">
<catDesc>knjižno</catDesc>
<category xml:id="SSJ.T.K.L">
<catDesc>leposlovno</catDesc>
</category>
<category xml:id="SSJ.T.K.S">
<catDesc>strokovno</catDesc>
</category>
</category>
<category xml:id="SSJ.T.P">
<catDesc>periodično</catDesc>
<category xml:id="SSJ.T.P.C">
<catDesc>časopis</catDesc>
</category>
<category xml:id="SSJ.T.P.R">
<catDesc>revija</catDesc>
</category>
</category>
<category xml:id="SSJ.T.D">
<catDesc>drugo</catDesc>
</category>
</category>
<category xml:id="SSJ.I">
<catDesc>internet</catDesc>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.P">
<catDesc>prenosnik</catDesc>
<category xml:id="Ft.P.G">
<catDesc>govorni</catDesc>
</category>
<category xml:id="Ft.P.E">
<catDesc>elektronski</catDesc>
</category>
<category xml:id="Ft.P.P">
<catDesc>pisni</catDesc>
<category xml:id="Ft.P.P.O">
<catDesc>objavljeno</catDesc>
<category xml:id="Ft.P.P.O.K">
<catDesc>knjižno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P">
<catDesc>periodično</catDesc>
<category xml:id="Ft.P.P.O.P.C">
<catDesc>časopisno</catDesc>
<category xml:id="Ft.P.P.O.P.C.D">
<catDesc>dnevno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.C.V">
<catDesc>večkrat tedensko</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.C.T">
<catDesc>tedensko</catDesc>
</category>
</category>
<category xml:id="Ft.P.P.O.P.R">
<catDesc>revialno</catDesc>
<category xml:id="Ft.P.P.O.P.R.T">
<catDesc>tedensko</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.S">
<catDesc>štirinajstdnevno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.M">
<catDesc>mesečno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.D">
<catDesc>redkeje kot na mesec</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.O">
<catDesc>občasno</catDesc>
</category>
</category>
</category>
</category>
<category xml:id="Ft.P.P.N">
<catDesc>neobjavljeno</catDesc>
<category xml:id="Ft.P.P.N.J">
<catDesc>javno</catDesc>
</category>
<category xml:id="Ft.P.P.N.I">
<catDesc>interno</catDesc>
</category>
<category xml:id="Ft.P.P.N.Z">
<catDesc>zasebno</catDesc>
</category>
</category>
</category>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.Z">
<catDesc>zvrst</catDesc>
<category xml:id="Ft.Z.U">
<catDesc>umetnostna</catDesc>
<category xml:id="Ft.Z.U.P">
<catDesc>pesniška</catDesc>
</category>
<category xml:id="Ft.Z.U.R">
<catDesc>prozna</catDesc>
</category>
<category xml:id="Ft.Z.U.D">
<catDesc>dramska</catDesc>
</category>
</category>
<category xml:id="Ft.Z.N">
<catDesc>neumetnostna</catDesc>
<category xml:id="Ft.Z.N.S">
<catDesc>strokovna</catDesc>
<category xml:id="Ft.Z.N.S.H">
<catDesc>humanistična in družboslovna</catDesc>
</category>
<category xml:id="Ft.Z.N.S.N">
<catDesc>naravoslovna in tehnična</catDesc>
</category>
</category>
<category xml:id="Ft.Z.N.N">
<catDesc>nestrokovna</catDesc>
</category>
<category xml:id="Ft.Z.N.P">
<catDesc>pravna</catDesc>
</category>
</category>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.L">
<catDesc>lektorirano</catDesc>
<category xml:id="Ft.L.D">
<catDesc>da</catDesc>
</category>
<category xml:id="Ft.L.N">
<catDesc>ne</catDesc>
</category>
</category>
</taxonomy>
</classDecl>
</encodingDesc>
<profileDesc>
<textClass>
<catRef target="#SSJ.T.D"/>
<catRef target="#Ft.P.P.O"/>
<catRef target="#Ft.Z.N.S.H"/>
<catRef target="#Ft.L.D"/>
</textClass>
</profileDesc>
</teiHeader>
<text xml:id="F0016316." xml:lang="sl">
<body>
<p>
<s>
<w msd="Kav" lemma="1.">1.</w>
<S/>
<w msd="Somei" lemma="člen">člen</w>
</s>
</p>
<p>
<s>
<w msd="Krv" lemma="I.">I.</w>
<S/>
<w msd="Ppnzer" lemma="splošen">SPLOŠNE</w>
<S/>
<w msd="Sozer" lemma="določba">DOLOČBE</w>
</s>
</p>
<p>
<s>
<w msd="Sozei" lemma="zadruga">Zadruga</w>
<S/>
<w msd="Rsn" lemma="lahko">lahko</w>
<S/>
<w msd="Ggdste" lemma="ustanoviti">ustanovi</w>
<S/>
<w msd="Soset" lemma="podjetje">podjetje</w>
<c>,</c>
<S/>
<w msd="Kbzzet" lemma="drug">drugo</w>
<S/>
<w msd="Sozet" lemma="zadruga">zadrugo</w>
<S/>
<w msd="Vp" lemma="ali">ali</w>
<S/>
<w msd="Kbzzet" lemma="drug">drugo</w>
<S/>
<w msd="Ppnzet" lemma="praven">pravno</w>
<S/>
<w msd="Sozet" lemma="oseba">osebo</w>
<S/>
<w msd="Vp" lemma="oziroma">oziroma</w>
<S/>
<w msd="Ggdste" lemma="postati">postane</w>
<S/>
<w msd="Sozei" lemma="članica">članica</w>
<S/>
<w msd="Sozer" lemma="druga">druge</w>
<S/>
<w msd="Ppnzer" lemma="praven">pravne</w>
<S/>
<w msd="Sozer" lemma="oseba">osebe</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="če">če</w>
<S/>
<w msd="Zp------k" lemma="se">se</w>
<S/>
<w msd="Do" lemma="z">s</w>
<S/>
<w msd="Zk-seo" lemma="ta">tem</w>
<S/>
<w msd="Ggnste" lemma="uresničevati">uresničuje</w>
<S/>
<w msd="Sometn" lemma="namen">namen</w>
<c>,</c>
<S/>
<w msd="Dr" lemma="zaradi">zaradi</w>
<S/>
<w msd="Zv-mer" lemma="kateri">katerega</w>
<S/>
<w msd="Gp-ste-n" lemma="biti">je</w>
<S/>
<w msd="Gp-d-ez" lemma="biti">bila</w>
<S/>
<w msd="Sosmr" lemma="usta">ust</w>
<S/>
</s>
</p>
<p>
<s>
<w msd="Somei" lemma="zakon">ZAKON</w>
<S/>
<w msd="Dm" lemma="o">O</w>
<S/>
<w msd="Sozmm" lemma="zadruga">ZADRUGAH</w>
</s>
</p>
<p>
<s>
<w msd="Sozei" lemma="zadruga">Zadruga</w>
<S/>
<w msd="Gp-ste-n" lemma="biti">je</w>
<S/>
<w msd="Sozei" lemma="organizacija">organizacija</w>
<S/>
<w msd="Rsn" lemma="vnaprej">vnaprej</w>
<S/>
<w msd="Ppnser" lemma="nedoločen">nedoločenega</w>
<S/>
<w msd="Soser" lemma="število">števila</w>
<S/>
<w msd="Sommr" lemma="član">članov</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="ki">ki</w>
<S/>
<w msd="Ggnste-n" lemma="imeti">ima</w>
<S/>
<w msd="Sometn" lemma="namen">namen</w>
<S/>
<w msd="Ggnn" lemma="pospeševati">pospeševati</w>
<S/>
<w msd="Ppnzmt" lemma="gospodarski">gospodarske</w>
<S/>
<w msd="Sozer" lemma="korist">koristi</w>
<S/>
<w msd="Zp-mmr" lemma="svoj">svojih</w>
<S/>
<w msd="Sommr" lemma="član">članov</w>
<S/>
<w msd="Vp" lemma="ter">ter</w>
<S/>
<w msd="Ggnste" lemma="temeljiti">temelji</w>
<S/>
<w msd="Dm" lemma="na">na</w>
<S/>
<w msd="Ppnmem" lemma="prostovoljen">prostovoljnem</w>
<S/>
<w msd="Somem" lemma="pristop">pristopu</w>
<c>,</c>
<S/>
<w msd="Ppnmem" lemma="svoboden">svobodnem</w>
<S/>
<w msd="Somem" lemma="izstop">izstopu</w>
<c>,</c>
<S/>
<w msd="Ppnsem" lemma="enakopraven">enakopravnem</w>
<S/>
<w msd="Sosem" lemma="sodelovanje">sodelovanju</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Sosem" lemma="upravljanje">upravljanju</w>
<S/>
<w msd="Sommr" lemma="član">članov</w>
<c>.</c>
</s>
</p>
</body>
</text>
</TEI>

View File

@@ -0,0 +1,336 @@
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0018194" xml:lang="sl">
<teiHeader>
<fileDesc>
<titleStmt>
<title>Gigafida: Branko Gradišnik. CERKEV(2000)</title>
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
</funder>
</titleStmt>
<editionStmt>
<edition>1.0</edition>
</editionStmt>
<extent>50 besed</extent>
<publicationStmt>
<idno>CERKEV</idno>
<availability status="restricted">
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
</p>
</availability>
<date>2012-04-15</date>
</publicationStmt>
<sourceDesc>
<bibl>
<title n="???">neznani naslov</title>
<author>Branko Gradišnik</author>
<date>2000</date>
<publisher n="drugo">neznani založnik</publisher>
<note type="sourceLang"/>
</bibl>
</sourceDesc>
</fileDesc>
<encodingDesc>
<projectDesc>
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
</p>
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
</p>
</projectDesc>
<tagsDecl>
<namespace name="http://www.tei-c.org/ns/1.0">
<tagUsage gi="S" occurs="48"/>
<tagUsage gi="body" occurs="1"/>
<tagUsage gi="c" occurs="7"/>
<tagUsage gi="p" occurs="2"/>
<tagUsage gi="s" occurs="2"/>
<tagUsage gi="text" occurs="1"/>
<tagUsage gi="w" occurs="50"/>
</namespace>
</tagsDecl>
<appInfo>
<application ident="Amebis_pretvornik" version="1.0">
<label>[ZDRUZEVANJE] 1:1</label>
<label>[IME] D:\FIDA\KORPUS\VNOS\2_ZDR\CERKEV.ZDR</label>
<label>[1] **********</label>
<label>[IZVOR] D:\FIDA\KORPUS\Vhod\Brane\IZVIRNO\SOBOTNA\KOLUMNE\pnz\IDEJE\CERKEV.DOC</label>
<label>[FORMAT] MS Word for Windows 6.0/7.0</label>
<label>[DATUM] 24.2.2000</label>
<label>[IZVOR_RTF] D:\FIDA\KORPUS\Vhod\Brane\IZVIRNO\SOBOTNA\KOLUMNE\pnz\IDEJE\CERKEV.RTF</label>
<label>[PRETVORBA] RTF</label>
<label>[KONEC] **********</label>
</application>
</appInfo>
<classDecl>
<taxonomy xml:id="SSJ">
<category xml:id="SSJ.T">
<catDesc>tisk</catDesc>
<category xml:id="SSJ.T.K">
<catDesc>knjižno</catDesc>
<category xml:id="SSJ.T.K.L">
<catDesc>leposlovno</catDesc>
</category>
<category xml:id="SSJ.T.K.S">
<catDesc>strokovno</catDesc>
</category>
</category>
<category xml:id="SSJ.T.P">
<catDesc>periodično</catDesc>
<category xml:id="SSJ.T.P.C">
<catDesc>časopis</catDesc>
</category>
<category xml:id="SSJ.T.P.R">
<catDesc>revija</catDesc>
</category>
</category>
<category xml:id="SSJ.T.D">
<catDesc>drugo</catDesc>
</category>
</category>
<category xml:id="SSJ.I">
<catDesc>internet</catDesc>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.P">
<catDesc>prenosnik</catDesc>
<category xml:id="Ft.P.G">
<catDesc>govorni</catDesc>
</category>
<category xml:id="Ft.P.E">
<catDesc>elektronski</catDesc>
</category>
<category xml:id="Ft.P.P">
<catDesc>pisni</catDesc>
<category xml:id="Ft.P.P.O">
<catDesc>objavljeno</catDesc>
<category xml:id="Ft.P.P.O.K">
<catDesc>knjižno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P">
<catDesc>periodično</catDesc>
<category xml:id="Ft.P.P.O.P.C">
<catDesc>časopisno</catDesc>
<category xml:id="Ft.P.P.O.P.C.D">
<catDesc>dnevno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.C.V">
<catDesc>večkrat tedensko</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.C.T">
<catDesc>tedensko</catDesc>
</category>
</category>
<category xml:id="Ft.P.P.O.P.R">
<catDesc>revialno</catDesc>
<category xml:id="Ft.P.P.O.P.R.T">
<catDesc>tedensko</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.S">
<catDesc>štirinajstdnevno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.M">
<catDesc>mesečno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.D">
<catDesc>redkeje kot na mesec</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.O">
<catDesc>občasno</catDesc>
</category>
</category>
</category>
</category>
<category xml:id="Ft.P.P.N">
<catDesc>neobjavljeno</catDesc>
<category xml:id="Ft.P.P.N.J">
<catDesc>javno</catDesc>
</category>
<category xml:id="Ft.P.P.N.I">
<catDesc>interno</catDesc>
</category>
<category xml:id="Ft.P.P.N.Z">
<catDesc>zasebno</catDesc>
</category>
</category>
</category>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.Z">
<catDesc>zvrst</catDesc>
<category xml:id="Ft.Z.U">
<catDesc>umetnostna</catDesc>
<category xml:id="Ft.Z.U.P">
<catDesc>pesniška</catDesc>
</category>
<category xml:id="Ft.Z.U.R">
<catDesc>prozna</catDesc>
</category>
<category xml:id="Ft.Z.U.D">
<catDesc>dramska</catDesc>
</category>
</category>
<category xml:id="Ft.Z.N">
<catDesc>neumetnostna</catDesc>
<category xml:id="Ft.Z.N.S">
<catDesc>strokovna</catDesc>
<category xml:id="Ft.Z.N.S.H">
<catDesc>humanistična in družboslovna</catDesc>
</category>
<category xml:id="Ft.Z.N.S.N">
<catDesc>naravoslovna in tehnična</catDesc>
</category>
</category>
<category xml:id="Ft.Z.N.N">
<catDesc>nestrokovna</catDesc>
</category>
<category xml:id="Ft.Z.N.P">
<catDesc>pravna</catDesc>
</category>
</category>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.L">
<catDesc>lektorirano</catDesc>
<category xml:id="Ft.L.D">
<catDesc>da</catDesc>
</category>
<category xml:id="Ft.L.N">
<catDesc>ne</catDesc>
</category>
</category>
</taxonomy>
</classDecl>
</encodingDesc>
<profileDesc>
<textClass>
<catRef target="#SSJ.T.K.S"/>
<catRef target="#Ft.P.P.N.Z"/>
<catRef target="#Ft.Z.N.N"/>
</textClass>
</profileDesc>
</teiHeader>
<text xml:id="F0018194." xml:lang="sl">
<body>
<p>
<s>
<w msd="Sozei" lemma="cerkev">Cerkev</w>
<S/>
<w msd="Dm" lemma="v">v</w>
<S/>
<w msd="Sozem" lemma="opreka">opreki</w>
<S/>
<w msd="Do" lemma="z">z</w>
<S/>
<w msd="Someo" lemma="narod">narodom</w>
<c>:</c>
<S/>
<w msd="Vd" lemma="če">Če</w>
<S/>
<w msd="Gp-ste-n" lemma="biti">je</w>
<S/>
<w msd="Sozed" lemma="cerkev">Cerkvi</w>
<S/>
<w msd="Rsr" lemma="dobro">bolje</w>
<c>,</c>
<S/>
<w msd="Rsn" lemma="potem">potem</w>
<S/>
<w msd="Ggvste" lemma="iti">gre</w>
<S/>
<w msd="Dt" lemma="za">za</w>
<S/>
<w msd="Sommt" lemma="duhovnik">duhovnike</w>
<S/>
<w msd="Rsr" lemma="več">več</w>
<S/>
<w msd="Sommr" lemma="sin">sinov</w>
<S/>
<w msd="Somer" lemma="narod">naroda</w>
<c>,</c>
<S/>
<w msd="Somei" lemma="narod">narod</w>
<S/>
<w msd="Vp" lemma="pa">pa</w>
<S/>
<w msd="Ggnste" lemma="izumirati">izumira</w>
</s>
</p>
<p>
<s>
<w msd="Sozei" lemma="cerkev">Cerkev</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Sommi" lemma="peder">pedri</w>
<c>:</c>
<S/>
<w msd="Vd" lemma="če">Če</w>
<S/>
<w msd="Zotmmt--k" lemma="on">jih</w>
<S/>
<w msd="Sozei" lemma="cerkev">Cerkev</w>
<S/>
<w msd="L" lemma="ne">ne</w>
<S/>
<w msd="Gp-g" lemma="biti">bi</w>
<S/>
<w msd="Ggvd-ez" lemma="anatemizirati">anatemizirala</w>
<c>,</c>
<S/>
<w msd="Vp" lemma="tako">tako</w>
<S/>
<w msd="Vd" lemma="da">da</w>
<S/>
<w msd="Gp-g" lemma="biti">bi</w>
<S/>
<w msd="Ggdd-mm" lemma="postati">postali</w>
<S/>
<w msd="Ppnmmi" lemma="sprejemljiv">sprejemljivi</w>
<c>,</c>
<S/>
<w msd="Rsn" lemma="potem">potem</w>
<S/>
<w msd="Gp-g" lemma="biti">bi</w>
<S/>
<w msd="Zp------k" lemma="se">se</w>
<S/>
<w msd="Ppnmmi" lemma="mlad">mladi</w>
<S/>
<w msd="Rsn" lemma="istospolno">istospolno</w>
<S/>
<w msd="Pdnmmi" lemma="usmerjen">usmerjeni</w>
<S/>
<w msd="Ggvd-mm" lemma="proklamirati">proklamirali</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="L" lemma="ne">ne</w>
<S/>
<w msd="Gp-g" lemma="biti">bi</w>
<S/>
<w msd="Ggnd-mm" lemma="skrivati">skrivali</w>
<S/>
<w msd="Zp-mer" lemma="svoj">svojega</w>
<S/>
<w msd="Somer" lemma="strah">strahu</w>
<S/>
<w msd="Do" lemma="pred">pred</w>
<S/>
<w msd="Sozmo" lemma="ženska">ženskami</w>
<S/>
<w msd="Dt" lemma="v">v</w>
<S/>
<w msd="Rsn" lemma="sutano">sutano</w>
<c>!</c>
</s>
</p>
</body>
</text>
</TEI>

View File

@@ -0,0 +1,367 @@
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0026709" xml:lang="sl">
<teiHeader>
<fileDesc>
<titleStmt>
<title>Gigafida: INTERA(1998)</title>
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
</funder>
</titleStmt>
<editionStmt>
<edition>1.0</edition>
</editionStmt>
<extent>53 besed</extent>
<publicationStmt>
<idno>INTERA</idno>
<availability status="restricted">
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
</p>
</availability>
<date>2012-04-15</date>
</publicationStmt>
<sourceDesc>
<bibl>
<title n="???">neznani naslov</title>
<author n="???">neznani avtor</author>
<date>1998</date>
<publisher n="drugo">neznani založnik</publisher>
<note type="sourceLang"/>
</bibl>
</sourceDesc>
</fileDesc>
<encodingDesc>
<projectDesc>
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
</p>
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
</p>
</projectDesc>
<tagsDecl>
<namespace name="http://www.tei-c.org/ns/1.0">
<tagUsage gi="S" occurs="45"/>
<tagUsage gi="body" occurs="1"/>
<tagUsage gi="c" occurs="10"/>
<tagUsage gi="p" occurs="7"/>
<tagUsage gi="s" occurs="11"/>
<tagUsage gi="text" occurs="1"/>
<tagUsage gi="w" occurs="53"/>
</namespace>
</tagsDecl>
<appInfo>
<application ident="Amebis_pretvornik" version="1.0">
<label>[ZDRUZEVANJE] 1:1</label>
<label>[IME] D:\FIDA\KORPUS\VNOS\2_ZDR\INTERA.ZDR</label>
<label>[1] **********</label>
<label>[IZVOR] D:\FIDA\KORPUS\Vhod\PrimNov\KOMERC\INTERa.MAR</label>
<label>[FORMAT] WordStar 4.0</label>
<label>[DATUM] 12.10.1999</label>
<label>[IZVOR_RTF] D:\FIDA\KORPUS\Vhod\PrimNov\KOMERC\INTERa.RTF</label>
<label>[PRETVORBA] RTF</label>
<label>[KONEC] **********</label>
</application>
</appInfo>
<classDecl>
<taxonomy xml:id="SSJ">
<category xml:id="SSJ.T">
<catDesc>tisk</catDesc>
<category xml:id="SSJ.T.K">
<catDesc>knjižno</catDesc>
<category xml:id="SSJ.T.K.L">
<catDesc>leposlovno</catDesc>
</category>
<category xml:id="SSJ.T.K.S">
<catDesc>strokovno</catDesc>
</category>
</category>
<category xml:id="SSJ.T.P">
<catDesc>periodično</catDesc>
<category xml:id="SSJ.T.P.C">
<catDesc>časopis</catDesc>
</category>
<category xml:id="SSJ.T.P.R">
<catDesc>revija</catDesc>
</category>
</category>
<category xml:id="SSJ.T.D">
<catDesc>drugo</catDesc>
</category>
</category>
<category xml:id="SSJ.I">
<catDesc>internet</catDesc>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.P">
<catDesc>prenosnik</catDesc>
<category xml:id="Ft.P.G">
<catDesc>govorni</catDesc>
</category>
<category xml:id="Ft.P.E">
<catDesc>elektronski</catDesc>
</category>
<category xml:id="Ft.P.P">
<catDesc>pisni</catDesc>
<category xml:id="Ft.P.P.O">
<catDesc>objavljeno</catDesc>
<category xml:id="Ft.P.P.O.K">
<catDesc>knjižno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P">
<catDesc>periodično</catDesc>
<category xml:id="Ft.P.P.O.P.C">
<catDesc>časopisno</catDesc>
<category xml:id="Ft.P.P.O.P.C.D">
<catDesc>dnevno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.C.V">
<catDesc>večkrat tedensko</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.C.T">
<catDesc>tedensko</catDesc>
</category>
</category>
<category xml:id="Ft.P.P.O.P.R">
<catDesc>revialno</catDesc>
<category xml:id="Ft.P.P.O.P.R.T">
<catDesc>tedensko</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.S">
<catDesc>štirinajstdnevno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.M">
<catDesc>mesečno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.D">
<catDesc>redkeje kot na mesec</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.O">
<catDesc>občasno</catDesc>
</category>
</category>
</category>
</category>
<category xml:id="Ft.P.P.N">
<catDesc>neobjavljeno</catDesc>
<category xml:id="Ft.P.P.N.J">
<catDesc>javno</catDesc>
</category>
<category xml:id="Ft.P.P.N.I">
<catDesc>interno</catDesc>
</category>
<category xml:id="Ft.P.P.N.Z">
<catDesc>zasebno</catDesc>
</category>
</category>
</category>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.Z">
<catDesc>zvrst</catDesc>
<category xml:id="Ft.Z.U">
<catDesc>umetnostna</catDesc>
<category xml:id="Ft.Z.U.P">
<catDesc>pesniška</catDesc>
</category>
<category xml:id="Ft.Z.U.R">
<catDesc>prozna</catDesc>
</category>
<category xml:id="Ft.Z.U.D">
<catDesc>dramska</catDesc>
</category>
</category>
<category xml:id="Ft.Z.N">
<catDesc>neumetnostna</catDesc>
<category xml:id="Ft.Z.N.S">
<catDesc>strokovna</catDesc>
<category xml:id="Ft.Z.N.S.H">
<catDesc>humanistična in družboslovna</catDesc>
</category>
<category xml:id="Ft.Z.N.S.N">
<catDesc>naravoslovna in tehnična</catDesc>
</category>
</category>
<category xml:id="Ft.Z.N.N">
<catDesc>nestrokovna</catDesc>
</category>
<category xml:id="Ft.Z.N.P">
<catDesc>pravna</catDesc>
</category>
</category>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.L">
<catDesc>lektorirano</catDesc>
<category xml:id="Ft.L.D">
<catDesc>da</catDesc>
</category>
<category xml:id="Ft.L.N">
<catDesc>ne</catDesc>
</category>
</category>
</taxonomy>
</classDecl>
</encodingDesc>
<profileDesc>
<textClass>
<catRef target="#SSJ.T.K.S"/>
<catRef target="#Ft.P.P.N.I"/>
<catRef target="#Ft.Z.N.N"/>
</textClass>
</profileDesc>
</teiHeader>
<text xml:id="F0026709." xml:lang="sl">
<body>
<p>
<s>
<w msd="Somei" lemma="g">g</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Slmei" lemma="Detela">Detela</w>
<S/>
<w msd="Slmei" lemma="Matjaž">Matjaž</w>
<S/>
<w msd="Slmei" lemma="Koper">Koper</w>
<c>,</c>
<S/>
<w msd="Kav" lemma="21.">21.</w>
<S/>
<w msd="Kav" lemma="2.">2.</w>
<S/>
<w msd="Kag" lemma="1998">1998</w>
</s>
</p>
<p>
<s>
<w msd="Dm" lemma="v">V</w>
<S/>
<w msd="Sozem" lemma="priloga">prilogi</w>
<S/>
<w msd="Zod-md" lemma="ti">vam</w>
<S/>
<w msd="Ggnspm" lemma="zavračati">zavračamo</w>
<S/>
<w msd="Sometn" lemma="račun">račun</w>
<S/>
<w msd="O" lemma="št.">št.</w>
<S/>
<w msd="Kag" lemma="2130">2130</w>
<c>/</c>
<w msd="Kag" lemma="97">97</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="ker">ker</w>
<S/>
<w msd="Zk-ser" lemma="ta">tega</w>
<S/>
<w msd="Zop-ed--k" lemma="jaz">mi</w>
<S/>
<w msd="Dm" lemma="pri">pri</w>
<S/>
<w msd="Zod-mm" lemma="ti">vas</w>
<S/>
<w msd="Gp-spm-d" lemma="biti">nismo</w>
<S/>
<w msd="Ggdd-mm" lemma="naročiti">naročili</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Somei" lemma="naročnik">Naročnik</w>
<S/>
<w msd="Gp-ste-n" lemma="biti">je</w>
<S/>
<w msd="Sozei" lemma="tiskarna">Tiskarna</w>
<S/>
<w msd="Somei" lemma="gepard">Gepard</w>
<S/>
<w msd="Kag" lemma="1">1</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Dr" lemma="poleg">Poleg</w>
<S/>
<w msd="Zk-ser" lemma="ta">tega</w>
<S/>
<w msd="Vp" lemma="pa">pa</w>
<S/>
<w msd="Gp-ste-n" lemma="biti">je</w>
<S/>
<w msd="Sozei" lemma="cena">cena</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="ki">ki</w>
<S/>
<w msd="Zop-md" lemma="jaz">nam</w>
<S/>
<w msd="Zotzet--k" lemma="on">jo</w>
<S/>
<w msd="Gp-ste-n" lemma="biti">je</w>
<S/>
<w msd="Ggdd-ez" lemma="dati">dala</w>
<S/>
<w msd="Sozei" lemma="tiskarna">Tiskarna</w>
<S/>
<w msd="Somei" lemma="gepard">Gepard</w>
<S/>
<w msd="Kag" lemma="1">1</w>
<S/>
<w msd="Rsn" lemma="veliko">veliko</w>
<S/>
<w msd="Pppzei" lemma="nizek">nižja</w>
<c>.</c>
</s>
</p>
<p>
<s>
<w msd="Ppnzei" lemma="dunajski">Dunajska</w>
<S/>
<w msd="Sozei" lemma="cesta">cesta</w>
<S/>
<w msd="Kag" lemma="22">22</w>
</s>
</p>
<p>
<s>
<w msd="Somei" lemma="intermarketing">INTERMARKETING</w>
</s>
</p>
<p>
<s>
<w msd="Ppnmein" lemma="lep">Lep</w>
<S/>
<w msd="Somei" lemma="pozdrav">pozdrav</w>
<c>!</c>
<S/>
</s>
<s>
<w msd="Slmei" lemma="Darko">Darko</w>
<S/>
<w msd="Slmei" lemma="Stepančič">Stepančič</w>
</s>
</p>
<p>
<s>
<w msd="Kag" lemma="1113">1113</w>
<S/>
<w msd="Slzei" lemma="Ljubljana">Ljubljana</w>
</s>
</p>
<p>
<s>
<w msd="Pdnmeid" lemma="spoštovan">Spoštovani</w>
<c>!</c>
</s>
</p>
</body>
</text>
</TEI>

View File

@@ -0,0 +1,365 @@
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0030361" xml:lang="sl">
<teiHeader>
<fileDesc>
<titleStmt>
<title>Gigafida: REFEREND(1998)</title>
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
</funder>
</titleStmt>
<editionStmt>
<edition>1.0</edition>
</editionStmt>
<extent>52 besed</extent>
<publicationStmt>
<idno>REFEREND</idno>
<availability status="restricted">
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
</p>
</availability>
<date>2012-04-15</date>
</publicationStmt>
<sourceDesc>
<bibl>
<title n="???">neznani naslov</title>
<author n="???">neznani avtor</author>
<date>1998</date>
<publisher n="drugo">neznani založnik</publisher>
<note type="sourceLang"/>
</bibl>
</sourceDesc>
</fileDesc>
<encodingDesc>
<projectDesc>
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
</p>
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
</p>
</projectDesc>
<tagsDecl>
<namespace name="http://www.tei-c.org/ns/1.0">
<tagUsage gi="S" occurs="44"/>
<tagUsage gi="body" occurs="1"/>
<tagUsage gi="c" occurs="10"/>
<tagUsage gi="p" occurs="9"/>
<tagUsage gi="s" occurs="9"/>
<tagUsage gi="text" occurs="1"/>
<tagUsage gi="w" occurs="52"/>
</namespace>
</tagsDecl>
<appInfo>
<application ident="Amebis_pretvornik" version="1.0">
<label>[ZDRUZEVANJE] 1:1</label>
<label>[IME] D:\FIDA\KORPUS\VNOS\2_ZDR\REFEREND.ZDR</label>
<label>[1] **********</label>
<label>[IZVOR] D:\FIDA\KORPUS\Vhod\PrimNov\NADJA\AKREDITI\1998\REFEREND</label>
<label>[FORMAT] MS Word for Windows 6.0/7.0</label>
<label>[DATUM] 13.10.1999</label>
<label>[IZVOR_RTF] D:\FIDA\KORPUS\Vhod\PrimNov\NADJA\AKREDITI\1998\REFEREND.RTF</label>
<label>[PRETVORBA] RTF</label>
<label>[KONEC] **********</label>
</application>
</appInfo>
<classDecl>
<taxonomy xml:id="SSJ">
<category xml:id="SSJ.T">
<catDesc>tisk</catDesc>
<category xml:id="SSJ.T.K">
<catDesc>knjižno</catDesc>
<category xml:id="SSJ.T.K.L">
<catDesc>leposlovno</catDesc>
</category>
<category xml:id="SSJ.T.K.S">
<catDesc>strokovno</catDesc>
</category>
</category>
<category xml:id="SSJ.T.P">
<catDesc>periodično</catDesc>
<category xml:id="SSJ.T.P.C">
<catDesc>časopis</catDesc>
</category>
<category xml:id="SSJ.T.P.R">
<catDesc>revija</catDesc>
</category>
</category>
<category xml:id="SSJ.T.D">
<catDesc>drugo</catDesc>
</category>
</category>
<category xml:id="SSJ.I">
<catDesc>internet</catDesc>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.P">
<catDesc>prenosnik</catDesc>
<category xml:id="Ft.P.G">
<catDesc>govorni</catDesc>
</category>
<category xml:id="Ft.P.E">
<catDesc>elektronski</catDesc>
</category>
<category xml:id="Ft.P.P">
<catDesc>pisni</catDesc>
<category xml:id="Ft.P.P.O">
<catDesc>objavljeno</catDesc>
<category xml:id="Ft.P.P.O.K">
<catDesc>knjižno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P">
<catDesc>periodično</catDesc>
<category xml:id="Ft.P.P.O.P.C">
<catDesc>časopisno</catDesc>
<category xml:id="Ft.P.P.O.P.C.D">
<catDesc>dnevno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.C.V">
<catDesc>večkrat tedensko</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.C.T">
<catDesc>tedensko</catDesc>
</category>
</category>
<category xml:id="Ft.P.P.O.P.R">
<catDesc>revialno</catDesc>
<category xml:id="Ft.P.P.O.P.R.T">
<catDesc>tedensko</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.S">
<catDesc>štirinajstdnevno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.M">
<catDesc>mesečno</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.D">
<catDesc>redkeje kot na mesec</catDesc>
</category>
<category xml:id="Ft.P.P.O.P.R.O">
<catDesc>občasno</catDesc>
</category>
</category>
</category>
</category>
<category xml:id="Ft.P.P.N">
<catDesc>neobjavljeno</catDesc>
<category xml:id="Ft.P.P.N.J">
<catDesc>javno</catDesc>
</category>
<category xml:id="Ft.P.P.N.I">
<catDesc>interno</catDesc>
</category>
<category xml:id="Ft.P.P.N.Z">
<catDesc>zasebno</catDesc>
</category>
</category>
</category>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.Z">
<catDesc>zvrst</catDesc>
<category xml:id="Ft.Z.U">
<catDesc>umetnostna</catDesc>
<category xml:id="Ft.Z.U.P">
<catDesc>pesniška</catDesc>
</category>
<category xml:id="Ft.Z.U.R">
<catDesc>prozna</catDesc>
</category>
<category xml:id="Ft.Z.U.D">
<catDesc>dramska</catDesc>
</category>
</category>
<category xml:id="Ft.Z.N">
<catDesc>neumetnostna</catDesc>
<category xml:id="Ft.Z.N.S">
<catDesc>strokovna</catDesc>
<category xml:id="Ft.Z.N.S.H">
<catDesc>humanistična in družboslovna</catDesc>
</category>
<category xml:id="Ft.Z.N.S.N">
<catDesc>naravoslovna in tehnična</catDesc>
</category>
</category>
<category xml:id="Ft.Z.N.N">
<catDesc>nestrokovna</catDesc>
</category>
<category xml:id="Ft.Z.N.P">
<catDesc>pravna</catDesc>
</category>
</category>
</category>
</taxonomy>
<taxonomy>
<category xml:id="Ft.L">
<catDesc>lektorirano</catDesc>
<category xml:id="Ft.L.D">
<catDesc>da</catDesc>
</category>
<category xml:id="Ft.L.N">
<catDesc>ne</catDesc>
</category>
</category>
</taxonomy>
</classDecl>
</encodingDesc>
<profileDesc>
<textClass>
<catRef target="#SSJ.T.K.S"/>
<catRef target="#Ft.P.P.N.I"/>
<catRef target="#Ft.Z.N.N"/>
</textClass>
</profileDesc>
</teiHeader>
<text xml:id="F0030361." xml:lang="sl">
<body>
<p>
<s>
<w msd="Ppnmeid" lemma="odgovoren">odgovorni</w>
<S/>
<w msd="Somei" lemma="urednik">urednik</w>
<S/>
</s>
</p>
<p>
<s>
<w msd="Kag" lemma="6000">6000</w>
<S/>
<w msd="Slmei" lemma="Koper">Koper</w>
</s>
</p>
<p>
<s>
<w msd="Ppnzei" lemma="volilen">Volilna</w>
<S/>
<w msd="Sozei" lemma="komisija">komisija</w>
<S/>
<w msd="Dt" lemma="za">za</w>
<S/>
<w msd="Sozet" lemma="ustanovitev">ustanovitev</w>
<S/>
<w msd="Ppnzmr" lemma="nov">novih</w>
<S/>
<w msd="Sozmr" lemma="občina">občin</w>
</s>
</p>
<p>
<s>
<w msd="Somei" lemma="trg">Trg</w>
<S/>
<w msd="Slmei" lemma="Brolo">Brolo</w>
<S/>
<w msd="Kag" lemma="3">3</w>
</s>
</p>
<p>
<s>
<w msd="Ggnspm" lemma="prositi">Prosimo</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="da">da</w>
<S/>
<w msd="Dt" lemma="za">za</w>
<S/>
<w msd="Sometn" lemma="referendum">referendum</w>
<S/>
<w msd="Dt" lemma="za">za</w>
<S/>
<w msd="Sozet" lemma="ustanovitev">ustanovitev</w>
<S/>
<w msd="Ppnzmr" lemma="nov">novih</w>
<S/>
<w msd="Sozmr" lemma="občina">občin</w>
<S/>
<w msd="Slmei" lemma="Ankaran">Ankaran</w>
<c>-</c>
<w msd="Sozer" lemma="škofija">Škofije</w>
<c>,</c>
<S/>
<w msd="Slsei" lemma="Šmarje">Šmarje</w>
<c>-</c>
<w msd="Sozmi" lemma="mareziga">Marezige</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Sommi" lemma="dekan">Dekani</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="ki">ki</w>
<S/>
<w msd="Gp-pte-n" lemma="biti">bo</w>
<S/>
<w msd="Ggnd-em" lemma="potekati">potekal</w>
<S/>
<w msd="Kav" lemma="19.">19.</w>
<S/>
<w msd="Somer" lemma="april">aprila</w>
<c>,</c>
<S/>
<w msd="Ggvsdm" lemma="akreditirati">akreditirate</w>
<S/>
<w msd="Zspzeim" lemma="naš">naša</w>
<S/>
<w msd="Somer" lemma="novinar">novinarja</w>
<S/>
<w msd="Slzet" lemma="Mojca">Mojco</w>
<S/>
<w msd="Slmei" lemma="Beljan">Beljan</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Slmetd" lemma="Danijel">Danijela</w>
<S/>
<w msd="Somer" lemma="cek">Ceka</w>
<c>.</c>
</s>
</p>
<p>
<s>
<w msd="Slmei" lemma="Koper">Koper</w>
<c>,</c>
<S/>
<w msd="Kav" lemma="8.">8.</w>
<S/>
<w msd="Somer" lemma="april">aprila</w>
<S/>
<w msd="Kag" lemma="1998">1998</w>
</s>
</p>
<p>
<s>
<w msd="Sozei" lemma="prošnja">Prošnja</w>
<S/>
<w msd="Dt" lemma="za">za</w>
<S/>
<w msd="Sozet" lemma="akreditacija">akreditacijo</w>
<S/>
</s>
</p>
<p>
<s>
<w msd="Slmei" lemma="Branko">Branko</w>
<S/>
<w msd="Slmei" lemma="Podobnik">Podobnik</w>
<c>,</c>
</s>
</p>
<p>
<s>
<w msd="Do" lemma="z">Z</w>
<S/>
<w msd="Ppnmmo" lemma="odličen">odličnimi</w>
<S/>
<w msd="Sommo" lemma="pozdrav">pozdravi</w>
<c>,</c>
<S/>
</s>
</p>
</body>
</text>
</TEI>

View File

@@ -0,0 +1,356 @@
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0036980" xml:lang="sl">
<teiHeader>
<fileDesc>
<titleStmt>
<title>Gigafida: INTERNET (2010-10-12)</title>
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
</funder>
</titleStmt>
<editionStmt>
<edition>1.0</edition>
</editionStmt>
<extent>100 besed</extent>
<publicationStmt>
<idno>arhivo.com</idno>
<availability status="restricted">
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
</p>
</availability>
<date>2012-04-15</date>
</publicationStmt>
<sourceDesc>
<bibl>
<title>INTERNET</title>
<author n="???">neznani avtor</author>
<date>2010-10-12</date>
<publisher n="internet, novice">arhivo.com</publisher>
<note type="sourceLang"/>
<note n="URL">
<list>
<item>
<ref target="http://www.arhivo.com/jesenska-torta&amp;GID%3D8">
http://www.arhivo.com/jesenska-torta&amp;GID=8
</ref>
</item>
</list>
</note>
</bibl>
</sourceDesc>
</fileDesc>
<encodingDesc>
<projectDesc>
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
</p>
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
</p>
</projectDesc>
<tagsDecl>
<namespace name="http://www.tei-c.org/ns/1.0">
<tagUsage gi="S" occurs="97"/>
<tagUsage gi="body" occurs="1"/>
<tagUsage gi="c" occurs="25"/>
<tagUsage gi="p" occurs="3"/>
<tagUsage gi="s" occurs="11"/>
<tagUsage gi="text" occurs="1"/>
<tagUsage gi="w" occurs="100"/>
</namespace>
</tagsDecl>
<classDecl>
<taxonomy xml:id="SSJ">
<category xml:id="SSJ.T">
<catDesc>tisk</catDesc>
<category xml:id="SSJ.T.K">
<catDesc>knjižno</catDesc>
<category xml:id="SSJ.T.K.L">
<catDesc>leposlovno</catDesc>
</category>
<category xml:id="SSJ.T.K.S">
<catDesc>strokovno</catDesc>
</category>
</category>
<category xml:id="SSJ.T.P">
<catDesc>periodično</catDesc>
<category xml:id="SSJ.T.P.C">
<catDesc>časopis</catDesc>
</category>
<category xml:id="SSJ.T.P.R">
<catDesc>revija</catDesc>
</category>
</category>
<category xml:id="SSJ.T.D">
<catDesc>drugo</catDesc>
</category>
</category>
<category xml:id="SSJ.I">
<catDesc>internet</catDesc>
</category>
</taxonomy>
</classDecl>
</encodingDesc>
<profileDesc>
<textClass>
<catRef target="#SSJ.I"/>
</textClass>
</profileDesc>
</teiHeader>
<text xml:id="F0036980." xml:lang="sl">
<body>
<p>
<s>
<w msd="Ppnzei" lemma="jesenski">Jesenska</w>
<S/>
<w msd="Sozei" lemma="torta">torta</w>
<S/>
<w msd="Ggnste" lemma="vsebovati">vsebuje</w>
<S/>
<w msd="Soset" lemma="grozdje">grozdje</w>
<c>,</c>
<S/>
<w msd="Soset" lemma="vino">vino</w>
<c>,</c>
<S/>
<w msd="Sozet" lemma="pomaranča">pomarančo</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Sommt" lemma="oreh">orehe</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Somei" lemma="foto">Foto</w>
<c>:</c>
<S/>
<w msd="Rsn" lemma="arhivo">Arhivo</w>
</s>
</p>
<p>
<s>
<w msd="Dt" lemma="v">V</w>
<S/>
<w msd="Sozet" lemma="čast">čast</w>
<S/>
<w msd="Sozer" lemma="jesen">jeseni</w>
<S/>
<w msd="Ggdspm" lemma="narediti">naredimo</w>
<S/>
<w msd="Sozet" lemma="torta">torto</w>
<S/>
<w msd="Do" lemma="z">s</w>
<S/>
<w msd="Someo" lemma="priokus">priokusom</w>
<S/>
<w msd="Dm" lemma="po">po</w>
<S/>
<w msd="Sosem" lemma="grozdje">grozdju</w>
<c>,</c>
<S/>
<w msd="Sozem" lemma="pomaranča">pomaranči</w>
<c>,</c>
<S/>
<w msd="Sosem" lemma="vino">vinu</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Sommm" lemma="oreh">orehih</w>
<c>.</c>
</s>
</p>
<p>
<s>
<w msd="Sosmo" lemma="jajce">Jajci</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Sometn" lemma="sladkor">sladkor</w>
<S/>
<w msd="Ppnzet" lemma="penast">penasto</w>
<S/>
<w msd="Ggdspm" lemma="umešati">umešamo</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Ggdspm" lemma="dodati">Dodamo</w>
<S/>
<w msd="Sozet" lemma="moka">moko</w>
<c>,</c>
<S/>
<w msd="Ppnmeid" lemma="pecilen">pecilni</w>
<S/>
<w msd="Somei" lemma="prašek">prašek</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Sommt" lemma="oreh">orehe</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="ki">ki</w>
<S/>
<w msd="Gp-spm-n" lemma="biti">smo</w>
<S/>
<w msd="Zotmmt--k" lemma="on">jih</w>
<S/>
<w msd="Ggdd-mm" lemma="streti">strli</w>
<S/>
<w msd="Dt" lemma="na">na</w>
<S/>
<w msd="Sozet" lemma="roka">roko</w>
<S/>
<c>(</c>
<w msd="Rsn" lemma="zato">zato</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="da">da</w>
<S/>
<w msd="Ggdspm" lemma="ohraniti">ohranimo</w>
<S/>
<w msd="Rsn" lemma="nekaj">nekaj</w>
<S/>
<w msd="Pppzmr" lemma="velik">večjih</w>
<S/>
<w msd="Sommr" lemma="košček">koščkov</w>
<c>)</c>
<c>.</c>
<S/>
</s>
<s>
<w msd="Vd" lemma="ko">Ko</w>
<S/>
<w msd="Rsn" lemma="dobro">dobro</w>
<S/>
<w msd="Ggdspm" lemma="premešati">premešamo</w>
<c>,</c>
<S/>
<w msd="Ggdspm" lemma="dodati">dodamo</w>
<S/>
<w msd="L" lemma="še">še</w>
<S/>
<w msd="Sometn" lemma="jogurt">jogurt</w>
<c>,</c>
<S/>
<w msd="Sosei" lemma="olje">olje</w>
<c>,</c>
<S/>
<w msd="Sosei" lemma="vino">vino</w>
<c>,</c>
<S/>
<w msd="Ppnmeid" lemma="pomarančen">pomarančni</w>
<S/>
<w msd="Sometn" lemma="sok">sok</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Sozet" lemma="lupinica">lupinico</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Sozet" lemma="masa">Maso</w>
<S/>
<w msd="Ggdspm" lemma="vliti">vlijemo</w>
<S/>
<w msd="Dt" lemma="v">v</w>
<S/>
<w msd="Ppnmetn" lemma="okrogel">okrogel</w>
<S/>
<w msd="Sometn" lemma="pekač">pekač</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Ggdspm" lemma="dati">damo</w>
<S/>
<w msd="Dt" lemma="v">v</w>
<S/>
<w msd="Pdnzet" lemma="ogret">ogreto</w>
<S/>
<w msd="Sozet" lemma="pečica">pečico</w>
<S/>
<w msd="Dt" lemma="za">za</w>
<S/>
<w msd="Rsn" lemma="pol">pol</w>
<S/>
<w msd="Sozer" lemma="ura">ure</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Rsn" lemma="medtem">Medtem</w>
<S/>
<w msd="Ggdspm" lemma="oprati">operemo</w>
<S/>
<w msd="Soset" lemma="grozdje">grozdje</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Zotmed--k" lemma="on">mu</w>
<S/>
<w msd="Ggdspm" lemma="odstraniti">odstranimo</w>
<S/>
<w msd="Sozmt" lemma="peška">peške</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Somei" lemma="pekač">Pekač</w>
<S/>
<w msd="Ggdspm" lemma="vzeti">vzamemo</w>
<S/>
<w msd="Dr" lemma="iz">iz</w>
<S/>
<w msd="Sozer" lemma="pečica">pečice</w>
<c>,</c>
<S/>
<w msd="Dm" lemma="po">po</w>
<S/>
<w msd="Somem" lemma="test">testu</w>
<S/>
<w msd="Ggdspm" lemma="posuti">posujemo</w>
<S/>
<w msd="Soset" lemma="grozdje">grozdje</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Ggdspm" lemma="dati">damo</w>
<S/>
<w msd="Rsn" lemma="nazaj">nazaj</w>
<S/>
<w msd="Sozem" lemma="peč">peči</w>
<S/>
<w msd="L" lemma="še">še</w>
<S/>
<w msd="Dt" lemma="za">za</w>
<S/>
<w msd="Rsn" lemma="pol">pol</w>
<S/>
<w msd="Sozer" lemma="ura">ure</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Ggdspm" lemma="ponuditi">Ponudimo</w>
<S/>
<w msd="Do" lemma="z">z</w>
<S/>
<w msd="Sozeo" lemma="rezina">rezino</w>
<S/>
<w msd="Sozer" lemma="pomaranča">pomaranče</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Ppnmein" lemma="dober">Dober</w>
<S/>
<w msd="Somei" lemma="tek">tek</w>
<c>!</c>
</s>
</p>
</body>
</text>
</TEI>

View File

@@ -0,0 +1,408 @@
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0037258" xml:lang="sl">
<teiHeader>
<fileDesc>
<titleStmt>
<title>Gigafida: INTERNET (2010-11-09)</title>
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
</funder>
</titleStmt>
<editionStmt>
<edition>1.0</edition>
</editionStmt>
<extent>104 besed</extent>
<publicationStmt>
<idno>n-tv.si</idno>
<availability status="restricted">
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
</p>
</availability>
<date>2012-04-15</date>
</publicationStmt>
<sourceDesc>
<bibl>
<title>INTERNET</title>
<author n="???">neznani avtor</author>
<date>2010-11-09</date>
<publisher n="internet, novice">n-tv.si</publisher>
<note type="sourceLang"/>
<note n="URL">
<list>
<item>
<ref target="http://www.n-tv.si/zakaj-moci-posteljo">http://www.n-tv.si/zakaj-moci-posteljo</ref>
</item>
<item>
<ref target="http://www.n-tv.si/video-blog-joc-o-predsodkih">
http://www.n-tv.si/video-blog-joc-o-predsodkih
</ref>
</item>
<item>
<ref target="http://www.n-tv.si/resnicne-zgodbe">http://www.n-tv.si/resnicne-zgodbe</ref>
</item>
<item>
<ref target="http://www.n-tv.si/resnicna-zgodba-v-spomin-andreju-markovicu">
http://www.n-tv.si/resnicna-zgodba-v-spomin-andreju-markovicu
</ref>
</item>
<item>
<ref target="http://www.n-tv.si/ogledalo-ki-ni-iz-stekla-naj-bi-prinasalo-bogastvo">
http://www.n-tv.si/ogledalo-ki-ni-iz-stekla-naj-bi-prinasalo-bogastvo
</ref>
</item>
<item>
<ref target="http://www.n-tv.si/dermatologija-znebite-se-tezav-s-srbeco-kozo">
http://www.n-tv.si/dermatologija-znebite-se-tezav-s-srbeco-kozo
</ref>
</item>
<item>
<ref target="http://www.n-tv.si/je-res-slab-metabolizem-vzrok-debelosti">
http://www.n-tv.si/je-res-slab-metabolizem-vzrok-debelosti
</ref>
</item>
<item>
<ref target="http://www.n-tv.si/strokovnjaki?p%3D4">http://www.n-tv.si/strokovnjaki?p=4</ref>
</item>
<item>
<ref target="http://www.n-tv.si/pregled-rubrike?p%3D7">http://www.n-tv.si/pregled-rubrike?p=7
</ref>
</item>
</list>
</note>
</bibl>
</sourceDesc>
</fileDesc>
<encodingDesc>
<projectDesc>
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
</p>
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
</p>
</projectDesc>
<tagsDecl>
<namespace name="http://www.tei-c.org/ns/1.0">
<tagUsage gi="S" occurs="98"/>
<tagUsage gi="body" occurs="1"/>
<tagUsage gi="c" occurs="23"/>
<tagUsage gi="p" occurs="9"/>
<tagUsage gi="s" occurs="13"/>
<tagUsage gi="text" occurs="1"/>
<tagUsage gi="w" occurs="104"/>
</namespace>
</tagsDecl>
<classDecl>
<taxonomy xml:id="SSJ">
<category xml:id="SSJ.T">
<catDesc>tisk</catDesc>
<category xml:id="SSJ.T.K">
<catDesc>knjižno</catDesc>
<category xml:id="SSJ.T.K.L">
<catDesc>leposlovno</catDesc>
</category>
<category xml:id="SSJ.T.K.S">
<catDesc>strokovno</catDesc>
</category>
</category>
<category xml:id="SSJ.T.P">
<catDesc>periodično</catDesc>
<category xml:id="SSJ.T.P.C">
<catDesc>časopis</catDesc>
</category>
<category xml:id="SSJ.T.P.R">
<catDesc>revija</catDesc>
</category>
</category>
<category xml:id="SSJ.T.D">
<catDesc>drugo</catDesc>
</category>
</category>
<category xml:id="SSJ.I">
<catDesc>internet</catDesc>
</category>
</taxonomy>
</classDecl>
</encodingDesc>
<profileDesc>
<textClass>
<catRef target="#SSJ.I"/>
</textClass>
</profileDesc>
</teiHeader>
<text xml:id="F0037258." xml:lang="sl">
<body>
<p>
<s>
<w msd="Rsn" lemma="kako">Kako</w>
<S/>
<w msd="Zp------k" lemma="se">se</w>
<S/>
<w msd="Ggdn" lemma="znebiti">znebiti</w>
<S/>
<w msd="Sozmr" lemma="težava">težav</w>
<S/>
<w msd="Do" lemma="z">s</w>
<S/>
<w msd="Pdnzeo" lemma="srbeč">srbečo</w>
<S/>
<w msd="Sozeo" lemma="koža">kožo</w>
<c>?</c>
</s>
</p>
<p>
<s>
<w msd="Ppnzei" lemma="resničen">RESNIČNA</w>
<S/>
<w msd="Sozei" lemma="zgodba">ZGODBA</w>
<c>:</c>
<S/>
<w msd="Ppnmmi" lemma="sam">Sami</w>
<S/>
<w msd="Zp---d--k" lemma="se">si</w>
<S/>
<w msd="Ggdspm" lemma="izbrati">izberemo</w>
<S/>
<w msd="Ppnzet" lemma="pravi">pravo</w>
<S/>
<w msd="Sozet" lemma="pot">pot</w>
<S/>
<w msd="Somei" lemma="prispevek">Prispevek</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="ki">ki</w>
<S/>
<w msd="Ggnste" lemma="pričati">priča</w>
<S/>
<w msd="Dm" lemma="o">o</w>
<S/>
<w msd="Zk-sem" lemma="ta">tem</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="da">da</w>
<S/>
<w msd="Gp-ste-n" lemma="biti">je</w>
<S/>
<w msd="Zc-sei" lemma="ves">vse</w>
<S/>
<w msd="Dm" lemma="v">v</w>
<S/>
<w msd="Sosem" lemma="življenje">življenju</w>
<S/>
<w msd="Ppnsei" lemma="mogoč">mogoče</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="da">da</w>
<S/>
<w msd="Gp-ste-n" lemma="biti">je</w>
<S/>
<w msd="Ppnzei" lemma="pravi">prava</w>
<S/>
<w msd="Sozei" lemma="pot">pot</w>
<S/>
<w msd="Zk-zei" lemma="tisti">tista</w>
<c>,</c>
<S/>
<w msd="Slmei" lemma="ki">ki</w>
<c>.</c>
<c>.</c>
<c>.</c>
</s>
</p>
<p>
<s>
<w msd="Slmei" lemma="bert">bert</w>
<S/>
<c>,</c>
<S/>
<w msd="Kag" lemma="09.11.2010">09.11.2010</w>
<S/>
<w msd="Dm" lemma="ob">ob</w>
<S/>
<w msd="Kag" lemma="12:35">12:35</w>
</s>
</p>
<p>
<s>
<w msd="Rsn" lemma="kako">Kako</w>
<S/>
<w msd="Ggnstm" lemma="izdelovati">izdelujejo</w>
<S/>
<w msd="Ppnsmt" lemma="kovinski">kovinska</w>
<S/>
<w msd="Soser" lemma="ogledalo">ogledala</w>
<c>?</c>
</s>
</p>
<p>
<s>
<w msd="Slmei" lemma="Valentinrozman">ValentinRozman</w>
<S/>
<c>,</c>
<S/>
<w msd="Kag" lemma="29.03.2010">29.03.2010</w>
<S/>
<w msd="Dm" lemma="ob">ob</w>
<S/>
<w msd="Kag" lemma="12:25">12:25</w>
</s>
</p>
<p>
<s>
<w msd="Rsn" lemma="kako">Kako</w>
<S/>
<w msd="Zp------k" lemma="se">se</w>
<S/>
<w msd="Ggdn" lemma="znebiti">znebiti</w>
<S/>
<w msd="Sozmr" lemma="težava">težav</w>
<S/>
<w msd="Do" lemma="z">s</w>
<S/>
<w msd="Pdnzeo" lemma="srbeč">srbečo</w>
<S/>
<w msd="Sozeo" lemma="koža">kožo</w>
<c>?</c>
<S/>
</s>
<s>
<w msd="Somei" lemma="pojav">Pojav</w>
<S/>
<w msd="Pdnzer" lemma="srbeč">srbeče</w>
<S/>
<w msd="Sozer" lemma="koža">kože</w>
<S/>
<w msd="Gp-ste-n" lemma="biti">je</w>
<S/>
<w msd="Rsn" lemma="danes">danes</w>
<S/>
<w msd="L" lemma="skorajda">skorajda</w>
<S/>
<w msd="Ppszei" lemma="pogost">najpogostejša</w>
<S/>
<w msd="Sozei" lemma="motnja">motnja</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Rsn" lemma="kaj">Kaj</w>
<S/>
<w msd="Ggnste" lemma="povzročati">povzroča</w>
<S/>
<w msd="Sozet" lemma="srbečica">srbečico</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Rsn" lemma="kako">kako</w>
<S/>
<w msd="Zotzet--k" lemma="on">jo</w>
<S/>
<w msd="Ggdn" lemma="odpraviti">odpraviti</w>
<S/>
<w msd="Ggnste" lemma="pojasnjevati">pojasnjuje</w>
<S/>
<w msd="Sozei" lemma="dermatologinja">dermatologinja</w>
<S/>
<w msd="Slzei" lemma="Tanja">Tanja</w>
<S/>
<w msd="Slzei" lemma="Planinšek">Planinšek</w>
<S/>
<w msd="Slzei" lemma="Ručigaj">Ručigaj</w>
<c>.</c>
</s>
</p>
<p>
<s>
<w msd="Ppnzei" lemma="resničen">RESNIČNA</w>
<S/>
<w msd="Sozei" lemma="zgodba">ZGODBA</w>
<c>:</c>
<S/>
<w msd="Rsn" lemma="prezgodaj">Prezgodaj</w>
<S/>
<w msd="Gp-ste-n" lemma="biti">je</w>
<S/>
<w msd="Ggdd-em" lemma="izgubiti">izgubil</w>
<S/>
<w msd="Ppnset" lemma="dragocen">dragoceno</w>
<S/>
<w msd="Soset" lemma="življenje">življenje</w>
</s>
</p>
<p>
<s>
<w msd="Somer" lemma="mik">Mika</w>
<S/>
<c>,</c>
<S/>
<w msd="Kag" lemma="08.11.2010">08.11.2010</w>
<S/>
<w msd="Dm" lemma="ob">ob</w>
<S/>
<w msd="Kag" lemma="09:56">09:56</w>
</s>
</p>
<p>
<s>
<w msd="Rsn" lemma="kako">Kako</w>
<S/>
<w msd="Ggnstm" lemma="izdelovati">izdelujejo</w>
<S/>
<w msd="Ppnsmt" lemma="kovinski">kovinska</w>
<S/>
<w msd="Ggdd-ez" lemma="ogledati">ogledala</w>
<c>?</c>
<S/>
</s>
<s>
<w msd="Ppnzei" lemma="edinstven">Edinstvena</w>
<S/>
<w msd="Ppnsmi" lemma="kovinski">kovinska</w>
<S/>
<w msd="Sosmi" lemma="ogledalo">ogledala</w>
<c>,</c>
<S/>
<w msd="Pdnzei" lemma="izdelan">izdelana</w>
<S/>
<w msd="Dm" lemma="v">v</w>
<S/>
<w msd="Slzem" lemma="Indija">Indiji</w>
<c>,</c>
<S/>
<w msd="L" lemma="naj">naj</w>
<S/>
<w msd="Gp-g" lemma="biti">bi</w>
<S/>
<w msd="Ggnd-ez" lemma="prinašati">prinašala</w>
<S/>
<w msd="Soset" lemma="bogastvo">bogastvo</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Sozet" lemma="sreča">srečo</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Ggdvdm" lemma="pogledati">Poglejte</w>
<S/>
<w msd="Zp---d--k" lemma="se">si</w>
<c>,</c>
<S/>
<w msd="Rsn" lemma="kako">kako</w>
<S/>
<w msd="Ggnstm" lemma="izdelovati">izdelujejo</w>
<S/>
<w msd="Zk-zei" lemma="ta">ta</w>
<S/>
<w msd="Ppnsmt" lemma="unikaten">unikatna</w>
<S/>
<w msd="Sosmt" lemma="ogledalo">ogledala</w>
<c>.</c>
</s>
</p>
</body>
</text>
</TEI>

View File

@@ -0,0 +1,391 @@
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0037544" xml:lang="sl">
<teiHeader>
<fileDesc>
<titleStmt>
<title>Gigafida: INTERNET (2010-12-09)</title>
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
</funder>
</titleStmt>
<editionStmt>
<edition>1.0</edition>
</editionStmt>
<extent>121 besed</extent>
<publicationStmt>
<idno>arhivo.com</idno>
<availability status="restricted">
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
</p>
</availability>
<date>2012-04-15</date>
</publicationStmt>
<sourceDesc>
<bibl>
<title>INTERNET</title>
<author n="???">neznani avtor</author>
<date>2010-12-09</date>
<publisher n="internet, novice">arhivo.com</publisher>
<note type="sourceLang"/>
<note n="URL">
<list>
<item>
<ref target="http://www.arhivo.com/korenckova-juha-z-ajdovo-kaso&amp;GID%3D8">
http://www.arhivo.com/korenckova-juha-z-ajdovo-kaso&amp;GID=8
</ref>
</item>
</list>
</note>
</bibl>
</sourceDesc>
</fileDesc>
<encodingDesc>
<projectDesc>
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
</p>
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
</p>
</projectDesc>
<tagsDecl>
<namespace name="http://www.tei-c.org/ns/1.0">
<tagUsage gi="S" occurs="119"/>
<tagUsage gi="body" occurs="1"/>
<tagUsage gi="c" occurs="21"/>
<tagUsage gi="p" occurs="3"/>
<tagUsage gi="s" occurs="9"/>
<tagUsage gi="text" occurs="1"/>
<tagUsage gi="w" occurs="121"/>
</namespace>
</tagsDecl>
<classDecl>
<taxonomy xml:id="SSJ">
<category xml:id="SSJ.T">
<catDesc>tisk</catDesc>
<category xml:id="SSJ.T.K">
<catDesc>knjižno</catDesc>
<category xml:id="SSJ.T.K.L">
<catDesc>leposlovno</catDesc>
</category>
<category xml:id="SSJ.T.K.S">
<catDesc>strokovno</catDesc>
</category>
</category>
<category xml:id="SSJ.T.P">
<catDesc>periodično</catDesc>
<category xml:id="SSJ.T.P.C">
<catDesc>časopis</catDesc>
</category>
<category xml:id="SSJ.T.P.R">
<catDesc>revija</catDesc>
</category>
</category>
<category xml:id="SSJ.T.D">
<catDesc>drugo</catDesc>
</category>
</category>
<category xml:id="SSJ.I">
<catDesc>internet</catDesc>
</category>
</taxonomy>
</classDecl>
</encodingDesc>
<profileDesc>
<textClass>
<catRef target="#SSJ.I"/>
</textClass>
</profileDesc>
</teiHeader>
<text xml:id="F0037544." xml:lang="sl">
<body>
<p>
<s>
<c>-</c>
<S/>
<w msd="Rsn" lemma="eko">eko</w>
<S/>
<w msd="Ppnzei" lemma="zelenjaven">zelenjavna</w>
<S/>
<w msd="Sozei" lemma="kocka">kocka</w>
</s>
</p>
<p>
<s>
<w msd="Gp-sdm-n" lemma="biti">Ste</w>
<S/>
<w msd="Ppnmmi" lemma="bolan">bolni</w>
<c>?</c>
<S/>
</s>
<s>
<w msd="Ppnmein" lemma="preprost">Preprost</w>
<S/>
<w msd="Somei" lemma="recept">recept</w>
<S/>
<w msd="Dt" lemma="za">za</w>
<S/>
<w msd="Psnzet" lemma="korenčkov">korenčkovo</w>
<S/>
<w msd="Sozet" lemma="juha">juho</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Psnzet" lemma="ajdov">ajdovo</w>
<S/>
<w msd="Sozet" lemma="kaša">kašo</w>
<S/>
<c>(</c>
<w msd="Vd" lemma="ki">ki</w>
<S/>
<w msd="Zotmdt--k" lemma="on">ju</w>
<S/>
<w msd="Rsn" lemma="lahko">lahko</w>
<S/>
<w msd="Ggnsdm" lemma="jesti">jeste</w>
<S/>
<w msd="L" lemma="tudi">tudi</w>
<S/>
<w msd="Rsn" lemma="posebej">posebej</w>
<c>)</c>
<S/>
<w msd="Ggdvdm" lemma="uporabiti">uporabite</w>
<S/>
<w msd="Dm" lemma="v">v</w>
<S/>
<w msd="Somem" lemma="čas">času</w>
<S/>
<w msd="Sozmr" lemma="viroza">viroz</w>
<c>,</c>
<S/>
<w msd="Vp" lemma="saj">saj</w>
<S/>
<w msd="Gp-ste-n" lemma="biti">je</w>
<S/>
<w msd="Rsn" lemma="zelo">zelo</w>
<S/>
<w msd="Ppnmein" lemma="lahek">lahek</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Ppnmein" lemma="neškodljiv">neškodljiv</w>
<S/>
<w msd="Dt" lemma="za">za</w>
<S/>
<w msd="Sometn" lemma="želodec">želodec</w>
<c>.</c>
</s>
</p>
<p>
<s>
<w msd="Ggdspm" lemma="oprati">Operemo</w>
<S/>
<w msd="Soset" lemma="korenje">korenje</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Psnzet" lemma="ajdov">ajdovo</w>
<S/>
<w msd="Sozet" lemma="kaša">kašo</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Dt" lemma="v">V</w>
<S/>
<w msd="Rsn" lemma="približno">približno</w>
<S/>
<w msd="Sometn" lemma="liter">liter</w>
<S/>
<w msd="Sozer" lemma="voda">vode</w>
<S/>
<c>(</c>
<w msd="Rsn" lemma="odvisno">odvisno</w>
<c>,</c>
<S/>
<w msd="Rsn" lemma="kako">kako</w>
<S/>
<w msd="Ppnzet" lemma="gost">gosto</w>
<S/>
<w msd="Sozet" lemma="juha">juho</w>
<S/>
<w msd="Ggnspm" lemma="želeti">želimo</w>
<c>)</c>
<S/>
<w msd="Dt" lemma="na">na</w>
<S/>
<w msd="Sommt" lemma="kolobar">kolobarje</w>
<S/>
<w msd="Ggdspm" lemma="narezati">narežemo</w>
<S/>
<w msd="Soset" lemma="korenje">korenje</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Rsn" lemma="pol">pol</w>
<S/>
<w msd="Rsn" lemma="eko">eko</w>
<S/>
<w msd="Ppnzer" lemma="zelenjaven">zelenjavne</w>
<S/>
<w msd="Sozmt" lemma="kocka">kocke</w>
<S/>
<w msd="Vp" lemma="ter">ter</w>
<S/>
<w msd="Ggnspm" lemma="kuhati">kuhamo</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="dokler">dokler</w>
<S/>
<w msd="Sosei" lemma="korenje">korenje</w>
<S/>
<w msd="L" lemma="ne">ne</w>
<S/>
<w msd="Ggdste" lemma="postati">postane</w>
<S/>
<w msd="Rsn" lemma="mehko">mehko</w>
<S/>
<c>(</c>
<w msd="Rsn" lemma="približno">približno</w>
<S/>
<w msd="Kag" lemma="20">20</w>
<S/>
<w msd="Sozmr" lemma="minuta">minut</w>
<c>)</c>
<c>.</c>
<S/>
</s>
<s>
<w msd="Rsn" lemma="medtem">Medtem</w>
<S/>
<w msd="Dm" lemma="v">v</w>
<S/>
<w msd="Kbzmmi" lemma="drug">drugi</w>
<S/>
<w msd="Sozem" lemma="posoda">posodi</w>
<S/>
<w msd="Ggdspm" lemma="zavreti">zavremo</w>
<S/>
<w msd="Rsn" lemma="približno">približno</w>
<S/>
<w msd="Kag" lemma="3">3</w>
<S/>
<w msd="Somei" lemma="dl">dl</w>
<S/>
<w msd="Sozer" lemma="voda">vode</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="ki">ki</w>
<S/>
<w msd="Zotzed--k" lemma="on">ji</w>
<S/>
<w msd="Ggdspm" lemma="dodati">dodamo</w>
<S/>
<w msd="Kbzzet" lemma="drug">drugo</w>
<S/>
<w msd="Sozet" lemma="polovica">polovico</w>
<S/>
<w msd="Ppnzer" lemma="zelenjaven">zelenjavne</w>
<S/>
<w msd="Sozmt" lemma="kocka">kocke</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Psnzet" lemma="ajdov">ajdovo</w>
<S/>
<w msd="Sozet" lemma="kaša">kašo</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Ggnspm" lemma="kuhati">Kuhamo</w>
<S/>
<w msd="Rsn" lemma="približno">približno</w>
<S/>
<w msd="Rsn" lemma="pol">pol</w>
<S/>
<w msd="Sozer" lemma="ura">ure</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Psnsei" lemma="korenčkov">Korenčkovo</w>
<S/>
<w msd="Sozet" lemma="juha">juho</w>
<S/>
<w msd="Ggnspm" lemma="soliti">solimo</w>
<S/>
<w msd="Dm" lemma="po">po</w>
<S/>
<w msd="Somem" lemma="okus">okusu</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Rsn" lemma="kar">kar</w>
<S/>
<w msd="Dm" lemma="v">v</w>
<S/>
<w msd="Sozem" lemma="posoda">posodi</w>
<S/>
<w msd="Ggdspm" lemma="zmešati">zmešamo</w>
<S/>
<w msd="Do" lemma="z">s</w>
<S/>
<w msd="Ppnmeo" lemma="paličen">paličnim</w>
<S/>
<w msd="Someo" lemma="mešalnik">mešalnikom</w>
<c>,</c>
<S/>
<w msd="Vd" lemma="da">da</w>
<S/>
<w msd="Ggdste" lemma="postati">postane</w>
<S/>
<w msd="L" lemma="ravno">ravno</w>
<S/>
<w msd="L" lemma="prav">prav</w>
<S/>
<w msd="Ppnzei" lemma="gost">gosta</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Pdnzei" lemma="tekoč">tekoča</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Dm" lemma="na">Na</w>
<S/>
<w msd="Somem" lemma="konec">koncu</w>
<S/>
<w msd="Zotzet--z" lemma="vame">vanjo</w>
<S/>
<w msd="Ggdspm" lemma="dodati">dodamo</w>
<S/>
<w msd="L" lemma="še">še</w>
<S/>
<w msd="Psnzet" lemma="ajdov">ajdovo</w>
<S/>
<w msd="Sozet" lemma="kaša">kašo</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Ppnzei" lemma="dieten">dietna</w>
<c>,</c>
<S/>
<w msd="Vp" lemma="a">a</w>
<S/>
<w msd="Ppnzei" lemma="okusen">okusna</w>
<S/>
<w msd="Sozei" lemma="jed">jed</w>
<S/>
<w msd="Gp-ste-n" lemma="biti">je</w>
<S/>
<w msd="Rsn" lemma="nared">nared</w>
<c>.</c>
</s>
</p>
</body>
</text>
</TEI>

View File

@@ -0,0 +1,355 @@
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0038754" xml:lang="sl">
<teiHeader>
<fileDesc>
<titleStmt>
<title>Gigafida: INTERNET (2010-07-21)</title>
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
</funder>
</titleStmt>
<editionStmt>
<edition>1.0</edition>
</editionStmt>
<extent>97 besed</extent>
<publicationStmt>
<idno>spasteater.si</idno>
<availability status="restricted">
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
</p>
</availability>
<date>2012-04-15</date>
</publicationStmt>
<sourceDesc>
<bibl>
<title>INTERNET</title>
<author n="???">neznani avtor</author>
<date>2010-07-21</date>
<publisher n="internet, ustanove">spasteater.si</publisher>
<note type="sourceLang"/>
<note n="URL">
<list>
<item>
<ref target="http://www.spasteater.si/si/novice_in_napovedi/4843/detail.html">
http://www.spasteater.si/si/novice_in_napovedi/4843/detail.html
</ref>
</item>
<item>
<ref target="http://www.spasteater.si/si/vstopnice/kako_do_vstopnic/default.html">
http://www.spasteater.si/si/vstopnice/kako_do_vstopnic/default.html
</ref>
</item>
</list>
</note>
</bibl>
</sourceDesc>
</fileDesc>
<encodingDesc>
<projectDesc>
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
</p>
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
</p>
</projectDesc>
<tagsDecl>
<namespace name="http://www.tei-c.org/ns/1.0">
<tagUsage gi="S" occurs="91"/>
<tagUsage gi="body" occurs="1"/>
<tagUsage gi="c" occurs="18"/>
<tagUsage gi="p" occurs="7"/>
<tagUsage gi="s" occurs="12"/>
<tagUsage gi="text" occurs="1"/>
<tagUsage gi="w" occurs="97"/>
</namespace>
</tagsDecl>
<classDecl>
<taxonomy xml:id="SSJ">
<category xml:id="SSJ.T">
<catDesc>tisk</catDesc>
<category xml:id="SSJ.T.K">
<catDesc>knjižno</catDesc>
<category xml:id="SSJ.T.K.L">
<catDesc>leposlovno</catDesc>
</category>
<category xml:id="SSJ.T.K.S">
<catDesc>strokovno</catDesc>
</category>
</category>
<category xml:id="SSJ.T.P">
<catDesc>periodično</catDesc>
<category xml:id="SSJ.T.P.C">
<catDesc>časopis</catDesc>
</category>
<category xml:id="SSJ.T.P.R">
<catDesc>revija</catDesc>
</category>
</category>
<category xml:id="SSJ.T.D">
<catDesc>drugo</catDesc>
</category>
</category>
<category xml:id="SSJ.I">
<catDesc>internet</catDesc>
</category>
</taxonomy>
</classDecl>
</encodingDesc>
<profileDesc>
<textClass>
<catRef target="#SSJ.I"/>
</textClass>
</profileDesc>
</teiHeader>
<text xml:id="F0038754." xml:lang="sl">
<body>
<p>
<s>
<w msd="L" lemma="več">Več</w>
<S/>
<w msd="Dm" lemma="o">o</w>
<S/>
<w msd="Sosem" lemma="križarjenje">križarjenju</w>
<S/>
<w msd="Rsn" lemma="tukaj">tukaj</w>
<c>!</c>
</s>
</p>
<p>
<s>
<w msd="Do" lemma="z">S</w>
<S/>
<w msd="Someo" lemma="kompas">Kompasom</w>
<S/>
<w msd="Ggnvdm" lemma="odkrivati">odkrivajte</w>
<S/>
<w msd="Ppnsmt" lemma="veličasten">veličastna</w>
<S/>
<w msd="Sosmt" lemma="mesto">mesta</w>
<S/>
<w msd="Ppnser" lemma="zahoden">zahodnega</w>
<S/>
<w msd="Slser" lemma="Sredozemlje">Sredozemlja</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Dm" lemma="na">Na</w>
<S/>
<w msd="Sosem" lemma="križarjenje">križarjenju</w>
<S/>
<w msd="Gp-ptd-n" lemma="biti">bosta</w>
<S/>
<w msd="Do" lemma="z">z</w>
<S/>
<w msd="Zod-mo" lemma="ti">vami</w>
<S/>
<w msd="Somei" lemma="koncert">koncert</w>
<S/>
<w msd="Slzer" lemma="Alenka">Alenke</w>
<S/>
<w msd="Slmei" lemma="Godec">Godec</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Sozei" lemma="komedija">komedija</w>
<S/>
<w msd="Slmer" lemma="Matjaž">Matjaža</w>
<S/>
<w msd="Slmer" lemma="Javšnik">Javšnika</w>
<S/>
<w msd="Somei" lemma="optimist">Optimist</w>
<S/>
<c>!</c>
</s>
</p>
<p>
<s>
<w msd="Dm" lemma="na">Na</w>
<S/>
<w msd="Sozem" lemma="ladja">ladji</w>
<S/>
<w msd="Gp-ptd-n" lemma="biti">bosta</w>
<S/>
<w msd="Ppnzei" lemma="zvezdniški">zvezdniška</w>
<S/>
<w msd="Somdi" lemma="gost">gosta</w>
<S/>
<w msd="Slzei" lemma="Alenka">Alenka</w>
<S/>
<w msd="Slzei" lemma="Godec">Godec</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Slmei" lemma="Matjaž">Matjaž</w>
<S/>
<w msd="Slmei" lemma="Javšnik">Javšnik</w>
<c>.</c>
</s>
</p>
<p>
<s>
<w msd="Dm" lemma="na">Na</w>
<S/>
<w msd="Ppnsem" lemma="jesenski">jesenskem</w>
<S/>
<w msd="Ppnsem" lemma="špasen">špasnem</w>
<S/>
<w msd="Sosem" lemma="križarjanje">križarjanju</w>
<S/>
<w msd="Zod-mt" lemma="ti">vas</w>
<S/>
<w msd="Gp-pte-n" lemma="biti">bo</w>
<S/>
<w msd="Sozei" lemma="ladja">ladja</w>
<S/>
<w msd="Slmei" lemma="MSC">MSC</w>
<S/>
<w msd="Sozei" lemma="fantastica">Fantastica</w>
<S/>
<w msd="Kag" lemma="5">5</w>
<c>*</c>
<S/>
<w msd="Ggdd-ez" lemma="popeljati">popeljala</w>
<S/>
<w msd="Do" lemma="med">med</w>
<S/>
<w msd="Sosmo" lemma="mesto">mesti</w>
<S/>
<w msd="Ppnser" lemma="zahoden">zahodnega</w>
<S/>
<w msd="Slser" lemma="Sredozemlje">Sredozemlja</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Ggnd-mm" lemma="križariti">Križarili</w>
<S/>
<w msd="Gp-pdm-n" lemma="biti">boste</w>
<S/>
<w msd="Dr" lemma="od">od</w>
<S/>
<w msd="Sozer" lemma="genova">Genove</w>
<S/>
<w msd="Dr" lemma="do">do</w>
<S/>
<w msd="Slmer" lemma="Neapelj">Neaplja</w>
<c>,</c>
<S/>
<w msd="Zp------k" lemma="se">se</w>
<S/>
<w msd="Ggdd-mm" lemma="ustaviti">ustavili</w>
<S/>
<w msd="Dm" lemma="v">v</w>
<S/>
<w msd="Slmem" lemma="Palermo">Palermu</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Slmem" lemma="Tunis">Tunisu</w>
<c>,</c>
<S/>
<w msd="Ggdd-mm" lemma="obiskati">obiskali</w>
<S/>
<w msd="Sozet" lemma="palma">Palmo</w>
<S/>
<w msd="N" lemma="de">de</w>
<S/>
<w msd="Slzet" lemma="Mallorca">Mallorco</w>
<c>,</c>
<S/>
<w msd="Slzet" lemma="Barcelona">Barcelono</w>
<S/>
<w msd="Vp" lemma="in">in</w>
<S/>
<w msd="Slmei" lemma="Marseille">Marseille</w>
<c>,</c>
<S/>
<w msd="Rsn" lemma="vse">vse</w>
<S/>
<w msd="Dm" lemma="v">v</w>
<S/>
<w msd="Kbzmem" lemma="en">enem</w>
<S/>
<w msd="Somem" lemma="teden">tednu</w>
<c>!</c>
</s>
</p>
<p>
<s>
<w msd="Somei" lemma="aranžma">Aranžma</w>
<S/>
<w msd="Ppnser" lemma="špasen">špasnega</w>
<S/>
<w msd="Soser" lemma="križarjenje">križarjenja</w>
<S/>
<w msd="Gp-ste-n" lemma="biti">je</w>
<S/>
<w msd="Dr" lemma="od">od</w>
<S/>
<w msd="Kag" lemma="24.10">24.10</w>
<S/>
<w msd="Dr" lemma="do">do</w>
<S/>
<w msd="Kag" lemma="31.10">31.10</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Kag" lemma="2010">2010</w>
</s>
</p>
<p>
<s>
<w msd="Ggdvdm" lemma="pridružiti">Pridružite</w>
<S/>
<w msd="Zp------k" lemma="se">se</w>
<S/>
<w msd="Zop-md" lemma="jaz">nam</w>
<c>,</c>
<S/>
<w msd="Dm" lemma="na">na</w>
<S/>
<w msd="Ppnzem" lemma="veličasten">veličastni</w>
<S/>
<w msd="Sozem" lemma="križarka">križarki</w>
<c>!</c>
</s>
</p>
<p>
<s>
<w msd="Dm" lemma="v">V</w>
<S/>
<w msd="Somem" lemma="čas">ČASU</w>
<S/>
<w msd="Sozmr" lemma="počitnice">POČITNIC</w>
<c>,</c>
<S/>
<w msd="Dr" lemma="od">OD</w>
<S/>
<w msd="Kag" lemma="1.7">1.7</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Dr" lemma="do">DO</w>
<S/>
<w msd="Kag" lemma="13.9">13.9</w>
<c>.</c>
<S/>
</s>
<s>
<w msd="Gp-ste-n" lemma="biti">JE</w>
<S/>
<w msd="Sozei" lemma="blagajna">BLAGAJNA</w>
<S/>
<w msd="Pdnzei" lemma="zaprt">ZAPRTA</w>
<c>!</c>
</s>
</p>
</body>
</text>
</TEI>

View File

@@ -0,0 +1,402 @@
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0038920" xml:lang="sl">
<teiHeader>
<fileDesc>
<titleStmt>
<title>Gigafida: INTERNET (2010-09-20)</title>
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
</funder>
</titleStmt>
<editionStmt>
<edition>1.0</edition>
</editionStmt>
<extent>79 besed</extent>
<publicationStmt>
<idno>drama.si</idno>
<availability status="restricted">
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
</p>
</availability>
<date>2012-04-15</date>
</publicationStmt>
<sourceDesc>
<bibl>
<title>INTERNET</title>
<author n="???">neznani avtor</author>
<date>2010-09-20</date>
<publisher n="internet, ustanove">drama.si</publisher>
<note type="sourceLang"/>
<note n="URL">
<list>
<item>
<ref target="http://www.drama.si/eng/ansambel/matija-rozman.html">
http://www.drama.si/eng/ansambel/matija-rozman.html
</ref>
</item>
<item>
<ref target="http://www.drama.si/eng/ansambel/andrej-nahtigal.html">
http://www.drama.si/eng/ansambel/andrej-nahtigal.html
</ref>
</item>
<item>
<ref target="http://www.drama.si/eng/ansambel/janez-skof.html">
http://www.drama.si/eng/ansambel/janez-skof.html
</ref>
</item>
<item>
<ref target="http://www.drama.si/eng/ansambel/boris-mihalj.html">
http://www.drama.si/eng/ansambel/boris-mihalj.html
</ref>
</item>
<item>
<ref target="http://www.drama.si/eng/ansambel/marijana-brecelj.html">
http://www.drama.si/eng/ansambel/marijana-brecelj.html
</ref>
</item>
<item>
<ref target="http://www.drama.si/eng/ansambel/gregor-bakovic.html">
http://www.drama.si/eng/ansambel/gregor-bakovic.html
</ref>
</item>
<item>
<ref target="http://www.drama.si/repertoar/totenbirt.html">
http://www.drama.si/repertoar/totenbirt.html
</ref>
</item>
<item>
<ref target="http://www.drama.si/ansambel/marijana-brecelj.html">
http://www.drama.si/ansambel/marijana-brecelj.html
</ref>
</item>
<item>
<ref target="http://www.drama.si/ansambel/maja-koncar.html">
http://www.drama.si/ansambel/maja-koncar.html
</ref>
</item>
</list>
</note>
</bibl>
</sourceDesc>
</fileDesc>
<encodingDesc>
<projectDesc>
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
</p>
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
</p>
</projectDesc>
<tagsDecl>
<namespace name="http://www.tei-c.org/ns/1.0">
<tagUsage gi="S" occurs="91"/>
<tagUsage gi="body" occurs="1"/>
<tagUsage gi="c" occurs="46"/>
<tagUsage gi="p" occurs="10"/>
<tagUsage gi="s" occurs="10"/>
<tagUsage gi="text" occurs="1"/>
<tagUsage gi="w" occurs="79"/>
</namespace>
</tagsDecl>
<classDecl>
<taxonomy xml:id="SSJ">
<category xml:id="SSJ.T">
<catDesc>tisk</catDesc>
<category xml:id="SSJ.T.K">
<catDesc>knjižno</catDesc>
<category xml:id="SSJ.T.K.L">
<catDesc>leposlovno</catDesc>
</category>
<category xml:id="SSJ.T.K.S">
<catDesc>strokovno</catDesc>
</category>
</category>
<category xml:id="SSJ.T.P">
<catDesc>periodično</catDesc>
<category xml:id="SSJ.T.P.C">
<catDesc>časopis</catDesc>
</category>
<category xml:id="SSJ.T.P.R">
<catDesc>revija</catDesc>
</category>
</category>
<category xml:id="SSJ.T.D">
<catDesc>drugo</catDesc>
</category>
</category>
<category xml:id="SSJ.I">
<catDesc>internet</catDesc>
</category>
</taxonomy>
</classDecl>
</encodingDesc>
<profileDesc>
<textClass>
<catRef target="#SSJ.I"/>
</textClass>
</profileDesc>
</teiHeader>
<text xml:id="F0038920." xml:lang="sl">
<body>
<p>
<s>
<c></c>
<w msd="Somei" lemma="totenbirt">Totenbirt</w>
<c></c>
<S/>
<c>-</c>
<S/>
<w msd="Slzei" lemma="Agata">Agata</w>
<S/>
<w msd="Slmei" lemma="Jurkovič">Jurkovič</w>
<S/>
<c></c>
<S/>
<w msd="Sozei" lemma="jurkovička">Jurkovička</w>
<S/>
<c>,</c>
<S/>
<w msd="Kag" lemma="62">62</w>
<c>,</c>
<S/>
<w msd="Slmei" lemma="widow">widow</w>
<c>,</c>
<S/>
<w msd="Slmei" lemma="judge">judge</w>
<c>,</c>
<S/>
<w msd="Slmei" lemma="Eli's">Eli's</w>
<S/>
<w msd="Slmei" lemma="older">older</w>
<S/>
<w msd="Slmei" lemma="sister">sister</w>
</s>
</p>
<p>
<s>
<w msd="Slmei" lemma="Performs">Performs</w>
<S/>
<w msd="Vp" lemma="in">in</w>
</s>
</p>
<p>
<s>
<c></c>
<w msd="Somei" lemma="totenbirt">Totenbirt</w>
<c></c>
<S/>
<c>-</c>
<S/>
<w msd="Slmei" lemma="Ivek">Ivek</w>
<S/>
<c>,</c>
<S/>
<w msd="Kag" lemma="66">66</w>
<c>,</c>
<S/>
<w msd="Slmei" lemma="drunk">drunk</w>
</s>
</p>
<p>
<s>
<w msd="Slzei" lemma="Maja">Maja</w>
<S/>
<w msd="Slzei" lemma="Končar">Končar</w>
<S/>
<c>-</c>
<S/>
<w msd="Slzei" lemma="Marta">Marta</w>
<S/>
<w msd="Slmei" lemma="Fijavž">Fijavž</w>
<S/>
<w msd="Slmei" lemma="Roblek">Roblek</w>
<S/>
<c>-</c>
<S/>
<w msd="Slzei" lemma="Martika">Martika</w>
<S/>
<c>,</c>
<S/>
<w msd="Kag" lemma="46">46</w>
<c>,</c>
<S/>
<w msd="Sozei" lemma="učiteljica">učiteljica</w>
<c>,</c>
<S/>
<w msd="Psnzei" lemma="Tomijev">Tomijeva</w>
<S/>
<w msd="Sozei" lemma="žena">žena</w>
<c>,</c>
<S/>
<w msd="Sozei" lemma="elina">Elina</w>
<S/>
<w msd="Pppzei" lemma="mlad">mlajša</w>
<S/>
<w msd="Sozei" lemma="sestra">sestra</w>
</s>
</p>
<p>
<s>
<w msd="Slzei" lemma="Totenbirt">Totenbirt</w>
<S/>
<c>-</c>
<S/>
<w msd="Slzei" lemma="Agata">Agata</w>
<S/>
<w msd="Slmei" lemma="Jurkovič">Jurkovič</w>
<S/>
<c>-</c>
<S/>
<w msd="Slmei" lemma="Jurkovička">Jurkovička</w>
<S/>
<c>,</c>
<S/>
<w msd="Kag" lemma="62">62</w>
<c>,</c>
<S/>
<w msd="Sozei" lemma="vdova">vdova</w>
<c>,</c>
<S/>
<w msd="Sozei" lemma="sodnica">sodnica</w>
<c>,</c>
<S/>
<w msd="Sozei" lemma="elina">Elina</w>
<S/>
<w msd="Pppzei" lemma="star">starejša</w>
<S/>
<w msd="Sozei" lemma="sestra">sestra</w>
</s>
</p>
<p>
<s>
<w msd="Slzei" lemma="Totenbirt">Totenbirt</w>
<S/>
<c>-</c>
<S/>
<w msd="Slzei" lemma="Marta">Marta</w>
<S/>
<w msd="Slmei" lemma="Fijavž">Fijavž</w>
<S/>
<w msd="Slmei" lemma="Roblek">Roblek</w>
<S/>
<c>-</c>
<S/>
<w msd="Slzei" lemma="Martika">Martika</w>
<S/>
<c>,</c>
<S/>
<w msd="Kag" lemma="46">46</w>
<c>,</c>
<S/>
<w msd="Sozei" lemma="učiteljica">učiteljica</w>
<c>,</c>
<S/>
<w msd="Psnzei" lemma="Tomijev">Tomijeva</w>
<S/>
<w msd="Sozei" lemma="žena">žena</w>
<c>,</c>
<S/>
<w msd="Sozei" lemma="elina">Elina</w>
<S/>
<w msd="Pppzei" lemma="mlad">mlajša</w>
<S/>
<w msd="Sozei" lemma="sestra">sestra</w>
</s>
</p>
<p>
<s>
<w msd="Slzei" lemma="Marijana">Marijana</w>
<S/>
<w msd="Slzei" lemma="Brecelj">Brecelj</w>
<S/>
<c>-</c>
<S/>
<w msd="Slzei" lemma="Agata">Agata</w>
<S/>
<w msd="Slmei" lemma="Jurkovič">Jurkovič</w>
<S/>
<c>-</c>
<S/>
<w msd="Slmei" lemma="Jurkovička">Jurkovička</w>
<S/>
<c>,</c>
<S/>
<w msd="Kag" lemma="62">62</w>
<c>,</c>
<S/>
<w msd="Sozei" lemma="vdova">vdova</w>
<c>,</c>
<S/>
<w msd="Sozei" lemma="sodnica">sodnica</w>
<c>,</c>
<S/>
<w msd="Sozei" lemma="elina">Elina</w>
<S/>
<w msd="Pppzei" lemma="star">starejša</w>
<S/>
<w msd="Sozei" lemma="sestra">sestra</w>
</s>
</p>
<p>
<s>
<c></c>
<w msd="Somei" lemma="totenbirt">Totenbirt</w>
<c></c>
<S/>
<c>-</c>
<S/>
<w msd="N" lemma="the">The</w>
<S/>
<w msd="Sozer" lemma="lata">late</w>
<S/>
<w msd="N" lemma="južek">Južek</w>
</s>
</p>
<p>
<s>
<w msd="Slmei" lemma="Roberto">Roberto</w>
<S/>
<w msd="Slmei" lemma="Zucco">Zucco</w>
<S/>
<c>-</c>
<S/>
<w msd="Slzet" lemma="1st">1st</w>
<S/>
<w msd="Slmei" lemma="Prison">Prison</w>
<S/>
<w msd="N" lemma="officer">Officer</w>
<S/>
<c>/</c>
<S/>
<w msd="Somei" lemma="detective">Detective</w>
<S/>
<c>/</c>
<S/>
<w msd="Somei" lemma="1st">1st</w>
<S/>
<w msd="Sozer" lemma="polica">Police</w>
<S/>
<w msd="N" lemma="officer">Officer</w>
</s>
</p>
<p>
<s>
<c></c>
<w msd="Somei" lemma="totenbirt">Totenbirt</w>
<c></c>
<S/>
<c>-</c>
<S/>
<w msd="N" lemma="the">The</w>
<S/>
<w msd="Sozer" lemma="lata">late</w>
<S/>
<w msd="Slmmi" lemma="Frenk">Frenki</w>
</s>
</p>
</body>
</text>
</TEI>

View File

@@ -0,0 +1,18 @@
brez
dis
do
eks
inter
iz
na
ne
ni
ob
od
po
pre
pri
pro
raz
spre
za

View File

@@ -0,0 +1,7 @@
ga
ma
me
nj
nje
njo
se

View File

@@ -0,0 +1,54 @@
<?xml version="1.0" encoding="UTF-8"?>
<?import javafx.scene.control.Button?>
<?import javafx.scene.control.CheckBox?>
<?import javafx.scene.control.Hyperlink?>
<?import javafx.scene.control.Label?>
<?import javafx.scene.control.ProgressBar?>
<?import javafx.scene.control.RadioButton?>
<?import javafx.scene.control.TextField?>
<?import javafx.scene.control.ToggleGroup?>
<?import javafx.scene.layout.AnchorPane?>
<?import javafx.scene.layout.HBox?>
<?import javafx.scene.layout.Pane?>
<?import org.controlsfx.control.CheckComboBox?>
<!--
  Layout of the character-level analysis tab (800 x 600, controller gui.CharacterAnalysisTab).
  Left column: input controls (string length, lemma / word-form choice, MSD and taxonomy
  restrictions, compute button). Right column: summary of the selected filters and a help link.
  Bottom: progress bar and progress message.
  All user-facing texts are Slovenian; English glosses are given in the comments below.
-->
<AnchorPane fx:id="characterAnalysisTab" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.112" xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.CharacterAnalysisTab">
<Pane>
<!-- "Število črk" = number of letters -->
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Število črk" />
<TextField fx:id="stringLengthTF" layoutX="100.0" layoutY="20.0" prefWidth="180.0" />
<!-- Mutually exclusive choice between "lema" (lemma, selected by default) and "različnica" (word form);
     both radio buttons share the toggle group calculateForRB. -->
<HBox layoutX="10.0" layoutY="60.0">
<children>
<RadioButton fx:id="lemmaRB" mnemonicParsing="false" prefHeight="25.0" prefWidth="86.0" selected="true" text="lema">
<toggleGroup>
<ToggleGroup fx:id="calculateForRB" />
</toggleGroup></RadioButton>
<RadioButton fx:id="varietyRB" mnemonicParsing="false" text="različnica" toggleGroup="$calculateForRB" />
</children>
</HBox>
<!-- "Omejitev podatkov" = data restriction (section heading for the two filters below) -->
<Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Omejitev podatkov" />
<!-- "Oznaka MSD" = MSD tag -->
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Oznaka MSD" />
<TextField fx:id="msdTF" layoutX="100.0" layoutY="160.0" prefWidth="180.0" />
<!-- "Taksonomija" = taxonomy (multi-select) -->
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Taksonomija" />
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="200.0" prefHeight="25.0" prefWidth="180.0" />
<Pane fx:id="paneLetters" layoutX="0.0" layoutY="240.0" prefHeight="84.0" prefWidth="380.0">
<children>
<!-- "Calculate for combinations of vowels and consonants". Declared with visible="false";
     presumably the controller reveals it when applicable (TODO confirm against gui.CharacterAnalysisTab). -->
<CheckBox fx:id="calculatecvvCB" layoutX="10.0" mnemonicParsing="false" prefHeight="25.0" text="Izračunaj za kombinacije samoglasnikov in soglasnikov" visible="false" />
</children>
</Pane>
<!-- "Izračunaj" = compute -->
<Button fx:id="computeNgramsB" layoutX="14.0" layoutY="382.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
</Pane>
<!-- "Izbrani filtri:" = selected filters; the wrapping label below it starts out as a single space. -->
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:" />
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0" text=" " wrapText="true" />
<!-- "Pomoč" = help -->
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
<!-- Progress bar starts at 0.0; the label beneath it has no initial text. -->
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0" />
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0" />
</AnchorPane>

View File

@@ -0,0 +1,32 @@
<?xml version="1.0" encoding="UTF-8"?>
<?import javafx.scene.control.Button?>
<?import javafx.scene.control.CheckBox?>
<?import javafx.scene.control.Label?>
<?import javafx.scene.control.Hyperlink?>
<?import javafx.scene.control.ProgressIndicator?>
<?import javafx.scene.layout.AnchorPane?>
<?import javafx.scene.layout.Pane?>
<!--
  Layout of the corpus selection tab (800 x 600, controller gui.CorpusTab): buttons for choosing
  the corpus and results locations, two options, and labels for feedback.
  All user-facing texts are Slovenian; English glosses are given in the comments below.
-->
<AnchorPane prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111" xmlns:fx="http://javafx.com/fxml/1"
fx:controller="gui.CorpusTab">
<children>
<!-- NOTE(review): this Pane has no fx:id, size or children; it looks like a leftover. Confirm before removing. -->
<Pane/>
<!-- "Nastavi lokacijo korpusa" = set corpus location -->
<Button fx:id="chooseCorpusLocationB" layoutX="10.0" layoutY="20.0" mnemonicParsing="false"
text="Nastavi lokacijo korpusa"/>
<!-- "Preberi info iz headerjev" = read info from headers -->
<CheckBox fx:id="readHeaderInfoChB" layoutX="176.0" layoutY="24.0" mnemonicParsing="false"
text="Preberi info iz headerjev"/>
<Pane fx:id="setCorpusWrapperP" layoutX="10.0" layoutY="60.0" prefHeight="118.0" prefWidth="683.0">
<children>
<!-- Text "Label" is a placeholder; presumably replaced by the controller at runtime (TODO confirm).
     NOTE(review): prefWidth 704 exceeds the wrapper's prefWidth of 683. -->
<Label fx:id="chooseCorpusL" prefHeight="50.0" prefWidth="704.0" text="Label"/>
<!-- "Uporabi pogovorni zapis" = use colloquial transcription -->
<CheckBox fx:id="gosUseOrthChB" layoutY="65.0" mnemonicParsing="false" text="Uporabi pogovorni zapis"/>
</children>
</Pane>
<!-- Placed at the same layoutX/layoutY as setCorpusWrapperP; presumably only one of the two is
     shown at a time by the controller (TODO confirm). -->
<ProgressIndicator fx:id="locationScanPI" layoutX="10.0" layoutY="60.0" prefHeight="50.0" progress="0.0"/>
<!-- "Nastavi lokacijo rezultatov" = set results location -->
<Button fx:id="chooseResultsLocationB" layoutX="10.0" layoutY="180.0" mnemonicParsing="false"
text="Nastavi lokacijo rezultatov"/>
<!-- Placeholder text, see chooseCorpusL above. -->
<Label fx:id="chooseResultsL" layoutX="10.0" layoutY="220.0" text="Label"/>
<!-- "Pomoč" = help -->
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="20.0" text="Pomoč"/>
</children>
</AnchorPane>

View File

@@ -0,0 +1,30 @@
<?xml version="1.0" encoding="UTF-8"?>
<?import javafx.scene.control.Label?>
<?import javafx.scene.layout.AnchorPane?>
<?import javafx.scene.layout.Pane?>
<?import javafx.scene.control.Hyperlink?>
<?import org.controlsfx.control.CheckComboBox?>
<!--
  Layout of the filter tab (800 x 600, controller gui.FiltersForSolar).
  Left column: six multi-select filters, each a caption Label plus a CheckComboBox.
  Right column: summary of the selected filters and a help link.
  All user-facing texts are Slovenian; English glosses are given in the comments below.
  Field captions carry no trailing colon; only the "Izbrani filtri:" heading does.
-->
<AnchorPane fx:id="solarFiltersTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.FiltersForSolar">
<Pane>
<!-- "Regija" = region -->
<CheckComboBox fx:id="solarRegijaCCB" layoutX="104.0" layoutY="40.0" prefHeight="25.0" prefWidth="372.0"/>
<Label layoutX="14.0" layoutY="44.0" text="Regija"/>
<!-- "Predmet" = subject -->
<CheckComboBox fx:id="solarPredmetCCB" layoutX="104.0" layoutY="87.0" prefHeight="25.0" prefWidth="372.0"/>
<Label layoutX="14.0" layoutY="91.0" text="Predmet"/>
<!-- "Razred" = grade -->
<CheckComboBox fx:id="solarRazredCCB" layoutX="104.0" layoutY="136.0" prefHeight="25.0" prefWidth="372.0"/>
<Label layoutX="14.0" layoutY="140.0" text="Razred"/>
<!-- "Leto" = year -->
<CheckComboBox fx:id="solarLetoCCB" layoutX="104.0" layoutY="189.0" prefHeight="25.0" prefWidth="372.0"/>
<Label layoutX="14.0" layoutY="193.0" text="Leto"/>
<!-- "Šola" = school -->
<CheckComboBox fx:id="solarSolaCCB" layoutX="104.0" layoutY="246.0" prefHeight="25.0" prefWidth="372.0"/>
<Label layoutX="14.0" layoutY="250.0" text="Šola"/>
<!-- "Vrsta besedila" = text type -->
<CheckComboBox fx:id="solarVrstaBesedilaCCB" layoutX="104.0" layoutY="293.0" prefHeight="25.0" prefWidth="372.0"/>
<Label layoutX="14.0" layoutY="297.0" text="Vrsta besedila"/>
<!-- "Pomoč" = help -->
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="20.0" text="Pomoč"/>
<!-- "Izbrani filtri:" = selected filters; the wrapping label below it starts out as a single space. -->
<Label layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0"
prefWidth="275.0" text=" " wrapText="true"/>
</Pane>
</AnchorPane>

View File

@@ -0,0 +1,56 @@
<?xml version="1.0" encoding="UTF-8"?>
<?import java.lang.String?>
<?import javafx.collections.FXCollections?>
<?import javafx.scene.control.Button?>
<?import javafx.scene.control.Hyperlink?>
<?import javafx.scene.control.ComboBox?>
<?import javafx.scene.control.Label?>
<?import javafx.scene.control.ProgressBar?>
<?import javafx.scene.control.TextField?>
<?import javafx.scene.layout.AnchorPane?>
<?import javafx.scene.layout.Pane?>
<?import org.controlsfx.control.CheckComboBox?>
<!--
  Layout of the single-word analysis tab (800 x 600, controller gui.OneWordAnalysisTab).
  Left column: "calculate for" selector, MSD and taxonomy restrictions, compute button.
  Right column: summary of the selected filters and a help link.
  Bottom: progress bar and progress message.
  All user-facing texts are Slovenian; English glosses are given in the comments below.
-->
<AnchorPane fx:id="oneWordAnalysisTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.OneWordAnalysisTab">
<Pane>
<!-- "Izračunaj za" = calculate for; prompt "izberi" = choose -->
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Izračunaj za"/>
<!-- NOTE(review): minWidth (180) is larger than prefWidth (150), so the preferred width
     presumably never takes effect. Confirm and align the two values. -->
<ComboBox fx:id="calculateForCB" layoutX="100.0" layoutY="20.0" minWidth="180.0" prefWidth="150.0" promptText="izberi"
visibleRowCount="5">
<items>
<!-- Choices: lemma, word form, morphosyntactic tag, morphosyntactic property, part of speech.
     The values are plain strings; how the controller interprets them is not visible here. -->
<FXCollections fx:factory="observableArrayList">
<String fx:value="lema"/>
<String fx:value="različnica"/>
<String fx:value="oblikoskladenjska oznaka"/>
<String fx:value="oblikoskladenjska lastnost"/>
<String fx:value="besedna vrsta"/>
</FXCollections>
</items>
</ComboBox>
<!-- MSD and Taxonomy separated -->
<!-- "Omejitev podatkov" = data restriction (section heading for the two filters below) -->
<Label layoutX="10.0" layoutY="80.0" prefHeight="25.0" text="Omejitev podatkov" />
<!-- "Oznaka MSD" = MSD tag -->
<Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Oznaka MSD"/>
<TextField fx:id="msdTF" layoutX="100.0" layoutY="120.0" prefWidth="180.0"/>
<!-- "Taksonomija" = taxonomy (multi-select) -->
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="160.0" prefHeight="25.0" prefWidth="180.0"/>
<!-- "Izračunaj" = compute -->
<Button fx:id="computeNgramsB" layoutX="14.0" layoutY="382.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
</Pane>
<!-- "Izbrani filtri:" = selected filters; the wrapping label below it starts out as a single space. -->
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
text=" " wrapText="true"/>
<!-- "Pomoč" = help -->
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
<!-- Progress bar starts at 0.0; the label beneath it has no initial text. -->
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
</AnchorPane>

View File

@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<?import javafx.scene.control.Label?>
<?import javafx.scene.layout.AnchorPane?>
<?import javafx.scene.control.Label?>
<!--
  Small pane that contains only the "Izbrani filtri:" ("Selected filters:") caption and the
  selectedFiltersLabel beneath it. No fx:controller is declared in this file.
-->
<AnchorPane prefHeight="400.0" prefWidth="600.0" xmlns="http://javafx.com/javafx/8.0.111" xmlns:fx="http://javafx.com/fxml/1">
<children>
<Label layoutX="371.0" layoutY="26.0" text="Izbrani filtri:"/>
<!-- Starts out as a single space; unlike the tab layouts, wrapText is not set here -->
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="371.0" layoutY="43.0" prefHeight="188.0"
prefWidth="215.0"
text=" "/>
</children>
</AnchorPane>

View File

@@ -0,0 +1,105 @@
<?xml version="1.0" encoding="UTF-8"?>
<?import java.lang.String?>
<?import javafx.collections.FXCollections?>
<?import javafx.scene.control.Button?>
<?import javafx.scene.control.CheckBox?>
<?import javafx.scene.control.ComboBox?>
<?import javafx.scene.control.Hyperlink?>
<?import javafx.scene.control.Label?>
<?import javafx.scene.control.ProgressBar?>
<?import javafx.scene.control.TextField?>
<?import javafx.scene.layout.AnchorPane?>
<?import javafx.scene.layout.Pane?>
<?import org.controlsfx.control.CheckComboBox?>
<!--
  Layout of the tab whose controller class is gui.StringAnalysisTabNew2.
  The nested Pane holds the input controls: n-gram level, "calculate for" selector, the
  paneWords group (word skip), the MSD and taxonomy restrictions, the paneLetters group
  (vowel/consonant option and string length) and the compute button. The remaining children
  of the AnchorPane are the selected-filters caption and label, the help hyperlink, and the
  progress bar with its status label.
  All text attributes are Slovenian UI strings shown to the user.
-->
<AnchorPane fx:id="stringAnalysisTabPaneNew2" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.StringAnalysisTabNew2">
<Pane>
<!-- "N-gram nivo" = "N-gram level"; the offered values are the strings 2 to 5 -->
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="N-gram nivo"/>
<ComboBox fx:id="ngramValueCB" layoutX="100.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0" promptText="izberi"
visibleRowCount="5">
<items>
<FXCollections fx:factory="observableArrayList">
<String fx:value="2"/>
<String fx:value="3"/>
<String fx:value="4"/>
<String fx:value="5"/>
</FXCollections>
</items>
</ComboBox>
<!-- "Izračunaj za" = "Calculate for" -->
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izračunaj za"/>
<!-- NOTE(review): prefWidth (150) is smaller than minWidth (180); the other inputs in this pane use prefWidth 180. Confirm which value is intended. -->
<ComboBox fx:id="calculateForCB" layoutX="100.0" layoutY="60.0" minWidth="180.0" prefWidth="150.0" promptText="izberi"
visibleRowCount="5">
<items>
<!-- Choices, in order: lemma, word form, morphosyntactic tag, morphosyntactic property, part of speech -->
<FXCollections fx:factory="observableArrayList">
<String fx:value="lema"/>
<String fx:value="različnica"/>
<String fx:value="oblikoskladenjska oznaka"/>
<String fx:value="oblikoskladenjska lastnost"/>
<String fx:value="besedna vrsta"/>
</FXCollections>
</items>
</ComboBox>
<!-- Word-skip group ("Preskok besed"); it has its own fx:id so it can be addressed as a unit. The offered values are the strings 0 to 7. -->
<Pane fx:id="paneWords" layoutX="0.0" layoutY="100.0" prefHeight="36.0" prefWidth="380.0">
<children>
<Label layoutX="10.0" prefHeight="25.0" text="Preskok besed"/>
<ComboBox fx:id="skipValueCB" layoutX="100.0" prefWidth="180.0" promptText="izberi"
visibleRowCount="5">
<items>
<FXCollections fx:factory="observableArrayList">
<String fx:value="0"/>
<String fx:value="1"/>
<String fx:value="2"/>
<String fx:value="3"/>
<String fx:value="4"/>
<String fx:value="5"/>
<String fx:value="6"/>
<String fx:value="7"/>
</FXCollections>
</items>
</ComboBox>
</children>
</Pane>
<!-- Data restriction section ("Omejitev podatkov"): MSD and taxonomy are separate inputs -->
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov"/>
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD"/>
<TextField fx:id="msdTF" layoutX="100.0" layoutY="200.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0"/>
<!-- Vowels/consonants group: checkbox for vowel/consonant combinations plus the string length ("Dolžina niza") field -->
<Pane fx:id="paneLetters" layoutX="0.0" layoutY="280.0" prefHeight="84.0" prefWidth="380.0">
<children>
<CheckBox fx:id="calculatecvvCB" layoutX="10.0" mnemonicParsing="false" prefHeight="25.0"
text="Izračunaj za kombinacije samoglasnikov in soglasnikov"/>
<TextField fx:id="stringLengthTF" layoutX="100.0" layoutY="40.0" prefWidth="180.0"/>
<Label layoutX="10.0" layoutY="40.0" prefHeight="25.0" text="Dolžina niza"/>
</children>
</Pane>
<!-- "Izračunaj" = "Compute" -->
<Button fx:id="computeNgramsB" layoutX="14.0" layoutY="382.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
</Pane>
<!-- "Izbrani filtri:" = "Selected filters:"; the label below it starts out as a single space -->
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
text=" " wrapText="true"/>
<!-- "Pomoč" = "Help" -->
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
<!-- Progress bar (initially 0.0) and the label placed directly beneath it -->
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
</AnchorPane>

View File

@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<?import org.controlsfx.control.CheckComboBox?>
<?import javafx.scene.control.*?>
<?import javafx.scene.layout.AnchorPane?>
<?import javafx.scene.layout.Pane?>
<!--
  Layout of the tab whose controller class is gui.WordFormationTab.
  The nested Pane holds the only inputs, a taxonomy multi-select and the compute button. The
  remaining children of the AnchorPane are the selected-filters caption and label, the help
  hyperlink, and the progress bar with its status label.
  All text attributes are Slovenian UI strings shown to the user.
-->
<AnchorPane fx:id="wordAnalysisTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordFormationTab">
<Pane>
<!-- "Taksonomija" = "Taxonomy" -->
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
<!-- "Izračunaj" = "Compute" -->
<Button fx:id="computeB" layoutX="14.0" layoutY="382.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
</Pane>
<!-- "Izbrani filtri:" = "Selected filters:"; the label below it starts out as a single space -->
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
text=" " wrapText="true"/>
<!-- "Pomoč" = "Help" -->
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="20.0" text="Pomoč"/>
<!-- Progress bar (initially 0.0) and the label placed directly beneath it -->
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
</AnchorPane>

View File

@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<?import org.controlsfx.control.CheckComboBox?>
<?import javafx.scene.control.*?>
<?import javafx.scene.layout.AnchorPane?>
<?import javafx.scene.layout.Pane?>
<!--
  Layout of the tab whose controller class is gui.WordLevelTab.
  The nested Pane holds the only inputs, a taxonomy multi-select and the compute button. The
  remaining children of the AnchorPane are the selected-filters caption and label, the help
  hyperlink, and the progress bar with its status label.
  All text attributes are Slovenian UI strings shown to the user.
-->
<AnchorPane fx:id="wordLevelAnalysisTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordLevelTab">
<Pane>
<!-- "Taksonomija" = "Taxonomy" -->
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/>
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
<!-- "Izračunaj" = "Compute" -->
<Button fx:id="computeB" layoutX="14.0" layoutY="382.0" mnemonicParsing="false"
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
</Pane>
<!-- "Izbrani filtri:" = "Selected filters:"; the label below it starts out as a single space -->
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
text=" " wrapText="true"/>
<!-- "Pomoč" = "Help" -->
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="20.0" text="Pomoč"/>
<!-- Progress bar (initially 0.0) and the label placed directly beneath it -->
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
</AnchorPane>

View File

@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Log4j 2 configuration.
  Every application log event (root level "all") is written both to the console and to
  ${LOG_DIR}/CorpusAnalyzer.log using the same pattern.

  The "status" attribute controls Log4j's own internal diagnostics, not application logging.
  It was "ALL", which prints Log4j's start-up and plugin-discovery chatter to the console on
  every run; it is now "WARN" so only configuration problems are reported.
-->
<Configuration status="WARN" name="log-config">
    <Properties>
        <Property name="LOG_DIR">log</Property>
        <!-- NOTE(review): ARCHIVE is defined but not referenced anywhere in this configuration. -->
        <Property name="ARCHIVE">${LOG_DIR}/archive</Property>
        <!-- level, timestamp, logger.method:line, message. %M and %L require location lookup per event. -->
        <Property name="PATTERN">[%p] %d{dd.MM.yyyy HH:mm:ss} - %c.%M:%L - %m%n</Property>
    </Properties>
    <Appenders>
        <Console name="STDOUT" target="SYSTEM_OUT">
            <PatternLayout pattern="${PATTERN}"/>
        </Console>
        <!-- Plain file appender: appends to a single file and flushes after every event; no rollover is configured. -->
        <File name="MyFile" fileName="${LOG_DIR}/CorpusAnalyzer.log" immediateFlush="true" append="true">
            <PatternLayout pattern="${PATTERN}"/>
        </File>
    </Appenders>
    <loggers>
        <root level="all">
            <appender-ref ref="MyFile" level="all"/>
            <appender-ref ref="STDOUT" level="all"/>
        </root>
    </loggers>
</Configuration>

85
src/test/java/Common.java Normal file
View File

@@ -0,0 +1,85 @@
import java.util.ArrayList;
import java.util.List;
import data.Sentence;
import data.Word;
/**
 * Hand-built sentence fixtures shared by the test classes.
 *
 * <p>Each public list holds exactly one {@link Sentence}. Every sentence is created with the
 * same second constructor argument, {@code "#Ft.Z.N.N"} (presumably a taxonomy code; confirm
 * against {@code data.Sentence}).
 */
public class Common {
    /** Second {@link Sentence} constructor argument used by every fixture. */
    private static final String SENTENCE_TAG = "#Ft.Z.N.N";

    /** One full sentence of 20 tokens. */
    public static List<Sentence> corpus;
    /** One sentence built from {@code getSublist(0, 3)} of the full sentence. */
    public static List<Sentence> minCorpus;
    /** One five-token sentence in which "ima" occurs twice. */
    public static List<Sentence> midCorpus;
    /** One five-token sentence without repeated tokens, used for skip-grams. */
    public static List<Sentence> midCorpusSkip;
    /** One three-token sentence used by the JOS / word-formation test. */
    public static List<Sentence> josTest;

    static {
        // full sentence
        corpus = corpusOf(new Sentence(wordList(
                new Word("ker", "ker", "Vd"),
                new Word("ima", "imeti", "Ggnste-n"),
                new Word("junak", "junak", "Somei"),
                new Word("v", "v", "Dm"),
                new Word("posesti", "posest", "Sozem"),
                new Word("nekaj", "nekaj", "Rsn"),
                new Word("o", "o", "Dm"),
                new Word("čemer", "kar", "Zz-sem"),
                new Word("se", "se", "Zp------k"),
                new Word("mu", "on", "Zotmed--k"),
                new Word("ne", "ne", "L"),
                new Word("sanja", "sanjati", "Ggnste"),
                new Word("a", "a", "Vp"),
                new Word("se", "se", "Zp------k"),
                new Word("onemu", "oni", "Zk-sed"),
                new Word("zdi", "zdeti", "Ggnste"),
                new Word("ključno", "ključen", "Ppnsei"),
                new Word("pri", "pri", "Dm"),
                new Word("operaciji", "operacija", "Sozem"),
                new Word("666", "666", "Kag")), SENTENCE_TAG));

        // three word sentence, cut out of the full one
        minCorpus = corpusOf(new Sentence(corpus.get(0).getSublist(0, 3), SENTENCE_TAG));

        // five word sentence
        midCorpus = corpusOf(new Sentence(wordList(
                new Word("ker", "ker", "Vd"),
                new Word("ima", "imeti", "Ggnste-n"),
                new Word("junak", "junak", "Somei"),
                new Word("ima", "imeti", "Ggnste-n"),
                new Word("posesti", "posest", "Sozem")), SENTENCE_TAG));

        // five word sentence - for skipgrams
        midCorpusSkip = corpusOf(new Sentence(wordList(
                new Word("ker", "ker", "Vd"),
                new Word("ima", "imeti", "Ggnste-n"),
                new Word("junak", "junak", "Somei"),
                new Word("v", "v", "Dm"),
                new Word("posesti", "posest", "Sozem")), SENTENCE_TAG));

        // JOS test
        josTest = corpusOf(new Sentence(wordList(
                new Word("junak", "junak", "Somei"),
                new Word("ima", "imeti", "Ggnste-n"),
                new Word("posesti", "posest", "Sozem")), SENTENCE_TAG));
    }

    /** Copies the given words, in order, into a fresh mutable {@link ArrayList}. */
    private static List<Word> wordList(Word... items) {
        List<Word> collected = new ArrayList<>();
        for (Word item : items) {
            collected.add(item);
        }
        return collected;
    }

    /** Wraps a single sentence in a fresh mutable {@link ArrayList}. */
    private static List<Sentence> corpusOf(Sentence only) {
        List<Sentence> sentences = new ArrayList<>();
        sentences.add(only);
        return sentences;
    }
}

View File

@@ -0,0 +1,42 @@
import java.io.File;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOCase;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.junit.Test;
import data.*;
import javafx.collections.ObservableList;
public class CorpusTests {
@Test
public void solarTest() {
//File selectedDirectory = new File("/home/andrej/Desktop/corpus-analyzer/src/main/resources/Solar");
// File selectedDirectory = new File("/home/andrej/Desktop/corpus-analyzer/src/main/resources/GOS");
File selectedDirectory = new File("/home/andrej/Desktop/corpus-analyzer/src/main/resources/Gigafida_subset");
Settings.resultsFilePath = new File(selectedDirectory.getAbsolutePath().concat(File.separator));
Settings.corpus = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("xml", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
File f = Settings.corpus.iterator().next();
Statistics stats = new Statistics(AnalysisLevel.STRING_LEVEL, 2, 0, CalculateFor.WORD);
// stats.setCorpusType(CorpusType.GOS);
stats.setCorpusType(CorpusType.SOLAR);
// XML_processing.readXMLGos(f.toString(), stats);
// XML_processing.readXML(f.toString(), stats);
// XML_processing.readXMLHeaderTag(f.toString(), "stats");
}
@Test
public void test() {
ObservableList<String> var = GosTaxonomy.getForComboBox();
String debug = "";
}
}

66
src/test/java/DBTest.java Normal file
View File

@@ -0,0 +1,66 @@
import static junit.framework.Assert.*;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.rocksdb.RocksDB;
import util.db.RDB;
/**
 * Exercises {@link RDB}: two batches are written and the dumped contents are checked after each.
 */
public class DBTest {
    static {
        // The native RocksDB library has to be loaded before any RDB instance is created.
        RocksDB.loadLibrary();
    }

    /**
     * Writes two batches whose key sets overlap on {@code "alfa"} and verifies that the dump
     * holds the summed value for the shared key and the plain values for the others.
     *
     * <p>The database is deleted in a {@code finally} block so that a failing assertion does
     * not leave it behind for the next run.
     *
     * @throws UnsupportedEncodingException declared by the original test and kept as is
     */
    // @Test  (was already disabled in the original; NOTE(review): confirm whether it should be re-enabled)
    public void dbConnectorTest() throws UnsupportedEncodingException {
        String key1 = "alfa";
        AtomicLong value1 = new AtomicLong(10);
        String key2 = "beta";
        AtomicLong value2 = new AtomicLong(20);
        // Same key as key1 on purpose: the second batch must be added to the first.
        String key3 = "alfa";
        AtomicLong value3 = new AtomicLong(50);
        String key4 = "theta";
        AtomicLong value4 = new AtomicLong(40);

        RDB db = new RDB();
        try {
            HashMap<String, AtomicLong> results = new HashMap<>();
            results.put(key1, value1);
            results.put(key2, value2);
            db.writeBatch(results);

            // let's check how that fared out
            Map<String, AtomicLong> dumpedResults = db.getDump();

            // should have 2 items
            assertEquals(2, dumpedResults.size());

            // entry comparison
            assertTrue(dumpedResults.containsKey(key1));
            assertEquals(value1.longValue(), dumpedResults.get(key1).longValue());
            assertTrue(dumpedResults.containsKey(key2));
            assertEquals(value2.longValue(), dumpedResults.get(key2).longValue());

            results = new HashMap<>();
            results.put(key3, value3);
            results.put(key4, value4);
            db.writeBatch(results);
            dumpedResults = db.getDump();

            // should have 3 items with alfa's value reflecting summation
            assertEquals(3, dumpedResults.size());

            // entry comparison
            assertTrue(dumpedResults.containsKey(key1));
            assertEquals(value1.longValue() + value3.longValue(), dumpedResults.get(key1).longValue());
            assertTrue(dumpedResults.containsKey(key2));
            assertEquals(value2.longValue(), dumpedResults.get(key2).longValue());
            assertTrue(dumpedResults.containsKey(key4));
            assertEquals(value4.longValue(), dumpedResults.get(key4).longValue());
        } finally {
            // Always clean up, even when an assertion above fails.
            db.delete();
        }
    }
}

View File

@@ -0,0 +1,334 @@
import static org.junit.Assert.*;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import org.junit.Test;
import alg.ngram.Ngrams;
import data.*;
/**
 * Tests for {@link Ngrams#calculateForAll} covering letter n-grams, word n-grams and skip-grams.
 *
 * <p>Each scenario mutates one shared {@link Filter}, builds a fresh {@link StatisticsNew} from
 * it and inspects the resulting frequency map. The fixtures come from {@link Common}.
 *
 * <p>The legacy {@code Statistics}-based n-gram test that was kept here as commented-out code,
 * and the unused private helper it relied on, have been removed; they remain available in
 * version control.
 */
public class NgramTests {

    /**
     * Letter n-grams ({@code ngramValue == 0}): substrings of a fixed length taken from words,
     * optionally restricted by MSD regular expressions.
     */
    @Test
    public void letterNgramsTest() {
        Filter filter = new Filter();
        filter.setAl(AnalysisLevel.STRING_LEVEL);
        filter.setStringLength(4);
        filter.setNgramValue(0); // letters
        filter.setCalculateFor(CalculateFor.WORD);

        Corpus testCorpus = gigafidaCorpus();

        // tests:
        // - no regex
        // - algorithm skips words that are shorter than set length value
        Map<String, AtomicLong> result = compute(testCorpus, filter, Common.minCorpus);
        assertEquals(2, result.size());
        assertTrue(result.containsKey("juna"));
        assertEquals(1, result.get("juna").longValue());
        assertTrue(result.containsKey("unak"));
        assertEquals(1, result.get("unak").longValue());

        // tests:
        // - map update (count) works ok
        filter.setStringLength(3);
        result = compute(testCorpus, filter, Common.midCorpus);
        assertEquals(2, result.get("ima").longValue());

        // tests:
        // - pre-check for the following regex test - this one should include word "ima", next one shouldn't
        filter.setStringLength(3);
        result = compute(testCorpus, filter, Common.midCorpus);
        assertTrue(result.containsKey("ima"));

        // tests:
        // - regex: S.* (all nouns)
        filter.setMsd(patterns("S.*"));
        result = compute(testCorpus, filter, Common.midCorpus);
        assertFalse(result.containsKey("ima"));

        // tests:
        // - more precise regex: should include "posesti", but not "junak"
        filter.setMsd(patterns("S.z.*"));
        filter.setStringLength(5);
        result = compute(testCorpus, filter, Common.midCorpus);
        assertFalse(result.containsKey("junak"));
        assertEquals(3, result.size());

        // tests:
        // - trickier regex: should count only for msd="Vd" - "ker"
        filter.setMsd(patterns(".{2}"));
        filter.setStringLength(3);
        result = compute(testCorpus, filter, Common.midCorpus);
        assertEquals(1, result.size());
        assertTrue(result.containsKey("ker"));
        assertEquals(1, result.get("ker").longValue());
    }

    /** Word-level n-grams over word forms, lemmas and MSDs, with and without MSD regex filters. */
    @Test
    public void wordsNgramsTest() {
        Filter filter = new Filter();
        filter.setAl(AnalysisLevel.STRING_LEVEL);
        filter.setNgramValue(3);

        Corpus testCorpus = gigafidaCorpus();

        // tests:
        // - normal ngrams - word
        // midCorpus contains 5 words which should make for 3 3-grams
        filter.setCalculateFor(CalculateFor.WORD);
        Map<String, AtomicLong> result = compute(testCorpus, filter, Common.midCorpus);
        assertEquals(3, result.size());
        assertTrue(result.containsKey("ker ima junak"));
        assertTrue(result.containsKey("ima junak ima"));
        assertTrue(result.containsKey("junak ima posesti"));

        // tests:
        // - normal ngrams - lemmas
        filter.setCalculateFor(CalculateFor.LEMMA);
        result = compute(testCorpus, filter, Common.midCorpus);
        assertEquals(3, result.size());
        assertTrue(result.containsKey("ker imeti junak"));
        assertTrue(result.containsKey("imeti junak imeti"));
        assertTrue(result.containsKey("junak imeti posest"));

        // tests:
        // - normal ngrams - msd
        filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
        result = compute(testCorpus, filter, Common.midCorpus);
        assertEquals(3, result.size());
        assertTrue(result.containsKey("Vd Ggnste-n Somei"));
        assertTrue(result.containsKey("Ggnste-n Somei Ggnste-n"));
        assertTrue(result.containsKey("Somei Ggnste-n Sozem"));

        // tests:
        // - ngrams - word - regex filter (one pattern per n-gram position)
        filter.setCalculateFor(CalculateFor.WORD);
        filter.setMsd(patterns("S.*", "G.*", ".*"));
        result = compute(testCorpus, filter, Common.midCorpus);
        assertEquals(1, result.size());
        assertTrue(result.containsKey("junak ima posesti"));

        // tests:
        // - ngrams - word - regex filter, bigrams
        filter.setCalculateFor(CalculateFor.WORD);
        filter.setNgramValue(2);
        filter.setMsd(patterns("G.*", "Some.*"));
        result = compute(testCorpus, filter, Common.midCorpus);
        assertEquals(1, result.size());
        assertTrue(result.containsKey("ima junak"));
    }

    /** Plain and two-skip bigrams and trigrams over the five-token skip-gram fixture. */
    @Test
    public void skipgramsTest() {
        Filter filter = new Filter();
        filter.setAl(AnalysisLevel.STRING_LEVEL);
        filter.setCalculateFor(CalculateFor.WORD);

        Corpus testCorpus = gigafidaCorpus();

        // tests:
        // - bigrams
        filter.setNgramValue(2);
        Set<String> bigrams = new HashSet<>(Arrays.asList("ker ima", "ima junak", "junak v", "v posesti"));
        assertEquals(bigrams, compute(testCorpus, filter, Common.midCorpusSkip).keySet());

        // test:
        // - two skip bigrams
        filter.setNgramValue(2);
        filter.setSkipValue(2);
        Set<String> twoSkipBigrams = new HashSet<>(Arrays.asList("ker ima", "ker junak", "ker v", "ima junak", "ima v", "ima posesti", "junak v", "junak posesti", "v posesti"));
        assertEquals(twoSkipBigrams, compute(testCorpus, filter, Common.midCorpusSkip).keySet());

        // tests:
        // - trigrams
        filter.setNgramValue(3);
        filter.setSkipValue(null);
        Set<String> trigrams = new HashSet<>(Arrays.asList("ker ima junak", "ima junak v", "junak v posesti"));
        assertEquals(trigrams, compute(testCorpus, filter, Common.midCorpusSkip).keySet());

        // tests:
        // - two skip trigrams
        filter.setNgramValue(3);
        filter.setSkipValue(2);
        Set<String> twoSkipTrigrams = new HashSet<>(Arrays.asList("ker ima junak", "ker ima v", "ker ima posesti", "ker junak v", "ker junak posesti", "ker v posesti", "ima junak v", "ima junak posesti", "ima v posesti", "junak v posesti"));
        assertEquals(twoSkipTrigrams, compute(testCorpus, filter, Common.midCorpusSkip).keySet());
    }

    /** Creates the corpus descriptor used by every test: type GIGAFIDA with an empty file list. */
    private static Corpus gigafidaCorpus() {
        Corpus descriptor = new Corpus();
        descriptor.setCorpusType(CorpusType.GIGAFIDA);
        descriptor.setDetectedCorpusFiles(new ArrayList<>());
        return descriptor;
    }

    /** Builds a fresh {@link StatisticsNew} from the filter's current state, runs the calculation and returns its result map. */
    private static Map<String, AtomicLong> compute(Corpus corpus, Filter filter, List<Sentence> sentences) {
        StatisticsNew stats = new StatisticsNew(corpus, filter, false);
        Ngrams.calculateForAll(sentences, stats);
        return stats.getResult();
    }

    /** Compiles the given regular expressions, in order, into the list type passed to {@code Filter.setMsd}. */
    private static ArrayList<Pattern> patterns(String... regexes) {
        ArrayList<Pattern> compiled = new ArrayList<>();
        for (String regex : regexes) {
            compiled.add(Pattern.compile(regex));
        }
        return compiled;
    }
}

View File

@@ -0,0 +1,51 @@
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.junit.Test;
import alg.inflectedJOS.WordFormation;
import alg.ngram.Ngrams;
import data.*;
public class WordFormationTest {
@Test
public void calculationTest() throws UnsupportedEncodingException {
Map<String, AtomicLong> result = null;
Filter filter = new Filter();
filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setNgramValue(1);
Corpus testCorpus = new Corpus();
testCorpus.setCorpusType(CorpusType.GIGAFIDA);
testCorpus.setDetectedCorpusFiles(new ArrayList<>());
// tests:
// - normal ngrams - word
// midCorpus contains 5 words which should make for 3 3-grams
filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
StatisticsNew stats = new StatisticsNew(testCorpus, filter, false);
Ngrams.calculateForAll(Common.josTest, stats);
result = stats.getResult();
WordFormation.calculateStatistics(stats);
Object[][] resultArr = stats.getResultCustom();
String debug = "";
}
@Test
public void testAnything() {
String a = "Somei";
String b = "SomeiD";
String c = a.substring(0, 5);
String d = b.substring(0, 5);
String debug = "";
}
}

View File

@@ -0,0 +1,15 @@
import java.util.HashSet;
import org.junit.Test;
import data.Enums.WordLevelDefaultValues;
/**
 * Smoke test for {@link WordLevelDefaultValues}.
 */
public class WordLevelTest {

    /**
     * Calls {@link WordLevelDefaultValues#getSuffixes()} and keeps the result in a local.
     * Nothing is asserted; the test passes as long as the call does not throw.
     */
    @Test
    public void testResourceFiles() {
        final HashSet<String> loadedSuffixes = WordLevelDefaultValues.getSuffixes();

        // No-op statement kept from the original as a convenient breakpoint line.
        final String breakpointAnchor = "";
    }
}

View File

@@ -0,0 +1,39 @@
import static org.junit.Assert.*;
import org.junit.Test;
import data.Word;
/**
 * Unit tests for {@link Word}: MSD padding and the consonant/vowel rendering of a word.
 */
public class WordTest {

    /**
     * MSDs of different raw lengths must come back from {@link Word#getMsd()} with equal
     * length, the shorter one ending in {@link Word#PAD_CHARACTER}.
     */
    @Test
    public void paddingTest() {
        // Pair 1: five-character MSD next to a six-character one.
        Word shortTagged = new Word("w1", "l1", "Somei");
        Word longTagged = new Word("w2", "l2", "Sometd");

        // the shorter msd should get padded
        String shortMsd = shortTagged.getMsd();
        String longMsd = longTagged.getMsd();
        int lastOfShort = shortMsd.length() - 1;

        assertEquals(shortMsd.length(), longMsd.length());
        assertEquals(Word.PAD_CHARACTER, shortMsd.charAt(lastOfShort));

        // Pair 2: four-character MSD next to an eight-character one that contains '-' internally.
        shortTagged = new Word("w1", "l1", "Gp-g");
        longTagged = new Word("w2", "l2", "Gp-g---d");

        // the shorter msd should get padded
        shortMsd = shortTagged.getMsd();
        longMsd = longTagged.getMsd();
        lastOfShort = shortMsd.length() - 1;

        assertEquals(shortMsd.length(), longMsd.length());
        assertEquals(Word.PAD_CHARACTER, shortMsd.charAt(lastOfShort));
        // The character at index 2 of the longer msd is expected to equal the pad character as well.
        assertEquals(Word.PAD_CHARACTER, longMsd.charAt(2));
    }

    /**
     * Feeds the 25-letter Slovenian alphabet through {@link Word#getCVVWord()} and expects one
     * {@code V} (vowel) or {@code C} (consonant) per letter.
     */
    @Test
    public void cvvTest() {
        final String alphabet = "abcčdefghijklmnoprsštuvzž";
        final String expectedShape = "VCCCCVCCCVCCCCCVCCCCCVCCC";

        Word alphabetWord = new Word(alphabet, "l1", null);

        assertEquals(expectedShape, alphabetWord.getCVVWord());
    }
}