Project copied
This commit is contained in:
160
.gitignore
vendored
Normal file
160
.gitignore
vendored
Normal file
@@ -0,0 +1,160 @@
|
||||
# Created by .ignore support plugin (hsz.mobi)
|
||||
### Maven template
|
||||
target/
|
||||
pom.xml.tag
|
||||
pom.xml.releaseBackup
|
||||
pom.xml.versionsBackup
|
||||
pom.xml.next
|
||||
release.properties
|
||||
dependency-reduced-pom.xml
|
||||
buildNumber.properties
|
||||
.mvn/timing.properties
|
||||
|
||||
# Avoid ignoring Maven wrapper jar file (.jar files are usually ignored)
|
||||
!/.mvn/wrapper/maven-wrapper.jar
|
||||
### JetBrains template
|
||||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
|
||||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||
|
||||
# User-specific stuff:
|
||||
.idea/**/workspace.xml
|
||||
.idea/**/tasks.xml
|
||||
.idea/dictionaries
|
||||
.idea/
|
||||
|
||||
# Sensitive or high-churn files:
|
||||
.idea/**/dataSources/
|
||||
.idea/**/dataSources.ids
|
||||
.idea/**/dataSources.xml
|
||||
.idea/**/dataSources.local.xml
|
||||
.idea/**/sqlDataSources.xml
|
||||
.idea/**/dynamic.xml
|
||||
.idea/**/uiDesigner.xml
|
||||
|
||||
# Gradle:
|
||||
.idea/**/gradle.xml
|
||||
.idea/**/libraries
|
||||
|
||||
# Mongo Explorer plugin:
|
||||
.idea/**/mongoSettings.xml
|
||||
|
||||
## File-based project format:
|
||||
*.iws
|
||||
|
||||
## Plugin-specific files:
|
||||
|
||||
# IntelliJ
|
||||
/out/
|
||||
|
||||
# mpeltonen/sbt-idea plugin
|
||||
.idea_modules/
|
||||
|
||||
# JIRA plugin
|
||||
atlassian-ide-plugin.xml
|
||||
|
||||
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||
com_crashlytics_export_strings.xml
|
||||
crashlytics.properties
|
||||
crashlytics-build.properties
|
||||
fabric.properties
|
||||
### Java template
|
||||
# Compiled class file
|
||||
# Log file
|
||||
*.log
|
||||
|
||||
# BlueJ files
|
||||
*.ctxt
|
||||
|
||||
# Mobile Tools for Java (J2ME)
|
||||
.mtj.tmp/
|
||||
|
||||
# Package Files #
|
||||
*.war
|
||||
*.ear
|
||||
*.zip
|
||||
*.tar.gz
|
||||
*.rar
|
||||
|
||||
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
|
||||
hs_err_pid*
|
||||
### Eclipse template
|
||||
|
||||
.metadata
|
||||
bin/
|
||||
tmp/
|
||||
*.tmp
|
||||
*.bak
|
||||
*.swp
|
||||
*~.nib
|
||||
local.properties
|
||||
.settings/
|
||||
.loadpath
|
||||
.recommenders
|
||||
|
||||
# Eclipse Core
|
||||
.project
|
||||
|
||||
# External tool builders
|
||||
.externalToolBuilders/
|
||||
|
||||
# Locally stored "Eclipse launch configurations"
|
||||
*.launch
|
||||
|
||||
# PyDev specific (Python IDE for Eclipse)
|
||||
*.pydevproject
|
||||
|
||||
# CDT-specific (C/C++ Development Tooling)
|
||||
.cproject
|
||||
|
||||
# JDT-specific (Eclipse Java Development Tools)
|
||||
.classpath
|
||||
|
||||
# Java annotation processor (APT)
|
||||
.factorypath
|
||||
|
||||
# PDT-specific (PHP Development Tools)
|
||||
.buildpath
|
||||
|
||||
# sbteclipse plugin
|
||||
.target
|
||||
|
||||
# Tern plugin
|
||||
.tern-project
|
||||
|
||||
# TeXlipse plugin
|
||||
.texlipse
|
||||
|
||||
# STS (Spring Tool Suite)
|
||||
.springBeans
|
||||
|
||||
# Code Recommenders
|
||||
.recommenders/
|
||||
|
||||
# Scala IDE specific (Scala & Java development for Eclipse)
|
||||
.cache-main
|
||||
.scala_dependencies
|
||||
.worksheet
|
||||
|
||||
|
||||
|
||||
|
||||
### Windows ###
|
||||
# Windows thumbnail cache files
|
||||
Thumbs.db
|
||||
ehthumbs.db
|
||||
ehthumbs_vista.db
|
||||
|
||||
# Folder config file
|
||||
Desktop.ini
|
||||
|
||||
# Recycle Bin used on file shares
|
||||
$RECYCLE.BIN/
|
||||
|
||||
# Windows Installer files
|
||||
*.cab
|
||||
*.msi
|
||||
*.msm
|
||||
*.msp
|
||||
|
||||
# Windows shortcuts
|
||||
*.lnk
|
||||
28
Corpus Analyzer.iml
Normal file
28
Corpus Analyzer.iml
Normal file
@@ -0,0 +1,28 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
|
||||
<output url="file://$MODULE_DIR$/target/classes" />
|
||||
<output-test url="file://$MODULE_DIR$/target/test-classes" />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/target" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="library" name="Maven: commons-io:commons-io:2.5" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.6" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.googlecode.json-simple:json-simple:1.1.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: junit:junit:4.10" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.hamcrest:hamcrest-core:1.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.commons:commons-csv:1.4" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.controlsfx:controlsfx:8.40.13" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.rocksdb:rocksdbjni:5.7.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-api:2.9.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-core:2.9.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.kordamp.ikonli:ikonli-fontawesome-pack:1.9.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.kordamp.ikonli:ikonli-core:1.9.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.kordamp.ikonli:ikonli-javafx:1.9.0" level="project" />
|
||||
</component>
|
||||
</module>
|
||||
122
pom.xml
Normal file
122
pom.xml
Normal file
@@ -0,0 +1,122 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>thesis</groupId>
|
||||
<artifactId>corpus-analyzer</artifactId>
|
||||
<version>1.2</version>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
<version>2.5</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-lang3</artifactId>
|
||||
<version>3.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.googlecode.json-simple</groupId>
|
||||
<artifactId>json-simple</artifactId>
|
||||
<version>1.1.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-csv</artifactId>
|
||||
<version>1.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.controlsfx</groupId>
|
||||
<artifactId>controlsfx</artifactId>
|
||||
<version>8.40.13</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.rocksdb</groupId>
|
||||
<artifactId>rocksdbjni</artifactId>
|
||||
<version>5.7.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-api</artifactId>
|
||||
<version>2.9.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-core</artifactId>
|
||||
<version>2.9.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.kordamp.ikonli</groupId>
|
||||
<artifactId>ikonli-fontawesome-pack</artifactId>
|
||||
<version>1.9.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.kordamp.ikonli</groupId>
|
||||
<artifactId>ikonli-javafx</artifactId>
|
||||
<version>1.9.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<!-- packages dependencies into the jar -->
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>single</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<archive>
|
||||
<manifest>
|
||||
<mainClass>gui.GUIController</mainClass>
|
||||
</manifest>
|
||||
</archive>
|
||||
<descriptorRefs>
|
||||
<descriptorRef>jar-with-dependencies</descriptorRef>
|
||||
</descriptorRefs>
|
||||
<appendAssemblyId>false</appendAssemblyId>
|
||||
<outputDirectory>artifact</outputDirectory>
|
||||
<finalName>Corpus_Analyzer_${version}</finalName>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<!-- JavaFX -->
|
||||
<groupId>com.zenjava</groupId>
|
||||
<artifactId>javafx-maven-plugin</artifactId>
|
||||
<version>8.6.0</version>
|
||||
<configuration>
|
||||
<mainClass>gui.GUIController</mainClass>
|
||||
<verbose>true</verbose>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>create-jfxjar</id>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>build-jar</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
||||
3
src/main/java/META-INF/MANIFEST.MF
Normal file
3
src/main/java/META-INF/MANIFEST.MF
Normal file
@@ -0,0 +1,3 @@
|
||||
Manifest-Version: 1.0
|
||||
Main-Class: gui.GUIController
|
||||
|
||||
15
src/main/java/alg/Common.java
Normal file
15
src/main/java/alg/Common.java
Normal file
@@ -0,0 +1,15 @@
|
||||
package alg;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
/**
 * Static helper for maintaining per-key occurrence counters.
 */
public class Common {
    /**
     * Counts one occurrence of {@code o} in {@code map}.
     * <p>
     * When the key is not mapped yet, a counter starting at 1 is inserted; otherwise the
     * counter that is already stored is incremented.
     *
     * @param map counters keyed by the counted object
     * @param o   key whose counter should be increased by one
     * @param <K> key type
     * @param <V> unused; kept so existing call sites with explicit type arguments still compile
     */
    public static <K, V> void updateMap(Map<K, AtomicLong> map, K o) {
        // putIfAbsent hands back the counter that was already mapped (or null when ours was inserted)
        AtomicLong existing = map.putIfAbsent(o, new AtomicLong(1));

        // increment the returned counter directly: no second lookup, and no NPE if the
        // entry disappears between putIfAbsent and a follow-up get
        if (existing != null) {
            existing.incrementAndGet();
        }
    }
}
|
||||
794
src/main/java/alg/XML_processing.java
Normal file
794
src/main/java/alg/XML_processing.java
Normal file
@@ -0,0 +1,794 @@
|
||||
package alg;
|
||||
|
||||
import static data.Enums.solar.SolarFilters.*;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ForkJoinPool;
|
||||
|
||||
import javax.xml.namespace.QName;
|
||||
import javax.xml.stream.XMLEventReader;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLStreamConstants;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.events.*;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
|
||||
import data.*;
|
||||
import gui.ValidationUtil;
|
||||
|
||||
public class XML_processing {
|
||||
public final static org.apache.logging.log4j.Logger logger = LogManager.getLogger(XML_processing.class);
|
||||
|
||||
// public static void processCorpus(Statistics stats) {
|
||||
// // we can preset the list's size, so there won't be a need to resize it
|
||||
// List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT);
|
||||
//
|
||||
// int i = 0;
|
||||
// for (File f : Settings.corpus) {
|
||||
// i++;
|
||||
// readXML(f.toString(), stats);
|
||||
// }
|
||||
// }
|
||||
|
||||
// public static void readXML(String path, Statistics stats) {
|
||||
// if (stats.getCorpusType() == CorpusType.GIGAFIDA) {
|
||||
// readXMLGigafida(path, stats);
|
||||
// } else if (stats.getCorpusType() == CorpusType.GOS) {
|
||||
// readXMLGos(path, stats);
|
||||
// } else if (stats.getCorpusType() == CorpusType.SOLAR) {
|
||||
// readXMLSolar(path, stats);
|
||||
// }
|
||||
// }
|
||||
|
||||
public static void readXML(String path, StatisticsNew stats) {
|
||||
if (stats.getCorpus().getCorpusType() == CorpusType.GIGAFIDA
|
||||
|| stats.getCorpus().getCorpusType() == CorpusType.CCKRES) {
|
||||
readXMLGigafida(path, stats);
|
||||
} else if (stats.getCorpus().getCorpusType() == CorpusType.GOS) {
|
||||
readXMLGos(path, stats);
|
||||
} else if (stats.getCorpus().getCorpusType() == CorpusType.SOLAR) {
|
||||
readXMLSolar(path, stats);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads and returns the value of a passed header tag or an empty string.
|
||||
* E.g. title tag, for discerning the corpus' type.
|
||||
* Notice: returns only the value of the first occurrence of a given tag name.
|
||||
*/
|
||||
public static String readXMLHeaderTag(String path, String tag) {
|
||||
XMLInputFactory factory = XMLInputFactory.newInstance();
|
||||
XMLEventReader eventReader = null;
|
||||
|
||||
try {
|
||||
eventReader = factory.createXMLEventReader(new FileInputStream(path));
|
||||
while (eventReader.hasNext()) {
|
||||
XMLEvent xmlEvent = eventReader.nextEvent();
|
||||
if (xmlEvent.isStartElement()) {
|
||||
StartElement startElement = xmlEvent.asStartElement();
|
||||
String var = startElement.getName().getLocalPart();
|
||||
|
||||
if (var.equalsIgnoreCase(tag)) {
|
||||
return eventReader.nextEvent().asCharacters().getData();
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException | XMLStreamException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
if (eventReader != null) {
|
||||
try {
|
||||
eventReader.close();
|
||||
} catch (XMLStreamException e) {
|
||||
logger.error("closing stream", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
private static void fj(List<Sentence> corpus, StatisticsNew stats) {
|
||||
ForkJoinPool pool = new ForkJoinPool();
|
||||
|
||||
if (stats.getFilter().getAl() == AnalysisLevel.STRING_LEVEL) {
|
||||
alg.ngram.ForkJoin wc = new alg.ngram.ForkJoin(corpus, stats);
|
||||
pool.invoke(wc);
|
||||
} else if (stats.getFilter().getAl() == AnalysisLevel.WORD_LEVEL) {
|
||||
alg.word.ForkJoin wc = new alg.word.ForkJoin(corpus, stats);
|
||||
pool.invoke(wc);
|
||||
} else {
|
||||
// TODO:
|
||||
// alg.inflectedJOS.ForkJoin wc = new alg.inflectedJOS.ForkJoin(corpus, stats);
|
||||
// pool.invoke(wc);
|
||||
}
|
||||
}
|
||||
|
||||
// public static void readXMLGos(String path, Statistics stats) {
|
||||
// boolean in_word = false;
|
||||
// String taksonomija = "";
|
||||
// String lemma = "";
|
||||
// String msd = "";
|
||||
// String type = stats.isGosOrthMode() ? "orth" : "norm"; // orth & norm
|
||||
//
|
||||
// List<Word> stavek = new ArrayList<>();
|
||||
// List<Sentence> corpus = new ArrayList<>();
|
||||
// String sentenceDelimiter = "seg";
|
||||
// String taxonomyPrefix = "gos.";
|
||||
//
|
||||
// try {
|
||||
// XMLInputFactory factory = XMLInputFactory.newInstance();
|
||||
// XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path));
|
||||
//
|
||||
// while (eventReader.hasNext()) {
|
||||
// XMLEvent event = eventReader.nextEvent();
|
||||
//
|
||||
// switch (event.getEventType()) {
|
||||
// case XMLStreamConstants.START_ELEMENT:
|
||||
//
|
||||
// StartElement startElement = event.asStartElement();
|
||||
// String qName = startElement.getName().getLocalPart();
|
||||
//
|
||||
// // "word" node
|
||||
// if (qName.equals("w")) {
|
||||
// in_word = true;
|
||||
//
|
||||
// if (type.equals("norm")) {
|
||||
// // make sure we're looking at <w lemma...> and not <w type...>
|
||||
// Iterator var = startElement.getAttributes();
|
||||
// ArrayList<Object> attributes = new ArrayList<>();
|
||||
// while (var.hasNext()) {
|
||||
// attributes.add(var.next());
|
||||
// }
|
||||
//
|
||||
// if (attributes.contains("msd")) {
|
||||
// msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
|
||||
// } else {
|
||||
// msd = null;
|
||||
// }
|
||||
//
|
||||
// if (attributes.contains("lemma")) {
|
||||
// lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// // taxonomy node
|
||||
// else if (qName.equalsIgnoreCase("catRef")) {
|
||||
// // there are some term nodes at the beginning that are of no interest to us
|
||||
// // they differ by not having the attribute "ref", so test will equal null
|
||||
// Attribute test = startElement.getAttributeByName(QName.valueOf("target"));
|
||||
//
|
||||
// if (test != null) {
|
||||
// // keep only taxonomy properties
|
||||
// taksonomija = String.valueOf(test.getValue()).replace(taxonomyPrefix, "");
|
||||
// }
|
||||
// } else if (qName.equalsIgnoreCase("div")) {
|
||||
// type = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
|
||||
//
|
||||
// }
|
||||
// break;
|
||||
//
|
||||
// case XMLStreamConstants.CHARACTERS:
|
||||
// Characters characters = event.asCharacters();
|
||||
//
|
||||
// // "word" node value
|
||||
// if (in_word) {
|
||||
// if (type.equals("norm") && msd != null) {
|
||||
// stavek.add(new Word(characters.getData(), lemma, msd));
|
||||
// } else {
|
||||
// stavek.add(new Word(characters.getData()));
|
||||
// }
|
||||
//
|
||||
// in_word = false;
|
||||
// }
|
||||
// break;
|
||||
//
|
||||
// case XMLStreamConstants.END_ELEMENT:
|
||||
// EndElement endElement = event.asEndElement();
|
||||
//
|
||||
// // parser reached end of the current sentence
|
||||
// if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
|
||||
// // add sentence to corpus
|
||||
// corpus.add(new Sentence(stavek, taksonomija, type));
|
||||
// // and start a new one
|
||||
// stavek = new ArrayList<>();
|
||||
//
|
||||
// /* Invoke Fork-Join when we reach maximum limit of
|
||||
// * sentences (because we can't read everything to
|
||||
// * memory) or we reach the end of the file.
|
||||
// */
|
||||
// if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
|
||||
// fj(corpus, stats);
|
||||
// // empty the current corpus, since we don't need
|
||||
// // the data anymore
|
||||
// corpus.clear();
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // backup
|
||||
// if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) {
|
||||
// fj(corpus, stats);
|
||||
// corpus.clear();
|
||||
// }
|
||||
//
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// } catch (FileNotFoundException | XMLStreamException e) {
|
||||
// e.printStackTrace();
|
||||
// }
|
||||
// }
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public static void readXMLSolar(String path, StatisticsNew stats) {
|
||||
boolean in_word = false;
|
||||
String lemma = "";
|
||||
String msd = "";
|
||||
|
||||
List<Word> stavek = new ArrayList<>();
|
||||
List<Sentence> corpus = new ArrayList<>();
|
||||
|
||||
// used for filter
|
||||
Set<String> headTags = new HashSet<>(Arrays.asList("sola", "predmet", "razred", "regija", "tip", "leto"));
|
||||
Map<String, String> headBlock = null;
|
||||
boolean includeThisBlock = false;
|
||||
|
||||
try {
|
||||
XMLInputFactory factory = XMLInputFactory.newInstance();
|
||||
XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream(path));
|
||||
|
||||
while (eventReader.hasNext()) {
|
||||
XMLEvent event = eventReader.nextEvent();
|
||||
|
||||
switch (event.getEventType()) {
|
||||
case XMLStreamConstants.START_ELEMENT:
|
||||
|
||||
StartElement startElement = event.asStartElement();
|
||||
// System.out.println(String.format("%s", startElement.toString()));
|
||||
String qName = startElement.getName().getLocalPart();
|
||||
|
||||
// "word" node
|
||||
if (qName.equals("w3")) {
|
||||
in_word = true;
|
||||
|
||||
msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
|
||||
lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
|
||||
} else if (qName.equals("c3")) {
|
||||
String c3Content = eventReader.nextEvent().asCharacters().getData();
|
||||
|
||||
if (c3Content.equals(".") && includeThisBlock) {
|
||||
// add sentence to corpus
|
||||
corpus.add(new Sentence(stavek));
|
||||
// and start a new one
|
||||
stavek = new ArrayList<>();
|
||||
|
||||
/* Invoke Fork-Join when we reach maximum limit of
|
||||
* sentences (because we can't read everything to
|
||||
* memory) or we reach the end of the file.
|
||||
*/
|
||||
if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
|
||||
fj(corpus, stats);
|
||||
// empty the current corpus, since we don't need
|
||||
// the data anymore
|
||||
corpus.clear();
|
||||
}
|
||||
}
|
||||
} else if (headTags.contains(qName)) {
|
||||
String tagContent = eventReader.nextEvent().asCharacters().getData();
|
||||
headBlock.put(qName, tagContent);
|
||||
} else if (qName.equals("head")) {
|
||||
headBlock = new HashMap<>();
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case XMLStreamConstants.CHARACTERS:
|
||||
Characters characters = event.asCharacters();
|
||||
|
||||
// "word" node value
|
||||
if (in_word) {
|
||||
stavek.add(new Word(characters.getData(), lemma, msd));
|
||||
in_word = false;
|
||||
}
|
||||
break;
|
||||
|
||||
case XMLStreamConstants.END_ELEMENT:
|
||||
EndElement endElement = event.asEndElement();
|
||||
String qNameEnd = endElement.getName().getLocalPart();
|
||||
|
||||
if (qNameEnd.equals("head")) {
|
||||
// validate and set boolean
|
||||
if (validateHeadBlock(headBlock, stats.getFilter().getSolarFilters())) {
|
||||
includeThisBlock = true;
|
||||
}
|
||||
} else if (qNameEnd.equals("body")) {
|
||||
// new block, reset filter status
|
||||
includeThisBlock = false;
|
||||
}
|
||||
|
||||
// backup
|
||||
if (endElement.getName().getLocalPart().equalsIgnoreCase("korpus")) {
|
||||
fj(corpus, stats);
|
||||
corpus.clear();
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException | XMLStreamException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param readHeadBlock block of tags read from the corpus
|
||||
* @param userSetFilter tags with values set by the user
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private static boolean validateHeadBlock(Map<String, String> readHeadBlock, HashMap<String, HashSet<String>> userSetFilter) {
|
||||
boolean pass = true;
|
||||
|
||||
if (userSetFilter == null) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (Map.Entry<String, HashSet<String>> filterEntry : userSetFilter.entrySet()) {
|
||||
String key = filterEntry.getKey();
|
||||
HashSet<String> valueObject = filterEntry.getValue();
|
||||
|
||||
// if (valueObject instanceof String) {
|
||||
// pass = validateHeadBlockEntry(readHeadBlock, key, (String) valueObject);
|
||||
// } else
|
||||
if (valueObject != null) {
|
||||
//noinspection unchecked
|
||||
for (String value : valueObject) {
|
||||
pass = validateHeadBlockEntry(readHeadBlock, key, value);
|
||||
}
|
||||
}
|
||||
|
||||
if (!pass) {
|
||||
// current head block does not include one of the set filters - not likely, but an edge case anyway
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// if it gets to this point, it passed all the filters
|
||||
return true;
|
||||
}
|
||||
|
||||
private static boolean validateHeadBlockEntry(Map<String, String> readHeadBlock, String userSetKey, String userSetValue) {
|
||||
if (!readHeadBlock.keySet().contains(userSetKey)) {
|
||||
// current head block does not include one of the set filters - not likely, but an edge case anyway
|
||||
return false;
|
||||
} else if (!readHeadBlock.get(userSetKey).equals(userSetValue)) {
|
||||
// different values -> doesn't pass the filter
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses XML headers for information about its taxonomy (if supported) or filters (solar)
|
||||
*
|
||||
* @param filepath
|
||||
* @param corpusIsSplit is corpus split into multiple xml files, or are all entries grouped into one large xml file
|
||||
* @param corpusType
|
||||
*/
|
||||
public static Object readXmlHeaderTaxonomyAndFilters(String filepath, boolean corpusIsSplit, CorpusType corpusType) {
|
||||
boolean parseTaxonomy = Tax.getCorpusTypesWithTaxonomy().contains(corpusType);
|
||||
// solar
|
||||
Set<String> headTags = null;
|
||||
HashMap<String, HashSet<String>> resultFilters = new HashMap<>();
|
||||
// taxonomy corpora
|
||||
HashSet<String> resultTaxonomy = new HashSet<>();
|
||||
|
||||
String headTagName;
|
||||
|
||||
if (corpusType == CorpusType.SOLAR) {
|
||||
headTagName = "head";
|
||||
// used for filter
|
||||
headTags = new HashSet<>(Arrays.asList(SOLA, PREDMET, RAZRED, REGIJA, TIP, LETO));
|
||||
|
||||
// init results now to avoid null pointers
|
||||
headTags.forEach(f -> resultFilters.put(f, new HashSet<>()));
|
||||
} else {
|
||||
headTagName = "teiHeader";
|
||||
}
|
||||
|
||||
XMLInputFactory factory = XMLInputFactory.newInstance();
|
||||
XMLEventReader xmlEventReader = null;
|
||||
try {
|
||||
xmlEventReader = factory.createXMLEventReader(new FileInputStream(filepath));
|
||||
boolean insideHeader = false;
|
||||
|
||||
while (xmlEventReader.hasNext()) {
|
||||
XMLEvent xmlEvent = xmlEventReader.nextEvent();
|
||||
|
||||
if (xmlEvent.isStartElement()) {
|
||||
StartElement startElement = xmlEvent.asStartElement();
|
||||
String elementName = startElement.getName().getLocalPart();
|
||||
|
||||
if (elementName.equalsIgnoreCase(headTagName)) {
|
||||
// if the corpus is split into files, we skip bodies
|
||||
// this toggle is true when we're inside a header (next block of code executes)
|
||||
// and false when we're not (skip reading unnecessary attributes)
|
||||
insideHeader = true;
|
||||
}
|
||||
|
||||
if (insideHeader) {
|
||||
if (parseTaxonomy && elementName.equalsIgnoreCase("catRef")) {
|
||||
HashMap<String, String> atts = extractAttributes(startElement);
|
||||
String debug = "";
|
||||
|
||||
String tax = startElement.getAttributeByName(QName.valueOf("target"))
|
||||
.getValue()
|
||||
.replace("#", "");
|
||||
|
||||
resultTaxonomy.add(tax);
|
||||
} else if (!parseTaxonomy && headTags.contains(elementName)) {
|
||||
String tagContent = xmlEventReader.nextEvent().asCharacters().getData();
|
||||
resultFilters.get(elementName).add(tagContent);
|
||||
}
|
||||
}
|
||||
} else if (xmlEvent.isEndElement() && corpusIsSplit && isEndElementEndOfHeader(xmlEvent, headTagName)) {
|
||||
// if the corpus is split into multiple files, each with only one header block per file
|
||||
// that means we should stop after we reach the end of the header
|
||||
return parseTaxonomy ? resultTaxonomy : resultFilters;
|
||||
} else if (xmlEvent.isEndElement() && !corpusIsSplit && isEndElementEndOfHeader(xmlEvent, headTagName)) {
|
||||
// whole corpus in one file, so we have to continue reading in order to find all header blocks
|
||||
insideHeader = false;
|
||||
}
|
||||
}
|
||||
} catch (XMLStreamException e) {
|
||||
logger.error("Streaming error", e);
|
||||
return parseTaxonomy ? resultTaxonomy : resultFilters;
|
||||
} catch (FileNotFoundException e) {
|
||||
logger.error("File not found", e);
|
||||
return parseTaxonomy ? resultTaxonomy : resultFilters;
|
||||
// TODO: keep a list of files that threw this error and a dirty boolean marker -> if true, alert user
|
||||
} finally {
|
||||
if (xmlEventReader != null) {
|
||||
try {
|
||||
xmlEventReader.close();
|
||||
} catch (XMLStreamException e) {
|
||||
logger.error("closing stream", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
return parseTaxonomy ? resultTaxonomy : resultFilters;
|
||||
}
|
||||
|
||||
private static boolean isEndElementEndOfHeader(XMLEvent event, String headerTag) {
|
||||
return event.asEndElement()
|
||||
.getName()
|
||||
.getLocalPart()
|
||||
.equalsIgnoreCase(headerTag);
|
||||
}
|
||||
|
||||
@SuppressWarnings("Duplicates")
|
||||
public static boolean readXMLGigafida(String path, StatisticsNew stats) {
|
||||
boolean inWord = false;
|
||||
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
|
||||
String lemma = "";
|
||||
String msd = "";
|
||||
|
||||
List<Word> sentence = new ArrayList<>();
|
||||
List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it
|
||||
String sentenceDelimiter = "s";
|
||||
|
||||
XMLEventReader eventReader = null;
|
||||
try {
|
||||
XMLInputFactory factory = XMLInputFactory.newInstance();
|
||||
eventReader = factory.createXMLEventReader(new FileInputStream(path));
|
||||
|
||||
while (eventReader.hasNext()) {
|
||||
XMLEvent event = eventReader.nextEvent();
|
||||
|
||||
switch (event.getEventType()) {
|
||||
case XMLStreamConstants.START_ELEMENT:
|
||||
StartElement startElement = event.asStartElement();
|
||||
String qName = startElement.getName().getLocalPart();
|
||||
|
||||
// "word" node
|
||||
if (qName.equals("w")) {
|
||||
inWord = true;
|
||||
|
||||
msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
|
||||
lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
|
||||
}
|
||||
// taxonomy node
|
||||
else if (qName.equalsIgnoreCase("catRef")) {
|
||||
// there are some term nodes at the beginning that are of no interest to us
|
||||
// they differ by not having the attribute "ref", so test will equal null
|
||||
Attribute tax = startElement.getAttributeByName(QName.valueOf("target"));
|
||||
|
||||
if (tax != null) {
|
||||
// keep only taxonomy properties
|
||||
currentFiletaxonomy.add(String.valueOf(tax.getValue()).replace("#", ""));
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case XMLStreamConstants.CHARACTERS:
|
||||
Characters characters = event.asCharacters();
|
||||
|
||||
// "word" node value
|
||||
if (inWord) {
|
||||
String word = characters.getData();
|
||||
sentence.add(new Word(word, lemma, msd));
|
||||
inWord = false;
|
||||
}
|
||||
break;
|
||||
|
||||
case XMLStreamConstants.END_ELEMENT:
|
||||
EndElement endElement = event.asEndElement();
|
||||
|
||||
String var = endElement.getName().getLocalPart();
|
||||
String debug = "";
|
||||
|
||||
// parser reached end of the current sentence
|
||||
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
|
||||
// add sentence to corpus if it passes filters
|
||||
sentence = runFilters(sentence, stats.getFilter());
|
||||
|
||||
if (!ValidationUtil.isEmpty(sentence)) {
|
||||
corpus.add(new Sentence(sentence));
|
||||
}
|
||||
|
||||
// and start a new one
|
||||
sentence = new ArrayList<>();
|
||||
|
||||
/* Invoke Fork-Join when we reach maximum limit of
|
||||
* sentences (because we can't read everything to
|
||||
* memory) or we reach the end of the file.
|
||||
*/
|
||||
if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
|
||||
fj(corpus, stats);
|
||||
// empty the current corpus, since we don't need the data anymore
|
||||
corpus.clear();
|
||||
|
||||
// TODO: if (stats.isUseDB()) {
|
||||
// stats.storeTmpResultsToDB();
|
||||
// }
|
||||
}
|
||||
} else if (endElement.getName().getLocalPart().equals("teiHeader")) {
|
||||
// before proceeding to read this file, make sure that taxonomy filters are a match
|
||||
|
||||
if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
|
||||
currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
|
||||
|
||||
if (currentFiletaxonomy.isEmpty()) {
|
||||
// taxonomies don't match so stop
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fallback
|
||||
else if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) {
|
||||
fj(corpus, stats);
|
||||
corpus.clear();
|
||||
|
||||
// TODO: if (stats.isUseDB()) {
|
||||
// stats.storeTmpResultsToDB();
|
||||
// }
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException | XMLStreamException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
if (eventReader != null) {
|
||||
try {
|
||||
eventReader.close();
|
||||
} catch (XMLStreamException e) {
|
||||
logger.error("closing stream", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@SuppressWarnings("Duplicates")
|
||||
public static boolean readXMLGos(String path, StatisticsNew stats) {
|
||||
boolean inWord = false;
|
||||
boolean inOrthDiv = false;
|
||||
boolean computeForOrth = stats.getCorpus().isGosOrthMode();
|
||||
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
|
||||
String lemma = "";
|
||||
String msd = "";
|
||||
|
||||
List<Word> sentence = new ArrayList<>();
|
||||
List<Sentence> corpus = new ArrayList<>(Settings.CORPUS_SENTENCE_LIMIT); // preset the list's size, so there won't be a need to resize it
|
||||
String sentenceDelimiter = "seg";
|
||||
|
||||
String gosType = stats.getFilter().hasMsd() ? "norm" : "orth"; // orth & norm
|
||||
|
||||
XMLEventReader eventReader = null;
|
||||
|
||||
boolean includeFile = true;
|
||||
|
||||
try {
|
||||
XMLInputFactory factory = XMLInputFactory.newInstance();
|
||||
eventReader = factory.createXMLEventReader(new FileInputStream(path));
|
||||
|
||||
while (eventReader.hasNext()) {
|
||||
XMLEvent event = eventReader.nextEvent();
|
||||
// System.out.print(String.format("%s", event.toString().replaceAll("\\['http://www.tei-c.org/ns/1.0'\\]::", "")));
|
||||
|
||||
switch (event.getEventType()) {
|
||||
case XMLStreamConstants.START_ELEMENT:
|
||||
StartElement startElement = event.asStartElement();
|
||||
String qName = startElement.getName().getLocalPart();
|
||||
|
||||
if (qName.equals("div")) {
|
||||
HashMap<String, String> atts = extractAttributes(startElement);
|
||||
|
||||
if (atts.keySet().contains("type")) {
|
||||
inOrthDiv = atts.get("type").equals("orth");
|
||||
}
|
||||
}
|
||||
|
||||
// "word" node
|
||||
if (qName.equals("w")) {
|
||||
// check that it's not a type
|
||||
HashMap<String, String> atts = extractAttributes(startElement);
|
||||
|
||||
if (!atts.containsKey("type")) {
|
||||
inWord = true;
|
||||
|
||||
if (atts.containsKey("msd")) {
|
||||
msd = atts.get("msd");
|
||||
|
||||
}
|
||||
if (atts.containsKey("lemma")) {
|
||||
lemma = atts.get("lemma");
|
||||
}
|
||||
//
|
||||
// if (!inOrthDiv) {
|
||||
// msd = String.valueOf(startElement.getAttributeByName(QName.valueOf("msd")).getValue());
|
||||
// lemma = String.valueOf(startElement.getAttributeByName(QName.valueOf("lemma")).getValue());
|
||||
// }
|
||||
}
|
||||
|
||||
// }
|
||||
}
|
||||
// taxonomy node
|
||||
else if (qName.equalsIgnoreCase("catRef")) {
|
||||
// there are some term nodes at the beginning that are of no interest to us
|
||||
// they differ by not having the attribute "ref", so test will equal null
|
||||
Attribute tax = startElement.getAttributeByName(QName.valueOf("target"));
|
||||
|
||||
if (tax != null) {
|
||||
// keep only taxonomy properties
|
||||
currentFiletaxonomy.add(String.valueOf(tax.getValue()));
|
||||
}
|
||||
} else if (qName.equalsIgnoreCase("div")) {
|
||||
gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
|
||||
}
|
||||
break;
|
||||
|
||||
case XMLStreamConstants.CHARACTERS:
|
||||
// "word" node value
|
||||
if (inWord) {
|
||||
Characters characters = event.asCharacters();
|
||||
if (gosType.equals("norm") && msd != null) {
|
||||
sentence.add(new Word(characters.getData(), lemma, msd));
|
||||
} else {
|
||||
sentence.add(new Word(characters.getData()));
|
||||
}
|
||||
|
||||
inWord = false;
|
||||
}
|
||||
break;
|
||||
|
||||
case XMLStreamConstants.END_ELEMENT:
|
||||
EndElement endElement = event.asEndElement();
|
||||
|
||||
// parser reached end of the current sentence
|
||||
if (endElement.getName().getLocalPart().equals(sentenceDelimiter)) {
|
||||
// add sentence to corpus if it passes filters
|
||||
boolean saveSentence = computeForOrth == inOrthDiv;
|
||||
|
||||
if (includeFile && saveSentence && !ValidationUtil.isEmpty(sentence)) {
|
||||
sentence = runFilters(sentence, stats.getFilter());
|
||||
corpus.add(new Sentence(sentence));
|
||||
}
|
||||
|
||||
// and start a new one
|
||||
sentence = new ArrayList<>();
|
||||
|
||||
/* Invoke Fork-Join when we reach maximum limit of
|
||||
* sentences (because we can't read everything to
|
||||
* memory) or we reach the end of the file.
|
||||
*/
|
||||
if (corpus.size() == Settings.CORPUS_SENTENCE_LIMIT || !eventReader.hasNext()) {
|
||||
fj(corpus, stats);
|
||||
// empty the current corpus, since we don't need
|
||||
// the data anymore
|
||||
corpus.clear();
|
||||
}
|
||||
} else if (endElement.getName().getLocalPart().equals("teiHeader")) {
|
||||
// before proceeding to read this file, make sure that taxonomy filters are a match
|
||||
if (!ValidationUtil.isEmpty(stats.getFilter().getTaxonomy())) {
|
||||
currentFiletaxonomy.retainAll(stats.getFilter().getTaxonomy()); // intersection
|
||||
|
||||
// disregard this entry if taxonomies don't match
|
||||
includeFile = !currentFiletaxonomy.isEmpty();
|
||||
|
||||
currentFiletaxonomy = new ArrayList<>();
|
||||
}
|
||||
}
|
||||
|
||||
// backup
|
||||
else if (endElement.getName().getLocalPart().equalsIgnoreCase("tei")) {
|
||||
fj(corpus, stats);
|
||||
corpus.clear();
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException | XMLStreamException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
if (eventReader != null) {
|
||||
try {
|
||||
eventReader.close();
|
||||
} catch (XMLStreamException e) {
|
||||
logger.error("closing stream", e);
|
||||
} catch (Exception e) {
|
||||
logger.error("general error", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs the sentence through some filters, so we don't do calculations when unnecessary.
|
||||
* Filters:
|
||||
* <ol>
|
||||
* <li><b>Ngrams:</b> omit sentences that are shorter than the ngram value (e.g. 3 gram of a single word sentence)</li>
|
||||
* <li><b>Letter ngrams:</b> omit words that are shorter than the specified string length (e.g. combinations of 3 letters when the word consists of only 2 letters)</li>
|
||||
* </ol>
|
||||
*
|
||||
* @return Empty sentence (if fails 1.) or a sentence with some words removed (2.)
|
||||
*/
|
||||
private static List<Word> runFilters(List<Word> sentence, Filter filter) {
|
||||
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
||||
// ngram level: if not 0 must be less than or equal to number of words in this sentence.
|
||||
if (filter.getNgramValue() > 0 && filter.getNgramValue() > sentence.size()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// if we're calculating values for letters, omit words that are shorter than string length
|
||||
if (filter.getNgramValue() == 0) {
|
||||
sentence.removeIf(w -> (filter.getCalculateFor() == CalculateFor.WORD && w.getWord().length() < filter.getStringLength())
|
||||
|| (filter.getCalculateFor() == CalculateFor.LEMMA && w.getLemma().length() < filter.getStringLength()));
|
||||
}
|
||||
}
|
||||
|
||||
return sentence;
|
||||
}
|
||||
|
||||
private static HashMap<String, String> extractAttributes(StartElement se) {
|
||||
Iterator attributesIt = se.getAttributes();
|
||||
HashMap<String, String> atts = new HashMap<>();
|
||||
|
||||
while (attributesIt.hasNext()) {
|
||||
Attribute a = (Attribute) attributesIt.next();
|
||||
atts.put(a.getName().getLocalPart(), a.getValue());
|
||||
}
|
||||
|
||||
return atts;
|
||||
}
|
||||
}
|
||||
67
src/main/java/alg/inflectedJOS/ForkJoin.java
Normal file
67
src/main/java/alg/inflectedJOS/ForkJoin.java
Normal file
@@ -0,0 +1,67 @@
|
||||
package alg.inflectedJOS;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.concurrent.RecursiveAction;
|
||||
|
||||
import data.Sentence;
|
||||
import data.Statistics;
|
||||
|
||||
public class ForkJoin extends RecursiveAction {
|
||||
private static final long serialVersionUID = -1260951004477299634L;
|
||||
|
||||
private static final int ACCEPTABLE_SIZE = 1000;
|
||||
private List<Sentence> corpus;
|
||||
private Statistics stats;
|
||||
private int start;
|
||||
private int end;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for subproblems.
|
||||
*/
|
||||
private ForkJoin(List<Sentence> corpus, int start, int end, Statistics stats) {
|
||||
this.corpus = corpus;
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
this.stats = stats;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor for the initial problem
|
||||
*/
|
||||
public ForkJoin(List<Sentence> corpus, Statistics stats) {
|
||||
this.corpus = corpus;
|
||||
this.start = 0;
|
||||
this.end = corpus.size();
|
||||
this.stats = stats;
|
||||
}
|
||||
|
||||
private void computeDirectly() {
|
||||
List<Sentence> subCorpus = corpus.subList(start, end);
|
||||
|
||||
if (stats.isTaxonomySet()) {
|
||||
InflectedJOSCount.calculateForAll(subCorpus, stats, stats.getInflectedJosTaxonomy());
|
||||
} else {
|
||||
InflectedJOSCount.calculateForAll(subCorpus, stats, null);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void compute() {
|
||||
int subCorpusSize = end - start;
|
||||
|
||||
if (subCorpusSize < ACCEPTABLE_SIZE) {
|
||||
computeDirectly();
|
||||
} else {
|
||||
int mid = start + subCorpusSize / 2;
|
||||
ForkJoin left = new ForkJoin(corpus, start, mid, stats);
|
||||
ForkJoin right = new ForkJoin(corpus, mid, end, stats);
|
||||
|
||||
// fork (push to queue)-> compute -> join
|
||||
left.fork();
|
||||
right.fork();
|
||||
left.join();
|
||||
right.join();
|
||||
}
|
||||
}
|
||||
}
|
||||
170
src/main/java/alg/inflectedJOS/InflectedJOSCount.java
Normal file
170
src/main/java/alg/inflectedJOS/InflectedJOSCount.java
Normal file
@@ -0,0 +1,170 @@
|
||||
package alg.inflectedJOS;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import alg.Common;
|
||||
import data.Sentence;
|
||||
import data.Statistics;
|
||||
import data.StatisticsNew;
|
||||
import data.Word;
|
||||
|
||||
public class InflectedJOSCount {
|
||||
|
||||
public static HashMap<Integer, ArrayList<ArrayList<Integer>>> indices;
|
||||
|
||||
// static {
|
||||
// // calculate all possible combinations of indices we will substitute with a '-' for substring statistics
|
||||
// indices = new HashMap<>();
|
||||
// for (int i = 5; i <= 8; i++) {
|
||||
// indices.put(i, calculateCombinations(i));
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// private static List<Integer> calculateCombinations(int i) {
|
||||
// int arr[] = {1, 2, 3, 4, 5};
|
||||
// int r = 3;
|
||||
// int n = arr.length;
|
||||
// ArrayList<ArrayList<Integer>> result = new ArrayList<>();
|
||||
//
|
||||
// return printCombination(arr, n, r);
|
||||
// }
|
||||
//
|
||||
// /* arr[] ---> Input Array
|
||||
// data[] ---> Temporary array to store current combination
|
||||
// start & end ---> Staring and Ending indexes in arr[]
|
||||
// index ---> Current index in data[]
|
||||
// r ---> Size of a combination to be printed */
|
||||
// static void combinationUtil(int arr[], int data[], int start,
|
||||
// int end, int index, int r, ArrayList<ArrayList<Integer>> result) {
|
||||
// // Current combination is ready to be printed, print it
|
||||
// ArrayList<Integer> tmpResult = new ArrayList<>();
|
||||
//
|
||||
// if (index == r) {
|
||||
// ArrayList<Integer> tmpResult = new ArrayList<>();
|
||||
// for (int j = 0; j < r; j++)
|
||||
// System.out.print(data[j] + " ");
|
||||
// System.out.println("");
|
||||
// return;
|
||||
// }
|
||||
//
|
||||
// // replace index with all possible elements. The condition
|
||||
// // "end-i+1 >= r-index" makes sure that including one element
|
||||
// // at index will make a combination with remaining elements
|
||||
// // at remaining positions
|
||||
// for (int i = start; i <= end && end - i + 1 >= r - index; i++) {
|
||||
// data[index] = arr[i];
|
||||
// combinationUtil(arr, data, i + 1, end, index + 1, r);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // The main function that prints all combinations of size r
|
||||
// // in arr[] of size n. This function mainly uses combinationUtil()
|
||||
// static void printCombination(int arr[], int n, int r) {
|
||||
// // A temporary array to store all combination one by one
|
||||
// int data[] = new int[r];
|
||||
//
|
||||
// // Print all combination using temprary array 'data[]'
|
||||
// combinationUtil(arr, data, 0, n - 1, 0, r);
|
||||
// }
|
||||
|
||||
// public static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
|
||||
// for (Sentence s : corpus) {
|
||||
// // disregard if wrong taxonomy
|
||||
// if (!(s.getTaxonomy().startsWith(taxonomy))) {
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// calculateCommon(s, stats.result);
|
||||
//
|
||||
// for (Word word : s.getWords()) {
|
||||
// // skip if current word is not inflected
|
||||
// if (!(word.getMsd().length() > 0)) {
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// String msd = word.getMsd();
|
||||
//
|
||||
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
//
|
||||
// for (int i = 1; i < msd.length(); i++) {
|
||||
// entry.setCharAt(i, msd.charAt(i));
|
||||
// Common.updateMap(stats.result, entry.toString());
|
||||
// entry.setCharAt(i, '-');
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// public static void calculateForAll(List<Sentence> corpus, Statistics stats) {
|
||||
// for (Sentence s : corpus) {
|
||||
// for (Word word : s.getWords()) {
|
||||
// if (!(word.getMsd().length() > 0)) {
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// String msd = word.getMsd();
|
||||
//
|
||||
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
//
|
||||
// for (int i = 1; i < msd.length(); i++) {
|
||||
// entry.setCharAt(i, msd.charAt(i));
|
||||
// Common.updateMap(stats.result, entry.toString());
|
||||
// entry.setCharAt(i, '-');
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
|
||||
for (Sentence s : corpus) {
|
||||
// disregard if wrong taxonomy
|
||||
if (taxonomy != null && !(s.getTaxonomy().startsWith(taxonomy))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (Word word : s.getWords()) {
|
||||
// skip if current word is not inflected
|
||||
if (!(word.getMsd().length() > 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String msd = word.getMsd();
|
||||
|
||||
StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
|
||||
for (int i = 1; i < msd.length(); i++) {
|
||||
entry.setCharAt(i, msd.charAt(i));
|
||||
Common.updateMap(stats.result, entry.toString());
|
||||
entry.setCharAt(i, '-');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats, String taxonomy) {
|
||||
for (Sentence s : corpus) {
|
||||
|
||||
for (Word word : s.getWords()) {
|
||||
// skip if current word is not inflected
|
||||
// // TODO: if has defined msd and is of correct type (create a set)
|
||||
// if (!(word.getMsd().length() > 0)) {
|
||||
// continue;
|
||||
// }
|
||||
|
||||
String msd = word.getMsd();
|
||||
|
||||
StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
|
||||
for (int i = 1; i < msd.length(); i++) {
|
||||
entry.setCharAt(i, msd.charAt(i));
|
||||
stats.updateResults(entry.toString());
|
||||
entry.setCharAt(i, '-');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
131
src/main/java/alg/inflectedJOS/WordFormation.java
Normal file
131
src/main/java/alg/inflectedJOS/WordFormation.java
Normal file
@@ -0,0 +1,131 @@
|
||||
package alg.inflectedJOS;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import data.Enums.InflectedJosTypes;
|
||||
import data.StatisticsNew;
|
||||
import gui.ValidationUtil;
|
||||
import util.Combinations;
|
||||
|
||||
// adapted from http://www.geeksforgeeks.org/print-all-possible-combinations-of-r-elements-in-a-given-array-of-size-n/
|
||||
public class WordFormation {
|
||||
private static HashMap<String, Long> josTypeResult;
|
||||
private static Object[][] tmpResults;
|
||||
|
||||
private static HashMap<Integer, HashSet<HashSet<Integer>>> indices;
|
||||
|
||||
static {
|
||||
indices = new HashMap<>();
|
||||
|
||||
for (int i = 4; i <= 8; i++) {
|
||||
indices.put(i, Combinations.generateIndices(i));
|
||||
}
|
||||
}
|
||||
|
||||
public static void calculateStatistics(StatisticsNew stat) {
|
||||
Map<String, AtomicLong> result = stat.getResult();
|
||||
|
||||
// 1. filter - keep only inflected types
|
||||
result.keySet().removeIf(x -> !InflectedJosTypes.inflectedJosTypes.contains(x.charAt(0)));
|
||||
|
||||
// 2. for each inflected type get all possible subcombinations
|
||||
for (Character josChar : InflectedJosTypes.inflectedJosTypes) {
|
||||
josTypeResult = new HashMap<>();
|
||||
|
||||
// filter out results for a single word type
|
||||
Map<String, AtomicLong> singleTypeResults = result.entrySet().stream()
|
||||
.filter(x -> x.getKey().charAt(0) == josChar)
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
|
||||
|
||||
if (ValidationUtil.isEmpty(singleTypeResults)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// get all possible indices combos for a msd of this length
|
||||
// HashSet<HashSet<Integer>> indicesCombos = indices.get()
|
||||
//Combinations.generateIndices(singleTypeResults.keySet().stream().findFirst().get().length());
|
||||
|
||||
for (Map.Entry<String, AtomicLong> e : singleTypeResults.entrySet()) {
|
||||
int l = e.getKey().length();
|
||||
|
||||
for (HashSet<Integer> indicesCombo : indices.get(e.getKey().length())) {
|
||||
updateResults(mask(e.getKey(), indicesCombo), e.getValue().longValue());
|
||||
}
|
||||
}
|
||||
|
||||
resultsMapToArray(singleTypeResults.values().stream().mapToLong(Number::longValue).sum());
|
||||
}
|
||||
|
||||
stat.setResultCustom(tmpResults);
|
||||
}
|
||||
|
||||
private static String mask(String word, HashSet<Integer> indicesCombo) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
sb.append(word.charAt(0));
|
||||
for (int i = 1; i < word.length(); i++) {
|
||||
sb.append(indicesCombo.contains(i) ? word.charAt(i) : ".");
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
private static void updateResults(String s, Long nOfOccurences) {
|
||||
// if not in map add
|
||||
Long r = josTypeResult.putIfAbsent(s, nOfOccurences);
|
||||
|
||||
// else update
|
||||
if (r != null) {
|
||||
josTypeResult.put(s, josTypeResult.get(s) + nOfOccurences);
|
||||
}
|
||||
}
|
||||
|
||||
private static void resultsMapToArray(Long totalValue) {
|
||||
Double total = totalValue * 1.0;
|
||||
Object[][] josTypeResultArray = new Object[josTypeResult.size()][3];
|
||||
|
||||
int i = 0;
|
||||
for (Map.Entry<String, Long> e : josTypeResult.entrySet()) {
|
||||
josTypeResultArray[i][0] = e.getKey();
|
||||
josTypeResultArray[i][1] = e.getValue();
|
||||
josTypeResultArray[i][2] = e.getValue() / total;
|
||||
|
||||
if (e.getValue() > total) {
|
||||
|
||||
String debug = "";
|
||||
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
if (tmpResults == null) {
|
||||
tmpResults = josTypeResultArray;
|
||||
} else {
|
||||
int firstLength = tmpResults.length;
|
||||
int secondLength = josTypeResultArray.length;
|
||||
Object[][] tmp = new Object[firstLength + secondLength][3];
|
||||
|
||||
System.arraycopy(tmpResults, 0, tmp, 0, firstLength);
|
||||
System.arraycopy(josTypeResultArray, 0, tmp, firstLength, secondLength);
|
||||
|
||||
tmpResults = tmp;
|
||||
|
||||
// tmpResults = ArrayUtils.addAll(tmpResults, josTypeResultArray);
|
||||
}
|
||||
}
|
||||
|
||||
private static void printArray() {
|
||||
for (int i = 0; i < tmpResults.length; i++) {
|
||||
for (int j = 0; j < tmpResults[i].length; j++) {
|
||||
System.out.print(tmpResults[i][j] + "\t");
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
}
|
||||
62
src/main/java/alg/ngram/ForkJoin.java
Normal file
62
src/main/java/alg/ngram/ForkJoin.java
Normal file
@@ -0,0 +1,62 @@
|
||||
package alg.ngram;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.concurrent.RecursiveAction;
|
||||
|
||||
import data.Sentence;
|
||||
import data.StatisticsNew;
|
||||
|
||||
public class ForkJoin extends RecursiveAction {
|
||||
private static final long serialVersionUID = 5074814035083362355L;
|
||||
|
||||
private static final int ACCEPTABLE_SIZE = 1000;
|
||||
private List<Sentence> corpus;
|
||||
private StatisticsNew stats;
|
||||
private int start;
|
||||
private int end;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for subproblems.
|
||||
*/
|
||||
private ForkJoin(List<Sentence> corpus, int start, int end, StatisticsNew stats) {
|
||||
this.corpus = corpus;
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
this.stats = stats;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor for the initial problem
|
||||
*/
|
||||
public ForkJoin(List<Sentence> corpus, StatisticsNew stats) {
|
||||
this.corpus = corpus;
|
||||
this.start = 0;
|
||||
this.end = corpus.size();
|
||||
this.stats = stats;
|
||||
}
|
||||
|
||||
private void computeDirectly() {
|
||||
List<Sentence> subCorpus = corpus.subList(start, end);
|
||||
Ngrams.calculateForAll(subCorpus, stats);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void compute() {
|
||||
int subCorpusSize = end - start;
|
||||
|
||||
if (subCorpusSize < ACCEPTABLE_SIZE) {
|
||||
computeDirectly();
|
||||
} else {
|
||||
int mid = start + subCorpusSize / 2;
|
||||
ForkJoin left = new ForkJoin(corpus, start, mid, stats);
|
||||
ForkJoin right = new ForkJoin(corpus, mid, end, stats);
|
||||
|
||||
// fork (push to queue)-> compute -> join
|
||||
left.fork();
|
||||
right.fork();
|
||||
left.join();
|
||||
right.join();
|
||||
}
|
||||
}
|
||||
}
|
||||
204
src/main/java/alg/ngram/Ngrams.java
Normal file
204
src/main/java/alg/ngram/Ngrams.java
Normal file
@@ -0,0 +1,204 @@
|
||||
package alg.ngram;
|
||||
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import data.CalculateFor;
|
||||
import data.Sentence;
|
||||
import data.StatisticsNew;
|
||||
import data.Word;
|
||||
import gui.ValidationUtil;
|
||||
|
||||
public class Ngrams {
|
||||
public final static Logger logger = LogManager.getLogger(Ngrams.class);
|
||||
|
||||
|
||||
public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats) {
|
||||
if (stats.getFilter().getNgramValue() == 0) { // letter ngram
|
||||
generateNgramLetterCandidates(corpus, stats);
|
||||
} else if (!ValidationUtil.isEmpty(stats.getFilter().getSkipValue()) && stats.getFilter().getSkipValue() > 0) {
|
||||
generateSkipgramCandidates(corpus, stats);
|
||||
} else {
|
||||
generateNgramCandidates(corpus, stats);
|
||||
}
|
||||
}
|
||||
|
||||
public static void generateNgramCandidates(List<Sentence> corpus, StatisticsNew stats) {
|
||||
for (Sentence s : corpus) {
|
||||
// skip sentences shorter than specified ngram length
|
||||
if (s.getWords().size() < stats.getFilter().getNgramValue()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int i = 0; i < s.getWords().size() - stats.getFilter().getNgramValue() + 1; i++) {
|
||||
List<Word> ngramCandidate = s.getSublist(i, i + stats.getFilter().getNgramValue());
|
||||
|
||||
// if msd regex is set and this candidate doesn't pass it, skip this iteration
|
||||
if (stats.getFilter().hasMsd() && !passesRegex(ngramCandidate, stats.getFilter().getMsd())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
stats.updateResults(wordToString(ngramCandidate, stats.getFilter().getCalculateFor()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether an ngram candidate passes specified regex filter.
|
||||
*/
|
||||
private static boolean passesRegex(List<Word> ngramCandidate, ArrayList<Pattern> regex) {
|
||||
if (ngramCandidate.size() != regex.size()) {
|
||||
logger.error("ngramCandidate.size() & msd.size() mismatch"); // should not occur anyway
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < regex.size(); i++) {
|
||||
if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static String wordToString(List<Word> ngramCandidate, CalculateFor calculateFor) {
|
||||
ArrayList<String> candidate = new ArrayList<>(ngramCandidate.size());
|
||||
|
||||
switch (calculateFor) {
|
||||
case LEMMA:
|
||||
candidate.addAll(ngramCandidate
|
||||
.stream()
|
||||
.map(Word::getLemma)
|
||||
.collect(Collectors.toList()));
|
||||
break;
|
||||
case WORD:
|
||||
candidate.addAll(ngramCandidate
|
||||
.stream()
|
||||
.map(Word::getWord)
|
||||
.collect(Collectors.toList()));
|
||||
break;
|
||||
case MORPHOSYNTACTIC_SPECS:
|
||||
case MORPHOSYNTACTIC_PROPERTY:
|
||||
candidate.addAll(ngramCandidate
|
||||
.stream()
|
||||
.map(Word::getMsd)
|
||||
.collect(Collectors.toList()));
|
||||
break;
|
||||
case WORD_TYPE:
|
||||
candidate.addAll(ngramCandidate
|
||||
.stream()
|
||||
.map(w -> Character.toString(w.getMsd().charAt(0)))
|
||||
.collect(Collectors.toList()));
|
||||
break;
|
||||
}
|
||||
|
||||
return StringUtils.join(candidate, " ");
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates candidates and updates results
|
||||
*
|
||||
* @param corpus
|
||||
* @param stats
|
||||
*/
|
||||
private static void generateNgramLetterCandidates(List<Sentence> corpus, StatisticsNew stats) {
|
||||
for (Sentence s : corpus) {
|
||||
for (Word w : s.getWords()) {
|
||||
String word = w.getForCf(stats.getFilter().getCalculateFor(), stats.getFilter().isCvv());
|
||||
|
||||
// skip this iteration if:
|
||||
// - word doesn't contain a proper version (missing lemma for example)
|
||||
// - msd regex is given but this word's msd doesn't match it, skip this iteration
|
||||
// - given substring length is larger than the word length
|
||||
if (ValidationUtil.isEmpty(word)
|
||||
|| stats.getFilter().hasMsd() && !w.getMsd().matches(stats.getFilter().getMsd().get(0).pattern())
|
||||
|| word.length() < stats.getFilter().getStringLength()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) {
|
||||
// TODO: locila?
|
||||
stats.updateResults(word.substring(i, i + stats.getFilter().getStringLength()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Extracts skipgram candidates.
|
||||
*
|
||||
* @return List of candidates represented as a list<candidates(String)>
|
||||
*/
|
||||
public static void generateSkipgramCandidates(List<Sentence> corpus, StatisticsNew stats) {
|
||||
ArrayList<Word> currentLoop;
|
||||
int ngram = stats.getFilter().getNgramValue();
|
||||
int skip = stats.getFilter().getSkipValue();
|
||||
|
||||
for (Sentence s : corpus) {
|
||||
List<Word> sentence = s.getWords();
|
||||
|
||||
for (int i = 0; i <= sentence.size() - ngram; i++) { // 1gram
|
||||
for (int j = i + 1; j <= i + skip + 1; j++) { // 2gram
|
||||
if (ngram == 2 && j < sentence.size()) {
|
||||
currentLoop = new ArrayList<>();
|
||||
currentLoop.add(sentence.get(i));
|
||||
currentLoop.add(sentence.get(j));
|
||||
|
||||
validateAndCountSkipgramCandidate(currentLoop, stats);
|
||||
} else {
|
||||
for (int k = j + 1; k <= j + 1 + skip; k++) { // 3gram
|
||||
if (ngram == 3 && k < sentence.size()) {
|
||||
currentLoop = new ArrayList<>();
|
||||
currentLoop.add(sentence.get(i));
|
||||
currentLoop.add(sentence.get(j));
|
||||
currentLoop.add(sentence.get(k));
|
||||
|
||||
validateAndCountSkipgramCandidate(currentLoop, stats);
|
||||
} else {
|
||||
for (int l = k + 1; l <= k + 1 + skip; l++) { // 4gram
|
||||
if (ngram == 4 && k < sentence.size()) {
|
||||
currentLoop = new ArrayList<>();
|
||||
currentLoop.add(sentence.get(i));
|
||||
currentLoop.add(sentence.get(j));
|
||||
currentLoop.add(sentence.get(k));
|
||||
currentLoop.add(sentence.get(l));
|
||||
|
||||
validateAndCountSkipgramCandidate(currentLoop, stats);
|
||||
} else {
|
||||
for (int m = k + 1; m <= k + 1 + skip; m++) { // 5gram
|
||||
if (ngram == 5 && k < sentence.size()) {
|
||||
currentLoop = new ArrayList<>();
|
||||
currentLoop.add(sentence.get(i));
|
||||
currentLoop.add(sentence.get(j));
|
||||
currentLoop.add(sentence.get(k));
|
||||
currentLoop.add(sentence.get(l));
|
||||
currentLoop.add(sentence.get(m));
|
||||
|
||||
validateAndCountSkipgramCandidate(currentLoop, stats);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats) {
|
||||
// count if no regex is set or if it is & candidate passes it
|
||||
if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd())) {
|
||||
stats.updateResults(wordToString(skipgramCandidate, stats.getFilter().getCalculateFor()));
|
||||
}
|
||||
}
|
||||
}
|
||||
62
src/main/java/alg/word/ForkJoin.java
Normal file
62
src/main/java/alg/word/ForkJoin.java
Normal file
@@ -0,0 +1,62 @@
|
||||
package alg.word;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.concurrent.RecursiveAction;
|
||||
|
||||
import data.Sentence;
|
||||
import data.StatisticsNew;
|
||||
|
||||
public class ForkJoin extends RecursiveAction {
|
||||
private static final long serialVersionUID = 7711587510996456040L;
|
||||
|
||||
private static final int ACCEPTABLE_SIZE = 1000;
|
||||
private List<Sentence> corpus;
|
||||
private StatisticsNew stats;
|
||||
private int start;
|
||||
private int end;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for subproblems.
|
||||
*/
|
||||
private ForkJoin(List<Sentence> corpus, int start, int end, StatisticsNew stats) {
|
||||
this.corpus = corpus;
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
this.stats = stats;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor for the initial problem
|
||||
*/
|
||||
public ForkJoin(List<Sentence> corpus, StatisticsNew stats) {
|
||||
this.corpus = corpus;
|
||||
this.start = 0;
|
||||
this.end = corpus.size();
|
||||
this.stats = stats;
|
||||
}
|
||||
|
||||
private void computeDirectly() {
|
||||
List<Sentence> subCorpus = corpus.subList(start, end);
|
||||
WordLevel.calculateForAll(subCorpus, stats);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void compute() {
|
||||
int subCorpusSize = end - start;
|
||||
|
||||
if (subCorpusSize < ACCEPTABLE_SIZE) {
|
||||
computeDirectly();
|
||||
} else {
|
||||
int mid = start + subCorpusSize / 2;
|
||||
ForkJoin left = new ForkJoin(corpus, start, mid, stats);
|
||||
ForkJoin right = new ForkJoin(corpus, mid, end, stats);
|
||||
|
||||
// fork (push to queue)-> compute -> join
|
||||
left.fork();
|
||||
right.fork();
|
||||
left.join();
|
||||
right.join();
|
||||
}
|
||||
}
|
||||
}
|
||||
167
src/main/java/alg/word/WordCount.java
Normal file
167
src/main/java/alg/word/WordCount.java
Normal file
@@ -0,0 +1,167 @@
|
||||
package alg.word;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import alg.Common;
|
||||
import data.CalculateFor;
|
||||
import data.Sentence;
|
||||
import data.Statistics;
|
||||
import data.Word;
|
||||
|
||||
class WordCount {
|
||||
private static void calculateNoFilter(List<Sentence> corpus, Statistics stats) {
|
||||
for (Sentence s : corpus) {
|
||||
List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
|
||||
if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
sentence.addAll(s.getWords()
|
||||
.stream()
|
||||
.map(Word::getLemma)
|
||||
.collect(Collectors.toList()));
|
||||
} else if (stats.getCf() == CalculateFor.WORD) {
|
||||
sentence.addAll(s.getWords()
|
||||
.stream()
|
||||
.map(Word::getWord)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
for (String word : sentence) {
|
||||
Common.updateMap(stats.result, word);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void calculateVCC(List<Sentence> corpus, Statistics stats) {
|
||||
for (Sentence s : corpus) {
|
||||
List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
|
||||
if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
sentence.addAll(s.getWords()
|
||||
.stream()
|
||||
.map(Word::getCVVLemma)
|
||||
.collect(Collectors.toList()));
|
||||
} else if (stats.getCf() == CalculateFor.WORD) {
|
||||
sentence.addAll(s.getWords()
|
||||
.stream()
|
||||
.map(Word::getCVVWord)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
for (String word : sentence) {
|
||||
if (word.length() > stats.getSubstringLength()) {
|
||||
for (int i = 0; i <= word.length() - stats.getSubstringLength(); i++) {
|
||||
String substring = word.substring(i, i + stats.getSubstringLength());
|
||||
Common.updateMap(stats.result, substring);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void calculateForJosType(List<Sentence> corpus, Statistics stats) {
|
||||
for (Sentence s : corpus) {
|
||||
List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
List<Word> filteredWords = new ArrayList<>();
|
||||
|
||||
for (Word word : s.getWords()) {
|
||||
if (word.getMsd() != null && word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {
|
||||
filteredWords.add(word);
|
||||
}
|
||||
}
|
||||
|
||||
if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
sentence.addAll(filteredWords
|
||||
.stream()
|
||||
.map(Word::getLemma)
|
||||
.collect(Collectors.toList()));
|
||||
} else if (stats.getCf() == CalculateFor.WORD) {
|
||||
sentence.addAll(filteredWords
|
||||
.stream()
|
||||
.map(Word::getWord)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
for (String word : sentence) {
|
||||
Common.updateMap(stats.result, word);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void calculateForTaxonomyAndJosType(List<Sentence> corpus, Statistics stats) {
|
||||
for (Sentence s : corpus) {
|
||||
if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
|
||||
List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
List<Word> filteredWords = new ArrayList<>();
|
||||
|
||||
for (Word word : s.getWords()) {
|
||||
if (word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {
|
||||
filteredWords.add(word);
|
||||
}
|
||||
}
|
||||
|
||||
if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
sentence.addAll(filteredWords
|
||||
.stream()
|
||||
.map(Word::getLemma)
|
||||
.collect(Collectors.toList()));
|
||||
} else if (stats.getCf() == CalculateFor.WORD) {
|
||||
sentence.addAll(filteredWords
|
||||
.stream()
|
||||
.map(Word::getWord)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
for (String word : sentence) {
|
||||
Common.updateMap(stats.result, word);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void calculateForTaxonomy(List<Sentence> corpus, Statistics stats) {
|
||||
for (Sentence s : corpus) {
|
||||
if (s.getTaxonomy().equalsIgnoreCase(stats.getDistributionTaxonomy())) {
|
||||
List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
|
||||
if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
sentence.addAll(s.getWords()
|
||||
.stream()
|
||||
.map(Word::getLemma)
|
||||
.collect(Collectors.toList()));
|
||||
} else if (stats.getCf() == CalculateFor.WORD) {
|
||||
sentence.addAll(s.getWords()
|
||||
.stream()
|
||||
.map(Word::getWord)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
for (String word : sentence) {
|
||||
Common.updateMap(stats.result, word);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void calculateForAll(List<Sentence> corpus, Statistics stats) {
|
||||
boolean taxonomyIsSet = stats.isTaxonomySet();
|
||||
boolean JosTypeIsSet = stats.isJOSTypeSet();
|
||||
|
||||
// branching because even though the only difference is an if or two &&
|
||||
// O(if) = 1, the amount of ifs adds up and this saves some time
|
||||
if (taxonomyIsSet && JosTypeIsSet) {
|
||||
calculateForTaxonomyAndJosType(corpus, stats);
|
||||
} else if (taxonomyIsSet && !JosTypeIsSet) {
|
||||
calculateForTaxonomy(corpus, stats);
|
||||
} else if (!taxonomyIsSet && JosTypeIsSet) {
|
||||
calculateForJosType(corpus, stats);
|
||||
} else {
|
||||
if (stats.isVcc()) {
|
||||
calculateVCC(corpus, stats);
|
||||
} else {
|
||||
calculateNoFilter(corpus, stats);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
112
src/main/java/alg/word/WordLevel.java
Normal file
112
src/main/java/alg/word/WordLevel.java
Normal file
@@ -0,0 +1,112 @@
|
||||
package alg.word;
|
||||
|
||||
import static data.Enums.WordLevelDefaultValues.*;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import data.Enums.WordLevelDefaultValues;
|
||||
import data.Enums.WordLevelType;
|
||||
import data.Sentence;
|
||||
import data.StatisticsNew;
|
||||
import data.Word;
|
||||
|
||||
@SuppressWarnings("Duplicates")
|
||||
public class WordLevel {
|
||||
private static HashSet<String> suffixes;
|
||||
private static int minSuffixLength;
|
||||
private static int maxSuffixLength;
|
||||
|
||||
private static HashSet<String> prefixes;
|
||||
private static int minPrefixLength;
|
||||
private static int maxPrefixLength;
|
||||
|
||||
static {
|
||||
suffixes = WordLevelDefaultValues.getSuffixes();
|
||||
calculateSuffixesLengths();
|
||||
|
||||
prefixes = WordLevelDefaultValues.getPrefixes();
|
||||
calculatePrefixesLengths();
|
||||
}
|
||||
|
||||
public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats) {
|
||||
for (Sentence s : corpus) {
|
||||
for (Word word : s.getWords()) {
|
||||
calculateForSuffixes(word.getWord(), stats);
|
||||
calculateForPrefixes(word.getWord(), stats);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void calculateForPrefixes(String word, StatisticsNew stats) {
|
||||
for (int tmpPrefixLength = maxPrefixLength; tmpPrefixLength >= minPrefixLength; tmpPrefixLength++) {
|
||||
if (word.length() - tmpPrefixLength < MIN_N_OF_CHARACTERS_LEFT_PREFIX) {
|
||||
return;
|
||||
}
|
||||
|
||||
String extractedPrefix = StringUtils.left(word, tmpPrefixLength);
|
||||
|
||||
if (prefixes.contains(extractedPrefix)) {
|
||||
// save suffix and full word
|
||||
stats.updateResultsNested(WordLevelType.PREFIX, extractedPrefix, word);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void calculateForSuffixes(String word, StatisticsNew stats) {
|
||||
for (int tmpSuffixLength = maxSuffixLength; tmpSuffixLength >= minSuffixLength; tmpSuffixLength++) {
|
||||
// preveri, da je beseda - cuttan suffix daljši od prednastavljene vrednosti
|
||||
// ker gremo od najdaljše opcije k najkrajši, se ob dosegu tega pogoja lahko zaključi računanje za trenutno besedo
|
||||
if (word.length() - tmpSuffixLength < MIN_N_OF_CHARACTERS_LEFT_SUFFIX) {
|
||||
return;
|
||||
}
|
||||
|
||||
String extractedSuffix = StringUtils.right(word, tmpSuffixLength);
|
||||
|
||||
if (suffixes.contains(extractedSuffix)) {
|
||||
// save suffix and full word
|
||||
stats.updateResultsNested(WordLevelType.SUFFIX, extractedSuffix, word);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// finds the shortest and longest suffix for quicker calculations
|
||||
public static void calculateSuffixesLengths() {
|
||||
minSuffixLength = -1;
|
||||
maxSuffixLength = -1;
|
||||
|
||||
for (String suffix : suffixes) {
|
||||
if (suffix.length() > maxSuffixLength) {
|
||||
maxSuffixLength = suffix.length();
|
||||
|
||||
if (minSuffixLength < 0) {
|
||||
minSuffixLength = maxSuffixLength;
|
||||
}
|
||||
} else if (suffix.length() < minSuffixLength) {
|
||||
minSuffixLength = suffix.length();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// finds the shortest and longest suffix for quicker calculations
|
||||
public static void calculatePrefixesLengths() {
|
||||
minPrefixLength = -1;
|
||||
maxPrefixLength = -1;
|
||||
|
||||
for (String prefix : prefixes) {
|
||||
if (prefix.length() > maxPrefixLength) {
|
||||
maxPrefixLength = prefix.length();
|
||||
|
||||
if (minPrefixLength < 0) {
|
||||
minPrefixLength = maxPrefixLength;
|
||||
}
|
||||
} else if (prefix.length() < minPrefixLength) {
|
||||
minPrefixLength = prefix.length();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
17
src/main/java/data/AnalysisLevel.java
Normal file
17
src/main/java/data/AnalysisLevel.java
Normal file
@@ -0,0 +1,17 @@
|
||||
package data;
|
||||
|
||||
/** Levels of analysis offered by the application; each carries its display label. */
public enum AnalysisLevel {
    STRING_LEVEL("Besedni nizi"),
    WORD_LEVEL("Nivo besed in delov besed"),
    WORD_FORMATION("Besedotvorni procesi");

    /** Label returned by {@link #toString()}. */
    private final String name;

    AnalysisLevel(String label) {
        name = label;
    }

    /** Returns the display label of this level. */
    @Override
    public String toString() {
        return name;
    }
}
|
||||
43
src/main/java/data/CalculateFor.java
Normal file
43
src/main/java/data/CalculateFor.java
Normal file
@@ -0,0 +1,43 @@
|
||||
package data;
|
||||
|
||||
/**
 * Unit a calculation is performed for; each constant carries its display label.
 *
 * <p>{@code DIST_WORDS} and {@code DIST_LEMMAS} share their labels with {@code WORD} and
 * {@code LEMMA}.
 */
public enum CalculateFor {
    WORD("različnica"),
    LEMMA("lema"),
    MORPHOSYNTACTIC_SPECS("oblikoskladenjska oznaka"),
    MORPHOSYNTACTIC_PROPERTY("oblikoskladenjska lastnost"),
    WORD_TYPE("besedna vrsta"),
    DIST_WORDS("različnica"),
    DIST_LEMMAS("lema");

    /** Label returned by {@link #toString()}. */
    private final String name;

    CalculateFor(String label) {
        name = label;
    }

    /** Returns the display label of this constant. */
    @Override
    public String toString() {
        return name;
    }

    /**
     * Looks a constant up by its display label.
     *
     * <p>Only {@code WORD}, {@code LEMMA}, {@code MORPHOSYNTACTIC_SPECS},
     * {@code MORPHOSYNTACTIC_PROPERTY} and {@code WORD_TYPE} are considered, in that order;
     * the {@code DIST_*} constants are never returned.
     *
     * @param cf display label, may be {@code null}
     * @return the matching constant, or {@code null} when {@code cf} is {@code null} or unknown
     */
    public static CalculateFor factory(String cf) {
        if (cf == null) {
            return null;
        }

        CalculateFor[] candidates = {
            WORD, LEMMA, MORPHOSYNTACTIC_SPECS, MORPHOSYNTACTIC_PROPERTY, WORD_TYPE
        };

        for (CalculateFor candidate : candidates) {
            if (candidate.name.equals(cf)) {
                return candidate;
            }
        }

        return null;
    }
}
|
||||
163
src/main/java/data/Corpus.java
Normal file
163
src/main/java/data/Corpus.java
Normal file
@@ -0,0 +1,163 @@
|
||||
package data;
|
||||
|
||||
import static gui.Messages.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import data.Enums.solar.SolarFilters;
|
||||
import gui.ValidationUtil;
|
||||
import javafx.collections.ObservableList;
|
||||
|
||||
public class Corpus {
|
||||
public final static Logger logger = LogManager.getLogger(Corpus.class);
|
||||
|
||||
private CorpusType corpusType;
|
||||
private File chosenResultsLocation;
|
||||
private File chosenCorpusLocation;
|
||||
private Collection<File> detectedCorpusFiles;
|
||||
boolean headerRead;
|
||||
private ObservableList<String> taxonomy; // if gigafida or gos
|
||||
private HashMap<String, ObservableList<String>> solarFilters; // if solar
|
||||
private HashMap<String, HashSet<String>> solarFiltersForXML; // if solar - used while parsing xml
|
||||
private boolean gosOrthMode;
|
||||
boolean hasMsdData;
|
||||
private ArrayList<String> validationErrors;
|
||||
|
||||
public Corpus() {
|
||||
validationErrors = new ArrayList<>();
|
||||
}
|
||||
|
||||
public CorpusType getCorpusType() {
|
||||
return corpusType;
|
||||
}
|
||||
|
||||
public void setCorpusType(CorpusType corpusType) {
|
||||
this.corpusType = corpusType;
|
||||
logger.info("Corpus.set: ", corpusType);
|
||||
}
|
||||
|
||||
public File getChosenResultsLocation() {
|
||||
return chosenResultsLocation;
|
||||
}
|
||||
|
||||
public void setChosenResultsLocation(File chosenResultsLocation) {
|
||||
this.chosenResultsLocation = chosenResultsLocation;
|
||||
logger.info("Corpus.set: ", chosenResultsLocation);
|
||||
}
|
||||
|
||||
public File getChosenCorpusLocation() {
|
||||
return chosenCorpusLocation;
|
||||
}
|
||||
|
||||
public void setChosenCorpusLocation(File chosenCorpusLocation) {
|
||||
this.chosenCorpusLocation = chosenCorpusLocation;
|
||||
logger.info("Corpus.set: ", chosenCorpusLocation);
|
||||
}
|
||||
|
||||
public Collection<File> getDetectedCorpusFiles() {
|
||||
return detectedCorpusFiles;
|
||||
}
|
||||
|
||||
public void setDetectedCorpusFiles(Collection<File> detectedCorpusFiles) {
|
||||
this.detectedCorpusFiles = detectedCorpusFiles;
|
||||
logger.info("Corpus.set: ", detectedCorpusFiles);
|
||||
}
|
||||
|
||||
public boolean isHeaderRead() {
|
||||
return headerRead;
|
||||
}
|
||||
|
||||
public void setHeaderRead(boolean headerRead) {
|
||||
this.headerRead = headerRead;
|
||||
}
|
||||
|
||||
public ObservableList<String> getTaxonomy() {
|
||||
return taxonomy;
|
||||
}
|
||||
|
||||
public void setTaxonomy(ObservableList<String> taxonomy) {
|
||||
this.taxonomy = taxonomy;
|
||||
logger.info("Corpus.set: ", taxonomy);
|
||||
}
|
||||
|
||||
public HashMap<String, ObservableList<String>> getSolarFilters() {
|
||||
return solarFilters;
|
||||
}
|
||||
|
||||
public void setSolarFilters(HashMap<String, ObservableList<String>> solarFilters) {
|
||||
this.solarFilters = solarFilters;
|
||||
logger.info("Corpus.set: ", solarFilters);
|
||||
}
|
||||
|
||||
public HashMap<String, HashSet<String>> getSolarFiltersForXML() {
|
||||
return solarFiltersForXML;
|
||||
}
|
||||
|
||||
public void setSolarFiltersForXML(HashMap<String, HashSet<String>> solarFiltersForXML) {
|
||||
this.solarFiltersForXML = solarFiltersForXML;
|
||||
logger.info("Corpus.set: ", solarFiltersForXML);
|
||||
}
|
||||
|
||||
public boolean isGosOrthMode() {
|
||||
return gosOrthMode;
|
||||
}
|
||||
|
||||
public void setGosOrthMode(boolean gosOrthMode) {
|
||||
this.gosOrthMode = gosOrthMode;
|
||||
logger.info("Corpus.set: ", gosOrthMode);
|
||||
}
|
||||
|
||||
public ArrayList<String> getValidationErrors() {
|
||||
return validationErrors;
|
||||
}
|
||||
|
||||
public String getValidationErrorsToString() {
|
||||
return StringUtils.join(validationErrors, "\n - ");
|
||||
}
|
||||
|
||||
public void setValidationErrors(ArrayList<String> validationErrors) {
|
||||
this.validationErrors = validationErrors;
|
||||
}
|
||||
|
||||
public boolean validate() {
|
||||
if (corpusType == null) {
|
||||
validationErrors.add(LABEL_RESULTS_CORPUS_TYPE_NOT_SET);
|
||||
}
|
||||
|
||||
if (chosenCorpusLocation == null) {
|
||||
validationErrors.add(LABEL_CORPUS_LOCATION_NOT_SET);
|
||||
}
|
||||
|
||||
if (chosenResultsLocation == null) {
|
||||
validationErrors.add(LABEL_RESULTS_LOCATION_NOT_SET);
|
||||
}
|
||||
|
||||
if (!headerRead && corpusType != null) {
|
||||
// if user didn't opt into reading the headers, set default taxonomy or solar filters
|
||||
if (Tax.getCorpusTypesWithTaxonomy().contains(corpusType)) {
|
||||
taxonomy = Tax.getTaxonomyForComboBox(corpusType);
|
||||
} else if (corpusType == CorpusType.SOLAR && solarFilters == null) {
|
||||
setSolarFilters(SolarFilters.getFiltersForComboBoxes());
|
||||
}
|
||||
}
|
||||
|
||||
if (headerRead && ValidationUtil.isEmpty(taxonomy)) {
|
||||
// mustn't happen, intercept at gui level
|
||||
}
|
||||
|
||||
if (!ValidationUtil.isEmpty(validationErrors)) {
|
||||
logger.error("Corpus validation error: ", StringUtils.join(validationErrors, "\n - "));
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
25
src/main/java/data/CorpusType.java
Normal file
25
src/main/java/data/CorpusType.java
Normal file
@@ -0,0 +1,25 @@
|
||||
package data;
|
||||
|
||||
/** Supported corpus types, each with a display name and a lower-case name. */
public enum CorpusType {
    GIGAFIDA("Gigafida", "gigafida"),
    CCKRES("ccKres ", "cckres"),
    SOLAR("Šolar", "šolar"),
    GOS("GOS", "gos");

    /** Display name returned by {@link #toString()}. */
    private final String name;
    /** Lower-case variant of the name. */
    private final String nameLowerCase;

    CorpusType(String displayName, String lowerCaseName) {
        name = displayName;
        nameLowerCase = lowerCaseName;
    }

    /** Returns the display name of this corpus type. */
    @Override
    public String toString() {
        return name;
    }

    /** Returns the lower-case name of this corpus type. */
    public String getNameLowerCase() {
        return this.nameLowerCase;
    }
}
|
||||
12
src/main/java/data/Enums/InflectedJosTypes.java
Normal file
12
src/main/java/data/Enums/InflectedJosTypes.java
Normal file
@@ -0,0 +1,12 @@
|
||||
package data.Enums;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
|
||||
/** Holder for the set of JOS word-type codes listed as inflected: {@code S}, {@code G}, {@code P}. */
public class InflectedJosTypes {
    /** The codes {@code 'S'}, {@code 'G'} and {@code 'P'}. */
    public static final HashSet<Character> inflectedJosTypes =
            new HashSet<>(Arrays.asList('S', 'G', 'P'));
}
|
||||
68
src/main/java/data/Enums/Msd.java
Normal file
68
src/main/java/data/Enums/Msd.java
Normal file
@@ -0,0 +1,68 @@
|
||||
package data.Enums;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
 * Word types of the morphosyntactic description. Each constant carries its Slovenian name and
 * one-letter code, its English name and one-letter code, and its number of attributes.
 */
public enum Msd {
    NOUN("samostalnik", 'S', "Noun", 'N', 5),
    VERB("glagol", 'G', "Verb", 'V', 7),
    ADJECTIVE("pridevnik", 'P', "Adjective", 'A', 6),
    ADVERB("prislov", 'R', "Adverb", 'R', 2),
    PRONOUN("zaimek", 'Z', "Pronoun", 'P', 8),
    NUMERAL("števnik", 'K', "Numeral", 'M', 6),
    PREPOSITION("predlog", 'D', "Preposition", 'S', 1),
    CONJUNCTION("veznik", 'V', "Conjunction", 'C', 1),
    PARTICLE("členek", 'L', "Particle", 'Q', 0),
    INTERJECTION("medmet", 'M', "Interjection", 'I', 0),
    ABBREVIATION("okrajšava", 'O', "Abbreviation", 'Y', 0),
    RESIDUAL("neuvrščeno", 'N', "Residual", 'X', 1);

    /** Number of attributes per Slovenian one-letter code. */
    private static final HashMap<Character, Integer> siCodeNOfAttributes = new HashMap<>();

    static {
        for (Msd type : values()) {
            siCodeNOfAttributes.put(type.siCode, type.nOfAttributes);
        }
    }

    private final String siName;
    private final Character siCode;
    private final String enName;
    private final Character enCode;
    private final Integer nOfAttributes;

    Msd(String siName, Character siCode, String enName, Character enCode, int nOfAttributes) {
        this.siName = siName;
        this.siCode = siCode;
        this.enName = enName;
        this.enCode = enCode;
        this.nOfAttributes = nOfAttributes;
    }

    /** Returns the Slovenian name of the word type. */
    public String getSiName() {
        return this.siName;
    }

    /** Returns the Slovenian one-letter code of the word type. */
    public Character getSiCode() {
        return this.siCode;
    }

    /** Returns the English name of the word type. */
    public String getEnName() {
        return this.enName;
    }

    /** Returns the English one-letter code of the word type. */
    public Character getEnCode() {
        return this.enCode;
    }

    /**
     * Returns the number of attributes for the given type, plus one.
     *
     * @param msd tag whose first character is a Slovenian word-type code
     * @return the attribute count registered for that code, plus one
     * @throws NullPointerException            if the first character is not a known code
     * @throws StringIndexOutOfBoundsException if {@code msd} is empty
     */
    public static int getMsdLengthForType(String msd) {
        char typeCode = msd.charAt(0);
        int attributeCount = siCodeNOfAttributes.get(typeCode);
        return attributeCount + 1;
    }
}
|
||||
55
src/main/java/data/Enums/WordLevelDefaultValues.java
Normal file
55
src/main/java/data/Enums/WordLevelDefaultValues.java
Normal file
@@ -0,0 +1,55 @@
|
||||
package data.Enums;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
public class WordLevelDefaultValues {
|
||||
public final static Logger logger = LogManager.getLogger(WordLevelDefaultValues.class);
|
||||
|
||||
private static HashSet<String> suffixes;
|
||||
private static final String SUFFIXES_FILE = "/Lists/suffixes.txt";
|
||||
public static final int MIN_N_OF_CHARACTERS_LEFT_SUFFIX = 2;
|
||||
|
||||
private static HashSet<String> prefixes;
|
||||
private static final String PREFIXES_FILE = "/Lists/prefixes.txt";
|
||||
public static final int MIN_N_OF_CHARACTERS_LEFT_PREFIX = 2;
|
||||
|
||||
static {
|
||||
suffixes = new HashSet<>();
|
||||
suffixes = readFromFile(SUFFIXES_FILE);
|
||||
prefixes = new HashSet<>();
|
||||
prefixes = readFromFile(PREFIXES_FILE);
|
||||
}
|
||||
|
||||
private static HashSet<String> readFromFile(String fileName) {
|
||||
Set<String> dictionary = new HashSet<>();
|
||||
|
||||
try (InputStream is = WordLevelDefaultValues.class.getClass().getResourceAsStream(fileName)) {
|
||||
if (is != null) {
|
||||
// TODO: warn if !exists
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(is));
|
||||
dictionary = reader.lines().collect(Collectors.toSet());
|
||||
}
|
||||
} catch (IOException e) {
|
||||
logger.error("Problem reading init dictionary", e);
|
||||
}
|
||||
|
||||
return (HashSet<String>) dictionary;
|
||||
}
|
||||
|
||||
public static HashSet<String> getSuffixes() {
|
||||
return suffixes;
|
||||
}
|
||||
|
||||
public static HashSet<String> getPrefixes() {
|
||||
return prefixes;
|
||||
}
|
||||
}
|
||||
16
src/main/java/data/Enums/WordLevelType.java
Normal file
16
src/main/java/data/Enums/WordLevelType.java
Normal file
@@ -0,0 +1,16 @@
|
||||
package data.Enums;
|
||||
|
||||
/** Kinds of affix distinguished by the word-level analysis, each with its label. */
public enum WordLevelType {
    SUFFIX("pripona"),
    PREFIX("predpona");

    /** Label returned by {@link #getName()}. */
    private final String name;

    WordLevelType(String label) {
        name = label;
    }

    /** Returns the label of this affix kind. */
    public String getName() {
        return this.name;
    }
}
|
||||
57
src/main/java/data/Enums/solar/SolarFilters.java
Normal file
57
src/main/java/data/Enums/solar/SolarFilters.java
Normal file
@@ -0,0 +1,57 @@
|
||||
package data.Enums.solar;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
|
||||
import javafx.collections.FXCollections;
|
||||
import javafx.collections.ObservableList;
|
||||
|
||||
public class SolarFilters {
|
||||
private static HashMap<String, ObservableList<String>> SOLAR_FILTERS;
|
||||
public static final String SOLA = "sola";
|
||||
public static final String PREDMET = "predmet";
|
||||
public static final String RAZRED = "razred";
|
||||
public static final String REGIJA = "regija";
|
||||
public static final String TIP = "tip";
|
||||
public static final String LETO = "leto";
|
||||
|
||||
static {
|
||||
SOLAR_FILTERS = new HashMap<>();
|
||||
|
||||
SOLAR_FILTERS.put(REGIJA, FXCollections.observableArrayList("Celje", "Gorica", "Koper", "Kranj", "Krško", "Ljubljana", "Maribor", "Murska Sobota", "Novo mesto", "Postojna", "Slovenj Gradec"));
|
||||
SOLAR_FILTERS.put(PREDMET, FXCollections.observableArrayList("državljanska vzgoja in etika", "ekonomija", "filozofija", "geografija", "kemija", "podjetništvo", "psihologija", "slovenščina", "sociologija", "umetnostna vzgoja", "zgodovina"));
|
||||
SOLAR_FILTERS.put(RAZRED, FXCollections.observableArrayList("6. razred", "7. razred", "8. razred", "9. razred", "1. letnik", "2. letnik", "3. letnik", "4. letnik", "5. letnik", "maturitetni tečaj"));
|
||||
SOLAR_FILTERS.put(LETO, FXCollections.observableArrayList("2007", "2008", "2009", "2009/2010", "2010"));
|
||||
SOLAR_FILTERS.put(SOLA, FXCollections.observableArrayList("gimnazija", "osnovna šola", "poklicna šola", "strokovna šola"));
|
||||
SOLAR_FILTERS.put(TIP, FXCollections.observableArrayList("esej/spis", "pisni izdelek (učna ura)", "test (daljše besedilo)", "test (odgovori na vprašanja)"));
|
||||
}
|
||||
|
||||
public static final ObservableList<String> N_GRAM_COMPUTE_FOR_FULL = FXCollections.observableArrayList("različnica", "lema", "oblikoskladenjska oznaka", "oblikoskladenjska lastnost", "besedna vrsta");
|
||||
public static final ObservableList<String> N_GRAM_COMPUTE_FOR_LIMITED = FXCollections.observableArrayList("različnica", "lema");
|
||||
|
||||
/**
|
||||
* Returns filters with all possible values
|
||||
*/
|
||||
public static HashMap<String, ObservableList<String>> getFiltersForComboBoxes() {
|
||||
return SOLAR_FILTERS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns filters with all possible values
|
||||
*/
|
||||
public static HashMap<String, ObservableList<String>> getFiltersForComboBoxes(HashMap<String, HashSet<String>> foundFilters) {
|
||||
HashMap<String, ObservableList<String>> filtersForComboBoxes = new HashMap<>();
|
||||
|
||||
for (Map.Entry<String, ObservableList<String>> e : SOLAR_FILTERS.entrySet()) {
|
||||
if (!foundFilters.containsKey(e.getKey())) {
|
||||
// if, by some reason a specific filter wasn't in the corpus, return a blank list for that filter
|
||||
filtersForComboBoxes.put(e.getKey(), FXCollections.observableArrayList());
|
||||
} else {
|
||||
filtersForComboBoxes.put(e.getKey(), FXCollections.observableArrayList(foundFilters.get(e.getKey())).sorted());
|
||||
}
|
||||
}
|
||||
|
||||
return filtersForComboBoxes;
|
||||
}
|
||||
}
|
||||
144
src/main/java/data/Filter.java
Normal file
144
src/main/java/data/Filter.java
Normal file
@@ -0,0 +1,144 @@
|
||||
package data;
|
||||
|
||||
import static data.Filter.filterName.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import gui.ValidationUtil;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public class Filter {
|
||||
private HashMap<filterName, Object> filter;
|
||||
|
||||
public enum filterName {
|
||||
ANALYSIS_LEVEL,
|
||||
CALCULATE_FOR,
|
||||
NGRAM_VALUE,
|
||||
SKIP_VALUE,
|
||||
IS_CVV,
|
||||
STRING_LENGTH,
|
||||
TAXONOMY,
|
||||
MSD,
|
||||
HAS_MSD,
|
||||
SOLAR_FILTERS
|
||||
}
|
||||
|
||||
public Filter() {
|
||||
filter = new HashMap<>();
|
||||
}
|
||||
|
||||
public Filter(AnalysisLevel al, CalculateFor cf) {
|
||||
filter = new HashMap<>();
|
||||
|
||||
filter.put(ANALYSIS_LEVEL, al);
|
||||
filter.put(CALCULATE_FOR, cf);
|
||||
}
|
||||
|
||||
public void setAl(AnalysisLevel al) {
|
||||
filter.put(ANALYSIS_LEVEL, al);
|
||||
}
|
||||
|
||||
public AnalysisLevel getAl() {
|
||||
return (AnalysisLevel) filter.get(ANALYSIS_LEVEL);
|
||||
}
|
||||
|
||||
public void setCalculateFor(CalculateFor cf) {
|
||||
filter.put(CALCULATE_FOR, cf);
|
||||
}
|
||||
|
||||
public CalculateFor getCalculateFor() {
|
||||
return (CalculateFor) filter.get(CALCULATE_FOR);
|
||||
}
|
||||
|
||||
public void setNgramValue(Integer ngramValue) {
|
||||
filter.put(NGRAM_VALUE, ngramValue);
|
||||
}
|
||||
|
||||
public Integer getNgramValue() {
|
||||
return (Integer) filter.get(NGRAM_VALUE);
|
||||
}
|
||||
|
||||
public void setSkipValue(Integer skipValue) {
|
||||
filter.put(SKIP_VALUE, skipValue);
|
||||
}
|
||||
|
||||
public Integer getSkipValue() {
|
||||
return (Integer) filter.get(SKIP_VALUE);
|
||||
}
|
||||
|
||||
public void setIsCvv(boolean isCvv) {
|
||||
filter.put(IS_CVV, isCvv);
|
||||
}
|
||||
|
||||
public boolean isCvv() {
|
||||
return filter.containsKey(IS_CVV) && (boolean) filter.get(IS_CVV);
|
||||
}
|
||||
|
||||
public void setStringLength(int stringLength) {
|
||||
filter.put(STRING_LENGTH, stringLength);
|
||||
}
|
||||
|
||||
public Integer getStringLength() {
|
||||
return (Integer) filter.get(STRING_LENGTH);
|
||||
}
|
||||
|
||||
public void setTaxonomy(ArrayList<String> taxonomy) {
|
||||
filter.put(TAXONOMY, taxonomy);
|
||||
}
|
||||
|
||||
public ArrayList<String> getTaxonomy() {
|
||||
if (filter.containsKey(TAXONOMY) && filter.get(TAXONOMY) != null) {
|
||||
return (ArrayList<String>) filter.get(TAXONOMY);
|
||||
} else {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
}
|
||||
|
||||
public void setMsd(ArrayList<Pattern> msd) {
|
||||
filter.put(MSD, msd);
|
||||
if (!ValidationUtil.isEmpty(msd)) {
|
||||
setHasMsd(true);
|
||||
} else {
|
||||
setHasMsd(false);
|
||||
}
|
||||
}
|
||||
|
||||
public ArrayList<Pattern> getMsd() {
|
||||
return (ArrayList<Pattern>) filter.get(MSD);
|
||||
}
|
||||
|
||||
public void setHasMsd(boolean hasMsd) {
|
||||
filter.put(HAS_MSD, hasMsd);
|
||||
}
|
||||
|
||||
public boolean hasMsd() {
|
||||
return filter.containsKey(HAS_MSD) && (boolean) filter.get(HAS_MSD);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
String newLine = "\n\t- ";
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
sb.append(newLine).append("Filter:");
|
||||
for (Map.Entry<filterName, Object> entry : filter.entrySet()) {
|
||||
sb.append(newLine)
|
||||
.append(entry.getKey().toString())
|
||||
.append(": ")
|
||||
.append(entry.getValue() != null ? entry.getValue().toString() : "null");
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public void setSolarFilters(HashMap<String, HashSet<String>> filters) {
|
||||
filter.put(SOLAR_FILTERS, filters);
|
||||
}
|
||||
|
||||
public HashMap<String, HashSet<String>> getSolarFilters() {
|
||||
return (HashMap<String, HashSet<String>>) filter.get(SOLAR_FILTERS);
|
||||
}
|
||||
}
|
||||
71
src/main/java/data/GigafidaJosWordType.java
Normal file
71
src/main/java/data/GigafidaJosWordType.java
Normal file
@@ -0,0 +1,71 @@
|
||||
package data;
|
||||
|
||||
/** JOS word types used for Gigafida, each with a name and its one-letter code. */
public enum GigafidaJosWordType {
    SAMOSTALNIK("samostalnik", 'S'),
    GLAGOL("glagol", 'G'),
    PRIDEVNIK("pridevnik", 'P'),
    PRISLOV("prislov", 'R'),
    ZAIMEK("zaimek", 'Z'),
    STEVNIK("stevnik", 'K'),
    PREDLOG("predlog", 'D'),
    VEZNIK("veznik", 'V'),
    CLENEK("clenek", 'L'),
    MEDMET("medmet", 'M'),
    OKRAJSAVA("okrajsava", 'O');

    /** Name returned by {@link #toString()}. */
    private final String name;
    /** One-letter word-type code. */
    private final char wordType;

    GigafidaJosWordType(String label, char code) {
        name = label;
        wordType = code;
    }

    /** Returns the name of this word type. */
    @Override
    public String toString() {
        return name;
    }

    /** Returns the one-letter code of this word type. */
    public char getWordType() {
        return this.wordType;
    }

    /**
     * Looks a word type up by its name.
     *
     * @param wType name as returned by {@link #toString()}, may be {@code null}
     * @return the matching constant, or {@code null} when {@code wType} is {@code null} or
     *         unknown
     */
    public static GigafidaJosWordType factory(String wType) {
        if (wType == null) {
            return null;
        }

        for (GigafidaJosWordType candidate : values()) {
            if (candidate.toString().equals(wType)) {
                return candidate;
            }
        }

        return null;
    }
}
|
||||
76
src/main/java/data/GigafidaTaxonomy.java
Normal file
76
src/main/java/data/GigafidaTaxonomy.java
Normal file
@@ -0,0 +1,76 @@
|
||||
package data;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javafx.collections.FXCollections;
|
||||
import javafx.collections.ObservableList;
|
||||
|
||||
public enum GigafidaTaxonomy {
|
||||
TISK("tisk", "T"),
|
||||
KNJIZNO("knjižno", "T.K"),
|
||||
LEPOSLOVNO("leposlovno", "T.K.L"),
|
||||
STROKOVNO("strokovno", "T.K.S"),
|
||||
PERIODICNO("periodično", "T.P"),
|
||||
CASOPIS("časopis", "T.P.C"),
|
||||
REVIJA("revija", "T.P.R"),
|
||||
INTERNET("internet", "I");
|
||||
|
||||
private final String name;
|
||||
private final String taxonomy;
|
||||
|
||||
private static final ObservableList<String> FOR_COMBO_BOX;
|
||||
|
||||
static {
|
||||
ArrayList<String> values = Arrays.stream(GigafidaTaxonomy.values()).map(x -> x.name).collect(Collectors.toCollection(ArrayList::new));
|
||||
FOR_COMBO_BOX = FXCollections.observableArrayList(values);
|
||||
}
|
||||
|
||||
GigafidaTaxonomy(String name, String taxonomy) {
|
||||
this.name = name;
|
||||
this.taxonomy = taxonomy;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return this.name;
|
||||
}
|
||||
|
||||
public String getTaxonomnyString() {
|
||||
return this.taxonomy;
|
||||
}
|
||||
|
||||
public static GigafidaTaxonomy factory(String tax) {
|
||||
if (tax != null) {
|
||||
if (TISK.toString().equals(tax)) {
|
||||
return TISK;
|
||||
}
|
||||
if (KNJIZNO.toString().equals(tax)) {
|
||||
return KNJIZNO;
|
||||
}
|
||||
if (LEPOSLOVNO.toString().equals(tax)) {
|
||||
return LEPOSLOVNO;
|
||||
}
|
||||
if (STROKOVNO.toString().equals(tax)) {
|
||||
return STROKOVNO;
|
||||
}
|
||||
if (PERIODICNO.toString().equals(tax)) {
|
||||
return PERIODICNO;
|
||||
}
|
||||
if (CASOPIS.toString().equals(tax)) {
|
||||
return CASOPIS;
|
||||
}
|
||||
if (REVIJA.toString().equals(tax)) {
|
||||
return REVIJA;
|
||||
}
|
||||
if (INTERNET.toString().equals(tax)) {
|
||||
return INTERNET;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static ObservableList<String> getForComboBox() {
|
||||
return FOR_COMBO_BOX;
|
||||
}
|
||||
}
|
||||
85
src/main/java/data/GosTaxonomy.java
Normal file
85
src/main/java/data/GosTaxonomy.java
Normal file
@@ -0,0 +1,85 @@
|
||||
package data;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javafx.collections.FXCollections;
|
||||
import javafx.collections.ObservableList;
|
||||
|
||||
public enum GosTaxonomy {
|
||||
JAVNI("javni", "gos.T.J"),
|
||||
INFORMATIVNO_IZOBRAZEVALNI("informativno-izobraževalni", "gos.T.J.I"),
|
||||
RAZVEDRILNI("razvedrilni", "gos.T.J.R"),
|
||||
NEJAVNI("nejavni", "gos.T.N"),
|
||||
NEZASEBNI("nezasebni", "gos.T.N.N"),
|
||||
ZASEBNI("zasebni", "gos.T.N.Z"),
|
||||
OSEBNI_STIK("osebni stik", "gos.K.O"),
|
||||
TELEFON("telefon", "gos.K.P"),
|
||||
RADIO("radio", "gos.K.R"),
|
||||
TELEVIZIJA("televizija", "gos.K.T");
|
||||
|
||||
|
||||
private final String name;
|
||||
private final String taxonomy;
|
||||
|
||||
private static final ObservableList<String> FOR_COMBO_BOX;
|
||||
|
||||
static {
|
||||
ArrayList<String> values = Arrays.stream(GosTaxonomy.values()).map(x -> x.name).collect(Collectors.toCollection(ArrayList::new));
|
||||
FOR_COMBO_BOX = FXCollections.observableArrayList(values);
|
||||
}
|
||||
|
||||
GosTaxonomy(String name, String taxonomy) {
|
||||
this.name = name;
|
||||
this.taxonomy = taxonomy;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return this.name;
|
||||
}
|
||||
|
||||
public String getTaxonomnyString() {
|
||||
return this.taxonomy;
|
||||
}
|
||||
|
||||
public static GosTaxonomy factory(String tax) {
|
||||
if (tax != null) {
|
||||
if (JAVNI.toString().equals(tax)) {
|
||||
return JAVNI;
|
||||
}
|
||||
if (INFORMATIVNO_IZOBRAZEVALNI.toString().equals(tax)) {
|
||||
return INFORMATIVNO_IZOBRAZEVALNI;
|
||||
}
|
||||
if (RAZVEDRILNI.toString().equals(tax)) {
|
||||
return RAZVEDRILNI;
|
||||
}
|
||||
if (NEJAVNI.toString().equals(tax)) {
|
||||
return NEJAVNI;
|
||||
}
|
||||
if (NEZASEBNI.toString().equals(tax)) {
|
||||
return NEZASEBNI;
|
||||
}
|
||||
if (ZASEBNI.toString().equals(tax)) {
|
||||
return ZASEBNI;
|
||||
}
|
||||
if (OSEBNI_STIK.toString().equals(tax)) {
|
||||
return OSEBNI_STIK;
|
||||
}
|
||||
if (TELEFON.toString().equals(tax)) {
|
||||
return TELEFON;
|
||||
}
|
||||
if (RADIO.toString().equals(tax)) {
|
||||
return RADIO;
|
||||
}
|
||||
if (TELEVIZIJA.toString().equals(tax)) {
|
||||
return TELEVIZIJA;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static ObservableList<String> getForComboBox() {
|
||||
return FOR_COMBO_BOX;
|
||||
}
|
||||
}
|
||||
56
src/main/java/data/Sentence.java
Normal file
56
src/main/java/data/Sentence.java
Normal file
@@ -0,0 +1,56 @@
|
||||
package data;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class Sentence {
|
||||
|
||||
|
||||
private List<Word> words;
|
||||
private String taksonomija;
|
||||
|
||||
// GOS
|
||||
private String type;
|
||||
private Map<String, String> properties;
|
||||
|
||||
public Sentence(List<Word> words, String taksonomija) {
|
||||
this.words = words;
|
||||
this.taksonomija = taksonomija;
|
||||
}
|
||||
|
||||
public Sentence(List<Word> words) {
|
||||
this.words = words;
|
||||
}
|
||||
|
||||
public Sentence(List<Word> words, String taksonomija, Map<String, String> properties) {
|
||||
this.words = words;
|
||||
this.taksonomija = taksonomija;
|
||||
this.properties = properties;
|
||||
}
|
||||
|
||||
public Sentence(List<Word> words, String taksonomija, String type) {
|
||||
this.words = words;
|
||||
this.taksonomija = taksonomija;
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public List<Word> getWords() {
|
||||
return words;
|
||||
}
|
||||
|
||||
public String getTaxonomy() {
|
||||
return taksonomija;
|
||||
}
|
||||
|
||||
public List<Word> getSublist(int indexFrom, int indexTo) {
|
||||
return this.words.subList(indexFrom, indexTo);
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public void setType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
}
|
||||
16
src/main/java/data/Settings.java
Normal file
16
src/main/java/data/Settings.java
Normal file
@@ -0,0 +1,16 @@
|
||||
package data;
|
||||
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
 * Application-wide constants and shared, mutable run settings.
 */
public class Settings {
    // ---- limits and switches ----

    /** Sentence limit for a corpus. */
    public static final int CORPUS_SENTENCE_LIMIT = 50_000;

    /** Logging switch; currently off. */
    public static final boolean PRINT_LOG = false;

    // ---- JavaFX inline styles ----

    /** Style string that sets the accent colour to forest green ("OK" state). */
    public static final String FX_ACCENT_OK = "-fx-accent: forestgreen;";

    /** Style string that sets the accent colour to red ("not OK" state). */
    public static final String FX_ACCENT_NOK = "-fx-accent: red;";

    // ---- shared mutable state (null until assigned) ----

    /** Corpus files. */
    public static Collection<File> corpus;

    /** Location for results. */
    public static File resultsFilePath;
}
|
||||
299
src/main/java/data/Statistics.java
Normal file
299
src/main/java/data/Statistics.java
Normal file
@@ -0,0 +1,299 @@
|
||||
package data;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import util.Util;
|
||||
import util.db.RDB;
|
||||
|
||||
public class Statistics {
|
||||
private CorpusType corpusType;
|
||||
private AnalysisLevel analysisLevel;
|
||||
private boolean useDB;
|
||||
private RDB db;
|
||||
|
||||
private boolean analysisProducedResults;
|
||||
|
||||
private String taxonomy;
|
||||
private boolean taxonomyIsSet;
|
||||
|
||||
private char JOSType;
|
||||
private boolean JOSTypeIsSet;
|
||||
|
||||
private String resultTitle;
|
||||
public Map<String, AtomicLong> result = new ConcurrentHashMap<>();
|
||||
|
||||
// nGrams
|
||||
private int nGramLevel;
|
||||
private Integer skip;
|
||||
private CalculateFor cf;
|
||||
private List<Pattern> morphosyntacticFilter;
|
||||
|
||||
// distributions
|
||||
private String distributionTaxonomy;
|
||||
private char distributionJosWordType;
|
||||
private boolean vcc;
|
||||
private Integer substringLength;
|
||||
|
||||
// inflected JOS
|
||||
private String inflectedJosTaxonomy;
|
||||
|
||||
// GOS
|
||||
boolean gosOrthMode;
|
||||
|
||||
// šolar
|
||||
Map<String, Object> solarHeadBlockFilter;
|
||||
|
||||
|
||||
// for ngrams
|
||||
public Statistics(AnalysisLevel al, int nGramLevel, Integer skip, CalculateFor cf) {
|
||||
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
||||
this.cf = cf;
|
||||
this.analysisLevel = al;
|
||||
this.nGramLevel = nGramLevel;
|
||||
this.skip = skip == null || skip == 0 ? null : skip;
|
||||
|
||||
this.resultTitle = String.format("%s%d-gram_%s_%s",
|
||||
this.skip != null ? String.format("%d-%s-", skip, "skip") : "",
|
||||
nGramLevel,
|
||||
cf.toString(),
|
||||
dateTime);
|
||||
}
|
||||
|
||||
// for words distributions
|
||||
public Statistics(AnalysisLevel al, Taxonomy distributionTaxonomy, GigafidaJosWordType distributionJosWordType, CalculateFor cf) {
|
||||
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
||||
|
||||
this.resultTitle = String.format("%s_%s_%s",
|
||||
distributionTaxonomy != null ? distributionTaxonomy.toString() : "",
|
||||
distributionJosWordType != null ? distributionJosWordType.toString() : "",
|
||||
dateTime);
|
||||
|
||||
this.analysisLevel = al;
|
||||
this.cf = cf;
|
||||
this.distributionTaxonomy = distributionTaxonomy != null ? distributionTaxonomy.getTaxonomnyString() : null;
|
||||
this.taxonomyIsSet = distributionTaxonomy != null;
|
||||
|
||||
this.JOSTypeIsSet = distributionJosWordType != null;
|
||||
this.distributionJosWordType = this.JOSTypeIsSet ? distributionJosWordType.getWordType() : ' ';
|
||||
}
|
||||
|
||||
public Statistics(AnalysisLevel al, CalculateFor cf, Integer substringLength) {
|
||||
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
||||
|
||||
this.resultTitle = String.format("%s_%d_%s",
|
||||
"Distribucija zaporedij samoglasnikov in soglasnikov",
|
||||
substringLength,
|
||||
dateTime);
|
||||
|
||||
this.analysisLevel = al;
|
||||
this.cf = cf;
|
||||
this.substringLength = substringLength;
|
||||
this.vcc = true;
|
||||
}
|
||||
|
||||
public Statistics(AnalysisLevel al, Taxonomy inflectedJosTaxonomy) {
|
||||
String dateTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm"));
|
||||
|
||||
this.resultTitle = String.format("InflectedJOS_%s_%s",
|
||||
distributionTaxonomy != null ? distributionTaxonomy : "",
|
||||
dateTime);
|
||||
|
||||
this.analysisLevel = al;
|
||||
this.inflectedJosTaxonomy = inflectedJosTaxonomy != null ? inflectedJosTaxonomy.getTaxonomnyString() : null;
|
||||
this.taxonomyIsSet = inflectedJosTaxonomy != null;
|
||||
}
|
||||
|
||||
public Integer getSkip() {
|
||||
return skip;
|
||||
}
|
||||
|
||||
public Integer getSubstringLength() {
|
||||
return substringLength;
|
||||
}
|
||||
|
||||
public String getInflectedJosTaxonomy() {
|
||||
return inflectedJosTaxonomy;
|
||||
}
|
||||
|
||||
public void setSubstringLength(Integer substringLength) {
|
||||
this.substringLength = substringLength;
|
||||
}
|
||||
|
||||
public boolean isVcc() {
|
||||
return vcc;
|
||||
}
|
||||
|
||||
public void setVcc(boolean vcc) {
|
||||
this.vcc = vcc;
|
||||
}
|
||||
|
||||
public String getDistributionTaxonomy() {
|
||||
return distributionTaxonomy;
|
||||
}
|
||||
|
||||
public void setDistributionTaxonomy(String distributionTaxonomy) {
|
||||
this.distributionTaxonomy = distributionTaxonomy;
|
||||
}
|
||||
|
||||
public char getDistributionJosWordType() {
|
||||
return distributionJosWordType;
|
||||
}
|
||||
|
||||
public void setDistributionJosWordType(char distributionJosWordType) {
|
||||
this.distributionJosWordType = distributionJosWordType;
|
||||
}
|
||||
|
||||
public void setMorphosyntacticFilter(List<String> morphosyntacticFilter) {
|
||||
// change filter strings to regex patterns
|
||||
this.morphosyntacticFilter = new ArrayList<>();
|
||||
for (String s : morphosyntacticFilter) {
|
||||
this.morphosyntacticFilter.add(Pattern.compile(s.replaceAll("\\*", ".")));
|
||||
}
|
||||
}
|
||||
|
||||
public List<Pattern> getMsd() {
|
||||
return morphosyntacticFilter;
|
||||
}
|
||||
|
||||
public Map<String, AtomicLong> getResult() {
|
||||
return result;
|
||||
}
|
||||
|
||||
public void setTaxonomy(String taxonomy) {
|
||||
this.taxonomy = taxonomy;
|
||||
}
|
||||
|
||||
public void setTaxonomyIsSet(boolean taxonomyIsSet) {
|
||||
this.taxonomyIsSet = taxonomyIsSet;
|
||||
}
|
||||
|
||||
public char getJOSType() {
|
||||
return JOSType;
|
||||
}
|
||||
|
||||
public void setJOSType(char JOSType) {
|
||||
this.JOSType = JOSType;
|
||||
}
|
||||
|
||||
public boolean isJOSTypeSet() {
|
||||
return JOSTypeIsSet;
|
||||
}
|
||||
|
||||
public void setJOSType(boolean JOSTypeIsSet) {
|
||||
this.JOSTypeIsSet = JOSTypeIsSet;
|
||||
}
|
||||
|
||||
public void saveResultToDisk(int... limit) throws UnsupportedEncodingException {
|
||||
// Set<Pair<String, Map<String, Long>>> stats = new HashSet<>();
|
||||
//
|
||||
// if (useDB) {
|
||||
// result = db.getDump();
|
||||
// db.delete();
|
||||
// }
|
||||
//
|
||||
// // if no results and nothing to save, return false
|
||||
// if (!(result.size() > 0)) {
|
||||
// analysisProducedResults = false;
|
||||
// return;
|
||||
// } else {
|
||||
// analysisProducedResults = true;
|
||||
// }
|
||||
//
|
||||
// stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
|
||||
// Export.SetToCSV(stats);
|
||||
}
|
||||
|
||||
// private Map<String, Integer> getSortedResultInflected(Map map) {
|
||||
// // first convert to <String, Integer>
|
||||
// Map<String, Integer> m = Util.sortByValue(Util.atomicInt2StringAndInt(map), 0);
|
||||
//
|
||||
// Map<String, Integer> sortedM = new TreeMap<>();
|
||||
//
|
||||
// sortedM.putAll(m);
|
||||
//
|
||||
// return sortedM;
|
||||
// }
|
||||
|
||||
private Map<String, Long> getSortedResult(Map<String, AtomicLong> map, int limit) {
|
||||
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
||||
}
|
||||
|
||||
public String getTaxonomy() {
|
||||
return taxonomy;
|
||||
}
|
||||
|
||||
public boolean isTaxonomySet() {
|
||||
return taxonomyIsSet;
|
||||
}
|
||||
|
||||
public int getnGramLevel() {
|
||||
return nGramLevel;
|
||||
}
|
||||
|
||||
public CalculateFor getCf() {
|
||||
return cf;
|
||||
}
|
||||
|
||||
public AnalysisLevel getAnalysisLevel() {
|
||||
return analysisLevel;
|
||||
}
|
||||
|
||||
public CorpusType getCorpusType() {
|
||||
return corpusType;
|
||||
}
|
||||
|
||||
public void setCorpusType(CorpusType corpusType) {
|
||||
this.corpusType = corpusType;
|
||||
}
|
||||
|
||||
public boolean isGosOrthMode() {
|
||||
return gosOrthMode;
|
||||
}
|
||||
|
||||
public void setGosOrthMode(boolean gosOrthMode) {
|
||||
this.gosOrthMode = gosOrthMode;
|
||||
}
|
||||
|
||||
public Map<String, Object> getSolarHeadBlockFilter() {
|
||||
return solarHeadBlockFilter;
|
||||
}
|
||||
|
||||
public void setSolarHeadBlockFilter(Map<String, Object> solarHeadBlockFilter) {
|
||||
this.solarHeadBlockFilter = solarHeadBlockFilter;
|
||||
}
|
||||
|
||||
public boolean isUseDB() {
|
||||
return useDB;
|
||||
}
|
||||
|
||||
public void setUseDB(boolean useDB) {
|
||||
if (useDB && db == null) {
|
||||
db = new RDB();
|
||||
}
|
||||
this.useDB = useDB;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores results from this batch to a database and clears results map
|
||||
*/
|
||||
public void storeTmpResultsToDB() {
|
||||
try {
|
||||
db.writeBatch(result);
|
||||
result = new ConcurrentHashMap<>();
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isAnalysisProducedResults() {
|
||||
return analysisProducedResults;
|
||||
}
|
||||
}
|
||||
409
src/main/java/data/StatisticsNew.java
Normal file
409
src/main/java/data/StatisticsNew.java
Normal file
@@ -0,0 +1,409 @@
|
||||
package data;
|
||||
|
||||
import static gui.ValidationUtil.*;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.tuple.ImmutablePair;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import alg.inflectedJOS.WordFormation;
|
||||
import data.Enums.WordLevelType;
|
||||
import javafx.collections.ObservableList;
|
||||
import util.Export;
|
||||
import util.Util;
|
||||
import util.db.RDB;
|
||||
|
||||
@SuppressWarnings("Duplicates")
|
||||
public class StatisticsNew {
|
||||
public final static Logger logger = LogManager.getLogger(StatisticsNew.class);
|
||||
|
||||
private Corpus corpus;
|
||||
private Filter filter;
|
||||
|
||||
private String resultTitle;
|
||||
private Map<String, AtomicLong> result;
|
||||
private Object[][] resultCustom; // for when calculating percentages that don't add up to 100%
|
||||
private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedSuffix;
|
||||
private Map<String, ConcurrentHashMap<String, AtomicLong>> resultNestedPrefix;
|
||||
private boolean useDB;
|
||||
private RDB db;
|
||||
private boolean analysisProducedResults;
|
||||
private LocalDateTime time;
|
||||
|
||||
public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
|
||||
this.corpus = corpus;
|
||||
this.filter = filter;
|
||||
|
||||
if (useDB) {
|
||||
this.useDB = true;
|
||||
db = new RDB();
|
||||
}
|
||||
|
||||
if (filter.getAl() == AnalysisLevel.WORD_LEVEL) {
|
||||
resultNestedSuffix = new ConcurrentHashMap<>();
|
||||
resultNestedPrefix = new ConcurrentHashMap<>();
|
||||
} else {
|
||||
result = new ConcurrentHashMap<>();
|
||||
}
|
||||
|
||||
resultTitle = generateResultTitle();
|
||||
|
||||
logger.debug(toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Result's title consists of:
|
||||
* <ul>
|
||||
* <li>Corpus type</li>
|
||||
* <li>Analysis level</li>
|
||||
* <li>Calculate for</li>
|
||||
* <li></li>
|
||||
* <li></li>
|
||||
* <li></li>
|
||||
* <li></li>
|
||||
* </ul>
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private String generateResultTitle() {
|
||||
String separator = "_";
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
||||
Integer ngramLevel = filter.getNgramValue();
|
||||
if(ngramLevel == 0) {
|
||||
sb.append("Crke").
|
||||
append(separator)
|
||||
.append(corpus.getCorpusType().toString())
|
||||
.append(separator);
|
||||
} else if(ngramLevel == 1) {
|
||||
sb.append("Besede").append(separator)
|
||||
.append(corpus.getCorpusType().toString())
|
||||
.append(separator);
|
||||
}
|
||||
else {
|
||||
sb.append(filter.getAl().toString())
|
||||
.append(separator)
|
||||
.append(corpus.getCorpusType().toString())
|
||||
.append(separator);
|
||||
sb.append(filter.getCalculateFor().toString())
|
||||
.append(separator);
|
||||
// ngram value
|
||||
sb.append(filter.getNgramValue()).append("-gram")
|
||||
.append(separator);
|
||||
sb.append(filter.getSkipValue()).append("-preskok")
|
||||
.append(separator);
|
||||
}
|
||||
// TODO: assure skip is not null but zero
|
||||
|
||||
} else {
|
||||
sb.append(filter.getAl().toString()) // analysis level
|
||||
.append(separator)
|
||||
.append(corpus.getCorpusType().toString())
|
||||
.append(separator);
|
||||
}
|
||||
// skip value
|
||||
// msd ?
|
||||
// if taxonomy -> taxonomy
|
||||
// if cvv -> cvv + dolžina
|
||||
|
||||
this.time = this.time != null ? this.time : LocalDateTime.now();
|
||||
|
||||
sb.append(time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm.ss")));
|
||||
return sb.toString();
|
||||
|
||||
}
|
||||
|
||||
public boolean isAnalysisProducedResults() {
|
||||
return analysisProducedResults;
|
||||
}
|
||||
|
||||
public void setAnalysisProducedResults(boolean analysisProducedResults) {
|
||||
this.analysisProducedResults = analysisProducedResults;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
String newLine = "\n\t- ";
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(newLine).append("Statistic properties:");
|
||||
sb.append(newLine).append(corpus.getCorpusType().toString()).append(String.format(" (%d files)", corpus.getDetectedCorpusFiles().size()));
|
||||
sb.append(newLine).append(useDB ? "use DB" : "run in memory");
|
||||
sb.append(newLine).append(filter.toString());
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public String getResultTitle() {
|
||||
return resultTitle;
|
||||
}
|
||||
|
||||
// ****************************************
|
||||
// ***************** util *****************
|
||||
// ****************************************
|
||||
|
||||
/**
|
||||
* Stores results from this batch to a database and clears results map
|
||||
*/
|
||||
public void storeTmpResultsToDB() {
|
||||
try {
|
||||
db.writeBatch(result);
|
||||
result = new ConcurrentHashMap<>();
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
logger.error("Store tmp results to DB", e);
|
||||
// e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public Filter getFilter() {
|
||||
return filter;
|
||||
}
|
||||
|
||||
public Corpus getCorpus() {
|
||||
return corpus;
|
||||
}
|
||||
|
||||
public boolean saveResultToDisk(int... limit) throws UnsupportedEncodingException {
|
||||
Set<Pair<String, Map<String, Long>>> stats = new HashSet<>();
|
||||
|
||||
if (useDB) {
|
||||
result = db.getDump();
|
||||
db.delete();
|
||||
}
|
||||
|
||||
// if no results and nothing to save, return false
|
||||
if (!(result.size() > 0)) {
|
||||
analysisProducedResults = false;
|
||||
return false;
|
||||
} else {
|
||||
analysisProducedResults = true;
|
||||
}
|
||||
|
||||
stats.add(ImmutablePair.of(resultTitle, getSortedResult(result, Util.getValidInt(limit))));
|
||||
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock());
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean saveResultNestedToDisk(int... limit) throws UnsupportedEncodingException {
|
||||
resultTitle = generateResultTitle();
|
||||
|
||||
if (useDB) {
|
||||
result = db.getDump();
|
||||
db.delete();
|
||||
}
|
||||
Map<WordLevelType, Map<String, Map<String, Long>>> results = new HashMap<>();
|
||||
|
||||
if (!isEmpty(resultNestedSuffix)) {
|
||||
results.put(WordLevelType.SUFFIX, sortNestedMap(resultNestedSuffix, Util.getValidInt(limit)));
|
||||
}
|
||||
|
||||
if (!isEmpty(resultNestedPrefix)) {
|
||||
results.put(WordLevelType.PREFIX, sortNestedMap(resultNestedPrefix, Util.getValidInt(limit)));
|
||||
}
|
||||
|
||||
// if no results and nothing to save, return false
|
||||
if (!(results.size() > 0)) {
|
||||
analysisProducedResults = false;
|
||||
return false;
|
||||
} else {
|
||||
analysisProducedResults = true;
|
||||
}
|
||||
|
||||
Export.nestedMapToCSV(resultTitle, results, corpus.getChosenResultsLocation(), headerInfoBlock());
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException {
|
||||
filter.setAl(AnalysisLevel.WORD_FORMATION);
|
||||
resultTitle = generateResultTitle();
|
||||
|
||||
if (useDB) {
|
||||
result = db.getDump();
|
||||
db.delete();
|
||||
}
|
||||
|
||||
// if no results and nothing to save, return false
|
||||
if (!(result.size() > 0)) {
|
||||
analysisProducedResults = false;
|
||||
return false;
|
||||
} else {
|
||||
analysisProducedResults = true;
|
||||
}
|
||||
|
||||
WordFormation.calculateStatistics(this);
|
||||
|
||||
Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock());
|
||||
return true;
|
||||
}
|
||||
|
||||
private Map<String, Map<String, Long>> sortNestedMap(Map<String, ConcurrentHashMap<String, AtomicLong>> nestedMap, int limit) {
|
||||
Map<String, Map<String, Long>> sorted = new HashMap<>();
|
||||
|
||||
for (String s : nestedMap.keySet()) {
|
||||
sorted.put(s, getSortedResult(nestedMap.get(s), Util.getValidInt(limit)));
|
||||
}
|
||||
|
||||
return sorted;
|
||||
}
|
||||
|
||||
|
||||
private Map<String, Long> getSortedResult(Map<String, AtomicLong> map, int limit) {
|
||||
return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
|
||||
}
|
||||
|
||||
public void updateResults(String o) {
|
||||
// if not in map
|
||||
AtomicLong r = result.putIfAbsent(o, new AtomicLong(1));
|
||||
|
||||
// else
|
||||
if (r != null)
|
||||
result.get(o).incrementAndGet();
|
||||
}
|
||||
|
||||
public Map<String, AtomicLong> getResult() {
|
||||
return result;
|
||||
}
|
||||
|
||||
public Object[][] getResultCustom() {
|
||||
return resultCustom;
|
||||
}
|
||||
|
||||
public void setResultCustom(Object[][] resultCustom) {
|
||||
this.resultCustom = resultCustom;
|
||||
}
|
||||
|
||||
public void updateResultsNested(WordLevelType type, String key, String stringValue) {
|
||||
ConcurrentHashMap<String, ConcurrentHashMap<String, AtomicLong>> resultsMap;
|
||||
|
||||
if (type == WordLevelType.SUFFIX) {
|
||||
updateResultsNestedSuffix(key, stringValue);
|
||||
} else if (type == WordLevelType.PREFIX) {
|
||||
updateResultsNestedPrefix(key, stringValue);
|
||||
}
|
||||
}
|
||||
|
||||
public void updateResultsNestedSuffix(String key, String stringValue) {
|
||||
if (resultNestedSuffix.containsKey(key)) {
|
||||
// if not in map
|
||||
AtomicLong r = resultNestedSuffix.get(key).putIfAbsent(stringValue, new AtomicLong(1));
|
||||
|
||||
// else
|
||||
if (r != null) {
|
||||
resultNestedSuffix.get(key).get(stringValue).incrementAndGet();
|
||||
}
|
||||
} else {
|
||||
resultNestedSuffix.putIfAbsent(key, new ConcurrentHashMap<>());
|
||||
AtomicLong r = resultNestedSuffix.get(key).putIfAbsent(stringValue, new AtomicLong(1));
|
||||
|
||||
if (r != null) {
|
||||
resultNestedSuffix.get(key).get(stringValue).incrementAndGet();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void updateResultsNestedPrefix(String key, String stringValue) {
|
||||
if (resultNestedPrefix.containsKey(key)) {
|
||||
// if not in map
|
||||
AtomicLong r = resultNestedPrefix.get(key).putIfAbsent(stringValue, new AtomicLong(1));
|
||||
|
||||
// else
|
||||
if (r != null) {
|
||||
resultNestedPrefix.get(key).get(stringValue).incrementAndGet();
|
||||
}
|
||||
} else {
|
||||
resultNestedPrefix.putIfAbsent(key, new ConcurrentHashMap<>());
|
||||
AtomicLong r = resultNestedPrefix.get(key).putIfAbsent(stringValue, new AtomicLong(1));
|
||||
|
||||
if (r != null) {
|
||||
resultNestedPrefix.get(key).get(stringValue).incrementAndGet();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private LinkedHashMap<String, String> headerInfoBlock() {
|
||||
LinkedHashMap<String, String> info = new LinkedHashMap<>();
|
||||
|
||||
info.put("Korpus:", corpus.getCorpusType().toString());
|
||||
info.put("Datum:", time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy hh:mm")));
|
||||
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
||||
Integer ngramLevel = filter.getNgramValue();
|
||||
if (ngramLevel == 0)
|
||||
info.put("Analiza:", "Črke");
|
||||
else if (ngramLevel == 1)
|
||||
info.put("Analiza", "Besede");
|
||||
else
|
||||
info.put("Analiza:", filter.getAl().toString());
|
||||
} else {
|
||||
info.put("Analiza:", filter.getAl().toString());
|
||||
}
|
||||
|
||||
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
|
||||
Integer ngramLevel = filter.getNgramValue();
|
||||
|
||||
// n.gram nivo
|
||||
if (ngramLevel > 1) {
|
||||
info.put("n-gram nivo:", String.valueOf(ngramLevel));
|
||||
} else if (ngramLevel == 1){
|
||||
info.put("n-gram nivo:", "nivo besed");
|
||||
} else {
|
||||
info.put("n-gram nivo:", "nivo črk");
|
||||
}
|
||||
// skip
|
||||
if (ngramLevel > 1)
|
||||
info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");
|
||||
|
||||
// izračunaj za
|
||||
info.put("Izračunaj za:", filter.getCalculateFor().toString());
|
||||
|
||||
// msd
|
||||
if (!isEmpty(filter.getMsd())) {
|
||||
StringBuilder msdPattern = new StringBuilder();
|
||||
for (Pattern pattern : filter.getMsd()) {
|
||||
msdPattern.append(pattern.toString()).append(" ");
|
||||
}
|
||||
|
||||
info.put("MSD:", msdPattern.toString());
|
||||
}
|
||||
|
||||
// taksonomija
|
||||
if (!isEmpty(filter.getTaxonomy())) {
|
||||
info.put("Taksonomija:", StringUtils.join(filter.getTaxonomy(), ", "));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
if (isNotEmpty(filter.getTaxonomy()) && Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
||||
ArrayList<String> tax = Tax.getTaxonomyForInfo(corpus.getCorpusType(), filter.getTaxonomy());
|
||||
|
||||
info.put("Taksonomija: ", "");
|
||||
String sep = "";
|
||||
for (String s : tax) {
|
||||
info.put(sep = sep + " ", s);
|
||||
}
|
||||
}
|
||||
|
||||
if (corpus.getCorpusType() == CorpusType.SOLAR) {
|
||||
HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();
|
||||
|
||||
if (!isEmpty(filters)) {
|
||||
info.put("Dodatni filtri: ", "");
|
||||
|
||||
for (Map.Entry<String, ObservableList<String>> f : filters.entrySet()) {
|
||||
info.put(f.getKey(), StringUtils.join(f.getValue(), ", "));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return info;
|
||||
}
|
||||
}
|
||||
175
src/main/java/data/Tax.java
Normal file
175
src/main/java/data/Tax.java
Normal file
@@ -0,0 +1,175 @@
|
||||
package data;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import gui.ValidationUtil;
|
||||
import javafx.collections.FXCollections;
|
||||
import javafx.collections.ObservableList;
|
||||
|
||||
public class Tax {
|
||||
private static LinkedHashMap<String, String> GIGAFIDA_TAXONOMY;
|
||||
private static LinkedHashMap<String, String> GOS_TAXONOMY;
|
||||
private static final HashSet<CorpusType> corpusTypesWithTaxonomy = new HashSet<>(Arrays.asList(CorpusType.GIGAFIDA, CorpusType.GOS, CorpusType.CCKRES));
|
||||
|
||||
static {
|
||||
// GIGAFIDA ----------------------------
|
||||
GIGAFIDA_TAXONOMY = new LinkedHashMap<>();
|
||||
|
||||
GIGAFIDA_TAXONOMY.put("SSJ.T", "tisk");
|
||||
GIGAFIDA_TAXONOMY.put("SSJ.T.K", "tisk-knjižno");
|
||||
GIGAFIDA_TAXONOMY.put("SSJ.T.K.L", "tisk-knjižno-leposlovno");
|
||||
GIGAFIDA_TAXONOMY.put("SSJ.T.K.S", "tisk-knjižno-strokovno");
|
||||
GIGAFIDA_TAXONOMY.put("SSJ.T.P", "tisk-periodično");
|
||||
GIGAFIDA_TAXONOMY.put("SSJ.T.P.C", "tisk-periodično-časopis");
|
||||
GIGAFIDA_TAXONOMY.put("SSJ.T.P.R", "tisk-periodično-revija");
|
||||
GIGAFIDA_TAXONOMY.put("SSJ.T.D", "tisk-drugo");
|
||||
GIGAFIDA_TAXONOMY.put("SSJ.I", "internet");
|
||||
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P", "prenosnik");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.G", "prenosnik-govorni");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.E", "prenosnik-elektronski");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P", "prenosnik-pisni");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.O", "prenosnik-pisni-objavljeno");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.K", "prenosnik-pisni-objavljeno-knjižno");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P", "prenosnik-pisni-objavljeno-periodično");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C", "prenosnik-pisni-objavljeno-periodično-časopisno");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.D", "prenosnik-pisni-objavljeno-periodično-časopisno-dnevno");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.V", "prenosnik-pisni-objavljeno-periodično-časopisno-večkrat tedensko");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.C.T", "prenosnik-pisni-objavljeno-periodično-časopisno-tedensko");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R", "prenosnik-pisni-objavljeno-periodično-revialno");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.T", "prenosnik-pisni-objavljeno-periodično-revialno-tedensko");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.S", "prenosnik-pisni-objavljeno-periodično-revialno-štirinajstdnevno");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.M", "prenosnik-pisni-objavljeno-periodično-revialno-mesečno");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.D", "prenosnik-pisni-objavljeno-periodično-revialno-redkeje kot na mesec");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.O.P.R.O", "prenosnik-pisni-objavljeno-periodično-revialno-občasno");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.N", "prenosnik-pisni-neobjavljeno");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.J", "prenosnik-pisni-neobjavljeno-javno");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.I", "prenosnik-pisni-neobjavljeno-interno");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.P.P.N.Z", "prenosnik-pisni-neobjavljeno-zasebno");
|
||||
|
||||
GIGAFIDA_TAXONOMY.put("Ft.Z", "zvrst");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.Z.U", "zvrst-umetnostna");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.Z.U.P", "zvrst-umetnostna-pesniška");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.Z.U.R", "zvrst-umetnostna-prozna");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.Z.U.D", "zvrst-umetnostna-dramska");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.Z.N", "zvrst-neumetnostna");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S", "zvrst-neumetnostna-strokovna");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.H", "zvrst-neumetnostna-strokovna-humanistična in družboslovna");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.Z.N.S.N", "zvrst-neumetnostna-strokovna-naravoslovna in tehnična");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.Z.N.N", "zvrst-neumetnostna-nestrokovna");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.Z.N.P", "zvrst-neumetnostna-pravna");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.L", "zvrst-lektorirano");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.L.D", "zvrst-lektorirano-da");
|
||||
GIGAFIDA_TAXONOMY.put("Ft.L.N", "zvrst-lektorirano-ne");
|
||||
|
||||
// GOS ----------------------------------
|
||||
GOS_TAXONOMY = new LinkedHashMap<>();
|
||||
|
||||
GOS_TAXONOMY.put("gos.T", "diskurz");
|
||||
GOS_TAXONOMY.put("gos.T.J", "diskurz-javni");
|
||||
GOS_TAXONOMY.put("gos.T.J.I", "diskurz-javni-informativno-izobraževalni");
|
||||
GOS_TAXONOMY.put("gos.T.J.R", "diskurz-javni-razvedrilni");
|
||||
GOS_TAXONOMY.put("gos.T.N", "diskurz-nejavni");
|
||||
GOS_TAXONOMY.put("gos.T.N.N", "diskurz-nejavni-nezasebni");
|
||||
GOS_TAXONOMY.put("gos.T.N.Z", "diskurz-nejavni-zasebni");
|
||||
|
||||
GOS_TAXONOMY.put("gos.S", "situacija");
|
||||
GOS_TAXONOMY.put("gos.S.R", "situacija-radio");
|
||||
GOS_TAXONOMY.put("gos.S.T", "situacija-televizija");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the whole default taxonomy for the specified corpus type
|
||||
*/
|
||||
public static ObservableList<String> getTaxonomyForComboBox(CorpusType corpusType) {
|
||||
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
|
||||
return FXCollections.observableArrayList(GIGAFIDA_TAXONOMY.values());
|
||||
} else if (corpusType == CorpusType.GOS) {
|
||||
return FXCollections.observableArrayList(GOS_TAXONOMY.values());
|
||||
}
|
||||
|
||||
return FXCollections.observableArrayList(new ArrayList<>());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns taxonomy names only for items found in headers
|
||||
*/
|
||||
public static ObservableList<String> getTaxonomyForComboBox(CorpusType corpusType, HashSet<String> foundTax) {
|
||||
LinkedHashMap<String, String> tax = new LinkedHashMap<>();
|
||||
|
||||
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
|
||||
tax = GIGAFIDA_TAXONOMY;
|
||||
} else if (corpusType == CorpusType.GOS) {
|
||||
tax = GOS_TAXONOMY;
|
||||
}
|
||||
|
||||
ArrayList<String> taxForCombo = new ArrayList<>();
|
||||
|
||||
// assures same relative order
|
||||
for (String t : tax.keySet()) {
|
||||
if (foundTax.contains(t)) {
|
||||
taxForCombo.add(tax.get(t));
|
||||
}
|
||||
}
|
||||
|
||||
return FXCollections.observableArrayList(taxForCombo);
|
||||
}
|
||||
|
||||
public static HashSet<CorpusType> getCorpusTypesWithTaxonomy() {
|
||||
return corpusTypesWithTaxonomy;
|
||||
}
|
||||
|
||||
public static ArrayList<String> getTaxonomyCodes(ArrayList<String> taxonomyNames, CorpusType corpusType) {
|
||||
ArrayList<String> result = new ArrayList<>();
|
||||
|
||||
if (ValidationUtil.isEmpty(taxonomyNames)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
LinkedHashMap<String, String> tax = new LinkedHashMap<>();
|
||||
|
||||
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
|
||||
tax = GIGAFIDA_TAXONOMY;
|
||||
} else if (corpusType == CorpusType.GOS) {
|
||||
tax = GOS_TAXONOMY;
|
||||
}
|
||||
|
||||
// for easier lookup
|
||||
Map<String, String> taxInversed = tax.entrySet()
|
||||
.stream()
|
||||
.collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
|
||||
|
||||
for (String taxonomyName : taxonomyNames) {
|
||||
result.add(taxInversed.get(taxonomyName));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of proper names for codes
|
||||
*
|
||||
* @param corpusType
|
||||
* @param taxonomy
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public static ArrayList<String> getTaxonomyForInfo(CorpusType corpusType, ArrayList<String> taxonomy) {
|
||||
LinkedHashMap<String, String> tax = new LinkedHashMap<>();
|
||||
|
||||
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.CCKRES) {
|
||||
tax = GIGAFIDA_TAXONOMY;
|
||||
} else if (corpusType == CorpusType.GOS) {
|
||||
tax = GOS_TAXONOMY;
|
||||
}
|
||||
|
||||
ArrayList<String> result = new ArrayList<>();
|
||||
|
||||
for (String t : taxonomy) {
|
||||
result.add(tax.get(t));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
171
src/main/java/data/Taxonomy.java
Normal file
171
src/main/java/data/Taxonomy.java
Normal file
@@ -0,0 +1,171 @@
|
||||
package data;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javafx.collections.FXCollections;
|
||||
import javafx.collections.ObservableList;
|
||||
|
||||
public enum Taxonomy {
|
||||
// GOS
|
||||
JAVNI("javni", "T.J", "gos"),
|
||||
INFORMATIVNO_IZOBRAZEVALNI("informativno-izobraževalni", "T.J.I", "gos"),
|
||||
RAZVEDRILNI("razvedrilni", "T.J.R", "gos"),
|
||||
NEJAVNI("nejavni", "T.N", "gos"),
|
||||
NEZASEBNI("nezasebni", "T.N.N", "gos"),
|
||||
ZASEBNI("zasebni", "T.N.Z", "gos"),
|
||||
OSEBNI_STIK("osebni stik", "K.O", "gos"),
|
||||
TELEFON("telefon", "K.P", "gos"),
|
||||
RADIO("radio", "K.R", "gos"),
|
||||
TELEVIZIJA("televizija", "K.T", "gos"),
|
||||
// Gigafida
|
||||
KNJIZNO("knjižno", "T.K", "gigafida"),
|
||||
LEPOSLOVNO("leposlovno", "T.K.L", "gigafida"),
|
||||
STROKOVNO("strokovno", "T.K.S", "gigafida"),
|
||||
PERIODICNO("periodično", "T.P", "gigafida"),
|
||||
CASOPIS("časopis", "T.P.C", "gigafida"),
|
||||
REVIJA("revija", "T.P.R", "gigafida"),
|
||||
INTERNET("internet", "I", "gigafida"),
|
||||
|
||||
SSJ_TISK("tisk", "SSJ.T", "gigafida"),
|
||||
SSJ_KNJIZNO("opis", "identifikator", "gigafida"),
|
||||
SSJ_LEPOSLOVNO("opis", "identifikator", "gigafida"),
|
||||
SSJ_STROKOVNO("opis", "identifikator", "gigafida"),
|
||||
SSJ_PERIODICNO("opis", "identifikator", "gigafida"),
|
||||
SSJ_CASOPIS("opis", "identifikator", "gigafida"),
|
||||
SSJ_REVIJA("opis", "identifikator", "gigafida"),
|
||||
SSJ_DRUGO("opis", "identifikator", "gigafida"),
|
||||
SSJ_INTERNET("opis", "identifikator", "gigafida"),
|
||||
FT_P_PRENOSNIK("opis", "identifikator", "gigafida"),
|
||||
FT_P_GOVORNI("opis", "identifikator", "gigafida"),
|
||||
FT_P_ELEKTRONSKI("opis", "identifikator", "gigafida"),
|
||||
FT_P_PISNI("opis", "identifikator", "gigafida"),
|
||||
FT_P_OBJAVLJENO("opis", "identifikator", "gigafida"),
|
||||
FT_P_KNJIZNO("opis", "identifikator", "gigafida"),
|
||||
FT_P_PERIODICNO("opis", "identifikator", "gigafida"),
|
||||
FT_P_CASOPISNO("opis", "identifikator", "gigafida"),
|
||||
FT_P_DNEVNO("opis", "identifikator", "gigafida"),
|
||||
FT_P_VECKRAT_TEDENSKO("opis", "identifikator", "gigafida"),
|
||||
// FT_P_TEDENSKO("opis", "identifikator", "gigafida"),
|
||||
FT_P_REVIALNO("opis", "identifikator", "gigafida"),
|
||||
FT_P_TEDENSKO("opis", "identifikator", "gigafida"),
|
||||
FT_P_STIRINAJSTDNEVNO("opis", "identifikator", "gigafida"),
|
||||
FT_P_MESECNO("opis", "identifikator", "gigafida"),
|
||||
FT_P_REDKEJE_KOT_MESECNO("opis", "identifikator", "gigafida"),
|
||||
FT_P_OBCASNO("opis", "identifikator", "gigafida"),
|
||||
FT_P_NEOBJAVLJENO("opis", "identifikator", "gigafida"),
|
||||
FT_P_JAVNO("opis", "identifikator", "gigafida"),
|
||||
FT_P_INTERNO("opis", "identifikator", "gigafida"),
|
||||
FT_P_ZASEBNO("opis", "identifikator", "gigafida"),
|
||||
FT_ZVRST("opis", "identifikator", "gigafida"),
|
||||
FT_UMETNOSTNA("opis", "identifikator", "gigafida"),
|
||||
FT_PESNISKA("opis", "identifikator", "gigafida"),
|
||||
FT_PROZNA("opis", "identifikator", "gigafida"),
|
||||
FT_DRAMSKA("opis", "identifikator", "gigafida"),
|
||||
FT_NEUMETNOSTNA("opis", "identifikator", "gigafida"),
|
||||
FT_STROKOVNA("opis", "identifikator", "gigafida"),
|
||||
FT_HID("opis", "identifikator", "gigafida"),
|
||||
FT_NIT("opis", "identifikator", "gigafida"),
|
||||
FT_NESTROKOVNA("opis", "identifikator", "gigafida"),
|
||||
FT_PRAVNA("opis", "identifikator", "gigafida"),
|
||||
FT_LEKTORIRANO("opis", "identifikator", "gigafida"),
|
||||
FT_DA("opis", "identifikator", "gigafida"),
|
||||
FT_NE("opis", "identifikator", "gigafida");
|
||||
|
||||
|
||||
|
||||
private final String name;
|
||||
private final String taxonomy;
|
||||
private final String corpus;
|
||||
|
||||
Taxonomy(String name, String taxonomy, String corpusType) {
|
||||
this.name = name;
|
||||
this.taxonomy = taxonomy;
|
||||
this.corpus = corpusType;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return this.name;
|
||||
}
|
||||
|
||||
public String getTaxonomnyString() {
|
||||
return this.taxonomy;
|
||||
}
|
||||
|
||||
public static Taxonomy factory(String tax) {
|
||||
if (tax != null) {
|
||||
// GOS
|
||||
if (JAVNI.toString().equals(tax)) {
|
||||
return JAVNI;
|
||||
}
|
||||
if (INFORMATIVNO_IZOBRAZEVALNI.toString().equals(tax)) {
|
||||
return INFORMATIVNO_IZOBRAZEVALNI;
|
||||
}
|
||||
if (RAZVEDRILNI.toString().equals(tax)) {
|
||||
return RAZVEDRILNI;
|
||||
}
|
||||
if (NEJAVNI.toString().equals(tax)) {
|
||||
return NEJAVNI;
|
||||
}
|
||||
if (NEZASEBNI.toString().equals(tax)) {
|
||||
return NEZASEBNI;
|
||||
}
|
||||
if (ZASEBNI.toString().equals(tax)) {
|
||||
return ZASEBNI;
|
||||
}
|
||||
if (OSEBNI_STIK.toString().equals(tax)) {
|
||||
return OSEBNI_STIK;
|
||||
}
|
||||
if (TELEFON.toString().equals(tax)) {
|
||||
return TELEFON;
|
||||
}
|
||||
if (RADIO.toString().equals(tax)) {
|
||||
return RADIO;
|
||||
}
|
||||
if (TELEVIZIJA.toString().equals(tax)) {
|
||||
return TELEVIZIJA;
|
||||
}
|
||||
|
||||
// Gigafida
|
||||
// if (TISK.toString().equals(tax)) {
|
||||
// return TISK;
|
||||
// }
|
||||
if (KNJIZNO.toString().equals(tax)) {
|
||||
return KNJIZNO;
|
||||
}
|
||||
if (LEPOSLOVNO.toString().equals(tax)) {
|
||||
return LEPOSLOVNO;
|
||||
}
|
||||
if (STROKOVNO.toString().equals(tax)) {
|
||||
return STROKOVNO;
|
||||
}
|
||||
if (PERIODICNO.toString().equals(tax)) {
|
||||
return PERIODICNO;
|
||||
}
|
||||
if (CASOPIS.toString().equals(tax)) {
|
||||
return CASOPIS;
|
||||
}
|
||||
if (REVIJA.toString().equals(tax)) {
|
||||
return REVIJA;
|
||||
}
|
||||
if (INTERNET.toString().equals(tax)) {
|
||||
return INTERNET;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static ObservableList<String> getDefaultForComboBox(String corpusType) {
|
||||
ArrayList<String> values = Arrays.stream(Taxonomy.values())
|
||||
.filter(x -> x.corpus.equals(corpusType))
|
||||
.map(x -> x.name)
|
||||
.collect(Collectors.toCollection(ArrayList::new));
|
||||
|
||||
return FXCollections.observableArrayList(values);
|
||||
}
|
||||
|
||||
public static ObservableList<String> getDefaultForComboBox(CorpusType corpusType) {
|
||||
return getDefaultForComboBox(corpusType.toString());
|
||||
}
|
||||
}
|
||||
53
src/main/java/data/Validation.java
Normal file
53
src/main/java/data/Validation.java
Normal file
@@ -0,0 +1,53 @@
|
||||
package data;
|
||||
|
||||
import static gui.ValidationUtil.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import gui.Messages;
|
||||
import gui.ValidationUtil;
|
||||
|
||||
public class Validation {
|
||||
|
||||
public static String validateForStringLevel(Filter filter) {
|
||||
ArrayList<String> errors = new ArrayList<>();
|
||||
|
||||
// should not be null, error if null, because init failed
|
||||
if (filter.getNgramValue() == null) {
|
||||
errors.add(Messages.MISSING_NGRAM_LEVEL);
|
||||
}
|
||||
|
||||
// should not be null, error if null, because init failed
|
||||
if (filter.getCalculateFor() == null) {
|
||||
errors.add(Messages.MISSING_CALCULATE_FOR);
|
||||
}
|
||||
|
||||
if (filter.getSkipValue() == null) {
|
||||
filter.setSkipValue(0);
|
||||
}
|
||||
|
||||
if (filter.getNgramValue() != null && ValidationUtil.isEmpty(filter.getMsd()) &&
|
||||
(filter.getMsd().size() != filter.getNgramValue())) {
|
||||
if (!(filter.getMsd().size() == 1 && filter.getNgramValue() == 0) && !ValidationUtil.isEmpty(filter.getMsd()))
|
||||
errors.add(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES);
|
||||
}
|
||||
|
||||
Integer ngramValue = filter.getNgramValue();
|
||||
ArrayList<Pattern> msd = filter.getMsd();
|
||||
|
||||
if (ngramValue > 0 && !ValidationUtil.isEmpty(msd) && ngramValue != msd.size()) {
|
||||
errors.add(String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, ngramValue, msd.size()));
|
||||
}
|
||||
|
||||
if (filter.getNgramValue() != null && filter.getNgramValue() == 0 && isEmpty(filter.getStringLength())) {
|
||||
// if count letters, make sure that the length is given
|
||||
// TODO: check that words we're adding in xml reader are longer than this value
|
||||
errors.add(Messages.MISSING_STRING_LENGTH);
|
||||
}
|
||||
|
||||
return isEmpty(errors) ? null : StringUtils.join(errors, ", \n");
|
||||
}
|
||||
}
|
||||
141
src/main/java/data/Word.java
Normal file
141
src/main/java/data/Word.java
Normal file
@@ -0,0 +1,141 @@
|
||||
package data;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import data.Enums.Msd;
|
||||
import gui.ValidationUtil;
|
||||
|
||||
public class Word implements Serializable {
|
||||
public static final char PAD_CHARACTER = '-';
|
||||
|
||||
private String word;
|
||||
private String lemma;
|
||||
private String msd;
|
||||
private final HashSet<Character> VOWELS = new HashSet<>(Arrays.asList('a', 'e', 'i', 'o', 'u'));
|
||||
|
||||
/**
|
||||
* Possible values:
|
||||
* <p>
|
||||
* <ul>
|
||||
* <li>S = samostalnik</li>
|
||||
* <li>G = glagol</li>
|
||||
* <li>P = pridevnik</li>
|
||||
* <li>R = prislov</li>
|
||||
* <li>Z = zaimek</li>
|
||||
* <li>K = števnik</li>
|
||||
* <li>D = predlog</li>
|
||||
* <li>V = veznik</li>
|
||||
* <li>L = členek</li>
|
||||
* <li>M = medmet</li>
|
||||
* <li>O = okrajšava</li>
|
||||
* <li>N = neuvrščeno</li>
|
||||
* </ul>
|
||||
*/
|
||||
//private char besedna_vrsta;
|
||||
public Word(String word, String lemma, String msd) {
|
||||
this.lemma = lemma;
|
||||
this.msd = normalizeMsd(msd);
|
||||
|
||||
// veliko zacetnico ohranimo samo za lastna imena
|
||||
if (!ValidationUtil.isEmpty(this.msd) && !(this.msd.charAt(0) == 'S'
|
||||
&& this.msd.length() >= 2
|
||||
&& this.msd.charAt(1) == 'l')) {
|
||||
this.word = word.toLowerCase();
|
||||
} else {
|
||||
this.word = word;
|
||||
}
|
||||
}
|
||||
|
||||
public Word() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends a number of '-' to msds which are not properly sized.
|
||||
* E.g. nouns should have 5 attributes, yet the last one isn't always defined (Somei vs. Sometd)
|
||||
*
|
||||
* @param msdInput
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private String normalizeMsd(String msdInput) {
|
||||
if (ValidationUtil.isEmpty(msdInput)) {
|
||||
return "";
|
||||
} else {
|
||||
return StringUtils.rightPad(msdInput, Msd.getMsdLengthForType(msdInput), PAD_CHARACTER);
|
||||
}
|
||||
}
|
||||
|
||||
public Word(String word) {
|
||||
this.word = word;
|
||||
}
|
||||
|
||||
public String getWord() {
|
||||
return word;
|
||||
}
|
||||
|
||||
public String getCVVWord() {
|
||||
return covertToCvv(word);
|
||||
}
|
||||
|
||||
public String getCVVLemma() {
|
||||
return covertToCvv(lemma);
|
||||
}
|
||||
|
||||
private String covertToCvv(String s) {
|
||||
char[] StringCA = s.toCharArray();
|
||||
|
||||
for (int i = 0; i < StringCA.length; i++) {
|
||||
StringCA[i] = VOWELS.contains(StringCA[i]) ? 'V' : 'C';
|
||||
}
|
||||
|
||||
return new String(StringCA);
|
||||
}
|
||||
|
||||
public void setWord(String word) {
|
||||
this.word = word;
|
||||
}
|
||||
|
||||
public String getLemma() {
|
||||
return lemma;
|
||||
}
|
||||
|
||||
public void setLemma(String lemma) {
|
||||
this.lemma = lemma;
|
||||
}
|
||||
|
||||
public String getMsd() {
|
||||
return msd;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
sb.append("beseda:\t")
|
||||
.append(getWord())
|
||||
.append("\n")
|
||||
.append("lema:\t")
|
||||
.append(getLemma())
|
||||
.append("\n")
|
||||
.append("msd:\t")
|
||||
.append(getMsd())
|
||||
.append("\n");
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public String getForCf(CalculateFor calculateFor, boolean cvv) {
|
||||
String returnValue = "";
|
||||
|
||||
if (cvv) {
|
||||
returnValue = calculateFor == CalculateFor.WORD ? getCVVWord() : getCVVLemma();
|
||||
} else {
|
||||
returnValue = calculateFor == CalculateFor.WORD ? getWord() : getLemma();
|
||||
}
|
||||
|
||||
return returnValue;
|
||||
}
|
||||
}
|
||||
454
src/main/java/gui/CharacterAnalysisTab.java
Normal file
454
src/main/java/gui/CharacterAnalysisTab.java
Normal file
@@ -0,0 +1,454 @@
|
||||
package gui;
|
||||
|
||||
import data.*;
|
||||
import javafx.application.HostServices;
|
||||
import javafx.beans.value.ChangeListener;
|
||||
import javafx.beans.value.ObservableValue;
|
||||
import javafx.collections.FXCollections;
|
||||
import javafx.collections.ListChangeListener;
|
||||
import javafx.collections.ObservableList;
|
||||
import javafx.concurrent.Task;
|
||||
import javafx.fxml.FXML;
|
||||
import javafx.scene.control.*;
|
||||
import javafx.scene.layout.Pane;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.controlsfx.control.CheckComboBox;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static alg.XML_processing.readXML;
|
||||
import static gui.GUIController.showAlert;
|
||||
import static gui.Messages.*;
|
||||
|
||||
@SuppressWarnings("Duplicates")
|
||||
public class CharacterAnalysisTab {
|
||||
public final static Logger logger = LogManager.getLogger(CharacterAnalysisTab.class);
|
||||
|
||||
@FXML
|
||||
public Label selectedFiltersLabel;
|
||||
@FXML
|
||||
public Label solarFilters;
|
||||
|
||||
@FXML
|
||||
private TextField msdTF;
|
||||
private ArrayList<Pattern> msd;
|
||||
private ArrayList<String> msdStrings;
|
||||
|
||||
@FXML
|
||||
private CheckComboBox<String> taxonomyCCB;
|
||||
private ArrayList<String> taxonomy;
|
||||
|
||||
@FXML
|
||||
private CheckBox calculatecvvCB;
|
||||
private boolean calculateCvv;
|
||||
|
||||
@FXML
|
||||
private TextField stringLengthTF;
|
||||
private Integer stringLength;
|
||||
|
||||
@FXML
|
||||
private ToggleGroup calculateForRB;
|
||||
private CalculateFor calculateFor;
|
||||
|
||||
@FXML
|
||||
private RadioButton lemmaRB;
|
||||
|
||||
@FXML
|
||||
private RadioButton varietyRB;
|
||||
|
||||
@FXML
|
||||
private Pane paneLetters;
|
||||
|
||||
@FXML
|
||||
private Button computeNgramsB;
|
||||
|
||||
@FXML
|
||||
public ProgressBar ngramProgressBar;
|
||||
@FXML
|
||||
public Label progressLabel;
|
||||
|
||||
@FXML
|
||||
private Hyperlink helpH;
|
||||
|
||||
// Analysis modes offered by this tab; LETTER is the only one defined.
private enum MODE {
    LETTER
}
|
||||
|
||||
private MODE currentMode;
|
||||
|
||||
private Corpus corpus;
|
||||
private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||
private Filter filter;
|
||||
private boolean useDb;
|
||||
private HostServices hostService;
|
||||
|
||||
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("različnica", "lema");
|
||||
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
||||
|
||||
|
||||
// TODO: pass observables for taxonomy based on header scan
|
||||
// after header scan
|
||||
private ObservableList<String> taxonomyCCBValues;
|
||||
private CorpusType currentCorpusType;
|
||||
|
||||
public void init() {
|
||||
currentMode = MODE.LETTER;
|
||||
toggleMode(currentMode);
|
||||
|
||||
calculateForRB.selectedToggleProperty().addListener(new ChangeListener<Toggle>() {
|
||||
@Override
|
||||
public void changed(ObservableValue<? extends Toggle> observable, Toggle oldValue, Toggle newValue) {
|
||||
//logger.info("calculateForRB:", newValue.toString());
|
||||
RadioButton chk = (RadioButton)newValue.getToggleGroup().getSelectedToggle(); // Cast object to radio button
|
||||
calculateFor = CalculateFor.factory(chk.getText());
|
||||
logger.info("calculateForRB:", chk.getText());
|
||||
//System.out.println("Selected Radio Button - "+chk.getText());
|
||||
}
|
||||
});
|
||||
|
||||
// msd
|
||||
msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
if (!newValue) {
|
||||
// focus lost
|
||||
String value = msdTF.getText();
|
||||
logger.info("msdTf: ", value);
|
||||
|
||||
if (!ValidationUtil.isEmpty(value)) {
|
||||
ArrayList<String> msdTmp = new ArrayList<>(Arrays.asList(value.split(" ")));
|
||||
|
||||
int nOfRequiredMsdTokens = 1;
|
||||
if (msdTmp.size() != nOfRequiredMsdTokens) {
|
||||
String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size());
|
||||
logAlert(msg);
|
||||
showAlert(Alert.AlertType.ERROR, msg);
|
||||
}
|
||||
msd = new ArrayList<>();
|
||||
msdStrings = new ArrayList<>();
|
||||
for (String msdToken : msdTmp) {
|
||||
msd.add(Pattern.compile(msdToken));
|
||||
msdStrings.add(msdToken);
|
||||
}
|
||||
logger.info(String.format("msd accepted (%d)", msd.size()));
|
||||
|
||||
} else if (!ValidationUtil.isEmpty(newValue)) {
|
||||
msd = new ArrayList<>();
|
||||
msdStrings = new ArrayList<>();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
msdTF.setText("");
|
||||
msd = new ArrayList<>();
|
||||
|
||||
// taxonomy
|
||||
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
||||
taxonomyCCB.getItems().removeAll();
|
||||
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
|
||||
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||
taxonomy = new ArrayList<>();
|
||||
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||
taxonomy.addAll(checkedItems);
|
||||
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
|
||||
});
|
||||
taxonomyCCB.getCheckModel().clearChecks();
|
||||
} else {
|
||||
taxonomyCCB.setDisable(true);
|
||||
}
|
||||
|
||||
// cvv
|
||||
calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
calculateCvv = newValue;
|
||||
logger.info("calculate cvv: " + calculateCvv);
|
||||
});
|
||||
|
||||
|
||||
// string length
|
||||
stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
if (!newValue) {
|
||||
// focus lost
|
||||
String value = stringLengthTF.getText();
|
||||
if (!ValidationUtil.isEmpty(value)) {
|
||||
if (!ValidationUtil.isNumber(value)) {
|
||||
logAlert("stringlengthTf: " + WARNING_ONLY_NUMBERS_ALLOWED);
|
||||
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
|
||||
}
|
||||
stringLength = Integer.parseInt(value);
|
||||
} else {
|
||||
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_MISSING_STRING_LENGTH);
|
||||
stringLengthTF.setText("1");
|
||||
logAlert(WARNING_MISSING_STRING_LENGTH);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
computeNgramsB.setOnAction(e -> {
|
||||
compute();
|
||||
logger.info("compute button");
|
||||
});
|
||||
|
||||
helpH.setOnAction(e -> openHelpWebsite());
|
||||
}
|
||||
|
||||
/**
|
||||
* case a: values for combo boxes can change after a corpus change
|
||||
* <ul>
|
||||
* <li>different corpus type - reset all fields so no old values remain</li>
|
||||
* <li>same corpus type, different subset - keep</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* case b: values for combo boxes can change after a header scan
|
||||
* <ul>
|
||||
* <li>at first, fields are populated by corpus type defaults</li>
|
||||
* <li>after, with gathered data</li>
|
||||
* </ul>
|
||||
* <p></p>
|
||||
* ngrams: 1
|
||||
* calculateFor: word
|
||||
* msd:
|
||||
* taxonomy:
|
||||
* skip: 0
|
||||
* iscvv: false
|
||||
* string length: 1
|
||||
*/
|
||||
public void populateFields() {
|
||||
// corpus changed if: current one is null (this is first run of the app)
|
||||
// or if currentCorpus != gui's corpus
|
||||
boolean corpusChanged = currentCorpusType == null
|
||||
|| currentCorpusType != corpus.getCorpusType();
|
||||
|
||||
// TODO: check for GOS, GIGAFIDA, SOLAR...
|
||||
// refresh and:
|
||||
// TODO if current value != null && is in new calculateFor ? keep : otherwise reset
|
||||
if (calculateFor == null) {
|
||||
calculateForRB.selectToggle(lemmaRB);
|
||||
calculateFor = CalculateFor.factory(calculateForRB.getSelectedToggle().toString());
|
||||
}
|
||||
|
||||
if (!filter.hasMsd()) {
|
||||
// if current corpus doesn't have msd data, disable this field
|
||||
msd = new ArrayList<>();
|
||||
msdTF.setText("");
|
||||
msdTF.setDisable(true);
|
||||
logger.info("no msd data");
|
||||
} else {
|
||||
if (ValidationUtil.isEmpty(msd)
|
||||
|| (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
|
||||
// msd has not been set previously
|
||||
// or msd has been set but the corpus changed -> reset
|
||||
msd = new ArrayList<>();
|
||||
msdTF.setText("");
|
||||
msdTF.setDisable(false);
|
||||
logger.info("msd reset");
|
||||
} else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
|
||||
// if msd has been set, but corpus type remained the same, we can keep any set msd value
|
||||
msdTF.setText(StringUtils.join(msdStrings, " "));
|
||||
msdTF.setDisable(false);
|
||||
logger.info("msd kept");
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
|
||||
|
||||
// keep calculateCvv
|
||||
calculatecvvCB.setSelected(calculateCvv);
|
||||
|
||||
// keep string length if set
|
||||
if (stringLength != null) {
|
||||
stringLengthTF.setText(String.valueOf(stringLength));
|
||||
} else {
|
||||
stringLengthTF.setText("1");
|
||||
stringLength = 1;
|
||||
}
|
||||
|
||||
// TODO: trigger on rescan
|
||||
if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
|
||||
// user changed corpus (by type) or by selection & triggered a rescan of headers
|
||||
// see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||
ObservableList<String> tax = corpus.getTaxonomy();
|
||||
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||
|
||||
currentCorpusType = corpus.getCorpusType();
|
||||
// setTaxonomyIsDirty(false);
|
||||
} else {
|
||||
|
||||
}
|
||||
|
||||
// see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||
ObservableList<String> tax = corpus.getTaxonomy();
|
||||
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||
taxonomyCCB.getItems().addAll(taxonomyCCBValues);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
|
||||
* sets combobox values to what is applicable ...
|
||||
*
|
||||
* @param mode
|
||||
*/
|
||||
public void toggleMode(MODE mode) {
|
||||
if (mode == null) {
|
||||
mode = currentMode;
|
||||
}
|
||||
|
||||
logger.info("mode: ", mode.toString());
|
||||
|
||||
if (mode == MODE.LETTER) {
|
||||
paneLetters.setVisible(true);
|
||||
|
||||
// populate with default cvv length value
|
||||
if (stringLength == null) {
|
||||
stringLengthTF.setText("1");
|
||||
stringLength = 1;
|
||||
} else {
|
||||
stringLengthTF.setText(String.valueOf(stringLength));
|
||||
}
|
||||
|
||||
// if calculateFor was selected for something other than a word or a lemma -> reset
|
||||
if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
|
||||
// if the user selected something else before selecting ngram for letters, reset that choice
|
||||
calculateFor = CalculateFor.LEMMA;
|
||||
calculateForRB.selectToggle(lemmaRB);
|
||||
}
|
||||
}
|
||||
|
||||
// override if orth mode, allow only word
|
||||
if (corpus.isGosOrthMode()) {
|
||||
// TODO change to
|
||||
varietyRB.setDisable(true);
|
||||
msdTF.setDisable(true);
|
||||
} else {
|
||||
msdTF.setDisable(false);
|
||||
varietyRB.setDisable(false);
|
||||
}
|
||||
}
|
||||
|
||||
private void compute() {
|
||||
Filter filter = new Filter();
|
||||
filter.setNgramValue(0);
|
||||
filter.setCalculateFor(calculateFor);
|
||||
filter.setMsd(msd);
|
||||
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
|
||||
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||
filter.setSkipValue(0);
|
||||
filter.setIsCvv(calculateCvv);
|
||||
filter.setSolarFilters(solarFiltersMap);
|
||||
filter.setStringLength(stringLength);
|
||||
|
||||
String message = Validation.validateForStringLevel(filter);
|
||||
if (message == null) {
|
||||
// no errors
|
||||
logger.info("Executing: ", filter.toString());
|
||||
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
||||
execute(statistic);
|
||||
} else {
|
||||
logAlert(message);
|
||||
showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
|
||||
}
|
||||
}
|
||||
|
||||
private void openHelpWebsite(){
|
||||
hostService.showDocument(Messages.HELP_URL);
|
||||
}
|
||||
|
||||
private void logAlert(String alert) {
|
||||
logger.info("alert: " + alert);
|
||||
}
|
||||
|
||||
public Corpus getCorpus() {
|
||||
return corpus;
|
||||
}
|
||||
|
||||
public void setCorpus(Corpus corpus) {
|
||||
this.corpus = corpus;
|
||||
|
||||
if (corpus.getCorpusType() != CorpusType.SOLAR) {
|
||||
setSelectedFiltersLabel(null);
|
||||
} else {
|
||||
setSelectedFiltersLabel("/");
|
||||
}
|
||||
}
|
||||
|
||||
public void setSelectedFiltersLabel(String content) {
|
||||
if (content != null) {
|
||||
solarFilters.setVisible(true);
|
||||
selectedFiltersLabel.setVisible(true);
|
||||
selectedFiltersLabel.setText(content);
|
||||
} else {
|
||||
solarFilters.setVisible(false);
|
||||
selectedFiltersLabel.setVisible(false);
|
||||
}
|
||||
}
|
||||
|
||||
private void execute(StatisticsNew statistic) {
|
||||
logger.info("Started execution: ", statistic.getFilter());
|
||||
|
||||
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
||||
boolean corpusIsSplit = corpusFiles.size() > 1;
|
||||
|
||||
final Task<Void> task = new Task<Void>() {
|
||||
@SuppressWarnings("Duplicates")
|
||||
@Override
|
||||
protected Void call() throws Exception {
|
||||
long i = 0;
|
||||
for (File f : corpusFiles) {
|
||||
readXML(f.toString(), statistic);
|
||||
i++;
|
||||
this.updateProgress(i, corpusFiles.size());
|
||||
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||
progressLabel.textProperty().bind(task.messageProperty());
|
||||
|
||||
task.setOnSucceeded(e -> {
|
||||
try {
|
||||
boolean successullySaved = statistic.saveResultToDisk();
|
||||
if (successullySaved) {
|
||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
||||
} else {
|
||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
||||
}
|
||||
} catch (UnsupportedEncodingException e1) {
|
||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
||||
logger.error("Error while saving", e1);
|
||||
}
|
||||
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
});
|
||||
|
||||
task.setOnFailed(e -> {
|
||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
|
||||
logger.error("Error while executing", e);
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setProgress(0.0);
|
||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
});
|
||||
|
||||
final Thread thread = new Thread(task, "task");
|
||||
thread.setDaemon(true);
|
||||
thread.start();
|
||||
}
|
||||
|
||||
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
|
||||
this.solarFiltersMap = solarFiltersMap;
|
||||
}
|
||||
|
||||
public void setHostServices(HostServices hostServices){
|
||||
this.hostService = hostServices;
|
||||
}
|
||||
}
|
||||
517
src/main/java/gui/CorpusTab.java
Normal file
517
src/main/java/gui/CorpusTab.java
Normal file
@@ -0,0 +1,517 @@
|
||||
package gui;
|
||||
|
||||
import static data.CorpusType.*;
|
||||
import static gui.GUIController.*;
|
||||
import static gui.Messages.*;
|
||||
import static util.Util.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.IOCase;
|
||||
import org.apache.commons.io.filefilter.FileFilterUtils;
|
||||
import org.apache.commons.io.filefilter.TrueFileFilter;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import alg.XML_processing;
|
||||
import data.Corpus;
|
||||
import data.CorpusType;
|
||||
import data.Enums.solar.SolarFilters;
|
||||
import data.Tax;
|
||||
import javafx.collections.ObservableList;
|
||||
import javafx.concurrent.Task;
|
||||
import javafx.fxml.FXML;
|
||||
import javafx.scene.control.*;
|
||||
import javafx.scene.layout.Pane;
|
||||
import javafx.stage.DirectoryChooser;
|
||||
import javafx.stage.Stage;
|
||||
import javafx.application.HostServices;
|
||||
|
||||
public class CorpusTab {
|
||||
public final static Logger logger = LogManager.getLogger(CorpusTab.class);
|
||||
public Pane setCorpusWrapperP;
|
||||
|
||||
private Stage stage;
|
||||
|
||||
@FXML
|
||||
private Button chooseCorpusLocationB;
|
||||
private File chosenCorpusLocation;
|
||||
|
||||
@FXML
|
||||
private CheckBox readHeaderInfoChB;
|
||||
private boolean readHeaderInfo;
|
||||
|
||||
@FXML
|
||||
private CheckBox gosUseOrthChB;
|
||||
private boolean gosUseOrth;
|
||||
|
||||
@FXML
|
||||
private Button chooseResultsLocationB;
|
||||
|
||||
@FXML
|
||||
private Label chooseCorpusL;
|
||||
private String chooseCorpusLabelContent;
|
||||
|
||||
@FXML
|
||||
private Label chooseResultsL;
|
||||
private String chooseResultsLabelContent;
|
||||
|
||||
@FXML
|
||||
private ProgressIndicator locationScanPI;
|
||||
|
||||
@FXML
|
||||
private Hyperlink helpH;
|
||||
|
||||
// *** shared ***
|
||||
private Corpus corpus;
|
||||
private CorpusType corpusType;
|
||||
|
||||
// tabs - used to enable/disable
|
||||
private Tab stringLevelTabNew2;
|
||||
private Tab oneWordAnalysisTab;
|
||||
private Tab characterLevelTab;
|
||||
private Tab wordFormationTab;
|
||||
private Tab wordLevelTab;
|
||||
private Tab filterTab;
|
||||
private TabPane tabPane;
|
||||
private StringAnalysisTabNew2 satNew2Controller;
|
||||
private OneWordAnalysisTab oneWordTabController;
|
||||
private CharacterAnalysisTab catController;
|
||||
private FiltersForSolar ffsController;
|
||||
//private WordFormationTab wfController;
|
||||
private WordLevelTab wlController;
|
||||
private HostServices hostService;
|
||||
|
||||
|
||||
public void initialize() {
|
||||
stage = new Stage();
|
||||
|
||||
// add listeners
|
||||
chooseCorpusLocationB.setOnAction(e -> chooseCorpusLocation());
|
||||
chooseCorpusLocationB.setTooltip(new Tooltip(TOOLTIP_chooseCorpusLocationB));
|
||||
helpH.setOnAction(e -> openHelpWebsite());
|
||||
|
||||
readHeaderInfoChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
readHeaderInfo = newValue;
|
||||
logger.info("read headers: ", readHeaderInfo);
|
||||
});
|
||||
readHeaderInfoChB.setTooltip(new Tooltip(TOOLTIP_readHeaderInfoChB));
|
||||
|
||||
gosUseOrthChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
gosUseOrth = newValue;
|
||||
corpus.setGosOrthMode(gosUseOrth);
|
||||
wordFormationTab.setDisable(gosUseOrth);
|
||||
satNew2Controller.toggleMode(null);
|
||||
oneWordTabController.toggleMode(null);
|
||||
catController.toggleMode(null);
|
||||
|
||||
logger.info("gosUseOrth: ", gosUseOrth);
|
||||
});
|
||||
|
||||
chooseResultsLocationB.setOnAction(e -> chooseResultsLocation(null));
|
||||
|
||||
// set labels and toggle visibility
|
||||
toggleGosChBVisibility();
|
||||
|
||||
chooseCorpusLabelContent = Messages.LABEL_CORPUS_LOCATION_NOT_SET;
|
||||
chooseCorpusL.setText(chooseCorpusLabelContent);
|
||||
|
||||
chooseResultsLabelContent = Messages.LABEL_RESULTS_LOCATION_NOT_SET;
|
||||
chooseResultsL.setText(chooseResultsLabelContent);
|
||||
|
||||
togglePiAndSetCorpusWrapper(false);
|
||||
}
|
||||
|
||||
private void togglePiAndSetCorpusWrapper(boolean piIsActive) {
|
||||
locationScanPI.setVisible(piIsActive);
|
||||
setCorpusWrapperP.setLayoutX(piIsActive ? 100.0 : 10.0);
|
||||
}
|
||||
|
||||
private void openHelpWebsite(){
|
||||
hostService.showDocument(Messages.HELP_URL);
|
||||
}
|
||||
|
||||
/**
|
||||
* In order for a directory to pass as a valid corpus location, following criteria has to be met:
|
||||
* <ul>
|
||||
* <li>it can't be null</li>
|
||||
* <li>it has to be readable</li>
|
||||
* <li>it has to contain xml files</li>
|
||||
* <li>xml files have to contain valid headers from which we can infer the corpus type</li>
|
||||
* <li>corpus type must be one of the expected corpus types - as noted in the @see data.CorpusType.class </li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Additionally, if the user checks to read taxonomy/filters from the corpus files, that read
|
||||
* has to produce a non-empty list results list
|
||||
*/
|
||||
private void chooseCorpusLocation() {
|
||||
File selectedDirectory = directoryChooser();
|
||||
|
||||
if (selectedDirectory != null && ValidationUtil.isReadableDirectory(selectedDirectory)) {
|
||||
logger.info("selected corpus dir: ", selectedDirectory.getAbsolutePath());
|
||||
|
||||
// scan for xml files
|
||||
Collection<File> corpusFiles = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("xml", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
|
||||
|
||||
// make sure there are corpus files in selected directory or notify the user about it
|
||||
if (corpusFiles.size() == 0) {
|
||||
logger.info("alert: ", WARNING_CORPUS_NOT_FOUND);
|
||||
showAlert(Alert.AlertType.ERROR, WARNING_CORPUS_NOT_FOUND, null);
|
||||
} else {
|
||||
String chooseCorpusLabelContentTmp = detectCorpusType(corpusFiles, selectedDirectory.getAbsolutePath());
|
||||
|
||||
if (chooseCorpusLabelContentTmp == null) {
|
||||
logger.info("alert: ", WARNING_CORPUS_NOT_FOUND);
|
||||
showAlert(Alert.AlertType.ERROR, WARNING_CORPUS_NOT_FOUND, null);
|
||||
} else {
|
||||
initNewCorpus(selectedDirectory, corpusFiles);
|
||||
|
||||
corpus.setChosenCorpusLocation(selectedDirectory);
|
||||
corpus.setDetectedCorpusFiles(corpusFiles);
|
||||
chooseCorpusLabelContent = chooseCorpusLabelContentTmp;
|
||||
logger.info("corpus dir: ", corpus.getChosenCorpusLocation().getAbsolutePath());
|
||||
|
||||
if (readHeaderInfo) {
|
||||
logger.info("reading header info...");
|
||||
readHeaderInfo();
|
||||
} else {
|
||||
setResults();
|
||||
|
||||
setCorpusForAnalysis();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* If a user selects a valid corpus location, we define a new corpus (so none of the old data gets carried over)
|
||||
*
|
||||
* @param selectedDirectory
|
||||
* @param corpusFiles
|
||||
*/
|
||||
private void initNewCorpus(File selectedDirectory, Collection<File> corpusFiles) {
|
||||
corpus = new Corpus();
|
||||
corpus.setCorpusType(corpusType);
|
||||
corpus.setDetectedCorpusFiles(corpusFiles);
|
||||
corpus.setChosenCorpusLocation(selectedDirectory);
|
||||
chooseResultsLocation(selectedDirectory);
|
||||
}
|
||||
|
||||
private void chooseResultsLocation(File dir) {
|
||||
// results location can be set either to default value (after selecting valid corpus location) - dir attribute
|
||||
// or to a dir picked via directoryChooser (when dir == null
|
||||
File selectedDirectory = dir == null ? directoryChooser() : dir;
|
||||
|
||||
if (selectedDirectory != null) {
|
||||
String resultsLocationPath = selectedDirectory.getAbsolutePath().concat(File.separator);
|
||||
File chosenResultsLocationTmp = new File(resultsLocationPath);
|
||||
|
||||
if (!ValidationUtil.isValidDirectory(chosenResultsLocationTmp)) {
|
||||
showAlert(Alert.AlertType.ERROR, WARNING_RESULTS_DIR_NOT_VALID);
|
||||
logger.info("alert: ", WARNING_RESULTS_DIR_NOT_VALID);
|
||||
} else {
|
||||
corpus.setChosenResultsLocation(chosenResultsLocationTmp);
|
||||
chooseResultsLabelContent = corpus.getChosenResultsLocation().getAbsolutePath();
|
||||
chooseResultsL.setText(chooseResultsLabelContent);
|
||||
logger.info("results dir: " + chooseResultsLabelContent);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void setResults() {
|
||||
// if everything is ok
|
||||
// check and enable checkbox if GOS
|
||||
toggleGosChBVisibility();
|
||||
|
||||
// set default results location
|
||||
String defaultResultsLocationPath = corpus.getChosenCorpusLocation().getAbsolutePath();
|
||||
logger.info("setting default results location to: ", defaultResultsLocationPath);
|
||||
|
||||
chooseCorpusL.setText(chooseCorpusLabelContent);
|
||||
}
|
||||
|
||||
private void readHeaderInfo() {
|
||||
CorpusType corpusType = corpus.getCorpusType();
|
||||
Collection<File> corpusFiles = corpus.getDetectedCorpusFiles();
|
||||
togglePiAndSetCorpusWrapper(true);
|
||||
chooseCorpusL.setText(LABEL_SCANNING_CORPUS);
|
||||
|
||||
logger.info("reading header data for ", corpusType.toString());
|
||||
|
||||
if (corpusType == CorpusType.GIGAFIDA || corpusType == CorpusType.GOS || corpusType == CorpusType.CCKRES) {
|
||||
boolean corpusIsSplit = corpusFiles.size() > 1;
|
||||
|
||||
final Task<HashSet<String>> task = new Task<HashSet<String>>() {
|
||||
@Override
|
||||
protected HashSet<String> call() throws Exception {
|
||||
HashSet<String> values = new HashSet<>();
|
||||
long i = 0;
|
||||
|
||||
if (!corpusIsSplit) {
|
||||
updateProgress(-1.0f, -1.0f);
|
||||
}
|
||||
|
||||
for (File file : corpusFiles) {
|
||||
values.addAll((Collection<? extends String>) XML_processing.readXmlHeaderTaxonomyAndFilters(file.getAbsolutePath(), corpusIsSplit, corpusType));
|
||||
i++;
|
||||
|
||||
if (corpusIsSplit) {
|
||||
updateProgress(i, corpusFiles.size());
|
||||
}
|
||||
}
|
||||
|
||||
updateProgress(1.0f, 1.0f);
|
||||
return values;
|
||||
}
|
||||
};
|
||||
|
||||
locationScanPI.progressProperty().bind(task.progressProperty());
|
||||
|
||||
task.setOnSucceeded(e -> {
|
||||
ObservableList<String> readTaxonomy = Tax.getTaxonomyForComboBox(corpusType, task.getValue());
|
||||
|
||||
if (ValidationUtil.isEmpty(readTaxonomy)) {
|
||||
// if no taxonomy found alert the user and keep other tabs disabled
|
||||
logger.info("No taxonomy found in headers.");
|
||||
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_NO_TAXONOMY_FOUND);
|
||||
} else {
|
||||
// set taxonomy, update label
|
||||
corpus.setTaxonomy(readTaxonomy);
|
||||
corpus.setHeaderRead(true);
|
||||
chooseCorpusL.setText(chooseCorpusLabelContent);
|
||||
setResults();
|
||||
setCorpusForAnalysis();
|
||||
}
|
||||
|
||||
togglePiAndSetCorpusWrapper(false);
|
||||
|
||||
});
|
||||
|
||||
task.setOnCancelled(e -> togglePiAndSetCorpusWrapper(false));
|
||||
task.setOnFailed(e -> togglePiAndSetCorpusWrapper(false));
|
||||
|
||||
final Thread thread = new Thread(task, "task");
|
||||
thread.setDaemon(true);
|
||||
thread.start();
|
||||
} else if (corpusType == CorpusType.SOLAR) {
|
||||
// many many fields
|
||||
boolean corpusIsSplit = corpusFiles.size() > 1;
|
||||
|
||||
final Task<HashMap<String, HashSet<String>>> task = new Task<HashMap<String, HashSet<String>>>() {
|
||||
@Override
|
||||
protected HashMap<String, HashSet<String>> call() throws Exception {
|
||||
HashMap<String, HashSet<String>> values = new HashMap<>();
|
||||
long i = 0;
|
||||
|
||||
if (!corpusIsSplit) {
|
||||
updateProgress(-1.0f, -1.0f);
|
||||
}
|
||||
|
||||
for (File file : corpusFiles) {
|
||||
HashMap<String, HashSet<String>> tmpvalues = (HashMap<String, HashSet<String>>) XML_processing.readXmlHeaderTaxonomyAndFilters(file.getAbsolutePath(), corpusIsSplit, corpusType);
|
||||
|
||||
// update final results
|
||||
for (Map.Entry<String, HashSet<String>> entry : tmpvalues.entrySet()) {
|
||||
if (values.containsKey(entry.getKey())) {
|
||||
values.get(entry.getKey()).addAll(entry.getValue());
|
||||
} else {
|
||||
values.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
i++;
|
||||
|
||||
if (corpusIsSplit) {
|
||||
updateProgress(i, corpusFiles.size());
|
||||
}
|
||||
}
|
||||
|
||||
updateProgress(1.0f, 1.0f);
|
||||
return values;
|
||||
}
|
||||
};
|
||||
|
||||
locationScanPI.progressProperty().bind(task.progressProperty());
|
||||
|
||||
task.setOnSucceeded(e -> {
|
||||
HashMap<String, HashSet<String>> values = task.getValue();
|
||||
|
||||
if (ValidationUtil.isEmpty(values)) {
|
||||
// if no taxonomy found alert the user and keep other tabs disabled
|
||||
logger.info("No solar filters found in headers.");
|
||||
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_NO_SOLAR_FILTERS_FOUND);
|
||||
} else {
|
||||
HashMap<String, ObservableList<String>> filtersForComboBoxes = SolarFilters.getFiltersForComboBoxes(values);
|
||||
// set taxonomy, update label
|
||||
corpus.setSolarFiltersForXML(values);
|
||||
corpus.setSolarFilters(filtersForComboBoxes);
|
||||
corpus.setHeaderRead(true);
|
||||
chooseCorpusL.setText(chooseCorpusLabelContent);
|
||||
setResults();
|
||||
setCorpusForAnalysis();
|
||||
}
|
||||
|
||||
togglePiAndSetCorpusWrapper(false);
|
||||
|
||||
});
|
||||
|
||||
task.setOnCancelled(e -> togglePiAndSetCorpusWrapper(false));
|
||||
task.setOnFailed(e -> togglePiAndSetCorpusWrapper(false));
|
||||
|
||||
final Thread thread = new Thread(task, "task");
|
||||
thread.setDaemon(true);
|
||||
thread.start();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void setCorpusForAnalysis() {
|
||||
if (corpus.validate()) {
|
||||
// new statistic, enable tabs...
|
||||
stringLevelTabNew2.setDisable(false);
|
||||
satNew2Controller.setCorpus(corpus);
|
||||
satNew2Controller.init();
|
||||
oneWordAnalysisTab.setDisable(false);
|
||||
oneWordTabController.setCorpus(corpus);
|
||||
oneWordTabController.init();
|
||||
characterLevelTab.setDisable(false);
|
||||
catController.setCorpus(corpus);
|
||||
catController.init();
|
||||
wordFormationTab.setDisable(false);
|
||||
wordLevelTab.setDisable(false);
|
||||
//wfController.setCorpus(corpus);
|
||||
//wfController.init();
|
||||
wlController.setCorpus(corpus);
|
||||
wlController.init();
|
||||
|
||||
if (corpus.getCorpusType() == CorpusType.SOLAR) {
|
||||
filterTab.setDisable(false);
|
||||
tabPane.getTabs().add(1, filterTab);
|
||||
ffsController.setCorpus(corpus);
|
||||
ffsController.initFilters();
|
||||
} else {
|
||||
filterTab.setDisable(true);
|
||||
tabPane.getTabs().removeAll(filterTab);
|
||||
}
|
||||
} else {
|
||||
GUIController.showAlert(Alert.AlertType.ERROR, corpus.getValidationErrorsToString());
|
||||
}
|
||||
}
|
||||
|
||||
private File directoryChooser() {
|
||||
DirectoryChooser directoryChooser = new DirectoryChooser();
|
||||
|
||||
// open in the folder where the jar is located if possible
|
||||
File workingDir = getWorkingDirectory();
|
||||
|
||||
if (workingDir != null) {
|
||||
directoryChooser.setInitialDirectory(workingDir);
|
||||
}
|
||||
|
||||
return directoryChooser.showDialog(stage);
|
||||
}
|
||||
|
||||
/**
|
||||
* Hides GOS related checkbox until needed.
|
||||
*/
|
||||
private void toggleGosChBVisibility() {
|
||||
gosUseOrthChB.setVisible(corpus != null && corpus.getCorpusType() != null && corpus.getCorpusType() == CorpusType.GOS);
|
||||
}
|
||||
|
||||
private String detectCorpusType(Collection<File> corpusFiles, String corpusLocation) {
|
||||
// check that we recognize this corpus
|
||||
// read first file only, maybe later do all, if toll on resources is acceptable
|
||||
File f = corpusFiles.iterator().next();
|
||||
String title = XML_processing.readXMLHeaderTag(f.getAbsolutePath(), "title").toLowerCase();
|
||||
String test = CCKRES.getNameLowerCase();
|
||||
String debug = "";
|
||||
|
||||
// check if XML file's title contains any of recognized corpus titles
|
||||
corpusType = null;
|
||||
if (title.contains(SOLAR.getNameLowerCase())) {
|
||||
corpusType = SOLAR;
|
||||
} else if (title.contains(GIGAFIDA.getNameLowerCase())) {
|
||||
corpusType = GIGAFIDA;
|
||||
} else if (title.contains(CCKRES.getNameLowerCase())) {
|
||||
corpusType = CCKRES;
|
||||
} else if (title.contains(GOS.getNameLowerCase())) {
|
||||
corpusType = GOS;
|
||||
}
|
||||
|
||||
if (corpusType == null) {
|
||||
return null;
|
||||
} else {
|
||||
corpus.setCorpusType(corpusType);
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(corpusLocation)
|
||||
.append("\n")
|
||||
.append(String.format(NOTIFICATION_FOUND_X_FILES, corpusFiles.size()))
|
||||
.append("\n")
|
||||
.append(String.format("Korpus: %s", corpusType.toString()));
|
||||
|
||||
String result = sb.toString();
|
||||
|
||||
logger.debug(result);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
public Corpus getCorpus() {
|
||||
return corpus;
|
||||
}
|
||||
|
||||
public void setCorpus(Corpus corpus) {
|
||||
this.corpus = corpus;
|
||||
}
|
||||
|
||||
public void setStringLevelTabNew2(Tab stringLevelTabNew2) { this.stringLevelTabNew2 = stringLevelTabNew2; }
|
||||
|
||||
public void setOneWordAnalysisTab(Tab oneWordAnalysisTab) { this.oneWordAnalysisTab = oneWordAnalysisTab; }
|
||||
|
||||
public void setCharacterLevelTab(Tab characterLevelTab) { this.characterLevelTab = characterLevelTab; }
|
||||
|
||||
public void setWordLevelTab(Tab wordLevelTab) {
|
||||
this.wordLevelTab = wordLevelTab;
|
||||
}
|
||||
|
||||
public void setFilterTab(Tab filterTab) {
|
||||
this.filterTab = filterTab;
|
||||
}
|
||||
|
||||
public void setFfsController(FiltersForSolar ffsController) {
|
||||
this.ffsController = ffsController;
|
||||
}
|
||||
|
||||
public void setTabPane(TabPane tabPane) {
|
||||
this.tabPane = tabPane;
|
||||
}
|
||||
|
||||
public void setSatNew2Controller(StringAnalysisTabNew2 satNew2Controller) { this.satNew2Controller = satNew2Controller; }
|
||||
|
||||
public void setOneWordTabController(OneWordAnalysisTab oneWordTabController) { this.oneWordTabController = oneWordTabController; }
|
||||
|
||||
public void setCatController(CharacterAnalysisTab catController) { this.catController = catController; }
|
||||
|
||||
/*public void setWfController(WordFormationTab wfController) {
|
||||
this.wfController = wfController;
|
||||
}*/
|
||||
|
||||
public void setWlController(WordLevelTab wlController) {
|
||||
this.wlController = wlController;
|
||||
}
|
||||
|
||||
public void setWordFormationTab(Tab wordFormationTab) {
|
||||
this.wordFormationTab = wordFormationTab;
|
||||
}
|
||||
|
||||
public void setHostServices(HostServices hostServices){
|
||||
this.hostService = hostServices;
|
||||
}
|
||||
}
|
||||
187
src/main/java/gui/FiltersForSolar.java
Normal file
187
src/main/java/gui/FiltersForSolar.java
Normal file
@@ -0,0 +1,187 @@
|
||||
package gui;
|
||||
|
||||
import static data.Enums.solar.SolarFilters.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
|
||||
import javafx.application.HostServices;
|
||||
import javafx.scene.control.Hyperlink;
|
||||
import org.controlsfx.control.CheckComboBox;
|
||||
|
||||
import data.Corpus;
|
||||
import javafx.collections.ListChangeListener;
|
||||
import javafx.collections.ObservableList;
|
||||
import javafx.fxml.FXML;
|
||||
import javafx.scene.control.Label;
|
||||
import javafx.scene.layout.AnchorPane;
|
||||
import util.Util;
|
||||
|
||||
public class FiltersForSolar {
|
||||
|
||||
@FXML
|
||||
public AnchorPane solarFiltersTabPane;
|
||||
@FXML
|
||||
public CheckComboBox<String> solarRegijaCCB;
|
||||
@FXML
|
||||
public CheckComboBox<String> solarPredmetCCB;
|
||||
@FXML
|
||||
public CheckComboBox<String> solarRazredCCB;
|
||||
@FXML
|
||||
public CheckComboBox<String> solarLetoCCB;
|
||||
@FXML
|
||||
public CheckComboBox<String> solarSolaCCB;
|
||||
@FXML
|
||||
public CheckComboBox<String> solarVrstaBesedilaCCB;
|
||||
@FXML
|
||||
public Label selectedFiltersLabel;
|
||||
@FXML
|
||||
private Hyperlink helpH;
|
||||
|
||||
private HashMap<String, ObservableList<String>> selectedFilters;
|
||||
private Corpus corpus;
|
||||
|
||||
private StringAnalysisTabNew2 satNew2Controller;
|
||||
private OneWordAnalysisTab oneWordTabController;
|
||||
private CharacterAnalysisTab catController;
|
||||
//private WordFormationTab wfController;
|
||||
private WordLevelTab wlController;
|
||||
private HostServices hostService;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public void initialize() {
|
||||
selectedFilters = new HashMap<>();
|
||||
|
||||
solarRegijaCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> {
|
||||
selectedFilters.put(REGIJA, solarRegijaCCB.getCheckModel().getCheckedItems());
|
||||
updateSolarFilterLabel();
|
||||
});
|
||||
|
||||
solarPredmetCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> {
|
||||
selectedFilters.put(PREDMET, solarPredmetCCB.getCheckModel().getCheckedItems());
|
||||
updateSolarFilterLabel();
|
||||
});
|
||||
|
||||
solarRazredCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> {
|
||||
selectedFilters.put(RAZRED, solarRazredCCB.getCheckModel().getCheckedItems());
|
||||
updateSolarFilterLabel();
|
||||
});
|
||||
|
||||
solarLetoCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> {
|
||||
selectedFilters.put(LETO, solarLetoCCB.getCheckModel().getCheckedItems());
|
||||
updateSolarFilterLabel();
|
||||
});
|
||||
|
||||
solarSolaCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> {
|
||||
selectedFilters.put(SOLA, solarSolaCCB.getCheckModel().getCheckedItems());
|
||||
updateSolarFilterLabel();
|
||||
});
|
||||
|
||||
solarVrstaBesedilaCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener) c -> {
|
||||
selectedFilters.put(TIP, solarVrstaBesedilaCCB.getCheckModel().getCheckedItems());
|
||||
updateSolarFilterLabel();
|
||||
});
|
||||
|
||||
helpH.setOnAction(e -> openHelpWebsite());
|
||||
}
|
||||
|
||||
public void initFilters() {
|
||||
solarRegijaCCB.getItems().removeAll();
|
||||
solarRegijaCCB.getItems().setAll(corpus.getSolarFilters().get(REGIJA));
|
||||
solarRegijaCCB.getItems().sorted();
|
||||
solarPredmetCCB.getItems().removeAll();
|
||||
solarPredmetCCB.getItems().setAll(corpus.getSolarFilters().get(PREDMET));
|
||||
solarPredmetCCB.getItems().sorted();
|
||||
solarRazredCCB.getItems().removeAll();
|
||||
solarRazredCCB.getItems().setAll(corpus.getSolarFilters().get(RAZRED));
|
||||
solarRazredCCB.getItems().sorted();
|
||||
solarLetoCCB.getItems().removeAll();
|
||||
solarLetoCCB.getItems().setAll(corpus.getSolarFilters().get(LETO));
|
||||
solarLetoCCB.getItems().sorted();
|
||||
solarSolaCCB.getItems().removeAll();
|
||||
solarSolaCCB.getItems().setAll(corpus.getSolarFilters().get(SOLA));
|
||||
solarSolaCCB.getItems().sorted();
|
||||
solarVrstaBesedilaCCB.getItems().removeAll();
|
||||
solarVrstaBesedilaCCB.getItems().setAll(corpus.getSolarFilters().get(TIP));
|
||||
solarVrstaBesedilaCCB.getItems().sorted();
|
||||
}
|
||||
|
||||
private void updateSolarFilterLabel() {
|
||||
if (Util.isMapEmpty(selectedFilters)) {
|
||||
setSOlarFIlterLabelText("/");
|
||||
} else {
|
||||
StringBuilder allFilters = new StringBuilder();
|
||||
for (Map.Entry<String, ObservableList<String>> entry : selectedFilters.entrySet()) {
|
||||
ArrayList<String> values = new ArrayList<>(entry.getValue());
|
||||
|
||||
if (!values.isEmpty()) {
|
||||
allFilters.append(entry.getKey())
|
||||
.append(": ");
|
||||
|
||||
for (int i = 0; i < values.size(); i++) {
|
||||
allFilters.append(values.get(i));
|
||||
|
||||
if (i < values.size() - 1) {
|
||||
// so we won't append a comma after the last element
|
||||
allFilters.append(", ");
|
||||
}
|
||||
}
|
||||
allFilters.append("\n\n");
|
||||
}
|
||||
}
|
||||
|
||||
setSOlarFIlterLabelText(allFilters.toString());
|
||||
}
|
||||
|
||||
HashMap<String, HashSet<String>> solarFiltersMap = new HashMap<>();
|
||||
for (Map.Entry<String, ObservableList<String>> e : selectedFilters.entrySet()) {
|
||||
HashSet<String> values = new HashSet<>();
|
||||
values.addAll(e.getValue());
|
||||
|
||||
solarFiltersMap.put(e.getKey(), values);
|
||||
}
|
||||
|
||||
satNew2Controller.setSolarFiltersMap(solarFiltersMap);
|
||||
oneWordTabController.setSolarFiltersMap(solarFiltersMap);
|
||||
catController.setSolarFiltersMap(solarFiltersMap);
|
||||
//wfController.setSolarFiltersMap(solarFiltersMap);
|
||||
wlController.setSolarFiltersMap(solarFiltersMap);
|
||||
}
|
||||
|
||||
private void openHelpWebsite(){
|
||||
hostService.showDocument(Messages.HELP_URL);
|
||||
}
|
||||
|
||||
private void setSOlarFIlterLabelText(String content) {
|
||||
selectedFiltersLabel.setText(content);
|
||||
satNew2Controller.setSelectedFiltersLabel(content);
|
||||
oneWordTabController.setSelectedFiltersLabel(content);
|
||||
catController.setSelectedFiltersLabel(content);
|
||||
//wfController.setSelectedFiltersLabel(content);
|
||||
wlController.setSelectedFiltersLabel(content);
|
||||
}
|
||||
|
||||
public void setCorpus(Corpus corpus) {
|
||||
this.corpus = corpus;
|
||||
}
|
||||
|
||||
public void setSatNew2Controller(StringAnalysisTabNew2 satNew2Controller) { this.satNew2Controller = satNew2Controller; }
|
||||
|
||||
public void setOneWordTabController(OneWordAnalysisTab oneWordTabController) { this.oneWordTabController = oneWordTabController; }
|
||||
|
||||
public void setCatController(CharacterAnalysisTab catController) { this.catController = catController; }
|
||||
|
||||
/*public void setWfController(WordFormationTab wfController) {
|
||||
this.wfController = wfController;
|
||||
}*/
|
||||
|
||||
public void setWlController(WordLevelTab wlController) {
|
||||
this.wlController = wlController;
|
||||
}
|
||||
|
||||
public void setHostServices(HostServices hostServices){
|
||||
this.hostService = hostServices;
|
||||
}
|
||||
}
|
||||
150
src/main/java/gui/GUIController.java
Normal file
150
src/main/java/gui/GUIController.java
Normal file
@@ -0,0 +1,150 @@
|
||||
package gui;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.kordamp.ikonli.fontawesome.FontAwesome;
|
||||
import org.kordamp.ikonli.javafx.FontIcon;
|
||||
|
||||
import data.Corpus;
|
||||
import javafx.application.Application;
|
||||
import javafx.fxml.FXML;
|
||||
import javafx.fxml.FXMLLoader;
|
||||
import javafx.scene.Parent;
|
||||
import javafx.scene.Scene;
|
||||
import javafx.scene.control.Alert;
|
||||
import javafx.scene.control.Tab;
|
||||
import javafx.scene.control.TabPane;
|
||||
import javafx.stage.Stage;
|
||||
|
||||
public class GUIController extends Application {
|
||||
public final static Logger logger = LogManager.getLogger(GUIController.class);
|
||||
|
||||
@FXML
|
||||
public Tab StringLevelTabNew2;
|
||||
@FXML
|
||||
public Tab OneWordAnalysisTab;
|
||||
@FXML
|
||||
public Tab CharacterLevelTabNew;
|
||||
@FXML
|
||||
public Tab corpusTab;
|
||||
public TabPane tabPane;
|
||||
@FXML
|
||||
private CharacterAnalysisTab catController;
|
||||
@FXML
|
||||
private static Parent sat;
|
||||
@FXML
|
||||
private StringAnalysisTabNew2 satNew2Controller;
|
||||
@FXML
|
||||
private static Parent satNew2;
|
||||
@FXML
|
||||
private OneWordAnalysisTab oneWordTabController;
|
||||
@FXML
|
||||
private static Parent oneWordTab;
|
||||
@FXML
|
||||
private CorpusTab ctController;
|
||||
@FXML
|
||||
private Parent ct;
|
||||
//@FXML
|
||||
//private WordFormationTab wfController;
|
||||
@FXML
|
||||
private Parent wf;
|
||||
@FXML
|
||||
private WordLevelTab wlController;
|
||||
@FXML
|
||||
private Parent wl;
|
||||
@FXML
|
||||
private FiltersForSolar ffsController;
|
||||
@FXML
|
||||
private Parent ffs;
|
||||
@FXML
|
||||
private SelectedFiltersPane sfpController;
|
||||
@FXML
|
||||
private Parent sfp;
|
||||
@FXML
|
||||
public Tab stringLevelTab;
|
||||
@FXML
|
||||
public Tab wordLevelTab;
|
||||
/*@FXML
|
||||
public Tab wordFormationTab;*/
|
||||
|
||||
|
||||
@FXML
|
||||
public Tab filterTab;
|
||||
public Stage stage;
|
||||
|
||||
private Corpus corpus;
|
||||
|
||||
|
||||
@Override
|
||||
public void start(Stage primaryStage) throws IOException {
|
||||
Parent root = FXMLLoader.load(getClass().getResource("/GUI.fxml"));
|
||||
primaryStage.setTitle("GUI");
|
||||
Scene scene = new Scene(root, 800, 600);
|
||||
// https://github.com/dicolar/jbootx
|
||||
// scene.getStylesheets().add(GUIController.class.getResource("bootstrap3.css").toExternalForm())
|
||||
primaryStage.setScene(scene);
|
||||
stage = primaryStage;
|
||||
primaryStage.show();
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
launch(args);
|
||||
}
|
||||
|
||||
public void initialize() {
|
||||
corpus = new Corpus();
|
||||
ctController.setCorpus(corpus);
|
||||
ctController.setFilterTab(filterTab);
|
||||
ctController.setStringLevelTabNew2(StringLevelTabNew2);
|
||||
ctController.setOneWordAnalysisTab(OneWordAnalysisTab);
|
||||
ctController.setCharacterLevelTab(CharacterLevelTabNew);
|
||||
ctController.setSatNew2Controller(satNew2Controller);
|
||||
ctController.setOneWordTabController(oneWordTabController);
|
||||
ctController.setCatController(catController);
|
||||
//ctController.setWfController(wfController);
|
||||
ctController.setWlController(wlController);
|
||||
ctController.setTabPane(tabPane);
|
||||
ctController.setFfsController(ffsController);
|
||||
//ctController.setWordFormationTab(wordFormationTab);
|
||||
ctController.setWordLevelTab(wordLevelTab);
|
||||
ctController.setHostServices(getHostServices());
|
||||
|
||||
satNew2Controller.setCorpus(corpus);
|
||||
satNew2Controller.setHostServices(getHostServices());
|
||||
oneWordTabController.setCorpus(corpus);
|
||||
oneWordTabController.setHostServices(getHostServices());
|
||||
catController.setCorpus(corpus);
|
||||
catController.setHostServices(getHostServices());
|
||||
//wfController.setCorpus(corpus);
|
||||
//wfController.setHostServices(getHostServices());
|
||||
wlController.setCorpus(corpus);
|
||||
wlController.setHostServices(getHostServices());
|
||||
ffsController.setSatNew2Controller(satNew2Controller);
|
||||
ffsController.setOneWordTabController(oneWordTabController);
|
||||
ffsController.setCatController(catController);
|
||||
//ffsController.setWfController(wfController);
|
||||
ffsController.setWlController(wlController);
|
||||
ffsController.setHostServices(getHostServices());
|
||||
|
||||
// set tab icons
|
||||
corpusTab.setGraphic(new FontIcon(FontAwesome.COG));
|
||||
filterTab.setGraphic(new FontIcon(FontAwesome.FILTER));
|
||||
|
||||
// hide filter tab
|
||||
tabPane.getTabs().removeAll(filterTab);
|
||||
}
|
||||
|
||||
static void showAlert(Alert.AlertType alertType, String headerText, String contentText) {
|
||||
Alert alert = new Alert(alertType);
|
||||
alert.setTitle(Messages.windowTitles.get(alertType));
|
||||
alert.setHeaderText(headerText != null ? headerText : "");
|
||||
alert.setContentText(contentText != null ? contentText : "");
|
||||
alert.showAndWait();
|
||||
}
|
||||
|
||||
static void showAlert(Alert.AlertType alertType, String headerText) {
|
||||
showAlert(alertType, headerText, null);
|
||||
}
|
||||
}
|
||||
74
src/main/java/gui/Messages.java
Normal file
74
src/main/java/gui/Messages.java
Normal file
@@ -0,0 +1,74 @@
|
||||
package gui;
|
||||
|
||||
import static javafx.scene.control.Alert.AlertType.*;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
import javafx.scene.control.Alert;
|
||||
|
||||
public class Messages {
|
||||
|
||||
// warnings & errors
|
||||
public static final String WARNING_CORPUS_NOT_FOUND = "V izbranem direktoriju ni ustreznih korpusnih datotek.";
|
||||
public static final String WARNING_RESULTS_DIR_NOT_VALID = "Za dostop do izbranega direktorija nimate potrebnih pravic.";
|
||||
public static final String WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS = "Izbran nivo ngramov in vpisano št. besed v filtru se ne ujemata.";
|
||||
public static final String WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS_INFO = "Izberite drugo število ali popravite filter.";
|
||||
public static final String WARNING_WORD_OR_LEMMA = "Izberite, če želite statistiko izračunati za besede ali leme.";
|
||||
public static final String WARNING_ONLY_NUMBERS_ALLOWED = "Prosim vnesite veljavno število.";
|
||||
public static final String WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES = "Število za ngram (%d) in število msd oznak (%d) se morata ujemati.";
|
||||
public static final String WARNING_MISSING_STRING_LENGTH = "Dolžina niza mora biti večja od 0. Vstavljena je privzeta vrednost (1).";
|
||||
public static final String WARNING_NO_TAXONOMY_FOUND = "Iz korpusnih datotek ni bilo moč razbrati taksonomije. Prosim izberite drugo lokacijo ali korpus.";
|
||||
public static final String WARNING_NO_SOLAR_FILTERS_FOUND = "Iz korpusnih datotek ni bilo moč razbrati filtrov. Prosim izberite drugo lokacijo ali korpus.";
|
||||
public static final String ERROR_WHILE_EXECUTING = "Prišlo je do napake med izvajanjem.";
|
||||
public static final String ERROR_WHILE_SAVING_RESULTS_TO_CSV = "Prišlo je do napake med shranjevanje rezultatov.";
|
||||
|
||||
// missing
|
||||
public static final String MISSING_NGRAM_LEVEL = "N-gram nivo";
|
||||
public static final String MISSING_CALCULATE_FOR = "Izračunaj za";
|
||||
public static final String MISSING_SKIP = "";
|
||||
public static final String MISSING_STRING_LENGTH = "Dolžina niza";
|
||||
public static final String MISMATCHED_STRING_LENGTH_AND_MSD_REGEX = "Neujemajoča dolžina niza in regex filter";
|
||||
|
||||
|
||||
// general notifications - static content/set only once
|
||||
public static final String NOTIFICATION_FOUND_X_FILES = "Št. najdenih datotek: %d";
|
||||
public static final String NOTIFICATION_ANALYSIS_COMPLETED = "Analiza je zaključena, rezultati so shranjeni.";
|
||||
public static final String NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS = "Analiza je zaključena, vendar ni bilo moč izračunati statistike, ki bi ustrezala vsem navedenim pogojem.";
|
||||
public static final String RESULTS_PATH_SET_TO_DEFAULT = "Lokacija za shranjevanje rezultatov je nastavljena na lokacijo korpusa.";
|
||||
|
||||
// ongoing notifications - displayed while processing, dynamically changing
|
||||
public static final String ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y = "Analiziram datoteko %d od %d (%s)";
|
||||
|
||||
// Labels
|
||||
public static final String LABEL_CORPUS_LOCATION_NOT_SET = "Lokacija korpusa ni nastavljena";
|
||||
public static final String LABEL_RESULTS_LOCATION_NOT_SET = "Lokacija za shranjevanje rezultatov ni nastavljena";
|
||||
public static final String LABEL_RESULTS_CORPUS_TYPE_NOT_SET = "Vrsta korpusa ni nastavljena";
|
||||
|
||||
public static final String LABEL_SCANNING_CORPUS = "Iskanje in analiza korpusnih datotek...";
|
||||
public static final String LABEL_SCANNING_SINGLE_FILE_CORPUS = "Analiza vnosa ";
|
||||
public static final String COMPLETED = "končano";
|
||||
|
||||
public static final String TOOLTIP_chooseCorpusLocationB = "Izberite mapo v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa.";
|
||||
public static final String TOOLTIP_readHeaderInfoChB = "Če izberete to opcijo, se bo iz headerjev korpusa prebrala razpoložljiva taksonomija oz. filtri (korpus Šolar). Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki.";
|
||||
|
||||
|
||||
|
||||
// Not properly to be here. TODO move somewhere else in future
|
||||
public static final String HELP_URL = "http://slovnica.ijs.si/";
|
||||
|
||||
// helper maps
|
||||
/**
|
||||
* Typical window titles
|
||||
* ERROR = "Napaka"
|
||||
* WARNING = "Opozorilo"
|
||||
* CONFIRMATION = "Potrdilo"
|
||||
*/
|
||||
static HashMap<Alert.AlertType, String> windowTitles = new HashMap<>();
|
||||
|
||||
static {
|
||||
// automatically set window's title
|
||||
windowTitles.put(ERROR, "Napaka");
|
||||
windowTitles.put(WARNING, "Opozorilo");
|
||||
windowTitles.put(CONFIRMATION, "Potrdilo");
|
||||
}
|
||||
}
|
||||
389
src/main/java/gui/OneWordAnalysisTab.java
Executable file
389
src/main/java/gui/OneWordAnalysisTab.java
Executable file
@@ -0,0 +1,389 @@
|
||||
package gui;
|
||||
|
||||
import data.*;
|
||||
import javafx.application.HostServices;
|
||||
import javafx.collections.FXCollections;
|
||||
import javafx.collections.ListChangeListener;
|
||||
import javafx.collections.ObservableList;
|
||||
import javafx.concurrent.Task;
|
||||
import javafx.fxml.FXML;
|
||||
import javafx.scene.control.*;
|
||||
import javafx.scene.layout.Pane;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.controlsfx.control.CheckComboBox;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static alg.XML_processing.readXML;
|
||||
import static gui.GUIController.showAlert;
|
||||
import static gui.Messages.*;
|
||||
|
||||
@SuppressWarnings("Duplicates")
|
||||
public class OneWordAnalysisTab {
|
||||
public final static Logger logger = LogManager.getLogger(OneWordAnalysisTab.class);
|
||||
|
||||
@FXML
|
||||
public Label selectedFiltersLabel;
|
||||
@FXML
|
||||
public Label solarFilters;
|
||||
|
||||
@FXML
|
||||
private TextField msdTF;
|
||||
private ArrayList<Pattern> msd;
|
||||
private ArrayList<String> msdStrings;
|
||||
|
||||
@FXML
|
||||
private CheckComboBox<String> taxonomyCCB;
|
||||
private ArrayList<String> taxonomy;
|
||||
|
||||
@FXML
|
||||
private ComboBox<String> calculateForCB;
|
||||
private CalculateFor calculateFor;
|
||||
|
||||
|
||||
@FXML
|
||||
private Button computeNgramsB;
|
||||
|
||||
@FXML
|
||||
public ProgressBar ngramProgressBar;
|
||||
@FXML
|
||||
public Label progressLabel;
|
||||
|
||||
@FXML
|
||||
private Hyperlink helpH;
|
||||
|
||||
private enum MODE {
|
||||
LETTER,
|
||||
WORD
|
||||
}
|
||||
|
||||
private MODE currentMode;
|
||||
|
||||
private Corpus corpus;
|
||||
private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||
private Filter filter;
|
||||
private boolean useDb;
|
||||
private HostServices hostService;
|
||||
|
||||
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
|
||||
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
||||
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
||||
|
||||
|
||||
// TODO: pass observables for taxonomy based on header scan
|
||||
// after header scan
|
||||
private ObservableList<String> taxonomyCCBValues;
|
||||
private CorpusType currentCorpusType;
|
||||
|
||||
public void init() {
|
||||
currentMode = MODE.WORD;
|
||||
toggleMode(currentMode);
|
||||
|
||||
// calculateForCB
|
||||
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||
calculateFor = CalculateFor.factory(newValue);
|
||||
logger.info("calculateForCB:", calculateFor.toString());
|
||||
});
|
||||
|
||||
calculateForCB.getSelectionModel().select(0);
|
||||
|
||||
// msd
|
||||
msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
if (!newValue) {
|
||||
// focus lost
|
||||
String value = msdTF.getText();
|
||||
logger.info("msdTf: ", value);
|
||||
|
||||
if (!ValidationUtil.isEmpty(value)) {
|
||||
ArrayList<String> msdTmp = new ArrayList<>(Arrays.asList(value.split(" ")));
|
||||
|
||||
int nOfRequiredMsdTokens = 1;
|
||||
if (msdTmp.size() != nOfRequiredMsdTokens) {
|
||||
String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size());
|
||||
logAlert(msg);
|
||||
showAlert(Alert.AlertType.ERROR, msg);
|
||||
}
|
||||
msd = new ArrayList<>();
|
||||
msdStrings = new ArrayList<>();
|
||||
for (String msdToken : msdTmp) {
|
||||
msd.add(Pattern.compile(msdToken));
|
||||
msdStrings.add(msdToken);
|
||||
}
|
||||
logger.info(String.format("msd accepted (%d)", msd.size()));
|
||||
|
||||
} else if (!ValidationUtil.isEmpty(newValue)) {
|
||||
msd = new ArrayList<>();
|
||||
msdStrings = new ArrayList<>();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
msdTF.setText("");
|
||||
msd = new ArrayList<>();
|
||||
|
||||
// taxonomy
|
||||
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
||||
taxonomyCCB.getItems().removeAll();
|
||||
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
|
||||
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||
taxonomy = new ArrayList<>();
|
||||
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||
taxonomy.addAll(checkedItems);
|
||||
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
|
||||
});
|
||||
taxonomyCCB.getCheckModel().clearChecks();
|
||||
} else {
|
||||
taxonomyCCB.setDisable(true);
|
||||
}
|
||||
|
||||
computeNgramsB.setOnAction(e -> {
|
||||
compute();
|
||||
logger.info("compute button");
|
||||
});
|
||||
helpH.setOnAction(e -> openHelpWebsite());
|
||||
}
|
||||
|
||||
/**
|
||||
* case a: values for combo boxes can change after a corpus change
|
||||
* <ul>
|
||||
* <li>different corpus type - reset all fields so no old values remain</li>
|
||||
* <li>same corpus type, different subset - keep</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* case b: values for combo boxes can change after a header scan
|
||||
* <ul>
|
||||
* <li>at first, fields are populated by corpus type defaults</li>
|
||||
* <li>after, with gathered data</li>
|
||||
* </ul>
|
||||
* <p></p>
|
||||
* ngrams: 1
|
||||
* calculateFor: word
|
||||
* msd:
|
||||
* taxonomy:
|
||||
* skip: 0
|
||||
* iscvv: false
|
||||
* string length: 1
|
||||
*/
|
||||
public void populateFields() {
|
||||
// corpus changed if: current one is null (this is first run of the app)
|
||||
// or if currentCorpus != gui's corpus
|
||||
boolean corpusChanged = currentCorpusType == null
|
||||
|| currentCorpusType != corpus.getCorpusType();
|
||||
|
||||
|
||||
// TODO: check for GOS, GIGAFIDA, SOLAR...
|
||||
// refresh and:
|
||||
// TODO if current value != null && is in new calculateFor ? keep : otherwise reset
|
||||
if (calculateFor == null) {
|
||||
calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
|
||||
calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
|
||||
}
|
||||
|
||||
if (!filter.hasMsd()) {
|
||||
// if current corpus doesn't have msd data, disable this field
|
||||
msd = new ArrayList<>();
|
||||
msdTF.setText("");
|
||||
msdTF.setDisable(true);
|
||||
logger.info("no msd data");
|
||||
} else {
|
||||
if (ValidationUtil.isEmpty(msd)
|
||||
|| (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
|
||||
// msd has not been set previously
|
||||
// or msd has been set but the corpus changed -> reset
|
||||
msd = new ArrayList<>();
|
||||
msdTF.setText("");
|
||||
msdTF.setDisable(false);
|
||||
logger.info("msd reset");
|
||||
} else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
|
||||
// if msd has been set, but corpus type remained the same, we can keep any set msd value
|
||||
msdTF.setText(StringUtils.join(msdStrings, " "));
|
||||
msdTF.setDisable(false);
|
||||
logger.info("msd kept");
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: trigger on rescan
|
||||
if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
|
||||
// user changed corpus (by type) or by selection & triggered a rescan of headers
|
||||
// see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||
ObservableList<String> tax = corpus.getTaxonomy();
|
||||
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||
|
||||
currentCorpusType = corpus.getCorpusType();
|
||||
// setTaxonomyIsDirty(false);
|
||||
} else {
|
||||
|
||||
}
|
||||
|
||||
// see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||
ObservableList<String> tax = corpus.getTaxonomy();
|
||||
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||
taxonomyCCB.getItems().addAll(taxonomyCCBValues);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
|
||||
* sets combobox values to what is applicable ...
|
||||
*
|
||||
* @param mode
|
||||
*/
|
||||
public void toggleMode(MODE mode) {
|
||||
if (mode == null) {
|
||||
mode = currentMode;
|
||||
}
|
||||
|
||||
logger.info("mode: ", mode.toString());
|
||||
|
||||
if (mode == MODE.WORD) {
|
||||
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
|
||||
} else if (mode == MODE.LETTER) {
|
||||
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_LETTERS);
|
||||
|
||||
|
||||
// if calculateFor was selected for something other than a word or a lemma -> reset
|
||||
if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
|
||||
// if the user selected something else before selecting ngram for letters, reset that choice
|
||||
calculateFor = CalculateFor.WORD;
|
||||
calculateForCB.getSelectionModel().select("različnica");
|
||||
}
|
||||
}
|
||||
|
||||
// override if orth mode, allow only word
|
||||
if (corpus.isGosOrthMode()) {
|
||||
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH);
|
||||
msdTF.setDisable(true);
|
||||
} else {
|
||||
msdTF.setDisable(false);
|
||||
}
|
||||
}
|
||||
|
||||
private void compute() {
|
||||
Filter filter = new Filter();
|
||||
filter.setNgramValue(1);
|
||||
filter.setCalculateFor(calculateFor);
|
||||
filter.setMsd(msd);
|
||||
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
|
||||
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||
filter.setSkipValue(0);
|
||||
filter.setIsCvv(false);
|
||||
filter.setSolarFilters(solarFiltersMap);
|
||||
filter.setStringLength(1);
|
||||
|
||||
String message = Validation.validateForStringLevel(filter);
|
||||
if (message == null) {
|
||||
// no errors
|
||||
logger.info("Executing: ", filter.toString());
|
||||
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
||||
execute(statistic);
|
||||
} else {
|
||||
logAlert(message);
|
||||
showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
|
||||
}
|
||||
}
|
||||
|
||||
private void logAlert(String alert) {
|
||||
logger.info("alert: " + alert);
|
||||
}
|
||||
|
||||
private void openHelpWebsite(){
|
||||
hostService.showDocument(Messages.HELP_URL);
|
||||
}
|
||||
|
||||
public Corpus getCorpus() {
|
||||
return corpus;
|
||||
}
|
||||
|
||||
public void setCorpus(Corpus corpus) {
|
||||
this.corpus = corpus;
|
||||
|
||||
if (corpus.getCorpusType() != CorpusType.SOLAR) {
|
||||
setSelectedFiltersLabel(null);
|
||||
} else {
|
||||
setSelectedFiltersLabel("/");
|
||||
}
|
||||
}
|
||||
|
||||
public void setSelectedFiltersLabel(String content) {
|
||||
if (content != null) {
|
||||
solarFilters.setVisible(true);
|
||||
selectedFiltersLabel.setVisible(true);
|
||||
selectedFiltersLabel.setText(content);
|
||||
} else {
|
||||
solarFilters.setVisible(false);
|
||||
selectedFiltersLabel.setVisible(false);
|
||||
}
|
||||
}
|
||||
|
||||
private void execute(StatisticsNew statistic) {
|
||||
logger.info("Started execution: ", statistic.getFilter());
|
||||
|
||||
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
||||
boolean corpusIsSplit = corpusFiles.size() > 1;
|
||||
|
||||
final Task<Void> task = new Task<Void>() {
|
||||
@SuppressWarnings("Duplicates")
|
||||
@Override
|
||||
protected Void call() throws Exception {
|
||||
long i = 0;
|
||||
for (File f : corpusFiles) {
|
||||
readXML(f.toString(), statistic);
|
||||
i++;
|
||||
this.updateProgress(i, corpusFiles.size());
|
||||
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||
progressLabel.textProperty().bind(task.messageProperty());
|
||||
|
||||
task.setOnSucceeded(e -> {
|
||||
try {
|
||||
boolean successullySaved = statistic.saveResultToDisk();
|
||||
if (successullySaved) {
|
||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
||||
} else {
|
||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
||||
}
|
||||
} catch (UnsupportedEncodingException e1) {
|
||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
||||
logger.error("Error while saving", e1);
|
||||
}
|
||||
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
});
|
||||
|
||||
task.setOnFailed(e -> {
|
||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
|
||||
logger.error("Error while executing", e);
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setProgress(0.0);
|
||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
});
|
||||
|
||||
final Thread thread = new Thread(task, "task");
|
||||
thread.setDaemon(true);
|
||||
thread.start();
|
||||
}
|
||||
|
||||
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
|
||||
this.solarFiltersMap = solarFiltersMap;
|
||||
}
|
||||
public void setHostServices(HostServices hostServices){
|
||||
this.hostService = hostServices;
|
||||
}
|
||||
|
||||
}
|
||||
18
src/main/java/gui/SelectedFiltersPane.java
Normal file
18
src/main/java/gui/SelectedFiltersPane.java
Normal file
@@ -0,0 +1,18 @@
|
||||
package gui;
|
||||
|
||||
import javafx.scene.control.Label;
|
||||
|
||||
public class SelectedFiltersPane {
|
||||
|
||||
|
||||
public Label selectedFiltersLabel;
|
||||
|
||||
public Label getSelectedFiltersLabel() {
|
||||
return selectedFiltersLabel;
|
||||
}
|
||||
|
||||
public void setSelectedFiltersLabel(String filters) {
|
||||
this.selectedFiltersLabel = new Label(filters);
|
||||
this.selectedFiltersLabel.setText("test?");
|
||||
}
|
||||
}
|
||||
511
src/main/java/gui/StringAnalysisTabNew2.java
Executable file
511
src/main/java/gui/StringAnalysisTabNew2.java
Executable file
@@ -0,0 +1,511 @@
|
||||
package gui;
|
||||
|
||||
import static alg.XML_processing.*;
|
||||
import static gui.GUIController.*;
|
||||
import static gui.Messages.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javafx.application.HostServices;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.controlsfx.control.CheckComboBox;
|
||||
|
||||
import data.*;
|
||||
import javafx.collections.FXCollections;
|
||||
import javafx.collections.ListChangeListener;
|
||||
import javafx.collections.ObservableList;
|
||||
import javafx.concurrent.Task;
|
||||
import javafx.fxml.FXML;
|
||||
import javafx.scene.control.*;
|
||||
import javafx.scene.layout.Pane;
|
||||
|
||||
@SuppressWarnings("Duplicates")
|
||||
public class StringAnalysisTabNew2 {
|
||||
public final static Logger logger = LogManager.getLogger(StringAnalysisTabNew2.class);
|
||||
|
||||
@FXML
|
||||
public Label selectedFiltersLabel;
|
||||
@FXML
|
||||
public Label solarFilters;
|
||||
|
||||
@FXML
|
||||
private TextField msdTF;
|
||||
private ArrayList<Pattern> msd;
|
||||
private ArrayList<String> msdStrings;
|
||||
|
||||
@FXML
|
||||
private CheckComboBox<String> taxonomyCCB;
|
||||
private ArrayList<String> taxonomy;
|
||||
|
||||
@FXML
|
||||
private CheckBox calculatecvvCB;
|
||||
private boolean calculateCvv;
|
||||
|
||||
@FXML
|
||||
private TextField stringLengthTF;
|
||||
private Integer stringLength;
|
||||
|
||||
@FXML
|
||||
private ComboBox<String> calculateForCB;
|
||||
private CalculateFor calculateFor;
|
||||
|
||||
@FXML
|
||||
private ComboBox<String> ngramValueCB;
|
||||
private Integer ngramValue;
|
||||
|
||||
@FXML
|
||||
private ComboBox<String> skipValueCB;
|
||||
private Integer skipValue;
|
||||
|
||||
@FXML
|
||||
private Pane paneWords;
|
||||
|
||||
@FXML
|
||||
private Pane paneLetters;
|
||||
|
||||
@FXML
|
||||
private Button computeNgramsB;
|
||||
|
||||
@FXML
|
||||
public ProgressBar ngramProgressBar;
|
||||
@FXML
|
||||
public Label progressLabel;
|
||||
|
||||
@FXML
|
||||
private Hyperlink helpH;
|
||||
|
||||
private enum MODE {
|
||||
LETTER,
|
||||
WORD
|
||||
}
|
||||
|
||||
private MODE currentMode;
|
||||
|
||||
private Corpus corpus;
|
||||
private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||
private Filter filter;
|
||||
private boolean useDb;
|
||||
private HostServices hostService;
|
||||
|
||||
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica", "oblikoskladenjska oznaka");
|
||||
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
||||
private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
||||
|
||||
|
||||
// TODO: pass observables for taxonomy based on header scan
|
||||
// after header scan
|
||||
private ObservableList<String> taxonomyCCBValues;
|
||||
private CorpusType currentCorpusType;
|
||||
|
||||
public void init() {
|
||||
currentMode = MODE.WORD;
|
||||
toggleMode(currentMode);
|
||||
|
||||
// ngram value CB
|
||||
ngramValueCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||
if (newValue.equals("nivo črk")) {
|
||||
ngramValue = 0;
|
||||
toggleMode(MODE.LETTER);
|
||||
} else {
|
||||
ngramValue = Integer.valueOf(newValue);
|
||||
toggleMode(MODE.WORD);
|
||||
}
|
||||
|
||||
// skip only on ngrams of more than one word
|
||||
if (ngramValue > 1) {
|
||||
skipValueCB.setDisable(false);
|
||||
} else {
|
||||
skipValueCB.getSelectionModel().select(0);
|
||||
skipValue = 0;
|
||||
skipValueCB.setDisable(true);
|
||||
}
|
||||
|
||||
logger.info("ngramValueCB:", ngramValue);
|
||||
});
|
||||
|
||||
// set first n-gram value to 2 at index 0
|
||||
ngramValueCB.getSelectionModel().select(0); // selected index
|
||||
ngramValue = 2; // actual value at that index
|
||||
|
||||
// calculateForCB
|
||||
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||
calculateFor = CalculateFor.factory(newValue);
|
||||
logger.info("calculateForCB:", calculateFor.toString());
|
||||
});
|
||||
|
||||
calculateForCB.getSelectionModel().select(0);
|
||||
|
||||
// msd
|
||||
msdTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
if (!newValue) {
|
||||
// focus lost
|
||||
String value = msdTF.getText();
|
||||
logger.info("msdTf: ", value);
|
||||
|
||||
if (!ValidationUtil.isEmpty(value)) {
|
||||
ArrayList<String> msdTmp = new ArrayList<>(Arrays.asList(value.split(" ")));
|
||||
|
||||
int nOfRequiredMsdTokens = ngramValue == 0 ? 1 : ngramValue;
|
||||
if (msdTmp.size() != nOfRequiredMsdTokens) {
|
||||
String msg = String.format(Messages.WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES, nOfRequiredMsdTokens, msdTmp.size());
|
||||
logAlert(msg);
|
||||
showAlert(Alert.AlertType.ERROR, msg);
|
||||
}
|
||||
msd = new ArrayList<>();
|
||||
msdStrings = new ArrayList<>();
|
||||
for (String msdToken : msdTmp) {
|
||||
msd.add(Pattern.compile(msdToken));
|
||||
msdStrings.add(msdToken);
|
||||
}
|
||||
logger.info(String.format("msd accepted (%d)", msd.size()));
|
||||
|
||||
} else if (!ValidationUtil.isEmpty(newValue)) {
|
||||
msd = new ArrayList<>();
|
||||
msdStrings = new ArrayList<>();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
msdTF.setText("");
|
||||
msd = new ArrayList<>();
|
||||
|
||||
// taxonomy
|
||||
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
||||
taxonomyCCB.getItems().removeAll();
|
||||
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
|
||||
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||
taxonomy = new ArrayList<>();
|
||||
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||
taxonomy.addAll(checkedItems);
|
||||
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
|
||||
});
|
||||
taxonomyCCB.getCheckModel().clearChecks();
|
||||
} else {
|
||||
taxonomyCCB.setDisable(true);
|
||||
}
|
||||
|
||||
// skip
|
||||
skipValueCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||
skipValue = Integer.valueOf(newValue);
|
||||
logger.info("Skip " + skipValue);
|
||||
});
|
||||
|
||||
skipValueCB.getSelectionModel().select(0);
|
||||
skipValue = 0;
|
||||
|
||||
// cvv
|
||||
calculatecvvCB.selectedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
calculateCvv = newValue;
|
||||
logger.info("calculate cvv: " + calculateCvv);
|
||||
});
|
||||
|
||||
calculatecvvCB.setSelected(false);
|
||||
|
||||
// string length
|
||||
stringLengthTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
||||
if (!newValue) {
|
||||
// focus lost
|
||||
String value = stringLengthTF.getText();
|
||||
if (!ValidationUtil.isEmpty(value)) {
|
||||
if (!ValidationUtil.isNumber(value)) {
|
||||
logAlert("stringlengthTf: " + WARNING_ONLY_NUMBERS_ALLOWED);
|
||||
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
|
||||
}
|
||||
stringLength = Integer.parseInt(value);
|
||||
} else {
|
||||
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_MISSING_STRING_LENGTH);
|
||||
stringLengthTF.setText("1");
|
||||
logAlert(WARNING_MISSING_STRING_LENGTH);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
computeNgramsB.setOnAction(e -> {
|
||||
compute();
|
||||
logger.info("compute button");
|
||||
});
|
||||
|
||||
helpH.setOnAction(e -> openHelpWebsite());
|
||||
}
|
||||
|
||||
/**
|
||||
* case a: values for combo boxes can change after a corpus change
|
||||
* <ul>
|
||||
* <li>different corpus type - reset all fields so no old values remain</li>
|
||||
* <li>same corpus type, different subset - keep</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* case b: values for combo boxes can change after a header scan
|
||||
* <ul>
|
||||
* <li>at first, fields are populated by corpus type defaults</li>
|
||||
* <li>after, with gathered data</li>
|
||||
* </ul>
|
||||
* <p></p>
|
||||
* ngrams: 1
|
||||
* calculateFor: word
|
||||
* msd:
|
||||
* taxonomy:
|
||||
* skip: 0
|
||||
* iscvv: false
|
||||
* string length: 1
|
||||
*/
|
||||
public void populateFields() {
|
||||
// corpus changed if: current one is null (this is first run of the app)
|
||||
// or if currentCorpus != gui's corpus
|
||||
boolean corpusChanged = currentCorpusType == null
|
||||
|| currentCorpusType != corpus.getCorpusType();
|
||||
|
||||
// keep ngram value if set
|
||||
if (ngramValue == null) {
|
||||
ngramValueCB.getSelectionModel().select("1");
|
||||
ngramValue = 1;
|
||||
}
|
||||
|
||||
// TODO: check for GOS, GIGAFIDA, SOLAR...
|
||||
// refresh and:
|
||||
// TODO if current value != null && is in new calculateFor ? keep : otherwise reset
|
||||
if (calculateFor == null) {
|
||||
calculateForCB.getSelectionModel().select(calculateForCB.getItems().get(0));
|
||||
calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
|
||||
}
|
||||
|
||||
if (!filter.hasMsd()) {
|
||||
// if current corpus doesn't have msd data, disable this field
|
||||
msd = new ArrayList<>();
|
||||
msdTF.setText("");
|
||||
msdTF.setDisable(true);
|
||||
logger.info("no msd data");
|
||||
} else {
|
||||
if (ValidationUtil.isEmpty(msd)
|
||||
|| (!ValidationUtil.isEmpty(msd) && corpusChanged)) {
|
||||
// msd has not been set previously
|
||||
// or msd has been set but the corpus changed -> reset
|
||||
msd = new ArrayList<>();
|
||||
msdTF.setText("");
|
||||
msdTF.setDisable(false);
|
||||
logger.info("msd reset");
|
||||
} else if (!ValidationUtil.isEmpty(msd) && !corpusChanged) {
|
||||
// if msd has been set, but corpus type remained the same, we can keep any set msd value
|
||||
msdTF.setText(StringUtils.join(msdStrings, " "));
|
||||
msdTF.setDisable(false);
|
||||
logger.info("msd kept");
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: taxonomy: refresh and keep if in new taxonomy, otherwise empty (no selection)
|
||||
|
||||
// keep skip value
|
||||
if (skipValue == null) {
|
||||
skipValueCB.getSelectionModel().select("0");
|
||||
skipValue = 0;
|
||||
}
|
||||
|
||||
// keep calculateCvv
|
||||
calculatecvvCB.setSelected(calculateCvv);
|
||||
|
||||
// keep string length if set
|
||||
if (stringLength != null) {
|
||||
stringLengthTF.setText(String.valueOf(stringLength));
|
||||
} else {
|
||||
stringLengthTF.setText("1");
|
||||
stringLength = 1;
|
||||
}
|
||||
|
||||
// TODO: trigger on rescan
|
||||
if ((currentCorpusType != null && currentCorpusType != corpus.getCorpusType())) {
|
||||
// user changed corpus (by type) or by selection & triggered a rescan of headers
|
||||
// see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||
ObservableList<String> tax = corpus.getTaxonomy();
|
||||
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||
|
||||
currentCorpusType = corpus.getCorpusType();
|
||||
// setTaxonomyIsDirty(false);
|
||||
} else {
|
||||
|
||||
}
|
||||
|
||||
// see if we read taxonomy from headers, otherwise use default values for given corpus
|
||||
ObservableList<String> tax = corpus.getTaxonomy();
|
||||
taxonomyCCBValues = tax != null ? tax : Taxonomy.getDefaultForComboBox(corpus.getCorpusType());
|
||||
taxonomyCCB.getItems().addAll(taxonomyCCBValues);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Toggles visibility for panes which hold fields for skipgram value (not applicable when calculating for letters) etc.,
|
||||
* sets combobox values to what is applicable ...
|
||||
*
|
||||
* @param mode
|
||||
*/
|
||||
public void toggleMode(MODE mode) {
|
||||
if (mode == null) {
|
||||
mode = currentMode;
|
||||
}
|
||||
|
||||
logger.info("mode: ", mode.toString());
|
||||
|
||||
if (mode == MODE.WORD) {
|
||||
paneWords.setVisible(true);
|
||||
paneLetters.setVisible(false);
|
||||
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS);
|
||||
} else if (mode == MODE.LETTER) {
|
||||
paneWords.setVisible(false);
|
||||
paneLetters.setVisible(true);
|
||||
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_LETTERS);
|
||||
|
||||
// populate with default cvv length value
|
||||
if (stringLength == null) {
|
||||
stringLengthTF.setText("1");
|
||||
stringLength = 1;
|
||||
} else {
|
||||
stringLengthTF.setText(String.valueOf(stringLength));
|
||||
}
|
||||
|
||||
// if calculateFor was selected for something other than a word or a lemma -> reset
|
||||
if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
|
||||
// if the user selected something else before selecting ngram for letters, reset that choice
|
||||
calculateFor = CalculateFor.WORD;
|
||||
calculateForCB.getSelectionModel().select("različnica");
|
||||
}
|
||||
}
|
||||
|
||||
// override if orth mode, allow only word
|
||||
if (corpus.isGosOrthMode()) {
|
||||
calculateForCB.getItems().setAll(N_GRAM_COMPUTE_FOR_WORDS_ORTH);
|
||||
msdTF.setDisable(true);
|
||||
} else {
|
||||
msdTF.setDisable(false);
|
||||
}
|
||||
}
|
||||
|
||||
private void compute() {
|
||||
Filter filter = new Filter();
|
||||
filter.setNgramValue(ngramValue);
|
||||
filter.setCalculateFor(calculateFor);
|
||||
filter.setMsd(msd);
|
||||
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
|
||||
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||
filter.setSkipValue(skipValue);
|
||||
filter.setIsCvv(calculateCvv);
|
||||
filter.setSolarFilters(solarFiltersMap);
|
||||
|
||||
if (ngramValue != null && ngramValue == 0) {
|
||||
filter.setStringLength(stringLength);
|
||||
}
|
||||
|
||||
String message = Validation.validateForStringLevel(filter);
|
||||
if (message == null) {
|
||||
// no errors
|
||||
logger.info("Executing: ", filter.toString());
|
||||
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
||||
execute(statistic);
|
||||
} else {
|
||||
logAlert(message);
|
||||
showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
|
||||
}
|
||||
}
|
||||
|
||||
private void logAlert(String alert) {
|
||||
logger.info("alert: " + alert);
|
||||
}
|
||||
|
||||
private void openHelpWebsite(){
|
||||
hostService.showDocument(Messages.HELP_URL);
|
||||
}
|
||||
|
||||
public Corpus getCorpus() {
|
||||
return corpus;
|
||||
}
|
||||
|
||||
public void setCorpus(Corpus corpus) {
|
||||
this.corpus = corpus;
|
||||
|
||||
if (corpus.getCorpusType() != CorpusType.SOLAR) {
|
||||
setSelectedFiltersLabel(null);
|
||||
} else {
|
||||
setSelectedFiltersLabel("/");
|
||||
}
|
||||
}
|
||||
|
||||
public void setSelectedFiltersLabel(String content) {
|
||||
if (content != null) {
|
||||
solarFilters.setVisible(true);
|
||||
selectedFiltersLabel.setVisible(true);
|
||||
selectedFiltersLabel.setText(content);
|
||||
} else {
|
||||
solarFilters.setVisible(false);
|
||||
selectedFiltersLabel.setVisible(false);
|
||||
}
|
||||
}
|
||||
|
||||
private void execute(StatisticsNew statistic) {
|
||||
logger.info("Started execution: ", statistic.getFilter());
|
||||
|
||||
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
||||
boolean corpusIsSplit = corpusFiles.size() > 1;
|
||||
|
||||
final Task<Void> task = new Task<Void>() {
|
||||
@SuppressWarnings("Duplicates")
|
||||
@Override
|
||||
protected Void call() throws Exception {
|
||||
long i = 0;
|
||||
for (File f : corpusFiles) {
|
||||
readXML(f.toString(), statistic);
|
||||
i++;
|
||||
this.updateProgress(i, corpusFiles.size());
|
||||
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||
progressLabel.textProperty().bind(task.messageProperty());
|
||||
|
||||
task.setOnSucceeded(e -> {
|
||||
try {
|
||||
boolean successullySaved = statistic.saveResultToDisk();
|
||||
if (successullySaved) {
|
||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
||||
} else {
|
||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
||||
}
|
||||
} catch (UnsupportedEncodingException e1) {
|
||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
||||
logger.error("Error while saving", e1);
|
||||
}
|
||||
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
});
|
||||
|
||||
task.setOnFailed(e -> {
|
||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
|
||||
logger.error("Error while executing", e);
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setProgress(0.0);
|
||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
});
|
||||
|
||||
final Thread thread = new Thread(task, "task");
|
||||
thread.setDaemon(true);
|
||||
thread.start();
|
||||
}
|
||||
|
||||
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
|
||||
this.solarFiltersMap = solarFiltersMap;
|
||||
}
|
||||
public void setHostServices(HostServices hostServices){
|
||||
this.hostService = hostServices;
|
||||
}
|
||||
}
|
||||
77
src/main/java/gui/ValidationUtil.java
Normal file
77
src/main/java/gui/ValidationUtil.java
Normal file
@@ -0,0 +1,77 @@
|
||||
package gui;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.lang3.math.NumberUtils;
|
||||
|
||||
public class ValidationUtil {
|
||||
|
||||
public static boolean isNumber(String value) {
|
||||
return NumberUtils.isCreatable(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if an object is empty or null. Null part is especially important,
|
||||
* since Java's built-in isEmpty() methods don't check for this condition
|
||||
* and throw a nullPointerException as a result.
|
||||
* <p>
|
||||
* Supported structures:
|
||||
* <ul>
|
||||
* <li>String: empty if null or length is zero</li>
|
||||
* <li>List: empty if null or size() == 0</li>
|
||||
* <li>Map: empty if null or if it contains no keys, or if all keys map to an empty value </li>
|
||||
* </ul>
|
||||
*/
|
||||
public static boolean isEmpty(Object o) {
|
||||
if (o == null) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (o instanceof String) {
|
||||
if (((String) o).length() == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (o instanceof List) {
|
||||
if (((List) o).isEmpty()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (o instanceof Map) {
|
||||
if (((Map) o).keySet().isEmpty()) {
|
||||
return true;
|
||||
} else {
|
||||
for (Object val : ((Map) o).values()) {
|
||||
if (!isEmpty(val)) {
|
||||
// if map contains any value that isn't empty, the map isn't considered empty
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public static boolean isNotEmpty(Object o) {
|
||||
return !isEmpty(o);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether a given File is a folder for which we have appropriate permission
|
||||
*/
|
||||
public static boolean isValidDirectory(File f) {
|
||||
return f.isDirectory() && f.canRead() && f.canWrite();
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether a given File is a folder for which we have appropriate permission
|
||||
*/
|
||||
public static boolean isReadableDirectory(File f) {
|
||||
return f.isDirectory() && f.canRead();
|
||||
}
|
||||
}
|
||||
208
src/main/java/gui/WordFormationTab.java
Normal file
208
src/main/java/gui/WordFormationTab.java
Normal file
@@ -0,0 +1,208 @@
|
||||
package gui;
|
||||
|
||||
import static alg.XML_processing.*;
|
||||
import static gui.GUIController.*;
|
||||
import static gui.Messages.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
||||
import javafx.application.HostServices;
|
||||
import javafx.scene.control.*;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.controlsfx.control.CheckComboBox;
|
||||
|
||||
import data.*;
|
||||
import javafx.collections.ListChangeListener;
|
||||
import javafx.collections.ObservableList;
|
||||
import javafx.concurrent.Task;
|
||||
import javafx.fxml.FXML;
|
||||
import javafx.scene.layout.AnchorPane;
|
||||
|
||||
@SuppressWarnings("Duplicates")
|
||||
public class WordFormationTab {
|
||||
public final static Logger logger = LogManager.getLogger(WordFormationTab.class);
|
||||
|
||||
public AnchorPane wordAnalysisTabPane;
|
||||
|
||||
@FXML
|
||||
public Label selectedFiltersLabel;
|
||||
@FXML
|
||||
public Label solarFilters;
|
||||
|
||||
@FXML
|
||||
private CheckComboBox<String> taxonomyCCB;
|
||||
private ArrayList<String> taxonomy;
|
||||
|
||||
@FXML
|
||||
private Button computeB;
|
||||
|
||||
@FXML
|
||||
public ProgressBar ngramProgressBar;
|
||||
@FXML
|
||||
public Label progressLabel;
|
||||
|
||||
@FXML
|
||||
private Hyperlink helpH;
|
||||
|
||||
private Corpus corpus;
|
||||
private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||
private HostServices hostService;
|
||||
|
||||
// after header scan
|
||||
private ObservableList<String> taxonomyCCBValues;
|
||||
private CorpusType currentCorpusType;
|
||||
private boolean useDb;
|
||||
|
||||
|
||||
public void init() {
|
||||
// taxonomy
|
||||
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
||||
taxonomyCCB.getItems().removeAll();
|
||||
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
|
||||
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||
taxonomy = new ArrayList<>();
|
||||
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||
taxonomy.addAll(checkedItems);
|
||||
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
|
||||
});
|
||||
taxonomyCCB.getCheckModel().clearChecks();
|
||||
} else {
|
||||
taxonomyCCB.setDisable(true);
|
||||
}
|
||||
|
||||
computeB.setOnAction(e -> {
|
||||
compute();
|
||||
logger.info("compute button");
|
||||
});
|
||||
|
||||
helpH.setOnAction(e -> openHelpWebsite());
|
||||
}
|
||||
|
||||
private void compute() {
|
||||
Filter filter = new Filter();
|
||||
filter.setNgramValue(1);
|
||||
filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
|
||||
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
|
||||
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||
filter.setSkipValue(0);
|
||||
filter.setMsd(new ArrayList<>());
|
||||
filter.setIsCvv(false);
|
||||
filter.setSolarFilters(solarFiltersMap);
|
||||
|
||||
String message = Validation.validateForStringLevel(filter);
|
||||
if (message == null) {
|
||||
// no errors
|
||||
logger.info("Executing: ", filter.toString());
|
||||
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
||||
execute(statistic);
|
||||
} else {
|
||||
logAlert(message);
|
||||
showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
|
||||
}
|
||||
}
|
||||
|
||||
private void openHelpWebsite(){
|
||||
hostService.showDocument(Messages.HELP_URL);
|
||||
}
|
||||
|
||||
private void execute(StatisticsNew statistic) {
|
||||
logger.info("Started execution: ", statistic.getFilter());
|
||||
|
||||
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
||||
|
||||
final Task<Void> task = new Task<Void>() {
|
||||
@SuppressWarnings("Duplicates")
|
||||
@Override
|
||||
protected Void call() throws Exception {
|
||||
long i = 0;
|
||||
for (File f : corpusFiles) {
|
||||
readXML(f.toString(), statistic);
|
||||
i++;
|
||||
this.updateProgress(i, corpusFiles.size());
|
||||
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||
progressLabel.textProperty().bind(task.messageProperty());
|
||||
|
||||
task.setOnSucceeded(e -> {
|
||||
try {
|
||||
// first, we have to recalculate all occurrences to detailed statistics
|
||||
boolean successullySaved = statistic.recalculateAndSaveResultToDisk();
|
||||
|
||||
if (successullySaved) {
|
||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
||||
} else {
|
||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
||||
}
|
||||
} catch (UnsupportedEncodingException e1) {
|
||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
||||
logger.error("Error while saving", e1);
|
||||
}
|
||||
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
});
|
||||
|
||||
task.setOnFailed(e -> {
|
||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
|
||||
logger.error("Error while executing", e);
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setProgress(0.0);
|
||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
});
|
||||
|
||||
final Thread thread = new Thread(task, "task");
|
||||
thread.setDaemon(true);
|
||||
thread.start();
|
||||
}
|
||||
|
||||
private void logAlert(String alert) {
|
||||
logger.info("alert: " + alert);
|
||||
}
|
||||
|
||||
|
||||
public void setCorpus(Corpus corpus) {
|
||||
this.corpus = corpus;
|
||||
|
||||
if (corpus.getCorpusType() != CorpusType.SOLAR) {
|
||||
setSelectedFiltersLabel(null);
|
||||
} else {
|
||||
setSelectedFiltersLabel("/");
|
||||
}
|
||||
}
|
||||
|
||||
public void setSelectedFiltersLabel(String content) {
|
||||
if (content != null) {
|
||||
solarFilters.setVisible(true);
|
||||
selectedFiltersLabel.setVisible(true);
|
||||
selectedFiltersLabel.setText(content);
|
||||
} else {
|
||||
solarFilters.setVisible(false);
|
||||
selectedFiltersLabel.setVisible(false);
|
||||
}
|
||||
}
|
||||
|
||||
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
|
||||
this.solarFiltersMap = solarFiltersMap;
|
||||
}
|
||||
|
||||
public void setHostServices(HostServices hostServices){
|
||||
this.hostService = hostServices;
|
||||
}
|
||||
}
|
||||
207
src/main/java/gui/WordLevelTab.java
Normal file
207
src/main/java/gui/WordLevelTab.java
Normal file
@@ -0,0 +1,207 @@
|
||||
package gui;
|
||||
|
||||
import static alg.XML_processing.*;
|
||||
import static gui.GUIController.*;
|
||||
import static gui.Messages.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
||||
import javafx.application.HostServices;
|
||||
import javafx.scene.control.*;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.controlsfx.control.CheckComboBox;
|
||||
|
||||
import data.*;
|
||||
import javafx.collections.ListChangeListener;
|
||||
import javafx.collections.ObservableList;
|
||||
import javafx.concurrent.Task;
|
||||
import javafx.fxml.FXML;
|
||||
import javafx.scene.layout.AnchorPane;
|
||||
|
||||
@SuppressWarnings("Duplicates")
|
||||
public class WordLevelTab {
|
||||
public final static Logger logger = LogManager.getLogger(WordLevelTab.class);
|
||||
|
||||
public AnchorPane wordLevelAnalysisTabPane;
|
||||
|
||||
@FXML
|
||||
public Label selectedFiltersLabel;
|
||||
@FXML
|
||||
public Label solarFilters;
|
||||
|
||||
@FXML
|
||||
private CheckComboBox<String> taxonomyCCB;
|
||||
private ArrayList<String> taxonomy;
|
||||
|
||||
@FXML
|
||||
private Button computeB;
|
||||
|
||||
@FXML
|
||||
public ProgressBar ngramProgressBar;
|
||||
@FXML
|
||||
public Label progressLabel;
|
||||
|
||||
@FXML
|
||||
private Hyperlink helpH;
|
||||
|
||||
private Corpus corpus;
|
||||
private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||
private HostServices hostService;
|
||||
|
||||
// after header scan
|
||||
private ObservableList<String> taxonomyCCBValues;
|
||||
private CorpusType currentCorpusType;
|
||||
private boolean useDb;
|
||||
|
||||
|
||||
public void init() {
|
||||
// taxonomy
|
||||
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
||||
taxonomyCCB.getItems().removeAll();
|
||||
taxonomyCCB.getItems().setAll(corpus.getTaxonomy());
|
||||
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||
taxonomy = new ArrayList<>();
|
||||
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||
taxonomy.addAll(checkedItems);
|
||||
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
|
||||
});
|
||||
taxonomyCCB.getCheckModel().clearChecks();
|
||||
} else {
|
||||
taxonomyCCB.setDisable(true);
|
||||
}
|
||||
|
||||
computeB.setOnAction(e -> {
|
||||
compute();
|
||||
logger.info("compute button");
|
||||
});
|
||||
|
||||
helpH.setOnAction(e -> openHelpWebsite());
|
||||
}
|
||||
|
||||
private void openHelpWebsite(){
|
||||
hostService.showDocument(Messages.HELP_URL);
|
||||
}
|
||||
private void compute() {
|
||||
Filter filter = new Filter();
|
||||
filter.setNgramValue(1);
|
||||
filter.setCalculateFor(CalculateFor.WORD);
|
||||
filter.setTaxonomy(Tax.getTaxonomyCodes(taxonomy, corpus.getCorpusType()));
|
||||
filter.setAl(AnalysisLevel.WORD_LEVEL);
|
||||
filter.setSkipValue(0);
|
||||
filter.setMsd(new ArrayList<>());
|
||||
filter.setIsCvv(false);
|
||||
filter.setSolarFilters(solarFiltersMap);
|
||||
|
||||
String message = Validation.validateForStringLevel(filter);
|
||||
if (message == null) {
|
||||
// no errors
|
||||
logger.info("Executing: ", filter.toString());
|
||||
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
||||
execute(statistic);
|
||||
} else {
|
||||
logAlert(message);
|
||||
showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
|
||||
}
|
||||
}
|
||||
|
||||
private void execute(StatisticsNew statistic) {
|
||||
logger.info("Started execution: ", statistic.getFilter());
|
||||
|
||||
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
||||
|
||||
final Task<Void> task = new Task<Void>() {
|
||||
@SuppressWarnings("Duplicates")
|
||||
@Override
|
||||
protected Void call() throws Exception {
|
||||
long i = 0;
|
||||
for (File f : corpusFiles) {
|
||||
readXML(f.toString(), statistic);
|
||||
i++;
|
||||
this.updateProgress(i, corpusFiles.size());
|
||||
this.updateMessage(String.format(ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y, i, corpusFiles.size(), f.getName()));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||
progressLabel.textProperty().bind(task.messageProperty());
|
||||
|
||||
task.setOnSucceeded(e -> {
|
||||
try {
|
||||
// first, we have to recalculate all occurrences to detailed statistics
|
||||
boolean successullySaved = statistic.saveResultNestedToDisk();
|
||||
|
||||
if (successullySaved) {
|
||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED);
|
||||
} else {
|
||||
showAlert(Alert.AlertType.INFORMATION, Messages.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS);
|
||||
}
|
||||
} catch (UnsupportedEncodingException e1) {
|
||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_SAVING_RESULTS_TO_CSV);
|
||||
logger.error("Error while saving", e1);
|
||||
}
|
||||
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
});
|
||||
|
||||
task.setOnFailed(e -> {
|
||||
showAlert(Alert.AlertType.ERROR, ERROR_WHILE_EXECUTING);
|
||||
logger.error("Error while executing", e);
|
||||
ngramProgressBar.progressProperty().unbind();
|
||||
ngramProgressBar.setProgress(0.0);
|
||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||
progressLabel.textProperty().unbind();
|
||||
progressLabel.setText("");
|
||||
});
|
||||
|
||||
final Thread thread = new Thread(task, "task");
|
||||
thread.setDaemon(true);
|
||||
thread.start();
|
||||
}
|
||||
|
||||
private void logAlert(String alert) {
|
||||
logger.info("alert: " + alert);
|
||||
}
|
||||
|
||||
|
||||
public void setCorpus(Corpus corpus) {
|
||||
this.corpus = corpus;
|
||||
|
||||
if (corpus.getCorpusType() != CorpusType.SOLAR) {
|
||||
setSelectedFiltersLabel(null);
|
||||
} else {
|
||||
setSelectedFiltersLabel("/");
|
||||
}
|
||||
}
|
||||
|
||||
public void setSelectedFiltersLabel(String content) {
|
||||
if (content != null) {
|
||||
solarFilters.setVisible(true);
|
||||
selectedFiltersLabel.setVisible(true);
|
||||
selectedFiltersLabel.setText(content);
|
||||
} else {
|
||||
solarFilters.setVisible(false);
|
||||
selectedFiltersLabel.setVisible(false);
|
||||
}
|
||||
}
|
||||
|
||||
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
|
||||
this.solarFiltersMap = solarFiltersMap;
|
||||
}
|
||||
|
||||
public void setHostServices(HostServices hostServices){
|
||||
this.hostService = hostServices;
|
||||
}
|
||||
}
|
||||
3
src/main/java/manifest/META-INF/MANIFEST.MF
Normal file
3
src/main/java/manifest/META-INF/MANIFEST.MF
Normal file
@@ -0,0 +1,3 @@
|
||||
Manifest-Version: 1.0
|
||||
Main-Class: gui.GUIController
|
||||
|
||||
25
src/main/java/util/ByteUtils.java
Normal file
25
src/main/java/util/ByteUtils.java
Normal file
@@ -0,0 +1,25 @@
|
||||
package util;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
/**
 * Conversions between a {@code long} and its 8-byte representation, done with a
 * {@link ByteBuffer} in its default byte order.
 */
public class ByteUtils {

    /**
     * Turns a long into an array of {@code Long.BYTES} bytes.
     * Approach taken from <a href="https://stackoverflow.com/a/4485196">StackOverflow</a>.
     *
     * @param x the value to convert
     * @return the backing array of a buffer that holds exactly this value
     */
    public static byte[] longToBytes(long x) {
        return ByteBuffer.allocate(Long.BYTES).putLong(x).array();
    }

    /**
     * Reads a long back from a byte array.
     * Approach taken from <a href="https://stackoverflow.com/a/4485196">StackOverflow</a>.
     *
     * @param bytes the bytes to read; the buffer they are copied into has room for
     *              {@code Long.BYTES} bytes
     * @return the long stored in the given bytes
     */
    public static long bytesToLong(byte[] bytes) {
        final ByteBuffer scratch = ByteBuffer.allocate(Long.BYTES);
        // copy the bytes in, then flip so that reading starts at the first copied byte
        scratch.put(bytes).flip();
        return scratch.getLong();
    }
}
|
||||
46
src/main/java/util/Combinations.java
Normal file
46
src/main/java/util/Combinations.java
Normal file
@@ -0,0 +1,46 @@
|
||||
package util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
 * Generates sets of indices by enumerating combinations of the numbers
 * {@code 1 .. maxNOfIndices - 1}.
 */
public class Combinations {
    // collects the combinations produced by combinationUtil for the current generateIndices call
    private static HashSet<HashSet<Integer>> result = new HashSet<>();

    /**
     * Recursively fills {@code data} with every combination of {@code combinationLength}
     * elements taken from {@code arr[start..end]} and adds each finished combination, as a
     * set, to {@code result}.
     *
     * @param arr               input array
     * @param data              temporary array that holds the combination being built
     * @param start             first index in {@code arr} that may still be used
     * @param end               last index in {@code arr} that may be used
     * @param index             position in {@code data} that is filled next
     * @param combinationLength size of a finished combination
     */
    static void combinationUtil(int[] arr, Integer[] data, int start, int end, int index, int combinationLength) {
        // the combination is complete: store a copy of it and stop descending
        if (index == combinationLength) {
            HashSet<Integer> combination = new HashSet<>();
            combination.addAll(Arrays.asList(data));
            result.add(combination);
            return;
        }

        // Try every element that can still go to position "index". The second condition
        // makes sure enough elements remain after the candidate to fill the positions
        // that are still open.
        final int stillOpen = combinationLength - index;
        int candidate = start;
        while (candidate <= end && end - candidate + 1 >= stillOpen) {
            data[index] = arr[candidate];
            combinationUtil(arr, data, candidate + 1, end, index + 1, combinationLength);
            candidate++;
        }
    }

    /**
     * Builds all combinations of the numbers {@code 1 .. maxNOfIndices - 1} whose size is
     * between 1 and {@code maxNOfIndices - 2}, plus the empty set.
     *
     * @param maxNOfIndices exclusive upper bound of the numbers that are combined
     * @return the generated sets of indices
     */
    public static HashSet<HashSet<Integer>> generateIndices(int maxNOfIndices) {
        result = new HashSet<>();

        final int[] positions = IntStream.range(1, maxNOfIndices).toArray();
        final int lastPosition = positions.length - 1;

        int length = 1;
        while (length < maxNOfIndices - 1) {
            // a fresh temporary array per length holds the combination being built
            combinationUtil(positions, new Integer[length], 0, lastPosition, 0, length);
            length++;
        }

        // also add an empty one for X.... (all of this type)
        result.add(new HashSet<>());

        return result;
    }
}
|
||||
267
src/main/java/util/Export.java
Normal file
267
src/main/java/util/Export.java
Normal file
@@ -0,0 +1,267 @@
|
||||
package util;
|
||||
|
||||
import static util.Util.*;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
import org.apache.commons.csv.CSVPrinter;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.json.simple.JSONArray;
|
||||
import org.json.simple.JSONObject;
|
||||
|
||||
import data.Enums.WordLevelType;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public class Export {
|
||||
public static void SetToJSON(Set<Pair<String, Map<String, Long>>> set) {
|
||||
JSONArray wrapper = new JSONArray();
|
||||
|
||||
for (Pair<String, Map<String, Long>> p : set) {
|
||||
JSONArray data_wrapper = new JSONArray();
|
||||
JSONObject metric = new JSONObject();
|
||||
|
||||
String title = p.getLeft();
|
||||
Map<String, Long> map = p.getRight();
|
||||
|
||||
if (map.isEmpty())
|
||||
continue;
|
||||
|
||||
long total = Util.mapSumFrequencies(map);
|
||||
|
||||
for (Map.Entry<String, Long> e : map.entrySet()) {
|
||||
JSONObject data_entry = new JSONObject();
|
||||
data_entry.put("word", e.getKey());
|
||||
data_entry.put("frequency", e.getValue());
|
||||
data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total));
|
||||
|
||||
data_wrapper.add(data_entry);
|
||||
}
|
||||
|
||||
metric.put("Title", title);
|
||||
metric.put("data", data_wrapper);
|
||||
wrapper.add(metric);
|
||||
}
|
||||
|
||||
try (FileWriter file = new FileWriter("statistics.json")) {
|
||||
file.write(wrapper.toJSONString());
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
||||
//Delimiter used in CSV file
|
||||
String NEW_LINE_SEPARATOR = "\n";
|
||||
|
||||
//CSV file header
|
||||
Object[] FILE_HEADER = {"word", "frequency", "percent"};
|
||||
|
||||
String fileName = "";
|
||||
|
||||
for (Pair<String, Map<String, Long>> p : set) {
|
||||
String title = p.getLeft();
|
||||
fileName = title.replace(": ", "-");
|
||||
fileName = fileName.replace(" ", "_").concat(".csv");
|
||||
|
||||
fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
||||
|
||||
Map<String, Long> map = p.getRight();
|
||||
|
||||
if (map.isEmpty())
|
||||
continue;
|
||||
|
||||
long total = Util.mapSumFrequencies(map);
|
||||
|
||||
OutputStreamWriter fileWriter = null;
|
||||
CSVPrinter csvFilePrinter = null;
|
||||
|
||||
//Create the CSVFormat object with "\n" as a record delimiter
|
||||
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
|
||||
|
||||
try {
|
||||
//initialize FileWriter object
|
||||
fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
|
||||
|
||||
//initialize CSVPrinter object
|
||||
csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
|
||||
|
||||
// write info block
|
||||
printHeaderInfo(csvFilePrinter, headerInfoBlock);
|
||||
|
||||
//Create CSV file header
|
||||
csvFilePrinter.printRecord(FILE_HEADER);
|
||||
|
||||
for (Map.Entry<String, Long> e : map.entrySet()) {
|
||||
List dataEntry = new ArrayList<>();
|
||||
dataEntry.add(e.getKey());
|
||||
dataEntry.add(e.getValue().toString());
|
||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / total));
|
||||
csvFilePrinter.printRecord(dataEntry);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.out.println("Error in CsvFileWriter!");
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
if (fileWriter != null) {
|
||||
fileWriter.flush();
|
||||
fileWriter.close();
|
||||
}
|
||||
if (csvFilePrinter != null) {
|
||||
csvFilePrinter.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return fileName;
|
||||
}
|
||||
|
||||
public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
||||
//Delimiter used in CSV file
|
||||
String NEW_LINE_SEPARATOR = "\n";
|
||||
|
||||
//CSV file header
|
||||
Object[] FILE_HEADER = {"word", "frequency", "percent"};
|
||||
|
||||
String fileName = "";
|
||||
|
||||
fileName = title.replace(": ", "-");
|
||||
fileName = fileName.replace(" ", "_").concat(".csv");
|
||||
|
||||
fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
||||
|
||||
OutputStreamWriter fileWriter = null;
|
||||
CSVPrinter csvFilePrinter = null;
|
||||
|
||||
//Create the CSVFormat object with "\n" as a record delimiter
|
||||
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
|
||||
|
||||
try {
|
||||
//initialize FileWriter object
|
||||
fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
|
||||
|
||||
//initialize CSVPrinter object
|
||||
csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
|
||||
|
||||
// write info block
|
||||
printHeaderInfo(csvFilePrinter, headerInfoBlock);
|
||||
|
||||
//Create CSV file header
|
||||
csvFilePrinter.printRecord(FILE_HEADER);
|
||||
|
||||
for (Object[] resultEntry : result) {
|
||||
List dataEntry = new ArrayList<>();
|
||||
dataEntry.add(resultEntry[0]);
|
||||
dataEntry.add(resultEntry[1]);
|
||||
dataEntry.add(formatNumberAsPercent(resultEntry[2]));
|
||||
csvFilePrinter.printRecord(dataEntry);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.out.println("Error in CsvFileWriter!");
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
if (fileWriter != null) {
|
||||
fileWriter.flush();
|
||||
fileWriter.close();
|
||||
}
|
||||
if (csvFilePrinter != null) {
|
||||
csvFilePrinter.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
return fileName;
|
||||
}
|
||||
|
||||
public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
||||
//Delimiter used in CSV file
|
||||
String NEW_LINE_SEPARATOR = "\n";
|
||||
|
||||
//CSV file header
|
||||
Object[] FILE_HEADER = {"type", "key", "word", "frequency"};
|
||||
|
||||
String fileName = "";
|
||||
|
||||
fileName = title.replace(": ", "-");
|
||||
fileName = fileName.replace(" ", "_").concat(".csv");
|
||||
|
||||
fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
||||
|
||||
OutputStreamWriter fileWriter = null;
|
||||
CSVPrinter csvFilePrinter = null;
|
||||
|
||||
//Create the CSVFormat object with "\n" as a record delimiter
|
||||
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
|
||||
|
||||
try {
|
||||
//initialize FileWriter object
|
||||
fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
|
||||
|
||||
//initialize CSVPrinter object
|
||||
csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
|
||||
|
||||
// write info block
|
||||
printHeaderInfo(csvFilePrinter, headerInfoBlock);
|
||||
|
||||
//Create CSV file header
|
||||
csvFilePrinter.printRecord(FILE_HEADER);
|
||||
|
||||
for (Map.Entry<WordLevelType, Map<String, Map<String, Long>>> typeEntry : result.entrySet()) {
|
||||
for (Map.Entry<String, Map<String, Long>> keyWordEntry : typeEntry.getValue().entrySet()) {
|
||||
for (Map.Entry<String, Long> calculationResults : keyWordEntry.getValue().entrySet()) {
|
||||
List values = new ArrayList();
|
||||
values.add(typeEntry.getKey().getName());
|
||||
values.add(keyWordEntry.getKey());
|
||||
values.add(calculationResults.getKey());
|
||||
values.add(calculationResults.getValue());
|
||||
csvFilePrinter.printRecord(values);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.out.println("Error in CsvFileWriter!");
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
if (fileWriter != null) {
|
||||
fileWriter.flush();
|
||||
fileWriter.close();
|
||||
}
|
||||
if (csvFilePrinter != null) {
|
||||
csvFilePrinter.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
return fileName;
|
||||
}
|
||||
|
||||
private static void printHeaderInfo(CSVPrinter csvFilePrinter, LinkedHashMap<String, String> headerInfoBlock) throws IOException {
|
||||
for (Map.Entry<String, String> entry : headerInfoBlock.entrySet()) {
|
||||
List values = new ArrayList();
|
||||
values.add(entry.getKey());
|
||||
values.add(entry.getValue());
|
||||
csvFilePrinter.printRecord(values);
|
||||
}
|
||||
|
||||
// 2 empty lines
|
||||
List values = new ArrayList();
|
||||
csvFilePrinter.printRecord(values);
|
||||
csvFilePrinter.printRecord(values);
|
||||
|
||||
}
|
||||
}
|
||||
31
src/main/java/util/Key.java
Normal file
31
src/main/java/util/Key.java
Normal file
@@ -0,0 +1,31 @@
|
||||
package util;
|
||||
|
||||
/**
 * Placeholder type: it declares no fields, constructors or methods of its own, so
 * instances only have the identity-based behaviour inherited from {@link Object}.
 *
 * <p>An earlier value-based draft (a wrapper around a single {@code String value}
 * implementing {@code Comparable<Key>}) used to live here as commented-out code.
 * Should it be revived, note that the draft was not usable as written:
 * <ul>
 *   <li>{@code equals} started with {@code this.equals(o)}, i.e. it called itself
 *       instead of comparing references with {@code this == o};</li>
 *   <li>{@code hashCode} returned the constant {@code 0};</li>
 *   <li>{@code compareTo} called {@code Objects.compare} with two arguments, but that
 *       method also requires a comparator.</li>
 * </ul>
 */
public class Key {
}
|
||||
63
src/main/java/util/TimeWatch.java
Normal file
63
src/main/java/util/TimeWatch.java
Normal file
@@ -0,0 +1,63 @@
|
||||
package util;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
 * Minimal stopwatch built on {@link System#nanoTime()}.
 *
 * <p>A watch starts counting when it is created through {@link #start()}. Every formatting
 * method samples the clock exactly once, so all components of one formatted string describe
 * the same instant.
 *
 * <p>Adapted from http://memorynotfound.com/calculating-elapsed-time-java/
 */
public class TimeWatch {

    /** Reading of {@link System#nanoTime()} taken when the watch was (re)started. */
    private long starts;

    private TimeWatch() {
        reset();
    }

    /**
     * Creates a watch that starts counting immediately.
     *
     * @return the running watch
     */
    public static TimeWatch start() {
        return new TimeWatch();
    }

    /** Restarts the measurement from now. */
    private TimeWatch reset() {
        starts = System.nanoTime();
        return this;
    }

    /** Elapsed time since the last (re)start, in nanoseconds. */
    private long time() {
        return System.nanoTime() - starts;
    }

    /** Elapsed time since the last (re)start, truncated to whole {@code unit}s. */
    private long time(TimeUnit unit) {
        return unit.convert(time(), TimeUnit.NANOSECONDS);
    }

    /** Elapsed time as whole minutes plus the remaining whole seconds. */
    private String toMinuteSeconds() {
        long elapsed = time();
        long minutes = TimeUnit.NANOSECONDS.toMinutes(elapsed);
        // remaining seconds: total seconds minus the seconds already covered by the minutes
        long seconds = TimeUnit.NANOSECONDS.toSeconds(elapsed) - TimeUnit.MINUTES.toSeconds(minutes);

        return String.format("%d min, %d sec", minutes, seconds);
    }

    /**
     * Formats the elapsed time split into hours, minutes, seconds and milliseconds.
     *
     * @return text of the form {@code "H h, M min, S s, MS ms"}
     */
    public String toFullTime() {
        // one clock reading for all components, otherwise they could straddle a unit boundary
        long elapsed = time();

        long hours = TimeUnit.NANOSECONDS.toHours(elapsed);
        long minutes = TimeUnit.NANOSECONDS.toMinutes(elapsed) - TimeUnit.HOURS.toMinutes(hours);
        long seconds = TimeUnit.NANOSECONDS.toSeconds(elapsed) - TimeUnit.HOURS.toSeconds(hours) - TimeUnit.MINUTES.toSeconds(minutes);
        long milliseconds = TimeUnit.NANOSECONDS.toMillis(elapsed) - TimeUnit.HOURS.toMillis(hours) - TimeUnit.MINUTES.toMillis(minutes) - TimeUnit.SECONDS.toMillis(seconds);

        return String.format("%d h, %d min, %d s, %d ms", hours, minutes, seconds, milliseconds);
    }

    /**
     * @return the label {@code "Elapsed Time in nano seconds: "} followed by the elapsed nanoseconds
     */
    @Override
    public String toString() {
        return "Elapsed Time in nano seconds: " + time();
    }

    private void exampleUsage() {
        TimeWatch watch = TimeWatch.start();

        // do something...

        System.out.println("Elapsed Time custom format: " + watch.toMinuteSeconds());
        System.out.println("Elapsed Time in seconds: " + watch.time(TimeUnit.SECONDS));
        System.out.println("Elapsed Time in nano seconds: " + watch.time());
    }
}
|
||||
225
src/main/java/util/Util.java
Normal file
225
src/main/java/util/Util.java
Normal file
@@ -0,0 +1,225 @@
|
||||
package util;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLDecoder;
|
||||
import java.text.MessageFormat;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import data.Settings;
|
||||
import gui.GUIController;
|
||||
import gui.ValidationUtil;
|
||||
|
||||
public class Util {
|
||||
public final static Logger logger = LogManager.getLogger(Util.class);
|
||||
|
||||
|
||||
public static String toReadableTime(long time) {
|
||||
long hours = time(TimeUnit.HOURS, time);
|
||||
long minutes = time(TimeUnit.MINUTES, time) - TimeUnit.HOURS.toMinutes(hours);
|
||||
long seconds = time(TimeUnit.SECONDS, time) - TimeUnit.HOURS.toSeconds(hours) - TimeUnit.MINUTES.toSeconds(minutes);
|
||||
long milliseconds = time(TimeUnit.MILLISECONDS, time) - TimeUnit.HOURS.toMillis(hours) - TimeUnit.MINUTES.toMillis(minutes) - TimeUnit.SECONDS.toMillis(seconds);
|
||||
long microseconds = time(TimeUnit.MICROSECONDS, time) - TimeUnit.HOURS.toMicros(hours) - TimeUnit.MINUTES.toMicros(minutes) - TimeUnit.SECONDS.toMicros(seconds) - TimeUnit.MILLISECONDS.toMicros(milliseconds);
|
||||
long nanoseconds = time(TimeUnit.NANOSECONDS, time) - TimeUnit.HOURS.toNanos(hours) - TimeUnit.MINUTES.toNanos(minutes) - TimeUnit.SECONDS.toNanos(seconds) - TimeUnit.MILLISECONDS.toNanos(milliseconds) - TimeUnit.MICROSECONDS.toNanos(microseconds);
|
||||
|
||||
return String.format("%d h, %d min, %d s, %d ms, %d µs, %d ns", hours, minutes, seconds, milliseconds, microseconds, nanoseconds);
|
||||
}
|
||||
|
||||
private static long time(TimeUnit unit, long t) {
|
||||
return unit.convert(t, TimeUnit.NANOSECONDS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a number to a more readable format.
|
||||
* 12345 -> 12.345
|
||||
* 12345,678 -> 12.345,67
|
||||
*
|
||||
* @param o byte, double, float, int,long, short
|
||||
*
|
||||
* @return number formatted with thousands separator and 2 decimal places (floats)
|
||||
*/
|
||||
private static String formatNumberReadable(Object o) {
|
||||
if (isInstanceOfInteger(o))
|
||||
return String.format("%,d", o);
|
||||
else if (isInstanceOfFloat(o))
|
||||
return String.format("%,.2f", o);
|
||||
else
|
||||
return "- invalid input format -";
|
||||
}
|
||||
|
||||
public static String formatNumberAsPercent(Object o) {
|
||||
return MessageFormat.format("{0,number,#.###%}", o);
|
||||
}
|
||||
|
||||
private static boolean isInstanceOfInteger(Object o) {
|
||||
Set<Class<?>> types = new HashSet<>();
|
||||
types.add(Byte.class);
|
||||
types.add(Short.class);
|
||||
types.add(Integer.class);
|
||||
types.add(Long.class);
|
||||
|
||||
return types.contains(o.getClass());
|
||||
}
|
||||
|
||||
private static boolean isInstanceOfFloat(Object o) {
|
||||
Set<Class<?>> types = new HashSet<>();
|
||||
types.add(Float.class);
|
||||
types.add(Double.class);
|
||||
|
||||
return types.contains(o.getClass());
|
||||
}
|
||||
|
||||
public static <K, V> void printMap(Map<K, V> map) {
|
||||
System.out.println("\nkey: value");
|
||||
map.forEach((k, v) -> System.out.print(String.format("%s:\t %,8d%n", k, v)));
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic map converter -> since AtomicLongs aren't as comparable.
|
||||
* Converts ConcurrentHashMap<K, AtomicLong> to HashMap<K, Long>
|
||||
*/
|
||||
public static <K, V> Map<String, Long> atomicInt2StringAndInt(Map<K, V> map) {
|
||||
Map m = new HashMap<String, Long>();
|
||||
|
||||
for (Map.Entry<K, V> e : map.entrySet()) {
|
||||
m.put(e.getKey().toString(), ((AtomicLong) e.getValue()).longValue());
|
||||
}
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts a map in a descending order by value.
|
||||
*/
|
||||
public static <K, V extends Comparable<? super V>> Map<K, V> sortByValue(Map<K, V> map, int limit) {
|
||||
/*
|
||||
sorted() in itself is O(1), since it's an intermediate operation that
|
||||
doesn't consume the stream, but simply adds an operation to the pipeline.
|
||||
Once the stream is consumed by a terminal operation, the sort happens and
|
||||
either
|
||||
- it doesn't do anything (O(1)) because the stream knows that the
|
||||
elements are already sorted (because they come from a SortedSet, for example)
|
||||
- or the stream is not parallel, and it delegates to Arrays.sort() (O(n log n))
|
||||
- or the stream is parallel, and it delegates to Arrays.parallelSort() (O(n log n))
|
||||
|
||||
As of JDK 8, the main sorting algorithm which is also used in standard
|
||||
stream API implementation for sequential sorting is TimSort. Its worst
|
||||
case is O(n log n), but it works incredibly fast (with O(n) and quite
|
||||
small constant) if data is presorted (in forward or reverse direction)
|
||||
or partially presorted (for example, if you concatenate two sorted lists
|
||||
and sort them again).
|
||||
*/
|
||||
// if limit is set to 0 or less, we take that to mean no limit at all
|
||||
if (limit <= 0) {
|
||||
limit = map.size();
|
||||
}
|
||||
|
||||
Map<K, V> result = new LinkedHashMap<>();
|
||||
TimeWatch watch = TimeWatch.start();
|
||||
|
||||
Stream<Map.Entry<K, V>> st = map.entrySet().stream();
|
||||
|
||||
st.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).limit(limit)
|
||||
.forEachOrdered(e -> result.put(e.getKey(), e.getValue()));
|
||||
|
||||
if (Settings.PRINT_LOG) {
|
||||
System.out.println(String.format("Elapsed time for sorting %s items: %s",
|
||||
formatNumberReadable(result.size()),
|
||||
watch.toFullTime()));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static <K, V> void printMap(Map<K, Integer> map, String title, int number_of_words) {
|
||||
System.out.println(String.format("\n%s\n------------\nkey: value\tpercent", title));
|
||||
map.forEach((k, v) ->
|
||||
System.out.println(String.format("%s:\t %s\t %s%%",
|
||||
k,
|
||||
Util.formatNumberReadable(v),
|
||||
Util.formatNumberReadable((double) v / number_of_words * 100))));
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
static long mapSumFrequencies(Map<String, Long> map) {
|
||||
long sum = 0;
|
||||
|
||||
for (long value : map.values()) {
|
||||
sum += value;
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Used for passing optional integer values for sorting.
|
||||
*/
|
||||
public static int getValidInt(int... i) {
|
||||
if (i == null || i.length < 1 || i[0] <= 0) {
|
||||
return 0;
|
||||
} else {
|
||||
return i[0];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether a map is empty. It also considers an edge case where map's keys are lists to check if those lists are empty.
|
||||
*/
|
||||
public static <K, V> boolean isMapEmpty(Map<K, V> map) {
|
||||
if (map.isEmpty()) {
|
||||
// default
|
||||
return true;
|
||||
}
|
||||
|
||||
// otherwise check if keys map to values that are empty
|
||||
for (V v : map.values()) {
|
||||
// todo: generalize to all collections if/when needed
|
||||
ArrayList<String> vl = new ArrayList((List<String>) v);
|
||||
if (!vl.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the location of the main class if possible, otherwise null
|
||||
*/
|
||||
public static File getWorkingDirectory() {
|
||||
// get location of the currently executing class
|
||||
String path = GUIController.class.getProtectionDomain().getCodeSource().getLocation().getPath();
|
||||
|
||||
logger.info("working dir path: ", path);
|
||||
|
||||
String decodedPath = null;
|
||||
try {
|
||||
decodedPath = URLDecoder.decode(path, "UTF-8");
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
logger.error("decoding: ", e);
|
||||
// e.printStackTrace();
|
||||
}
|
||||
|
||||
if (decodedPath != null) {
|
||||
File workingDirectory = new File(decodedPath);
|
||||
|
||||
// in case it's a file (class is packaged inside a jar), select its parent folder
|
||||
workingDirectory = workingDirectory.isFile() ? workingDirectory.getParentFile() : workingDirectory;
|
||||
|
||||
if (ValidationUtil.isReadableDirectory(workingDirectory)) {
|
||||
logger.info("working dir is ok: ", workingDirectory.getAbsolutePath());
|
||||
return workingDirectory;
|
||||
}
|
||||
}
|
||||
|
||||
logger.info("working dir returing null");
|
||||
return null;
|
||||
}
|
||||
}
|
||||
132
src/main/java/util/db/RDB.java
Normal file
132
src/main/java/util/db/RDB.java
Normal file
@@ -0,0 +1,132 @@
|
||||
package util.db;
|
||||
|
||||
import static util.ByteUtils.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.rocksdb.*;
|
||||
|
||||
import util.TimeWatch;
|
||||
|
||||
public class RDB {
|
||||
|
||||
private RocksDB db;
|
||||
private String path;
|
||||
private static final String UTF_8 = "UTF-8";
|
||||
|
||||
public RDB() {
|
||||
// different dbs i ncase of concurrent calculations
|
||||
this.path = System.getProperty("java.io.tmpdir")
|
||||
.concat(File.separator)
|
||||
.concat(String.format("corpusAnalyzer_db%d", LocalDateTime.now().toString().hashCode()));
|
||||
|
||||
this.db = createDB();
|
||||
}
|
||||
|
||||
|
||||
private RocksDB createDB() {
|
||||
RocksDB.loadLibrary();
|
||||
|
||||
// the Options class contains a set of configurable DB options
|
||||
// that determines the behaviour of the database.
|
||||
try (final Options options = new Options()) {
|
||||
options.setCreateIfMissing(true);
|
||||
|
||||
// a factory method that returns a RocksDB instance
|
||||
try (final RocksDB rdb = RocksDB.open(options, path)) {
|
||||
if (db != null) {
|
||||
return rdb;
|
||||
} else {
|
||||
this.db = rdb;
|
||||
}
|
||||
}
|
||||
} catch (RocksDBException e) {
|
||||
// do some error handling
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public void writeBatch(Map<String, AtomicLong> results) throws UnsupportedEncodingException {
|
||||
RocksDB.loadLibrary();
|
||||
|
||||
// a factory method that returns a RocksDB instance
|
||||
try (final RocksDB rdb = RocksDB.open(new Options(), path)) {
|
||||
final WriteBatch wb = new WriteBatch();
|
||||
|
||||
for (Map.Entry<String, AtomicLong> entry : results.entrySet()) {
|
||||
byte[] key = entry.getKey().getBytes(UTF_8);
|
||||
long resultValue = entry.getValue().longValue();
|
||||
|
||||
try {
|
||||
final byte[] dbValue = rdb.get(key);
|
||||
if (dbValue != null) {
|
||||
// value == null if key does not exist in db.
|
||||
wb.put(key, longToBytes(bytesToLong(dbValue) + resultValue));
|
||||
} else {
|
||||
wb.put(key, longToBytes(entry.getValue().longValue()));
|
||||
}
|
||||
} catch (RocksDBException e) {
|
||||
// TODO: error handling
|
||||
}
|
||||
}
|
||||
TimeWatch watch = TimeWatch.start();
|
||||
rdb.write(new WriteOptions(), wb);
|
||||
System.out.println(String.format("Writing %d entries took: %s", wb.count(), watch.toFullTime()));
|
||||
} catch (RocksDBException e) {
|
||||
// do some error handling
|
||||
}
|
||||
}
|
||||
|
||||
// public byte[] atomicIntToByteArray(final AtomicLong i) {
|
||||
// BigInteger bigInt = BigInteger.valueOf(i.intValue());
|
||||
//
|
||||
// return bigInt.toByteArray();
|
||||
// }
|
||||
|
||||
public RocksDB getDb() {
|
||||
return db;
|
||||
}
|
||||
|
||||
public Map<String, AtomicLong> getDump() throws UnsupportedEncodingException {
|
||||
Map<String, AtomicLong> dump = new HashMap<>();
|
||||
RocksDB.loadLibrary();
|
||||
|
||||
// the Options class contains a set of configurable DB options
|
||||
// that determines the behaviour of the database.
|
||||
// a factory method that returns a RocksDB instance
|
||||
try (final RocksDB rdb = RocksDB.open(new Options(), path)) {
|
||||
try (RocksIterator it = rdb.newIterator()) {
|
||||
it.seekToFirst();
|
||||
// it.next();
|
||||
|
||||
while (it.isValid()) {
|
||||
byte[] key = it.key();
|
||||
byte[] value = it.value();
|
||||
|
||||
dump.put(new String(key, UTF_8), new AtomicLong(bytesToLong(value)));
|
||||
|
||||
it.next();
|
||||
}
|
||||
}
|
||||
} catch (RocksDBException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
return dump;
|
||||
}
|
||||
|
||||
public void delete() {
|
||||
try {
|
||||
FileUtils.deleteDirectory(new File(path));
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
68720
src/main/resources/GOS_small/TEI_GOS_small.xml
Normal file
68720
src/main/resources/GOS_small/TEI_GOS_small.xml
Normal file
File diff suppressed because it is too large
Load Diff
524
src/main/resources/GOS_tax_test/GOS_tax_test.xml
Normal file
524
src/main/resources/GOS_tax_test/GOS_tax_test.xml
Normal file
@@ -0,0 +1,524 @@
|
||||
<?oxygen RNGSchema="http://nl.ijs.si/ssj/gos/schema/tei_gos.rnc" type="compact"?>
|
||||
<!--DOCTYPE TEI SYSTEM "http://nl.ijs.si/ssj/gos/schema/tei_gos.dtd"-->
|
||||
<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xml:id="gos" xml:lang="slv">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title xml:lang="slv">Korpus GOS</title>
|
||||
<title xml:lang="eng">GOS Corpus</title>
|
||||
<funder xml:lang="slv">Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za
|
||||
šolstvo in šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje
|
||||
2007/2013, razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve:
|
||||
izboljšanje kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007/2013.
|
||||
</funder>
|
||||
<funder xml:lang="eng">The operation is partly financed by the European Union, the European Social Fund, and the
|
||||
Ministry of Education and Sport of the Republic of Slovenia. The operation is being carried out within the
|
||||
operational programme Human Resources Development for the period 2007/2013, developmental priorities:
|
||||
improvement of the quality and efficiency of educational and training systems 2007/2013.
|
||||
</funder>
|
||||
<respStmt>
|
||||
<name xml:id="MIRO">Miro Romih, Amebis</name>
|
||||
<resp xml:lang="slv">Vodja projekta "Sporazumevanje v slovenskem jeziku.</resp>
|
||||
<resp xml:lang="eng">"Communication in Slovene" project leader.</resp>
|
||||
</respStmt>
|
||||
<respStmt>
|
||||
<name xml:id="SIMON">Simon Krek, Amebis, JSI</name>
|
||||
<resp xml:lang="slv">Koordinator projekta "Sporazumevanje v slovenskem jeziku.</resp>
|
||||
<resp xml:lang="eng">"Communication in Slovene" project coordinator.</resp>
|
||||
</respStmt>
|
||||
<respStmt>
|
||||
<name xml:id="ANA">Ana Zwitter Vitez, Trojina</name>
|
||||
<resp xml:lang="slv">Koordinatorica gradnje korpusa GOS.</resp>
|
||||
<resp xml:lang="eng">Coordinator of the GOS corpus compilation project.</resp>
|
||||
</respStmt>
|
||||
<respStmt>
|
||||
<name xml:id="DARINKA">Darinka Verdonik, FERI</name>
|
||||
<resp xml:lang="slv">Koordinatorica izdelave spletnega konkordančnika GOS.</resp>
|
||||
<resp xml:lang="eng">Coordinator of the GOS corpus web concordancer project.</resp>
|
||||
</respStmt>
|
||||
<respStmt>
|
||||
<name xml:id="ET">Tomaž Erjavec, JSI</name>
|
||||
<resp xml:lang="slv">Redakcija zapisa TEI / XML.</resp>
|
||||
<resp xml:lang="eng">TEI / XML corpus encoding.</resp>
|
||||
</respStmt>
|
||||
</titleStmt>
|
||||
<editionStmt>
|
||||
<edition>1.0</edition>
|
||||
</editionStmt>
|
||||
<publicationStmt>
|
||||
<distributor>
|
||||
<address xml:lang="en">
|
||||
<addrLine>Amebis, d.o.o., Kamnik</addrLine>
|
||||
<addrLine>Bakovnik 3</addrLine>
|
||||
<addrLine>SI-1241 Kamnik</addrLine>
|
||||
<addrLine>Slovenia</addrLine>
|
||||
</address>
|
||||
<address xml:lang="sl">
|
||||
<addrLine>Amebis, d.o.o., Kamnik</addrLine>
|
||||
<addrLine>Bakovnik 3</addrLine>
|
||||
<addrLine>1241 Kamnik</addrLine>
|
||||
</address>
|
||||
</distributor>
|
||||
<pubPlace>
|
||||
<ref target="http://www.slovenscina.eu/">http://www.slovenscina.eu/</ref>
|
||||
<ref target="http://www.korpus-gos.net/">http://www.korpus-gos.net/</ref>
|
||||
</pubPlace>
|
||||
<availability>
|
||||
<p xml:lang="sl">Avtorske pravice za to izdajo ureja licenca <ref
|
||||
target="http://creativecommons.org/licenses/by-nc-sa/3.0/deed.sl">Priznanje
|
||||
avtorstva-Nekomercialno-Deljenje pod enakimi pogoji 3.0</ref>.
|
||||
</p>
|
||||
<p xml:lang="sl">Dovoljeno vam je:
|
||||
<list>
|
||||
<item>reproduciranje, distribuiranje, dajanje v najem in priobčevanje dela javnosti</item>
|
||||
<item>predelati delo</item>
|
||||
</list>
|
||||
Pod naslednjimi pogoji:
|
||||
<list>
|
||||
<item>Priznanje avtorstva — Pri uporabi dela morate navesti izvirnega avtorja na način, ki ga določi
|
||||
izvirni avtor oziroma dajalec licence. V znanstvenih publikacijah to pomeni citiranje ustreznega
|
||||
dela ali del, dostopnih na domači strani projekta, <ref target="http://www.slovenscina.eu/">
|
||||
http://www.slovenscina.eu/</ref>.
|
||||
</item>
|
||||
<item>Nekomercialno. Tega dela ne smete uporabiti v komercialne namene.</item>
|
||||
<item>Deljenje pod enakimi pogoji — Če spremenite, preoblikujete ali uporabite to delo v svojem delu,
|
||||
lahko distribuirate predelavo dela le pod licenco, ki je enaka tej.
|
||||
</item>
|
||||
</list>
|
||||
</p>
|
||||
<p xml:lang="en">This work is licenced under the <ref
|
||||
target="http://creativecommons.org/licenses/by-nc-sa/3.0/deed.en">Attribution-NonCommercial-ShareAlike
|
||||
3.0</ref>.
|
||||
</p>
|
||||
<p xml:lang="en">You are free:
|
||||
<list>
|
||||
<item>to Share — to copy, distribute and transmit the work</item>
|
||||
<item>to Remix — to adapt the work</item>
|
||||
</list>
|
||||
Under the following conditions:
|
||||
<list>
|
||||
<item>Attribution — You must attribute the work in the manner specified by the author or licensor. In
|
||||
scientific publications this means citing the relevant publication or publications, referred to on
|
||||
the home page of the project: <ref target="http://www.slovenscina.eu/">
|
||||
http://www.slovenscina.eu/</ref>.
|
||||
</item>
|
||||
<item>Noncommercial. You may not use this work for commercial purposes.</item>
|
||||
<item>Share Alike. If you alter, transform, or build upon this work, you may distribute the resulting
|
||||
work only under the same or similar license to this one.
|
||||
</item>
|
||||
</list>
|
||||
</p>
|
||||
</availability>
|
||||
<date>2012-03-14</date>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<p xml:lang="slv">Besedila so pretvorjena v TEI XML iz datotek programa Transcriber.</p>
|
||||
<p xml:lang="eng">Texts are transformed to TEI XML from Transcriber files.</p>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<encodingDesc>
|
||||
<projectDesc>
|
||||
<p xml:lang="slv">Projekt
|
||||
<ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>
|
||||
</p>
|
||||
<p xml:lang="eng">Project
|
||||
<ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>
|
||||
</p>
|
||||
</projectDesc>
|
||||
<classDecl>
|
||||
<taxonomy xml:id="gosTaxons">
|
||||
<!-- TIP DISKURZA -->
|
||||
<category xml:id="gos.T">
|
||||
<catDesc>tip diskurza</catDesc>
|
||||
<category xml:id="gos.T.J">
|
||||
<catDesc>javni</catDesc>
|
||||
<category xml:id="gos.T.J.I">
|
||||
<catDesc>informativno-izobraževalni</catDesc>
|
||||
</category>
|
||||
<category xml:id="gos.T.J.R">
|
||||
<catDesc>razvedrilni</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="gos.T.N">
|
||||
<catDesc>nejavni</catDesc>
|
||||
<category xml:id="gos.T.N.N">
|
||||
<catDesc>nezasebni</catDesc>
|
||||
</category>
|
||||
<category xml:id="gos.T.N.Z">
|
||||
<catDesc>zasebni</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
<!-- KANAL -->
|
||||
<category xml:id="gos.K">
|
||||
<catDesc>kanal</catDesc>
|
||||
<category xml:id="gos.K.O">
|
||||
<catDesc>osebni stik</catDesc>
|
||||
</category>
|
||||
<category xml:id="gos.K.P">
|
||||
<catDesc>telefon</catDesc>
|
||||
</category>
|
||||
<category xml:id="gos.K.R">
|
||||
<catDesc>radio</catDesc>
|
||||
</category>
|
||||
<category xml:id="gos.K.T">
|
||||
<catDesc>televizija</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
</classDecl>
|
||||
</encodingDesc>
|
||||
<profileDesc>
|
||||
<langUsage>
|
||||
<language ident="slv">slovenščina</language>
|
||||
<language ident="eng">angleščina</language>
|
||||
<language ident="deu">nemščina</language>
|
||||
<language ident="ita">italijanščina</language>
|
||||
<language ident="zls">južnoslovanski jeziki</language>
|
||||
<language ident="sla">drugi slovanski jeziki</language>
|
||||
<language ident="roa">drugi romanski jeziki</language>
|
||||
</langUsage>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
|
||||
<TEI xml:id="gos.001">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title xml:id="JIfajzakhu-np0911061839_s2">Splošno predavanje za prvi letnik prevajalstva.</title>
|
||||
<respStmt>
|
||||
<resp>snemanje</resp>
|
||||
<name>Neža Pahovnik</name>
|
||||
</respStmt>
|
||||
<respStmt>
|
||||
<resp>transkripcija</resp>
|
||||
<name>MatejaS</name>
|
||||
</respStmt>
|
||||
</titleStmt>
|
||||
<publicationStmt>
|
||||
<date>2009-11-05</date>
|
||||
<pubPlace></pubPlace>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<recordingStmt>
|
||||
<recording type="audio" dur="PT28M56S">
|
||||
<broadcast>
|
||||
<bibl>
|
||||
<title>terenski posnetek</title>
|
||||
</bibl>
|
||||
</broadcast>
|
||||
<date>2009-11-05</date>
|
||||
</recording>
|
||||
</recordingStmt>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="gos.T.J.I"/>
|
||||
<catRef target="gos.K.O"/>
|
||||
</textClass>
|
||||
<textDesc>
|
||||
<channel/>
|
||||
<constitution/>
|
||||
<derivation/>
|
||||
<domain>akademski, družboslovje</domain>
|
||||
<factuality/>
|
||||
<interaction/>
|
||||
<preparedness/>
|
||||
<purpose/>
|
||||
</textDesc>
|
||||
<particDesc>
|
||||
<listPerson n="1">
|
||||
<person n="Af-pred-02166">
|
||||
<sex value="2">ženski</sex>
|
||||
<age atLeast="35" atMost="59"/>
|
||||
<residence>LJ</residence>
|
||||
<education>fakulteta ali več</education>
|
||||
<langKnowledge>
|
||||
<langKnown tag="slv" level="first"/>
|
||||
</langKnowledge>
|
||||
</person>
|
||||
</listPerson>
|
||||
</particDesc>
|
||||
<settingDesc>
|
||||
<place>
|
||||
<region>LJ</region>
|
||||
<settlement>Ljubljana</settlement>
|
||||
</place>
|
||||
<setting>
|
||||
<date>2009-10-22</date>
|
||||
<time>14:40</time>
|
||||
</setting>
|
||||
</settingDesc>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text>
|
||||
<body>
|
||||
<div type="norm">
|
||||
<u who="Af-pred-02166">
|
||||
<seg xml:id="gos.001-0001.norm" corresp="#gos.001-0001" synch="JIfajzakhu-np0911061839_s2_0">
|
||||
<w lemma="n3" msd="L">n3</w>
|
||||
</seg>
|
||||
</u>
|
||||
</div>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
|
||||
<TEI xml:id="gos.002">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title xml:id="JIfajzakhu-np1003120917_s2">Ura filozofije, pri kateri predavatelj študentom razlaga nemško
|
||||
klasično filozofijo.
|
||||
</title>
|
||||
<respStmt>
|
||||
<resp>snemanje</resp>
|
||||
<name>Neža Pahovnik</name>
|
||||
</respStmt>
|
||||
<respStmt>
|
||||
<resp>transkripcija</resp>
|
||||
<name>Alenka Mirkac</name>
|
||||
</respStmt>
|
||||
</titleStmt>
|
||||
<publicationStmt>
|
||||
<date>2010-03-12</date>
|
||||
<pubPlace>Ljubljana</pubPlace>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<recordingStmt>
|
||||
<recording type="audio" dur="PT34M12S">
|
||||
<broadcast>
|
||||
<bibl>
|
||||
<title>terenski posnetek</title>
|
||||
</bibl>
|
||||
</broadcast>
|
||||
<date>2010-03-12</date>
|
||||
</recording>
|
||||
</recordingStmt>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="gos.T.J.R"/>
|
||||
<catRef target="gos.K.O"/>
|
||||
</textClass>
|
||||
<textDesc>
|
||||
<channel/>
|
||||
<constitution/>
|
||||
<derivation/>
|
||||
<domain>akademski, humanistika</domain>
|
||||
<factuality/>
|
||||
<interaction/>
|
||||
<preparedness/>
|
||||
<purpose/>
|
||||
</textDesc>
|
||||
<particDesc>
|
||||
<listPerson n="1">
|
||||
<person n="Zm-prof-01084">
|
||||
<sex value="1">moški</sex>
|
||||
<age atLeast="35" atMost="59"/>
|
||||
<residence>LJ, NM</residence>
|
||||
<education>fakulteta ali več</education>
|
||||
<langKnowledge>
|
||||
<langKnown tag="slv" level="first"/>
|
||||
</langKnowledge>
|
||||
</person>
|
||||
</listPerson>
|
||||
</particDesc>
|
||||
<settingDesc>
|
||||
<place>
|
||||
<region>LJ</region>
|
||||
<settlement>Ljubljana</settlement>
|
||||
</place>
|
||||
<setting>
|
||||
<date>2010-01-06</date>
|
||||
<time>19:40</time>
|
||||
</setting>
|
||||
</settingDesc>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text>
|
||||
<body>
|
||||
<div type="norm">
|
||||
<u who="Af-pred-02166">
|
||||
<seg xml:id="gos.001-0001.norm" corresp="#gos.001-0001" synch="JIfajzakhu-np0911061839_s2_0">
|
||||
<w lemma="n4" msd="L">n4</w>
|
||||
</seg>
|
||||
</u>
|
||||
</div>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
|
||||
<TEI xml:id="gos.001">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title xml:id="JIfajzakhu-np0911061839_s2">Splošno predavanje za prvi letnik prevajalstva.</title>
|
||||
<respStmt>
|
||||
<resp>snemanje</resp>
|
||||
<name>Neža Pahovnik</name>
|
||||
</respStmt>
|
||||
<respStmt>
|
||||
<resp>transkripcija</resp>
|
||||
<name>MatejaS</name>
|
||||
</respStmt>
|
||||
</titleStmt>
|
||||
<publicationStmt>
|
||||
<date>2009-11-05</date>
|
||||
<pubPlace></pubPlace>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<recordingStmt>
|
||||
<recording type="audio" dur="PT28M56S">
|
||||
<broadcast>
|
||||
<bibl>
|
||||
<title>terenski posnetek</title>
|
||||
</bibl>
|
||||
</broadcast>
|
||||
<date>2009-11-05</date>
|
||||
</recording>
|
||||
</recordingStmt>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="gos.T.J.I"/>
|
||||
<catRef target="gos.K.O"/>
|
||||
</textClass>
|
||||
<textDesc>
|
||||
<channel/>
|
||||
<constitution/>
|
||||
<derivation/>
|
||||
<domain>akademski, družboslovje</domain>
|
||||
<factuality/>
|
||||
<interaction/>
|
||||
<preparedness/>
|
||||
<purpose/>
|
||||
</textDesc>
|
||||
<particDesc>
|
||||
<listPerson n="1">
|
||||
<person n="Af-pred-02166">
|
||||
<sex value="2">ženski</sex>
|
||||
<age atLeast="35" atMost="59"/>
|
||||
<residence>LJ</residence>
|
||||
<education>fakulteta ali več</education>
|
||||
<langKnowledge>
|
||||
<langKnown tag="slv" level="first"/>
|
||||
</langKnowledge>
|
||||
</person>
|
||||
</listPerson>
|
||||
</particDesc>
|
||||
<settingDesc>
|
||||
<place>
|
||||
<region>LJ</region>
|
||||
<settlement>Ljubljana</settlement>
|
||||
</place>
|
||||
<setting>
|
||||
<date>2009-10-22</date>
|
||||
<time>14:40</time>
|
||||
</setting>
|
||||
</settingDesc>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text>
|
||||
<body>
|
||||
<div type="norm">
|
||||
<u who="Af-pred-02166">
|
||||
<seg xml:id="gos.001-0001.norm" corresp="#gos.001-0001" synch="JIfajzakhu-np0911061839_s2_0">
|
||||
<w lemma="n3" msd="L">n3</w>
|
||||
</seg>
|
||||
</u>
|
||||
</div>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
|
||||
<TEI xml:id="gos.002">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title xml:id="JIfajzakhu-np1003120917_s2">Ura filozofije, pri kateri predavatelj študentom razlaga nemško
|
||||
klasično filozofijo.
|
||||
</title>
|
||||
<respStmt>
|
||||
<resp>snemanje</resp>
|
||||
<name>Neža Pahovnik</name>
|
||||
</respStmt>
|
||||
<respStmt>
|
||||
<resp>transkripcija</resp>
|
||||
<name>Alenka Mirkac</name>
|
||||
</respStmt>
|
||||
</titleStmt>
|
||||
<publicationStmt>
|
||||
<date>2010-03-12</date>
|
||||
<pubPlace>Ljubljana</pubPlace>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<recordingStmt>
|
||||
<recording type="audio" dur="PT34M12S">
|
||||
<broadcast>
|
||||
<bibl>
|
||||
<title>terenski posnetek</title>
|
||||
</bibl>
|
||||
</broadcast>
|
||||
<date>2010-03-12</date>
|
||||
</recording>
|
||||
</recordingStmt>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="gos.T.J.R"/>
|
||||
<catRef target="gos.K.O"/>
|
||||
</textClass>
|
||||
<textDesc>
|
||||
<channel/>
|
||||
<constitution/>
|
||||
<derivation/>
|
||||
<domain>akademski, humanistika</domain>
|
||||
<factuality/>
|
||||
<interaction/>
|
||||
<preparedness/>
|
||||
<purpose/>
|
||||
</textDesc>
|
||||
<particDesc>
|
||||
<listPerson n="1">
|
||||
<person n="Zm-prof-01084">
|
||||
<sex value="1">moški</sex>
|
||||
<age atLeast="35" atMost="59"/>
|
||||
<residence>LJ, NM</residence>
|
||||
<education>fakulteta ali več</education>
|
||||
<langKnowledge>
|
||||
<langKnown tag="slv" level="first"/>
|
||||
</langKnowledge>
|
||||
</person>
|
||||
</listPerson>
|
||||
</particDesc>
|
||||
<settingDesc>
|
||||
<place>
|
||||
<region>LJ</region>
|
||||
<settlement>Ljubljana</settlement>
|
||||
</place>
|
||||
<setting>
|
||||
<date>2010-01-06</date>
|
||||
<time>19:40</time>
|
||||
</setting>
|
||||
</settingDesc>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text>
|
||||
<body>
|
||||
<div type="norm">
|
||||
<u who="Af-pred-02166">
|
||||
<seg xml:id="gos.001-0001.norm" corresp="#gos.001-0001" synch="JIfajzakhu-np0911061839_s2_0">
|
||||
<w lemma="n4" msd="L">n4</w>
|
||||
</seg>
|
||||
</u>
|
||||
</div>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
|
||||
</teiCorpus>
|
||||
133
src/main/resources/GUI.fxml
Normal file
133
src/main/resources/GUI.fxml
Normal file
@@ -0,0 +1,133 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<!--<?import gui.CorpusTab ?>-->
|
||||
<!--<?import gui.StringAnalysisTab ?>-->
|
||||
|
||||
<?import java.lang.*?>
|
||||
<?import javafx.collections.FXCollections?>
|
||||
<?import javafx.scene.control.*?>
|
||||
<?import javafx.scene.layout.*?>
|
||||
<AnchorPane prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111" xmlns:fx="http://javafx.com/fxml/1"
|
||||
fx:controller="gui.GUIController">
|
||||
<children>
|
||||
<TabPane fx:id="tabPane" prefHeight="600.0" prefWidth="800.0" tabClosingPolicy="UNAVAILABLE" AnchorPane.bottomAnchor="0.0"
|
||||
AnchorPane.leftAnchor="0.0" AnchorPane.rightAnchor="0.0" AnchorPane.topAnchor="0.0">
|
||||
<tabs>
|
||||
<Tab fx:id="corpusTab" closable="false" text="Korpus">
|
||||
<fx:include fx:id="ct" source="gui/CorpusTab.fxml"/>
|
||||
</Tab>
|
||||
<Tab fx:id="filterTab" closable="false" disable="true" text="Filter">
|
||||
<fx:include fx:id="ffs" source="gui/FiltersForSolar.fxml"/>
|
||||
</Tab>
|
||||
<Tab fx:id="CharacterLevelTabNew" closable="false" disable="true" text="Črke">
|
||||
<fx:include fx:id="cat" source="gui/CharacterAnalysisTab.fxml"/>
|
||||
</Tab>
|
||||
<Tab fx:id="wordLevelTab" closable="false" disable="true" text="Besedni deli">
|
||||
<fx:include fx:id="wl" source="gui/WordLevelTab.fxml"/>
|
||||
</Tab>
|
||||
<Tab fx:id="OneWordAnalysisTab" closable="false" disable="true" text="Besede">
|
||||
<fx:include fx:id="oneWordTab" source="gui/OneWordAnalysisTab.fxml"/>
|
||||
</Tab>
|
||||
<Tab fx:id="StringLevelTabNew2" closable="false" disable="true" text="Besedni nizi">
|
||||
<fx:include fx:id="satNew2" source="gui/StringAnalysisTabNew2.fxml"/>
|
||||
</Tab>
|
||||
<!--<Tab fx:id="wordFormationTab" closable="false" disable="true" text="Besedotvorni procesi">
|
||||
<fx:include fx:id="wf" source="gui/WordFormationTab.fxml"/>
|
||||
</Tab>-->
|
||||
<!--<Tab fx:id="wordLevelTab" closable="false" disable="true" text="Nivo besed in delov besed">-->
|
||||
<!--<content>-->
|
||||
<!--<AnchorPane minHeight="0.0" minWidth="0.0" prefHeight="180.0" prefWidth="200.0">-->
|
||||
<!--<children>-->
|
||||
<!--<Label fx:id="izbraniFiltriLabelB" layoutX="400.0" layoutY="14.0" text="Izbrani filtri:"/>-->
|
||||
<!--<Label fx:id="selectedFiltersLabelB" layoutX="399.0" layoutY="45.0" text="/"/>-->
|
||||
<!--<Label layoutX="35.0" layoutY="30.0" text="Različnica/lema"/>-->
|
||||
<!--<ComboBox fx:id="distributionWordOrLemmaCombo" layoutX="146.0" layoutY="26.0"-->
|
||||
<!--prefWidth="150.0" promptText="izberi">-->
|
||||
<!--<items>-->
|
||||
<!--<FXCollections fx:factory="observableArrayList">-->
|
||||
<!--<String fx:value="različnica"/>-->
|
||||
<!--<String fx:value="lema"/>-->
|
||||
<!--</FXCollections>-->
|
||||
<!--</items>-->
|
||||
<!--</ComboBox>-->
|
||||
<!--<Label layoutX="35.0" layoutY="75.0" text="JOS:"/>-->
|
||||
<!--<ComboBox fx:id="distributionJosCombo" layoutX="146.0" layoutY="71.0"-->
|
||||
<!--prefWidth="150.0" promptText="izberi">-->
|
||||
<!--<items>-->
|
||||
<!--<FXCollections fx:factory="observableArrayList">-->
|
||||
<!--<String fx:value="- brez -"/>-->
|
||||
<!--<String fx:value="samostalnik"/>-->
|
||||
<!--<String fx:value="glagol"/>-->
|
||||
<!--<String fx:value="pridevnik"/>-->
|
||||
<!--<String fx:value="prislov"/>-->
|
||||
<!--<String fx:value="zaimek"/>-->
|
||||
<!--<String fx:value="stevnik"/>-->
|
||||
<!--<String fx:value="predlog"/>-->
|
||||
<!--<String fx:value="veznik"/>-->
|
||||
<!--<String fx:value="clenek"/>-->
|
||||
<!--<String fx:value="medmet"/>-->
|
||||
<!--<String fx:value="okrajsava"/>-->
|
||||
<!--</FXCollections>-->
|
||||
<!--</items>-->
|
||||
<!--</ComboBox>-->
|
||||
<!--<Label layoutX="35.0" layoutY="120.0" text="Taksonomija:"/>-->
|
||||
<!--<ComboBox fx:id="distributionTaxonomyCombo" layoutX="146.0" layoutY="116.0"-->
|
||||
<!--prefWidth="150.0" promptText="izberi"-->
|
||||
<!--visibleRowCount="5">-->
|
||||
<!--</ComboBox>-->
|
||||
<!--<Button fx:id="distributionCalculateButton" layoutX="32.0" layoutY="180.0" mnemonicParsing="false"-->
|
||||
<!--prefHeight="25.0" prefWidth="243.0" text="Izračunaj"/>-->
|
||||
<!--<!–<TitledPane animated="false" layoutX="-2.0" layoutY="315.0" prefHeight="256.0" prefWidth="806.0" text="Distribucija zaporedij samoglasnikov in soglasnikov">–>-->
|
||||
<!--<!–<content>–>-->
|
||||
<!--<!–<AnchorPane minHeight="0.0" minWidth="0.0" prefHeight="180.0" prefWidth="457.0">–>-->
|
||||
<!--<!–<children>–>-->
|
||||
<!--<!–<Label layoutX="21.0" layoutY="18.0" text="Samostalnik/lema:" />–>-->
|
||||
<!--<!–<ComboBox fx:id="distributionCVVWordOrLemmaCombo" layoutX="135.0" layoutY="14.0" onAction="#distributionCVVWOrdOrLemma" prefWidth="150.0" promptText="izberi">–>-->
|
||||
<!--<!–<items>–>-->
|
||||
<!--<!–<FXCollections fx:factory="observableArrayList">–>-->
|
||||
<!--<!–<String fx:value="različnica" />–>-->
|
||||
<!--<!–<String fx:value="lema" />–>-->
|
||||
<!--<!–</FXCollections>–>-->
|
||||
<!--<!–</items>–>-->
|
||||
<!--<!–</ComboBox>–>-->
|
||||
<!--<!–<Label layoutX="21.0" layoutY="69.0" text="Dolžina zaporedja:" />–>-->
|
||||
<!--<!–<!–<TextField fx:id="CVVLengthTA" layoutX="136.0" layoutY="65.0" onAction="#CVVLength" prefHeight="25.0" prefWidth="214.0" promptText="vnesi dolžino zaporedja (celo število)" />–>–>-->
|
||||
<!--<!–<Button fx:id="distributionCalculateCVVButton" layoutX="22.0" layoutY="103.0" mnemonicParsing="false" onAction="#distributionCVVCalculate" prefHeight="25.0" prefWidth="243.0" text="Izračunaj" />–>-->
|
||||
<!--<!–<TextField fx:id="morphosyntacticFilterTextField" layoutX="22.0" layoutY="158.0" onAction="#morphosyntacticFilterTextArea" prefHeight="25.0" prefWidth="766.0" />–>-->
|
||||
|
||||
<!--<!–<ProgressBar fx:id="distributionProgressBar" layoutX="20.0" layoutY="174.0" prefHeight="18.0" prefWidth="770.0" progress="0.0" />–>-->
|
||||
<!--<!–<Label fx:id="distributionProgressLabel" layoutX="20.0" layoutY="199.0" prefHeight="17.0" prefWidth="769.0" text="Label" />–>-->
|
||||
<!--<!–</children>–>-->
|
||||
<!--<!–</AnchorPane>–>-->
|
||||
<!--<!–</content>–>-->
|
||||
<!--<!–</TitledPane>–>-->
|
||||
<!--</children>-->
|
||||
<!--</AnchorPane>-->
|
||||
<!--</content>-->
|
||||
<!--</Tab>-->
|
||||
<!--<Tab fx:id="wordFormationTab" disable="false" text="Oblikoslovne kategorije">-->
|
||||
<!--<content>-->
|
||||
<!--<AnchorPane minHeight="0.0" minWidth="0.0" prefHeight="180.0" prefWidth="200.0">-->
|
||||
<!--<children>-->
|
||||
<!--<Label fx:id="izbraniFiltriLabelC" layoutX="400.0" layoutY="14.0" text="Izbrani filtri:"/>-->
|
||||
<!--<Label fx:id="selectedFiltersLabelC" layoutX="399.0" layoutY="45.0" text="/"/>-->
|
||||
<!--<Label layoutX="18.0" layoutY="27.0" text="Taksonomija:"/>-->
|
||||
<!--<ComboBox fx:id="inflectedJosTaxonomyCombo" layoutX="129.0" layoutY="23.0"-->
|
||||
<!--prefWidth="150.0" promptText="izberi"-->
|
||||
<!--visibleRowCount="5">-->
|
||||
<!--</ComboBox>-->
|
||||
<!--<Button fx:id="inflectedJosCalculateButton" layoutX="15.0" layoutY="87.0" mnemonicParsing="false"-->
|
||||
<!--prefHeight="25.0" prefWidth="243.0" text="Izračunaj"/>-->
|
||||
|
||||
<!--<ProgressBar fx:id="inflectedJOSProgressBar" layoutX="15.0" layoutY="499.0" prefHeight="18.0"-->
|
||||
<!--prefWidth="770.0" progress="0.0"/>-->
|
||||
<!--<Label fx:id="inflectedJOSProgressLabel" layoutX="15.0" layoutY="524.0" prefHeight="17.0"-->
|
||||
<!--prefWidth="769.0" text="Label"/>-->
|
||||
<!--</children>-->
|
||||
<!--</AnchorPane>-->
|
||||
<!--</content>-->
|
||||
<!--</Tab>-->
|
||||
</tabs>
|
||||
</TabPane>
|
||||
</children>
|
||||
</AnchorPane>
|
||||
237
src/main/resources/Gigafida_minimal/gfmin.xml
Normal file
237
src/main/resources/Gigafida_minimal/gfmin.xml
Normal file
@@ -0,0 +1,237 @@
|
||||
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0012405" xml:lang="sl">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title>Gigafida: Branko Gradišnik. ANTI2(1999)</title>
|
||||
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
|
||||
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
|
||||
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
|
||||
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
|
||||
</funder>
|
||||
</titleStmt>
|
||||
<editionStmt>
|
||||
<edition>1.0</edition>
|
||||
</editionStmt>
|
||||
<extent>52 besed</extent>
|
||||
<publicationStmt>
|
||||
<idno>ANTI2</idno>
|
||||
<availability status="restricted">
|
||||
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
|
||||
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
|
||||
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
|
||||
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
|
||||
</p>
|
||||
</availability>
|
||||
<date>2012-04-15</date>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<bibl>
|
||||
<title n="???">neznani naslov</title>
|
||||
<author>Branko Gradišnik</author>
|
||||
<date>1999</date>
|
||||
<publisher n="drugo">neznani založnik</publisher>
|
||||
<note type="sourceLang"/>
|
||||
</bibl>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<encodingDesc>
|
||||
<projectDesc>
|
||||
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
|
||||
</p>
|
||||
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
|
||||
</p>
|
||||
</projectDesc>
|
||||
<tagsDecl>
|
||||
<namespace name="http://www.tei-c.org/ns/1.0">
|
||||
<tagUsage gi="S" occurs="50"/>
|
||||
<tagUsage gi="body" occurs="1"/>
|
||||
<tagUsage gi="c" occurs="11"/>
|
||||
<tagUsage gi="p" occurs="2"/>
|
||||
<tagUsage gi="s" occurs="5"/>
|
||||
<tagUsage gi="text" occurs="1"/>
|
||||
<tagUsage gi="w" occurs="52"/>
|
||||
</namespace>
|
||||
</tagsDecl>
|
||||
<appInfo>
|
||||
<application ident="Amebis_pretvornik" version="1.0">
|
||||
<label>[ZDRUZEVANJE] 1:1</label>
|
||||
<label>[IME] D:\FIDA\KORPUS\VNOS\2_ZDR\ANTI2.ZDR</label>
|
||||
<label>[1] **********</label>
|
||||
<label>[IZVOR] D:\FIDA\KORPUS\Vhod\Brane\IZVIRNO\LITERAT\IDEJE\Anti2.doc</label>
|
||||
<label>[FORMAT] MS Word for Windows 6.0/7.0</label>
|
||||
<label>[DATUM] 2.12.1999</label>
|
||||
<label>[IZVOR_RTF] D:\FIDA\KORPUS\Vhod\Brane\IZVIRNO\LITERAT\IDEJE\Anti2.RTF</label>
|
||||
<label>[PRETVORBA] RTF</label>
|
||||
<label>[KONEC] **********</label>
|
||||
</application>
|
||||
</appInfo>
|
||||
<classDecl>
|
||||
<taxonomy xml:id="SSJ">
|
||||
<category xml:id="SSJ.T">
|
||||
<catDesc>tisk</catDesc>
|
||||
<category xml:id="SSJ.T.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
<category xml:id="SSJ.T.K.L">
|
||||
<catDesc>leposlovno</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.K.S">
|
||||
<catDesc>strokovno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="SSJ.T.P.C">
|
||||
<catDesc>časopis</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P.R">
|
||||
<catDesc>revija</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.D">
|
||||
<catDesc>drugo</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.I">
|
||||
<catDesc>internet</catDesc>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.P">
|
||||
<catDesc>prenosnik</catDesc>
|
||||
<category xml:id="Ft.P.G">
|
||||
<catDesc>govorni</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.E">
|
||||
<catDesc>elektronski</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P">
|
||||
<catDesc>pisni</catDesc>
|
||||
<category xml:id="Ft.P.P.O">
|
||||
<catDesc>objavljeno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.C">
|
||||
<catDesc>časopisno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.C.D">
|
||||
<catDesc>dnevno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.C.V">
|
||||
<catDesc>večkrat tedensko</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.C.T">
|
||||
<catDesc>tedensko</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R">
|
||||
<catDesc>revialno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.R.T">
|
||||
<catDesc>tedensko</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.S">
|
||||
<catDesc>štirinajstdnevno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.M">
|
||||
<catDesc>mesečno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.D">
|
||||
<catDesc>redkeje kot na mesec</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.O">
|
||||
<catDesc>občasno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N">
|
||||
<catDesc>neobjavljeno</catDesc>
|
||||
<category xml:id="Ft.P.P.N.J">
|
||||
<catDesc>javno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N.I">
|
||||
<catDesc>interno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N.Z">
|
||||
<catDesc>zasebno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.Z">
|
||||
<catDesc>zvrst</catDesc>
|
||||
<category xml:id="Ft.Z.U">
|
||||
<catDesc>umetnostna</catDesc>
|
||||
<category xml:id="Ft.Z.U.P">
|
||||
<catDesc>pesniška</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.U.R">
|
||||
<catDesc>prozna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.U.D">
|
||||
<catDesc>dramska</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N">
|
||||
<catDesc>neumetnostna</catDesc>
|
||||
<category xml:id="Ft.Z.N.S">
|
||||
<catDesc>strokovna</catDesc>
|
||||
<category xml:id="Ft.Z.N.S.H">
|
||||
<catDesc>humanistična in družboslovna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.S.N">
|
||||
<catDesc>naravoslovna in tehnična</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.N">
|
||||
<catDesc>nestrokovna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.P">
|
||||
<catDesc>pravna</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.L">
|
||||
<catDesc>lektorirano</catDesc>
|
||||
<category xml:id="Ft.L.D">
|
||||
<catDesc>da</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.L.N">
|
||||
<catDesc>ne</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
</classDecl>
|
||||
</encodingDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="#SSJ.T.K.S"/>
|
||||
<catRef target="#Ft.P.P.N.Z"/>
|
||||
<catRef target="#Ft.Z.N.N"/>
|
||||
</textClass>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text xml:id="F0012405." xml:lang="sl">
|
||||
<body>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Vd" lemma="ker">Ker</w>
|
||||
<S/>
|
||||
<w msd="Ggnste-n" lemma="imeti">ima</w>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="junak">junak</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="v">v</w>
|
||||
<S/>
|
||||
<w msd="Sozem" lemma="posest">posesti</w>
|
||||
<c>.</c>
|
||||
</s>
|
||||
</p>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
@@ -0,0 +1,70 @@
|
||||
"Korpus: ";Gigafida
|
||||
"Datum: ";14.05.2018 06:34
|
||||
"Analiza: ";Besedni nizi
|
||||
"n-gram nivo: ";nivo črk
|
||||
"Skip: ";0
|
||||
"Izračunaj za: ";lema
|
||||
"Izračunaj za kombinacije samoglasnikov in soglasnikov: ";ne
|
||||
"Dolžina niza: ";1
|
||||
|
||||
|
||||
word;frequency;percent
|
||||
a;438;11.086%
|
||||
i;390;9.871%
|
||||
e;341;8.631%
|
||||
o;328;8.302%
|
||||
t;262;6.631%
|
||||
n;261;6.606%
|
||||
r;229;5.796%
|
||||
k;174;4.404%
|
||||
d;144;3.645%
|
||||
s;141;3.569%
|
||||
v;133;3.366%
|
||||
l;123;3.113%
|
||||
j;120;3.037%
|
||||
p;120;3.037%
|
||||
z;81;2.05%
|
||||
b;75;1.898%
|
||||
u;71;1.797%
|
||||
"č";65;1.645%
|
||||
m;58;1.468%
|
||||
g;53;1.341%
|
||||
c;44;1.114%
|
||||
"š";32;0.81%
|
||||
"ž";32;0.81%
|
||||
1;28;0.709%
|
||||
h;20;0.506%
|
||||
0;19;0.481%
|
||||
2;18;0.456%
|
||||
".";17;0.43%
|
||||
M;13;0.329%
|
||||
6;12;0.304%
|
||||
f;11;0.278%
|
||||
9;10;0.253%
|
||||
3;8;0.202%
|
||||
A;7;0.177%
|
||||
J;7;0.177%
|
||||
T;6;0.152%
|
||||
B;5;0.127%
|
||||
K;5;0.127%
|
||||
P;5;0.127%
|
||||
5;4;0.101%
|
||||
8;4;0.101%
|
||||
R;4;0.101%
|
||||
S;4;0.101%
|
||||
4;3;0.076%
|
||||
":";3;0.076%
|
||||
D;3;0.076%
|
||||
F;3;0.076%
|
||||
I;3;0.076%
|
||||
7;2;0.051%
|
||||
G;2;0.051%
|
||||
w;2;0.051%
|
||||
"'";1;0.025%
|
||||
C;1;0.025%
|
||||
E;1;0.025%
|
||||
L;1;0.025%
|
||||
N;1;0.025%
|
||||
V;1;0.025%
|
||||
Z;1;0.025%
|
||||
"Š";1;0.025%
|
||||
|
@@ -0,0 +1,390 @@
|
||||
"Korpus: ";Gigafida
|
||||
"Datum: ";14.05.2018 06:37
|
||||
"Analiza: ";Besedni nizi
|
||||
"n-gram nivo: ";nivo črk
|
||||
"Skip: ";0
|
||||
"Izračunaj za: ";lema
|
||||
"Izračunaj za kombinacije samoglasnikov in soglasnikov: ";ne
|
||||
"Dolžina niza: ";2
|
||||
|
||||
|
||||
word;frequency;percent
|
||||
ti;122;3.835%
|
||||
en;70;2.201%
|
||||
at;59;1.855%
|
||||
it;56;1.76%
|
||||
in;54;1.698%
|
||||
ko;54;1.698%
|
||||
st;48;1.509%
|
||||
na;48;1.509%
|
||||
po;46;1.446%
|
||||
ar;45;1.415%
|
||||
ka;45;1.415%
|
||||
ra;44;1.383%
|
||||
an;42;1.32%
|
||||
pr;40;1.257%
|
||||
bi;40;1.257%
|
||||
je;39;1.226%
|
||||
re;38;1.195%
|
||||
te;37;1.163%
|
||||
ja;37;1.163%
|
||||
od;36;1.132%
|
||||
ov;36;1.132%
|
||||
ta;33;1.037%
|
||||
ri;31;0.975%
|
||||
el;31;0.975%
|
||||
er;30;0.943%
|
||||
da;28;0.88%
|
||||
se;27;0.849%
|
||||
za;27;0.849%
|
||||
ni;26;0.817%
|
||||
av;24;0.754%
|
||||
do;24;0.754%
|
||||
vi;24;0.754%
|
||||
ro;23;0.723%
|
||||
ed;23;0.723%
|
||||
ek;23;0.723%
|
||||
le;23;0.723%
|
||||
li;22;0.692%
|
||||
nj;22;0.692%
|
||||
os;22;0.692%
|
||||
de;21;0.66%
|
||||
la;21;0.66%
|
||||
lo;21;0.66%
|
||||
ve;20;0.629%
|
||||
lj;20;0.629%
|
||||
no;20;0.629%
|
||||
ol;20;0.629%
|
||||
aj;19;0.597%
|
||||
or;19;0.597%
|
||||
rt;18;0.566%
|
||||
to;18;0.566%
|
||||
va;18;0.566%
|
||||
es;18;0.566%
|
||||
me;18;0.566%
|
||||
on;18;0.566%
|
||||
ki;17;0.534%
|
||||
pe;17;0.534%
|
||||
ak;16;0.503%
|
||||
ce;16;0.503%
|
||||
dr;16;0.503%
|
||||
et;15;0.472%
|
||||
ic;15;0.472%
|
||||
ik;15;0.472%
|
||||
is;15;0.472%
|
||||
ič;15;0.472%
|
||||
ob;15;0.472%
|
||||
sk;14;0.44%
|
||||
ca;14;0.44%
|
||||
ga;14;0.44%
|
||||
ot;14;0.44%
|
||||
as;13;0.409%
|
||||
rk;13;0.409%
|
||||
ru;13;0.409%
|
||||
ev;13;0.409%
|
||||
"ča";13;0.409%
|
||||
"če";13;0.409%
|
||||
ij;13;0.409%
|
||||
ir;13;0.409%
|
||||
kr;13;0.409%
|
||||
ma;13;0.409%
|
||||
ne;13;0.409%
|
||||
og;13;0.409%
|
||||
ur;12;0.377%
|
||||
"ža";12;0.377%
|
||||
vo;12;0.377%
|
||||
go;12;0.377%
|
||||
zd;12;0.377%
|
||||
iz;12;0.377%
|
||||
ju;12;0.377%
|
||||
op;12;0.377%
|
||||
ad;11;0.346%
|
||||
iž;11;0.346%
|
||||
"či";11;0.346%
|
||||
Ma;11;0.346%
|
||||
oz;11;0.346%
|
||||
al;10;0.314%
|
||||
di;10;0.314%
|
||||
us;10;0.314%
|
||||
em;10;0.314%
|
||||
eč;10;0.314%
|
||||
om;10;0.314%
|
||||
pa;10;0.314%
|
||||
so;9;0.283%
|
||||
ug;9;0.283%
|
||||
"ša";9;0.283%
|
||||
iv;9;0.283%
|
||||
mi;9;0.283%
|
||||
ok;9;0.283%
|
||||
be;8;0.251%
|
||||
bl;8;0.251%
|
||||
nč;8;0.251%
|
||||
oč;8;0.251%
|
||||
tr;8;0.251%
|
||||
ec;8;0.251%
|
||||
ze;8;0.251%
|
||||
ns;8;0.251%
|
||||
sp;7;0.22%
|
||||
dj;7;0.22%
|
||||
un;7;0.22%
|
||||
aš;7;0.22%
|
||||
il;7;0.22%
|
||||
"še";7;0.22%
|
||||
ke;7;0.22%
|
||||
eš;7;0.22%
|
||||
1.;6;0.189%
|
||||
10;6;0.189%
|
||||
ah;6;0.189%
|
||||
rj;6;0.189%
|
||||
ba;6;0.189%
|
||||
uh;6;0.189%
|
||||
eb;6;0.189%
|
||||
"že";6;0.189%
|
||||
ep;6;0.189%
|
||||
ji;6;0.189%
|
||||
ml;6;0.189%
|
||||
nb;6;0.189%
|
||||
nk;6;0.189%
|
||||
am;5;0.157%
|
||||
ap;5;0.157%
|
||||
az;5;0.157%
|
||||
20;5;0.157%
|
||||
sn;5;0.157%
|
||||
sr;5;0.157%
|
||||
dn;5;0.157%
|
||||
ej;5;0.157%
|
||||
ez;5;0.157%
|
||||
ač;5;0.157%
|
||||
ge;5;0.157%
|
||||
gl;5;0.157%
|
||||
gr;5;0.157%
|
||||
ha;5;0.157%
|
||||
"čk";5;0.157%
|
||||
"čl";5;0.157%
|
||||
"št";5;0.157%
|
||||
uč;5;0.157%
|
||||
jd;5;0.157%
|
||||
kl;5;0.157%
|
||||
ku;5;0.157%
|
||||
Ju;5;0.157%
|
||||
Ko;5;0.157%
|
||||
oj;5;0.157%
|
||||
01;4;0.126%
|
||||
11;4;0.126%
|
||||
rb;4;0.126%
|
||||
rm;4;0.126%
|
||||
bo;4;0.126%
|
||||
sa;4;0.126%
|
||||
si;4;0.126%
|
||||
ci;4;0.126%
|
||||
tj;4;0.126%
|
||||
tv;4;0.126%
|
||||
To;4;0.126%
|
||||
eh;4;0.126%
|
||||
"ži";4;0.126%
|
||||
"žn";4;0.126%
|
||||
vl;4;0.126%
|
||||
oš;4;0.126%
|
||||
ož;4;0.126%
|
||||
ib;4;0.126%
|
||||
id;4;0.126%
|
||||
"šk";4;0.126%
|
||||
zg;4;0.126%
|
||||
zi;4;0.126%
|
||||
mo;4;0.126%
|
||||
".1";4;0.126%
|
||||
nt;4;0.126%
|
||||
oc;4;0.126%
|
||||
of;4;0.126%
|
||||
ac;3;0.094%
|
||||
13;3;0.094%
|
||||
19;3;0.094%
|
||||
Ag;3;0.094%
|
||||
br;3;0.094%
|
||||
Ro;3;0.094%
|
||||
sv;3;0.094%
|
||||
ck;3;0.094%
|
||||
Br;3;0.094%
|
||||
dl;3;0.094%
|
||||
ud;3;0.094%
|
||||
du;3;0.094%
|
||||
um;3;0.094%
|
||||
up;3;0.094%
|
||||
ut;3;0.094%
|
||||
vn;3;0.094%
|
||||
62;3;0.094%
|
||||
vs;3;0.094%
|
||||
66;3;0.094%
|
||||
fi;3;0.094%
|
||||
he;3;0.094%
|
||||
hk;3;0.094%
|
||||
ho;3;0.094%
|
||||
9.;3;0.094%
|
||||
ig;3;0.094%
|
||||
im;3;0.094%
|
||||
"šn";3;0.094%
|
||||
až;3;0.094%
|
||||
nd;3;0.094%
|
||||
".2";3;0.094%
|
||||
00;2;0.063%
|
||||
pt;2;0.063%
|
||||
09;2;0.063%
|
||||
12;2;0.063%
|
||||
ag;2;0.063%
|
||||
rc;2;0.063%
|
||||
rd;2;0.063%
|
||||
rg;2;0.063%
|
||||
rn;2;0.063%
|
||||
21;2;0.063%
|
||||
rs;2;0.063%
|
||||
2:;2;0.063%
|
||||
Al;2;0.063%
|
||||
An;2;0.063%
|
||||
sl;2;0.063%
|
||||
3.;2;0.063%
|
||||
su;2;0.063%
|
||||
1s;2;0.063%
|
||||
th;2;0.063%
|
||||
tn;2;0.063%
|
||||
db;2;0.063%
|
||||
Sr;2;0.063%
|
||||
tu;2;0.063%
|
||||
46;2;0.063%
|
||||
dg;2;0.063%
|
||||
dk;2;0.063%
|
||||
ub;2;0.063%
|
||||
dt;2;0.063%
|
||||
Da;2;0.063%
|
||||
vd;2;0.063%
|
||||
fa;2;0.063%
|
||||
vr;2;0.063%
|
||||
ff;2;0.063%
|
||||
vz;2;0.063%
|
||||
fo;2;0.063%
|
||||
Fi;2;0.063%
|
||||
bč;2;0.063%
|
||||
gu;2;0.063%
|
||||
8.;2;0.063%
|
||||
"čn";2;0.063%
|
||||
Go;2;0.063%
|
||||
98;2;0.063%
|
||||
99;2;0.063%
|
||||
"šp";2;0.063%
|
||||
zm;2;0.063%
|
||||
zn;2;0.063%
|
||||
jc;2;0.063%
|
||||
Ja;2;0.063%
|
||||
ll;2;0.063%
|
||||
ln;2;0.063%
|
||||
uš;2;0.063%
|
||||
už;2;0.063%
|
||||
vš;2;0.063%
|
||||
ež;2;0.063%
|
||||
nu;2;0.063%
|
||||
vž;2;0.063%
|
||||
03;1;0.031%
|
||||
08;1;0.031%
|
||||
Pa;1;0.031%
|
||||
Pe;1;0.031%
|
||||
iš;1;0.031%
|
||||
Pl;1;0.031%
|
||||
Po;1;0.031%
|
||||
ab;1;0.031%
|
||||
Pr;1;0.031%
|
||||
rf;1;0.031%
|
||||
rh;1;0.031%
|
||||
t.;1;0.031%
|
||||
2.;1;0.031%
|
||||
22;1;0.031%
|
||||
24;1;0.031%
|
||||
25;1;0.031%
|
||||
29;1;0.031%
|
||||
bn;1;0.031%
|
||||
SC;1;0.031%
|
||||
sm;1;0.031%
|
||||
30;1;0.031%
|
||||
31;1;0.031%
|
||||
Ba;1;0.031%
|
||||
cc;1;0.031%
|
||||
35;1;0.031%
|
||||
Ru;1;0.031%
|
||||
Be;1;0.031%
|
||||
co;1;0.031%
|
||||
ct;1;0.031%
|
||||
4.;1;0.031%
|
||||
St;1;0.031%
|
||||
dp;1;0.031%
|
||||
Ta;1;0.031%
|
||||
uc;1;0.031%
|
||||
ds;1;0.031%
|
||||
uf;1;0.031%
|
||||
dv;1;0.031%
|
||||
uk;1;0.031%
|
||||
ea;1;0.031%
|
||||
56;1;0.031%
|
||||
Tu;1;0.031%
|
||||
ef;1;0.031%
|
||||
De;1;0.031%
|
||||
eg;1;0.031%
|
||||
ei;1;0.031%
|
||||
"žm";1;0.031%
|
||||
nš;1;0.031%
|
||||
vk;1;0.031%
|
||||
60;1;0.031%
|
||||
fe;1;0.031%
|
||||
El;1;0.031%
|
||||
Va;1;0.031%
|
||||
fu;1;0.031%
|
||||
nž;1;0.031%
|
||||
wi;1;0.031%
|
||||
i';1;0.031%
|
||||
gi;1;0.031%
|
||||
Fr;1;0.031%
|
||||
"čb";1;0.031%
|
||||
hi;1;0.031%
|
||||
I.;1;0.031%
|
||||
"ču";1;0.031%
|
||||
hr;1;0.031%
|
||||
"Šm";1;0.031%
|
||||
ie;1;0.031%
|
||||
97;1;0.031%
|
||||
9:;1;0.031%
|
||||
io;1;0.031%
|
||||
zb;1;0.031%
|
||||
"'s";1;0.031%
|
||||
zo;1;0.031%
|
||||
":2";1;0.031%
|
||||
zr;1;0.031%
|
||||
zs;1;0.031%
|
||||
":3";1;0.031%
|
||||
zu;1;0.031%
|
||||
":5";1;0.031%
|
||||
zv;1;0.031%
|
||||
jn;1;0.031%
|
||||
In;1;0.031%
|
||||
jo;1;0.031%
|
||||
js;1;0.031%
|
||||
Iv;1;0.031%
|
||||
kd;1;0.031%
|
||||
Zu;1;0.031%
|
||||
ld;1;0.031%
|
||||
lm;1;0.031%
|
||||
lu;1;0.031%
|
||||
Lj;1;0.031%
|
||||
mp;1;0.031%
|
||||
ms;1;0.031%
|
||||
MS;1;0.031%
|
||||
nc;1;0.031%
|
||||
ng;1;0.031%
|
||||
".0";1;0.031%
|
||||
Mo;1;0.031%
|
||||
nr;1;0.031%
|
||||
".7";1;0.031%
|
||||
".9";1;0.031%
|
||||
"šč";1;0.031%
|
||||
Ne;1;0.031%
|
||||
oh;1;0.031%
|
||||
oi;1;0.031%
|
||||
ow;1;0.031%
|
||||
pi;1;0.031%
|
||||
pl;1;0.031%
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,455 @@
|
||||
"Korpus: ";Gigafida
|
||||
"Datum: ";31.01.2018 05:11
|
||||
"Analiza: ";Besedni nizi
|
||||
"n-gram nivo: ";1
|
||||
"Skip: ";0
|
||||
"Izračunaj za: ";lema
|
||||
|
||||
|
||||
word;frequency;percent
|
||||
biti;29;3.766%
|
||||
in;29;3.766%
|
||||
v;16;2.078%
|
||||
z;12;1.558%
|
||||
se;10;1.299%
|
||||
on;9;1.169%
|
||||
za;9;1.169%
|
||||
ki;8;1.039%
|
||||
na;8;1.039%
|
||||
da;7;0.909%
|
||||
kako;7;0.909%
|
||||
o;6;0.779%
|
||||
ta;5;0.649%
|
||||
elina;4;0.519%
|
||||
ajdov;4;0.519%
|
||||
zadruga;4;0.519%
|
||||
postati;4;0.519%
|
||||
grozdje;4;0.519%
|
||||
ne;4;0.519%
|
||||
pol;4;0.519%
|
||||
dodati;4;0.519%
|
||||
ti;4;0.519%
|
||||
cerkev;4;0.519%
|
||||
kaša;4;0.519%
|
||||
totenbirt;4;0.519%
|
||||
približno;4;0.519%
|
||||
drug;4;0.519%
|
||||
sestra;4;0.519%
|
||||
korenje;3;0.39%
|
||||
Jurkovič;3;0.39%
|
||||
do;3;0.39%
|
||||
srbeč;3;0.39%
|
||||
"če";3;0.39%
|
||||
narod;3;0.39%
|
||||
Matjaž;3;0.39%
|
||||
"član";3;0.39%
|
||||
Koper;3;0.39%
|
||||
ura;3;0.39%
|
||||
gost;3;0.39%
|
||||
ob;3;0.39%
|
||||
od;3;0.39%
|
||||
oreh;3;0.39%
|
||||
po;3;0.39%
|
||||
križarjenje;3;0.39%
|
||||
jaz;3;0.39%
|
||||
mlad;3;0.39%
|
||||
izdelovati;3;0.39%
|
||||
62;3;0.39%
|
||||
ogledalo;3;0.39%
|
||||
kocka;3;0.39%
|
||||
"še";3;0.39%
|
||||
kovinski;3;0.39%
|
||||
koža;3;0.39%
|
||||
Agata;3;0.39%
|
||||
vino;3;0.39%
|
||||
dati;3;0.39%
|
||||
zelenjaven;3;0.39%
|
||||
juha;3;0.39%
|
||||
pomaranča;3;0.39%
|
||||
dobro;2;0.26%
|
||||
imeti;2;0.26%
|
||||
ter;2;0.26%
|
||||
jesenski;2;0.26%
|
||||
lahko;2;0.26%
|
||||
1;2;0.26%
|
||||
3;2;0.26%
|
||||
korenčkov;2;0.26%
|
||||
več;2;0.26%
|
||||
Marta;2;0.26%
|
||||
gepard;2;0.26%
|
||||
ustanovitev;2;0.26%
|
||||
a;2;0.26%
|
||||
the;2;0.26%
|
||||
tiskarna;2;0.26%
|
||||
Roblek;2;0.26%
|
||||
učiteljica;2;0.26%
|
||||
eko;2;0.26%
|
||||
torta;2;0.26%
|
||||
Totenbirt;2;0.26%
|
||||
ideja;2;0.26%
|
||||
kuhati;2;0.26%
|
||||
Javšnik;2;0.26%
|
||||
"špasen";2;0.26%
|
||||
voda;2;0.26%
|
||||
društvo;2;0.26%
|
||||
"življenje";2;0.26%
|
||||
pečica;2;0.26%
|
||||
ladja;2;0.26%
|
||||
praven;2;0.26%
|
||||
oseba;2;0.26%
|
||||
medtem;2;0.26%
|
||||
namen;2;0.26%
|
||||
Jurkovička;2;0.26%
|
||||
Martika;2;0.26%
|
||||
oprati;2;0.26%
|
||||
resničen;2;0.26%
|
||||
kar;2;0.26%
|
||||
junak;2;0.26%
|
||||
Godec;2;0.26%
|
||||
pa;2;0.26%
|
||||
"čas";2;0.26%
|
||||
"žena";2;0.26%
|
||||
pekač;2;0.26%
|
||||
težava;2;0.26%
|
||||
1st;2;0.26%
|
||||
pot;2;0.26%
|
||||
ker;2;0.26%
|
||||
star;2;0.26%
|
||||
sodnica;2;0.26%
|
||||
nekaj;2;0.26%
|
||||
46;2;0.26%
|
||||
officer;2;0.26%
|
||||
lata;2;0.26%
|
||||
pri;2;0.26%
|
||||
nov;2;0.26%
|
||||
Tomijev;2;0.26%
|
||||
znebiti;2;0.26%
|
||||
april;2;0.26%
|
||||
pozdrav;2;0.26%
|
||||
posoda;2;0.26%
|
||||
vdova;2;0.26%
|
||||
Sredozemlje;2;0.26%
|
||||
svoj;2;0.26%
|
||||
občina;2;0.26%
|
||||
1998;2;0.26%
|
||||
Alenka;2;0.26%
|
||||
zgodba;2;0.26%
|
||||
mesto;2;0.26%
|
||||
pravi;2;0.26%
|
||||
Fijavž;2;0.26%
|
||||
velik;2;0.26%
|
||||
potem;2;0.26%
|
||||
veličasten;2;0.26%
|
||||
zahoden;2;0.26%
|
||||
organizacija;1;0.13%
|
||||
odvisno;1;0.13%
|
||||
dekan;1;0.13%
|
||||
viroza;1;0.13%
|
||||
drunk;1;0.13%
|
||||
pričati;1;0.13%
|
||||
Brolo;1;0.13%
|
||||
Končar;1;0.13%
|
||||
tek;1;0.13%
|
||||
sister;1;0.13%
|
||||
okusen;1;0.13%
|
||||
dokler;1;0.13%
|
||||
izgubiti;1;0.13%
|
||||
pospeševati;1;0.13%
|
||||
zvezdniški;1;0.13%
|
||||
vključno;1;0.13%
|
||||
spoštovan;1;0.13%
|
||||
5;1;0.13%
|
||||
cek;1;0.13%
|
||||
1113;1;0.13%
|
||||
roka;1;0.13%
|
||||
g;1;0.13%
|
||||
nedoločen;1;0.13%
|
||||
izumirati;1;0.13%
|
||||
uporabiti;1;0.13%
|
||||
pomarančen;1;0.13%
|
||||
Darko;1;0.13%
|
||||
polica;1;0.13%
|
||||
Frenk;1;0.13%
|
||||
križarjanje;1;0.13%
|
||||
de;1;0.13%
|
||||
gospodarski;1;0.13%
|
||||
Marseille;1;0.13%
|
||||
dl;1;0.13%
|
||||
torinski;1;0.13%
|
||||
12:35;1;0.13%
|
||||
strah;1;0.13%
|
||||
Danijel;1;0.13%
|
||||
vliti;1;0.13%
|
||||
"ženska";1;0.13%
|
||||
kompas;1;0.13%
|
||||
iti;1;0.13%
|
||||
test;1;0.13%
|
||||
ustaviti;1;0.13%
|
||||
Barcelona;1;0.13%
|
||||
tako;1;0.13%
|
||||
en;1;0.13%
|
||||
premešati;1;0.13%
|
||||
upravljanje;1;0.13%
|
||||
sutano;1;0.13%
|
||||
Tanja;1;0.13%
|
||||
naročiti;1;0.13%
|
||||
09.11.2010;1;0.13%
|
||||
intermarketing;1;0.13%
|
||||
nakazovati;1;0.13%
|
||||
križariti;1;0.13%
|
||||
2010;1;0.13%
|
||||
2130;1;0.13%
|
||||
zaprt;1;0.13%
|
||||
prezgodaj;1;0.13%
|
||||
zdeti;1;0.13%
|
||||
arhivo;1;0.13%
|
||||
sin;1;0.13%
|
||||
akreditacija;1;0.13%
|
||||
Performs;1;0.13%
|
||||
paličen;1;0.13%
|
||||
Marijana;1;0.13%
|
||||
sladkor;1;0.13%
|
||||
potekati;1;0.13%
|
||||
istospolno;1;0.13%
|
||||
12:25;1;0.13%
|
||||
I.;1;0.13%
|
||||
tisti;1;0.13%
|
||||
jesti;1;0.13%
|
||||
vnaprej;1;0.13%
|
||||
naj;1;0.13%
|
||||
mehko;1;0.13%
|
||||
judge;1;0.13%
|
||||
tukaj;1;0.13%
|
||||
iz;1;0.13%
|
||||
foto;1;0.13%
|
||||
palma;1;0.13%
|
||||
Mojca;1;0.13%
|
||||
nizek;1;0.13%
|
||||
blagajna;1;0.13%
|
||||
mešalnik;1;0.13%
|
||||
"želeti";1;0.13%
|
||||
vse;1;0.13%
|
||||
31.10;1;0.13%
|
||||
okus;1;0.13%
|
||||
dragocen;1;0.13%
|
||||
pojasnjevati;1;0.13%
|
||||
optimist;1;0.13%
|
||||
jogurt;1;0.13%
|
||||
vsebovati;1;0.13%
|
||||
skorajda;1;0.13%
|
||||
operacija;1;0.13%
|
||||
ko;1;0.13%
|
||||
podjetje;1;0.13%
|
||||
teden;1;0.13%
|
||||
ustanoviti;1;0.13%
|
||||
Kofu;1;0.13%
|
||||
666;1;0.13%
|
||||
druga;1;0.13%
|
||||
motnja;1;0.13%
|
||||
košček;1;0.13%
|
||||
izbrati;1;0.13%
|
||||
prav;1;0.13%
|
||||
ogret;1;0.13%
|
||||
rezina;1;0.13%
|
||||
odgovoren;1;0.13%
|
||||
vsota;1;0.13%
|
||||
Planinšek;1;0.13%
|
||||
pridružiti;1;0.13%
|
||||
sok;1;0.13%
|
||||
Indija;1;0.13%
|
||||
fantastica;1;0.13%
|
||||
Palermo;1;0.13%
|
||||
dober;1;0.13%
|
||||
"člen";1;0.13%
|
||||
29.03.2010;1;0.13%
|
||||
splošen;1;0.13%
|
||||
pojav;1;0.13%
|
||||
ali;1;0.13%
|
||||
poslednji;1;0.13%
|
||||
priokus;1;0.13%
|
||||
račun;1;0.13%
|
||||
trg;1;0.13%
|
||||
proklamirati;1;0.13%
|
||||
nazaj;1;0.13%
|
||||
Anand;1;0.13%
|
||||
pecilen;1;0.13%
|
||||
vame;1;0.13%
|
||||
peč;1;0.13%
|
||||
edinstven;1;0.13%
|
||||
1.7;1;0.13%
|
||||
cena;1;0.13%
|
||||
usta;1;0.13%
|
||||
med;1;0.13%
|
||||
veliko;1;0.13%
|
||||
zmešati;1;0.13%
|
||||
ogledati;1;0.13%
|
||||
srbečica;1;0.13%
|
||||
Maja;1;0.13%
|
||||
21.;1;0.13%
|
||||
kaj;1;0.13%
|
||||
Branko;1;0.13%
|
||||
zelo;1;0.13%
|
||||
Mallorca;1;0.13%
|
||||
polovica;1;0.13%
|
||||
zakon;1;0.13%
|
||||
aranžma;1;0.13%
|
||||
antikrist;1;0.13%
|
||||
bert;1;0.13%
|
||||
minuta;1;0.13%
|
||||
urednik;1;0.13%
|
||||
poleg;1;0.13%
|
||||
volilen;1;0.13%
|
||||
priloga;1;0.13%
|
||||
mareziga;1;0.13%
|
||||
unikaten;1;0.13%
|
||||
križati;1;0.13%
|
||||
dunajski;1;0.13%
|
||||
Detela;1;0.13%
|
||||
jurkovička;1;0.13%
|
||||
naročnik;1;0.13%
|
||||
naš;1;0.13%
|
||||
pred;1;0.13%
|
||||
lep;1;0.13%
|
||||
bogastvo;1;0.13%
|
||||
1.;1;0.13%
|
||||
ključen;1;0.13%
|
||||
6000;1;0.13%
|
||||
penast;1;0.13%
|
||||
"čast";1;0.13%
|
||||
2.;1;0.13%
|
||||
20;1;0.13%
|
||||
peška;1;0.13%
|
||||
22;1;0.13%
|
||||
moka;1;0.13%
|
||||
narezati;1;0.13%
|
||||
mik;1;0.13%
|
||||
danes;1;0.13%
|
||||
"članica";1;0.13%
|
||||
ravno;1;0.13%
|
||||
odpraviti;1;0.13%
|
||||
sprejemljiv;1;0.13%
|
||||
uresničevati;1;0.13%
|
||||
pristop;1;0.13%
|
||||
oni;1;0.13%
|
||||
ponuditi;1;0.13%
|
||||
obiskati;1;0.13%
|
||||
mogoč;1;0.13%
|
||||
določba;1;0.13%
|
||||
jed;1;0.13%
|
||||
umešati;1;0.13%
|
||||
tekoč;1;0.13%
|
||||
Ivek;1;0.13%
|
||||
Neapelj;1;0.13%
|
||||
povzročati;1;0.13%
|
||||
kateri;1;0.13%
|
||||
pogost;1;0.13%
|
||||
izdelan;1;0.13%
|
||||
izstop;1;0.13%
|
||||
prt;1;0.13%
|
||||
referendum;1;0.13%
|
||||
66;1;0.13%
|
||||
preprost;1;0.13%
|
||||
komedija;1;0.13%
|
||||
lupinica;1;0.13%
|
||||
Eli's;1;0.13%
|
||||
masa;1;0.13%
|
||||
korist;1;0.13%
|
||||
recept;1;0.13%
|
||||
požig;1;0.13%
|
||||
vzeti;1;0.13%
|
||||
komisija;1;0.13%
|
||||
Ankaran;1;0.13%
|
||||
prositi;1;0.13%
|
||||
tudi;1;0.13%
|
||||
posebej;1;0.13%
|
||||
8.;1;0.13%
|
||||
svoboden;1;0.13%
|
||||
sanjati;1;0.13%
|
||||
Tunis;1;0.13%
|
||||
ohraniti;1;0.13%
|
||||
kolobar;1;0.13%
|
||||
dieten;1;0.13%
|
||||
19.;1;0.13%
|
||||
Valentinrozman;1;0.13%
|
||||
09:56;1;0.13%
|
||||
kloniranje;1;0.13%
|
||||
začeti;1;0.13%
|
||||
anatemizirati;1;0.13%
|
||||
streti;1;0.13%
|
||||
97;1;0.13%
|
||||
zaradi;1;0.13%
|
||||
nekdo;1;0.13%
|
||||
sodelovanje;1;0.13%
|
||||
križarka;1;0.13%
|
||||
prostovoljen;1;0.13%
|
||||
počitnice;1;0.13%
|
||||
"število";1;0.13%
|
||||
jesen;1;0.13%
|
||||
koncert;1;0.13%
|
||||
Prison;1;0.13%
|
||||
prošnja;1;0.13%
|
||||
"želodec";1;0.13%
|
||||
older;1;0.13%
|
||||
MSC;1;0.13%
|
||||
prašek;1;0.13%
|
||||
"št.";1;0.13%
|
||||
Stepančič;1;0.13%
|
||||
zavreti;1;0.13%
|
||||
"škofija";1;0.13%
|
||||
lahek;1;0.13%
|
||||
prispevek;1;0.13%
|
||||
južek;1;0.13%
|
||||
temeljiti;1;0.13%
|
||||
novinar;1;0.13%
|
||||
popeljati;1;0.13%
|
||||
"Šmarje";1;0.13%
|
||||
zavračati;1;0.13%
|
||||
oziroma;1;0.13%
|
||||
ustanoiveti;1;0.13%
|
||||
Beljan;1;0.13%
|
||||
dermatologinja;1;0.13%
|
||||
goljufati;1;0.13%
|
||||
okrogel;1;0.13%
|
||||
Brecelj;1;0.13%
|
||||
Podobnik;1;0.13%
|
||||
13.9;1;0.13%
|
||||
prinašati;1;0.13%
|
||||
soliti;1;0.13%
|
||||
neškodljiv;1;0.13%
|
||||
widow;1;0.13%
|
||||
skrivati;1;0.13%
|
||||
08.11.2010;1;0.13%
|
||||
usmerjen;1;0.13%
|
||||
genova;1;0.13%
|
||||
dan;1;0.13%
|
||||
24.10;1;0.13%
|
||||
akreditirati;1;0.13%
|
||||
narediti;1;0.13%
|
||||
peder;1;0.13%
|
||||
ves;1;0.13%
|
||||
liter;1;0.13%
|
||||
posuti;1;0.13%
|
||||
zakaj;1;0.13%
|
||||
odkrivati;1;0.13%
|
||||
Roberto;1;0.13%
|
||||
detective;1;0.13%
|
||||
Ručigaj;1;0.13%
|
||||
bolan;1;0.13%
|
||||
odstraniti;1;0.13%
|
||||
jajce;1;0.13%
|
||||
odličen;1;0.13%
|
||||
konec;1;0.13%
|
||||
posest;1;0.13%
|
||||
nared;1;0.13%
|
||||
duhovnik;1;0.13%
|
||||
pogledati;1;0.13%
|
||||
sreča;1;0.13%
|
||||
zato;1;0.13%
|
||||
cesta;1;0.13%
|
||||
saj;1;0.13%
|
||||
sam;1;0.13%
|
||||
opreka;1;0.13%
|
||||
enakopraven;1;0.13%
|
||||
olje;1;0.13%
|
||||
Ljubljana;1;0.13%
|
||||
Zucco;1;0.13%
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,512 @@
|
||||
"Korpus: ";Gigafida
|
||||
"Datum: ";25.01.2018 06:27
|
||||
"Analiza: ";Besedni nizi
|
||||
"n-gram nivo: ";1
|
||||
"Skip: ";0
|
||||
"Izračunaj za: ";različnica
|
||||
|
||||
|
||||
word;frequency;percent
|
||||
in;29;3.766%
|
||||
v;16;2.078%
|
||||
je;14;1.818%
|
||||
za;9;1.169%
|
||||
ki;8;1.039%
|
||||
na;8;1.039%
|
||||
se;8;1.039%
|
||||
da;7;0.909%
|
||||
kako;7;0.909%
|
||||
bi;6;0.779%
|
||||
o;6;0.779%
|
||||
s;6;0.779%
|
||||
z;6;0.779%
|
||||
elina;4;0.519%
|
||||
dodamo;4;0.519%
|
||||
ne;4;0.519%
|
||||
pol;4;0.519%
|
||||
ogledala;4;0.519%
|
||||
totenbirt;4;0.519%
|
||||
kašo;4;0.519%
|
||||
približno;4;0.519%
|
||||
sestra;4;0.519%
|
||||
ajdovo;4;0.519%
|
||||
korenje;3;0.39%
|
||||
Jurkovič;3;0.39%
|
||||
do;3;0.39%
|
||||
izdelujejo;3;0.39%
|
||||
"če";3;0.39%
|
||||
Koper;3;0.39%
|
||||
ure;3;0.39%
|
||||
"članov";3;0.39%
|
||||
drugo;3;0.39%
|
||||
postane;3;0.39%
|
||||
mu;3;0.39%
|
||||
grozdje;3;0.39%
|
||||
ob;3;0.39%
|
||||
od;3;0.39%
|
||||
po;3;0.39%
|
||||
cerkev;3;0.39%
|
||||
62;3;0.39%
|
||||
"še";3;0.39%
|
||||
kovinska;3;0.39%
|
||||
Agata;3;0.39%
|
||||
juho;3;0.39%
|
||||
zahodnega;2;0.26%
|
||||
tem;2;0.26%
|
||||
ter;2;0.26%
|
||||
lahko;2;0.26%
|
||||
1;2;0.26%
|
||||
3;2;0.26%
|
||||
več;2;0.26%
|
||||
Marta;2;0.26%
|
||||
gepard;2;0.26%
|
||||
bo;2;0.26%
|
||||
ustanovitev;2;0.26%
|
||||
a;2;0.26%
|
||||
the;2;0.26%
|
||||
tiskarna;2;0.26%
|
||||
Roblek;2;0.26%
|
||||
učiteljica;2;0.26%
|
||||
eko;2;0.26%
|
||||
Totenbirt;2;0.26%
|
||||
idejo;2;0.26%
|
||||
težav;2;0.26%
|
||||
vode;2;0.26%
|
||||
resnična;2;0.26%
|
||||
novih;2;0.26%
|
||||
orehe;2;0.26%
|
||||
zadruga;2;0.26%
|
||||
Matjaž;2;0.26%
|
||||
nam;2;0.26%
|
||||
jo;2;0.26%
|
||||
vse;2;0.26%
|
||||
medtem;2;0.26%
|
||||
namen;2;0.26%
|
||||
Jurkovička;2;0.26%
|
||||
aprila;2;0.26%
|
||||
Martika;2;0.26%
|
||||
starejša;2;0.26%
|
||||
srbečo;2;0.26%
|
||||
junak;2;0.26%
|
||||
Godec;2;0.26%
|
||||
gosta;2;0.26%
|
||||
pa;2;0.26%
|
||||
kuhamo;2;0.26%
|
||||
križarjenju;2;0.26%
|
||||
"času";2;0.26%
|
||||
"žena";2;0.26%
|
||||
pekač;2;0.26%
|
||||
1st;2;0.26%
|
||||
pot;2;0.26%
|
||||
si;2;0.26%
|
||||
ker;2;0.26%
|
||||
sodnica;2;0.26%
|
||||
občin;2;0.26%
|
||||
nekaj;2;0.26%
|
||||
46;2;0.26%
|
||||
officer;2;0.26%
|
||||
late;2;0.26%
|
||||
pri;2;0.26%
|
||||
zelenjavne;2;0.26%
|
||||
damo;2;0.26%
|
||||
znebiti;2;0.26%
|
||||
jih;2;0.26%
|
||||
kocke;2;0.26%
|
||||
operemo;2;0.26%
|
||||
posodi;2;0.26%
|
||||
kožo;2;0.26%
|
||||
tomijeva;2;0.26%
|
||||
vas;2;0.26%
|
||||
bosta;2;0.26%
|
||||
mlajša;2;0.26%
|
||||
tega;2;0.26%
|
||||
vdova;2;0.26%
|
||||
Sredozemlja;2;0.26%
|
||||
1998;2;0.26%
|
||||
korenčkovo;2;0.26%
|
||||
vino;2;0.26%
|
||||
zgodba;2;0.26%
|
||||
ima;2;0.26%
|
||||
Fijavž;2;0.26%
|
||||
potem;2;0.26%
|
||||
organizacija;1;0.13%
|
||||
dobro;1;0.13%
|
||||
zadrugah;1;0.13%
|
||||
odvisno;1;0.13%
|
||||
požigom;1;0.13%
|
||||
svobodnem;1;0.13%
|
||||
drunk;1;0.13%
|
||||
Brolo;1;0.13%
|
||||
priokusom;1;0.13%
|
||||
Končar;1;0.13%
|
||||
začne;1;0.13%
|
||||
tek;1;0.13%
|
||||
sister;1;0.13%
|
||||
naša;1;0.13%
|
||||
zvezdniška;1;0.13%
|
||||
dokler;1;0.13%
|
||||
pravno;1;0.13%
|
||||
pospeševati;1;0.13%
|
||||
vključno;1;0.13%
|
||||
5;1;0.13%
|
||||
zavremo;1;0.13%
|
||||
jesenska;1;0.13%
|
||||
dietna;1;0.13%
|
||||
pravne;1;0.13%
|
||||
ValentinRozman;1;0.13%
|
||||
postali;1;0.13%
|
||||
roko;1;0.13%
|
||||
1113;1;0.13%
|
||||
izberemo;1;0.13%
|
||||
kolobarje;1;0.13%
|
||||
zavračamo;1;0.13%
|
||||
sami;1;0.13%
|
||||
g;1;0.13%
|
||||
narežemo;1;0.13%
|
||||
popeljala;1;0.13%
|
||||
uporabite;1;0.13%
|
||||
police;1;0.13%
|
||||
okusu;1;0.13%
|
||||
Darko;1;0.13%
|
||||
"špasnem";1;0.13%
|
||||
gospodarske;1;0.13%
|
||||
de;1;0.13%
|
||||
upravljanju;1;0.13%
|
||||
torto;1;0.13%
|
||||
Marseille;1;0.13%
|
||||
dl;1;0.13%
|
||||
križarjanju;1;0.13%
|
||||
12:35;1;0.13%
|
||||
torta;1;0.13%
|
||||
mladi;1;0.13%
|
||||
Neaplja;1;0.13%
|
||||
temelji;1;0.13%
|
||||
proklamirali;1;0.13%
|
||||
odličnimi;1;0.13%
|
||||
tako;1;0.13%
|
||||
pristopu;1;0.13%
|
||||
testu;1;0.13%
|
||||
sutano;1;0.13%
|
||||
Tanja;1;0.13%
|
||||
poslednjih;1;0.13%
|
||||
Barcelono;1;0.13%
|
||||
cerkvi;1;0.13%
|
||||
Javšnik;1;0.13%
|
||||
09.11.2010;1;0.13%
|
||||
intermarketing;1;0.13%
|
||||
onemu;1;0.13%
|
||||
volilna;1;0.13%
|
||||
nakazovati;1;0.13%
|
||||
"števila";1;0.13%
|
||||
2010;1;0.13%
|
||||
2130;1;0.13%
|
||||
akreditacijo;1;0.13%
|
||||
mogoče;1;0.13%
|
||||
društvu;1;0.13%
|
||||
prezgodaj;1;0.13%
|
||||
arhivo;1;0.13%
|
||||
svojega;1;0.13%
|
||||
društvo;1;0.13%
|
||||
gre;1;0.13%
|
||||
Performs;1;0.13%
|
||||
odstranimo;1;0.13%
|
||||
"življenje";1;0.13%
|
||||
i.;1;0.13%
|
||||
veličastni;1;0.13%
|
||||
ustanovi;1;0.13%
|
||||
koščkov;1;0.13%
|
||||
Marijana;1;0.13%
|
||||
sladkor;1;0.13%
|
||||
veličastna;1;0.13%
|
||||
istospolno;1;0.13%
|
||||
"življenju";1;0.13%
|
||||
pečico;1;0.13%
|
||||
12:25;1;0.13%
|
||||
viroz;1;0.13%
|
||||
tekoča;1;0.13%
|
||||
pečice;1;0.13%
|
||||
vanjo;1;0.13%
|
||||
nedoločenega;1;0.13%
|
||||
posujemo;1;0.13%
|
||||
"ženskami";1;0.13%
|
||||
jeste;1;0.13%
|
||||
narod;1;0.13%
|
||||
vnaprej;1;0.13%
|
||||
uresničuje;1;0.13%
|
||||
ladji;1;0.13%
|
||||
naj;1;0.13%
|
||||
ceka;1;0.13%
|
||||
ladja;1;0.13%
|
||||
mehko;1;0.13%
|
||||
judge;1;0.13%
|
||||
dni;1;0.13%
|
||||
tista;1;0.13%
|
||||
palmo;1;0.13%
|
||||
Mojco;1;0.13%
|
||||
tukaj;1;0.13%
|
||||
iz;1;0.13%
|
||||
foto;1;0.13%
|
||||
duhovnike;1;0.13%
|
||||
ji;1;0.13%
|
||||
blagajna;1;0.13%
|
||||
ponudimo;1;0.13%
|
||||
31.10;1;0.13%
|
||||
ju;1;0.13%
|
||||
edinstvena;1;0.13%
|
||||
pojasnjuje;1;0.13%
|
||||
smo;1;0.13%
|
||||
osebe;1;0.13%
|
||||
ustanoivi;1;0.13%
|
||||
prostovoljnem;1;0.13%
|
||||
optimist;1;0.13%
|
||||
jogurt;1;0.13%
|
||||
osebo;1;0.13%
|
||||
skorajda;1;0.13%
|
||||
ko;1;0.13%
|
||||
obiskali;1;0.13%
|
||||
operaciji;1;0.13%
|
||||
večjih;1;0.13%
|
||||
podjetje;1;0.13%
|
||||
izumira;1;0.13%
|
||||
novinarja;1;0.13%
|
||||
druge;1;0.13%
|
||||
666;1;0.13%
|
||||
drugi;1;0.13%
|
||||
premešamo;1;0.13%
|
||||
motnja;1;0.13%
|
||||
prav;1;0.13%
|
||||
določbe;1;0.13%
|
||||
peči;1;0.13%
|
||||
Indiji;1;0.13%
|
||||
penasto;1;0.13%
|
||||
Palermu;1;0.13%
|
||||
Planinšek;1;0.13%
|
||||
sok;1;0.13%
|
||||
fantastica;1;0.13%
|
||||
dober;1;0.13%
|
||||
"člen";1;0.13%
|
||||
počitnic;1;0.13%
|
||||
ust;1;0.13%
|
||||
gosto;1;0.13%
|
||||
mi;1;0.13%
|
||||
29.03.2010;1;0.13%
|
||||
pojav;1;0.13%
|
||||
ali;1;0.13%
|
||||
račun;1;0.13%
|
||||
torinskim;1;0.13%
|
||||
grozdju;1;0.13%
|
||||
trg;1;0.13%
|
||||
Mallorco;1;0.13%
|
||||
nazaj;1;0.13%
|
||||
vami;1;0.13%
|
||||
koristi;1;0.13%
|
||||
rezino;1;0.13%
|
||||
"špasnega";1;0.13%
|
||||
naročili;1;0.13%
|
||||
srbečico;1;0.13%
|
||||
1.7;1;0.13%
|
||||
cena;1;0.13%
|
||||
Javšnika;1;0.13%
|
||||
med;1;0.13%
|
||||
veliko;1;0.13%
|
||||
Maja;1;0.13%
|
||||
21.;1;0.13%
|
||||
kaj;1;0.13%
|
||||
Branko;1;0.13%
|
||||
zelo;1;0.13%
|
||||
polovico;1;0.13%
|
||||
nižja;1;0.13%
|
||||
velike;1;0.13%
|
||||
kar;1;0.13%
|
||||
pedri;1;0.13%
|
||||
strli;1;0.13%
|
||||
zakon;1;0.13%
|
||||
aranžma;1;0.13%
|
||||
bert;1;0.13%
|
||||
srbeče;1;0.13%
|
||||
povzroča;1;0.13%
|
||||
urednik;1;0.13%
|
||||
jeseni;1;0.13%
|
||||
prilogi;1;0.13%
|
||||
poleg;1;0.13%
|
||||
dekani;1;0.13%
|
||||
vsote;1;0.13%
|
||||
marezige;1;0.13%
|
||||
Matjaža;1;0.13%
|
||||
križati;1;0.13%
|
||||
Detela;1;0.13%
|
||||
jurkovička;1;0.13%
|
||||
vsebuje;1;0.13%
|
||||
naročnik;1;0.13%
|
||||
dunajska;1;0.13%
|
||||
odkrivajte;1;0.13%
|
||||
pred;1;0.13%
|
||||
lep;1;0.13%
|
||||
anatemizirala;1;0.13%
|
||||
bogastvo;1;0.13%
|
||||
1.;1;0.13%
|
||||
spoštovani;1;0.13%
|
||||
antikrista;1;0.13%
|
||||
bolni;1;0.13%
|
||||
6000;1;0.13%
|
||||
ste;1;0.13%
|
||||
goljufal;1;0.13%
|
||||
zaprta;1;0.13%
|
||||
"čast";1;0.13%
|
||||
mešalnikom;1;0.13%
|
||||
pozdravi;1;0.13%
|
||||
sinov;1;0.13%
|
||||
križarjenja;1;0.13%
|
||||
peške;1;0.13%
|
||||
2.;1;0.13%
|
||||
20;1;0.13%
|
||||
minut;1;0.13%
|
||||
22;1;0.13%
|
||||
prtom;1;0.13%
|
||||
danes;1;0.13%
|
||||
ohranimo;1;0.13%
|
||||
sprejemljivi;1;0.13%
|
||||
"članica";1;0.13%
|
||||
paličnim;1;0.13%
|
||||
ravno;1;0.13%
|
||||
odpraviti;1;0.13%
|
||||
Anandm;1;0.13%
|
||||
umešamo;1;0.13%
|
||||
ta;1;0.13%
|
||||
pridružite;1;0.13%
|
||||
prinašala;1;0.13%
|
||||
zdi;1;0.13%
|
||||
Tunisu;1;0.13%
|
||||
jed;1;0.13%
|
||||
splošne;1;0.13%
|
||||
ogreto;1;0.13%
|
||||
Ivek;1;0.13%
|
||||
odgovorni;1;0.13%
|
||||
"želimo";1;0.13%
|
||||
pecilni;1;0.13%
|
||||
dala;1;0.13%
|
||||
skrivali;1;0.13%
|
||||
bolje;1;0.13%
|
||||
moko;1;0.13%
|
||||
solimo;1;0.13%
|
||||
izgubil;1;0.13%
|
||||
orehih;1;0.13%
|
||||
zmešamo;1;0.13%
|
||||
referendum;1;0.13%
|
||||
66;1;0.13%
|
||||
poglejte;1;0.13%
|
||||
maso;1;0.13%
|
||||
zelenjavna;1;0.13%
|
||||
preprost;1;0.13%
|
||||
komedija;1;0.13%
|
||||
Eli's;1;0.13%
|
||||
recept;1;0.13%
|
||||
komisija;1;0.13%
|
||||
Ankaran;1;0.13%
|
||||
naroda;1;0.13%
|
||||
Kofujem;1;0.13%
|
||||
tudi;1;0.13%
|
||||
posebej;1;0.13%
|
||||
usmerjeni;1;0.13%
|
||||
8.;1;0.13%
|
||||
lupinico;1;0.13%
|
||||
zadrugo;1;0.13%
|
||||
narodom;1;0.13%
|
||||
kocka;1;0.13%
|
||||
katerega;1;0.13%
|
||||
19.;1;0.13%
|
||||
izstopu;1;0.13%
|
||||
09:56;1;0.13%
|
||||
vzamemo;1;0.13%
|
||||
pozdrav;1;0.13%
|
||||
"škofije";1;0.13%
|
||||
"čemer";1;0.13%
|
||||
97;1;0.13%
|
||||
zaradi;1;0.13%
|
||||
izdelana;1;0.13%
|
||||
nekdo;1;0.13%
|
||||
kloniranju;1;0.13%
|
||||
vam;1;0.13%
|
||||
okusna;1;0.13%
|
||||
boste;1;0.13%
|
||||
križarki;1;0.13%
|
||||
prosimo;1;0.13%
|
||||
unikatna;1;0.13%
|
||||
ključno;1;0.13%
|
||||
kože;1;0.13%
|
||||
enem;1;0.13%
|
||||
naredimo;1;0.13%
|
||||
koncert;1;0.13%
|
||||
Prison;1;0.13%
|
||||
prošnja;1;0.13%
|
||||
"želodec";1;0.13%
|
||||
Frenki;1;0.13%
|
||||
older;1;0.13%
|
||||
MSC;1;0.13%
|
||||
prašek;1;0.13%
|
||||
kompasom;1;0.13%
|
||||
"št.";1;0.13%
|
||||
posesti;1;0.13%
|
||||
Stepančič;1;0.13%
|
||||
pomarančni;1;0.13%
|
||||
lahek;1;0.13%
|
||||
prispevek;1;0.13%
|
||||
južek;1;0.13%
|
||||
koncu;1;0.13%
|
||||
"Šmarje";1;0.13%
|
||||
oziroma;1;0.13%
|
||||
Beljan;1;0.13%
|
||||
dermatologinja;1;0.13%
|
||||
okrogel;1;0.13%
|
||||
vlijemo;1;0.13%
|
||||
Brecelj;1;0.13%
|
||||
Podobnik;1;0.13%
|
||||
13.9;1;0.13%
|
||||
Alenke;1;0.13%
|
||||
priča;1;0.13%
|
||||
neškodljiv;1;0.13%
|
||||
widow;1;0.13%
|
||||
nismo;1;0.13%
|
||||
Alenka;1;0.13%
|
||||
08.11.2010;1;0.13%
|
||||
strahu;1;0.13%
|
||||
genove;1;0.13%
|
||||
tednu;1;0.13%
|
||||
vinu;1;0.13%
|
||||
potekal;1;0.13%
|
||||
24.10;1;0.13%
|
||||
sanja;1;0.13%
|
||||
dragoceno;1;0.13%
|
||||
akreditirate;1;0.13%
|
||||
liter;1;0.13%
|
||||
mesta;1;0.13%
|
||||
zakaj;1;0.13%
|
||||
ustavili;1;0.13%
|
||||
Roberto;1;0.13%
|
||||
detective;1;0.13%
|
||||
Danijela;1;0.13%
|
||||
Ručigaj;1;0.13%
|
||||
jajci;1;0.13%
|
||||
mesti;1;0.13%
|
||||
mika;1;0.13%
|
||||
nared;1;0.13%
|
||||
pravo;1;0.13%
|
||||
križarili;1;0.13%
|
||||
bila;1;0.13%
|
||||
sodelovanju;1;0.13%
|
||||
prava;1;0.13%
|
||||
zato;1;0.13%
|
||||
cesta;1;0.13%
|
||||
saj;1;0.13%
|
||||
srečo;1;0.13%
|
||||
olje;1;0.13%
|
||||
svojih;1;0.13%
|
||||
Ljubljana;1;0.13%
|
||||
pomaranče;1;0.13%
|
||||
jesenskem;1;0.13%
|
||||
pomarančo;1;0.13%
|
||||
opreki;1;0.13%
|
||||
najpogostejša;1;0.13%
|
||||
pomaranči;1;0.13%
|
||||
Zucco;1;0.13%
|
||||
enakopravnem;1;0.13%
|
||||
|
@@ -0,0 +1,623 @@
|
||||
word;frequency;percent
|
||||
ajdovo kašo;4;0.586%
|
||||
in ajdovo;3;0.439%
|
||||
kovinska ogledala;3;0.439%
|
||||
kako izdelujejo;3;0.439%
|
||||
pol ure;3;0.439%
|
||||
Agata Jurkovič;3;0.439%
|
||||
težav s;2;0.293%
|
||||
za pol;2;0.293%
|
||||
62 vdova;2;0.293%
|
||||
resnična zgodba;2;0.293%
|
||||
v času;2;0.293%
|
||||
srbečo kožo;2;0.293%
|
||||
novih občin;2;0.293%
|
||||
Roblek Martika;2;0.293%
|
||||
znebiti težav;2;0.293%
|
||||
korenje in;2;0.293%
|
||||
in damo;2;0.293%
|
||||
tiskarna gepard;2;0.293%
|
||||
elina starejša;2;0.293%
|
||||
vse v;2;0.293%
|
||||
the late;2;0.293%
|
||||
kako se;2;0.293%
|
||||
korenčkovo juho;2;0.293%
|
||||
totenbirt the;2;0.293%
|
||||
Marta Fijavž;2;0.293%
|
||||
za ustanovitev;2;0.293%
|
||||
ne bi;2;0.293%
|
||||
elina mlajša;2;0.293%
|
||||
vdova sodnica;2;0.293%
|
||||
46 učiteljica;2;0.293%
|
||||
učiteljica tomijeva;2;0.293%
|
||||
s srbečo;2;0.293%
|
||||
da je;2;0.293%
|
||||
dodamo še;2;0.293%
|
||||
"žena elina";2;0.293%
|
||||
zelenjavne kocke;2;0.293%
|
||||
zahodnega Sredozemlja;2;0.293%
|
||||
sodnica elina;2;0.293%
|
||||
gepard 1;2;0.293%
|
||||
Godec in;2;0.293%
|
||||
grozdje in;2;0.293%
|
||||
mlajša sestra;2;0.293%
|
||||
Martika 46;2;0.293%
|
||||
starejša sestra;2;0.293%
|
||||
tomijeva žena;2;0.293%
|
||||
se znebiti;2;0.293%
|
||||
idejo o;2;0.293%
|
||||
Fijavž Roblek;2;0.293%
|
||||
Jurkovič Jurkovička;2;0.293%
|
||||
da bi;2;0.293%
|
||||
in orehe;2;0.293%
|
||||
Jurkovička 62;2;0.293%
|
||||
izdelujejo kovinska;2;0.293%
|
||||
ustanovitev novih;2;0.293%
|
||||
z idejo;1;0.146%
|
||||
Jurkovič jurkovička;1;0.146%
|
||||
da za;1;0.146%
|
||||
gospodarske koristi;1;0.146%
|
||||
na križarjenju;1;0.146%
|
||||
in Marseille;1;0.146%
|
||||
Neaplja se;1;0.146%
|
||||
vode odvisno;1;0.146%
|
||||
polovico zelenjavne;1;0.146%
|
||||
"ženskami v";1;0.146%
|
||||
Planinšek Ručigaj;1;0.146%
|
||||
jajci in;1;0.146%
|
||||
pol eko;1;0.146%
|
||||
ravno prav;1;0.146%
|
||||
pojasnjuje dermatologinja;1;0.146%
|
||||
pot tista;1;0.146%
|
||||
mogoče da;1;0.146%
|
||||
približno pol;1;0.146%
|
||||
ohranimo nekaj;1;0.146%
|
||||
je zelo;1;0.146%
|
||||
recept za;1;0.146%
|
||||
temelji na;1;0.146%
|
||||
okusu in;1;0.146%
|
||||
v čast;1;0.146%
|
||||
"špasnega križarjenja";1;0.146%
|
||||
o zadrugah;1;0.146%
|
||||
posujemo grozdje;1;0.146%
|
||||
je nared;1;0.146%
|
||||
več sinov;1;0.146%
|
||||
officer detective;1;0.146%
|
||||
junak v;1;0.146%
|
||||
gosta in;1;0.146%
|
||||
in dietna;1;0.146%
|
||||
referendum za;1;0.146%
|
||||
palmo de;1;0.146%
|
||||
prošnja za;1;0.146%
|
||||
dekani ki;1;0.146%
|
||||
pri operaciji;1;0.146%
|
||||
enakopravnem sodelovanju;1;0.146%
|
||||
posesti nekaj;1;0.146%
|
||||
si kako;1;0.146%
|
||||
Brecelj Agata;1;0.146%
|
||||
paličnim mešalnikom;1;0.146%
|
||||
v sutano;1;0.146%
|
||||
lahko jeste;1;0.146%
|
||||
Koper 21.;1;0.146%
|
||||
rezino pomaranče;1;0.146%
|
||||
blagajna zaprta;1;0.146%
|
||||
09.11.2010 ob;1;0.146%
|
||||
cena ki;1;0.146%
|
||||
tega mi;1;0.146%
|
||||
oziroma postane;1;0.146%
|
||||
ustanoivi društvo;1;0.146%
|
||||
nedoločenega števila;1;0.146%
|
||||
ki bo;1;0.146%
|
||||
lahek in;1;0.146%
|
||||
"članov ki";1;0.146%
|
||||
ali drugo;1;0.146%
|
||||
1 veliko;1;0.146%
|
||||
namen pospeševati;1;0.146%
|
||||
pekač in;1;0.146%
|
||||
bi goljufal;1;0.146%
|
||||
zgodba sami;1;0.146%
|
||||
jeseni naredimo;1;0.146%
|
||||
osebe če;1;0.146%
|
||||
in požigom;1;0.146%
|
||||
Mallorco Barcelono;1;0.146%
|
||||
za korenčkovo;1;0.146%
|
||||
jesenska torta;1;0.146%
|
||||
tudi posebej;1;0.146%
|
||||
testu posujemo;1;0.146%
|
||||
poslednjih dni;1;0.146%
|
||||
Beljan in;1;0.146%
|
||||
prezgodaj je;1;0.146%
|
||||
jih cerkev;1;0.146%
|
||||
sodelovanju in;1;0.146%
|
||||
junak ustanoivi;1;0.146%
|
||||
tem uresničuje;1;0.146%
|
||||
drugi posodi;1;0.146%
|
||||
kocke in;1;0.146%
|
||||
danes skorajda;1;0.146%
|
||||
strli na;1;0.146%
|
||||
srbečico in;1;0.146%
|
||||
pospeševati gospodarske;1;0.146%
|
||||
do Neaplja;1;0.146%
|
||||
najpogostejša motnja;1;0.146%
|
||||
Totenbirt Marta;1;0.146%
|
||||
"če se";1;0.146%
|
||||
prava pot;1;0.146%
|
||||
s tem;1;0.146%
|
||||
mu odstranimo;1;0.146%
|
||||
narod pa;1;0.146%
|
||||
prilogi vam;1;0.146%
|
||||
se ustavili;1;0.146%
|
||||
komisija za;1;0.146%
|
||||
nekaj o;1;0.146%
|
||||
svobodnem izstopu;1;0.146%
|
||||
uresničuje namen;1;0.146%
|
||||
cerkev in;1;0.146%
|
||||
Prison officer;1;0.146%
|
||||
viroz saj;1;0.146%
|
||||
Totenbirt Agata;1;0.146%
|
||||
juho in;1;0.146%
|
||||
19. aprila;1;0.146%
|
||||
zmešamo s;1;0.146%
|
||||
nazaj peči;1;0.146%
|
||||
Roberto Zucco;1;0.146%
|
||||
foto arhivo;1;0.146%
|
||||
zato da;1;0.146%
|
||||
sanja a;1;0.146%
|
||||
kože je;1;0.146%
|
||||
"št. 2130";1;0.146%
|
||||
Tunisu obiskali;1;0.146%
|
||||
jed je;1;0.146%
|
||||
ne postane;1;0.146%
|
||||
jo odpraviti;1;0.146%
|
||||
olje vino;1;0.146%
|
||||
postane ravno;1;0.146%
|
||||
tem da;1;0.146%
|
||||
in neškodljiv;1;0.146%
|
||||
aprila 1998;1;0.146%
|
||||
se mu;1;0.146%
|
||||
late Frenki;1;0.146%
|
||||
Indiji naj;1;0.146%
|
||||
potem gre;1;0.146%
|
||||
ko dobro;1;0.146%
|
||||
s priokusom;1;0.146%
|
||||
vinu in;1;0.146%
|
||||
na roko;1;0.146%
|
||||
je cena;1;0.146%
|
||||
ogreto pečico;1;0.146%
|
||||
na veličastni;1;0.146%
|
||||
"števila članov";1;0.146%
|
||||
grozdje vino;1;0.146%
|
||||
jogurt olje;1;0.146%
|
||||
po grozdju;1;0.146%
|
||||
pečico za;1;0.146%
|
||||
do 13.9;1;0.146%
|
||||
pravno osebo;1;0.146%
|
||||
"še jogurt";1;0.146%
|
||||
akreditirate naša;1;0.146%
|
||||
bi prinašala;1;0.146%
|
||||
na ladji;1;0.146%
|
||||
je od;1;0.146%
|
||||
preprost recept;1;0.146%
|
||||
kocke ter;1;0.146%
|
||||
prostovoljnem pristopu;1;0.146%
|
||||
dobro premešamo;1;0.146%
|
||||
Alenka Godec;1;0.146%
|
||||
in dekani;1;0.146%
|
||||
iz pečice;1;0.146%
|
||||
Koper 8.;1;0.146%
|
||||
nismo naročili;1;0.146%
|
||||
odgovorni urednik;1;0.146%
|
||||
bo potekal;1;0.146%
|
||||
vlijemo v;1;0.146%
|
||||
roko zato;1;0.146%
|
||||
proklamirali in;1;0.146%
|
||||
velike vsote;1;0.146%
|
||||
približno liter;1;0.146%
|
||||
62 widow;1;0.146%
|
||||
in Danijela;1;0.146%
|
||||
prav gosta;1;0.146%
|
||||
je tiskarna;1;0.146%
|
||||
boste od;1;0.146%
|
||||
late južek;1;0.146%
|
||||
bi skrivali;1;0.146%
|
||||
račun št.;1;0.146%
|
||||
mladi istospolno;1;0.146%
|
||||
Danijela ceka;1;0.146%
|
||||
mu ne;1;0.146%
|
||||
Zucco 1st;1;0.146%
|
||||
povzroča srbečico;1;0.146%
|
||||
8. aprila;1;0.146%
|
||||
kar v;1;0.146%
|
||||
okrogel pekač;1;0.146%
|
||||
damo nazaj;1;0.146%
|
||||
ter kuhamo;1;0.146%
|
||||
dodamo drugo;1;0.146%
|
||||
5 popeljala;1;0.146%
|
||||
in kar;1;0.146%
|
||||
bogastvo in;1;0.146%
|
||||
o križarjenju;1;0.146%
|
||||
torinskim prtom;1;0.146%
|
||||
za referendum;1;0.146%
|
||||
zadruga lahko;1;0.146%
|
||||
križati idejo;1;0.146%
|
||||
pravo pot;1;0.146%
|
||||
Tanja Planinšek;1;0.146%
|
||||
ob 09:56;1;0.146%
|
||||
ustanovi podjetje;1;0.146%
|
||||
vino pomarančo;1;0.146%
|
||||
katerega je;1;0.146%
|
||||
skorajda najpogostejša;1;0.146%
|
||||
in sladkor;1;0.146%
|
||||
od 24.10;1;0.146%
|
||||
v opreki;1;0.146%
|
||||
torta vsebuje;1;0.146%
|
||||
v posodi;1;0.146%
|
||||
zavračamo račun;1;0.146%
|
||||
zelo lahek;1;0.146%
|
||||
izstopu enakopravnem;1;0.146%
|
||||
Barcelono in;1;0.146%
|
||||
tista ki;1;0.146%
|
||||
bosta zvezdniška;1;0.146%
|
||||
ki ji;1;0.146%
|
||||
od genove;1;0.146%
|
||||
Ankaran škofije;1;0.146%
|
||||
ki ju;1;0.146%
|
||||
mesta zahodnega;1;0.146%
|
||||
križarjenja je;1;0.146%
|
||||
bi se;1;0.146%
|
||||
na kolobarje;1;0.146%
|
||||
ponudimo z;1;0.146%
|
||||
je prava;1;0.146%
|
||||
pecilni prašek;1;0.146%
|
||||
splošne določbe;1;0.146%
|
||||
v ogreto;1;0.146%
|
||||
operaciji 666;1;0.146%
|
||||
ste bolni;1;0.146%
|
||||
v življenju;1;0.146%
|
||||
zdi ključno;1;0.146%
|
||||
"želimo na";1;0.146%
|
||||
mika 08.11.2010;1;0.146%
|
||||
za duhovnike;1;0.146%
|
||||
v okrogel;1;0.146%
|
||||
dala tiskarna;1;0.146%
|
||||
Performs in;1;0.146%
|
||||
lep pozdrav;1;0.146%
|
||||
Matjaž Javšnik;1;0.146%
|
||||
poleg tega;1;0.146%
|
||||
tako da;1;0.146%
|
||||
ob 12:25;1;0.146%
|
||||
sinov naroda;1;0.146%
|
||||
24.10 do;1;0.146%
|
||||
skrivali svojega;1;0.146%
|
||||
poglejte si;1;0.146%
|
||||
in komedija;1;0.146%
|
||||
sprejemljivi potem;1;0.146%
|
||||
odstranimo peške;1;0.146%
|
||||
potekal 19.;1;0.146%
|
||||
med mesti;1;0.146%
|
||||
podjetje drugo;1;0.146%
|
||||
"škofije Šmarje";1;0.146%
|
||||
ki ima;1;0.146%
|
||||
in pol;1;0.146%
|
||||
moko pecilni;1;0.146%
|
||||
maso vlijemo;1;0.146%
|
||||
prispevek ki;1;0.146%
|
||||
tega pa;1;0.146%
|
||||
srbeče kože;1;0.146%
|
||||
pedri če;1;0.146%
|
||||
dl vode;1;0.146%
|
||||
naročnik je;1;0.146%
|
||||
začne nakazovati;1;0.146%
|
||||
večjih koščkov;1;0.146%
|
||||
ker tega;1;0.146%
|
||||
nekdo mu;1;0.146%
|
||||
operemo grozdje;1;0.146%
|
||||
veličastna mesta;1;0.146%
|
||||
1st Prison;1;0.146%
|
||||
bo ladja;1;0.146%
|
||||
vsebuje grozdje;1;0.146%
|
||||
genove do;1;0.146%
|
||||
priokusom po;1;0.146%
|
||||
in srečo;1;0.146%
|
||||
detective 1st;1;0.146%
|
||||
dunajska cesta;1;0.146%
|
||||
pred ženskami;1;0.146%
|
||||
totenbirt Agata;1;0.146%
|
||||
pojav srbeče;1;0.146%
|
||||
de Mallorco;1;0.146%
|
||||
jih strli;1;0.146%
|
||||
widow judge;1;0.146%
|
||||
zadruga je;1;0.146%
|
||||
medtem v;1;0.146%
|
||||
older sister;1;0.146%
|
||||
izgubil dragoceno;1;0.146%
|
||||
in Matjaž;1;0.146%
|
||||
naredimo torto;1;0.146%
|
||||
29.03.2010 ob;1;0.146%
|
||||
volilna komisija;1;0.146%
|
||||
prosimo da;1;0.146%
|
||||
a okusna;1;0.146%
|
||||
zavremo približno;1;0.146%
|
||||
torto s;1;0.146%
|
||||
z narodom;1;0.146%
|
||||
antikrista vključno;1;0.146%
|
||||
požigom z;1;0.146%
|
||||
postane mehko;1;0.146%
|
||||
počitnic od;1;0.146%
|
||||
ob 12:35;1;0.146%
|
||||
in tekoča;1;0.146%
|
||||
Darko Stepančič;1;0.146%
|
||||
kako gosto;1;0.146%
|
||||
penasto umešamo;1;0.146%
|
||||
veličastni križarki;1;0.146%
|
||||
s kompasom;1;0.146%
|
||||
bert 09.11.2010;1;0.146%
|
||||
o društvu;1;0.146%
|
||||
ustavili v;1;0.146%
|
||||
zgodba prezgodaj;1;0.146%
|
||||
mi pri;1;0.146%
|
||||
20 minut;1;0.146%
|
||||
08.11.2010 ob;1;0.146%
|
||||
narežemo korenje;1;0.146%
|
||||
križarili boste;1;0.146%
|
||||
pomarančni sok;1;0.146%
|
||||
97 ker;1;0.146%
|
||||
se s;1;0.146%
|
||||
društvu poslednjih;1;0.146%
|
||||
ju lahko;1;0.146%
|
||||
zvezdniška gosta;1;0.146%
|
||||
odvisno kako;1;0.146%
|
||||
v posesti;1;0.146%
|
||||
gre za;1;0.146%
|
||||
od 1.7;1;0.146%
|
||||
organizacija vnaprej;1;0.146%
|
||||
bolje potem;1;0.146%
|
||||
anatemizirala tako;1;0.146%
|
||||
ki priča;1;0.146%
|
||||
aprila akreditirate;1;0.146%
|
||||
Matjaž Koper;1;0.146%
|
||||
ima namen;1;0.146%
|
||||
66 drunk;1;0.146%
|
||||
smo jih;1;0.146%
|
||||
enem tednu;1;0.146%
|
||||
onemu zdi;1;0.146%
|
||||
ladja MSC;1;0.146%
|
||||
eko zelenjavne;1;0.146%
|
||||
z rezino;1;0.146%
|
||||
dober tek;1;0.146%
|
||||
"še ajdovo";1;0.146%
|
||||
mesti zahodnega;1;0.146%
|
||||
"življenju mogoče";1;0.146%
|
||||
21. 2.;1;0.146%
|
||||
lahko ustanovi;1;0.146%
|
||||
saj je;1;0.146%
|
||||
popeljala med;1;0.146%
|
||||
kako jo;1;0.146%
|
||||
gosto juho;1;0.146%
|
||||
je blagajna;1;0.146%
|
||||
"še za";1;0.146%
|
||||
za želodec;1;0.146%
|
||||
juho želimo;1;0.146%
|
||||
vzamemo iz;1;0.146%
|
||||
Palermu in;1;0.146%
|
||||
dermatologinja Tanja;1;0.146%
|
||||
Alenke Godec;1;0.146%
|
||||
eko zelenjavna;1;0.146%
|
||||
nekaj večjih;1;0.146%
|
||||
Anandm Kofujem;1;0.146%
|
||||
bosta z;1;0.146%
|
||||
postali sprejemljivi;1;0.146%
|
||||
a se;1;0.146%
|
||||
je bila;1;0.146%
|
||||
križarjenju tukaj;1;0.146%
|
||||
križarjenju bosta;1;0.146%
|
||||
je vse;1;0.146%
|
||||
naroda narod;1;0.146%
|
||||
ogledala izdelana;1;0.146%
|
||||
pridružite se;1;0.146%
|
||||
nakazovati velike;1;0.146%
|
||||
okusna jed;1;0.146%
|
||||
Maja Končar;1;0.146%
|
||||
pa izumira;1;0.146%
|
||||
liter vode;1;0.146%
|
||||
dietna a;1;0.146%
|
||||
uporabite v;1;0.146%
|
||||
prtom in;1;0.146%
|
||||
na koncu;1;0.146%
|
||||
ValentinRozman 29.03.2010;1;0.146%
|
||||
je danes;1;0.146%
|
||||
obiskali palmo;1;0.146%
|
||||
ji dodamo;1;0.146%
|
||||
"članica druge";1;0.146%
|
||||
juho solimo;1;0.146%
|
||||
vino pomarančni;1;0.146%
|
||||
koristi svojih;1;0.146%
|
||||
se mladi;1;0.146%
|
||||
do 31.10;1;0.146%
|
||||
"Šmarje marezige";1;0.146%
|
||||
z Anandm;1;0.146%
|
||||
drugo zadrugo;1;0.146%
|
||||
cesta 22;1;0.146%
|
||||
kašo ki;1;0.146%
|
||||
namen zaradi;1;0.146%
|
||||
v približno;1;0.146%
|
||||
vas bo;1;0.146%
|
||||
kašo in;1;0.146%
|
||||
pekač vzamemo;1;0.146%
|
||||
istospolno usmerjeni;1;0.146%
|
||||
se onemu;1;0.146%
|
||||
bi anatemizirala;1;0.146%
|
||||
posodi zavremo;1;0.146%
|
||||
svojega strahu;1;0.146%
|
||||
zaradi katerega;1;0.146%
|
||||
veliko nižja;1;0.146%
|
||||
Marijana Brecelj;1;0.146%
|
||||
vključno z;1;0.146%
|
||||
i. splošne;1;0.146%
|
||||
unikatna ogledala;1;0.146%
|
||||
Branko Podobnik;1;0.146%
|
||||
gosta Alenka;1;0.146%
|
||||
mehko približno;1;0.146%
|
||||
odličnimi pozdravi;1;0.146%
|
||||
in upravljanju;1;0.146%
|
||||
1. člen;1;0.146%
|
||||
izdelujejo ta;1;0.146%
|
||||
grozdju pomaranči;1;0.146%
|
||||
o tem;1;0.146%
|
||||
naj bi;1;0.146%
|
||||
orehe ki;1;0.146%
|
||||
Končar Marta;1;0.146%
|
||||
trg Brolo;1;0.146%
|
||||
koncert Alenke;1;0.146%
|
||||
prinašala bogastvo;1;0.146%
|
||||
Marseille vse;1;0.146%
|
||||
judge Eli's;1;0.146%
|
||||
vas nismo;1;0.146%
|
||||
cerkev v;1;0.146%
|
||||
pomaranči vinu;1;0.146%
|
||||
je izgubil;1;0.146%
|
||||
sami si;1;0.146%
|
||||
v drugi;1;0.146%
|
||||
priča o;1;0.146%
|
||||
prašek in;1;0.146%
|
||||
strahu pred;1;0.146%
|
||||
vami koncert;1;0.146%
|
||||
MSC fantastica;1;0.146%
|
||||
občin Ankaran;1;0.146%
|
||||
je organizacija;1;0.146%
|
||||
s paličnim;1;0.146%
|
||||
Kofujem torinskim;1;0.146%
|
||||
približno 20;1;0.146%
|
||||
jesenskem špasnem;1;0.146%
|
||||
"špasnem križarjanju";1;0.146%
|
||||
cerkvi bolje;1;0.146%
|
||||
duhovnike več;1;0.146%
|
||||
izberemo pravo;1;0.146%
|
||||
1113 Ljubljana;1;0.146%
|
||||
Detela Matjaž;1;0.146%
|
||||
dokler korenje;1;0.146%
|
||||
posodi zmešamo;1;0.146%
|
||||
postane članica;1;0.146%
|
||||
6000 Koper;1;0.146%
|
||||
Mojco Beljan;1;0.146%
|
||||
vam zavračamo;1;0.146%
|
||||
mu začne;1;0.146%
|
||||
ima junak;1;0.146%
|
||||
ker ima;1;0.146%
|
||||
bila ust;1;0.146%
|
||||
bi postali;1;0.146%
|
||||
"čast jeseni";1;0.146%
|
||||
v Palermu;1;0.146%
|
||||
z odličnimi;1;0.146%
|
||||
pot prispevek;1;0.146%
|
||||
približno 3;1;0.146%
|
||||
v enem;1;0.146%
|
||||
pri vas;1;0.146%
|
||||
nam jo;1;0.146%
|
||||
ki nam;1;0.146%
|
||||
je cerkvi;1;0.146%
|
||||
kaj povzroča;1;0.146%
|
||||
premešamo dodamo;1;0.146%
|
||||
odpraviti pojasnjuje;1;0.146%
|
||||
društvo da;1;0.146%
|
||||
z vami;1;0.146%
|
||||
"če jih";1;0.146%
|
||||
po testu;1;0.146%
|
||||
nam na;1;0.146%
|
||||
jeste tudi;1;0.146%
|
||||
in Tunisu;1;0.146%
|
||||
pristopu svobodnem;1;0.146%
|
||||
in kako;1;0.146%
|
||||
narodom če;1;0.146%
|
||||
zelenjavna kocka;1;0.146%
|
||||
v Indiji;1;0.146%
|
||||
ter temelji;1;0.146%
|
||||
korenje ne;1;0.146%
|
||||
pomarančo in;1;0.146%
|
||||
naša novinarja;1;0.146%
|
||||
upravljanju članov;1;0.146%
|
||||
kolobarje narežemo;1;0.146%
|
||||
damo v;1;0.146%
|
||||
da ohranimo;1;0.146%
|
||||
novinarja Mojco;1;0.146%
|
||||
dodamo moko;1;0.146%
|
||||
pravne osebe;1;0.146%
|
||||
na prostovoljnem;1;0.146%
|
||||
za akreditacijo;1;0.146%
|
||||
cerkev ne;1;0.146%
|
||||
"čemer se";1;0.146%
|
||||
Brolo 3;1;0.146%
|
||||
in ne;1;0.146%
|
||||
izdelana v;1;0.146%
|
||||
ta unikatna;1;0.146%
|
||||
"času počitnic";1;0.146%
|
||||
o kloniranju;1;0.146%
|
||||
koncu vanjo;1;0.146%
|
||||
marezige in;1;0.146%
|
||||
medtem operemo;1;0.146%
|
||||
in mu;1;0.146%
|
||||
drugo pravno;1;0.146%
|
||||
zadrugo ali;1;0.146%
|
||||
svojih članov;1;0.146%
|
||||
vode ki;1;0.146%
|
||||
in lupinico;1;0.146%
|
||||
potem bi;1;0.146%
|
||||
3 dl;1;0.146%
|
||||
dragoceno življenje;1;0.146%
|
||||
Ivek 66;1;0.146%
|
||||
jurkovička 62;1;0.146%
|
||||
se nam;1;0.146%
|
||||
Matjaža Javšnika;1;0.146%
|
||||
"članov ter";1;0.146%
|
||||
2130 97;1;0.146%
|
||||
osebo oziroma;1;0.146%
|
||||
po okusu;1;0.146%
|
||||
o čemer;1;0.146%
|
||||
pa je;1;0.146%
|
||||
"če je";1;0.146%
|
||||
mešalnikom da;1;0.146%
|
||||
križarjanju vas;1;0.146%
|
||||
kuhamo dokler;1;0.146%
|
||||
1st police;1;0.146%
|
||||
opreki z;1;0.146%
|
||||
vanjo dodamo;1;0.146%
|
||||
totenbirt Ivek;1;0.146%
|
||||
si izberemo;1;0.146%
|
||||
operemo korenje;1;0.146%
|
||||
aranžma špasnega;1;0.146%
|
||||
ključno pri;1;0.146%
|
||||
in orehih;1;0.146%
|
||||
več o;1;0.146%
|
||||
Javšnika optimist;1;0.146%
|
||||
zakon o;1;0.146%
|
||||
"času viroz";1;0.146%
|
||||
ladji bosta;1;0.146%
|
||||
kuhamo približno;1;0.146%
|
||||
v prilogi;1;0.146%
|
||||
neškodljiv za;1;0.146%
|
||||
kompasom odkrivajte;1;0.146%
|
||||
ne sanja;1;0.146%
|
||||
sok in;1;0.146%
|
||||
drugo polovico;1;0.146%
|
||||
usmerjeni proklamirali;1;0.146%
|
||||
peči še;1;0.146%
|
||||
kloniranju antikrista;1;0.146%
|
||||
edinstvena kovinska;1;0.146%
|
||||
solimo po;1;0.146%
|
||||
da postane;1;0.146%
|
||||
police officer;1;0.146%
|
||||
je dala;1;0.146%
|
||||
pečice po;1;0.146%
|
||||
druge pravne;1;0.146%
|
||||
sladkor penasto;1;0.146%
|
||||
odkrivajte veličastna;1;0.146%
|
||||
komedija Matjaža;1;0.146%
|
||||
vnaprej nedoločenega;1;0.146%
|
||||
na jesenskem;1;0.146%
|
||||
in pedri;1;0.146%
|
||||
Eli's older;1;0.146%
|
||||
2. 1998;1;0.146%
|
||||
fantastica 5;1;0.146%
|
||||
posebej uporabite;1;0.146%
|
||||
ki smo;1;0.146%
|
||||
jo je;1;0.146%
|
||||
|
@@ -0,0 +1,572 @@
|
||||
word;frequency;percent
|
||||
in ajdovo kašo;3;0.499%
|
||||
težav s srbečo;2;0.333%
|
||||
za pol ure;2;0.333%
|
||||
46 učiteljica tomijeva;2;0.333%
|
||||
elina starejša sestra;2;0.333%
|
||||
izdelujejo kovinska ogledala;2;0.333%
|
||||
62 vdova sodnica;2;0.333%
|
||||
učiteljica tomijeva žena;2;0.333%
|
||||
Jurkovič Jurkovička 62;2;0.333%
|
||||
tomijeva žena elina;2;0.333%
|
||||
kako se znebiti;2;0.333%
|
||||
vdova sodnica elina;2;0.333%
|
||||
Roblek Martika 46;2;0.333%
|
||||
kako izdelujejo kovinska;2;0.333%
|
||||
elina mlajša sestra;2;0.333%
|
||||
Marta Fijavž Roblek;2;0.333%
|
||||
znebiti težav s;2;0.333%
|
||||
ustanovitev novih občin;2;0.333%
|
||||
za ustanovitev novih;2;0.333%
|
||||
sodnica elina starejša;2;0.333%
|
||||
Martika 46 učiteljica;2;0.333%
|
||||
Fijavž Roblek Martika;2;0.333%
|
||||
se znebiti težav;2;0.333%
|
||||
"žena elina mlajša";2;0.333%
|
||||
totenbirt the late;2;0.333%
|
||||
tiskarna gepard 1;2;0.333%
|
||||
Agata Jurkovič Jurkovička;2;0.333%
|
||||
Jurkovička 62 vdova;2;0.333%
|
||||
s srbečo kožo;2;0.333%
|
||||
ne postane mehko;1;0.166%
|
||||
ko dobro premešamo;1;0.166%
|
||||
v življenju mogoče;1;0.166%
|
||||
da bi goljufal;1;0.166%
|
||||
Ivek 66 drunk;1;0.166%
|
||||
vas nismo naročili;1;0.166%
|
||||
posesti nekaj o;1;0.166%
|
||||
posebej uporabite v;1;0.166%
|
||||
ki ji dodamo;1;0.166%
|
||||
okusu in kar;1;0.166%
|
||||
lahko ustanovi podjetje;1;0.166%
|
||||
Alenka Godec in;1;0.166%
|
||||
junak ustanoivi društvo;1;0.166%
|
||||
tako da bi;1;0.166%
|
||||
novinarja Mojco Beljan;1;0.166%
|
||||
torinskim prtom in;1;0.166%
|
||||
da postane ravno;1;0.166%
|
||||
cena ki nam;1;0.166%
|
||||
kar v posodi;1;0.166%
|
||||
začne nakazovati velike;1;0.166%
|
||||
s paličnim mešalnikom;1;0.166%
|
||||
na ladji bosta;1;0.166%
|
||||
neškodljiv za želodec;1;0.166%
|
||||
damo nazaj peči;1;0.166%
|
||||
prošnja za akreditacijo;1;0.166%
|
||||
bolje potem gre;1;0.166%
|
||||
edinstvena kovinska ogledala;1;0.166%
|
||||
da ohranimo nekaj;1;0.166%
|
||||
MSC fantastica 5;1;0.166%
|
||||
med mesti zahodnega;1;0.166%
|
||||
izberemo pravo pot;1;0.166%
|
||||
po testu posujemo;1;0.166%
|
||||
Prison officer detective;1;0.166%
|
||||
duhovnike več sinov;1;0.166%
|
||||
in pedri če;1;0.166%
|
||||
več sinov naroda;1;0.166%
|
||||
Agata Jurkovič jurkovička;1;0.166%
|
||||
judge Eli's older;1;0.166%
|
||||
jurkovička 62 widow;1;0.166%
|
||||
nekaj o čemer;1;0.166%
|
||||
v čast jeseni;1;0.166%
|
||||
jih strli na;1;0.166%
|
||||
in damo nazaj;1;0.166%
|
||||
izdelana v Indiji;1;0.166%
|
||||
kloniranju antikrista vključno;1;0.166%
|
||||
je bila ust;1;0.166%
|
||||
cerkev v opreki;1;0.166%
|
||||
dermatologinja Tanja Planinšek;1;0.166%
|
||||
dl vode ki;1;0.166%
|
||||
približno pol ure;1;0.166%
|
||||
ravno prav gosta;1;0.166%
|
||||
račun št. 2130;1;0.166%
|
||||
Mallorco Barcelono in;1;0.166%
|
||||
ima namen pospeševati;1;0.166%
|
||||
gre za duhovnike;1;0.166%
|
||||
zmešamo s paličnim;1;0.166%
|
||||
zakon o zadrugah;1;0.166%
|
||||
o kloniranju antikrista;1;0.166%
|
||||
potekal 19. aprila;1;0.166%
|
||||
naročnik je tiskarna;1;0.166%
|
||||
postali sprejemljivi potem;1;0.166%
|
||||
potem bi se;1;0.166%
|
||||
Maja Končar Marta;1;0.166%
|
||||
dekani ki bo;1;0.166%
|
||||
jeseni naredimo torto;1;0.166%
|
||||
mu začne nakazovati;1;0.166%
|
||||
mladi istospolno usmerjeni;1;0.166%
|
||||
ustavili v Palermu;1;0.166%
|
||||
bert 09.11.2010 ob;1;0.166%
|
||||
jeste tudi posebej;1;0.166%
|
||||
dodamo moko pecilni;1;0.166%
|
||||
totenbirt Ivek 66;1;0.166%
|
||||
prtom in požigom;1;0.166%
|
||||
druge pravne osebe;1;0.166%
|
||||
posujemo grozdje in;1;0.166%
|
||||
"če se s";1;0.166%
|
||||
pomarančo in orehe;1;0.166%
|
||||
zato da ohranimo;1;0.166%
|
||||
tega pa je;1;0.166%
|
||||
ustanoivi društvo da;1;0.166%
|
||||
Anandm Kofujem torinskim;1;0.166%
|
||||
preprost recept za;1;0.166%
|
||||
a okusna jed;1;0.166%
|
||||
mesti zahodnega Sredozemlja;1;0.166%
|
||||
Končar Marta Fijavž;1;0.166%
|
||||
pri operaciji 666;1;0.166%
|
||||
bi prinašala bogastvo;1;0.166%
|
||||
komisija za ustanovitev;1;0.166%
|
||||
medtem operemo grozdje;1;0.166%
|
||||
in Danijela ceka;1;0.166%
|
||||
jih cerkev ne;1;0.166%
|
||||
in dekani ki;1;0.166%
|
||||
si kako izdelujejo;1;0.166%
|
||||
"špasnega križarjenja je";1;0.166%
|
||||
Marijana Brecelj Agata;1;0.166%
|
||||
nazaj peči še;1;0.166%
|
||||
zadruga je organizacija;1;0.166%
|
||||
Jurkovič jurkovička 62;1;0.166%
|
||||
se nam na;1;0.166%
|
||||
sprejemljivi potem bi;1;0.166%
|
||||
in Tunisu obiskali;1;0.166%
|
||||
pridružite se nam;1;0.166%
|
||||
vinu in orehih;1;0.166%
|
||||
za duhovnike več;1;0.166%
|
||||
namen zaradi katerega;1;0.166%
|
||||
danes skorajda najpogostejša;1;0.166%
|
||||
in upravljanju članov;1;0.166%
|
||||
koncert Alenke Godec;1;0.166%
|
||||
poleg tega pa;1;0.166%
|
||||
komedija Matjaža Javšnika;1;0.166%
|
||||
gosto juho želimo;1;0.166%
|
||||
Brecelj Agata Jurkovič;1;0.166%
|
||||
narod pa izumira;1;0.166%
|
||||
smo jih strli;1;0.166%
|
||||
ker tega mi;1;0.166%
|
||||
aprila akreditirate naša;1;0.166%
|
||||
s tem uresničuje;1;0.166%
|
||||
palmo de Mallorco;1;0.166%
|
||||
vključno z Anandm;1;0.166%
|
||||
prispevek ki priča;1;0.166%
|
||||
sinov naroda narod;1;0.166%
|
||||
se mladi istospolno;1;0.166%
|
||||
zdi ključno pri;1;0.166%
|
||||
o tem da;1;0.166%
|
||||
je cena ki;1;0.166%
|
||||
cerkvi bolje potem;1;0.166%
|
||||
ki ju lahko;1;0.166%
|
||||
v enem tednu;1;0.166%
|
||||
v posesti nekaj;1;0.166%
|
||||
i. splošne določbe;1;0.166%
|
||||
da za referendum;1;0.166%
|
||||
nakazovati velike vsote;1;0.166%
|
||||
prava pot tista;1;0.166%
|
||||
grozdje in damo;1;0.166%
|
||||
za korenčkovo juho;1;0.166%
|
||||
drugo polovico zelenjavne;1;0.166%
|
||||
8. aprila 1998;1;0.166%
|
||||
kuhamo dokler korenje;1;0.166%
|
||||
z narodom če;1;0.166%
|
||||
Godec in komedija;1;0.166%
|
||||
je dala tiskarna;1;0.166%
|
||||
juho solimo po;1;0.166%
|
||||
se ustavili v;1;0.166%
|
||||
ter kuhamo dokler;1;0.166%
|
||||
bo ladja MSC;1;0.166%
|
||||
novih občin Ankaran;1;0.166%
|
||||
na roko zato;1;0.166%
|
||||
v Palermu in;1;0.166%
|
||||
v približno liter;1;0.166%
|
||||
posodi zavremo približno;1;0.166%
|
||||
pristopu svobodnem izstopu;1;0.166%
|
||||
grozdje vino pomarančo;1;0.166%
|
||||
potem gre za;1;0.166%
|
||||
Totenbirt Marta Fijavž;1;0.166%
|
||||
jajci in sladkor;1;0.166%
|
||||
kolobarje narežemo korenje;1;0.166%
|
||||
in dietna a;1;0.166%
|
||||
vino pomarančni sok;1;0.166%
|
||||
pred ženskami v;1;0.166%
|
||||
z vami koncert;1;0.166%
|
||||
vami koncert Alenke;1;0.166%
|
||||
pri vas nismo;1;0.166%
|
||||
prezgodaj je izgubil;1;0.166%
|
||||
tem uresničuje namen;1;0.166%
|
||||
ju lahko jeste;1;0.166%
|
||||
ne bi skrivali;1;0.166%
|
||||
po grozdju pomaranči;1;0.166%
|
||||
o križarjenju tukaj;1;0.166%
|
||||
mehko približno 20;1;0.166%
|
||||
s kompasom odkrivajte;1;0.166%
|
||||
peči še za;1;0.166%
|
||||
1 veliko nižja;1;0.166%
|
||||
kocke in ajdovo;1;0.166%
|
||||
in damo v;1;0.166%
|
||||
je izgubil dragoceno;1;0.166%
|
||||
mogoče da je;1;0.166%
|
||||
21. 2. 1998;1;0.166%
|
||||
z Anandm Kofujem;1;0.166%
|
||||
koncu vanjo dodamo;1;0.166%
|
||||
jesenska torta vsebuje;1;0.166%
|
||||
Zucco 1st Prison;1;0.166%
|
||||
ohranimo nekaj večjih;1;0.166%
|
||||
widow judge Eli's;1;0.166%
|
||||
5 popeljala med;1;0.166%
|
||||
uresničuje namen zaradi;1;0.166%
|
||||
vse v življenju;1;0.166%
|
||||
zgodba sami si;1;0.166%
|
||||
v posodi zmešamo;1;0.166%
|
||||
vlijemo v okrogel;1;0.166%
|
||||
pečico za pol;1;0.166%
|
||||
oziroma postane članica;1;0.166%
|
||||
zadruga lahko ustanovi;1;0.166%
|
||||
Koper 8. aprila;1;0.166%
|
||||
društvo da bi;1;0.166%
|
||||
zaradi katerega je;1;0.166%
|
||||
okrogel pekač in;1;0.166%
|
||||
Alenke Godec in;1;0.166%
|
||||
mika 08.11.2010 ob;1;0.166%
|
||||
boste od genove;1;0.166%
|
||||
Matjaža Javšnika optimist;1;0.166%
|
||||
juho želimo na;1;0.166%
|
||||
je danes skorajda;1;0.166%
|
||||
kaj povzroča srbečico;1;0.166%
|
||||
opreki z narodom;1;0.166%
|
||||
gepard 1 veliko;1;0.166%
|
||||
polovico zelenjavne kocke;1;0.166%
|
||||
officer detective 1st;1;0.166%
|
||||
je blagajna zaprta;1;0.166%
|
||||
drugo zadrugo ali;1;0.166%
|
||||
pospeševati gospodarske koristi;1;0.166%
|
||||
Koper 21. 2.;1;0.166%
|
||||
pomarančni sok in;1;0.166%
|
||||
pojav srbeče kože;1;0.166%
|
||||
operemo korenje in;1;0.166%
|
||||
dokler korenje ne;1;0.166%
|
||||
namen pospeševati gospodarske;1;0.166%
|
||||
viroz saj je;1;0.166%
|
||||
ter temelji na;1;0.166%
|
||||
postane članica druge;1;0.166%
|
||||
ki bo potekal;1;0.166%
|
||||
"št. 2130 97";1;0.166%
|
||||
pravo pot prispevek;1;0.166%
|
||||
kašo ki ju;1;0.166%
|
||||
in Marseille vse;1;0.166%
|
||||
veličastna mesta zahodnega;1;0.166%
|
||||
medtem v drugi;1;0.166%
|
||||
za referendum za;1;0.166%
|
||||
osebe če se;1;0.166%
|
||||
ki ima namen;1;0.166%
|
||||
je tiskarna gepard;1;0.166%
|
||||
zgodba prezgodaj je;1;0.166%
|
||||
08.11.2010 ob 09:56;1;0.166%
|
||||
zadrugo ali drugo;1;0.166%
|
||||
naj bi prinašala;1;0.166%
|
||||
"Šmarje marezige in";1;0.166%
|
||||
odpraviti pojasnjuje dermatologinja;1;0.166%
|
||||
"članov ter temelji";1;0.166%
|
||||
osebo oziroma postane;1;0.166%
|
||||
2130 97 ker;1;0.166%
|
||||
križarjenju bosta z;1;0.166%
|
||||
genove do Neaplja;1;0.166%
|
||||
je zelo lahek;1;0.166%
|
||||
okusna jed je;1;0.166%
|
||||
ker ima junak;1;0.166%
|
||||
iz pečice po;1;0.166%
|
||||
korenčkovo juho in;1;0.166%
|
||||
organizacija vnaprej nedoločenega;1;0.166%
|
||||
zelenjavne kocke in;1;0.166%
|
||||
postane ravno prav;1;0.166%
|
||||
od genove do;1;0.166%
|
||||
aranžma špasnega križarjenja;1;0.166%
|
||||
korenje ne postane;1;0.166%
|
||||
kože je danes;1;0.166%
|
||||
Roberto Zucco 1st;1;0.166%
|
||||
uporabite v času;1;0.166%
|
||||
vanjo dodamo še;1;0.166%
|
||||
Tunisu obiskali palmo;1;0.166%
|
||||
vode odvisno kako;1;0.166%
|
||||
prosimo da za;1;0.166%
|
||||
tega mi pri;1;0.166%
|
||||
bosta zvezdniška gosta;1;0.166%
|
||||
pravne osebe če;1;0.166%
|
||||
v času viroz;1;0.166%
|
||||
Matjaž Koper 21.;1;0.166%
|
||||
bo potekal 19.;1;0.166%
|
||||
saj je zelo;1;0.166%
|
||||
sok in lupinico;1;0.166%
|
||||
testu posujemo grozdje;1;0.166%
|
||||
zelenjavne kocke ter;1;0.166%
|
||||
križarjanju vas bo;1;0.166%
|
||||
kako gosto juho;1;0.166%
|
||||
maso vlijemo v;1;0.166%
|
||||
dobro premešamo dodamo;1;0.166%
|
||||
pečice po testu;1;0.166%
|
||||
in sladkor penasto;1;0.166%
|
||||
do Neaplja se;1;0.166%
|
||||
damo v ogreto;1;0.166%
|
||||
nekdo mu začne;1;0.166%
|
||||
jogurt olje vino;1;0.166%
|
||||
skorajda najpogostejša motnja;1;0.166%
|
||||
v okrogel pekač;1;0.166%
|
||||
pa je cena;1;0.166%
|
||||
je cerkvi bolje;1;0.166%
|
||||
eko zelenjavna kocka;1;0.166%
|
||||
priča o tem;1;0.166%
|
||||
občin Ankaran škofije;1;0.166%
|
||||
naredimo torto s;1;0.166%
|
||||
"članica druge pravne";1;0.166%
|
||||
resnična zgodba prezgodaj;1;0.166%
|
||||
prinašala bogastvo in;1;0.166%
|
||||
detective 1st police;1;0.166%
|
||||
roko zato da;1;0.166%
|
||||
ajdovo kašo in;1;0.166%
|
||||
se mu ne;1;0.166%
|
||||
dunajska cesta 22;1;0.166%
|
||||
naša novinarja Mojco;1;0.166%
|
||||
istospolno usmerjeni proklamirali;1;0.166%
|
||||
24.10 do 31.10;1;0.166%
|
||||
približno liter vode;1;0.166%
|
||||
korenčkovo juho solimo;1;0.166%
|
||||
podjetje drugo zadrugo;1;0.166%
|
||||
cerkev ne bi;1;0.166%
|
||||
"življenju mogoče da";1;0.166%
|
||||
pojasnjuje dermatologinja Tanja;1;0.166%
|
||||
koristi svojih članov;1;0.166%
|
||||
odvisno kako gosto;1;0.166%
|
||||
kašo in dietna;1;0.166%
|
||||
v času počitnic;1;0.166%
|
||||
v Indiji naj;1;0.166%
|
||||
09.11.2010 ob 12:35;1;0.166%
|
||||
gosta in tekoča;1;0.166%
|
||||
nedoločenega števila članov;1;0.166%
|
||||
pecilni prašek in;1;0.166%
|
||||
katerega je bila;1;0.166%
|
||||
a se onemu;1;0.166%
|
||||
1st police officer;1;0.166%
|
||||
1st Prison officer;1;0.166%
|
||||
lahko jeste tudi;1;0.166%
|
||||
se s tem;1;0.166%
|
||||
pedri če jih;1;0.166%
|
||||
križati idejo o;1;0.166%
|
||||
in komedija Matjaža;1;0.166%
|
||||
se onemu zdi;1;0.166%
|
||||
na prostovoljnem pristopu;1;0.166%
|
||||
da je prava;1;0.166%
|
||||
postane mehko približno;1;0.166%
|
||||
ustanovi podjetje drugo;1;0.166%
|
||||
"času viroz saj";1;0.166%
|
||||
z odličnimi pozdravi;1;0.166%
|
||||
"želimo na kolobarje";1;0.166%
|
||||
gosta Alenka Godec;1;0.166%
|
||||
in kako jo;1;0.166%
|
||||
proklamirali in ne;1;0.166%
|
||||
v opreki z;1;0.166%
|
||||
mu odstranimo peške;1;0.166%
|
||||
Palermu in Tunisu;1;0.166%
|
||||
Kofujem torinskim prtom;1;0.166%
|
||||
Indiji naj bi;1;0.166%
|
||||
kompasom odkrivajte veličastna;1;0.166%
|
||||
ki smo jih;1;0.166%
|
||||
in kar v;1;0.166%
|
||||
"še jogurt olje";1;0.166%
|
||||
pot tista ki;1;0.166%
|
||||
Detela Matjaž Koper;1;0.166%
|
||||
obiskali palmo de;1;0.166%
|
||||
recept za korenčkovo;1;0.166%
|
||||
ta unikatna ogledala;1;0.166%
|
||||
na jesenskem špasnem;1;0.166%
|
||||
pot prispevek ki;1;0.166%
|
||||
narodom če je;1;0.166%
|
||||
si izberemo pravo;1;0.166%
|
||||
bogastvo in srečo;1;0.166%
|
||||
ladji bosta zvezdniška;1;0.166%
|
||||
Beljan in Danijela;1;0.166%
|
||||
vnaprej nedoločenega števila;1;0.166%
|
||||
sodelovanju in upravljanju;1;0.166%
|
||||
približno 3 dl;1;0.166%
|
||||
pol eko zelenjavne;1;0.166%
|
||||
totenbirt Agata Jurkovič;1;0.166%
|
||||
počitnic od 1.7;1;0.166%
|
||||
jo odpraviti pojasnjuje;1;0.166%
|
||||
zavračamo račun št.;1;0.166%
|
||||
dodamo še jogurt;1;0.166%
|
||||
Barcelono in Marseille;1;0.166%
|
||||
križarili boste od;1;0.166%
|
||||
ključno pri operaciji;1;0.166%
|
||||
paličnim mešalnikom da;1;0.166%
|
||||
Totenbirt Agata Jurkovič;1;0.166%
|
||||
anatemizirala tako da;1;0.166%
|
||||
je od 24.10;1;0.166%
|
||||
torta vsebuje grozdje;1;0.166%
|
||||
zvezdniška gosta Alenka;1;0.166%
|
||||
"še ajdovo kašo";1;0.166%
|
||||
da bi postali;1;0.166%
|
||||
"še za pol";1;0.166%
|
||||
zelo lahek in;1;0.166%
|
||||
orehe ki smo;1;0.166%
|
||||
nekaj večjih koščkov;1;0.166%
|
||||
ki priča o;1;0.166%
|
||||
korenje in ajdovo;1;0.166%
|
||||
"čast jeseni naredimo";1;0.166%
|
||||
temelji na prostovoljnem;1;0.166%
|
||||
izdelujejo ta unikatna;1;0.166%
|
||||
pekač in damo;1;0.166%
|
||||
strahu pred ženskami;1;0.166%
|
||||
antikrista vključno z;1;0.166%
|
||||
več o križarjenju;1;0.166%
|
||||
je organizacija vnaprej;1;0.166%
|
||||
mu ne sanja;1;0.166%
|
||||
svobodnem izstopu enakopravnem;1;0.166%
|
||||
kocke ter kuhamo;1;0.166%
|
||||
vse v enem;1;0.166%
|
||||
Mojco Beljan in;1;0.166%
|
||||
premešamo dodamo še;1;0.166%
|
||||
Eli's older sister;1;0.166%
|
||||
posodi zmešamo s;1;0.166%
|
||||
bi anatemizirala tako;1;0.166%
|
||||
prostovoljnem pristopu svobodnem;1;0.166%
|
||||
in neškodljiv za;1;0.166%
|
||||
svojih članov ter;1;0.166%
|
||||
29.03.2010 ob 12:25;1;0.166%
|
||||
skrivali svojega strahu;1;0.166%
|
||||
vsebuje grozdje vino;1;0.166%
|
||||
sladkor penasto umešamo;1;0.166%
|
||||
ki nam jo;1;0.166%
|
||||
Tanja Planinšek Ručigaj;1;0.166%
|
||||
vzamemo iz pečice;1;0.166%
|
||||
olje vino pomarančni;1;0.166%
|
||||
ogledala izdelana v;1;0.166%
|
||||
enakopravnem sodelovanju in;1;0.166%
|
||||
referendum za ustanovitev;1;0.166%
|
||||
v prilogi vam;1;0.166%
|
||||
dodamo drugo polovico;1;0.166%
|
||||
od 24.10 do;1;0.166%
|
||||
izstopu enakopravnem sodelovanju;1;0.166%
|
||||
bi skrivali svojega;1;0.166%
|
||||
odkrivajte veličastna mesta;1;0.166%
|
||||
mešalnikom da postane;1;0.166%
|
||||
Ankaran škofije Šmarje;1;0.166%
|
||||
in orehe ki;1;0.166%
|
||||
zavremo približno 3;1;0.166%
|
||||
bi se mladi;1;0.166%
|
||||
dodamo še ajdovo;1;0.166%
|
||||
"škofije Šmarje marezige";1;0.166%
|
||||
nam jo je;1;0.166%
|
||||
povzroča srbečico in;1;0.166%
|
||||
bosta z vami;1;0.166%
|
||||
je vse v;1;0.166%
|
||||
jesenskem špasnem križarjanju;1;0.166%
|
||||
akreditirate naša novinarja;1;0.166%
|
||||
97 ker tega;1;0.166%
|
||||
juho in ajdovo;1;0.166%
|
||||
korenje in pol;1;0.166%
|
||||
ogreto pečico za;1;0.166%
|
||||
o društvu poslednjih;1;0.166%
|
||||
junak v posesti;1;0.166%
|
||||
liter vode odvisno;1;0.166%
|
||||
strli na roko;1;0.166%
|
||||
in Matjaž Javšnik;1;0.166%
|
||||
grozdje in mu;1;0.166%
|
||||
marezige in dekani;1;0.166%
|
||||
nam na veličastni;1;0.166%
|
||||
je prava pot;1;0.166%
|
||||
usmerjeni proklamirali in;1;0.166%
|
||||
mi pri vas;1;0.166%
|
||||
drugi posodi zavremo;1;0.166%
|
||||
ji dodamo drugo;1;0.166%
|
||||
mesta zahodnega Sredozemlja;1;0.166%
|
||||
torto s priokusom;1;0.166%
|
||||
62 widow judge;1;0.166%
|
||||
vino pomarančo in;1;0.166%
|
||||
jo je dala;1;0.166%
|
||||
prilogi vam zavračamo;1;0.166%
|
||||
naroda narod pa;1;0.166%
|
||||
s priokusom po;1;0.166%
|
||||
ne bi anatemizirala;1;0.166%
|
||||
svojega strahu pred;1;0.166%
|
||||
the late južek;1;0.166%
|
||||
3 dl vode;1;0.166%
|
||||
jed je nared;1;0.166%
|
||||
lahek in neškodljiv;1;0.166%
|
||||
pravno osebo oziroma;1;0.166%
|
||||
operemo grozdje in;1;0.166%
|
||||
resnična zgodba sami;1;0.166%
|
||||
eko zelenjavne kocke;1;0.166%
|
||||
volilna komisija za;1;0.166%
|
||||
dala tiskarna gepard;1;0.166%
|
||||
na križarjenju bosta;1;0.166%
|
||||
ponudimo z rezino;1;0.166%
|
||||
"števila članov ki";1;0.166%
|
||||
pomaranči vinu in;1;0.166%
|
||||
"ženskami v sutano";1;0.166%
|
||||
o čemer se;1;0.166%
|
||||
the late Frenki;1;0.166%
|
||||
požigom z idejo;1;0.166%
|
||||
19. aprila akreditirate;1;0.166%
|
||||
na kolobarje narežemo;1;0.166%
|
||||
de Mallorco Barcelono;1;0.166%
|
||||
v drugi posodi;1;0.166%
|
||||
na koncu vanjo;1;0.166%
|
||||
Neaplja se ustavili;1;0.166%
|
||||
dietna a okusna;1;0.166%
|
||||
fantastica 5 popeljala;1;0.166%
|
||||
na veličastni križarki;1;0.166%
|
||||
onemu zdi ključno;1;0.166%
|
||||
cerkev in pedri;1;0.166%
|
||||
tem da je;1;0.166%
|
||||
kovinska ogledala izdelana;1;0.166%
|
||||
gospodarske koristi svojih;1;0.166%
|
||||
društvu poslednjih dni;1;0.166%
|
||||
sami si izberemo;1;0.166%
|
||||
vode ki ji;1;0.166%
|
||||
"čemer se mu";1;0.166%
|
||||
izgubil dragoceno življenje;1;0.166%
|
||||
idejo o društvu;1;0.166%
|
||||
poglejte si kako;1;0.166%
|
||||
po okusu in;1;0.166%
|
||||
sanja a se;1;0.166%
|
||||
bi postali sprejemljivi;1;0.166%
|
||||
da je vse;1;0.166%
|
||||
in ne bi;1;0.166%
|
||||
z rezino pomaranče;1;0.166%
|
||||
"času počitnic od";1;0.166%
|
||||
ladja MSC fantastica;1;0.166%
|
||||
vas bo ladja;1;0.166%
|
||||
priokusom po grozdju;1;0.166%
|
||||
kako jo odpraviti;1;0.166%
|
||||
pekač vzamemo iz;1;0.166%
|
||||
in požigom z;1;0.166%
|
||||
v ogreto pečico;1;0.166%
|
||||
idejo o kloniranju;1;0.166%
|
||||
križarjenja je od;1;0.166%
|
||||
ne sanja a;1;0.166%
|
||||
srbeče kože je;1;0.166%
|
||||
z idejo o;1;0.166%
|
||||
"če je cerkvi";1;0.166%
|
||||
kako izdelujejo ta;1;0.166%
|
||||
narežemo korenje in;1;0.166%
|
||||
grozdju pomaranči vinu;1;0.166%
|
||||
drugo pravno osebo;1;0.166%
|
||||
moko pecilni prašek;1;0.166%
|
||||
"če jih cerkev";1;0.166%
|
||||
ValentinRozman 29.03.2010 ob;1;0.166%
|
||||
ajdovo kašo ki;1;0.166%
|
||||
prav gosta in;1;0.166%
|
||||
Marseille vse v;1;0.166%
|
||||
srbečico in kako;1;0.166%
|
||||
približno 20 minut;1;0.166%
|
||||
kuhamo približno pol;1;0.166%
|
||||
trg Brolo 3;1;0.166%
|
||||
in pol eko;1;0.166%
|
||||
Godec in Matjaž;1;0.166%
|
||||
"članov ki ima";1;0.166%
|
||||
ali drugo pravno;1;0.166%
|
||||
vam zavračamo račun;1;0.166%
|
||||
prašek in orehe;1;0.166%
|
||||
popeljala med mesti;1;0.166%
|
||||
"špasnem križarjanju vas";1;0.166%
|
||||
tudi posebej uporabite;1;0.166%
|
||||
solimo po okusu;1;0.166%
|
||||
in mu odstranimo;1;0.166%
|
||||
ima junak v;1;0.166%
|
||||
|
350
src/main/resources/Gigafida_subset/F0012405.xml
Normal file
350
src/main/resources/Gigafida_subset/F0012405.xml
Normal file
@@ -0,0 +1,350 @@
|
||||
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0012405" xml:lang="sl">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title>Gigafida: Branko Gradišnik. ANTI2(1999)</title>
|
||||
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
|
||||
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
|
||||
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
|
||||
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
|
||||
</funder>
|
||||
</titleStmt>
|
||||
<editionStmt>
|
||||
<edition>1.0</edition>
|
||||
</editionStmt>
|
||||
<extent>52 besed</extent>
|
||||
<publicationStmt>
|
||||
<idno>ANTI2</idno>
|
||||
<availability status="restricted">
|
||||
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
|
||||
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
|
||||
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
|
||||
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
|
||||
</p>
|
||||
</availability>
|
||||
<date>2012-04-15</date>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<bibl>
|
||||
<title n="???">neznani naslov</title>
|
||||
<author>Branko Gradišnik</author>
|
||||
<date>1999</date>
|
||||
<publisher n="drugo">neznani založnik</publisher>
|
||||
<note type="sourceLang"/>
|
||||
</bibl>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<encodingDesc>
|
||||
<projectDesc>
|
||||
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
|
||||
</p>
|
||||
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
|
||||
</p>
|
||||
</projectDesc>
|
||||
<tagsDecl>
|
||||
<namespace name="http://www.tei-c.org/ns/1.0">
|
||||
<tagUsage gi="S" occurs="50"/>
|
||||
<tagUsage gi="body" occurs="1"/>
|
||||
<tagUsage gi="c" occurs="11"/>
|
||||
<tagUsage gi="p" occurs="2"/>
|
||||
<tagUsage gi="s" occurs="5"/>
|
||||
<tagUsage gi="text" occurs="1"/>
|
||||
<tagUsage gi="w" occurs="52"/>
|
||||
</namespace>
|
||||
</tagsDecl>
|
||||
<appInfo>
|
||||
<application ident="Amebis_pretvornik" version="1.0">
|
||||
<label>[ZDRUZEVANJE] 1:1</label>
|
||||
<label>[IME] D:\FIDA\KORPUS\VNOS\2_ZDR\ANTI2.ZDR</label>
|
||||
<label>[1] **********</label>
|
||||
<label>[IZVOR] D:\FIDA\KORPUS\Vhod\Brane\IZVIRNO\LITERAT\IDEJE\Anti2.doc</label>
|
||||
<label>[FORMAT] MS Word for Windows 6.0/7.0</label>
|
||||
<label>[DATUM] 2.12.1999</label>
|
||||
<label>[IZVOR_RTF] D:\FIDA\KORPUS\Vhod\Brane\IZVIRNO\LITERAT\IDEJE\Anti2.RTF</label>
|
||||
<label>[PRETVORBA] RTF</label>
|
||||
<label>[KONEC] **********</label>
|
||||
</application>
|
||||
</appInfo>
|
||||
<classDecl>
|
||||
<taxonomy xml:id="SSJ">
|
||||
<category xml:id="SSJ.T">
|
||||
<catDesc>tisk</catDesc>
|
||||
<category xml:id="SSJ.T.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
<category xml:id="SSJ.T.K.L">
|
||||
<catDesc>leposlovno</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.K.S">
|
||||
<catDesc>strokovno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="SSJ.T.P.C">
|
||||
<catDesc>časopis</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P.R">
|
||||
<catDesc>revija</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.D">
|
||||
<catDesc>drugo</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.I">
|
||||
<catDesc>internet</catDesc>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.P">
|
||||
<catDesc>prenosnik</catDesc>
|
||||
<category xml:id="Ft.P.G">
|
||||
<catDesc>govorni</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.E">
|
||||
<catDesc>elektronski</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P">
|
||||
<catDesc>pisni</catDesc>
|
||||
<category xml:id="Ft.P.P.O">
|
||||
<catDesc>objavljeno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.C">
|
||||
<catDesc>časopisno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.C.D">
|
||||
<catDesc>dnevno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.C.V">
|
||||
<catDesc>večkrat tedensko</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.C.T">
|
||||
<catDesc>tedensko</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R">
|
||||
<catDesc>revialno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.R.T">
|
||||
<catDesc>tedensko</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.S">
|
||||
<catDesc>štirinajstdnevno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.M">
|
||||
<catDesc>mesečno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.D">
|
||||
<catDesc>redkeje kot na mesec</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.O">
|
||||
<catDesc>občasno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N">
|
||||
<catDesc>neobjavljeno</catDesc>
|
||||
<category xml:id="Ft.P.P.N.J">
|
||||
<catDesc>javno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N.I">
|
||||
<catDesc>interno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N.Z">
|
||||
<catDesc>zasebno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.Z">
|
||||
<catDesc>zvrst</catDesc>
|
||||
<category xml:id="Ft.Z.U">
|
||||
<catDesc>umetnostna</catDesc>
|
||||
<category xml:id="Ft.Z.U.P">
|
||||
<catDesc>pesniška</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.U.R">
|
||||
<catDesc>prozna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.U.D">
|
||||
<catDesc>dramska</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N">
|
||||
<catDesc>neumetnostna</catDesc>
|
||||
<category xml:id="Ft.Z.N.S">
|
||||
<catDesc>strokovna</catDesc>
|
||||
<category xml:id="Ft.Z.N.S.H">
|
||||
<catDesc>humanistična in družboslovna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.S.N">
|
||||
<catDesc>naravoslovna in tehnična</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.N">
|
||||
<catDesc>nestrokovna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.P">
|
||||
<catDesc>pravna</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.L">
|
||||
<catDesc>lektorirano</catDesc>
|
||||
<category xml:id="Ft.L.D">
|
||||
<catDesc>da</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.L.N">
|
||||
<catDesc>ne</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
</classDecl>
|
||||
</encodingDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="#SSJ.T.K.S"/>
|
||||
<catRef target="#Ft.P.P.N.Z"/>
|
||||
<catRef target="#Ft.Z.N.N"/>
|
||||
</textClass>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text xml:id="F0012405." xml:lang="sl">
|
||||
<body>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Somei" lemma="junak">Junak</w>
|
||||
<S/>
|
||||
<w msd="Ggnste" lemma="ustanoiveti">ustanoivi</w>
|
||||
<S/>
|
||||
<w msd="Sosei" lemma="društvo">društvo</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="da">da</w>
|
||||
<S/>
|
||||
<w msd="Gp-g" lemma="biti">bi</w>
|
||||
<S/>
|
||||
<w msd="Ggnd-em" lemma="goljufati">goljufal</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Zn-mei" lemma="nekdo">Nekdo</w>
|
||||
<S/>
|
||||
<w msd="Zotmed--k" lemma="on">mu</w>
|
||||
<S/>
|
||||
<w msd="Ggdste" lemma="začeti">začne</w>
|
||||
<S/>
|
||||
<w msd="Ggnn" lemma="nakazovati">nakazovati</w>
|
||||
<S/>
|
||||
<w msd="Ppnzmt" lemma="velik">velike</w>
|
||||
<S/>
|
||||
<w msd="Sozmt" lemma="vsota">vsote</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Rsn" lemma="zakaj">Zakaj</w>
|
||||
<c>?</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Vd" lemma="ker">Ker</w>
|
||||
<S/>
|
||||
<w msd="Ggnste-n" lemma="imeti">ima</w>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="junak">junak</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="v">v</w>
|
||||
<S/>
|
||||
<w msd="Sozem" lemma="posest">posesti</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="nekaj">nekaj</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="o">o</w>
|
||||
<S/>
|
||||
<w msd="Zz-sem" lemma="kar">čemer</w>
|
||||
<S/>
|
||||
<w msd="Zp------k" lemma="se">se</w>
|
||||
<S/>
|
||||
<w msd="Zotmed--k" lemma="on">mu</w>
|
||||
<S/>
|
||||
<w msd="L" lemma="ne">ne</w>
|
||||
<S/>
|
||||
<w msd="Ggnste" lemma="sanjati">sanja</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="a">a</w>
|
||||
<S/>
|
||||
<w msd="Zp------k" lemma="se">se</w>
|
||||
<S/>
|
||||
<w msd="Zk-sed" lemma="oni">onemu</w>
|
||||
<S/>
|
||||
<w msd="Ggnste" lemma="zdeti">zdi</w>
|
||||
<S/>
|
||||
<w msd="Ppnsei" lemma="ključen">ključno</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="pri">pri</w>
|
||||
<S/>
|
||||
<w msd="Sozem" lemma="operacija">operaciji</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="666">666</w>
|
||||
<c>.</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Ggvn" lemma="križati">Križati</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="ideja">idejo</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="o">o</w>
|
||||
<S/>
|
||||
<w msd="Sosem" lemma="kloniranje">kloniranju</w>
|
||||
<S/>
|
||||
<w msd="Somer" lemma="antikrist">Antikrista</w>
|
||||
<S/>
|
||||
<c>(</c>
|
||||
<w msd="Rsn" lemma="vključno">vključno</w>
|
||||
<S/>
|
||||
<w msd="Do" lemma="z">z</w>
|
||||
<S/>
|
||||
<w msd="Slmeo" lemma="Anand">Anandm</w>
|
||||
<S/>
|
||||
<w msd="Slmeo" lemma="Kofu">Kofujem</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Ppnmeo" lemma="torinski">torinskim</w>
|
||||
<S/>
|
||||
<w msd="Someo" lemma="prt">prtom</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Someo" lemma="požig">požigom</w>
|
||||
<c>)</c>
|
||||
<S/>
|
||||
<w msd="Do" lemma="z">z</w>
|
||||
<S/>
|
||||
<w msd="Sozeo" lemma="ideja">idejo</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="o">o</w>
|
||||
<S/>
|
||||
<w msd="Sosem" lemma="društvo">Društvu</w>
|
||||
<S/>
|
||||
<w msd="Ppnmmr" lemma="poslednji">poslednjih</w>
|
||||
<S/>
|
||||
<w msd="Sommr" lemma="dan">dni</w>
|
||||
<c>.</c>
|
||||
</s>
|
||||
</p>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
367
src/main/resources/Gigafida_subset/F0016316.xml
Normal file
367
src/main/resources/Gigafida_subset/F0016316.xml
Normal file
@@ -0,0 +1,367 @@
|
||||
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0016316" xml:lang="sl">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title>Gigafida: Državni zbor Republike Slovenije. ZZad. (1992)</title>
|
||||
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
|
||||
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
|
||||
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
|
||||
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
|
||||
</funder>
|
||||
</titleStmt>
|
||||
<editionStmt>
|
||||
<edition>1.0</edition>
|
||||
</editionStmt>
|
||||
<extent>62 besed</extent>
|
||||
<publicationStmt>
|
||||
<idno>A0050230</idno>
|
||||
<availability status="restricted">
|
||||
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
|
||||
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
|
||||
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
|
||||
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
|
||||
</p>
|
||||
</availability>
|
||||
<date>2012-04-15</date>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<bibl>
|
||||
<pubPlace>Ljubljana</pubPlace>
|
||||
<title>ZZad</title>
|
||||
<author>Državni zbor Republike Slovenije</author>
|
||||
<date>1992</date>
|
||||
<publisher n="Državni zbor Republike Slovenije">Državni zbor Republike Slovenije</publisher>
|
||||
<note type="sourceLang"/>
|
||||
</bibl>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<encodingDesc>
|
||||
<projectDesc>
|
||||
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
|
||||
</p>
|
||||
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
|
||||
</p>
|
||||
</projectDesc>
|
||||
<tagsDecl>
|
||||
<namespace name="http://www.tei-c.org/ns/1.0">
|
||||
<tagUsage gi="S" occurs="58"/>
|
||||
<tagUsage gi="body" occurs="1"/>
|
||||
<tagUsage gi="c" occurs="7"/>
|
||||
<tagUsage gi="p" occurs="5"/>
|
||||
<tagUsage gi="s" occurs="5"/>
|
||||
<tagUsage gi="text" occurs="1"/>
|
||||
<tagUsage gi="w" occurs="62"/>
|
||||
</namespace>
|
||||
</tagsDecl>
|
||||
<appInfo>
|
||||
<application ident="Amebis_pretvornik" version="1.0">
|
||||
<label>[AVTOMATSKO] DZZAK</label>
|
||||
<label>[IME] A0050230</label>
|
||||
<label>[IZVOR] d:\fida\korpus\vhod\dzzak\0118.txt</label>
|
||||
<label>[DATUM] 7.4.2000</label>
|
||||
</application>
|
||||
</appInfo>
|
||||
<classDecl>
|
||||
<taxonomy xml:id="SSJ">
|
||||
<category xml:id="SSJ.T">
|
||||
<catDesc>tisk</catDesc>
|
||||
<category xml:id="SSJ.T.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
<category xml:id="SSJ.T.K.L">
|
||||
<catDesc>leposlovno</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.K.S">
|
||||
<catDesc>strokovno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="SSJ.T.P.C">
|
||||
<catDesc>časopis</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P.R">
|
||||
<catDesc>revija</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.D">
|
||||
<catDesc>drugo</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.I">
|
||||
<catDesc>internet</catDesc>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.P">
|
||||
<catDesc>prenosnik</catDesc>
|
||||
<category xml:id="Ft.P.G">
|
||||
<catDesc>govorni</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.E">
|
||||
<catDesc>elektronski</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P">
|
||||
<catDesc>pisni</catDesc>
|
||||
<category xml:id="Ft.P.P.O">
|
||||
<catDesc>objavljeno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.C">
|
||||
<catDesc>časopisno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.C.D">
|
||||
<catDesc>dnevno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.C.V">
|
||||
<catDesc>večkrat tedensko</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.C.T">
|
||||
<catDesc>tedensko</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R">
|
||||
<catDesc>revialno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.R.T">
|
||||
<catDesc>tedensko</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.S">
|
||||
<catDesc>štirinajstdnevno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.M">
|
||||
<catDesc>mesečno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.D">
|
||||
<catDesc>redkeje kot na mesec</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.O">
|
||||
<catDesc>občasno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N">
|
||||
<catDesc>neobjavljeno</catDesc>
|
||||
<category xml:id="Ft.P.P.N.J">
|
||||
<catDesc>javno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N.I">
|
||||
<catDesc>interno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N.Z">
|
||||
<catDesc>zasebno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.Z">
|
||||
<catDesc>zvrst</catDesc>
|
||||
<category xml:id="Ft.Z.U">
|
||||
<catDesc>umetnostna</catDesc>
|
||||
<category xml:id="Ft.Z.U.P">
|
||||
<catDesc>pesniška</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.U.R">
|
||||
<catDesc>prozna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.U.D">
|
||||
<catDesc>dramska</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N">
|
||||
<catDesc>neumetnostna</catDesc>
|
||||
<category xml:id="Ft.Z.N.S">
|
||||
<catDesc>strokovna</catDesc>
|
||||
<category xml:id="Ft.Z.N.S.H">
|
||||
<catDesc>humanistična in družboslovna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.S.N">
|
||||
<catDesc>naravoslovna in tehnična</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.N">
|
||||
<catDesc>nestrokovna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.P">
|
||||
<catDesc>pravna</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.L">
|
||||
<catDesc>lektorirano</catDesc>
|
||||
<category xml:id="Ft.L.D">
|
||||
<catDesc>da</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.L.N">
|
||||
<catDesc>ne</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
</classDecl>
|
||||
</encodingDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="#SSJ.T.D"/>
|
||||
<catRef target="#Ft.P.P.O"/>
|
||||
<catRef target="#Ft.Z.N.S.H"/>
|
||||
<catRef target="#Ft.L.D"/>
|
||||
</textClass>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text xml:id="F0016316." xml:lang="sl">
|
||||
<body>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Kav" lemma="1.">1.</w>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="člen">člen</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Krv" lemma="I.">I.</w>
|
||||
<S/>
|
||||
<w msd="Ppnzer" lemma="splošen">SPLOŠNE</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="določba">DOLOČBE</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Sozei" lemma="zadruga">Zadruga</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="lahko">lahko</w>
|
||||
<S/>
|
||||
<w msd="Ggdste" lemma="ustanoviti">ustanovi</w>
|
||||
<S/>
|
||||
<w msd="Soset" lemma="podjetje">podjetje</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Kbzzet" lemma="drug">drugo</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="zadruga">zadrugo</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="ali">ali</w>
|
||||
<S/>
|
||||
<w msd="Kbzzet" lemma="drug">drugo</w>
|
||||
<S/>
|
||||
<w msd="Ppnzet" lemma="praven">pravno</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="oseba">osebo</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="oziroma">oziroma</w>
|
||||
<S/>
|
||||
<w msd="Ggdste" lemma="postati">postane</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="članica">članica</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="druga">druge</w>
|
||||
<S/>
|
||||
<w msd="Ppnzer" lemma="praven">pravne</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="oseba">osebe</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="če">če</w>
|
||||
<S/>
|
||||
<w msd="Zp------k" lemma="se">se</w>
|
||||
<S/>
|
||||
<w msd="Do" lemma="z">s</w>
|
||||
<S/>
|
||||
<w msd="Zk-seo" lemma="ta">tem</w>
|
||||
<S/>
|
||||
<w msd="Ggnste" lemma="uresničevati">uresničuje</w>
|
||||
<S/>
|
||||
<w msd="Sometn" lemma="namen">namen</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Dr" lemma="zaradi">zaradi</w>
|
||||
<S/>
|
||||
<w msd="Zv-mer" lemma="kateri">katerega</w>
|
||||
<S/>
|
||||
<w msd="Gp-ste-n" lemma="biti">je</w>
|
||||
<S/>
|
||||
<w msd="Gp-d-ez" lemma="biti">bila</w>
|
||||
<S/>
|
||||
<w msd="Sosmr" lemma="usta">ust</w>
|
||||
<S/>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Somei" lemma="zakon">ZAKON</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="o">O</w>
|
||||
<S/>
|
||||
<w msd="Sozmm" lemma="zadruga">ZADRUGAH</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Sozei" lemma="zadruga">Zadruga</w>
|
||||
<S/>
|
||||
<w msd="Gp-ste-n" lemma="biti">je</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="organizacija">organizacija</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="vnaprej">vnaprej</w>
|
||||
<S/>
|
||||
<w msd="Ppnser" lemma="nedoločen">nedoločenega</w>
|
||||
<S/>
|
||||
<w msd="Soser" lemma="število">števila</w>
|
||||
<S/>
|
||||
<w msd="Sommr" lemma="član">članov</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="ki">ki</w>
|
||||
<S/>
|
||||
<w msd="Ggnste-n" lemma="imeti">ima</w>
|
||||
<S/>
|
||||
<w msd="Sometn" lemma="namen">namen</w>
|
||||
<S/>
|
||||
<w msd="Ggnn" lemma="pospeševati">pospeševati</w>
|
||||
<S/>
|
||||
<w msd="Ppnzmt" lemma="gospodarski">gospodarske</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="korist">koristi</w>
|
||||
<S/>
|
||||
<w msd="Zp-mmr" lemma="svoj">svojih</w>
|
||||
<S/>
|
||||
<w msd="Sommr" lemma="član">članov</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="ter">ter</w>
|
||||
<S/>
|
||||
<w msd="Ggnste" lemma="temeljiti">temelji</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="na">na</w>
|
||||
<S/>
|
||||
<w msd="Ppnmem" lemma="prostovoljen">prostovoljnem</w>
|
||||
<S/>
|
||||
<w msd="Somem" lemma="pristop">pristopu</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Ppnmem" lemma="svoboden">svobodnem</w>
|
||||
<S/>
|
||||
<w msd="Somem" lemma="izstop">izstopu</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Ppnsem" lemma="enakopraven">enakopravnem</w>
|
||||
<S/>
|
||||
<w msd="Sosem" lemma="sodelovanje">sodelovanju</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Sosem" lemma="upravljanje">upravljanju</w>
|
||||
<S/>
|
||||
<w msd="Sommr" lemma="član">članov</w>
|
||||
<c>.</c>
|
||||
</s>
|
||||
</p>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
336
src/main/resources/Gigafida_subset/F0018194.xml
Normal file
336
src/main/resources/Gigafida_subset/F0018194.xml
Normal file
@@ -0,0 +1,336 @@
|
||||
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0018194" xml:lang="sl">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title>Gigafida: Branko Gradišnik. CERKEV(2000)</title>
|
||||
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
|
||||
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
|
||||
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
|
||||
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
|
||||
</funder>
|
||||
</titleStmt>
|
||||
<editionStmt>
|
||||
<edition>1.0</edition>
|
||||
</editionStmt>
|
||||
<extent>50 besed</extent>
|
||||
<publicationStmt>
|
||||
<idno>CERKEV</idno>
|
||||
<availability status="restricted">
|
||||
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
|
||||
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
|
||||
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
|
||||
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
|
||||
</p>
|
||||
</availability>
|
||||
<date>2012-04-15</date>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<bibl>
|
||||
<title n="???">neznani naslov</title>
|
||||
<author>Branko Gradišnik</author>
|
||||
<date>2000</date>
|
||||
<publisher n="drugo">neznani založnik</publisher>
|
||||
<note type="sourceLang"/>
|
||||
</bibl>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<encodingDesc>
|
||||
<projectDesc>
|
||||
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
|
||||
</p>
|
||||
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
|
||||
</p>
|
||||
</projectDesc>
|
||||
<tagsDecl>
|
||||
<namespace name="http://www.tei-c.org/ns/1.0">
|
||||
<tagUsage gi="S" occurs="48"/>
|
||||
<tagUsage gi="body" occurs="1"/>
|
||||
<tagUsage gi="c" occurs="7"/>
|
||||
<tagUsage gi="p" occurs="2"/>
|
||||
<tagUsage gi="s" occurs="2"/>
|
||||
<tagUsage gi="text" occurs="1"/>
|
||||
<tagUsage gi="w" occurs="50"/>
|
||||
</namespace>
|
||||
</tagsDecl>
|
||||
<appInfo>
|
||||
<application ident="Amebis_pretvornik" version="1.0">
|
||||
<label>[ZDRUZEVANJE] 1:1</label>
|
||||
<label>[IME] D:\FIDA\KORPUS\VNOS\2_ZDR\CERKEV.ZDR</label>
|
||||
<label>[1] **********</label>
|
||||
<label>[IZVOR] D:\FIDA\KORPUS\Vhod\Brane\IZVIRNO\SOBOTNA\KOLUMNE\pnz\IDEJE\CERKEV.DOC</label>
|
||||
<label>[FORMAT] MS Word for Windows 6.0/7.0</label>
|
||||
<label>[DATUM] 24.2.2000</label>
|
||||
<label>[IZVOR_RTF] D:\FIDA\KORPUS\Vhod\Brane\IZVIRNO\SOBOTNA\KOLUMNE\pnz\IDEJE\CERKEV.RTF</label>
|
||||
<label>[PRETVORBA] RTF</label>
|
||||
<label>[KONEC] **********</label>
|
||||
</application>
|
||||
</appInfo>
|
||||
<classDecl>
|
||||
<taxonomy xml:id="SSJ">
|
||||
<category xml:id="SSJ.T">
|
||||
<catDesc>tisk</catDesc>
|
||||
<category xml:id="SSJ.T.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
<category xml:id="SSJ.T.K.L">
|
||||
<catDesc>leposlovno</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.K.S">
|
||||
<catDesc>strokovno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="SSJ.T.P.C">
|
||||
<catDesc>časopis</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P.R">
|
||||
<catDesc>revija</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.D">
|
||||
<catDesc>drugo</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.I">
|
||||
<catDesc>internet</catDesc>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.P">
|
||||
<catDesc>prenosnik</catDesc>
|
||||
<category xml:id="Ft.P.G">
|
||||
<catDesc>govorni</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.E">
|
||||
<catDesc>elektronski</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P">
|
||||
<catDesc>pisni</catDesc>
|
||||
<category xml:id="Ft.P.P.O">
|
||||
<catDesc>objavljeno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.C">
|
||||
<catDesc>časopisno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.C.D">
|
||||
<catDesc>dnevno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.C.V">
|
||||
<catDesc>večkrat tedensko</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.C.T">
|
||||
<catDesc>tedensko</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R">
|
||||
<catDesc>revialno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.R.T">
|
||||
<catDesc>tedensko</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.S">
|
||||
<catDesc>štirinajstdnevno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.M">
|
||||
<catDesc>mesečno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.D">
|
||||
<catDesc>redkeje kot na mesec</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.O">
|
||||
<catDesc>občasno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N">
|
||||
<catDesc>neobjavljeno</catDesc>
|
||||
<category xml:id="Ft.P.P.N.J">
|
||||
<catDesc>javno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N.I">
|
||||
<catDesc>interno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N.Z">
|
||||
<catDesc>zasebno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.Z">
|
||||
<catDesc>zvrst</catDesc>
|
||||
<category xml:id="Ft.Z.U">
|
||||
<catDesc>umetnostna</catDesc>
|
||||
<category xml:id="Ft.Z.U.P">
|
||||
<catDesc>pesniška</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.U.R">
|
||||
<catDesc>prozna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.U.D">
|
||||
<catDesc>dramska</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N">
|
||||
<catDesc>neumetnostna</catDesc>
|
||||
<category xml:id="Ft.Z.N.S">
|
||||
<catDesc>strokovna</catDesc>
|
||||
<category xml:id="Ft.Z.N.S.H">
|
||||
<catDesc>humanistična in družboslovna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.S.N">
|
||||
<catDesc>naravoslovna in tehnična</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.N">
|
||||
<catDesc>nestrokovna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.P">
|
||||
<catDesc>pravna</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.L">
|
||||
<catDesc>lektorirano</catDesc>
|
||||
<category xml:id="Ft.L.D">
|
||||
<catDesc>da</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.L.N">
|
||||
<catDesc>ne</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
</classDecl>
|
||||
</encodingDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="#SSJ.T.K.S"/>
|
||||
<catRef target="#Ft.P.P.N.Z"/>
|
||||
<catRef target="#Ft.Z.N.N"/>
|
||||
</textClass>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text xml:id="F0018194." xml:lang="sl">
|
||||
<body>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Sozei" lemma="cerkev">Cerkev</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="v">v</w>
|
||||
<S/>
|
||||
<w msd="Sozem" lemma="opreka">opreki</w>
|
||||
<S/>
|
||||
<w msd="Do" lemma="z">z</w>
|
||||
<S/>
|
||||
<w msd="Someo" lemma="narod">narodom</w>
|
||||
<c>:</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="če">Če</w>
|
||||
<S/>
|
||||
<w msd="Gp-ste-n" lemma="biti">je</w>
|
||||
<S/>
|
||||
<w msd="Sozed" lemma="cerkev">Cerkvi</w>
|
||||
<S/>
|
||||
<w msd="Rsr" lemma="dobro">bolje</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="potem">potem</w>
|
||||
<S/>
|
||||
<w msd="Ggvste" lemma="iti">gre</w>
|
||||
<S/>
|
||||
<w msd="Dt" lemma="za">za</w>
|
||||
<S/>
|
||||
<w msd="Sommt" lemma="duhovnik">duhovnike</w>
|
||||
<S/>
|
||||
<w msd="Rsr" lemma="več">več</w>
|
||||
<S/>
|
||||
<w msd="Sommr" lemma="sin">sinov</w>
|
||||
<S/>
|
||||
<w msd="Somer" lemma="narod">naroda</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="narod">narod</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="pa">pa</w>
|
||||
<S/>
|
||||
<w msd="Ggnste" lemma="izumirati">izumira</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Sozei" lemma="cerkev">Cerkev</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Sommi" lemma="peder">pedri</w>
|
||||
<c>:</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="če">Če</w>
|
||||
<S/>
|
||||
<w msd="Zotmmt--k" lemma="on">jih</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="cerkev">Cerkev</w>
|
||||
<S/>
|
||||
<w msd="L" lemma="ne">ne</w>
|
||||
<S/>
|
||||
<w msd="Gp-g" lemma="biti">bi</w>
|
||||
<S/>
|
||||
<w msd="Ggvd-ez" lemma="anatemizirati">anatemizirala</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="tako">tako</w>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="da">da</w>
|
||||
<S/>
|
||||
<w msd="Gp-g" lemma="biti">bi</w>
|
||||
<S/>
|
||||
<w msd="Ggdd-mm" lemma="postati">postali</w>
|
||||
<S/>
|
||||
<w msd="Ppnmmi" lemma="sprejemljiv">sprejemljivi</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="potem">potem</w>
|
||||
<S/>
|
||||
<w msd="Gp-g" lemma="biti">bi</w>
|
||||
<S/>
|
||||
<w msd="Zp------k" lemma="se">se</w>
|
||||
<S/>
|
||||
<w msd="Ppnmmi" lemma="mlad">mladi</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="istospolno">istospolno</w>
|
||||
<S/>
|
||||
<w msd="Pdnmmi" lemma="usmerjen">usmerjeni</w>
|
||||
<S/>
|
||||
<w msd="Ggvd-mm" lemma="proklamirati">proklamirali</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="L" lemma="ne">ne</w>
|
||||
<S/>
|
||||
<w msd="Gp-g" lemma="biti">bi</w>
|
||||
<S/>
|
||||
<w msd="Ggnd-mm" lemma="skrivati">skrivali</w>
|
||||
<S/>
|
||||
<w msd="Zp-mer" lemma="svoj">svojega</w>
|
||||
<S/>
|
||||
<w msd="Somer" lemma="strah">strahu</w>
|
||||
<S/>
|
||||
<w msd="Do" lemma="pred">pred</w>
|
||||
<S/>
|
||||
<w msd="Sozmo" lemma="ženska">ženskami</w>
|
||||
<S/>
|
||||
<w msd="Dt" lemma="v">v</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="sutano">sutano</w>
|
||||
<c>!</c>
|
||||
</s>
|
||||
</p>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
367
src/main/resources/Gigafida_subset/F0026709.xml
Normal file
367
src/main/resources/Gigafida_subset/F0026709.xml
Normal file
@@ -0,0 +1,367 @@
|
||||
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0026709" xml:lang="sl">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title>Gigafida: INTERA(1998)</title>
|
||||
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
|
||||
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
|
||||
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
|
||||
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
|
||||
</funder>
|
||||
</titleStmt>
|
||||
<editionStmt>
|
||||
<edition>1.0</edition>
|
||||
</editionStmt>
|
||||
<extent>53 besed</extent>
|
||||
<publicationStmt>
|
||||
<idno>INTERA</idno>
|
||||
<availability status="restricted">
|
||||
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
|
||||
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
|
||||
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
|
||||
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
|
||||
</p>
|
||||
</availability>
|
||||
<date>2012-04-15</date>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<bibl>
|
||||
<title n="???">neznani naslov</title>
|
||||
<author n="???">neznani avtor</author>
|
||||
<date>1998</date>
|
||||
<publisher n="drugo">neznani založnik</publisher>
|
||||
<note type="sourceLang"/>
|
||||
</bibl>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<encodingDesc>
|
||||
<projectDesc>
|
||||
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
|
||||
</p>
|
||||
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
|
||||
</p>
|
||||
</projectDesc>
|
||||
<tagsDecl>
|
||||
<namespace name="http://www.tei-c.org/ns/1.0">
|
||||
<tagUsage gi="S" occurs="45"/>
|
||||
<tagUsage gi="body" occurs="1"/>
|
||||
<tagUsage gi="c" occurs="10"/>
|
||||
<tagUsage gi="p" occurs="7"/>
|
||||
<tagUsage gi="s" occurs="11"/>
|
||||
<tagUsage gi="text" occurs="1"/>
|
||||
<tagUsage gi="w" occurs="53"/>
|
||||
</namespace>
|
||||
</tagsDecl>
|
||||
<appInfo>
|
||||
<application ident="Amebis_pretvornik" version="1.0">
|
||||
<label>[ZDRUZEVANJE] 1:1</label>
|
||||
<label>[IME] D:\FIDA\KORPUS\VNOS\2_ZDR\INTERA.ZDR</label>
|
||||
<label>[1] **********</label>
|
||||
<label>[IZVOR] D:\FIDA\KORPUS\Vhod\PrimNov\KOMERC\INTERa.MAR</label>
|
||||
<label>[FORMAT] WordStar 4.0</label>
|
||||
<label>[DATUM] 12.10.1999</label>
|
||||
<label>[IZVOR_RTF] D:\FIDA\KORPUS\Vhod\PrimNov\KOMERC\INTERa.RTF</label>
|
||||
<label>[PRETVORBA] RTF</label>
|
||||
<label>[KONEC] **********</label>
|
||||
</application>
|
||||
</appInfo>
|
||||
<classDecl>
|
||||
<taxonomy xml:id="SSJ">
|
||||
<category xml:id="SSJ.T">
|
||||
<catDesc>tisk</catDesc>
|
||||
<category xml:id="SSJ.T.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
<category xml:id="SSJ.T.K.L">
|
||||
<catDesc>leposlovno</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.K.S">
|
||||
<catDesc>strokovno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="SSJ.T.P.C">
|
||||
<catDesc>časopis</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P.R">
|
||||
<catDesc>revija</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.D">
|
||||
<catDesc>drugo</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.I">
|
||||
<catDesc>internet</catDesc>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.P">
|
||||
<catDesc>prenosnik</catDesc>
|
||||
<category xml:id="Ft.P.G">
|
||||
<catDesc>govorni</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.E">
|
||||
<catDesc>elektronski</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P">
|
||||
<catDesc>pisni</catDesc>
|
||||
<category xml:id="Ft.P.P.O">
|
||||
<catDesc>objavljeno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.C">
|
||||
<catDesc>časopisno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.C.D">
|
||||
<catDesc>dnevno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.C.V">
|
||||
<catDesc>večkrat tedensko</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.C.T">
|
||||
<catDesc>tedensko</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R">
|
||||
<catDesc>revialno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.R.T">
|
||||
<catDesc>tedensko</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.S">
|
||||
<catDesc>štirinajstdnevno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.M">
|
||||
<catDesc>mesečno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.D">
|
||||
<catDesc>redkeje kot na mesec</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.O">
|
||||
<catDesc>občasno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N">
|
||||
<catDesc>neobjavljeno</catDesc>
|
||||
<category xml:id="Ft.P.P.N.J">
|
||||
<catDesc>javno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N.I">
|
||||
<catDesc>interno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N.Z">
|
||||
<catDesc>zasebno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.Z">
|
||||
<catDesc>zvrst</catDesc>
|
||||
<category xml:id="Ft.Z.U">
|
||||
<catDesc>umetnostna</catDesc>
|
||||
<category xml:id="Ft.Z.U.P">
|
||||
<catDesc>pesniška</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.U.R">
|
||||
<catDesc>prozna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.U.D">
|
||||
<catDesc>dramska</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N">
|
||||
<catDesc>neumetnostna</catDesc>
|
||||
<category xml:id="Ft.Z.N.S">
|
||||
<catDesc>strokovna</catDesc>
|
||||
<category xml:id="Ft.Z.N.S.H">
|
||||
<catDesc>humanistična in družboslovna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.S.N">
|
||||
<catDesc>naravoslovna in tehnična</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.N">
|
||||
<catDesc>nestrokovna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.P">
|
||||
<catDesc>pravna</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.L">
|
||||
<catDesc>lektorirano</catDesc>
|
||||
<category xml:id="Ft.L.D">
|
||||
<catDesc>da</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.L.N">
|
||||
<catDesc>ne</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
</classDecl>
|
||||
</encodingDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="#SSJ.T.K.S"/>
|
||||
<catRef target="#Ft.P.P.N.I"/>
|
||||
<catRef target="#Ft.Z.N.N"/>
|
||||
</textClass>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text xml:id="F0026709." xml:lang="sl">
|
||||
<body>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Somei" lemma="g">g</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Slmei" lemma="Detela">Detela</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Matjaž">Matjaž</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Koper">Koper</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Kav" lemma="21.">21.</w>
|
||||
<S/>
|
||||
<w msd="Kav" lemma="2.">2.</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="1998">1998</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Dm" lemma="v">V</w>
|
||||
<S/>
|
||||
<w msd="Sozem" lemma="priloga">prilogi</w>
|
||||
<S/>
|
||||
<w msd="Zod-md" lemma="ti">vam</w>
|
||||
<S/>
|
||||
<w msd="Ggnspm" lemma="zavračati">zavračamo</w>
|
||||
<S/>
|
||||
<w msd="Sometn" lemma="račun">račun</w>
|
||||
<S/>
|
||||
<w msd="O" lemma="št.">št.</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="2130">2130</w>
|
||||
<c>/</c>
|
||||
<w msd="Kag" lemma="97">97</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="ker">ker</w>
|
||||
<S/>
|
||||
<w msd="Zk-ser" lemma="ta">tega</w>
|
||||
<S/>
|
||||
<w msd="Zop-ed--k" lemma="jaz">mi</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="pri">pri</w>
|
||||
<S/>
|
||||
<w msd="Zod-mm" lemma="ti">vas</w>
|
||||
<S/>
|
||||
<w msd="Gp-spm-d" lemma="biti">nismo</w>
|
||||
<S/>
|
||||
<w msd="Ggdd-mm" lemma="naročiti">naročili</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Somei" lemma="naročnik">Naročnik</w>
|
||||
<S/>
|
||||
<w msd="Gp-ste-n" lemma="biti">je</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="tiskarna">Tiskarna</w>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="gepard">Gepard</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="1">1</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Dr" lemma="poleg">Poleg</w>
|
||||
<S/>
|
||||
<w msd="Zk-ser" lemma="ta">tega</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="pa">pa</w>
|
||||
<S/>
|
||||
<w msd="Gp-ste-n" lemma="biti">je</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="cena">cena</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="ki">ki</w>
|
||||
<S/>
|
||||
<w msd="Zop-md" lemma="jaz">nam</w>
|
||||
<S/>
|
||||
<w msd="Zotzet--k" lemma="on">jo</w>
|
||||
<S/>
|
||||
<w msd="Gp-ste-n" lemma="biti">je</w>
|
||||
<S/>
|
||||
<w msd="Ggdd-ez" lemma="dati">dala</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="tiskarna">Tiskarna</w>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="gepard">Gepard</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="1">1</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="veliko">veliko</w>
|
||||
<S/>
|
||||
<w msd="Pppzei" lemma="nizek">nižja</w>
|
||||
<c>.</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Ppnzei" lemma="dunajski">Dunajska</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="cesta">cesta</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="22">22</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Somei" lemma="intermarketing">INTERMARKETING</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Ppnmein" lemma="lep">Lep</w>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="pozdrav">pozdrav</w>
|
||||
<c>!</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Slmei" lemma="Darko">Darko</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Stepančič">Stepančič</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Kag" lemma="1113">1113</w>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Ljubljana">Ljubljana</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Pdnmeid" lemma="spoštovan">Spoštovani</w>
|
||||
<c>!</c>
|
||||
</s>
|
||||
</p>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
365
src/main/resources/Gigafida_subset/F0030361.xml
Normal file
365
src/main/resources/Gigafida_subset/F0030361.xml
Normal file
@@ -0,0 +1,365 @@
|
||||
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0030361" xml:lang="sl">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title>Gigafida: REFEREND(1998)</title>
|
||||
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
|
||||
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
|
||||
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
|
||||
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
|
||||
</funder>
|
||||
</titleStmt>
|
||||
<editionStmt>
|
||||
<edition>1.0</edition>
|
||||
</editionStmt>
|
||||
<extent>52 besed</extent>
|
||||
<publicationStmt>
|
||||
<idno>REFEREND</idno>
|
||||
<availability status="restricted">
|
||||
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
|
||||
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
|
||||
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
|
||||
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
|
||||
</p>
|
||||
</availability>
|
||||
<date>2012-04-15</date>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<bibl>
|
||||
<title n="???">neznani naslov</title>
|
||||
<author n="???">neznani avtor</author>
|
||||
<date>1998</date>
|
||||
<publisher n="drugo">neznani založnik</publisher>
|
||||
<note type="sourceLang"/>
|
||||
</bibl>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<encodingDesc>
|
||||
<projectDesc>
|
||||
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
|
||||
</p>
|
||||
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
|
||||
</p>
|
||||
</projectDesc>
|
||||
<tagsDecl>
|
||||
<namespace name="http://www.tei-c.org/ns/1.0">
|
||||
<tagUsage gi="S" occurs="44"/>
|
||||
<tagUsage gi="body" occurs="1"/>
|
||||
<tagUsage gi="c" occurs="10"/>
|
||||
<tagUsage gi="p" occurs="9"/>
|
||||
<tagUsage gi="s" occurs="9"/>
|
||||
<tagUsage gi="text" occurs="1"/>
|
||||
<tagUsage gi="w" occurs="52"/>
|
||||
</namespace>
|
||||
</tagsDecl>
|
||||
<appInfo>
|
||||
<application ident="Amebis_pretvornik" version="1.0">
|
||||
<label>[ZDRUZEVANJE] 1:1</label>
|
||||
<label>[IME] D:\FIDA\KORPUS\VNOS\2_ZDR\REFEREND.ZDR</label>
|
||||
<label>[1] **********</label>
|
||||
<label>[IZVOR] D:\FIDA\KORPUS\Vhod\PrimNov\NADJA\AKREDITI\1998\REFEREND</label>
|
||||
<label>[FORMAT] MS Word for Windows 6.0/7.0</label>
|
||||
<label>[DATUM] 13.10.1999</label>
|
||||
<label>[IZVOR_RTF] D:\FIDA\KORPUS\Vhod\PrimNov\NADJA\AKREDITI\1998\REFEREND.RTF</label>
|
||||
<label>[PRETVORBA] RTF</label>
|
||||
<label>[KONEC] **********</label>
|
||||
</application>
|
||||
</appInfo>
|
||||
<classDecl>
|
||||
<taxonomy xml:id="SSJ">
|
||||
<category xml:id="SSJ.T">
|
||||
<catDesc>tisk</catDesc>
|
||||
<category xml:id="SSJ.T.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
<category xml:id="SSJ.T.K.L">
|
||||
<catDesc>leposlovno</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.K.S">
|
||||
<catDesc>strokovno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="SSJ.T.P.C">
|
||||
<catDesc>časopis</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P.R">
|
||||
<catDesc>revija</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.D">
|
||||
<catDesc>drugo</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.I">
|
||||
<catDesc>internet</catDesc>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.P">
|
||||
<catDesc>prenosnik</catDesc>
|
||||
<category xml:id="Ft.P.G">
|
||||
<catDesc>govorni</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.E">
|
||||
<catDesc>elektronski</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P">
|
||||
<catDesc>pisni</catDesc>
|
||||
<category xml:id="Ft.P.P.O">
|
||||
<catDesc>objavljeno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.C">
|
||||
<catDesc>časopisno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.C.D">
|
||||
<catDesc>dnevno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.C.V">
|
||||
<catDesc>večkrat tedensko</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.C.T">
|
||||
<catDesc>tedensko</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R">
|
||||
<catDesc>revialno</catDesc>
|
||||
<category xml:id="Ft.P.P.O.P.R.T">
|
||||
<catDesc>tedensko</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.S">
|
||||
<catDesc>štirinajstdnevno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.M">
|
||||
<catDesc>mesečno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.D">
|
||||
<catDesc>redkeje kot na mesec</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.O.P.R.O">
|
||||
<catDesc>občasno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N">
|
||||
<catDesc>neobjavljeno</catDesc>
|
||||
<category xml:id="Ft.P.P.N.J">
|
||||
<catDesc>javno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N.I">
|
||||
<catDesc>interno</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.P.P.N.Z">
|
||||
<catDesc>zasebno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.Z">
|
||||
<catDesc>zvrst</catDesc>
|
||||
<category xml:id="Ft.Z.U">
|
||||
<catDesc>umetnostna</catDesc>
|
||||
<category xml:id="Ft.Z.U.P">
|
||||
<catDesc>pesniška</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.U.R">
|
||||
<catDesc>prozna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.U.D">
|
||||
<catDesc>dramska</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N">
|
||||
<catDesc>neumetnostna</catDesc>
|
||||
<category xml:id="Ft.Z.N.S">
|
||||
<catDesc>strokovna</catDesc>
|
||||
<category xml:id="Ft.Z.N.S.H">
|
||||
<catDesc>humanistična in družboslovna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.S.N">
|
||||
<catDesc>naravoslovna in tehnična</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.N">
|
||||
<catDesc>nestrokovna</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.Z.N.P">
|
||||
<catDesc>pravna</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
<taxonomy>
|
||||
<category xml:id="Ft.L">
|
||||
<catDesc>lektorirano</catDesc>
|
||||
<category xml:id="Ft.L.D">
|
||||
<catDesc>da</catDesc>
|
||||
</category>
|
||||
<category xml:id="Ft.L.N">
|
||||
<catDesc>ne</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
</taxonomy>
|
||||
</classDecl>
|
||||
</encodingDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="#SSJ.T.K.S"/>
|
||||
<catRef target="#Ft.P.P.N.I"/>
|
||||
<catRef target="#Ft.Z.N.N"/>
|
||||
</textClass>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text xml:id="F0030361." xml:lang="sl">
|
||||
<body>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Ppnmeid" lemma="odgovoren">odgovorni</w>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="urednik">urednik</w>
|
||||
<S/>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Kag" lemma="6000">6000</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Koper">Koper</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Ppnzei" lemma="volilen">Volilna</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="komisija">komisija</w>
|
||||
<S/>
|
||||
<w msd="Dt" lemma="za">za</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="ustanovitev">ustanovitev</w>
|
||||
<S/>
|
||||
<w msd="Ppnzmr" lemma="nov">novih</w>
|
||||
<S/>
|
||||
<w msd="Sozmr" lemma="občina">občin</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Somei" lemma="trg">Trg</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Brolo">Brolo</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="3">3</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Ggnspm" lemma="prositi">Prosimo</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="da">da</w>
|
||||
<S/>
|
||||
<w msd="Dt" lemma="za">za</w>
|
||||
<S/>
|
||||
<w msd="Sometn" lemma="referendum">referendum</w>
|
||||
<S/>
|
||||
<w msd="Dt" lemma="za">za</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="ustanovitev">ustanovitev</w>
|
||||
<S/>
|
||||
<w msd="Ppnzmr" lemma="nov">novih</w>
|
||||
<S/>
|
||||
<w msd="Sozmr" lemma="občina">občin</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Ankaran">Ankaran</w>
|
||||
<c>-</c>
|
||||
<w msd="Sozer" lemma="škofija">Škofije</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Slsei" lemma="Šmarje">Šmarje</w>
|
||||
<c>-</c>
|
||||
<w msd="Sozmi" lemma="mareziga">Marezige</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Sommi" lemma="dekan">Dekani</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="ki">ki</w>
|
||||
<S/>
|
||||
<w msd="Gp-pte-n" lemma="biti">bo</w>
|
||||
<S/>
|
||||
<w msd="Ggnd-em" lemma="potekati">potekal</w>
|
||||
<S/>
|
||||
<w msd="Kav" lemma="19.">19.</w>
|
||||
<S/>
|
||||
<w msd="Somer" lemma="april">aprila</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Ggvsdm" lemma="akreditirati">akreditirate</w>
|
||||
<S/>
|
||||
<w msd="Zspzeim" lemma="naš">naša</w>
|
||||
<S/>
|
||||
<w msd="Somer" lemma="novinar">novinarja</w>
|
||||
<S/>
|
||||
<w msd="Slzet" lemma="Mojca">Mojco</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Beljan">Beljan</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Slmetd" lemma="Danijel">Danijela</w>
|
||||
<S/>
|
||||
<w msd="Somer" lemma="cek">Ceka</w>
|
||||
<c>.</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Slmei" lemma="Koper">Koper</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Kav" lemma="8.">8.</w>
|
||||
<S/>
|
||||
<w msd="Somer" lemma="april">aprila</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="1998">1998</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Sozei" lemma="prošnja">Prošnja</w>
|
||||
<S/>
|
||||
<w msd="Dt" lemma="za">za</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="akreditacija">akreditacijo</w>
|
||||
<S/>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Slmei" lemma="Branko">Branko</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Podobnik">Podobnik</w>
|
||||
<c>,</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Do" lemma="z">Z</w>
|
||||
<S/>
|
||||
<w msd="Ppnmmo" lemma="odličen">odličnimi</w>
|
||||
<S/>
|
||||
<w msd="Sommo" lemma="pozdrav">pozdravi</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
</s>
|
||||
</p>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
356
src/main/resources/Gigafida_subset/nested/F0036980.xml
Normal file
356
src/main/resources/Gigafida_subset/nested/F0036980.xml
Normal file
@@ -0,0 +1,356 @@
|
||||
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0036980" xml:lang="sl">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title>Gigafida: INTERNET (2010-10-12)</title>
|
||||
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
|
||||
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
|
||||
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
|
||||
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
|
||||
</funder>
|
||||
</titleStmt>
|
||||
<editionStmt>
|
||||
<edition>1.0</edition>
|
||||
</editionStmt>
|
||||
<extent>100 besed</extent>
|
||||
<publicationStmt>
|
||||
<idno>arhivo.com</idno>
|
||||
<availability status="restricted">
|
||||
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
|
||||
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
|
||||
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
|
||||
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
|
||||
</p>
|
||||
</availability>
|
||||
<date>2012-04-15</date>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<bibl>
|
||||
<title>INTERNET</title>
|
||||
<author n="???">neznani avtor</author>
|
||||
<date>2010-10-12</date>
|
||||
<publisher n="internet, novice">arhivo.com</publisher>
|
||||
<note type="sourceLang"/>
|
||||
<note n="URL">
|
||||
<list>
|
||||
<item>
|
||||
<ref target="http://www.arhivo.com/jesenska-torta&GID%3D8">
|
||||
http://www.arhivo.com/jesenska-torta&GID=8
|
||||
</ref>
|
||||
</item>
|
||||
</list>
|
||||
</note>
|
||||
</bibl>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<encodingDesc>
|
||||
<projectDesc>
|
||||
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
|
||||
</p>
|
||||
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
|
||||
</p>
|
||||
</projectDesc>
|
||||
<tagsDecl>
|
||||
<namespace name="http://www.tei-c.org/ns/1.0">
|
||||
<tagUsage gi="S" occurs="97"/>
|
||||
<tagUsage gi="body" occurs="1"/>
|
||||
<tagUsage gi="c" occurs="25"/>
|
||||
<tagUsage gi="p" occurs="3"/>
|
||||
<tagUsage gi="s" occurs="11"/>
|
||||
<tagUsage gi="text" occurs="1"/>
|
||||
<tagUsage gi="w" occurs="100"/>
|
||||
</namespace>
|
||||
</tagsDecl>
|
||||
<classDecl>
|
||||
<taxonomy xml:id="SSJ">
|
||||
<category xml:id="SSJ.T">
|
||||
<catDesc>tisk</catDesc>
|
||||
<category xml:id="SSJ.T.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
<category xml:id="SSJ.T.K.L">
|
||||
<catDesc>leposlovno</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.K.S">
|
||||
<catDesc>strokovno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="SSJ.T.P.C">
|
||||
<catDesc>časopis</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P.R">
|
||||
<catDesc>revija</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.D">
|
||||
<catDesc>drugo</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.I">
|
||||
<catDesc>internet</catDesc>
|
||||
</category>
|
||||
</taxonomy>
|
||||
</classDecl>
|
||||
</encodingDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="#SSJ.I"/>
|
||||
</textClass>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text xml:id="F0036980." xml:lang="sl">
|
||||
<body>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Ppnzei" lemma="jesenski">Jesenska</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="torta">torta</w>
|
||||
<S/>
|
||||
<w msd="Ggnste" lemma="vsebovati">vsebuje</w>
|
||||
<S/>
|
||||
<w msd="Soset" lemma="grozdje">grozdje</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Soset" lemma="vino">vino</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="pomaranča">pomarančo</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Sommt" lemma="oreh">orehe</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Somei" lemma="foto">Foto</w>
|
||||
<c>:</c>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="arhivo">Arhivo</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Dt" lemma="v">V</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="čast">čast</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="jesen">jeseni</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="narediti">naredimo</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="torta">torto</w>
|
||||
<S/>
|
||||
<w msd="Do" lemma="z">s</w>
|
||||
<S/>
|
||||
<w msd="Someo" lemma="priokus">priokusom</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="po">po</w>
|
||||
<S/>
|
||||
<w msd="Sosem" lemma="grozdje">grozdju</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sozem" lemma="pomaranča">pomaranči</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sosem" lemma="vino">vinu</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Sommm" lemma="oreh">orehih</w>
|
||||
<c>.</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Sosmo" lemma="jajce">Jajci</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Sometn" lemma="sladkor">sladkor</w>
|
||||
<S/>
|
||||
<w msd="Ppnzet" lemma="penast">penasto</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="umešati">umešamo</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Ggdspm" lemma="dodati">Dodamo</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="moka">moko</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Ppnmeid" lemma="pecilen">pecilni</w>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="prašek">prašek</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Sommt" lemma="oreh">orehe</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="ki">ki</w>
|
||||
<S/>
|
||||
<w msd="Gp-spm-n" lemma="biti">smo</w>
|
||||
<S/>
|
||||
<w msd="Zotmmt--k" lemma="on">jih</w>
|
||||
<S/>
|
||||
<w msd="Ggdd-mm" lemma="streti">strli</w>
|
||||
<S/>
|
||||
<w msd="Dt" lemma="na">na</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="roka">roko</w>
|
||||
<S/>
|
||||
<c>(</c>
|
||||
<w msd="Rsn" lemma="zato">zato</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="da">da</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="ohraniti">ohranimo</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="nekaj">nekaj</w>
|
||||
<S/>
|
||||
<w msd="Pppzmr" lemma="velik">večjih</w>
|
||||
<S/>
|
||||
<w msd="Sommr" lemma="košček">koščkov</w>
|
||||
<c>)</c>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Vd" lemma="ko">Ko</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="dobro">dobro</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="premešati">premešamo</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="dodati">dodamo</w>
|
||||
<S/>
|
||||
<w msd="L" lemma="še">še</w>
|
||||
<S/>
|
||||
<w msd="Sometn" lemma="jogurt">jogurt</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sosei" lemma="olje">olje</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sosei" lemma="vino">vino</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Ppnmeid" lemma="pomarančen">pomarančni</w>
|
||||
<S/>
|
||||
<w msd="Sometn" lemma="sok">sok</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="lupinica">lupinico</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Sozet" lemma="masa">Maso</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="vliti">vlijemo</w>
|
||||
<S/>
|
||||
<w msd="Dt" lemma="v">v</w>
|
||||
<S/>
|
||||
<w msd="Ppnmetn" lemma="okrogel">okrogel</w>
|
||||
<S/>
|
||||
<w msd="Sometn" lemma="pekač">pekač</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="dati">damo</w>
|
||||
<S/>
|
||||
<w msd="Dt" lemma="v">v</w>
|
||||
<S/>
|
||||
<w msd="Pdnzet" lemma="ogret">ogreto</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="pečica">pečico</w>
|
||||
<S/>
|
||||
<w msd="Dt" lemma="za">za</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="pol">pol</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="ura">ure</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Rsn" lemma="medtem">Medtem</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="oprati">operemo</w>
|
||||
<S/>
|
||||
<w msd="Soset" lemma="grozdje">grozdje</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Zotmed--k" lemma="on">mu</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="odstraniti">odstranimo</w>
|
||||
<S/>
|
||||
<w msd="Sozmt" lemma="peška">peške</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Somei" lemma="pekač">Pekač</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="vzeti">vzamemo</w>
|
||||
<S/>
|
||||
<w msd="Dr" lemma="iz">iz</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="pečica">pečice</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="po">po</w>
|
||||
<S/>
|
||||
<w msd="Somem" lemma="test">testu</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="posuti">posujemo</w>
|
||||
<S/>
|
||||
<w msd="Soset" lemma="grozdje">grozdje</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="dati">damo</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="nazaj">nazaj</w>
|
||||
<S/>
|
||||
<w msd="Sozem" lemma="peč">peči</w>
|
||||
<S/>
|
||||
<w msd="L" lemma="še">še</w>
|
||||
<S/>
|
||||
<w msd="Dt" lemma="za">za</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="pol">pol</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="ura">ure</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Ggdspm" lemma="ponuditi">Ponudimo</w>
|
||||
<S/>
|
||||
<w msd="Do" lemma="z">z</w>
|
||||
<S/>
|
||||
<w msd="Sozeo" lemma="rezina">rezino</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="pomaranča">pomaranče</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Ppnmein" lemma="dober">Dober</w>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="tek">tek</w>
|
||||
<c>!</c>
|
||||
</s>
|
||||
</p>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
408
src/main/resources/Gigafida_subset/nested/F0037258.xml
Normal file
408
src/main/resources/Gigafida_subset/nested/F0037258.xml
Normal file
@@ -0,0 +1,408 @@
|
||||
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0037258" xml:lang="sl">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title>Gigafida: INTERNET (2010-11-09)</title>
|
||||
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
|
||||
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
|
||||
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
|
||||
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
|
||||
</funder>
|
||||
</titleStmt>
|
||||
<editionStmt>
|
||||
<edition>1.0</edition>
|
||||
</editionStmt>
|
||||
<extent>104 besed</extent>
|
||||
<publicationStmt>
|
||||
<idno>n-tv.si</idno>
|
||||
<availability status="restricted">
|
||||
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
|
||||
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
|
||||
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
|
||||
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
|
||||
</p>
|
||||
</availability>
|
||||
<date>2012-04-15</date>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<bibl>
|
||||
<title>INTERNET</title>
|
||||
<author n="???">neznani avtor</author>
|
||||
<date>2010-11-09</date>
|
||||
<publisher n="internet, novice">n-tv.si</publisher>
|
||||
<note type="sourceLang"/>
|
||||
<note n="URL">
|
||||
<list>
|
||||
<item>
|
||||
<ref target="http://www.n-tv.si/zakaj-moci-posteljo">http://www.n-tv.si/zakaj-moci-posteljo</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.n-tv.si/video-blog-joc-o-predsodkih">
|
||||
http://www.n-tv.si/video-blog-joc-o-predsodkih
|
||||
</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.n-tv.si/resnicne-zgodbe">http://www.n-tv.si/resnicne-zgodbe</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.n-tv.si/resnicna-zgodba-v-spomin-andreju-markovicu">
|
||||
http://www.n-tv.si/resnicna-zgodba-v-spomin-andreju-markovicu
|
||||
</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.n-tv.si/ogledalo-ki-ni-iz-stekla-naj-bi-prinasalo-bogastvo">
|
||||
http://www.n-tv.si/ogledalo-ki-ni-iz-stekla-naj-bi-prinasalo-bogastvo
|
||||
</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.n-tv.si/dermatologija-znebite-se-tezav-s-srbeco-kozo">
|
||||
http://www.n-tv.si/dermatologija-znebite-se-tezav-s-srbeco-kozo
|
||||
</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.n-tv.si/je-res-slab-metabolizem-vzrok-debelosti">
|
||||
http://www.n-tv.si/je-res-slab-metabolizem-vzrok-debelosti
|
||||
</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.n-tv.si/strokovnjaki?p%3D4">http://www.n-tv.si/strokovnjaki?p=4</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.n-tv.si/pregled-rubrike?p%3D7">http://www.n-tv.si/pregled-rubrike?p=7
|
||||
</ref>
|
||||
</item>
|
||||
</list>
|
||||
</note>
|
||||
</bibl>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<encodingDesc>
|
||||
<projectDesc>
|
||||
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
|
||||
</p>
|
||||
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
|
||||
</p>
|
||||
</projectDesc>
|
||||
<tagsDecl>
|
||||
<namespace name="http://www.tei-c.org/ns/1.0">
|
||||
<tagUsage gi="S" occurs="98"/>
|
||||
<tagUsage gi="body" occurs="1"/>
|
||||
<tagUsage gi="c" occurs="23"/>
|
||||
<tagUsage gi="p" occurs="9"/>
|
||||
<tagUsage gi="s" occurs="13"/>
|
||||
<tagUsage gi="text" occurs="1"/>
|
||||
<tagUsage gi="w" occurs="104"/>
|
||||
</namespace>
|
||||
</tagsDecl>
|
||||
<classDecl>
|
||||
<taxonomy xml:id="SSJ">
|
||||
<category xml:id="SSJ.T">
|
||||
<catDesc>tisk</catDesc>
|
||||
<category xml:id="SSJ.T.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
<category xml:id="SSJ.T.K.L">
|
||||
<catDesc>leposlovno</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.K.S">
|
||||
<catDesc>strokovno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="SSJ.T.P.C">
|
||||
<catDesc>časopis</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P.R">
|
||||
<catDesc>revija</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.D">
|
||||
<catDesc>drugo</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.I">
|
||||
<catDesc>internet</catDesc>
|
||||
</category>
|
||||
</taxonomy>
|
||||
</classDecl>
|
||||
</encodingDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="#SSJ.I"/>
|
||||
</textClass>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text xml:id="F0037258." xml:lang="sl">
|
||||
<body>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Rsn" lemma="kako">Kako</w>
|
||||
<S/>
|
||||
<w msd="Zp------k" lemma="se">se</w>
|
||||
<S/>
|
||||
<w msd="Ggdn" lemma="znebiti">znebiti</w>
|
||||
<S/>
|
||||
<w msd="Sozmr" lemma="težava">težav</w>
|
||||
<S/>
|
||||
<w msd="Do" lemma="z">s</w>
|
||||
<S/>
|
||||
<w msd="Pdnzeo" lemma="srbeč">srbečo</w>
|
||||
<S/>
|
||||
<w msd="Sozeo" lemma="koža">kožo</w>
|
||||
<c>?</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Ppnzei" lemma="resničen">RESNIČNA</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="zgodba">ZGODBA</w>
|
||||
<c>:</c>
|
||||
<S/>
|
||||
<w msd="Ppnmmi" lemma="sam">Sami</w>
|
||||
<S/>
|
||||
<w msd="Zp---d--k" lemma="se">si</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="izbrati">izberemo</w>
|
||||
<S/>
|
||||
<w msd="Ppnzet" lemma="pravi">pravo</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="pot">pot</w>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="prispevek">Prispevek</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="ki">ki</w>
|
||||
<S/>
|
||||
<w msd="Ggnste" lemma="pričati">priča</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="o">o</w>
|
||||
<S/>
|
||||
<w msd="Zk-sem" lemma="ta">tem</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="da">da</w>
|
||||
<S/>
|
||||
<w msd="Gp-ste-n" lemma="biti">je</w>
|
||||
<S/>
|
||||
<w msd="Zc-sei" lemma="ves">vse</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="v">v</w>
|
||||
<S/>
|
||||
<w msd="Sosem" lemma="življenje">življenju</w>
|
||||
<S/>
|
||||
<w msd="Ppnsei" lemma="mogoč">mogoče</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="da">da</w>
|
||||
<S/>
|
||||
<w msd="Gp-ste-n" lemma="biti">je</w>
|
||||
<S/>
|
||||
<w msd="Ppnzei" lemma="pravi">prava</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="pot">pot</w>
|
||||
<S/>
|
||||
<w msd="Zk-zei" lemma="tisti">tista</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="ki">ki</w>
|
||||
<c>.</c>
|
||||
<c>.</c>
|
||||
<c>.</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Slmei" lemma="bert">bert</w>
|
||||
<S/>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="09.11.2010">09.11.2010</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="ob">ob</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="12:35">12:35</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Rsn" lemma="kako">Kako</w>
|
||||
<S/>
|
||||
<w msd="Ggnstm" lemma="izdelovati">izdelujejo</w>
|
||||
<S/>
|
||||
<w msd="Ppnsmt" lemma="kovinski">kovinska</w>
|
||||
<S/>
|
||||
<w msd="Soser" lemma="ogledalo">ogledala</w>
|
||||
<c>?</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Slmei" lemma="Valentinrozman">ValentinRozman</w>
|
||||
<S/>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="29.03.2010">29.03.2010</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="ob">ob</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="12:25">12:25</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Rsn" lemma="kako">Kako</w>
|
||||
<S/>
|
||||
<w msd="Zp------k" lemma="se">se</w>
|
||||
<S/>
|
||||
<w msd="Ggdn" lemma="znebiti">znebiti</w>
|
||||
<S/>
|
||||
<w msd="Sozmr" lemma="težava">težav</w>
|
||||
<S/>
|
||||
<w msd="Do" lemma="z">s</w>
|
||||
<S/>
|
||||
<w msd="Pdnzeo" lemma="srbeč">srbečo</w>
|
||||
<S/>
|
||||
<w msd="Sozeo" lemma="koža">kožo</w>
|
||||
<c>?</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Somei" lemma="pojav">Pojav</w>
|
||||
<S/>
|
||||
<w msd="Pdnzer" lemma="srbeč">srbeče</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="koža">kože</w>
|
||||
<S/>
|
||||
<w msd="Gp-ste-n" lemma="biti">je</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="danes">danes</w>
|
||||
<S/>
|
||||
<w msd="L" lemma="skorajda">skorajda</w>
|
||||
<S/>
|
||||
<w msd="Ppszei" lemma="pogost">najpogostejša</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="motnja">motnja</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Rsn" lemma="kaj">Kaj</w>
|
||||
<S/>
|
||||
<w msd="Ggnste" lemma="povzročati">povzroča</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="srbečica">srbečico</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="kako">kako</w>
|
||||
<S/>
|
||||
<w msd="Zotzet--k" lemma="on">jo</w>
|
||||
<S/>
|
||||
<w msd="Ggdn" lemma="odpraviti">odpraviti</w>
|
||||
<S/>
|
||||
<w msd="Ggnste" lemma="pojasnjevati">pojasnjuje</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="dermatologinja">dermatologinja</w>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Tanja">Tanja</w>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Planinšek">Planinšek</w>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Ručigaj">Ručigaj</w>
|
||||
<c>.</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Ppnzei" lemma="resničen">RESNIČNA</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="zgodba">ZGODBA</w>
|
||||
<c>:</c>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="prezgodaj">Prezgodaj</w>
|
||||
<S/>
|
||||
<w msd="Gp-ste-n" lemma="biti">je</w>
|
||||
<S/>
|
||||
<w msd="Ggdd-em" lemma="izgubiti">izgubil</w>
|
||||
<S/>
|
||||
<w msd="Ppnset" lemma="dragocen">dragoceno</w>
|
||||
<S/>
|
||||
<w msd="Soset" lemma="življenje">življenje</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Somer" lemma="mik">Mika</w>
|
||||
<S/>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="08.11.2010">08.11.2010</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="ob">ob</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="09:56">09:56</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Rsn" lemma="kako">Kako</w>
|
||||
<S/>
|
||||
<w msd="Ggnstm" lemma="izdelovati">izdelujejo</w>
|
||||
<S/>
|
||||
<w msd="Ppnsmt" lemma="kovinski">kovinska</w>
|
||||
<S/>
|
||||
<w msd="Ggdd-ez" lemma="ogledati">ogledala</w>
|
||||
<c>?</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Ppnzei" lemma="edinstven">Edinstvena</w>
|
||||
<S/>
|
||||
<w msd="Ppnsmi" lemma="kovinski">kovinska</w>
|
||||
<S/>
|
||||
<w msd="Sosmi" lemma="ogledalo">ogledala</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Pdnzei" lemma="izdelan">izdelana</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="v">v</w>
|
||||
<S/>
|
||||
<w msd="Slzem" lemma="Indija">Indiji</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="L" lemma="naj">naj</w>
|
||||
<S/>
|
||||
<w msd="Gp-g" lemma="biti">bi</w>
|
||||
<S/>
|
||||
<w msd="Ggnd-ez" lemma="prinašati">prinašala</w>
|
||||
<S/>
|
||||
<w msd="Soset" lemma="bogastvo">bogastvo</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="sreča">srečo</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Ggdvdm" lemma="pogledati">Poglejte</w>
|
||||
<S/>
|
||||
<w msd="Zp---d--k" lemma="se">si</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="kako">kako</w>
|
||||
<S/>
|
||||
<w msd="Ggnstm" lemma="izdelovati">izdelujejo</w>
|
||||
<S/>
|
||||
<w msd="Zk-zei" lemma="ta">ta</w>
|
||||
<S/>
|
||||
<w msd="Ppnsmt" lemma="unikaten">unikatna</w>
|
||||
<S/>
|
||||
<w msd="Sosmt" lemma="ogledalo">ogledala</w>
|
||||
<c>.</c>
|
||||
</s>
|
||||
</p>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
391
src/main/resources/Gigafida_subset/nested/F0037544.xml
Normal file
391
src/main/resources/Gigafida_subset/nested/F0037544.xml
Normal file
@@ -0,0 +1,391 @@
|
||||
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0037544" xml:lang="sl">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title>Gigafida: INTERNET (2010-12-09)</title>
|
||||
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
|
||||
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
|
||||
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
|
||||
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
|
||||
</funder>
|
||||
</titleStmt>
|
||||
<editionStmt>
|
||||
<edition>1.0</edition>
|
||||
</editionStmt>
|
||||
<extent>121 besed</extent>
|
||||
<publicationStmt>
|
||||
<idno>arhivo.com</idno>
|
||||
<availability status="restricted">
|
||||
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
|
||||
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
|
||||
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
|
||||
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
|
||||
</p>
|
||||
</availability>
|
||||
<date>2012-04-15</date>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<bibl>
|
||||
<title>INTERNET</title>
|
||||
<author n="???">neznani avtor</author>
|
||||
<date>2010-12-09</date>
|
||||
<publisher n="internet, novice">arhivo.com</publisher>
|
||||
<note type="sourceLang"/>
|
||||
<note n="URL">
|
||||
<list>
|
||||
<item>
|
||||
<ref target="http://www.arhivo.com/korenckova-juha-z-ajdovo-kaso&GID%3D8">
|
||||
http://www.arhivo.com/korenckova-juha-z-ajdovo-kaso&GID=8
|
||||
</ref>
|
||||
</item>
|
||||
</list>
|
||||
</note>
|
||||
</bibl>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<encodingDesc>
|
||||
<projectDesc>
|
||||
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
|
||||
</p>
|
||||
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
|
||||
</p>
|
||||
</projectDesc>
|
||||
<tagsDecl>
|
||||
<namespace name="http://www.tei-c.org/ns/1.0">
|
||||
<tagUsage gi="S" occurs="119"/>
|
||||
<tagUsage gi="body" occurs="1"/>
|
||||
<tagUsage gi="c" occurs="21"/>
|
||||
<tagUsage gi="p" occurs="3"/>
|
||||
<tagUsage gi="s" occurs="9"/>
|
||||
<tagUsage gi="text" occurs="1"/>
|
||||
<tagUsage gi="w" occurs="121"/>
|
||||
</namespace>
|
||||
</tagsDecl>
|
||||
<classDecl>
|
||||
<taxonomy xml:id="SSJ">
|
||||
<category xml:id="SSJ.T">
|
||||
<catDesc>tisk</catDesc>
|
||||
<category xml:id="SSJ.T.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
<category xml:id="SSJ.T.K.L">
|
||||
<catDesc>leposlovno</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.K.S">
|
||||
<catDesc>strokovno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="SSJ.T.P.C">
|
||||
<catDesc>časopis</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P.R">
|
||||
<catDesc>revija</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.D">
|
||||
<catDesc>drugo</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.I">
|
||||
<catDesc>internet</catDesc>
|
||||
</category>
|
||||
</taxonomy>
|
||||
</classDecl>
|
||||
</encodingDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="#SSJ.I"/>
|
||||
</textClass>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text xml:id="F0037544." xml:lang="sl">
|
||||
<body>
|
||||
<p>
|
||||
<s>
|
||||
<c>-</c>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="eko">eko</w>
|
||||
<S/>
|
||||
<w msd="Ppnzei" lemma="zelenjaven">zelenjavna</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="kocka">kocka</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Gp-sdm-n" lemma="biti">Ste</w>
|
||||
<S/>
|
||||
<w msd="Ppnmmi" lemma="bolan">bolni</w>
|
||||
<c>?</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Ppnmein" lemma="preprost">Preprost</w>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="recept">recept</w>
|
||||
<S/>
|
||||
<w msd="Dt" lemma="za">za</w>
|
||||
<S/>
|
||||
<w msd="Psnzet" lemma="korenčkov">korenčkovo</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="juha">juho</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Psnzet" lemma="ajdov">ajdovo</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="kaša">kašo</w>
|
||||
<S/>
|
||||
<c>(</c>
|
||||
<w msd="Vd" lemma="ki">ki</w>
|
||||
<S/>
|
||||
<w msd="Zotmdt--k" lemma="on">ju</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="lahko">lahko</w>
|
||||
<S/>
|
||||
<w msd="Ggnsdm" lemma="jesti">jeste</w>
|
||||
<S/>
|
||||
<w msd="L" lemma="tudi">tudi</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="posebej">posebej</w>
|
||||
<c>)</c>
|
||||
<S/>
|
||||
<w msd="Ggdvdm" lemma="uporabiti">uporabite</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="v">v</w>
|
||||
<S/>
|
||||
<w msd="Somem" lemma="čas">času</w>
|
||||
<S/>
|
||||
<w msd="Sozmr" lemma="viroza">viroz</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="saj">saj</w>
|
||||
<S/>
|
||||
<w msd="Gp-ste-n" lemma="biti">je</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="zelo">zelo</w>
|
||||
<S/>
|
||||
<w msd="Ppnmein" lemma="lahek">lahek</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Ppnmein" lemma="neškodljiv">neškodljiv</w>
|
||||
<S/>
|
||||
<w msd="Dt" lemma="za">za</w>
|
||||
<S/>
|
||||
<w msd="Sometn" lemma="želodec">želodec</w>
|
||||
<c>.</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Ggdspm" lemma="oprati">Operemo</w>
|
||||
<S/>
|
||||
<w msd="Soset" lemma="korenje">korenje</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Psnzet" lemma="ajdov">ajdovo</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="kaša">kašo</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Dt" lemma="v">V</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="približno">približno</w>
|
||||
<S/>
|
||||
<w msd="Sometn" lemma="liter">liter</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="voda">vode</w>
|
||||
<S/>
|
||||
<c>(</c>
|
||||
<w msd="Rsn" lemma="odvisno">odvisno</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="kako">kako</w>
|
||||
<S/>
|
||||
<w msd="Ppnzet" lemma="gost">gosto</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="juha">juho</w>
|
||||
<S/>
|
||||
<w msd="Ggnspm" lemma="želeti">želimo</w>
|
||||
<c>)</c>
|
||||
<S/>
|
||||
<w msd="Dt" lemma="na">na</w>
|
||||
<S/>
|
||||
<w msd="Sommt" lemma="kolobar">kolobarje</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="narezati">narežemo</w>
|
||||
<S/>
|
||||
<w msd="Soset" lemma="korenje">korenje</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="pol">pol</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="eko">eko</w>
|
||||
<S/>
|
||||
<w msd="Ppnzer" lemma="zelenjaven">zelenjavne</w>
|
||||
<S/>
|
||||
<w msd="Sozmt" lemma="kocka">kocke</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="ter">ter</w>
|
||||
<S/>
|
||||
<w msd="Ggnspm" lemma="kuhati">kuhamo</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="dokler">dokler</w>
|
||||
<S/>
|
||||
<w msd="Sosei" lemma="korenje">korenje</w>
|
||||
<S/>
|
||||
<w msd="L" lemma="ne">ne</w>
|
||||
<S/>
|
||||
<w msd="Ggdste" lemma="postati">postane</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="mehko">mehko</w>
|
||||
<S/>
|
||||
<c>(</c>
|
||||
<w msd="Rsn" lemma="približno">približno</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="20">20</w>
|
||||
<S/>
|
||||
<w msd="Sozmr" lemma="minuta">minut</w>
|
||||
<c>)</c>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Rsn" lemma="medtem">Medtem</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="v">v</w>
|
||||
<S/>
|
||||
<w msd="Kbzmmi" lemma="drug">drugi</w>
|
||||
<S/>
|
||||
<w msd="Sozem" lemma="posoda">posodi</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="zavreti">zavremo</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="približno">približno</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="3">3</w>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="dl">dl</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="voda">vode</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="ki">ki</w>
|
||||
<S/>
|
||||
<w msd="Zotzed--k" lemma="on">ji</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="dodati">dodamo</w>
|
||||
<S/>
|
||||
<w msd="Kbzzet" lemma="drug">drugo</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="polovica">polovico</w>
|
||||
<S/>
|
||||
<w msd="Ppnzer" lemma="zelenjaven">zelenjavne</w>
|
||||
<S/>
|
||||
<w msd="Sozmt" lemma="kocka">kocke</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Psnzet" lemma="ajdov">ajdovo</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="kaša">kašo</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Ggnspm" lemma="kuhati">Kuhamo</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="približno">približno</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="pol">pol</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="ura">ure</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Psnsei" lemma="korenčkov">Korenčkovo</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="juha">juho</w>
|
||||
<S/>
|
||||
<w msd="Ggnspm" lemma="soliti">solimo</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="po">po</w>
|
||||
<S/>
|
||||
<w msd="Somem" lemma="okus">okusu</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="kar">kar</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="v">v</w>
|
||||
<S/>
|
||||
<w msd="Sozem" lemma="posoda">posodi</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="zmešati">zmešamo</w>
|
||||
<S/>
|
||||
<w msd="Do" lemma="z">s</w>
|
||||
<S/>
|
||||
<w msd="Ppnmeo" lemma="paličen">paličnim</w>
|
||||
<S/>
|
||||
<w msd="Someo" lemma="mešalnik">mešalnikom</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vd" lemma="da">da</w>
|
||||
<S/>
|
||||
<w msd="Ggdste" lemma="postati">postane</w>
|
||||
<S/>
|
||||
<w msd="L" lemma="ravno">ravno</w>
|
||||
<S/>
|
||||
<w msd="L" lemma="prav">prav</w>
|
||||
<S/>
|
||||
<w msd="Ppnzei" lemma="gost">gosta</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Pdnzei" lemma="tekoč">tekoča</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Dm" lemma="na">Na</w>
|
||||
<S/>
|
||||
<w msd="Somem" lemma="konec">koncu</w>
|
||||
<S/>
|
||||
<w msd="Zotzet--z" lemma="vame">vanjo</w>
|
||||
<S/>
|
||||
<w msd="Ggdspm" lemma="dodati">dodamo</w>
|
||||
<S/>
|
||||
<w msd="L" lemma="še">še</w>
|
||||
<S/>
|
||||
<w msd="Psnzet" lemma="ajdov">ajdovo</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="kaša">kašo</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Ppnzei" lemma="dieten">dietna</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="a">a</w>
|
||||
<S/>
|
||||
<w msd="Ppnzei" lemma="okusen">okusna</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="jed">jed</w>
|
||||
<S/>
|
||||
<w msd="Gp-ste-n" lemma="biti">je</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="nared">nared</w>
|
||||
<c>.</c>
|
||||
</s>
|
||||
</p>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
355
src/main/resources/Gigafida_subset/nested/F0038754.xml
Normal file
355
src/main/resources/Gigafida_subset/nested/F0038754.xml
Normal file
@@ -0,0 +1,355 @@
|
||||
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0038754" xml:lang="sl">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title>Gigafida: INTERNET (2010-07-21)</title>
|
||||
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
|
||||
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
|
||||
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
|
||||
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
|
||||
</funder>
|
||||
</titleStmt>
|
||||
<editionStmt>
|
||||
<edition>1.0</edition>
|
||||
</editionStmt>
|
||||
<extent>97 besed</extent>
|
||||
<publicationStmt>
|
||||
<idno>spasteater.si</idno>
|
||||
<availability status="restricted">
|
||||
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
|
||||
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
|
||||
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
|
||||
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
|
||||
</p>
|
||||
</availability>
|
||||
<date>2012-04-15</date>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<bibl>
|
||||
<title>INTERNET</title>
|
||||
<author n="???">neznani avtor</author>
|
||||
<date>2010-07-21</date>
|
||||
<publisher n="internet, ustanove">spasteater.si</publisher>
|
||||
<note type="sourceLang"/>
|
||||
<note n="URL">
|
||||
<list>
|
||||
<item>
|
||||
<ref target="http://www.spasteater.si/si/novice_in_napovedi/4843/detail.html">
|
||||
http://www.spasteater.si/si/novice_in_napovedi/4843/detail.html
|
||||
</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.spasteater.si/si/vstopnice/kako_do_vstopnic/default.html">
|
||||
http://www.spasteater.si/si/vstopnice/kako_do_vstopnic/default.html
|
||||
</ref>
|
||||
</item>
|
||||
</list>
|
||||
</note>
|
||||
</bibl>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<encodingDesc>
|
||||
<projectDesc>
|
||||
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
|
||||
</p>
|
||||
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
|
||||
</p>
|
||||
</projectDesc>
|
||||
<tagsDecl>
|
||||
<namespace name="http://www.tei-c.org/ns/1.0">
|
||||
<tagUsage gi="S" occurs="91"/>
|
||||
<tagUsage gi="body" occurs="1"/>
|
||||
<tagUsage gi="c" occurs="18"/>
|
||||
<tagUsage gi="p" occurs="7"/>
|
||||
<tagUsage gi="s" occurs="12"/>
|
||||
<tagUsage gi="text" occurs="1"/>
|
||||
<tagUsage gi="w" occurs="97"/>
|
||||
</namespace>
|
||||
</tagsDecl>
|
||||
<classDecl>
|
||||
<taxonomy xml:id="SSJ">
|
||||
<category xml:id="SSJ.T">
|
||||
<catDesc>tisk</catDesc>
|
||||
<category xml:id="SSJ.T.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
<category xml:id="SSJ.T.K.L">
|
||||
<catDesc>leposlovno</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.K.S">
|
||||
<catDesc>strokovno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="SSJ.T.P.C">
|
||||
<catDesc>časopis</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P.R">
|
||||
<catDesc>revija</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.D">
|
||||
<catDesc>drugo</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.I">
|
||||
<catDesc>internet</catDesc>
|
||||
</category>
|
||||
</taxonomy>
|
||||
</classDecl>
|
||||
</encodingDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="#SSJ.I"/>
|
||||
</textClass>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text xml:id="F0038754." xml:lang="sl">
|
||||
<body>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="L" lemma="več">Več</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="o">o</w>
|
||||
<S/>
|
||||
<w msd="Sosem" lemma="križarjenje">križarjenju</w>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="tukaj">tukaj</w>
|
||||
<c>!</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Do" lemma="z">S</w>
|
||||
<S/>
|
||||
<w msd="Someo" lemma="kompas">Kompasom</w>
|
||||
<S/>
|
||||
<w msd="Ggnvdm" lemma="odkrivati">odkrivajte</w>
|
||||
<S/>
|
||||
<w msd="Ppnsmt" lemma="veličasten">veličastna</w>
|
||||
<S/>
|
||||
<w msd="Sosmt" lemma="mesto">mesta</w>
|
||||
<S/>
|
||||
<w msd="Ppnser" lemma="zahoden">zahodnega</w>
|
||||
<S/>
|
||||
<w msd="Slser" lemma="Sredozemlje">Sredozemlja</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Dm" lemma="na">Na</w>
|
||||
<S/>
|
||||
<w msd="Sosem" lemma="križarjenje">križarjenju</w>
|
||||
<S/>
|
||||
<w msd="Gp-ptd-n" lemma="biti">bosta</w>
|
||||
<S/>
|
||||
<w msd="Do" lemma="z">z</w>
|
||||
<S/>
|
||||
<w msd="Zod-mo" lemma="ti">vami</w>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="koncert">koncert</w>
|
||||
<S/>
|
||||
<w msd="Slzer" lemma="Alenka">Alenke</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Godec">Godec</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="komedija">komedija</w>
|
||||
<S/>
|
||||
<w msd="Slmer" lemma="Matjaž">Matjaža</w>
|
||||
<S/>
|
||||
<w msd="Slmer" lemma="Javšnik">Javšnika</w>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="optimist">Optimist</w>
|
||||
<S/>
|
||||
<c>!</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Dm" lemma="na">Na</w>
|
||||
<S/>
|
||||
<w msd="Sozem" lemma="ladja">ladji</w>
|
||||
<S/>
|
||||
<w msd="Gp-ptd-n" lemma="biti">bosta</w>
|
||||
<S/>
|
||||
<w msd="Ppnzei" lemma="zvezdniški">zvezdniška</w>
|
||||
<S/>
|
||||
<w msd="Somdi" lemma="gost">gosta</w>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Alenka">Alenka</w>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Godec">Godec</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Matjaž">Matjaž</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Javšnik">Javšnik</w>
|
||||
<c>.</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Dm" lemma="na">Na</w>
|
||||
<S/>
|
||||
<w msd="Ppnsem" lemma="jesenski">jesenskem</w>
|
||||
<S/>
|
||||
<w msd="Ppnsem" lemma="špasen">špasnem</w>
|
||||
<S/>
|
||||
<w msd="Sosem" lemma="križarjanje">križarjanju</w>
|
||||
<S/>
|
||||
<w msd="Zod-mt" lemma="ti">vas</w>
|
||||
<S/>
|
||||
<w msd="Gp-pte-n" lemma="biti">bo</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="ladja">ladja</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="MSC">MSC</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="fantastica">Fantastica</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="5">5</w>
|
||||
<c>*</c>
|
||||
<S/>
|
||||
<w msd="Ggdd-ez" lemma="popeljati">popeljala</w>
|
||||
<S/>
|
||||
<w msd="Do" lemma="med">med</w>
|
||||
<S/>
|
||||
<w msd="Sosmo" lemma="mesto">mesti</w>
|
||||
<S/>
|
||||
<w msd="Ppnser" lemma="zahoden">zahodnega</w>
|
||||
<S/>
|
||||
<w msd="Slser" lemma="Sredozemlje">Sredozemlja</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Ggnd-mm" lemma="križariti">Križarili</w>
|
||||
<S/>
|
||||
<w msd="Gp-pdm-n" lemma="biti">boste</w>
|
||||
<S/>
|
||||
<w msd="Dr" lemma="od">od</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="genova">Genove</w>
|
||||
<S/>
|
||||
<w msd="Dr" lemma="do">do</w>
|
||||
<S/>
|
||||
<w msd="Slmer" lemma="Neapelj">Neaplja</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Zp------k" lemma="se">se</w>
|
||||
<S/>
|
||||
<w msd="Ggdd-mm" lemma="ustaviti">ustavili</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="v">v</w>
|
||||
<S/>
|
||||
<w msd="Slmem" lemma="Palermo">Palermu</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Slmem" lemma="Tunis">Tunisu</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Ggdd-mm" lemma="obiskati">obiskali</w>
|
||||
<S/>
|
||||
<w msd="Sozet" lemma="palma">Palmo</w>
|
||||
<S/>
|
||||
<w msd="N" lemma="de">de</w>
|
||||
<S/>
|
||||
<w msd="Slzet" lemma="Mallorca">Mallorco</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Slzet" lemma="Barcelona">Barcelono</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Marseille">Marseille</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Rsn" lemma="vse">vse</w>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="v">v</w>
|
||||
<S/>
|
||||
<w msd="Kbzmem" lemma="en">enem</w>
|
||||
<S/>
|
||||
<w msd="Somem" lemma="teden">tednu</w>
|
||||
<c>!</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Somei" lemma="aranžma">Aranžma</w>
|
||||
<S/>
|
||||
<w msd="Ppnser" lemma="špasen">špasnega</w>
|
||||
<S/>
|
||||
<w msd="Soser" lemma="križarjenje">križarjenja</w>
|
||||
<S/>
|
||||
<w msd="Gp-ste-n" lemma="biti">je</w>
|
||||
<S/>
|
||||
<w msd="Dr" lemma="od">od</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="24.10">24.10</w>
|
||||
<S/>
|
||||
<w msd="Dr" lemma="do">do</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="31.10">31.10</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Kag" lemma="2010">2010</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Ggdvdm" lemma="pridružiti">Pridružite</w>
|
||||
<S/>
|
||||
<w msd="Zp------k" lemma="se">se</w>
|
||||
<S/>
|
||||
<w msd="Zop-md" lemma="jaz">nam</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Dm" lemma="na">na</w>
|
||||
<S/>
|
||||
<w msd="Ppnzem" lemma="veličasten">veličastni</w>
|
||||
<S/>
|
||||
<w msd="Sozem" lemma="križarka">križarki</w>
|
||||
<c>!</c>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Dm" lemma="v">V</w>
|
||||
<S/>
|
||||
<w msd="Somem" lemma="čas">ČASU</w>
|
||||
<S/>
|
||||
<w msd="Sozmr" lemma="počitnice">POČITNIC</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Dr" lemma="od">OD</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="1.7">1.7</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Dr" lemma="do">DO</w>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="13.9">13.9</w>
|
||||
<c>.</c>
|
||||
<S/>
|
||||
</s>
|
||||
<s>
|
||||
<w msd="Gp-ste-n" lemma="biti">JE</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="blagajna">BLAGAJNA</w>
|
||||
<S/>
|
||||
<w msd="Pdnzei" lemma="zaprt">ZAPRTA</w>
|
||||
<c>!</c>
|
||||
</s>
|
||||
</p>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
402
src/main/resources/Gigafida_subset/nested/F0038920.xml
Normal file
402
src/main/resources/Gigafida_subset/nested/F0038920.xml
Normal file
@@ -0,0 +1,402 @@
|
||||
<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="F0038920" xml:lang="sl">
|
||||
<teiHeader>
|
||||
<fileDesc>
|
||||
<titleStmt>
|
||||
<title>Gigafida: INTERNET (2010-09-20)</title>
|
||||
<funder>Operacijo delno financira Evropska unija iz Evropskega socialnega sklada ter Ministrstvo za šolstvo in
|
||||
šport. Operacija se izvaja v okviru Operativnega programa razvoja človeških virov za obdobje 2007-2013,
|
||||
razvojne prioritete: razvoj človeških virov in vseživljenjskega učenja; prednostne usmeritve: izboljšanje
|
||||
kakovosti in učinkovitosti sistemov izobraževanja in usposabljanja 2007-2013.
|
||||
</funder>
|
||||
</titleStmt>
|
||||
<editionStmt>
|
||||
<edition>1.0</edition>
|
||||
</editionStmt>
|
||||
<extent>79 besed</extent>
|
||||
<publicationStmt>
|
||||
<idno>drama.si</idno>
|
||||
<availability status="restricted">
|
||||
<p xml:lang="sl">Avtorske pravice za to izdajo ureja Pogodba o zbiranju in uporabi besedilnega korpusa v
|
||||
okviru projekta Sporazumevanje v slovenskem jeziku, katere skrbnik je Fakulteta za družbene vede Univerze
|
||||
v Ljubljani, Kardeljeva ploščad 5, Ljubljana. Kopija pogodbe je dostopna na URL
|
||||
<ref target="http://www.slovenscina.eu/korpusi/pogodba">www.slovenscina.eu/korpusi/pogodba</ref>
|
||||
</p>
|
||||
</availability>
|
||||
<date>2012-04-15</date>
|
||||
</publicationStmt>
|
||||
<sourceDesc>
|
||||
<bibl>
|
||||
<title>INTERNET</title>
|
||||
<author n="???">neznani avtor</author>
|
||||
<date>2010-09-20</date>
|
||||
<publisher n="internet, ustanove">drama.si</publisher>
|
||||
<note type="sourceLang"/>
|
||||
<note n="URL">
|
||||
<list>
|
||||
<item>
|
||||
<ref target="http://www.drama.si/eng/ansambel/matija-rozman.html">
|
||||
http://www.drama.si/eng/ansambel/matija-rozman.html
|
||||
</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.drama.si/eng/ansambel/andrej-nahtigal.html">
|
||||
http://www.drama.si/eng/ansambel/andrej-nahtigal.html
|
||||
</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.drama.si/eng/ansambel/janez-skof.html">
|
||||
http://www.drama.si/eng/ansambel/janez-skof.html
|
||||
</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.drama.si/eng/ansambel/boris-mihalj.html">
|
||||
http://www.drama.si/eng/ansambel/boris-mihalj.html
|
||||
</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.drama.si/eng/ansambel/marijana-brecelj.html">
|
||||
http://www.drama.si/eng/ansambel/marijana-brecelj.html
|
||||
</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.drama.si/eng/ansambel/gregor-bakovic.html">
|
||||
http://www.drama.si/eng/ansambel/gregor-bakovic.html
|
||||
</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.drama.si/repertoar/totenbirt.html">
|
||||
http://www.drama.si/repertoar/totenbirt.html
|
||||
</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.drama.si/ansambel/marijana-brecelj.html">
|
||||
http://www.drama.si/ansambel/marijana-brecelj.html
|
||||
</ref>
|
||||
</item>
|
||||
<item>
|
||||
<ref target="http://www.drama.si/ansambel/maja-koncar.html">
|
||||
http://www.drama.si/ansambel/maja-koncar.html
|
||||
</ref>
|
||||
</item>
|
||||
</list>
|
||||
</note>
|
||||
</bibl>
|
||||
</sourceDesc>
|
||||
</fileDesc>
|
||||
<encodingDesc>
|
||||
<projectDesc>
|
||||
<p xml:lang="sl">Projekt <ref target="http://www.slovenscina.eu/">Sporazumevanje v slovenskem jeziku</ref>.
|
||||
</p>
|
||||
<p xml:lang="en">Project <ref target="http://www.slovenscina.eu/">Communication in Slovene</ref>.
|
||||
</p>
|
||||
</projectDesc>
|
||||
<tagsDecl>
|
||||
<namespace name="http://www.tei-c.org/ns/1.0">
|
||||
<tagUsage gi="S" occurs="91"/>
|
||||
<tagUsage gi="body" occurs="1"/>
|
||||
<tagUsage gi="c" occurs="46"/>
|
||||
<tagUsage gi="p" occurs="10"/>
|
||||
<tagUsage gi="s" occurs="10"/>
|
||||
<tagUsage gi="text" occurs="1"/>
|
||||
<tagUsage gi="w" occurs="79"/>
|
||||
</namespace>
|
||||
</tagsDecl>
|
||||
<classDecl>
|
||||
<taxonomy xml:id="SSJ">
|
||||
<category xml:id="SSJ.T">
|
||||
<catDesc>tisk</catDesc>
|
||||
<category xml:id="SSJ.T.K">
|
||||
<catDesc>knjižno</catDesc>
|
||||
<category xml:id="SSJ.T.K.L">
|
||||
<catDesc>leposlovno</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.K.S">
|
||||
<catDesc>strokovno</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P">
|
||||
<catDesc>periodično</catDesc>
|
||||
<category xml:id="SSJ.T.P.C">
|
||||
<catDesc>časopis</catDesc>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.P.R">
|
||||
<catDesc>revija</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.T.D">
|
||||
<catDesc>drugo</catDesc>
|
||||
</category>
|
||||
</category>
|
||||
<category xml:id="SSJ.I">
|
||||
<catDesc>internet</catDesc>
|
||||
</category>
|
||||
</taxonomy>
|
||||
</classDecl>
|
||||
</encodingDesc>
|
||||
<profileDesc>
|
||||
<textClass>
|
||||
<catRef target="#SSJ.I"/>
|
||||
</textClass>
|
||||
</profileDesc>
|
||||
</teiHeader>
|
||||
<text xml:id="F0038920." xml:lang="sl">
|
||||
<body>
|
||||
<p>
|
||||
<s>
|
||||
<c>“</c>
|
||||
<w msd="Somei" lemma="totenbirt">Totenbirt</w>
|
||||
<c>”</c>
|
||||
<S/>
|
||||
<c>-</c>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Agata">Agata</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Jurkovič">Jurkovič</w>
|
||||
<S/>
|
||||
<c>–</c>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="jurkovička">Jurkovička</w>
|
||||
<S/>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="62">62</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="widow">widow</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="judge">judge</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Eli's">Eli's</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="older">older</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="sister">sister</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Slmei" lemma="Performs">Performs</w>
|
||||
<S/>
|
||||
<w msd="Vp" lemma="in">in</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<c>“</c>
|
||||
<w msd="Somei" lemma="totenbirt">Totenbirt</w>
|
||||
<c>”</c>
|
||||
<S/>
|
||||
<c>-</c>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Ivek">Ivek</w>
|
||||
<S/>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="66">66</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="drunk">drunk</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Slzei" lemma="Maja">Maja</w>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Končar">Končar</w>
|
||||
<S/>
|
||||
<c>-</c>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Marta">Marta</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Fijavž">Fijavž</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Roblek">Roblek</w>
|
||||
<S/>
|
||||
<c>-</c>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Martika">Martika</w>
|
||||
<S/>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="46">46</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="učiteljica">učiteljica</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Psnzei" lemma="Tomijev">Tomijeva</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="žena">žena</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="elina">Elina</w>
|
||||
<S/>
|
||||
<w msd="Pppzei" lemma="mlad">mlajša</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="sestra">sestra</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Slzei" lemma="Totenbirt">Totenbirt</w>
|
||||
<S/>
|
||||
<c>-</c>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Agata">Agata</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Jurkovič">Jurkovič</w>
|
||||
<S/>
|
||||
<c>-</c>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Jurkovička">Jurkovička</w>
|
||||
<S/>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="62">62</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="vdova">vdova</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="sodnica">sodnica</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="elina">Elina</w>
|
||||
<S/>
|
||||
<w msd="Pppzei" lemma="star">starejša</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="sestra">sestra</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Slzei" lemma="Totenbirt">Totenbirt</w>
|
||||
<S/>
|
||||
<c>-</c>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Marta">Marta</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Fijavž">Fijavž</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Roblek">Roblek</w>
|
||||
<S/>
|
||||
<c>-</c>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Martika">Martika</w>
|
||||
<S/>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="46">46</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="učiteljica">učiteljica</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Psnzei" lemma="Tomijev">Tomijeva</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="žena">žena</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="elina">Elina</w>
|
||||
<S/>
|
||||
<w msd="Pppzei" lemma="mlad">mlajša</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="sestra">sestra</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Slzei" lemma="Marijana">Marijana</w>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Brecelj">Brecelj</w>
|
||||
<S/>
|
||||
<c>-</c>
|
||||
<S/>
|
||||
<w msd="Slzei" lemma="Agata">Agata</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Jurkovič">Jurkovič</w>
|
||||
<S/>
|
||||
<c>-</c>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Jurkovička">Jurkovička</w>
|
||||
<S/>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Kag" lemma="62">62</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="vdova">vdova</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="sodnica">sodnica</w>
|
||||
<c>,</c>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="elina">Elina</w>
|
||||
<S/>
|
||||
<w msd="Pppzei" lemma="star">starejša</w>
|
||||
<S/>
|
||||
<w msd="Sozei" lemma="sestra">sestra</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<c>“</c>
|
||||
<w msd="Somei" lemma="totenbirt">Totenbirt</w>
|
||||
<c>”</c>
|
||||
<S/>
|
||||
<c>-</c>
|
||||
<S/>
|
||||
<w msd="N" lemma="the">The</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="lata">late</w>
|
||||
<S/>
|
||||
<w msd="N" lemma="južek">Južek</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<w msd="Slmei" lemma="Roberto">Roberto</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Zucco">Zucco</w>
|
||||
<S/>
|
||||
<c>-</c>
|
||||
<S/>
|
||||
<w msd="Slzet" lemma="1st">1st</w>
|
||||
<S/>
|
||||
<w msd="Slmei" lemma="Prison">Prison</w>
|
||||
<S/>
|
||||
<w msd="N" lemma="officer">Officer</w>
|
||||
<S/>
|
||||
<c>/</c>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="detective">Detective</w>
|
||||
<S/>
|
||||
<c>/</c>
|
||||
<S/>
|
||||
<w msd="Somei" lemma="1st">1st</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="polica">Police</w>
|
||||
<S/>
|
||||
<w msd="N" lemma="officer">Officer</w>
|
||||
</s>
|
||||
</p>
|
||||
<p>
|
||||
<s>
|
||||
<c>“</c>
|
||||
<w msd="Somei" lemma="totenbirt">Totenbirt</w>
|
||||
<c>”</c>
|
||||
<S/>
|
||||
<c>-</c>
|
||||
<S/>
|
||||
<w msd="N" lemma="the">The</w>
|
||||
<S/>
|
||||
<w msd="Sozer" lemma="lata">late</w>
|
||||
<S/>
|
||||
<w msd="Slmmi" lemma="Frenk">Frenki</w>
|
||||
</s>
|
||||
</p>
|
||||
</body>
|
||||
</text>
|
||||
</TEI>
|
||||
18
src/main/resources/Lists/prefixes.txt
Normal file
18
src/main/resources/Lists/prefixes.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
brez
|
||||
dis
|
||||
do
|
||||
eks
|
||||
inter
|
||||
iz
|
||||
na
|
||||
ne
|
||||
ni
|
||||
ob
|
||||
od
|
||||
po
|
||||
pre
|
||||
pri
|
||||
pro
|
||||
raz
|
||||
spre
|
||||
za
|
||||
7
src/main/resources/Lists/suffixes.txt
Normal file
7
src/main/resources/Lists/suffixes.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
ga
|
||||
ma
|
||||
me
|
||||
nj
|
||||
nje
|
||||
njo
|
||||
se
|
||||
54
src/main/resources/gui/CharacterAnalysisTab.fxml
Normal file
54
src/main/resources/gui/CharacterAnalysisTab.fxml
Normal file
@@ -0,0 +1,54 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<?import javafx.scene.control.Button?>
|
||||
<?import javafx.scene.control.CheckBox?>
|
||||
<?import javafx.scene.control.Hyperlink?>
|
||||
<?import javafx.scene.control.Label?>
|
||||
<?import javafx.scene.control.ProgressBar?>
|
||||
<?import javafx.scene.control.RadioButton?>
|
||||
<?import javafx.scene.control.TextField?>
|
||||
<?import javafx.scene.control.ToggleGroup?>
|
||||
<?import javafx.scene.layout.AnchorPane?>
|
||||
<?import javafx.scene.layout.HBox?>
|
||||
<?import javafx.scene.layout.Pane?>
|
||||
<?import org.controlsfx.control.CheckComboBox?>
|
||||
|
||||
<AnchorPane fx:id="characterAnalysisTab" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.112" xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.CharacterAnalysisTab">
|
||||
<Pane>
|
||||
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Število črk" />
|
||||
<TextField fx:id="stringLengthTF" layoutX="100.0" layoutY="20.0" prefWidth="180.0" />
|
||||
|
||||
<HBox layoutX="10.0" layoutY="60.0">
|
||||
<children>
|
||||
<RadioButton fx:id="lemmaRB" mnemonicParsing="false" prefHeight="25.0" prefWidth="86.0" selected="true" text="lema">
|
||||
<toggleGroup>
|
||||
<ToggleGroup fx:id="calculateForRB" />
|
||||
</toggleGroup></RadioButton>
|
||||
<RadioButton fx:id="varietyRB" mnemonicParsing="false" text="različnica" toggleGroup="$calculateForRB" />
|
||||
</children>
|
||||
</HBox>
|
||||
<Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Omejitev podatkov" />
|
||||
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Oznaka MSD" />
|
||||
<TextField fx:id="msdTF" layoutX="100.0" layoutY="160.0" prefWidth="180.0" />
|
||||
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Taksonomija" />
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="200.0" prefHeight="25.0" prefWidth="180.0" />
|
||||
|
||||
|
||||
<Pane fx:id="paneLetters" layoutX="0.0" layoutY="240.0" prefHeight="84.0" prefWidth="380.0">
|
||||
<children>
|
||||
<CheckBox fx:id="calculatecvvCB" layoutX="10.0" mnemonicParsing="false" prefHeight="25.0" text="Izračunaj za kombinacije samoglasnikov in soglasnikov" visible="false" />
|
||||
</children>
|
||||
</Pane>
|
||||
|
||||
<Button fx:id="computeNgramsB" layoutX="14.0" layoutY="382.0" mnemonicParsing="false" prefHeight="25.0" prefWidth="250.0" text="Izračunaj" />
|
||||
</Pane>
|
||||
|
||||
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:" />
|
||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0" text=" " wrapText="true" />
|
||||
|
||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
|
||||
|
||||
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0" />
|
||||
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0" />
|
||||
|
||||
</AnchorPane>
|
||||
32
src/main/resources/gui/CorpusTab.fxml
Normal file
32
src/main/resources/gui/CorpusTab.fxml
Normal file
@@ -0,0 +1,32 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<?import javafx.scene.control.Button?>
|
||||
<?import javafx.scene.control.CheckBox?>
|
||||
<?import javafx.scene.control.Label?>
|
||||
<?import javafx.scene.control.Hyperlink?>
|
||||
<?import javafx.scene.control.ProgressIndicator?>
|
||||
<?import javafx.scene.layout.AnchorPane?>
|
||||
<?import javafx.scene.layout.Pane?>
|
||||
|
||||
<AnchorPane prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111" xmlns:fx="http://javafx.com/fxml/1"
|
||||
fx:controller="gui.CorpusTab">
|
||||
<children>
|
||||
<Pane/>
|
||||
<Button fx:id="chooseCorpusLocationB" layoutX="10.0" layoutY="20.0" mnemonicParsing="false"
|
||||
text="Nastavi lokacijo korpusa"/>
|
||||
<CheckBox fx:id="readHeaderInfoChB" layoutX="176.0" layoutY="24.0" mnemonicParsing="false"
|
||||
text="Preberi info iz headerjev"/>
|
||||
<Pane fx:id="setCorpusWrapperP" layoutX="10.0" layoutY="60.0" prefHeight="118.0" prefWidth="683.0">
|
||||
<children>
|
||||
<Label fx:id="chooseCorpusL" prefHeight="50.0" prefWidth="704.0" text="Label"/>
|
||||
<CheckBox fx:id="gosUseOrthChB" layoutY="65.0" mnemonicParsing="false" text="Uporabi pogovorni zapis"/>
|
||||
</children>
|
||||
</Pane>
|
||||
<ProgressIndicator fx:id="locationScanPI" layoutX="10.0" layoutY="60.0" prefHeight="50.0" progress="0.0"/>
|
||||
<Button fx:id="chooseResultsLocationB" layoutX="10.0" layoutY="180.0" mnemonicParsing="false"
|
||||
text="Nastavi lokacijo rezultatov"/>
|
||||
<Label fx:id="chooseResultsL" layoutX="10.0" layoutY="220.0" text="Label"/>
|
||||
|
||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="20.0" text="Pomoč"/>
|
||||
</children>
|
||||
</AnchorPane>
|
||||
30
src/main/resources/gui/FiltersForSolar.fxml
Normal file
30
src/main/resources/gui/FiltersForSolar.fxml
Normal file
@@ -0,0 +1,30 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<?import javafx.scene.control.Label?>
|
||||
<?import javafx.scene.layout.AnchorPane?>
|
||||
<?import javafx.scene.layout.Pane?>
|
||||
<?import javafx.scene.control.Hyperlink?>
|
||||
<?import org.controlsfx.control.CheckComboBox?>
|
||||
|
||||
<AnchorPane fx:id="solarFiltersTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
|
||||
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.FiltersForSolar">
|
||||
<Pane>
|
||||
<CheckComboBox fx:id="solarRegijaCCB" layoutX="104.0" layoutY="40.0" prefHeight="25.0" prefWidth="372.0"/>
|
||||
<Label layoutX="14.0" layoutY="44.0" text="Regija:"/>
|
||||
<CheckComboBox fx:id="solarPredmetCCB" layoutX="104.0" layoutY="87.0" prefHeight="25.0" prefWidth="372.0"/>
|
||||
<Label layoutX="14.0" layoutY="91.0" text="Predmet"/>
|
||||
<CheckComboBox fx:id="solarRazredCCB" layoutX="104.0" layoutY="136.0" prefHeight="25.0" prefWidth="372.0"/>
|
||||
<Label layoutX="14.0" layoutY="140.0" text="Razred"/>
|
||||
<CheckComboBox fx:id="solarLetoCCB" layoutX="104.0" layoutY="189.0" prefHeight="25.0" prefWidth="372.0"/>
|
||||
<Label layoutX="14.0" layoutY="193.0" text="Leto"/>
|
||||
<CheckComboBox fx:id="solarSolaCCB" layoutX="104.0" layoutY="246.0" prefHeight="25.0" prefWidth="372.0"/>
|
||||
<Label layoutX="14.0" layoutY="250.0" text="Šola"/>
|
||||
<CheckComboBox fx:id="solarVrstaBesedilaCCB" layoutX="104.0" layoutY="293.0" prefHeight="25.0" prefWidth="372.0"/>
|
||||
<Label layoutX="14.0" layoutY="297.0" text="Vrsta besedila"/>
|
||||
|
||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="20.0" text="Pomoč"/>
|
||||
<Label layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
|
||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0"
|
||||
prefWidth="275.0" text=" " wrapText="true"/>
|
||||
</Pane>
|
||||
</AnchorPane>
|
||||
56
src/main/resources/gui/OneWordAnalysisTab.fxml
Executable file
56
src/main/resources/gui/OneWordAnalysisTab.fxml
Executable file
@@ -0,0 +1,56 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<?import java.lang.String?>
|
||||
<?import javafx.collections.FXCollections?>
|
||||
<?import javafx.scene.control.Button?>
|
||||
<?import javafx.scene.control.CheckBox?>
|
||||
<?import javafx.scene.control.Hyperlink?>
|
||||
<?import javafx.scene.control.ComboBox?>
|
||||
<?import javafx.scene.control.Label?>
|
||||
<?import javafx.scene.control.ProgressBar?>
|
||||
<?import javafx.scene.control.TextField?>
|
||||
<?import javafx.scene.layout.AnchorPane?>
|
||||
<?import javafx.scene.layout.Pane?>
|
||||
<?import org.controlsfx.control.CheckComboBox?>
|
||||
|
||||
<AnchorPane fx:id="oneWordAnalysisTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
|
||||
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.OneWordAnalysisTab">
|
||||
<Pane>
|
||||
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Izračunaj za"/>
|
||||
<ComboBox fx:id="calculateForCB" layoutX="100.0" layoutY="20.0" minWidth="180.0" prefWidth="150.0" promptText="izberi"
|
||||
visibleRowCount="5">
|
||||
<items>
|
||||
<FXCollections fx:factory="observableArrayList">
|
||||
<String fx:value="lema"/>
|
||||
<String fx:value="različnica"/>
|
||||
<String fx:value="oblikoskladenjska oznaka"/>
|
||||
<String fx:value="oblikoskladenjska lastnost"/>
|
||||
<String fx:value="besedna vrsta"/>
|
||||
</FXCollections>
|
||||
</items>
|
||||
</ComboBox>
|
||||
|
||||
<!-- MSD and Taxonomy separated -->
|
||||
|
||||
<Label layoutX="10.0" layoutY="80.0" prefHeight="25.0" text="Omejitev podatkov" />
|
||||
|
||||
<Label layoutX="10.0" layoutY="120.0" prefHeight="25.0" text="Oznaka MSD"/>
|
||||
<TextField fx:id="msdTF" layoutX="100.0" layoutY="120.0" prefWidth="180.0"/>
|
||||
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Taksonomija"/>
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="160.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
|
||||
|
||||
<Button fx:id="computeNgramsB" layoutX="14.0" layoutY="382.0" mnemonicParsing="false"
|
||||
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
|
||||
</Pane>
|
||||
|
||||
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
|
||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
|
||||
text=" " wrapText="true"/>
|
||||
|
||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
|
||||
|
||||
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
|
||||
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
|
||||
|
||||
</AnchorPane>
|
||||
13
src/main/resources/gui/SelectedFiltersPane.fxml
Normal file
13
src/main/resources/gui/SelectedFiltersPane.fxml
Normal file
@@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<?import javafx.scene.control.Label?>
|
||||
<?import javafx.scene.layout.AnchorPane?>
|
||||
<?import javafx.scene.control.Label?>
|
||||
<AnchorPane prefHeight="400.0" prefWidth="600.0" xmlns="http://javafx.com/javafx/8.0.111" xmlns:fx="http://javafx.com/fxml/1">
|
||||
<children>
|
||||
<Label layoutX="371.0" layoutY="26.0" text="Izbrani filtri:"/>
|
||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="371.0" layoutY="43.0" prefHeight="188.0"
|
||||
prefWidth="215.0"
|
||||
text=" "/>
|
||||
</children>
|
||||
</AnchorPane>
|
||||
105
src/main/resources/gui/StringAnalysisTabNew2.fxml
Executable file
105
src/main/resources/gui/StringAnalysisTabNew2.fxml
Executable file
@@ -0,0 +1,105 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<?import java.lang.String?>
|
||||
<?import javafx.collections.FXCollections?>
|
||||
<?import javafx.scene.control.Button?>
|
||||
<?import javafx.scene.control.CheckBox?>
|
||||
<?import javafx.scene.control.ComboBox?>
|
||||
<?import javafx.scene.control.Hyperlink?>
|
||||
<?import javafx.scene.control.Label?>
|
||||
<?import javafx.scene.control.ProgressBar?>
|
||||
<?import javafx.scene.control.TextField?>
|
||||
<?import javafx.scene.layout.AnchorPane?>
|
||||
<?import javafx.scene.layout.Pane?>
|
||||
<?import org.controlsfx.control.CheckComboBox?>
|
||||
|
||||
<AnchorPane fx:id="stringAnalysisTabPaneNew2" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
|
||||
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.StringAnalysisTabNew2">
|
||||
<Pane>
|
||||
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="N-gram nivo"/>
|
||||
<ComboBox fx:id="ngramValueCB" layoutX="100.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0" promptText="izberi"
|
||||
visibleRowCount="5">
|
||||
<items>
|
||||
<FXCollections fx:factory="observableArrayList">
|
||||
<String fx:value="2"/>
|
||||
<String fx:value="3"/>
|
||||
<String fx:value="4"/>
|
||||
<String fx:value="5"/>
|
||||
</FXCollections>
|
||||
</items>
|
||||
</ComboBox>
|
||||
<Label layoutX="10.0" layoutY="60.0" prefHeight="25.0" text="Izračunaj za"/>
|
||||
<ComboBox fx:id="calculateForCB" layoutX="100.0" layoutY="60.0" minWidth="180.0" prefWidth="150.0" promptText="izberi"
|
||||
visibleRowCount="5">
|
||||
<items>
|
||||
<FXCollections fx:factory="observableArrayList">
|
||||
<String fx:value="lema"/>
|
||||
<String fx:value="različnica"/>
|
||||
<String fx:value="oblikoskladenjska oznaka"/>
|
||||
<String fx:value="oblikoskladenjska lastnost"/>
|
||||
<String fx:value="besedna vrsta"/>
|
||||
</FXCollections>
|
||||
</items>
|
||||
</ComboBox>
|
||||
|
||||
|
||||
<Pane fx:id="paneWords" layoutX="0.0" layoutY="100.0" prefHeight="36.0" prefWidth="380.0">
|
||||
<children>
|
||||
<Label layoutX="10.0" prefHeight="25.0" text="Preskok besed"/>
|
||||
<ComboBox fx:id="skipValueCB" layoutX="100.0" prefWidth="180.0" promptText="izberi"
|
||||
visibleRowCount="5">
|
||||
<items>
|
||||
<FXCollections fx:factory="observableArrayList">
|
||||
<String fx:value="0"/>
|
||||
<String fx:value="1"/>
|
||||
<String fx:value="2"/>
|
||||
<String fx:value="3"/>
|
||||
<String fx:value="4"/>
|
||||
<String fx:value="5"/>
|
||||
<String fx:value="6"/>
|
||||
<String fx:value="7"/>
|
||||
</FXCollections>
|
||||
</items>
|
||||
</ComboBox>
|
||||
</children>
|
||||
</Pane>
|
||||
|
||||
|
||||
|
||||
|
||||
<!-- MSD and Taxonomy separated -->
|
||||
|
||||
<Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov"/>
|
||||
|
||||
<Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD"/>
|
||||
<TextField fx:id="msdTF" layoutX="100.0" layoutY="200.0" prefWidth="180.0"/>
|
||||
<Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija"/>
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
|
||||
|
||||
<!-- samoglasniki/soglasniki -->
|
||||
<Pane fx:id="paneLetters" layoutX="0.0" layoutY="280.0" prefHeight="84.0" prefWidth="380.0">
|
||||
<children>
|
||||
<CheckBox fx:id="calculatecvvCB" layoutX="10.0" mnemonicParsing="false" prefHeight="25.0"
|
||||
text="Izračunaj za kombinacije samoglasnikov in soglasnikov"/>
|
||||
<TextField fx:id="stringLengthTF" layoutX="100.0" layoutY="40.0" prefWidth="180.0"/>
|
||||
<Label layoutX="10.0" layoutY="40.0" prefHeight="25.0" text="Dolžina niza"/>
|
||||
</children>
|
||||
</Pane>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<Button fx:id="computeNgramsB" layoutX="14.0" layoutY="382.0" mnemonicParsing="false"
|
||||
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
|
||||
</Pane>
|
||||
|
||||
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
|
||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
|
||||
text=" " wrapText="true"/>
|
||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0" text="Pomoč" />
|
||||
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
|
||||
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
|
||||
|
||||
</AnchorPane>
|
||||
25
src/main/resources/gui/WordFormationTab.fxml
Normal file
25
src/main/resources/gui/WordFormationTab.fxml
Normal file
@@ -0,0 +1,25 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<?import org.controlsfx.control.CheckComboBox?>
|
||||
<?import javafx.scene.control.*?>
|
||||
<?import javafx.scene.layout.AnchorPane?>
|
||||
<?import javafx.scene.layout.Pane?>
|
||||
<AnchorPane fx:id="wordAnalysisTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
|
||||
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordFormationTab">
|
||||
<Pane>
|
||||
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/>
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
<Button fx:id="computeB" layoutX="14.0" layoutY="382.0" mnemonicParsing="false"
|
||||
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
|
||||
</Pane>
|
||||
|
||||
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
|
||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
|
||||
text=" " wrapText="true"/>
|
||||
|
||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="20.0" text="Pomoč"/>
|
||||
|
||||
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
|
||||
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
|
||||
|
||||
</AnchorPane>
|
||||
25
src/main/resources/gui/WordLevelTab.fxml
Normal file
25
src/main/resources/gui/WordLevelTab.fxml
Normal file
@@ -0,0 +1,25 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<?import org.controlsfx.control.CheckComboBox?>
|
||||
<?import javafx.scene.control.*?>
|
||||
<?import javafx.scene.layout.AnchorPane?>
|
||||
<?import javafx.scene.layout.Pane?>
|
||||
<AnchorPane fx:id="wordLevelAnalysisTabPane" prefHeight="600.0" prefWidth="800.0" xmlns="http://javafx.com/javafx/8.0.111"
|
||||
xmlns:fx="http://javafx.com/fxml/1" fx:controller="gui.WordLevelTab">
|
||||
<Pane>
|
||||
<Label layoutX="10.0" layoutY="20.0" prefHeight="25.0" text="Taksonomija"/>
|
||||
<CheckComboBox fx:id="taxonomyCCB" layoutX="100.0" layoutY="20.0" prefHeight="25.0" prefWidth="180.0"/>
|
||||
<Button fx:id="computeB" layoutX="14.0" layoutY="382.0" mnemonicParsing="false"
|
||||
prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
|
||||
</Pane>
|
||||
|
||||
<Label fx:id="solarFilters" layoutX="510.0" layoutY="20.0" text="Izbrani filtri:"/>
|
||||
<Label fx:id="selectedFiltersLabel" alignment="TOP_LEFT" layoutX="510.0" layoutY="45.0" prefHeight="540.0" prefWidth="275.0"
|
||||
text=" " wrapText="true"/>
|
||||
|
||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="20.0" text="Pomoč"/>
|
||||
|
||||
<ProgressBar fx:id="ngramProgressBar" layoutX="10.0" layoutY="517.0" prefHeight="16.0" prefWidth="780.0" progress="0.0"/>
|
||||
<Label fx:id="progressLabel" layoutX="10.0" layoutY="541.0" prefHeight="25.0" prefWidth="780.0"/>
|
||||
|
||||
</AnchorPane>
|
||||
22
src/main/resources/log4j2.xml
Normal file
22
src/main/resources/log4j2.xml
Normal file
@@ -0,0 +1,22 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Configuration status="ALL" name="log-config">
|
||||
<Properties>
|
||||
<Property name="LOG_DIR">log</Property>
|
||||
<Property name="ARCHIVE">${LOG_DIR}/archive</Property>
|
||||
<Property name="PATTERN">[%p] %d{dd.MM.yyyy HH:mm:ss} - %c.%M:%L - %m%n</Property>
|
||||
</Properties>
|
||||
<Appenders>
|
||||
<Console name="STDOUT" target="SYSTEM_OUT">
|
||||
<PatternLayout pattern="${PATTERN}"/>
|
||||
</Console>
|
||||
<File name="MyFile" fileName="${LOG_DIR}/CorpusAnalyzer.log" immediateFlush="true" append="true">
|
||||
<PatternLayout pattern="${PATTERN}"/>
|
||||
</File>
|
||||
</Appenders>
|
||||
<loggers>
|
||||
<root level="all">
|
||||
<appender-ref ref="MyFile" level="all"/>
|
||||
<appender-ref ref="STDOUT" level="all"/>
|
||||
</root>
|
||||
</loggers>
|
||||
</Configuration>
|
||||
85
src/test/java/Common.java
Normal file
85
src/test/java/Common.java
Normal file
@@ -0,0 +1,85 @@
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import data.Sentence;
|
||||
import data.Word;
|
||||
|
||||
public class Common {
|
||||
|
||||
public static List<Sentence> corpus;
|
||||
public static List<Sentence> minCorpus;
|
||||
public static List<Sentence> midCorpus;
|
||||
public static List<Sentence> midCorpusSkip;
|
||||
public static List<Sentence> josTest;
|
||||
|
||||
static {
|
||||
Sentence testSentence;
|
||||
|
||||
// full sentence
|
||||
List<Word> words = new ArrayList<>();
|
||||
words.add(new Word("ker", "ker", "Vd"));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n"));
|
||||
words.add(new Word("junak", "junak", "Somei"));
|
||||
words.add(new Word("v", "v", "Dm"));
|
||||
words.add(new Word("posesti", "posest", "Sozem"));
|
||||
words.add(new Word("nekaj", "nekaj", "Rsn"));
|
||||
words.add(new Word("o", "o", "Dm"));
|
||||
words.add(new Word("čemer", "kar", "Zz-sem"));
|
||||
words.add(new Word("se", "se", "Zp------k"));
|
||||
words.add(new Word("mu", "on", "Zotmed--k"));
|
||||
words.add(new Word("ne", "ne", "L"));
|
||||
words.add(new Word("sanja", "sanjati", "Ggnste"));
|
||||
words.add(new Word("a", "a", "Vp"));
|
||||
words.add(new Word("se", "se", "Zp------k"));
|
||||
words.add(new Word("onemu", "oni", "Zk-sed"));
|
||||
words.add(new Word("zdi", "zdeti", "Ggnste"));
|
||||
words.add(new Word("ključno", "ključen", "Ppnsei"));
|
||||
words.add(new Word("pri", "pri", "Dm"));
|
||||
words.add(new Word("operaciji", "operacija", "Sozem"));
|
||||
words.add(new Word("666", "666", "Kag"));
|
||||
|
||||
testSentence = new Sentence(words, "#Ft.Z.N.N");
|
||||
corpus = new ArrayList<>();
|
||||
corpus.add(testSentence);
|
||||
|
||||
// three word sentence
|
||||
testSentence = new Sentence(corpus.get(0).getSublist(0, 3), "#Ft.Z.N.N");
|
||||
minCorpus = new ArrayList<>();
|
||||
minCorpus.add(testSentence);
|
||||
|
||||
// five word sentence
|
||||
words = new ArrayList<>();
|
||||
words.add(new Word("ker", "ker", "Vd"));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n"));
|
||||
words.add(new Word("junak", "junak", "Somei"));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n"));
|
||||
words.add(new Word("posesti", "posest", "Sozem"));
|
||||
testSentence = new Sentence(words, "#Ft.Z.N.N");
|
||||
|
||||
midCorpus = new ArrayList<>();
|
||||
midCorpus.add(testSentence);
|
||||
|
||||
// five word sentence - for skipgrams
|
||||
words = new ArrayList<>();
|
||||
words.add(new Word("ker", "ker", "Vd"));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n"));
|
||||
words.add(new Word("junak", "junak", "Somei"));
|
||||
words.add(new Word("v", "v", "Dm"));
|
||||
words.add(new Word("posesti", "posest", "Sozem"));
|
||||
testSentence = new Sentence(words, "#Ft.Z.N.N");
|
||||
|
||||
midCorpusSkip = new ArrayList<>();
|
||||
midCorpusSkip.add(testSentence);
|
||||
|
||||
// JOS test
|
||||
words = new ArrayList<>();
|
||||
words.add(new Word("junak", "junak", "Somei"));
|
||||
words.add(new Word("ima", "imeti", "Ggnste-n"));
|
||||
words.add(new Word("posesti", "posest", "Sozem"));
|
||||
testSentence = new Sentence(words, "#Ft.Z.N.N");
|
||||
|
||||
josTest = new ArrayList<>();
|
||||
josTest.add(testSentence);
|
||||
}
|
||||
|
||||
}
|
||||
42
src/test/java/CorpusTests.java
Normal file
42
src/test/java/CorpusTests.java
Normal file
@@ -0,0 +1,42 @@
|
||||
import java.io.File;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.IOCase;
|
||||
import org.apache.commons.io.filefilter.FileFilterUtils;
|
||||
import org.apache.commons.io.filefilter.TrueFileFilter;
|
||||
import org.junit.Test;
|
||||
|
||||
import data.*;
|
||||
import javafx.collections.ObservableList;
|
||||
|
||||
public class CorpusTests {
|
||||
|
||||
@Test
|
||||
public void solarTest() {
|
||||
//File selectedDirectory = new File("/home/andrej/Desktop/corpus-analyzer/src/main/resources/Solar");
|
||||
// File selectedDirectory = new File("/home/andrej/Desktop/corpus-analyzer/src/main/resources/GOS");
|
||||
File selectedDirectory = new File("/home/andrej/Desktop/corpus-analyzer/src/main/resources/Gigafida_subset");
|
||||
|
||||
Settings.resultsFilePath = new File(selectedDirectory.getAbsolutePath().concat(File.separator));
|
||||
|
||||
Settings.corpus = FileUtils.listFiles(selectedDirectory, FileFilterUtils.suffixFileFilter("xml", IOCase.INSENSITIVE), TrueFileFilter.INSTANCE);
|
||||
|
||||
File f = Settings.corpus.iterator().next();
|
||||
|
||||
Statistics stats = new Statistics(AnalysisLevel.STRING_LEVEL, 2, 0, CalculateFor.WORD);
|
||||
// stats.setCorpusType(CorpusType.GOS);
|
||||
stats.setCorpusType(CorpusType.SOLAR);
|
||||
|
||||
// XML_processing.readXMLGos(f.toString(), stats);
|
||||
// XML_processing.readXML(f.toString(), stats);
|
||||
// XML_processing.readXMLHeaderTag(f.toString(), "stats");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test() {
|
||||
ObservableList<String> var = GosTaxonomy.getForComboBox();
|
||||
String debug = "";
|
||||
|
||||
}
|
||||
}
|
||||
66
src/test/java/DBTest.java
Normal file
66
src/test/java/DBTest.java
Normal file
@@ -0,0 +1,66 @@
|
||||
import static junit.framework.Assert.*;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.rocksdb.RocksDB;
|
||||
|
||||
import util.db.RDB;
|
||||
|
||||
public class DBTest {
|
||||
|
||||
static {
|
||||
RocksDB.loadLibrary();
|
||||
}
|
||||
|
||||
// @Test
|
||||
public void dbConnectorTest() throws UnsupportedEncodingException {
|
||||
String key1 = "alfa";
|
||||
AtomicLong value1 = new AtomicLong(10);
|
||||
String key2 = "beta";
|
||||
AtomicLong value2 = new AtomicLong(20);
|
||||
String key3 = "alfa";
|
||||
AtomicLong value3 = new AtomicLong(50);
|
||||
String key4 = "theta";
|
||||
AtomicLong value4 = new AtomicLong(40);
|
||||
|
||||
HashMap<String, AtomicLong> results = new HashMap<>();
|
||||
results.put(key1, value1);
|
||||
results.put(key2, value2);
|
||||
|
||||
RDB db = new RDB();
|
||||
db.writeBatch(results);
|
||||
|
||||
// let's check how that fared out
|
||||
Map<String, AtomicLong> dumpedResults = db.getDump();
|
||||
// should have 2 items
|
||||
assertEquals(2, dumpedResults.size());
|
||||
|
||||
// entry comparison
|
||||
assertTrue(dumpedResults.containsKey(key1));
|
||||
assertTrue(value1.longValue() == dumpedResults.get(key1).longValue());
|
||||
assertTrue(dumpedResults.containsKey(key2));
|
||||
assertTrue(value2.longValue() == dumpedResults.get(key2).longValue());
|
||||
|
||||
results = new HashMap<>();
|
||||
results.put(key3, value3);
|
||||
results.put(key4, value4);
|
||||
db.writeBatch(results);
|
||||
dumpedResults = db.getDump();
|
||||
|
||||
// should have 3 items with alfa's value reflecting summation
|
||||
assertEquals(3, dumpedResults.size());
|
||||
|
||||
// entry comparison
|
||||
assertTrue(dumpedResults.containsKey(key1));
|
||||
assertTrue(value1.longValue() + value3.longValue() == dumpedResults.get(key1).longValue());
|
||||
assertTrue(dumpedResults.containsKey(key2));
|
||||
assertTrue(value2.longValue() == dumpedResults.get(key2).longValue());
|
||||
assertTrue(dumpedResults.containsKey(key4));
|
||||
assertTrue(value4.longValue() == dumpedResults.get(key4).longValue());
|
||||
|
||||
db.delete();
|
||||
}
|
||||
}
|
||||
334
src/test/java/NgramTests.java
Normal file
334
src/test/java/NgramTests.java
Normal file
@@ -0,0 +1,334 @@
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import alg.ngram.Ngrams;
|
||||
import data.*;
|
||||
|
||||
@SuppressWarnings({"Duplicates", "unused"})
|
||||
public class NgramTests {
|
||||
|
||||
@Test
|
||||
public void letterNgramsTest() {
|
||||
Map<String, AtomicLong> result = null;
|
||||
|
||||
Filter filter = new Filter();
|
||||
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||
filter.setStringLength(4);
|
||||
filter.setNgramValue(0); // letters
|
||||
filter.setCalculateFor(CalculateFor.WORD);
|
||||
|
||||
Corpus testCorpus = new Corpus();
|
||||
testCorpus.setCorpusType(CorpusType.GIGAFIDA);
|
||||
testCorpus.setDetectedCorpusFiles(new ArrayList<>());
|
||||
|
||||
// tests:
|
||||
// - no regex
|
||||
StatisticsNew stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.minCorpus, stats);
|
||||
result = stats.getResult();
|
||||
|
||||
// tests:
|
||||
// - algorithm skips words that are shorter than set length value
|
||||
assertEquals(2, result.size());
|
||||
assertTrue(result.containsKey("juna"));
|
||||
assertEquals(1, result.get("juna").longValue());
|
||||
assertTrue(result.containsKey("unak"));
|
||||
assertEquals(1, result.get("unak").longValue());
|
||||
|
||||
// tests:
|
||||
// - map update (count) works ok
|
||||
filter.setStringLength(3);
|
||||
stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.midCorpus, stats);
|
||||
result = stats.getResult();
|
||||
|
||||
assertEquals(2, result.get("ima").longValue());
|
||||
|
||||
// tests:
|
||||
// - pre-check for the following regex test - this one should include word "ima", next one shouldn't
|
||||
filter.setStringLength(3);
|
||||
|
||||
stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.midCorpus, stats);
|
||||
result = stats.getResult();
|
||||
|
||||
assertTrue(result.containsKey("ima"));
|
||||
|
||||
// tests:
|
||||
// - regex: S.* // vsi samostalniki
|
||||
ArrayList<Pattern> msdRegex = new ArrayList<>();
|
||||
msdRegex.add(Pattern.compile("S.*"));
|
||||
filter.setMsd(msdRegex);
|
||||
|
||||
stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.midCorpus, stats);
|
||||
result = stats.getResult();
|
||||
|
||||
assertFalse(result.containsKey("ima"));
|
||||
|
||||
// tests:
|
||||
// - more precise regex
|
||||
msdRegex = new ArrayList<>();
|
||||
msdRegex.add(Pattern.compile("S.z.*")); // should include "posesti", but not "junak"
|
||||
filter.setMsd(msdRegex);
|
||||
filter.setStringLength(5);
|
||||
|
||||
stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.midCorpus, stats);
|
||||
result = stats.getResult();
|
||||
|
||||
assertFalse(result.containsKey("junak"));
|
||||
assertEquals(3, result.size());
|
||||
|
||||
// tests:
|
||||
// - trickier regex
|
||||
msdRegex = new ArrayList<>();
|
||||
msdRegex.add(Pattern.compile(".{2}")); // should count only for msd="Vd" - "ker"
|
||||
filter.setMsd(msdRegex);
|
||||
filter.setStringLength(3);
|
||||
|
||||
stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.midCorpus, stats);
|
||||
result = stats.getResult();
|
||||
|
||||
assertEquals(1, result.size());
|
||||
assertTrue(result.containsKey("ker"));
|
||||
assertEquals(1, result.get("ker").longValue());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void wordsNgramsTest() {
|
||||
Map<String, AtomicLong> result = null;
|
||||
|
||||
Filter filter = new Filter();
|
||||
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||
filter.setNgramValue(3);
|
||||
|
||||
Corpus testCorpus = new Corpus();
|
||||
testCorpus.setCorpusType(CorpusType.GIGAFIDA);
|
||||
testCorpus.setDetectedCorpusFiles(new ArrayList<>());
|
||||
|
||||
// tests:
|
||||
// - normal ngrams - word
|
||||
// midCorpus contains 5 words which should make for 3 3-grams
|
||||
filter.setCalculateFor(CalculateFor.WORD);
|
||||
StatisticsNew stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.midCorpus, stats);
|
||||
result = stats.getResult();
|
||||
|
||||
assertEquals(3, result.size());
|
||||
assertTrue(result.containsKey("ker ima junak"));
|
||||
assertTrue(result.containsKey("ima junak ima"));
|
||||
assertTrue(result.containsKey("junak ima posesti"));
|
||||
|
||||
// tests:
|
||||
// - normal ngrams - lemmas
|
||||
filter.setCalculateFor(CalculateFor.LEMMA);
|
||||
stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.midCorpus, stats);
|
||||
result = stats.getResult();
|
||||
|
||||
assertEquals(3, result.size());
|
||||
assertTrue(result.containsKey("ker imeti junak"));
|
||||
assertTrue(result.containsKey("imeti junak imeti"));
|
||||
assertTrue(result.containsKey("junak imeti posest"));
|
||||
|
||||
// tests:
|
||||
// - normal ngrams - msd
|
||||
filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
|
||||
stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.midCorpus, stats);
|
||||
result = stats.getResult();
|
||||
|
||||
assertEquals(3, result.size());
|
||||
assertTrue(result.containsKey("Vd Ggnste-n Somei"));
|
||||
assertTrue(result.containsKey("Ggnste-n Somei Ggnste-n"));
|
||||
assertTrue(result.containsKey("Somei Ggnste-n Sozem"));
|
||||
|
||||
// tests:
|
||||
// - ngrams - word - regex filter
|
||||
filter.setCalculateFor(CalculateFor.WORD);
|
||||
ArrayList<Pattern> msdRegex = new ArrayList<>();
|
||||
msdRegex.add(Pattern.compile("S.*"));
|
||||
msdRegex.add(Pattern.compile("G.*"));
|
||||
msdRegex.add(Pattern.compile(".*"));
|
||||
filter.setMsd(msdRegex);
|
||||
|
||||
stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.midCorpus, stats);
|
||||
result = stats.getResult();
|
||||
|
||||
assertEquals(1, result.size());
|
||||
assertTrue(result.containsKey("junak ima posesti"));
|
||||
|
||||
// tests:
|
||||
// - ngrams - word - regex filter
|
||||
filter.setCalculateFor(CalculateFor.WORD);
|
||||
filter.setNgramValue(2);
|
||||
msdRegex = new ArrayList<>();
|
||||
msdRegex.add(Pattern.compile("G.*"));
|
||||
msdRegex.add(Pattern.compile("Some.*"));
|
||||
filter.setMsd(msdRegex);
|
||||
|
||||
stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.midCorpus, stats);
|
||||
result = stats.getResult();
|
||||
|
||||
assertEquals(1, result.size());
|
||||
assertTrue(result.containsKey("ima junak"));
|
||||
}
|
||||
|
||||
|
||||
// @Test
|
||||
// public void ngramsTest() {
|
||||
// // minimal compliance test
|
||||
// Statistics stats = new Statistics(AnalysisLevel.STRING_LEVEL, 1, null, CalculateFor.MORPHOSYNTACTIC_SPECS);
|
||||
//
|
||||
// Map<String, AtomicLong> results = recalculate(minCorpus, stats);
|
||||
//
|
||||
// // 1-gram minCorpusa should equal minCorpus' size
|
||||
// assertEquals(minCorpus.get(0).getWords().size(), results.size());
|
||||
//
|
||||
// // each resulting word should have a frequency of 1
|
||||
// List<Word> words = minCorpus.get(0).getWords();
|
||||
// for (int i = 0; i < results.size(); i++) {
|
||||
// Word w = words.get(i);
|
||||
// AtomicLong frequency = results.get(w.getMsd());
|
||||
// assertEquals(1, frequency.intValue());
|
||||
// }
|
||||
//
|
||||
// // repeat for 2grams
|
||||
// stats = new Statistics(AnalysisLevel.STRING_LEVEL, 2, null, CalculateFor.MORPHOSYNTACTIC_SPECS);
|
||||
// results = recalculate(minCorpus, stats);
|
||||
//
|
||||
// // 2-gram of a 3 item corpus should equal 2 (first two words and second two words)
|
||||
// assertEquals(2, results.size());
|
||||
//
|
||||
// // add a filter
|
||||
// stats = new Statistics(AnalysisLevel.STRING_LEVEL, 1, null, CalculateFor.MORPHOSYNTACTIC_PROPERTY);
|
||||
//
|
||||
// List<String> morphosyntacticFilter = new ArrayList<>();
|
||||
// morphosyntacticFilter.add("Sozem");
|
||||
// stats.setMorphosyntacticFilter(morphosyntacticFilter);
|
||||
//
|
||||
// results = recalculate(minCorpus, stats);
|
||||
//
|
||||
// // since min corpus doesn't contain Sozem, results should be empty
|
||||
// assertEquals(0, results.size());
|
||||
//
|
||||
// stats = new Statistics(AnalysisLevel.STRING_LEVEL, 1, null, CalculateFor.MORPHOSYNTACTIC_PROPERTY);
|
||||
// morphosyntacticFilter = new ArrayList<>();
|
||||
// morphosyntacticFilter.add("Somei");
|
||||
// stats.setMorphosyntacticFilter(morphosyntacticFilter);
|
||||
// results = recalculate(minCorpus, stats);
|
||||
//
|
||||
// // since we have 1 Somei, 1 result
|
||||
// assertEquals(1, results.size());
|
||||
// assertEquals(1, results.get("Somei").intValue());
|
||||
//
|
||||
// // actual filter with wildcards
|
||||
// // 1gram
|
||||
// stats = new Statistics(AnalysisLevel.STRING_LEVEL, 1, null, CalculateFor.MORPHOSYNTACTIC_PROPERTY);
|
||||
// morphosyntacticFilter = new ArrayList<>();
|
||||
// morphosyntacticFilter.add("So***");
|
||||
// stats.setMorphosyntacticFilter(morphosyntacticFilter);
|
||||
// results = recalculate(minCorpus, stats);
|
||||
//
|
||||
// assertEquals(1, results.size());
|
||||
// assertEquals(1, results.get("Somei").intValue());
|
||||
//
|
||||
// // 2gram
|
||||
// stats = new Statistics(AnalysisLevel.STRING_LEVEL, 2, null, CalculateFor.MORPHOSYNTACTIC_PROPERTY);
|
||||
// morphosyntacticFilter = new ArrayList<>();
|
||||
// morphosyntacticFilter.add("Ggns*e-n");
|
||||
// morphosyntacticFilter.add("So***");
|
||||
// stats.setMorphosyntacticFilter(morphosyntacticFilter);
|
||||
// results = recalculate(minCorpus, stats);
|
||||
//
|
||||
// assertEquals(1, results.size());
|
||||
// assertEquals(1, results.get("Ggnste-n Somei").intValue());
|
||||
//
|
||||
// // 2gram midCorpus
|
||||
// stats = new Statistics(AnalysisLevel.STRING_LEVEL, 2, null, CalculateFor.MORPHOSYNTACTIC_PROPERTY);
|
||||
// morphosyntacticFilter = new ArrayList<>();
|
||||
// morphosyntacticFilter.add("Ggns*e-n");
|
||||
// morphosyntacticFilter.add("So***");
|
||||
// stats.setMorphosyntacticFilter(morphosyntacticFilter);
|
||||
// results = recalculate(midCorpus, stats);
|
||||
//
|
||||
// assertEquals(2, results.size());
|
||||
// assertEquals(1, results.get("Ggnste-n Somei").intValue());
|
||||
// assertEquals(1, results.get("Ggnste-n Sozem").intValue());
|
||||
// }
|
||||
|
||||
private Map<String, AtomicLong> recalculate(List<Sentence> corpus, Statistics stats) {
|
||||
// calculateForAll(corpus, stats);
|
||||
return stats.getResult();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void skipgramsTest() {
|
||||
Map<String, AtomicLong> result = null;
|
||||
|
||||
Filter filter = new Filter();
|
||||
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||
filter.setCalculateFor(CalculateFor.WORD);
|
||||
|
||||
Corpus testCorpus = new Corpus();
|
||||
testCorpus.setCorpusType(CorpusType.GIGAFIDA);
|
||||
testCorpus.setDetectedCorpusFiles(new ArrayList<>());
|
||||
|
||||
// tests:
|
||||
// - bigrams
|
||||
filter.setNgramValue(2);
|
||||
StatisticsNew stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.midCorpusSkip, stats);
|
||||
result = stats.getResult();
|
||||
|
||||
Set<String> bigrams = new HashSet<>(Arrays.asList("ker ima", "ima junak", "junak v", "v posesti"));
|
||||
Set<String> bigramsActual = result.keySet();
|
||||
assertEquals(bigrams, bigramsActual);
|
||||
|
||||
// test:
|
||||
// - two skip bigrams
|
||||
filter.setNgramValue(2);
|
||||
filter.setSkipValue(2);
|
||||
stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.midCorpusSkip, stats);
|
||||
result = stats.getResult();
|
||||
|
||||
Set<String> twoSkipBigrams = new HashSet<>(Arrays.asList("ker ima", "ker junak", "ker v", "ima junak", "ima v", "ima posesti", "junak v", "junak posesti", "v posesti"));
|
||||
Set<String> twoSkipBigramsActual = result.keySet();
|
||||
|
||||
assertEquals(twoSkipBigrams, twoSkipBigramsActual);
|
||||
|
||||
// tests:
|
||||
// - trigrams
|
||||
filter.setNgramValue(3);
|
||||
filter.setSkipValue(null);
|
||||
stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.midCorpusSkip, stats);
|
||||
result = stats.getResult();
|
||||
Set<String> trigrams = new HashSet<>(Arrays.asList("ker ima junak", "ima junak v", "junak v posesti"));
|
||||
Set<String> trigramsActual = result.keySet();
|
||||
|
||||
assertEquals(trigrams, trigramsActual);
|
||||
|
||||
// tests:
|
||||
// - two skip trigrams
|
||||
filter.setNgramValue(3);
|
||||
filter.setSkipValue(2);
|
||||
stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.midCorpusSkip, stats);
|
||||
result = stats.getResult();
|
||||
HashSet<String> twoSkipTrigrams = new HashSet<>(Arrays.asList("ker ima junak", "ker ima v", "ker ima posesti", "ker junak v", "ker junak posesti", "ker v posesti", "ima junak v", "ima junak posesti", "ima v posesti", "junak v posesti"));
|
||||
Set<String> twoSkipTrigramsActual = result.keySet();
|
||||
|
||||
assertEquals(twoSkipTrigrams, twoSkipTrigramsActual);
|
||||
}
|
||||
}
|
||||
51
src/test/java/WordFormationTest.java
Normal file
51
src/test/java/WordFormationTest.java
Normal file
@@ -0,0 +1,51 @@
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import alg.inflectedJOS.WordFormation;
|
||||
import alg.ngram.Ngrams;
|
||||
import data.*;
|
||||
|
||||
public class WordFormationTest {
|
||||
|
||||
@Test
|
||||
public void calculationTest() throws UnsupportedEncodingException {
|
||||
Map<String, AtomicLong> result = null;
|
||||
|
||||
Filter filter = new Filter();
|
||||
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||
filter.setNgramValue(1);
|
||||
|
||||
Corpus testCorpus = new Corpus();
|
||||
testCorpus.setCorpusType(CorpusType.GIGAFIDA);
|
||||
testCorpus.setDetectedCorpusFiles(new ArrayList<>());
|
||||
|
||||
// tests:
|
||||
// - normal ngrams - word
|
||||
// midCorpus contains 5 words which should make for 3 3-grams
|
||||
filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
|
||||
StatisticsNew stats = new StatisticsNew(testCorpus, filter, false);
|
||||
Ngrams.calculateForAll(Common.josTest, stats);
|
||||
result = stats.getResult();
|
||||
WordFormation.calculateStatistics(stats);
|
||||
Object[][] resultArr = stats.getResultCustom();
|
||||
String debug = "";
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAnything() {
|
||||
String a = "Somei";
|
||||
String b = "SomeiD";
|
||||
|
||||
String c = a.substring(0, 5);
|
||||
String d = b.substring(0, 5);
|
||||
|
||||
String debug = "";
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
15
src/test/java/WordLevelTest.java
Normal file
15
src/test/java/WordLevelTest.java
Normal file
@@ -0,0 +1,15 @@
|
||||
import java.util.HashSet;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import data.Enums.WordLevelDefaultValues;
|
||||
|
||||
public class WordLevelTest {
|
||||
|
||||
@Test
|
||||
public void testResourceFiles() {
|
||||
HashSet<String> suffixes = WordLevelDefaultValues.getSuffixes();
|
||||
String debug = "";
|
||||
|
||||
}
|
||||
}
|
||||
39
src/test/java/WordTest.java
Normal file
39
src/test/java/WordTest.java
Normal file
@@ -0,0 +1,39 @@
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import data.Word;
|
||||
|
||||
public class WordTest {
|
||||
@Test
|
||||
public void paddingTest() {
|
||||
Word w1 = new Word("w1", "l1", "Somei");
|
||||
Word w2 = new Word("w2", "l2", "Sometd");
|
||||
|
||||
// w1's msd should get padded
|
||||
String msd1 = w1.getMsd();
|
||||
String msd2 = w2.getMsd();
|
||||
assertEquals(msd1.length(), msd2.length());
|
||||
assertEquals(Word.PAD_CHARACTER, msd1.charAt(msd1.length() - 1));
|
||||
|
||||
w1 = new Word("w1", "l1", "Gp-g");
|
||||
w2 = new Word("w2", "l2", "Gp-g---d");
|
||||
|
||||
// w1's msd should get padded
|
||||
msd1 = w1.getMsd();
|
||||
msd2 = w2.getMsd();
|
||||
assertEquals(msd1.length(), msd2.length());
|
||||
assertEquals(Word.PAD_CHARACTER, msd1.charAt(msd1.length() - 1));
|
||||
assertEquals(Word.PAD_CHARACTER, msd2.charAt(2));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cvvTest() {
|
||||
String siAlphabet = "abcčdefghijklmnoprsštuvzž";
|
||||
String siAlphabetCvv = "VCCCCVCCCVCCCCCVCCCCCVCCC";
|
||||
|
||||
Word w1 = new Word(siAlphabet, "l1", null);
|
||||
assertEquals(siAlphabetCvv, w1.getCVVWord());
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user