Added functionality for n-grams (comma separation), minimal occurrences, etc.

This commit is contained in:
Luka 2018-07-31 08:58:17 +02:00
parent 681eb4f949
commit 179f09c4bd
27 changed files with 405 additions and 4962 deletions

View File

@ -293,13 +293,17 @@ public class XML_processing {
// "word" node value
if (in_word) {
stavek.add(new Word(characters.getData(), lemma, msd));
stavek.add(new Word(characters.getData(), lemma, msd, null));
in_word = false;
} else if(inPunctuation){
String punctuation = ",";
stavek.get(stavek.size()-1).setWord(stavek.get(stavek.size()-1).getWord() + punctuation);
stavek.get(stavek.size()-1).setLemma(stavek.get(stavek.size()-1).getLemma() + punctuation);
stavek.get(stavek.size()-1).setMsd(stavek.get(stavek.size()-1).getMsd() + punctuation);
if (stavek.size() > 0){
stavek.get(stavek.size()-1).setWord(stavek.get(stavek.size()-1).getWord() + punctuation);
stavek.get(stavek.size()-1).setLemma(stavek.get(stavek.size()-1).getLemma() + punctuation);
stavek.get(stavek.size()-1).setMsd(stavek.get(stavek.size()-1).getMsd() + punctuation);
}
inPunctuation = false;
}
break;
@ -652,6 +656,7 @@ public class XML_processing {
boolean inOrthDiv = false;
boolean computeForOrth = stats.getCorpus().isGosOrthMode();
ArrayList<String> currentFiletaxonomy = new ArrayList<>();
ArrayList<String> currentFiletaxonomyLong = new ArrayList<>();
String lemma = "";
String msd = "";
@ -718,7 +723,10 @@ public class XML_processing {
if (tax != null) {
// keep only taxonomy properties
currentFiletaxonomy.add(String.valueOf(tax.getValue()));
String currentFiletaxonomyElement = String.valueOf(tax.getValue());
currentFiletaxonomy.add(currentFiletaxonomyElement);
Tax taxonomy = new Tax();
currentFiletaxonomyLong.add(taxonomy.getLongTaxonomyName(currentFiletaxonomyElement));
}
} else if (qName.equalsIgnoreCase("div")) {
gosType = String.valueOf(startElement.getAttributeByName(QName.valueOf("type")).getValue());
@ -730,9 +738,9 @@ public class XML_processing {
if (inWord) {
Characters characters = event.asCharacters();
if (gosType.equals("norm") && msd != null) {
sentence.add(new Word(characters.getData(), lemma, msd));
sentence.add(new Word(characters.getData(), lemma, msd, currentFiletaxonomyLong));
} else {
sentence.add(new Word(characters.getData()));
sentence.add(new Word(characters.getData(), lemma, msd, currentFiletaxonomyLong));
}
inWord = false;

View File

@ -26,7 +26,9 @@ public class Filter {
HAS_MSD,
SOLAR_FILTERS,
MULTIPLE_KEYS,
NOTE_PUNCTUATIONS
NOTE_PUNCTUATIONS,
MINIMAL_OCCURRENCES,
MINIMAL_TAXONOMY
}
public Filter() {
@ -170,4 +172,21 @@ public class Filter {
/**
 * Reports whether the NOTE_PUNCTUATIONS option is enabled in this filter.
 *
 * @return {@code true} only when the NOTE_PUNCTUATIONS key is present and its stored value is {@code true};
 *         {@code false} when the key is absent
 */
public boolean getNotePunctuations() {
return filter.containsKey(NOTE_PUNCTUATIONS) && (boolean) filter.get(NOTE_PUNCTUATIONS);
}
/**
 * Stores the minimal-occurrences threshold in the filter under the MINIMAL_OCCURRENCES key.
 *
 * @param minOccurrences threshold value to store
 */
public void setMinimalOccurrences(Integer minOccurrences) {
filter.put(MINIMAL_OCCURRENCES, minOccurrences);
}
/**
 * Reads the value stored under the MINIMAL_OCCURRENCES key and casts it to {@link Integer}.
 *
 * NOTE(review): no containsKey check is made here, so this presumably returns {@code null}
 * when the value was never set - confirm against the declared type of {@code filter}.
 *
 * @return the stored minimal-occurrences threshold
 */
public Integer getMinimalOccurrences() {
return (Integer) filter.get(MINIMAL_OCCURRENCES);
}
/**
 * Stores the minimal-taxonomy threshold in the filter under the MINIMAL_TAXONOMY key.
 *
 * @param minTaxonomy threshold value to store
 */
public void setMinimalTaxonomy(Integer minTaxonomy) {
filter.put(MINIMAL_TAXONOMY, minTaxonomy);
}
/**
 * Reads the value stored under the MINIMAL_TAXONOMY key and casts it to {@link Integer}.
 *
 * NOTE(review): no containsKey check is made here, so this presumably returns {@code null}
 * when the value was never set - confirm against the declared type of {@code filter}.
 *
 * @return the stored minimal-taxonomy threshold
 */
public Integer getMinimalTaxonomy() {
return (Integer) filter.get(MINIMAL_TAXONOMY);
}
}

View File

@ -48,15 +48,16 @@ public class StatisticsNew {
this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
// create table for counting word occurances per taxonomies
if (this.filter.getTaxonomy().isEmpty()) {
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>());
}
} else {
for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {
Tax taxonomy = new Tax();
this.taxonomyResult.put(taxonomy.getLongTaxonomyName(this.filter.getTaxonomy().get(i)), new ConcurrentHashMap<>());
if (this.corpus.getTaxonomy() != null) {
if (this.filter.getTaxonomy().isEmpty()) {
for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
this.taxonomyResult.put(this.corpus.getTaxonomy().get(i), new ConcurrentHashMap<>());
}
} else {
for (int i = 0; i < this.filter.getTaxonomy().size(); i++) {
Tax taxonomy = new Tax();
this.taxonomyResult.put(taxonomy.getLongTaxonomyName(this.filter.getTaxonomy().get(i)), new ConcurrentHashMap<>());
}
}
}
@ -209,11 +210,45 @@ public class StatisticsNew {
analysisProducedResults = true;
}
removeMinimalOccurrences(taxonomyResult.get("Total"), filter.getMinimalOccurrences());
removeMinimalTaxonomy(taxonomyResult, filter.getMinimalTaxonomy());
stats.add(ImmutablePair.of(resultTitle, getSortedResult(taxonomyResult.get("Total"), Util.getValidInt(limit))));
Export.SetToCSV(stats, corpus.getChosenResultsLocation(), headerInfoBlock(), taxonomyResult);
return true;
}
/**
 * Removes from the "Total" column every key that occurs in fewer than
 * {@code minimalTaxonomy} distinct taxonomy columns.
 *
 * A key counts as occurring in a taxonomy column when that column holds a
 * counter for it with a value of at least 1. The "Total" column itself is
 * never counted as a taxonomy.
 *
 * @param taxonomyResult  map from column name ("Total" or a taxonomy name) to the per-key counters
 * @param minimalTaxonomy minimal number of distinct taxonomies a key must occur in to be kept;
 *                        {@code null} or any value not greater than 1 leaves the map unchanged
 */
private void removeMinimalTaxonomy(Map<String, Map<MultipleHMKeys, AtomicLong>> taxonomyResult, Integer minimalTaxonomy) {
    // A missing threshold or a threshold of 1 (the original no-op value) filters nothing;
    // thresholds below 1 could never remove anything either, so bail out early for all of them.
    if (minimalTaxonomy == null || minimalTaxonomy <= 1) {
        return;
    }

    Map<MultipleHMKeys, AtomicLong> total = taxonomyResult.get("Total");
    if (total == null) {
        return;
    }

    // removeIf deletes through the key-set view, so entries are never removed
    // behind the back of an active iterator.
    total.keySet().removeIf(key -> {
        int taxonomiesWithKey = 0;
        for (Map.Entry<String, Map<MultipleHMKeys, AtomicLong>> column : taxonomyResult.entrySet()) {
            if (column.getKey().equals("Total")) {
                continue;
            }
            // A key present in "Total" is not necessarily present in every taxonomy
            // column; a missing counter means zero occurrences, not an error.
            AtomicLong count = column.getValue().get(key);
            if (count != null && count.get() >= 1) {
                taxonomiesWithKey++;
            }
        }
        return taxonomiesWithKey < minimalTaxonomy;
    });
}
/**
 * Removes every entry whose total number of occurrences is lower than
 * {@code minimalOccurrences}.
 *
 * @param taxonomyResultTotal per-key counters of the "Total" column; modified in place
 * @param minimalOccurrences  minimal total count a key must reach to be kept;
 *                            {@code null}, zero or a negative value leaves the map unchanged
 */
private void removeMinimalOccurrences(Map<MultipleHMKeys, AtomicLong> taxonomyResultTotal, Integer minimalOccurrences) {
    // A missing threshold must not crash on unboxing; non-positive thresholds filter nothing.
    if (taxonomyResultTotal == null || minimalOccurrences == null || minimalOccurrences <= 0) {
        return;
    }

    // removeIf deletes through the values view instead of calling remove() while
    // iterating keySet(); comparing the long value avoids int truncation of large counts.
    taxonomyResultTotal.values().removeIf(count -> count.get() < minimalOccurrences);
}
public boolean saveResultNestedToDisk(int... limit) throws UnsupportedEncodingException {
resultTitle = generateResultTitle();
@ -285,7 +320,8 @@ public class StatisticsNew {
public void updateTaxonomyResults(MultipleHMKeys o, List<String> taxonomy) {
for (String key : taxonomyResult.keySet()) {
// first word should have the same taxonomy as others
if (taxonomy.contains(key) || key.equals("Total")) {
if (key.equals("Total") || taxonomy.contains(key)) {
// if (key.equals("Total") || taxonomy != null && taxonomy.contains(key)) {
// if taxonomy not in map and in this word
AtomicLong r = taxonomyResult.get(key).putIfAbsent(o, new AtomicLong(1));
@ -389,13 +425,13 @@ public class StatisticsNew {
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
Integer ngramLevel = filter.getNgramValue();
if (ngramLevel == 0)
info.put("Analiza:", "Črke");
info.put("Analiza", "Črke");
else if (ngramLevel == 1)
info.put("Analiza", "Besede");
else
info.put("Analiza:", filter.getAl().toString());
info.put("Analiza", filter.getAl().toString());
} else {
info.put("Analiza:", filter.getAl().toString());
info.put("Analiza", filter.getAl().toString());
}
if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {

View File

@ -16,6 +16,7 @@ public class Word implements Serializable {
private String word;
private String lemma;
private String msd;
// private String msd;
private List<String> taxonomy;
private final HashSet<Character> VOWELS = new HashSet<>(Arrays.asList('a', 'e', 'i', 'o', 'u'));

View File

@ -51,6 +51,14 @@ public class CharacterAnalysisTab {
private TextField stringLengthTF;
private Integer stringLength;
@FXML
private TextField minimalOccurrencesTF;
private Integer minimalOccurrences;
@FXML
private TextField minimalTaxonomyTF;
private Integer minimalTaxonomy;
@FXML
private ToggleGroup calculateForRB;
private CalculateFor calculateFor;
@ -189,6 +197,49 @@ public class CharacterAnalysisTab {
}
});
// set default values
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = minimalOccurrencesTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("minimalOccurrencesTF: " + WARNING_ONLY_NUMBERS_ALLOWED);
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
} else {
minimalOccurrences = Integer.parseInt(value);
}
} else {
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
}
}
});
minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = minimalTaxonomyTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("minimalTaxonomyTF: " + WARNING_ONLY_NUMBERS_ALLOWED);
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
} else {
minimalTaxonomy = Integer.parseInt(value);
}
} else {
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
}
}
});
computeNgramsB.setOnAction(e -> {
compute();
logger.info("compute button");
@ -344,6 +395,8 @@ public class CharacterAnalysisTab {
filter.setIsCvv(calculateCvv);
filter.setSolarFilters(solarFiltersMap);
filter.setStringLength(stringLength);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);
String message = Validation.validateForStringLevel(filter);
if (message == null) {

View File

@ -105,7 +105,7 @@ public class CorpusTab {
gosUseOrthChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
gosUseOrth = newValue;
corpus.setGosOrthMode(gosUseOrth);
wordFormationTab.setDisable(gosUseOrth);
// wordFormationTab.setDisable(gosUseOrth);
satNew2Controller.toggleMode(null);
oneWordTabController.toggleMode(null);
catController.toggleMode(null);

View File

@ -80,6 +80,7 @@ public class GUIController extends Application {
@Override
public void start(Stage primaryStage) throws IOException {
Parent root = FXMLLoader.load(getClass().getResource("/GUI.fxml"));
// Parent root = FXMLLoader.load(ResourceLookup.resources.url("GUI.fxml"));
primaryStage.setTitle("GUI");
Scene scene = new Scene(root, 800, 600);
// https://github.com/dicolar/jbootx

View File

@ -15,6 +15,7 @@ public class Messages {
public static final String WARNING_DIFFERING_NGRAM_LEVEL_AND_FILTER_TOKENS_INFO = "Izberite drugo število ali popravite filter.";
public static final String WARNING_WORD_OR_LEMMA = "Izberite, če želite statistiko izračunati za besede ali leme.";
public static final String WARNING_ONLY_NUMBERS_ALLOWED = "Prosim vnesite veljavno število.";
public static final String WARNING_NUMBER_TOO_BIG = "Vnešeno število je večje od števila taksonomij.";
public static final String WARNING_MISMATCHED_NGRAM_AND_TOKENS_VALUES = "Število za ngram (%d) in število msd oznak (%d) se morata ujemati.";
public static final String WARNING_MISSING_STRING_LENGTH = "Dolžina niza mora biti večja od 0. Vstavljena je privzeta vrednost (1).";
public static final String WARNING_NO_TAXONOMY_FOUND = "Iz korpusnih datotek ni bilo moč razbrati taksonomije. Prosim izberite drugo lokacijo ali korpus.";

View File

@ -49,6 +49,13 @@ public class OneWordAnalysisTab {
private ComboBox<String> calculateForCB;
private CalculateFor calculateFor;
@FXML
private TextField minimalOccurrencesTF;
private Integer minimalOccurrences;
@FXML
private TextField minimalTaxonomyTF;
private Integer minimalTaxonomy;
@FXML
private Button computeNgramsB;
@ -191,6 +198,49 @@ public class OneWordAnalysisTab {
taxonomyCCB.setDisable(true);
}
// set default values
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = minimalOccurrencesTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("minimalOccurrencesTF: " + WARNING_ONLY_NUMBERS_ALLOWED);
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
} else {
minimalOccurrences = Integer.parseInt(value);
}
} else {
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
}
}
});
minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = minimalTaxonomyTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("minimalTaxonomyTF: " + WARNING_ONLY_NUMBERS_ALLOWED);
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
} else {
minimalTaxonomy = Integer.parseInt(value);
}
} else {
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
}
}
});
computeNgramsB.setOnAction(e -> {
compute();
logger.info("compute button");
@ -313,6 +363,7 @@ public class OneWordAnalysisTab {
} else {
msdTF.setDisable(false);
}
calculateFor = CalculateFor.factory(calculateForCB.getItems().get(0));
}
private void compute() {
@ -327,6 +378,8 @@ public class OneWordAnalysisTab {
filter.setSolarFilters(solarFiltersMap);
filter.setStringLength(1);
filter.setMultipleKeys(alsoVisualize);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);
String message = Validation.validateForStringLevel(filter);
if (message == null) {

View File

@ -66,6 +66,14 @@ public class StringAnalysisTabNew2 {
private CheckBox notePunctuationsChB;
private boolean notePunctuations;
@FXML
private TextField minimalOccurrencesTF;
private Integer minimalOccurrences;
@FXML
private TextField minimalTaxonomyTF;
private Integer minimalTaxonomy;
@FXML
private Pane paneWords;
@ -139,6 +147,13 @@ public class StringAnalysisTabNew2 {
ngramValueCB.getSelectionModel().select(0); // selected index
ngramValue = 2; // actual value at that index
// set default values
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
notePunctuations = true;
// set
notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
@ -240,6 +255,42 @@ public class StringAnalysisTabNew2 {
}
});
minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = minimalOccurrencesTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("minimalOccurrencesTF: " + WARNING_ONLY_NUMBERS_ALLOWED);
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
} else {
minimalOccurrences = Integer.parseInt(value);
}
} else {
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
}
}
});
minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = minimalTaxonomyTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("minimalTaxonomyTF: " + WARNING_ONLY_NUMBERS_ALLOWED);
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
} else {
minimalTaxonomy = Integer.parseInt(value);
}
} else {
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
}
}
});
computeNgramsB.setOnAction(e -> {
compute();
logger.info("compute button");
@ -411,6 +462,8 @@ public class StringAnalysisTabNew2 {
filter.setIsCvv(calculateCvv);
filter.setSolarFilters(solarFiltersMap);
filter.setNotePunctuations(notePunctuations);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);
if (ngramValue != null && ngramValue == 0) {
filter.setStringLength(stringLength);

View File

@ -40,6 +40,14 @@ public class WordFormationTab {
private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy;
@FXML
private TextField minimalOccurrencesTF;
private Integer minimalOccurrences;
@FXML
private TextField minimalTaxonomyTF;
private Integer minimalTaxonomy;
@FXML
private Button computeB;
@ -77,6 +85,49 @@ public class WordFormationTab {
taxonomyCCB.setDisable(true);
}
// set default values
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = minimalOccurrencesTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("minimalOccurrencesTF: " + WARNING_ONLY_NUMBERS_ALLOWED);
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
} else {
minimalOccurrences = Integer.parseInt(value);
}
} else {
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
}
}
});
minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = minimalTaxonomyTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("minimalTaxonomyTF: " + WARNING_ONLY_NUMBERS_ALLOWED);
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
} else {
minimalTaxonomy = Integer.parseInt(value);
}
} else {
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
}
}
});
computeB.setOnAction(e -> {
compute();
logger.info("compute button");
@ -95,6 +146,8 @@ public class WordFormationTab {
filter.setMsd(new ArrayList<>());
filter.setIsCvv(false);
filter.setSolarFilters(solarFiltersMap);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);
String message = Validation.validateForStringLevel(filter);
if (message == null) {

View File

@ -40,6 +40,14 @@ public class WordLevelTab {
private CheckComboBox<String> taxonomyCCB;
private ArrayList<String> taxonomy;
@FXML
private TextField minimalOccurrencesTF;
private Integer minimalOccurrences;
@FXML
private TextField minimalTaxonomyTF;
private Integer minimalTaxonomy;
@FXML
private Button computeB;
@ -77,6 +85,49 @@ public class WordLevelTab {
taxonomyCCB.setDisable(true);
}
// set default values
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = minimalOccurrencesTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("minimalOccurrencesTF: " + WARNING_ONLY_NUMBERS_ALLOWED);
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
} else {
minimalOccurrences = Integer.parseInt(value);
}
} else {
minimalOccurrencesTF.setText("1");
minimalOccurrences = 1;
}
}
});
minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) {
// focus lost
String value = minimalTaxonomyTF.getText();
if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) {
logAlert("minimalTaxonomyTF: " + WARNING_ONLY_NUMBERS_ALLOWED);
GUIController.showAlert(Alert.AlertType.ERROR, WARNING_ONLY_NUMBERS_ALLOWED);
} else {
minimalTaxonomy = Integer.parseInt(value);
}
} else {
minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1;
}
}
});
computeB.setOnAction(e -> {
compute();
logger.info("compute button");
@ -98,6 +149,8 @@ public class WordLevelTab {
filter.setMsd(new ArrayList<>());
filter.setIsCvv(false);
filter.setSolarFilters(solarFiltersMap);
filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy);
String message = Validation.validateForStringLevel(filter);
if (message == null) {

View File

@ -86,16 +86,28 @@ public class Export {
//CSV file header
if (headerInfoBlock.containsKey("Analiza") && headerInfoBlock.get("Analiza").equals("Besede")) {
if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi"))) {
if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
FILE_HEADER_AL.add("Različnica");
if (headerInfoBlock.get("Analiza").equals("Besede")){
FILE_HEADER_AL.add("Različnica");
} else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
FILE_HEADER_AL.add("Različnice");
}
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) {
headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies));
FILE_HEADER_AL.add("Lema");
if (headerInfoBlock.get("Analiza").equals("Besede")){
FILE_HEADER_AL.add("Lema");
} else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
FILE_HEADER_AL.add("Leme");
}
} else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) {
headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies));
FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
if (headerInfoBlock.get("Analiza").equals("Besede")){
FILE_HEADER_AL.add("Oblikoskladenjska oznaka");
} else if (headerInfoBlock.get("Analiza").equals("Besedni nizi")) {
FILE_HEADER_AL.add("Oblikoskladenjska oznake");
}
} else {
headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies));
FILE_HEADER_AL.add("Lema");

View File

@ -1,70 +0,0 @@
"Korpus: ";Gigafida
"Datum: ";14.05.2018 06:34
"Analiza: ";Besedni nizi
"n-gram nivo: ";nivo črk
"Skip: ";0
"Izračunaj za: ";lema
"Izračunaj za kombinacije samoglasnikov in soglasnikov: ";ne
"Dolžina niza: ";1
word;frequency;percent
a;438;11.086%
i;390;9.871%
e;341;8.631%
o;328;8.302%
t;262;6.631%
n;261;6.606%
r;229;5.796%
k;174;4.404%
d;144;3.645%
s;141;3.569%
v;133;3.366%
l;123;3.113%
j;120;3.037%
p;120;3.037%
z;81;2.05%
b;75;1.898%
u;71;1.797%
"č";65;1.645%
m;58;1.468%
g;53;1.341%
c;44;1.114%
"š";32;0.81%
"ž";32;0.81%
1;28;0.709%
h;20;0.506%
0;19;0.481%
2;18;0.456%
".";17;0.43%
M;13;0.329%
6;12;0.304%
f;11;0.278%
9;10;0.253%
3;8;0.202%
A;7;0.177%
J;7;0.177%
T;6;0.152%
B;5;0.127%
K;5;0.127%
P;5;0.127%
5;4;0.101%
8;4;0.101%
R;4;0.101%
S;4;0.101%
4;3;0.076%
":";3;0.076%
D;3;0.076%
F;3;0.076%
I;3;0.076%
7;2;0.051%
G;2;0.051%
w;2;0.051%
"'";1;0.025%
C;1;0.025%
E;1;0.025%
L;1;0.025%
N;1;0.025%
V;1;0.025%
Z;1;0.025%
"Š";1;0.025%
Can't render this file because it has a wrong number of fields in line 11.

View File

@ -1,390 +0,0 @@
"Korpus: ";Gigafida
"Datum: ";14.05.2018 06:37
"Analiza: ";Besedni nizi
"n-gram nivo: ";nivo črk
"Skip: ";0
"Izračunaj za: ";lema
"Izračunaj za kombinacije samoglasnikov in soglasnikov: ";ne
"Dolžina niza: ";2
word;frequency;percent
ti;122;3.835%
en;70;2.201%
at;59;1.855%
it;56;1.76%
in;54;1.698%
ko;54;1.698%
st;48;1.509%
na;48;1.509%
po;46;1.446%
ar;45;1.415%
ka;45;1.415%
ra;44;1.383%
an;42;1.32%
pr;40;1.257%
bi;40;1.257%
je;39;1.226%
re;38;1.195%
te;37;1.163%
ja;37;1.163%
od;36;1.132%
ov;36;1.132%
ta;33;1.037%
ri;31;0.975%
el;31;0.975%
er;30;0.943%
da;28;0.88%
se;27;0.849%
za;27;0.849%
ni;26;0.817%
av;24;0.754%
do;24;0.754%
vi;24;0.754%
ro;23;0.723%
ed;23;0.723%
ek;23;0.723%
le;23;0.723%
li;22;0.692%
nj;22;0.692%
os;22;0.692%
de;21;0.66%
la;21;0.66%
lo;21;0.66%
ve;20;0.629%
lj;20;0.629%
no;20;0.629%
ol;20;0.629%
aj;19;0.597%
or;19;0.597%
rt;18;0.566%
to;18;0.566%
va;18;0.566%
es;18;0.566%
me;18;0.566%
on;18;0.566%
ki;17;0.534%
pe;17;0.534%
ak;16;0.503%
ce;16;0.503%
dr;16;0.503%
et;15;0.472%
ic;15;0.472%
ik;15;0.472%
is;15;0.472%
ič;15;0.472%
ob;15;0.472%
sk;14;0.44%
ca;14;0.44%
ga;14;0.44%
ot;14;0.44%
as;13;0.409%
rk;13;0.409%
ru;13;0.409%
ev;13;0.409%
"ča";13;0.409%
"če";13;0.409%
ij;13;0.409%
ir;13;0.409%
kr;13;0.409%
ma;13;0.409%
ne;13;0.409%
og;13;0.409%
ur;12;0.377%
"ža";12;0.377%
vo;12;0.377%
go;12;0.377%
zd;12;0.377%
iz;12;0.377%
ju;12;0.377%
op;12;0.377%
ad;11;0.346%
iž;11;0.346%
"či";11;0.346%
Ma;11;0.346%
oz;11;0.346%
al;10;0.314%
di;10;0.314%
us;10;0.314%
em;10;0.314%
eč;10;0.314%
om;10;0.314%
pa;10;0.314%
so;9;0.283%
ug;9;0.283%
"ša";9;0.283%
iv;9;0.283%
mi;9;0.283%
ok;9;0.283%
be;8;0.251%
bl;8;0.251%
nč;8;0.251%
oč;8;0.251%
tr;8;0.251%
ec;8;0.251%
ze;8;0.251%
ns;8;0.251%
sp;7;0.22%
dj;7;0.22%
un;7;0.22%
aš;7;0.22%
il;7;0.22%
"še";7;0.22%
ke;7;0.22%
eš;7;0.22%
1.;6;0.189%
10;6;0.189%
ah;6;0.189%
rj;6;0.189%
ba;6;0.189%
uh;6;0.189%
eb;6;0.189%
"že";6;0.189%
ep;6;0.189%
ji;6;0.189%
ml;6;0.189%
nb;6;0.189%
nk;6;0.189%
am;5;0.157%
ap;5;0.157%
az;5;0.157%
20;5;0.157%
sn;5;0.157%
sr;5;0.157%
dn;5;0.157%
ej;5;0.157%
ez;5;0.157%
ač;5;0.157%
ge;5;0.157%
gl;5;0.157%
gr;5;0.157%
ha;5;0.157%
"čk";5;0.157%
"čl";5;0.157%
"št";5;0.157%
uč;5;0.157%
jd;5;0.157%
kl;5;0.157%
ku;5;0.157%
Ju;5;0.157%
Ko;5;0.157%
oj;5;0.157%
01;4;0.126%
11;4;0.126%
rb;4;0.126%
rm;4;0.126%
bo;4;0.126%
sa;4;0.126%
si;4;0.126%
ci;4;0.126%
tj;4;0.126%
tv;4;0.126%
To;4;0.126%
eh;4;0.126%
"ži";4;0.126%
"žn";4;0.126%
vl;4;0.126%
oš;4;0.126%
ož;4;0.126%
ib;4;0.126%
id;4;0.126%
"šk";4;0.126%
zg;4;0.126%
zi;4;0.126%
mo;4;0.126%
".1";4;0.126%
nt;4;0.126%
oc;4;0.126%
of;4;0.126%
ac;3;0.094%
13;3;0.094%
19;3;0.094%
Ag;3;0.094%
br;3;0.094%
Ro;3;0.094%
sv;3;0.094%
ck;3;0.094%
Br;3;0.094%
dl;3;0.094%
ud;3;0.094%
du;3;0.094%
um;3;0.094%
up;3;0.094%
ut;3;0.094%
vn;3;0.094%
62;3;0.094%
vs;3;0.094%
66;3;0.094%
fi;3;0.094%
he;3;0.094%
hk;3;0.094%
ho;3;0.094%
9.;3;0.094%
ig;3;0.094%
im;3;0.094%
"šn";3;0.094%
až;3;0.094%
nd;3;0.094%
".2";3;0.094%
00;2;0.063%
pt;2;0.063%
09;2;0.063%
12;2;0.063%
ag;2;0.063%
rc;2;0.063%
rd;2;0.063%
rg;2;0.063%
rn;2;0.063%
21;2;0.063%
rs;2;0.063%
2:;2;0.063%
Al;2;0.063%
An;2;0.063%
sl;2;0.063%
3.;2;0.063%
su;2;0.063%
1s;2;0.063%
th;2;0.063%
tn;2;0.063%
db;2;0.063%
Sr;2;0.063%
tu;2;0.063%
46;2;0.063%
dg;2;0.063%
dk;2;0.063%
ub;2;0.063%
dt;2;0.063%
Da;2;0.063%
vd;2;0.063%
fa;2;0.063%
vr;2;0.063%
ff;2;0.063%
vz;2;0.063%
fo;2;0.063%
Fi;2;0.063%
bč;2;0.063%
gu;2;0.063%
8.;2;0.063%
"čn";2;0.063%
Go;2;0.063%
98;2;0.063%
99;2;0.063%
"šp";2;0.063%
zm;2;0.063%
zn;2;0.063%
jc;2;0.063%
Ja;2;0.063%
ll;2;0.063%
ln;2;0.063%
uš;2;0.063%
už;2;0.063%
vš;2;0.063%
ež;2;0.063%
nu;2;0.063%
vž;2;0.063%
03;1;0.031%
08;1;0.031%
Pa;1;0.031%
Pe;1;0.031%
iš;1;0.031%
Pl;1;0.031%
Po;1;0.031%
ab;1;0.031%
Pr;1;0.031%
rf;1;0.031%
rh;1;0.031%
t.;1;0.031%
2.;1;0.031%
22;1;0.031%
24;1;0.031%
25;1;0.031%
29;1;0.031%
bn;1;0.031%
SC;1;0.031%
sm;1;0.031%
30;1;0.031%
31;1;0.031%
Ba;1;0.031%
cc;1;0.031%
35;1;0.031%
Ru;1;0.031%
Be;1;0.031%
co;1;0.031%
ct;1;0.031%
4.;1;0.031%
St;1;0.031%
dp;1;0.031%
Ta;1;0.031%
uc;1;0.031%
ds;1;0.031%
uf;1;0.031%
dv;1;0.031%
uk;1;0.031%
ea;1;0.031%
56;1;0.031%
Tu;1;0.031%
ef;1;0.031%
De;1;0.031%
eg;1;0.031%
ei;1;0.031%
"žm";1;0.031%
nš;1;0.031%
vk;1;0.031%
60;1;0.031%
fe;1;0.031%
El;1;0.031%
Va;1;0.031%
fu;1;0.031%
nž;1;0.031%
wi;1;0.031%
i';1;0.031%
gi;1;0.031%
Fr;1;0.031%
"čb";1;0.031%
hi;1;0.031%
I.;1;0.031%
"ču";1;0.031%
hr;1;0.031%
"Šm";1;0.031%
ie;1;0.031%
97;1;0.031%
9:;1;0.031%
io;1;0.031%
zb;1;0.031%
"'s";1;0.031%
zo;1;0.031%
":2";1;0.031%
zr;1;0.031%
zs;1;0.031%
":3";1;0.031%
zu;1;0.031%
":5";1;0.031%
zv;1;0.031%
jn;1;0.031%
In;1;0.031%
jo;1;0.031%
js;1;0.031%
Iv;1;0.031%
kd;1;0.031%
Zu;1;0.031%
ld;1;0.031%
lm;1;0.031%
lu;1;0.031%
Lj;1;0.031%
mp;1;0.031%
ms;1;0.031%
MS;1;0.031%
nc;1;0.031%
ng;1;0.031%
".0";1;0.031%
Mo;1;0.031%
nr;1;0.031%
".7";1;0.031%
".9";1;0.031%
"šč";1;0.031%
Ne;1;0.031%
oh;1;0.031%
oi;1;0.031%
ow;1;0.031%
pi;1;0.031%
pl;1;0.031%
Can't render this file because it has a wrong number of fields in line 11.

View File

@ -1,455 +0,0 @@
"Korpus: ";Gigafida
"Datum: ";31.01.2018 05:11
"Analiza: ";Besedni nizi
"n-gram nivo: ";1
"Skip: ";0
"Izračunaj za: ";lema
word;frequency;percent
biti;29;3.766%
in;29;3.766%
v;16;2.078%
z;12;1.558%
se;10;1.299%
on;9;1.169%
za;9;1.169%
ki;8;1.039%
na;8;1.039%
da;7;0.909%
kako;7;0.909%
o;6;0.779%
ta;5;0.649%
elina;4;0.519%
ajdov;4;0.519%
zadruga;4;0.519%
postati;4;0.519%
grozdje;4;0.519%
ne;4;0.519%
pol;4;0.519%
dodati;4;0.519%
ti;4;0.519%
cerkev;4;0.519%
kaša;4;0.519%
totenbirt;4;0.519%
približno;4;0.519%
drug;4;0.519%
sestra;4;0.519%
korenje;3;0.39%
Jurkovič;3;0.39%
do;3;0.39%
srbeč;3;0.39%
"če";3;0.39%
narod;3;0.39%
Matjaž;3;0.39%
"član";3;0.39%
Koper;3;0.39%
ura;3;0.39%
gost;3;0.39%
ob;3;0.39%
od;3;0.39%
oreh;3;0.39%
po;3;0.39%
križarjenje;3;0.39%
jaz;3;0.39%
mlad;3;0.39%
izdelovati;3;0.39%
62;3;0.39%
ogledalo;3;0.39%
kocka;3;0.39%
"še";3;0.39%
kovinski;3;0.39%
koža;3;0.39%
Agata;3;0.39%
vino;3;0.39%
dati;3;0.39%
zelenjaven;3;0.39%
juha;3;0.39%
pomaranča;3;0.39%
dobro;2;0.26%
imeti;2;0.26%
ter;2;0.26%
jesenski;2;0.26%
lahko;2;0.26%
1;2;0.26%
3;2;0.26%
korenčkov;2;0.26%
več;2;0.26%
Marta;2;0.26%
gepard;2;0.26%
ustanovitev;2;0.26%
a;2;0.26%
the;2;0.26%
tiskarna;2;0.26%
Roblek;2;0.26%
učiteljica;2;0.26%
eko;2;0.26%
torta;2;0.26%
Totenbirt;2;0.26%
ideja;2;0.26%
kuhati;2;0.26%
Javšnik;2;0.26%
"špasen";2;0.26%
voda;2;0.26%
društvo;2;0.26%
"življenje";2;0.26%
pečica;2;0.26%
ladja;2;0.26%
praven;2;0.26%
oseba;2;0.26%
medtem;2;0.26%
namen;2;0.26%
Jurkovička;2;0.26%
Martika;2;0.26%
oprati;2;0.26%
resničen;2;0.26%
kar;2;0.26%
junak;2;0.26%
Godec;2;0.26%
pa;2;0.26%
"čas";2;0.26%
"žena";2;0.26%
pekač;2;0.26%
težava;2;0.26%
1st;2;0.26%
pot;2;0.26%
ker;2;0.26%
star;2;0.26%
sodnica;2;0.26%
nekaj;2;0.26%
46;2;0.26%
officer;2;0.26%
lata;2;0.26%
pri;2;0.26%
nov;2;0.26%
Tomijev;2;0.26%
znebiti;2;0.26%
april;2;0.26%
pozdrav;2;0.26%
posoda;2;0.26%
vdova;2;0.26%
Sredozemlje;2;0.26%
svoj;2;0.26%
občina;2;0.26%
1998;2;0.26%
Alenka;2;0.26%
zgodba;2;0.26%
mesto;2;0.26%
pravi;2;0.26%
Fijavž;2;0.26%
velik;2;0.26%
potem;2;0.26%
veličasten;2;0.26%
zahoden;2;0.26%
organizacija;1;0.13%
odvisno;1;0.13%
dekan;1;0.13%
viroza;1;0.13%
drunk;1;0.13%
pričati;1;0.13%
Brolo;1;0.13%
Končar;1;0.13%
tek;1;0.13%
sister;1;0.13%
okusen;1;0.13%
dokler;1;0.13%
izgubiti;1;0.13%
pospeševati;1;0.13%
zvezdniški;1;0.13%
vključno;1;0.13%
spoštovan;1;0.13%
5;1;0.13%
cek;1;0.13%
1113;1;0.13%
roka;1;0.13%
g;1;0.13%
nedoločen;1;0.13%
izumirati;1;0.13%
uporabiti;1;0.13%
pomarančen;1;0.13%
Darko;1;0.13%
polica;1;0.13%
Frenk;1;0.13%
križarjanje;1;0.13%
de;1;0.13%
gospodarski;1;0.13%
Marseille;1;0.13%
dl;1;0.13%
torinski;1;0.13%
12:35;1;0.13%
strah;1;0.13%
Danijel;1;0.13%
vliti;1;0.13%
"ženska";1;0.13%
kompas;1;0.13%
iti;1;0.13%
test;1;0.13%
ustaviti;1;0.13%
Barcelona;1;0.13%
tako;1;0.13%
en;1;0.13%
premešati;1;0.13%
upravljanje;1;0.13%
sutano;1;0.13%
Tanja;1;0.13%
naročiti;1;0.13%
09.11.2010;1;0.13%
intermarketing;1;0.13%
nakazovati;1;0.13%
križariti;1;0.13%
2010;1;0.13%
2130;1;0.13%
zaprt;1;0.13%
prezgodaj;1;0.13%
zdeti;1;0.13%
arhivo;1;0.13%
sin;1;0.13%
akreditacija;1;0.13%
Performs;1;0.13%
paličen;1;0.13%
Marijana;1;0.13%
sladkor;1;0.13%
potekati;1;0.13%
istospolno;1;0.13%
12:25;1;0.13%
I.;1;0.13%
tisti;1;0.13%
jesti;1;0.13%
vnaprej;1;0.13%
naj;1;0.13%
mehko;1;0.13%
judge;1;0.13%
tukaj;1;0.13%
iz;1;0.13%
foto;1;0.13%
palma;1;0.13%
Mojca;1;0.13%
nizek;1;0.13%
blagajna;1;0.13%
mešalnik;1;0.13%
"želeti";1;0.13%
vse;1;0.13%
31.10;1;0.13%
okus;1;0.13%
dragocen;1;0.13%
pojasnjevati;1;0.13%
optimist;1;0.13%
jogurt;1;0.13%
vsebovati;1;0.13%
skorajda;1;0.13%
operacija;1;0.13%
ko;1;0.13%
podjetje;1;0.13%
teden;1;0.13%
ustanoviti;1;0.13%
Kofu;1;0.13%
666;1;0.13%
druga;1;0.13%
motnja;1;0.13%
košček;1;0.13%
izbrati;1;0.13%
prav;1;0.13%
ogret;1;0.13%
rezina;1;0.13%
odgovoren;1;0.13%
vsota;1;0.13%
Planinšek;1;0.13%
pridružiti;1;0.13%
sok;1;0.13%
Indija;1;0.13%
fantastica;1;0.13%
Palermo;1;0.13%
dober;1;0.13%
"člen";1;0.13%
29.03.2010;1;0.13%
splošen;1;0.13%
pojav;1;0.13%
ali;1;0.13%
poslednji;1;0.13%
priokus;1;0.13%
račun;1;0.13%
trg;1;0.13%
proklamirati;1;0.13%
nazaj;1;0.13%
Anand;1;0.13%
pecilen;1;0.13%
vame;1;0.13%
peč;1;0.13%
edinstven;1;0.13%
1.7;1;0.13%
cena;1;0.13%
usta;1;0.13%
med;1;0.13%
veliko;1;0.13%
zmešati;1;0.13%
ogledati;1;0.13%
srbečica;1;0.13%
Maja;1;0.13%
21.;1;0.13%
kaj;1;0.13%