Added translation fixes + punctuation in export fix

This commit is contained in:
Luka 2019-03-19 14:06:39 +01:00
parent 39624fa4f2
commit 10666b4453
19 changed files with 586 additions and 421 deletions

View File

@ -1744,7 +1744,7 @@ public class XML_processing {
// if we're calculating values for letters, omit words that are shorter than string length // if we're calculating values for letters, omit words that are shorter than string length
if (filter.getNgramValue() == 0) { if (filter.getNgramValue() == 0) {
sentence.removeIf(w -> (filter.getCalculateFor() == CalculateFor.WORD && w.getWord(filter.getWordParts()).length() < filter.getStringLength()) sentence.removeIf(w -> ((filter.getCalculateFor() == CalculateFor.WORD || filter.getCalculateFor() == CalculateFor.LOWERCASE_WORD) && w.getWord(filter.getWordParts()).length() < filter.getStringLength())
|| (filter.getCalculateFor() == CalculateFor.LEMMA && w.getLemma(filter.getWordParts()).length() < filter.getStringLength())); || (filter.getCalculateFor() == CalculateFor.LEMMA && w.getLemma(filter.getWordParts()).length() < filter.getStringLength()));
} }
} }
@ -1766,7 +1766,7 @@ public class XML_processing {
public static Word createWord(String word, String lemma, String msd, String normalizedWord, Filter f){ public static Word createWord(String word, String lemma, String msd, String normalizedWord, Filter f){
List<String> wString = new ArrayList<>(); List<String> wString = new ArrayList<>();
if (f.getWordParts().contains(CalculateFor.WORD)) if (f.getWordParts().contains(CalculateFor.WORD) || f.getWordParts().contains(CalculateFor.LOWERCASE_WORD))
wString.add(word); wString.add(word);
if (f.getWordParts().contains(CalculateFor.LEMMA)) if (f.getWordParts().contains(CalculateFor.LEMMA))
wString.add(lemma); wString.add(lemma);

View File

@ -222,6 +222,12 @@ public class Ngrams {
.map(w -> w.getLemma(wordParts)) .map(w -> w.getLemma(wordParts))
.collect(Collectors.toList())); .collect(Collectors.toList()));
return StringUtils.join(candidate, " "); return StringUtils.join(candidate, " ");
case LOWERCASE_WORD:
candidate.addAll(ngramCandidate
.stream()
.map(w -> w.getWord(wordParts).toLowerCase())
.collect(Collectors.toList()));
return StringUtils.join(candidate, " ");
case WORD: case WORD:
candidate.addAll(ngramCandidate candidate.addAll(ngramCandidate
.stream() .stream()
@ -298,6 +304,10 @@ public class Ngrams {
continue; continue;
} }
if(stats.getFilter().getCalculateFor().equals(CalculateFor.LOWERCASE_WORD)){
word = word.toLowerCase();
}
for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) { for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) {
// TODO: locila? // TODO: locila?

View File

@ -22,6 +22,7 @@ public enum CalculateFor {
// DIST_LEMMAS("lema"); // DIST_LEMMAS("lema");
WORD("calculateFor.WORD"), WORD("calculateFor.WORD"),
LOWERCASE_WORD("calculateFor.LOWERCASE_WORD"),
NORMALIZED_WORD("calculateFor.NORMALIZED_WORD"), NORMALIZED_WORD("calculateFor.NORMALIZED_WORD"),
LEMMA("calculateFor.LEMMA"), LEMMA("calculateFor.LEMMA"),
MORPHOSYNTACTIC_SPECS("calculateFor.MORPHOSYNTACTIC_SPECS"), MORPHOSYNTACTIC_SPECS("calculateFor.MORPHOSYNTACTIC_SPECS"),
@ -47,6 +48,9 @@ public enum CalculateFor {
if (WORD.toString().equals(cf)) { if (WORD.toString().equals(cf)) {
return WORD; return WORD;
} }
if (LOWERCASE_WORD.toString().equals(cf)) {
return LOWERCASE_WORD;
}
if (LEMMA.toString().equals(cf)) { if (LEMMA.toString().equals(cf)) {
return LEMMA; return LEMMA;
} }
@ -71,6 +75,8 @@ public enum CalculateFor {
switch (this) { switch (this) {
case WORD: case WORD:
return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.word"); return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.word");
case LOWERCASE_WORD:
return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.lowercaseWord");
case NORMALIZED_WORD: case NORMALIZED_WORD:
return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.normalizedWord"); return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.normalizedWord");
case LEMMA: case LEMMA:
@ -92,6 +98,8 @@ public enum CalculateFor {
switch (this) { switch (this) {
case WORD: case WORD:
return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.word"); return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.word");
case LOWERCASE_WORD:
return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.lowercaseWord");
case NORMALIZED_WORD: case NORMALIZED_WORD:
return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.normalizedWord"); return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.normalizedWord");
case LEMMA: case LEMMA:
@ -118,6 +126,8 @@ public enum CalculateFor {
switch (this) { switch (this) {
case WORD: case WORD:
return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.word"); return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.word");
case LOWERCASE_WORD:
return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.lowercaseWord");
case NORMALIZED_WORD: case NORMALIZED_WORD:
return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.normalizedWord"); return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.normalizedWord");
case LEMMA: case LEMMA:
@ -139,6 +149,8 @@ public enum CalculateFor {
switch (this) { switch (this) {
case WORD: case WORD:
return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.word"); return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.word");
case LOWERCASE_WORD:
return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.lowercaseWord");
case NORMALIZED_WORD: case NORMALIZED_WORD:
return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.normalizedWord"); return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.normalizedWord");
case LEMMA: case LEMMA:
@ -168,6 +180,8 @@ public enum CalculateFor {
switch(this){ switch(this){
case WORD: case WORD:
return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.word2"); return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.word2");
case LOWERCASE_WORD:
return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.lowercaseWord2");
case NORMALIZED_WORD: case NORMALIZED_WORD:
return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.normalizedWord2"); return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.normalizedWord2");
case LEMMA: case LEMMA:
@ -194,6 +208,8 @@ public enum CalculateFor {
switch(this){ switch(this){
case WORD: case WORD:
return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.word2"); return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.word2");
case LOWERCASE_WORD:
return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.lowercaseWord2");
case NORMALIZED_WORD: case NORMALIZED_WORD:
return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.normalizedWord2"); return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.normalizedWord2");
case LEMMA: case LEMMA:
@ -221,6 +237,8 @@ public enum CalculateFor {
case WORD: case WORD:
case DIST_WORDS: case DIST_WORDS:
return I18N.get("exportTable.part.word3"); return I18N.get("exportTable.part.word3");
case LOWERCASE_WORD:
return I18N.get("exportTable.part.lowercaseWord3");
case NORMALIZED_WORD: case NORMALIZED_WORD:
return I18N.get("exportTable.part.normalizedWord3"); return I18N.get("exportTable.part.normalizedWord3");
case LEMMA: case LEMMA:
@ -240,6 +258,8 @@ public enum CalculateFor {
case WORD: case WORD:
case DIST_WORDS: case DIST_WORDS:
return I18N.get("exportTable.part.word3") + " " + I18N.get("exportTable.part.set"); return I18N.get("exportTable.part.word3") + " " + I18N.get("exportTable.part.set");
case LOWERCASE_WORD:
return I18N.get("exportTable.part.lowercaseWord3") + " " + I18N.get("exportTable.part.set");
case NORMALIZED_WORD: case NORMALIZED_WORD:
return I18N.get("exportTable.part.normalizedWord3") + " " + I18N.get("exportTable.part.set"); return I18N.get("exportTable.part.normalizedWord3") + " " + I18N.get("exportTable.part.set");
case LEMMA: case LEMMA:

View File

@ -36,6 +36,7 @@ public class Corpus {
boolean hasMsdData; boolean hasMsdData;
private ArrayList<String> validationErrors; private ArrayList<String> validationErrors;
private String corpusName = ""; private String corpusName = "";
private String punctuation = "punctuation.COMMA";
public Corpus() { public Corpus() {
validationErrors = new ArrayList<>(); validationErrors = new ArrayList<>();
@ -52,6 +53,16 @@ public class Corpus {
logger.info("Corpus.set: ", corpusName); logger.info("Corpus.set: ", corpusName);
} }
/**
 * Returns the punctuation setting currently stored on this corpus.
 *
 * @return the stored punctuation value (the field is initialised to
 *         {@code "punctuation.COMMA"})
 */
public String getPunctuation() {
    return this.punctuation;
}
/**
 * Stores the punctuation setting for this corpus and logs the new value.
 *
 * @param punctuation the value to store, e.g. {@code "punctuation.COMMA"};
 *                    stored as given, without validation
 */
public void setPunctuation(String punctuation) {
    this.punctuation = punctuation;
    // The "{}" placeholder is required: without it the parameterized logger
    // call emits only the prefix and silently drops the argument.
    logger.info("Punctuation.set: {}", punctuation);
}
public CorpusType getCorpusType() { public CorpusType getCorpusType() {
return corpusType; return corpusType;
} }

View File

@ -320,6 +320,10 @@ public class Filter implements Cloneable {
ArrayList<CalculateFor> oldWp = ((ArrayList<CalculateFor>) filter.get(WORD_PARTS)); ArrayList<CalculateFor> oldWp = ((ArrayList<CalculateFor>) filter.get(WORD_PARTS));
switch (wp) { switch (wp) {
case LOWERCASE_WORD:
if (!oldWp.contains(CalculateFor.LOWERCASE_WORD))
oldWp.add(CalculateFor.LOWERCASE_WORD);
break;
case WORD: case WORD:
case DIST_WORDS: case DIST_WORDS:
if (!oldWp.contains(CalculateFor.WORD)) if (!oldWp.contains(CalculateFor.WORD))

View File

@ -324,28 +324,28 @@ public class StatisticsNew {
return true; return true;
} }
public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException { // public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException {
filter.setAl(AnalysisLevel.WORD_FORMATION); // filter.setAl(AnalysisLevel.WORD_FORMATION);
resultTitle = generateResultTitle(); // resultTitle = generateResultTitle();
//
if (useDB) { // if (useDB) {
result = db.getDump(); // result = db.getDump();
db.delete(); // db.delete();
} // }
//
// if no results and nothing to save, return false // // if no results and nothing to save, return false
if (!(result.size() > 0)) { // if (!(result.size() > 0)) {
analysisProducedResults = false; // analysisProducedResults = false;
return false; // return false;
} else { // } else {
analysisProducedResults = true; // analysisProducedResults = true;
} // }
//
WordFormation.calculateStatistics(this); // WordFormation.calculateStatistics(this);
//
Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock()); // Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock());
return true; // return true;
} // }
private Map<String, Map<MultipleHMKeys, Long>> sortNestedMap(Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> nestedMap, int limit) { private Map<String, Map<MultipleHMKeys, Long>> sortNestedMap(Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> nestedMap, int limit) {
Map<String, Map<MultipleHMKeys, Long>> sorted = new HashMap<>(); Map<String, Map<MultipleHMKeys, Long>> sorted = new HashMap<>();
@ -682,6 +682,11 @@ public class StatisticsNew {
info.put(I18N.get("exportHeader.minOccurrences"), String.valueOf(filter.getMinimalOccurrences())); info.put(I18N.get("exportHeader.minOccurrences"), String.valueOf(filter.getMinimalOccurrences()));
info.put(I18N.get("exportHeader.minTaxonomies"), String.valueOf(filter.getMinimalTaxonomy())); info.put(I18N.get("exportHeader.minTaxonomies"), String.valueOf(filter.getMinimalTaxonomy()));
// if not letters extraction
if(filter.getNgramValue() > 0) {
info.put(I18N.get("exportHeader.minRelFre"), String.valueOf(filter.getMinimalRelFre()));
}
if (corpus.getCorpusType() == CorpusType.SOLAR) { if (corpus.getCorpusType() == CorpusType.SOLAR) {
HashMap<String, ObservableList<String>> filters = corpus.getSolarSelectedFilters(); HashMap<String, ObservableList<String>> filters = corpus.getSolarSelectedFilters();

View File

@ -28,7 +28,11 @@ public interface Word {
} }
default String getWord(ArrayList<CalculateFor> wordParts){ default String getWord(ArrayList<CalculateFor> wordParts){
return get(wordParts, CalculateFor.WORD); String w = get(wordParts, CalculateFor.WORD);
if (w == null){
return get(wordParts, CalculateFor.LOWERCASE_WORD);
}
return w;
} }
default String getLemma(ArrayList<CalculateFor> wordParts){ default String getLemma(ArrayList<CalculateFor> wordParts){
@ -102,9 +106,9 @@ public interface Word {
String returnValue = ""; String returnValue = "";
if (cvv) { if (cvv) {
returnValue = calculateFor == CalculateFor.WORD ? getCVVWord(cf) : getCVVLemma(cf); returnValue = (calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LOWERCASE_WORD) ? getCVVWord(cf) : getCVVLemma(cf);
} else { } else {
returnValue = calculateFor == CalculateFor.WORD ? getWord(cf) : getLemma(cf); returnValue = (calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LOWERCASE_WORD) ? getWord(cf) : getLemma(cf);
} }
return returnValue; return returnValue;

View File

@ -179,7 +179,7 @@ public class CharacterAnalysisTab {
private ChangeListener<Boolean> minimalOccurrencesListener; private ChangeListener<Boolean> minimalOccurrencesListener;
private ChangeListener<Boolean> minimalTaxonomyListener; private ChangeListener<Boolean> minimalTaxonomyListener;
private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA"}; private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY)); private static final ArrayList<String> N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY));
private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"}; private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"};
@ -623,7 +623,7 @@ public class CharacterAnalysisTab {
} }
// if calculateFor was selected for something other than a word or a lemma -> reset // if calculateFor was selected for something other than a word or a lemma -> reset
if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) { if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA || calculateFor == CalculateFor.LOWERCASE_WORD)) {
// if the user selected something else before selecting ngram for letters, reset that choice // if the user selected something else before selecting ngram for letters, reset that choice
calculateFor = CalculateFor.WORD; calculateFor = CalculateFor.WORD;

View File

@ -92,6 +92,9 @@ public class CorpusTab {
@FXML @FXML
public Label outputNameL; public Label outputNameL;
@FXML
public Label punctuationL;
@FXML @FXML
public ImageView chooseCorpusLocationI; public ImageView chooseCorpusLocationI;
@ -107,6 +110,9 @@ public class CorpusTab {
@FXML @FXML
public ImageView outputNameI; public ImageView outputNameI;
@FXML
public ImageView punctuationI;
@FXML @FXML
public TextField outputNameTF; public TextField outputNameTF;
public String outputName = ""; public String outputName = "";
@ -115,6 +121,10 @@ public class CorpusTab {
public ComboBox<String> selectReaderCB; public ComboBox<String> selectReaderCB;
public String selectReader; public String selectReader;
@FXML
public ComboBox<String> punctuationCB;
public String punctuation;
@FXML @FXML
private ProgressIndicator locationScanPI; private ProgressIndicator locationScanPI;
@ -137,7 +147,7 @@ public class CorpusTab {
private OneWordAnalysisTab oneWordTabController; private OneWordAnalysisTab oneWordTabController;
private CharacterAnalysisTab catController; private CharacterAnalysisTab catController;
private FiltersForSolar ffsController; private FiltersForSolar ffsController;
private WordFormationTab wfController; // private WordFormationTab wfController;
private WordLevelTab wlController; private WordLevelTab wlController;
private HostServices hostService; private HostServices hostService;
@ -146,6 +156,10 @@ public class CorpusTab {
private static final String [] SELECT_READER_ARRAY = {"VERT + REGI", "XML (Šolar 1.0)", "XML (GOS 1.0)", "XML (ssj500k 2.1)", "XML (Gigafida 2.0)", "XML (Gigafida 1.0, Kres 1.0)"}; private static final String [] SELECT_READER_ARRAY = {"VERT + REGI", "XML (Šolar 1.0)", "XML (GOS 1.0)", "XML (ssj500k 2.1)", "XML (Gigafida 2.0)", "XML (Gigafida 1.0, Kres 1.0)"};
private static final ArrayList<String> SELECT_READER = new ArrayList<>(Arrays.asList(SELECT_READER_ARRAY)); private static final ArrayList<String> SELECT_READER = new ArrayList<>(Arrays.asList(SELECT_READER_ARRAY));
private static final String [] PUNCTUATION_ARRAY = {"punctuation.COMMA", "punctuation.POINT"};
private static final ArrayList<String> PUNCTUATION = new ArrayList<>(Arrays.asList(PUNCTUATION_ARRAY));
private Collection<File> corpusFiles; private Collection<File> corpusFiles;
private File selectedDirectory; private File selectedDirectory;
@ -219,6 +233,23 @@ public class CorpusTab {
selectReaderCB.getSelectionModel().select(0); selectReaderCB.getSelectionModel().select(0);
// comma / point choice
punctuationCB.valueProperty().addListener((observable, oldValue, newValue) -> {
if(newValue == null){
// newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
newValue = I18N.getTranslatedValue(oldValue, PUNCTUATION);
punctuationCB.getSelectionModel().select(newValue);
}
// System.out.println(oldValue);
// System.out.println(newValue);
punctuation = newValue;
if(corpus != null) {
corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION));
}
});
punctuationCB.getSelectionModel().select(0);
// add listeners // add listeners
chooseCorpusLocationB.setOnAction(e -> chooseCorpusLocation()); chooseCorpusLocationB.setOnAction(e -> chooseCorpusLocation());
// chooseCorpusLocationB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_chooseCorpusLocationB"))); // chooseCorpusLocationB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_chooseCorpusLocationB")));
@ -301,12 +332,16 @@ public class CorpusTab {
readHeaderInfoL.textProperty().bind(I18N.createStringBinding("label.readHeaderInfo")); readHeaderInfoL.textProperty().bind(I18N.createStringBinding("label.readHeaderInfo"));
selectReaderL.textProperty().bind(I18N.createStringBinding("label.selectReader")); selectReaderL.textProperty().bind(I18N.createStringBinding("label.selectReader"));
outputNameL.textProperty().bind(I18N.createStringBinding("label.outputName")); outputNameL.textProperty().bind(I18N.createStringBinding("label.outputName"));
punctuationL.textProperty().bind(I18N.createStringBinding("label.punctuation"));
addTooltipToImage(chooseCorpusLocationI, I18N.createStringBinding("label.corpusTab.chooseCorpusLocationH")); addTooltipToImage(chooseCorpusLocationI, I18N.createStringBinding("label.corpusTab.chooseCorpusLocationH"));
addTooltipToImage(readHeaderInfoI, I18N.createStringBinding("label.corpusTab.readHeaderInfoH")); addTooltipToImage(readHeaderInfoI, I18N.createStringBinding("label.corpusTab.readHeaderInfoH"));
addTooltipToImage(chooseResultsLocationI, I18N.createStringBinding("label.corpusTab.chooseResultsLocationH")); addTooltipToImage(chooseResultsLocationI, I18N.createStringBinding("label.corpusTab.chooseResultsLocationH"));
addTooltipToImage(selectReaderI, I18N.createStringBinding("label.corpusTab.selectReaderH")); addTooltipToImage(selectReaderI, I18N.createStringBinding("label.corpusTab.selectReaderH"));
addTooltipToImage(outputNameI, I18N.createStringBinding("label.corpusTab.outputNameH")); addTooltipToImage(outputNameI, I18N.createStringBinding("label.corpusTab.outputNameH"));
addTooltipToImage(punctuationI, I18N.createStringBinding("label.corpusTab.punctuationH"));
punctuationCB.itemsProperty().bind(I18N.createObjectBinding(PUNCTUATION));
} }
private void togglePiAndSetCorpusWrapper(boolean piIsActive) { private void togglePiAndSetCorpusWrapper(boolean piIsActive) {
@ -456,8 +491,9 @@ public class CorpusTab {
} }
} }
} }
System.out.println(outputName); // System.out.println(outputName);
corpus.setCorpusName(outputName); corpus.setCorpusName(outputName);
corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION));
} }
/** /**

View File

@ -167,6 +167,30 @@ public final class I18N {
// return MessageFormat.format(bundle.getString(key), args); // return MessageFormat.format(bundle.getString(key), args);
} }
/**
 * Maps a displayed value back to the entry of {@code nGramComputeForLetters}
 * it belongs to.
 *
 * <p>The value is first compared against {@code getIndependent(el, loc)} for
 * every entry, where {@code loc} is sl-SI when the current locale is English
 * and English otherwise. If nothing matches, it is compared against
 * {@code get(el)} for every entry.
 *
 * @param oldValue               the displayed value to resolve; may be {@code null}
 * @param nGramComputeForLetters candidate entries (presumably resource-bundle
 *                               keys such as {@code "punctuation.COMMA"} — confirm
 *                               against callers)
 * @return the first matching entry, or {@code null} when {@code oldValue} is
 *         {@code null} or no entry matches
 */
public static String getRootValue(String oldValue, ArrayList<String> nGramComputeForLetters) {
    // A null value cannot match any entry; report "not found" instead of
    // throwing a NullPointerException from oldValue.equals(...).
    if (oldValue == null) {
        return null;
    }

    Locale loc;
    if (getLocale().equals(Locale.ENGLISH)) {
        loc = new Locale.Builder().setLanguage("sl").setRegion("SI").build();
    } else {
        loc = Locale.ENGLISH;
    }

    for (String el : nGramComputeForLetters) {
        if (oldValue.equals(getIndependent(el, loc))) {
            return el;
        }
    }

    // in case translated language doesn't contain specified word, try original language
    for (String el : nGramComputeForLetters) {
        if (oldValue.equals(get(el))) {
            return el;
        }
    }

    return null;
}
public static String getTranslatedValue(String oldValue, ArrayList<String> nGramComputeForLetters) { public static String getTranslatedValue(String oldValue, ArrayList<String> nGramComputeForLetters) {
Locale loc; Locale loc;
if(getLocale().equals(Locale.ENGLISH)) { if(getLocale().equals(Locale.ENGLISH)) {

View File

@ -210,13 +210,13 @@ public class OneWordAnalysisTab {
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica"); // private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); // private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY)); private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); // private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD"}; private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY)); private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica"); // private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"}; private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY)); private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); // private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
@ -300,7 +300,7 @@ public class OneWordAnalysisTab {
alsoVisualizeCCB.getItems().removeAll(); alsoVisualizeCCB.getItems().removeAll();
if (newValue.equals(CalculateFor.LEMMA.toString())) { if (newValue.equals(CalculateFor.LEMMA.toString())) {
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA)); alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
} else if (newValue.equals(CalculateFor.WORD.toString())) { } else if (newValue.equals(CalculateFor.WORD.toString()) || newValue.equals(CalculateFor.LOWERCASE_WORD.toString())) {
if (corpus.getCorpusType() == CorpusType.GOS) if (corpus.getCorpusType() == CorpusType.GOS)
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS)); alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS));
else else

View File

@ -250,13 +250,13 @@ public class StringAnalysisTabNew2 {
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList(); // private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); // private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY)); private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); // private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD"}; private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY)); private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica"); // private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"}; private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY)); private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); // private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
@ -408,7 +408,7 @@ public class StringAnalysisTabNew2 {
alsoVisualizeCCB.getItems().removeAll(); alsoVisualizeCCB.getItems().removeAll();
if (newValue.equals(CalculateFor.LEMMA.toString())) { if (newValue.equals(CalculateFor.LEMMA.toString())) {
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA)); alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
} else if (newValue.equals(CalculateFor.WORD.toString())) { } else if (newValue.equals(CalculateFor.WORD.toString()) || newValue.equals(CalculateFor.LOWERCASE_WORD.toString())) {
if (corpus.getCorpusType() == CorpusType.GOS) if (corpus.getCorpusType() == CorpusType.GOS)
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS)); alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS));
else else

View File

@ -1,260 +1,260 @@
package gui; //package gui;
//
import static alg.XML_processing.*; //import static alg.XML_processing.*;
import static gui.GUIController.*; //import static gui.GUIController.*;
//
import java.io.File; //import java.io.File;
import java.io.UnsupportedEncodingException; //import java.io.UnsupportedEncodingException;
import java.util.*; //import java.util.*;
//
import javafx.application.HostServices; //import javafx.application.HostServices;
import javafx.scene.control.*; //import javafx.scene.control.*;
import org.apache.commons.lang3.StringUtils; //import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager; //import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; //import org.apache.logging.log4j.Logger;
import org.controlsfx.control.CheckComboBox; //import org.controlsfx.control.CheckComboBox;
//
import data.*; //import data.*;
import javafx.collections.ListChangeListener; //import javafx.collections.ListChangeListener;
import javafx.collections.ObservableList; //import javafx.collections.ObservableList;
import javafx.concurrent.Task; //import javafx.concurrent.Task;
import javafx.fxml.FXML; //import javafx.fxml.FXML;
import javafx.scene.layout.AnchorPane; //import javafx.scene.layout.AnchorPane;
//
@SuppressWarnings("Duplicates") //@SuppressWarnings("Duplicates")
public class WordFormationTab { //public class WordFormationTab {
public final static Logger logger = LogManager.getLogger(WordFormationTab.class); // public final static Logger logger = LogManager.getLogger(WordFormationTab.class);
//
public AnchorPane wordAnalysisTabPane; // public AnchorPane wordAnalysisTabPane;
//
@FXML // @FXML
public Label selectedFiltersLabel; // public Label selectedFiltersLabel;
@FXML // @FXML
public Label solarFilters; // public Label solarFilters;
//
@FXML // @FXML
private CheckComboBox<String> taxonomyCCB; // private CheckComboBox<String> taxonomyCCB;
private ArrayList<Taxonomy> taxonomy; // private ArrayList<Taxonomy> taxonomy;
//
@FXML // @FXML
private TextField minimalOccurrencesTF; // private TextField minimalOccurrencesTF;
private Integer minimalOccurrences; // private Integer minimalOccurrences;
//
@FXML // @FXML
private TextField minimalTaxonomyTF; // private TextField minimalTaxonomyTF;
private Integer minimalTaxonomy; // private Integer minimalTaxonomy;
//
@FXML // @FXML
private Button computeB; // private Button computeB;
//
@FXML // @FXML
public ProgressBar ngramProgressBar; // public ProgressBar ngramProgressBar;
@FXML // @FXML
public Label progressLabel; // public Label progressLabel;
//
@FXML // @FXML
private Hyperlink helpH; // private Hyperlink helpH;
//
private Corpus corpus; // private Corpus corpus;
private HashMap<String, HashSet<String>> solarFiltersMap; // private HashMap<String, HashSet<String>> solarFiltersMap;
private HostServices hostService; // private HostServices hostService;
//
// after header scan // // after header scan
private ObservableList<String> taxonomyCCBValues; // private ObservableList<String> taxonomyCCBValues;
private CorpusType currentCorpusType; // private CorpusType currentCorpusType;
private boolean useDb; // private boolean useDb;
//
//
public void init() { // public void init() {
// taxonomy // // taxonomy
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) { // if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
taxonomyCCB.getItems().removeAll(); // taxonomyCCB.getItems().removeAll();
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy()); // taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> { // taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
taxonomy = new ArrayList<>(); // taxonomy = new ArrayList<>();
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems(); // ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems, corpus); // ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems, corpus);
taxonomy.addAll(checkedItemsTaxonomy); // taxonomy.addAll(checkedItemsTaxonomy);
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ","))); // logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
}); // });
taxonomyCCB.getCheckModel().clearChecks(); // taxonomyCCB.getCheckModel().clearChecks();
} else { // } else {
taxonomyCCB.setDisable(true); // taxonomyCCB.setDisable(true);
} // }
//
// set default values // // set default values
minimalOccurrencesTF.setText("1"); // minimalOccurrencesTF.setText("1");
minimalOccurrences = 1; // minimalOccurrences = 1;
//
minimalTaxonomyTF.setText("1"); // minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1; // minimalTaxonomy = 1;
//
minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> { // minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) { // if (!newValue) {
// focus lost // // focus lost
String value = minimalOccurrencesTF.getText(); // String value = minimalOccurrencesTF.getText();
if (!ValidationUtil.isEmpty(value)) { // if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) { // if (!ValidationUtil.isNumber(value)) {
logAlert("minimalOccurrencesTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); // logAlert("minimalOccurrencesTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); // GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
} else { // } else {
minimalOccurrences = Integer.parseInt(value); // minimalOccurrences = Integer.parseInt(value);
} // }
} else { // } else {
minimalOccurrencesTF.setText("1"); // minimalOccurrencesTF.setText("1");
minimalOccurrences = 1; // minimalOccurrences = 1;
} // }
} // }
}); // });
//
minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> { // minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
if (!newValue) { // if (!newValue) {
// focus lost // // focus lost
String value = minimalTaxonomyTF.getText(); // String value = minimalTaxonomyTF.getText();
if (!ValidationUtil.isEmpty(value)) { // if (!ValidationUtil.isEmpty(value)) {
if (!ValidationUtil.isNumber(value)) { // if (!ValidationUtil.isNumber(value)) {
logAlert("minimalTaxonomyTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); // logAlert("minimalTaxonomyTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED")); // GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
} else { // } else {
minimalTaxonomy = Integer.parseInt(value); // minimalTaxonomy = Integer.parseInt(value);
} // }
} else { // } else {
minimalTaxonomyTF.setText("1"); // minimalTaxonomyTF.setText("1");
minimalTaxonomy = 1; // minimalTaxonomy = 1;
} // }
} // }
}); // });
//
computeB.setOnAction(e -> { // computeB.setOnAction(e -> {
compute(); // compute();
logger.info("compute button"); // logger.info("compute button");
}); // });
//
helpH.setOnAction(e -> openHelpWebsite()); // helpH.setOnAction(e -> openHelpWebsite());
} // }
//
private void compute() { // private void compute() {
Filter filter = new Filter(); // Filter filter = new Filter();
filter.setNgramValue(1); // filter.setNgramValue(1);
filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY); // filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
filter.setTaxonomy(taxonomy); // filter.setTaxonomy(taxonomy);
filter.setAl(AnalysisLevel.STRING_LEVEL); // filter.setAl(AnalysisLevel.STRING_LEVEL);
filter.setSkipValue(0); // filter.setSkipValue(0);
filter.setMsd(new ArrayList<>()); // filter.setMsd(new ArrayList<>());
filter.setIsCvv(false); // filter.setIsCvv(false);
filter.setSolarFilters(solarFiltersMap); // filter.setSolarFilters(solarFiltersMap);
filter.setMinimalOccurrences(minimalOccurrences); // filter.setMinimalOccurrences(minimalOccurrences);
filter.setMinimalTaxonomy(minimalTaxonomy); // filter.setMinimalTaxonomy(minimalTaxonomy);
//
String message = Validation.validateForStringLevel(filter); // String message = Validation.validateForStringLevel(filter);
if (message == null) { // if (message == null) {
// no errors // // no errors
logger.info("Executing: ", filter.toString()); // logger.info("Executing: ", filter.toString());
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb); // StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
execute(statistic); // execute(statistic);
} else { // } else {
logAlert(message); // logAlert(message);
showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message); // showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
} // }
} // }
//
private void openHelpWebsite(){ // private void openHelpWebsite(){
hostService.showDocument(Messages.HELP_URL); // hostService.showDocument(Messages.HELP_URL);
} // }
//
private void execute(StatisticsNew statistic) { // private void execute(StatisticsNew statistic) {
logger.info("Started execution: ", statistic.getFilter()); // logger.info("Started execution: ", statistic.getFilter());
//
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles(); // Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
//
final Task<Void> task = new Task<Void>() { // final Task<Void> task = new Task<Void>() {
@SuppressWarnings("Duplicates") // @SuppressWarnings("Duplicates")
@Override // @Override
protected Void call() throws Exception { // protected Void call() throws Exception {
int i = 0; // int i = 0;
Date startTime = new Date(); // Date startTime = new Date();
Date previousTime = new Date(); // Date previousTime = new Date();
for (File f : corpusFiles) { // for (File f : corpusFiles) {
readXML(f.toString(), statistic); // readXML(f.toString(), statistic);
i++; // i++;
this.updateProgress(i, corpusFiles.size()); // this.updateProgress(i, corpusFiles.size());
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName())); // this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
} // }
//
return null; // return null;
} // }
}; // };
//
ngramProgressBar.progressProperty().bind(task.progressProperty()); // ngramProgressBar.progressProperty().bind(task.progressProperty());
progressLabel.textProperty().bind(task.messageProperty()); // progressLabel.textProperty().bind(task.messageProperty());
//
task.setOnSucceeded(e -> { // task.setOnSucceeded(e -> {
try { // try {
// first, we have to recalculate all occurrences to detailed statistics // // first, we have to recalculate all occurrences to detailed statistics
boolean successullySaved = statistic.recalculateAndSaveResultToDisk(); // boolean successullySaved = statistic.recalculateAndSaveResultToDisk();
//
if (successullySaved) { // if (successullySaved) {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED")); // showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
} else { // } else {
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS")); // showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
} // }
} catch (UnsupportedEncodingException e1) { // } catch (UnsupportedEncodingException e1) {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV")); // showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
logger.error("Error while saving", e1); // logger.error("Error while saving", e1);
} // }
//
ngramProgressBar.progressProperty().unbind(); // ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK); // ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
progressLabel.textProperty().unbind(); // progressLabel.textProperty().unbind();
progressLabel.setText(""); // progressLabel.setText("");
}); // });
//
task.setOnFailed(e -> { // task.setOnFailed(e -> {
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING")); // showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
logger.error("Error while executing", e); // logger.error("Error while executing", e);
ngramProgressBar.progressProperty().unbind(); // ngramProgressBar.progressProperty().unbind();
ngramProgressBar.setProgress(0.0); // ngramProgressBar.setProgress(0.0);
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK); // ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
progressLabel.textProperty().unbind(); // progressLabel.textProperty().unbind();
progressLabel.setText(""); // progressLabel.setText("");
}); // });
//
final Thread thread = new Thread(task, "task"); // final Thread thread = new Thread(task, "task");
thread.setDaemon(true); // thread.setDaemon(true);
thread.start(); // thread.start();
} // }
//
private void logAlert(String alert) { // private void logAlert(String alert) {
logger.info("alert: " + alert); // logger.info("alert: " + alert);
} // }
//
//
public void setCorpus(Corpus corpus) { // public void setCorpus(Corpus corpus) {
this.corpus = corpus; // this.corpus = corpus;
//
if (corpus.getCorpusType() != CorpusType.SOLAR) { // if (corpus.getCorpusType() != CorpusType.SOLAR) {
setSelectedFiltersLabel(null); // setSelectedFiltersLabel(null);
} else { // } else {
setSelectedFiltersLabel("/"); // setSelectedFiltersLabel("/");
} // }
} // }
//
public void setSelectedFiltersLabel(String content) { // public void setSelectedFiltersLabel(String content) {
if (content != null) { // if (content != null) {
solarFilters.setVisible(true); // solarFilters.setVisible(true);
selectedFiltersLabel.setVisible(true); // selectedFiltersLabel.setVisible(true);
selectedFiltersLabel.setText(content); // selectedFiltersLabel.setText(content);
} else { // } else {
solarFilters.setVisible(false); // solarFilters.setVisible(false);
selectedFiltersLabel.setVisible(false); // selectedFiltersLabel.setVisible(false);
} // }
} // }
//
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) { // public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
this.solarFiltersMap = solarFiltersMap; // this.solarFiltersMap = solarFiltersMap;
} // }
//
public void setHostServices(HostServices hostServices){ // public void setHostServices(HostServices hostServices){
this.hostService = hostServices; // this.hostService = hostServices;
} // }
} //}

View File

@ -228,13 +228,13 @@ public class WordLevelTab {
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica"); // private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica"); // private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA"}; private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY)); private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica"); // private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD"}; private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY)); private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica"); // private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.NORMALIZED_WORD"}; private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.NORMALIZED_WORD"};
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY)); private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka"); // private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"}; private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
@ -320,7 +320,7 @@ public class WordLevelTab {
alsoVisualizeCCB.getItems().removeAll(); alsoVisualizeCCB.getItems().removeAll();
if (newValue.equals(CalculateFor.LEMMA.toString())) { if (newValue.equals(CalculateFor.LEMMA.toString())) {
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA)); alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
} else if (newValue.equals(CalculateFor.WORD.toString())) { } else if (newValue.equals(CalculateFor.WORD.toString()) || newValue.equals(CalculateFor.LOWERCASE_WORD.toString())) {
if (corpus.getCorpusType() == CorpusType.GOS) if (corpus.getCorpusType() == CorpusType.GOS)
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS)); alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS));
else else

View File

@ -23,41 +23,41 @@ import data.Enums.WordLevelType;
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public class Export { public class Export {
public static void SetToJSON(Set<Pair<String, Map<MultipleHMKeys, Long>>> set) { // public static void SetToJSON(Set<Pair<String, Map<MultipleHMKeys, Long>>> set) {
JSONArray wrapper = new JSONArray(); // JSONArray wrapper = new JSONArray();
//
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) { // for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
JSONArray data_wrapper = new JSONArray(); // JSONArray data_wrapper = new JSONArray();
JSONObject metric = new JSONObject(); // JSONObject metric = new JSONObject();
//
String title = p.getLeft(); // String title = p.getLeft();
Map<MultipleHMKeys, Long> map = p.getRight(); // Map<MultipleHMKeys, Long> map = p.getRight();
//
if (map.isEmpty()) // if (map.isEmpty())
continue; // continue;
//
long total = Util.mapSumFrequencies(map); // long total = Util.mapSumFrequencies(map);
//
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) { // for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
JSONObject data_entry = new JSONObject(); // JSONObject data_entry = new JSONObject();
data_entry.put("word", e.getKey()); // data_entry.put("word", e.getKey());
data_entry.put("frequency", e.getValue()); // data_entry.put("frequency", e.getValue());
data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total)); // data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total));
//
data_wrapper.add(data_entry); // data_wrapper.add(data_entry);
} // }
//
metric.put("Title", title); // metric.put("Title", title);
metric.put("data", data_wrapper); // metric.put("data", data_wrapper);
wrapper.add(metric); // wrapper.add(metric);
} // }
//
try (FileWriter file = new FileWriter("statistics.json")) { // try (FileWriter file = new FileWriter("statistics.json")) {
file.write(wrapper.toJSONString()); // file.write(wrapper.toJSONString());
} catch (IOException e) { // } catch (IOException e) {
e.printStackTrace(); // e.printStackTrace();
} // }
} // }
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock, public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
StatisticsNew statistics, Filter filter) { StatisticsNew statistics, Filter filter) {
@ -127,12 +127,6 @@ public class Export {
FILE_HEADER_AL.add(I18N.get("exportTable.totalRelativeFrequency")); FILE_HEADER_AL.add(I18N.get("exportTable.totalRelativeFrequency"));
if (filter.getCollocability().size() > 0){
for (Collocability c : filter.getCollocability()) {
FILE_HEADER_AL.add(c.toHeaderString());
}
}
for (Taxonomy key : taxonomyResults.keySet()) { for (Taxonomy key : taxonomyResults.keySet()) {
if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) { if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
FILE_HEADER_AL.add(I18N.get("exportTable.absoluteFrequency") + " [" + key.toString() + "]"); FILE_HEADER_AL.add(I18N.get("exportTable.absoluteFrequency") + " [" + key.toString() + "]");
@ -141,6 +135,13 @@ public class Export {
} }
} }
if (filter.getCollocability().size() > 0){
for (Collocability c : filter.getCollocability()) {
FILE_HEADER_AL.add(c.toHeaderString());
}
}
if (filter.getWriteMsdAtTheEnd()) { if (filter.getWriteMsdAtTheEnd()) {
String msd = ""; String msd = "";
int maxMsdLength = 0; int maxMsdLength = 0;
@ -280,14 +281,14 @@ public class Export {
dataEntry.add(e.getValue().toString()); dataEntry.add(e.getValue().toString());
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal()))); dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal()), statistics.getCorpus().getPunctuation()));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue())); dataEntry.add(formatNumberForExport(((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue(), statistics.getCorpus().getPunctuation()));
for (Taxonomy key : taxonomyResults.keySet()){ for (Taxonomy key : taxonomyResults.keySet()){
if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) { if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey()); AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
dataEntry.add(frequency.toString()); dataEntry.add(frequency.toString());
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key))); dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key), statistics.getCorpus().getPunctuation()));
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue())); dataEntry.add(formatNumberForExport(((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue(), statistics.getCorpus().getPunctuation()));
// dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences())); // dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences()));
// dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences())); // dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences()));
} }
@ -296,7 +297,7 @@ public class Export {
if (filter.getCollocability().size() > 0){ if (filter.getCollocability().size() > 0){
for (Collocability c : filter.getCollocability()) { for (Collocability c : filter.getCollocability()) {
dataEntry.add(statistics.getCollocability().get(c).get(e.getKey())); dataEntry.add(formatNumberForLongExport(statistics.getCollocability().get(c).get(e.getKey()), statistics.getCorpus().getPunctuation()));
} }
} }
@ -371,66 +372,66 @@ public class Export {
return s; return s;
} }
public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) { // public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
//Delimiter used in CSV file // //Delimiter used in CSV file
String NEW_LINE_SEPARATOR = "\n"; // String NEW_LINE_SEPARATOR = "\n";
//
//CSV file header // //CSV file header
Object[] FILE_HEADER = {"word", "frequency", "percent"}; // Object[] FILE_HEADER = {"word", "frequency", "percent"};
//
String fileName = ""; // String fileName = "";
//
fileName = title.replace(": ", "-"); // fileName = title.replace(": ", "-");
fileName = fileName.replace(" ", "_").concat(".csv"); // fileName = fileName.replace(" ", "_").concat(".csv");
//
fileName = resultsPath.toString().concat(File.separator).concat(fileName); // fileName = resultsPath.toString().concat(File.separator).concat(fileName);
//
OutputStreamWriter fileWriter = null; // OutputStreamWriter fileWriter = null;
CSVPrinter csvFilePrinter = null; // CSVPrinter csvFilePrinter = null;
//
//Create the CSVFormat object with "\n" as a record delimiter // //Create the CSVFormat object with "\n" as a record delimiter
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';'); // CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
//
try { // try {
//initialize FileWriter object // //initialize FileWriter object
fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8); // fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
//
//initialize CSVPrinter object // //initialize CSVPrinter object
csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat); // csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
//
// write info block // // write info block
printHeaderInfo(csvFilePrinter, headerInfoBlock); // printHeaderInfo(csvFilePrinter, headerInfoBlock);
//
//Create CSV file header // //Create CSV file header
csvFilePrinter.printRecord(FILE_HEADER); // csvFilePrinter.printRecord(FILE_HEADER);
//
for (Object[] resultEntry : result) { // for (Object[] resultEntry : result) {
List dataEntry = new ArrayList<>(); // List dataEntry = new ArrayList<>();
dataEntry.add(resultEntry[0]); // dataEntry.add(resultEntry[0]);
dataEntry.add(resultEntry[1]); // dataEntry.add(resultEntry[1]);
dataEntry.add(formatNumberAsPercent(resultEntry[2])); // dataEntry.add(formatNumberAsPercent(resultEntry[2]), statistics.getCorpus().getPunctuation());
csvFilePrinter.printRecord(dataEntry); // csvFilePrinter.printRecord(dataEntry);
} // }
} catch (Exception e) { // } catch (Exception e) {
System.out.println("Error in CsvFileWriter!"); // System.out.println("Error in CsvFileWriter!");
e.printStackTrace(); // e.printStackTrace();
} finally { // } finally {
try { // try {
if (fileWriter != null) { // if (fileWriter != null) {
fileWriter.flush(); // fileWriter.flush();
fileWriter.close(); // fileWriter.close();
} // }
if (csvFilePrinter != null) { // if (csvFilePrinter != null) {
csvFilePrinter.close(); // csvFilePrinter.close();
} // }
} catch (IOException e) { // } catch (IOException e) {
System.out.println("Error while flushing/closing fileWriter/csvPrinter!"); // System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
e.printStackTrace(); // e.printStackTrace();
} // }
} // }
//
return fileName; // return fileName;
} // }
public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) { public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
//Delimiter used in CSV file //Delimiter used in CSV file

View File

@ -54,8 +54,29 @@ public class Util {
return "- invalid input format -"; return "- invalid input format -";
} }
public static String formatNumberAsPercent(Object o) { public static String formatNumberAsPercent(Object o, String punctuation) {
if(punctuation.equals("punctuation.COMMA")) {
return MessageFormat.format("{0,number,#.### %}", o).replace('.', ','); return MessageFormat.format("{0,number,#.### %}", o).replace('.', ',');
} else {
return MessageFormat.format("{0,number,#.### %}", o);
}
}
/**
 * Formats a numeric value for the export file with at most two fraction digits,
 * e.g. {@code 1234.5} becomes {@code "1234.5"} (or {@code "1234,5"} with a comma separator).
 *
 * <p>The format symbols are taken from {@code Locale.ROOT} instead of the JVM default
 * locale, so the decimal separator in the result depends only on {@code punctuation}.
 * Formatting through the default locale would already produce a comma on
 * comma-decimal systems, even when a decimal point was requested.
 *
 * @param o           value to format; expected to be a {@link Number}
 * @param punctuation {@code "punctuation.COMMA"} selects a decimal comma; any other
 *                    value (including {@code null}) selects a decimal point
 * @return the formatted number, or {@code "null"} when {@code o} is {@code null}
 * @throws IllegalArgumentException if {@code o} is non-null and not a {@link Number}
 */
public static String formatNumberForExport(Object o, String punctuation) {
    if (o == null) {
        // MessageFormat, used here previously, rendered a null argument as "null"; keep that.
        return "null";
    }
    java.text.DecimalFormatSymbols symbols = new java.text.DecimalFormatSymbols(java.util.Locale.ROOT);
    symbols.setDecimalSeparator("punctuation.COMMA".equals(punctuation) ? ',' : '.');
    return new java.text.DecimalFormat("#.##", symbols).format(o);
}
public static String formatNumberForLongExport(Object o, String punctuation) {
if(punctuation.equals("punctuation.COMMA")) {
return MessageFormat.format("{0,number,#.########}", o).replace('.', ',');
} else {
return MessageFormat.format("{0,number,#.########}", o);
}
} }
private static boolean isInstanceOfInteger(Object o) { private static boolean isInstanceOfInteger(Object o) {

View File

@ -57,6 +57,13 @@
<Image url="questionmark.png" backgroundLoading="true"/> <Image url="questionmark.png" backgroundLoading="true"/>
</ImageView> </ImageView>
<Label fx:id="punctuationL" layoutX="10.0" layoutY="340.0" prefHeight="25.0" text="Decimalno znamenje"/>
<ComboBox fx:id="punctuationCB" layoutX="225.0" layoutY="340.0" minWidth="140.0" prefWidth="140.0"
visibleRowCount="5"/>
<ImageView fx:id="punctuationI" layoutX="370.0" layoutY="347.5" pickOnBounds="true" preserveRatio="true">
<Image url="questionmark.png" backgroundLoading="true"/>
</ImageView>
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0"/> <Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0"/>
<Button fx:id="changeLanguageB" layoutX="710.0" layoutY="40.0" mnemonicParsing="false" prefWidth="50.0"/> <Button fx:id="changeLanguageB" layoutX="710.0" layoutY="40.0" mnemonicParsing="false" prefWidth="50.0"/>
</children> </children>

View File

@ -17,18 +17,20 @@ tab.stringLevelTabNew2=Word sets
# corpus tab # corpus tab
label.setCorpusLocation=Set corpus location label.setCorpusLocation=Set corpus location
button.setCorpusLocation=Set location button.setCorpusLocation=Set location
label.readHeaderInfo=Read info from headers label.readHeaderInfo=Read tax. from corpus files
checkBox.readHeaderInfo= checkBox.readHeaderInfo=
label.chooseResultsLocation=Choose result location label.chooseResultsLocation=Choose result location
button.chooseResultsLocation=Set location button.chooseResultsLocation=Set location
label.selectReader=Select reader label.selectReader=Select reader
label.outputName=Output file name label.outputName=Output file name
label.punctuation=Decimal separator
label.corpusTab.chooseCorpusLocationH=Select the folder which contains the corpus. The folder should only contain one corpus and should not contain files that are not part of the corpus. label.corpusTab.chooseCorpusLocationH=Select the folder which contains the corpus. The folder should only contain one corpus and should not contain files that are not part of the corpus.
label.corpusTab.readHeaderInfoH=If you select this option, the taxonomy will be read separately. This might take a while. label.corpusTab.readHeaderInfoH=The program will read the taxonomy from corpus files. This might take a while.
label.corpusTab.chooseResultsLocationH=Choose result location label.corpusTab.chooseResultsLocationH=Choose result location
label.corpusTab.selectReaderH=Select reader label.corpusTab.selectReaderH=Select reader
label.corpusTab.outputNameH=Output file name label.corpusTab.outputNameH=Output file name
label.corpusTab.punctuationH=Select decimal separator used in export files.
# character analysis tab # character analysis tab
label.stringLength=Number of characters label.stringLength=Number of characters
@ -40,7 +42,7 @@ label.taxonomy=Filter by taxonomy
label.minimalOccurrences=Min. nr. occurrences label.minimalOccurrences=Min. nr. occurrences
label.minimalTaxonomy=Min. nr. tax. branches label.minimalTaxonomy=Min. nr. tax. branches
label.minimalRelFre=Min. rel. frequency label.minimalRelFre=Min. rel. frequency
label.taxonomySetOperation=Filtriraj taksonomijo po label.taxonomySetOperation=Filter taxonomy by
label.solarFilters=Selected filters: label.solarFilters=Selected filters:
string.lemma=lemma string.lemma=lemma
string.word=word string.word=word
@ -52,7 +54,7 @@ label.letter.msdH=Character strings will be counted only in words with the provi
label.letter.taxonomyH=Character strings will be counted only in selected text types. label.letter.taxonomyH=Character strings will be counted only in selected text types.
label.letter.minimalOccurrencesH=Character strings with fewer occurrences will not be included in the output. label.letter.minimalOccurrencesH=Character strings with fewer occurrences will not be included in the output.
label.letter.minimalTaxonomyH=Character strings that occur in fewer taxonomy branches will not be included in the output. label.letter.minimalTaxonomyH=Character strings that occur in fewer taxonomy branches will not be included in the output.
label.letter.taxonomySetOperationH=Izpisuj iz besedil, ki ustrezajo vsaj eni od izbranih vej (unija) ali vsem izbranim vejam (presek) label.letter.taxonomySetOperationH=Extract information from texts that fit into at least one (union) or all (intersection) of the selected branches.
# word part tab # word part tab
label.alsoVisualize=Also split by label.alsoVisualize=Also split by
@ -101,8 +103,13 @@ label.wordSet.taxonomyH=Word sets will only be extracted from the selected taxon
label.wordSet.minimalOccurrencesH=Word sets with fewer occurrences will not be included in the output. label.wordSet.minimalOccurrencesH=Word sets with fewer occurrences will not be included in the output.
label.wordSet.minimalTaxonomyH=Word sets that occur in fewer taxonomy branches will not be included in the output. label.wordSet.minimalTaxonomyH=Word sets that occur in fewer taxonomy branches will not be included in the output.
# punctuations
punctuation.COMMA=comma (,)
punctuation.POINT=point (.)
# calculate for # calculate for
calculateFor.WORD=word calculateFor.WORD=word
calculateFor.LOWERCASE_WORD=lowercase word
calculateFor.NORMALIZED_WORD=normalized word calculateFor.NORMALIZED_WORD=normalized word
calculateFor.LEMMA=lemma calculateFor.LEMMA=lemma
calculateFor.MORPHOSYNTACTIC_SPECS=morphosyntactic tag calculateFor.MORPHOSYNTACTIC_SPECS=morphosyntactic tag
@ -187,7 +194,7 @@ windowTitles.warning=Warning
windowTitles.confirmation=Confirmation windowTitles.confirmation=Confirmation
# export header translations # export header translations
exportHeader.corpus=Corpus: exportHeader.corpus=Reader:
exportHeader.date=Date: exportHeader.date=Date:
exportHeader.executionTime=Execution time: exportHeader.executionTime=Execution time:
exportHeader.analysis=Analysis: exportHeader.analysis=Analysis:
@ -212,6 +219,7 @@ exportHeader.msd=Morphosyntactic tag:
exportHeader.taxonomy=Filter by taxonomy: exportHeader.taxonomy=Filter by taxonomy:
exportHeader.minOccurrences=Min. nr. occurrences: exportHeader.minOccurrences=Min. nr. occurrences:
exportHeader.minTaxonomies=Min. nr. taxonomy branches: exportHeader.minTaxonomies=Min. nr. taxonomy branches:
exportHeader.minRelFre=Min. rel. frequency:
exportHeader.additionalFilters=Additional filters: exportHeader.additionalFilters=Additional filters:
exportHeader.yes=yes exportHeader.yes=yes
exportHeader.no=no exportHeader.no=no
@ -231,6 +239,7 @@ exportTable.relativeFrequency=Relative frequency
exportTable.msd=msd exportTable.msd=msd
# parts # parts
exportTable.part.word=words: exportTable.part.word=words:
exportTable.part.lowercaseWord=lowercase words:
exportTable.part.normalizedWord=normalized words: exportTable.part.normalizedWord=normalized words:
exportTable.part.lemma=lemmas: exportTable.part.lemma=lemmas:
exportTable.part.msd=msd: exportTable.part.msd=msd:
@ -239,6 +248,7 @@ exportTable.part.wordType=word type:
exportTable.part.letterSet=character set exportTable.part.letterSet=character set
exportTable.part.word2=word exportTable.part.word2=word
exportTable.part.lowercaseWord2=lowercase word
exportTable.part.normalizedWord2=normalized word exportTable.part.normalizedWord2=normalized word
exportTable.part.lemma2=lemma exportTable.part.lemma2=lemma
exportTable.part.msd2=msd exportTable.part.msd2=msd
@ -248,6 +258,7 @@ exportTable.part.wordType2=word type
exportTable.part.letterSet2=Share of total sum of all letter sets exportTable.part.letterSet2=Share of total sum of all letter sets
exportTable.part.letterSet3=Letter set exportTable.part.letterSet3=Letter set
exportTable.part.word3=Word exportTable.part.word3=Word
exportTable.part.lowercaseWord3=Lowercase word
exportTable.part.normalizedWord3=Normalized word exportTable.part.normalizedWord3=Normalized word
exportTable.part.lemma3=Lemma exportTable.part.lemma3=Lemma
exportTable.part.msd3=Msd exportTable.part.msd3=Msd

View File

@ -1,5 +1,5 @@
# general # general
window.title=Luščilnik window.title=Korpusni luščilnik
hyperlink.help=Pomoč hyperlink.help=Pomoč
button.language=EN button.language=EN
@ -17,18 +17,20 @@ tab.stringLevelTabNew2=Besedni nizi
# corpus tab # corpus tab
label.setCorpusLocation=Nastavi lokacijo korpusa label.setCorpusLocation=Nastavi lokacijo korpusa
button.setCorpusLocation=Ponastavi button.setCorpusLocation=Ponastavi
label.readHeaderInfo=Preberi info iz headerjev label.readHeaderInfo=Preberi taks. iz korp. datotek
checkBox.readHeaderInfo= checkBox.readHeaderInfo=
label.chooseResultsLocation=Nastavi lokacijo rezultatov label.chooseResultsLocation=Nastavi lokacijo izpisov
button.chooseResultsLocation=Ponastavi button.chooseResultsLocation=Ponastavi
label.selectReader=Izberi bralnik label.selectReader=Izberi bralnik
label.outputName=Ime izhodne datoteke label.outputName=Ime izhodne datoteke
label.punctuation=Decimalno znamenje
label.corpusTab.chooseCorpusLocationH=Izberite mapo, v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa. label.corpusTab.chooseCorpusLocationH=Izberite mapo, v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa.
label.corpusTab.readHeaderInfoH=Če izberete to opcijo, se bo iz korpusnih datotek prebrala razpoložljiva taksonomija oz. filtri. Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki. label.corpusTab.readHeaderInfoH=Program bo iz korpusnih datotek prebral taksonomijo. Ta operacija lahko traja dlje časa.
label.corpusTab.chooseResultsLocationH=Nastavi lokacijo rezultatov label.corpusTab.chooseResultsLocationH=Nastavi lokacijo rezultatov
label.corpusTab.selectReaderH=Izberi bralnik label.corpusTab.selectReaderH=Izberi bralnik
label.corpusTab.outputNameH=Ime izhodne datoteke label.corpusTab.outputNameH=Ime izhodne datoteke
label.corpusTab.punctuationH=Izberite želeno decimalno znamenje v izpisu.
# character analysis tab # character analysis tab
label.stringLength=Dolžina znakovnih nizov label.stringLength=Dolžina znakovnih nizov
@ -101,8 +103,13 @@ label.wordSet.taxonomyH=Besedni nizi bodo izpisani samo iz izbranih taksonomskih
label.wordSet.minimalOccurrencesH=Besedni nizi, ki se pojavijo redkeje, ne bodo vključeni v izpis. label.wordSet.minimalOccurrencesH=Besedni nizi, ki se pojavijo redkeje, ne bodo vključeni v izpis.
label.wordSet.minimalTaxonomyH=Besedni nizi, ki so prisotni v manj vejah, ne bodo vključeni v izpis. label.wordSet.minimalTaxonomyH=Besedni nizi, ki so prisotni v manj vejah, ne bodo vključeni v izpis.
# punctuations
punctuation.COMMA=vejica (,)
punctuation.POINT=pika (.)
# calculate for # calculate for
calculateFor.WORD=oblike calculateFor.WORD=oblike
calculateFor.LOWERCASE_WORD=oblike z malimi črkami
calculateFor.NORMALIZED_WORD=normalizirane oblike calculateFor.NORMALIZED_WORD=normalizirane oblike
calculateFor.LEMMA=leme calculateFor.LEMMA=leme
calculateFor.MORPHOSYNTACTIC_SPECS=oblikoskladenjske oznake calculateFor.MORPHOSYNTACTIC_SPECS=oblikoskladenjske oznake
@ -187,7 +194,7 @@ windowTitles.warning=Opozorilo
windowTitles.confirmation=Potrdilo windowTitles.confirmation=Potrdilo
# export header translations # export header translations
exportHeader.corpus=Korpus: exportHeader.corpus=Bralnik:
exportHeader.date=Datum: exportHeader.date=Datum:
exportHeader.executionTime=Čas izvajanja: exportHeader.executionTime=Čas izvajanja:
exportHeader.analysis=Analiza: exportHeader.analysis=Analiza:
@ -212,6 +219,7 @@ exportHeader.msd=Oblikoskladenjska oznaka:
exportHeader.taxonomy=Filtriranje po taksonomiji: exportHeader.taxonomy=Filtriranje po taksonomiji:
exportHeader.minOccurrences=Min. št. pojavitev: exportHeader.minOccurrences=Min. št. pojavitev:
exportHeader.minTaxonomies=Min. št. taksonomskih vej: exportHeader.minTaxonomies=Min. št. taksonomskih vej:
exportHeader.minRelFre=Min. rel. pogostost:
exportHeader.additionalFilters=Dodatni filtri: exportHeader.additionalFilters=Dodatni filtri:
exportHeader.yes=da exportHeader.yes=da
exportHeader.no=ne exportHeader.no=ne
@ -231,6 +239,7 @@ exportTable.relativeFrequency=Relativna pogostost
exportTable.msd=msd exportTable.msd=msd
# parts # parts
exportTable.part.word=oblik: exportTable.part.word=oblik:
exportTable.part.lowercaseWord=oblik z malimi črkami:
exportTable.part.normalizedWord=normaliziranih oblik: exportTable.part.normalizedWord=normaliziranih oblik:
exportTable.part.lemma=lem: exportTable.part.lemma=lem:
exportTable.part.msd=oblikoskladenjskih oznak: exportTable.part.msd=oblikoskladenjskih oznak:
@ -239,6 +248,7 @@ exportTable.part.wordType=besednih vrst:
exportTable.part.letterSet=znakovnega niza exportTable.part.letterSet=znakovnega niza
exportTable.part.word2=oblike exportTable.part.word2=oblike
exportTable.part.lowercaseWord2=oblike z malimi črkami
exportTable.part.normalizedWord2=normalizirane oblike exportTable.part.normalizedWord2=normalizirane oblike
exportTable.part.lemma2=leme exportTable.part.lemma2=leme
exportTable.part.msd2=oblikoskladenjske oznake exportTable.part.msd2=oblikoskladenjske oznake
@ -248,6 +258,7 @@ exportTable.part.wordType2=besedne vrste
exportTable.part.letterSet2=Delež glede na skupno vsoto vseh najdenih znakovnih nizov exportTable.part.letterSet2=Delež glede na skupno vsoto vseh najdenih znakovnih nizov
exportTable.part.letterSet3=Znakovni niz exportTable.part.letterSet3=Znakovni niz
exportTable.part.word3=Oblika exportTable.part.word3=Oblika
exportTable.part.lowercaseWord3=Oblika z malimi črkami
exportTable.part.normalizedWord3=Normalizirana oblika exportTable.part.normalizedWord3=Normalizirana oblika
exportTable.part.lemma3=Lema exportTable.part.lemma3=Lema
exportTable.part.msd3=Oblikoskladenjska oznaka exportTable.part.msd3=Oblikoskladenjska oznaka