Added translation fixes + punctuation in export fix
This commit is contained in:
parent
39624fa4f2
commit
10666b4453
|
@ -1744,7 +1744,7 @@ public class XML_processing {
|
||||||
|
|
||||||
// if we're calculating values for letters, omit words that are shorter than string length
|
// if we're calculating values for letters, omit words that are shorter than string length
|
||||||
if (filter.getNgramValue() == 0) {
|
if (filter.getNgramValue() == 0) {
|
||||||
sentence.removeIf(w -> (filter.getCalculateFor() == CalculateFor.WORD && w.getWord(filter.getWordParts()).length() < filter.getStringLength())
|
sentence.removeIf(w -> ((filter.getCalculateFor() == CalculateFor.WORD || filter.getCalculateFor() == CalculateFor.LOWERCASE_WORD) && w.getWord(filter.getWordParts()).length() < filter.getStringLength())
|
||||||
|| (filter.getCalculateFor() == CalculateFor.LEMMA && w.getLemma(filter.getWordParts()).length() < filter.getStringLength()));
|
|| (filter.getCalculateFor() == CalculateFor.LEMMA && w.getLemma(filter.getWordParts()).length() < filter.getStringLength()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1766,7 +1766,7 @@ public class XML_processing {
|
||||||
|
|
||||||
public static Word createWord(String word, String lemma, String msd, String normalizedWord, Filter f){
|
public static Word createWord(String word, String lemma, String msd, String normalizedWord, Filter f){
|
||||||
List<String> wString = new ArrayList<>();
|
List<String> wString = new ArrayList<>();
|
||||||
if (f.getWordParts().contains(CalculateFor.WORD))
|
if (f.getWordParts().contains(CalculateFor.WORD) || f.getWordParts().contains(CalculateFor.LOWERCASE_WORD))
|
||||||
wString.add(word);
|
wString.add(word);
|
||||||
if (f.getWordParts().contains(CalculateFor.LEMMA))
|
if (f.getWordParts().contains(CalculateFor.LEMMA))
|
||||||
wString.add(lemma);
|
wString.add(lemma);
|
||||||
|
|
|
@ -222,6 +222,12 @@ public class Ngrams {
|
||||||
.map(w -> w.getLemma(wordParts))
|
.map(w -> w.getLemma(wordParts))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
return StringUtils.join(candidate, " ");
|
return StringUtils.join(candidate, " ");
|
||||||
|
case LOWERCASE_WORD:
|
||||||
|
candidate.addAll(ngramCandidate
|
||||||
|
.stream()
|
||||||
|
.map(w -> w.getWord(wordParts).toLowerCase())
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
return StringUtils.join(candidate, " ");
|
||||||
case WORD:
|
case WORD:
|
||||||
candidate.addAll(ngramCandidate
|
candidate.addAll(ngramCandidate
|
||||||
.stream()
|
.stream()
|
||||||
|
@ -298,6 +304,10 @@ public class Ngrams {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(stats.getFilter().getCalculateFor().equals(CalculateFor.LOWERCASE_WORD)){
|
||||||
|
word = word.toLowerCase();
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) {
|
for (int i = 0; i < word.length() - stats.getFilter().getStringLength() + 1; i++) {
|
||||||
// TODO: locila?
|
// TODO: locila?
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ public enum CalculateFor {
|
||||||
// DIST_LEMMAS("lema");
|
// DIST_LEMMAS("lema");
|
||||||
|
|
||||||
WORD("calculateFor.WORD"),
|
WORD("calculateFor.WORD"),
|
||||||
|
LOWERCASE_WORD("calculateFor.LOWERCASE_WORD"),
|
||||||
NORMALIZED_WORD("calculateFor.NORMALIZED_WORD"),
|
NORMALIZED_WORD("calculateFor.NORMALIZED_WORD"),
|
||||||
LEMMA("calculateFor.LEMMA"),
|
LEMMA("calculateFor.LEMMA"),
|
||||||
MORPHOSYNTACTIC_SPECS("calculateFor.MORPHOSYNTACTIC_SPECS"),
|
MORPHOSYNTACTIC_SPECS("calculateFor.MORPHOSYNTACTIC_SPECS"),
|
||||||
|
@ -47,6 +48,9 @@ public enum CalculateFor {
|
||||||
if (WORD.toString().equals(cf)) {
|
if (WORD.toString().equals(cf)) {
|
||||||
return WORD;
|
return WORD;
|
||||||
}
|
}
|
||||||
|
if (LOWERCASE_WORD.toString().equals(cf)) {
|
||||||
|
return LOWERCASE_WORD;
|
||||||
|
}
|
||||||
if (LEMMA.toString().equals(cf)) {
|
if (LEMMA.toString().equals(cf)) {
|
||||||
return LEMMA;
|
return LEMMA;
|
||||||
}
|
}
|
||||||
|
@ -71,6 +75,8 @@ public enum CalculateFor {
|
||||||
switch (this) {
|
switch (this) {
|
||||||
case WORD:
|
case WORD:
|
||||||
return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.word");
|
return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.word");
|
||||||
|
case LOWERCASE_WORD:
|
||||||
|
return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.lowercaseWord");
|
||||||
case NORMALIZED_WORD:
|
case NORMALIZED_WORD:
|
||||||
return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.normalizedWord");
|
return I18N.get("exportTable.part.totalSumLetters") + " " + I18N.get("exportTable.part.normalizedWord");
|
||||||
case LEMMA:
|
case LEMMA:
|
||||||
|
@ -92,6 +98,8 @@ public enum CalculateFor {
|
||||||
switch (this) {
|
switch (this) {
|
||||||
case WORD:
|
case WORD:
|
||||||
return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.word");
|
return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.word");
|
||||||
|
case LOWERCASE_WORD:
|
||||||
|
return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.lowercaseWord");
|
||||||
case NORMALIZED_WORD:
|
case NORMALIZED_WORD:
|
||||||
return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.normalizedWord");
|
return I18N.get("exportTable.part.totalSumString") + " " + I18N.get("exportTable.part.normalizedWord");
|
||||||
case LEMMA:
|
case LEMMA:
|
||||||
|
@ -118,6 +126,8 @@ public enum CalculateFor {
|
||||||
switch (this) {
|
switch (this) {
|
||||||
case WORD:
|
case WORD:
|
||||||
return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.word");
|
return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.word");
|
||||||
|
case LOWERCASE_WORD:
|
||||||
|
return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.lowercaseWord");
|
||||||
case NORMALIZED_WORD:
|
case NORMALIZED_WORD:
|
||||||
return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.normalizedWord");
|
return I18N.get("exportTable.part.totalFoundLetters") + " " + I18N.get("exportTable.part.normalizedWord");
|
||||||
case LEMMA:
|
case LEMMA:
|
||||||
|
@ -139,6 +149,8 @@ public enum CalculateFor {
|
||||||
switch (this) {
|
switch (this) {
|
||||||
case WORD:
|
case WORD:
|
||||||
return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.word");
|
return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.word");
|
||||||
|
case LOWERCASE_WORD:
|
||||||
|
return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.lowercaseWord");
|
||||||
case NORMALIZED_WORD:
|
case NORMALIZED_WORD:
|
||||||
return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.normalizedWord");
|
return I18N.get("exportTable.part.totalFound") + " " + I18N.get("exportTable.part.normalizedWord");
|
||||||
case LEMMA:
|
case LEMMA:
|
||||||
|
@ -168,6 +180,8 @@ public enum CalculateFor {
|
||||||
switch(this){
|
switch(this){
|
||||||
case WORD:
|
case WORD:
|
||||||
return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.word2");
|
return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.word2");
|
||||||
|
case LOWERCASE_WORD:
|
||||||
|
return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.lowercaseWord2");
|
||||||
case NORMALIZED_WORD:
|
case NORMALIZED_WORD:
|
||||||
return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.normalizedWord2");
|
return I18N.get("exportTable.part.absoluteFrequency") + " " + I18N.get("exportTable.part.normalizedWord2");
|
||||||
case LEMMA:
|
case LEMMA:
|
||||||
|
@ -194,6 +208,8 @@ public enum CalculateFor {
|
||||||
switch(this){
|
switch(this){
|
||||||
case WORD:
|
case WORD:
|
||||||
return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.word2");
|
return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.word2");
|
||||||
|
case LOWERCASE_WORD:
|
||||||
|
return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.lowercaseWord2");
|
||||||
case NORMALIZED_WORD:
|
case NORMALIZED_WORD:
|
||||||
return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.normalizedWord2");
|
return I18N.get("exportTable.part.share") + " " + I18N.get("exportTable.part.normalizedWord2");
|
||||||
case LEMMA:
|
case LEMMA:
|
||||||
|
@ -221,6 +237,8 @@ public enum CalculateFor {
|
||||||
case WORD:
|
case WORD:
|
||||||
case DIST_WORDS:
|
case DIST_WORDS:
|
||||||
return I18N.get("exportTable.part.word3");
|
return I18N.get("exportTable.part.word3");
|
||||||
|
case LOWERCASE_WORD:
|
||||||
|
return I18N.get("exportTable.part.lowercaseWord3");
|
||||||
case NORMALIZED_WORD:
|
case NORMALIZED_WORD:
|
||||||
return I18N.get("exportTable.part.normalizedWord3");
|
return I18N.get("exportTable.part.normalizedWord3");
|
||||||
case LEMMA:
|
case LEMMA:
|
||||||
|
@ -240,6 +258,8 @@ public enum CalculateFor {
|
||||||
case WORD:
|
case WORD:
|
||||||
case DIST_WORDS:
|
case DIST_WORDS:
|
||||||
return I18N.get("exportTable.part.word3") + " " + I18N.get("exportTable.part.set");
|
return I18N.get("exportTable.part.word3") + " " + I18N.get("exportTable.part.set");
|
||||||
|
case LOWERCASE_WORD:
|
||||||
|
return I18N.get("exportTable.part.lowercaseWord3") + " " + I18N.get("exportTable.part.set");
|
||||||
case NORMALIZED_WORD:
|
case NORMALIZED_WORD:
|
||||||
return I18N.get("exportTable.part.normalizedWord3") + " " + I18N.get("exportTable.part.set");
|
return I18N.get("exportTable.part.normalizedWord3") + " " + I18N.get("exportTable.part.set");
|
||||||
case LEMMA:
|
case LEMMA:
|
||||||
|
|
|
@ -36,6 +36,7 @@ public class Corpus {
|
||||||
boolean hasMsdData;
|
boolean hasMsdData;
|
||||||
private ArrayList<String> validationErrors;
|
private ArrayList<String> validationErrors;
|
||||||
private String corpusName = "";
|
private String corpusName = "";
|
||||||
|
private String punctuation = "punctuation.COMMA";
|
||||||
|
|
||||||
public Corpus() {
|
public Corpus() {
|
||||||
validationErrors = new ArrayList<>();
|
validationErrors = new ArrayList<>();
|
||||||
|
@ -52,6 +53,16 @@ public class Corpus {
|
||||||
logger.info("Corpus.set: ", corpusName);
|
logger.info("Corpus.set: ", corpusName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getPunctuation() {
|
||||||
|
return punctuation;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPunctuation(String punctuation) {
|
||||||
|
// System.out.println(corpusName);
|
||||||
|
this.punctuation = punctuation;
|
||||||
|
logger.info("Punctuation.set: ", punctuation);
|
||||||
|
}
|
||||||
|
|
||||||
public CorpusType getCorpusType() {
|
public CorpusType getCorpusType() {
|
||||||
return corpusType;
|
return corpusType;
|
||||||
}
|
}
|
||||||
|
|
|
@ -320,6 +320,10 @@ public class Filter implements Cloneable {
|
||||||
ArrayList<CalculateFor> oldWp = ((ArrayList<CalculateFor>) filter.get(WORD_PARTS));
|
ArrayList<CalculateFor> oldWp = ((ArrayList<CalculateFor>) filter.get(WORD_PARTS));
|
||||||
|
|
||||||
switch (wp) {
|
switch (wp) {
|
||||||
|
case LOWERCASE_WORD:
|
||||||
|
if (!oldWp.contains(CalculateFor.LOWERCASE_WORD))
|
||||||
|
oldWp.add(CalculateFor.LOWERCASE_WORD);
|
||||||
|
break;
|
||||||
case WORD:
|
case WORD:
|
||||||
case DIST_WORDS:
|
case DIST_WORDS:
|
||||||
if (!oldWp.contains(CalculateFor.WORD))
|
if (!oldWp.contains(CalculateFor.WORD))
|
||||||
|
|
|
@ -324,28 +324,28 @@ public class StatisticsNew {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException {
|
// public boolean recalculateAndSaveResultToDisk() throws UnsupportedEncodingException {
|
||||||
filter.setAl(AnalysisLevel.WORD_FORMATION);
|
// filter.setAl(AnalysisLevel.WORD_FORMATION);
|
||||||
resultTitle = generateResultTitle();
|
// resultTitle = generateResultTitle();
|
||||||
|
//
|
||||||
if (useDB) {
|
// if (useDB) {
|
||||||
result = db.getDump();
|
// result = db.getDump();
|
||||||
db.delete();
|
// db.delete();
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
// if no results and nothing to save, return false
|
// // if no results and nothing to save, return false
|
||||||
if (!(result.size() > 0)) {
|
// if (!(result.size() > 0)) {
|
||||||
analysisProducedResults = false;
|
// analysisProducedResults = false;
|
||||||
return false;
|
// return false;
|
||||||
} else {
|
// } else {
|
||||||
analysisProducedResults = true;
|
// analysisProducedResults = true;
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
WordFormation.calculateStatistics(this);
|
// WordFormation.calculateStatistics(this);
|
||||||
|
//
|
||||||
Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock());
|
// Export.SetToCSV(resultTitle, resultCustom, corpus.getChosenResultsLocation(), headerInfoBlock());
|
||||||
return true;
|
// return true;
|
||||||
}
|
// }
|
||||||
|
|
||||||
private Map<String, Map<MultipleHMKeys, Long>> sortNestedMap(Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> nestedMap, int limit) {
|
private Map<String, Map<MultipleHMKeys, Long>> sortNestedMap(Map<String, ConcurrentHashMap<MultipleHMKeys, AtomicLong>> nestedMap, int limit) {
|
||||||
Map<String, Map<MultipleHMKeys, Long>> sorted = new HashMap<>();
|
Map<String, Map<MultipleHMKeys, Long>> sorted = new HashMap<>();
|
||||||
|
@ -682,6 +682,11 @@ public class StatisticsNew {
|
||||||
info.put(I18N.get("exportHeader.minOccurrences"), String.valueOf(filter.getMinimalOccurrences()));
|
info.put(I18N.get("exportHeader.minOccurrences"), String.valueOf(filter.getMinimalOccurrences()));
|
||||||
info.put(I18N.get("exportHeader.minTaxonomies"), String.valueOf(filter.getMinimalTaxonomy()));
|
info.put(I18N.get("exportHeader.minTaxonomies"), String.valueOf(filter.getMinimalTaxonomy()));
|
||||||
|
|
||||||
|
// if not letters extraction
|
||||||
|
if(filter.getNgramValue() > 0) {
|
||||||
|
info.put(I18N.get("exportHeader.minRelFre"), String.valueOf(filter.getMinimalRelFre()));
|
||||||
|
}
|
||||||
|
|
||||||
if (corpus.getCorpusType() == CorpusType.SOLAR) {
|
if (corpus.getCorpusType() == CorpusType.SOLAR) {
|
||||||
HashMap<String, ObservableList<String>> filters = corpus.getSolarSelectedFilters();
|
HashMap<String, ObservableList<String>> filters = corpus.getSolarSelectedFilters();
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,11 @@ public interface Word {
|
||||||
}
|
}
|
||||||
|
|
||||||
default String getWord(ArrayList<CalculateFor> wordParts){
|
default String getWord(ArrayList<CalculateFor> wordParts){
|
||||||
return get(wordParts, CalculateFor.WORD);
|
String w = get(wordParts, CalculateFor.WORD);
|
||||||
|
if (w == null){
|
||||||
|
return get(wordParts, CalculateFor.LOWERCASE_WORD);
|
||||||
|
}
|
||||||
|
return w;
|
||||||
}
|
}
|
||||||
|
|
||||||
default String getLemma(ArrayList<CalculateFor> wordParts){
|
default String getLemma(ArrayList<CalculateFor> wordParts){
|
||||||
|
@ -102,9 +106,9 @@ public interface Word {
|
||||||
String returnValue = "";
|
String returnValue = "";
|
||||||
|
|
||||||
if (cvv) {
|
if (cvv) {
|
||||||
returnValue = calculateFor == CalculateFor.WORD ? getCVVWord(cf) : getCVVLemma(cf);
|
returnValue = (calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LOWERCASE_WORD) ? getCVVWord(cf) : getCVVLemma(cf);
|
||||||
} else {
|
} else {
|
||||||
returnValue = calculateFor == CalculateFor.WORD ? getWord(cf) : getLemma(cf);
|
returnValue = (calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LOWERCASE_WORD) ? getWord(cf) : getLemma(cf);
|
||||||
}
|
}
|
||||||
|
|
||||||
return returnValue;
|
return returnValue;
|
||||||
|
|
|
@ -179,7 +179,7 @@ public class CharacterAnalysisTab {
|
||||||
private ChangeListener<Boolean> minimalOccurrencesListener;
|
private ChangeListener<Boolean> minimalOccurrencesListener;
|
||||||
private ChangeListener<Boolean> minimalTaxonomyListener;
|
private ChangeListener<Boolean> minimalTaxonomyListener;
|
||||||
|
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA"};
|
private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY));
|
||||||
|
|
||||||
private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"};
|
private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"};
|
||||||
|
@ -623,7 +623,7 @@ public class CharacterAnalysisTab {
|
||||||
}
|
}
|
||||||
|
|
||||||
// if calculateFor was selected for something other than a word or a lemma -> reset
|
// if calculateFor was selected for something other than a word or a lemma -> reset
|
||||||
if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA)) {
|
if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA || calculateFor == CalculateFor.LOWERCASE_WORD)) {
|
||||||
// if the user selected something else before selecting ngram for letters, reset that choice
|
// if the user selected something else before selecting ngram for letters, reset that choice
|
||||||
calculateFor = CalculateFor.WORD;
|
calculateFor = CalculateFor.WORD;
|
||||||
|
|
||||||
|
|
|
@ -92,6 +92,9 @@ public class CorpusTab {
|
||||||
@FXML
|
@FXML
|
||||||
public Label outputNameL;
|
public Label outputNameL;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
public Label punctuationL;
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
public ImageView chooseCorpusLocationI;
|
public ImageView chooseCorpusLocationI;
|
||||||
|
|
||||||
|
@ -107,6 +110,9 @@ public class CorpusTab {
|
||||||
@FXML
|
@FXML
|
||||||
public ImageView outputNameI;
|
public ImageView outputNameI;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
public ImageView punctuationI;
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
public TextField outputNameTF;
|
public TextField outputNameTF;
|
||||||
public String outputName = "";
|
public String outputName = "";
|
||||||
|
@ -115,6 +121,10 @@ public class CorpusTab {
|
||||||
public ComboBox<String> selectReaderCB;
|
public ComboBox<String> selectReaderCB;
|
||||||
public String selectReader;
|
public String selectReader;
|
||||||
|
|
||||||
|
@FXML
|
||||||
|
public ComboBox<String> punctuationCB;
|
||||||
|
public String punctuation;
|
||||||
|
|
||||||
@FXML
|
@FXML
|
||||||
private ProgressIndicator locationScanPI;
|
private ProgressIndicator locationScanPI;
|
||||||
|
|
||||||
|
@ -137,7 +147,7 @@ public class CorpusTab {
|
||||||
private OneWordAnalysisTab oneWordTabController;
|
private OneWordAnalysisTab oneWordTabController;
|
||||||
private CharacterAnalysisTab catController;
|
private CharacterAnalysisTab catController;
|
||||||
private FiltersForSolar ffsController;
|
private FiltersForSolar ffsController;
|
||||||
private WordFormationTab wfController;
|
// private WordFormationTab wfController;
|
||||||
private WordLevelTab wlController;
|
private WordLevelTab wlController;
|
||||||
private HostServices hostService;
|
private HostServices hostService;
|
||||||
|
|
||||||
|
@ -146,6 +156,10 @@ public class CorpusTab {
|
||||||
|
|
||||||
private static final String [] SELECT_READER_ARRAY = {"VERT + REGI", "XML (Šolar 1.0)", "XML (GOS 1.0)", "XML (ssj500k 2.1)", "XML (Gigafida 2.0)", "XML (Gigafida 1.0, Kres 1.0)"};
|
private static final String [] SELECT_READER_ARRAY = {"VERT + REGI", "XML (Šolar 1.0)", "XML (GOS 1.0)", "XML (ssj500k 2.1)", "XML (Gigafida 2.0)", "XML (Gigafida 1.0, Kres 1.0)"};
|
||||||
private static final ArrayList<String> SELECT_READER = new ArrayList<>(Arrays.asList(SELECT_READER_ARRAY));
|
private static final ArrayList<String> SELECT_READER = new ArrayList<>(Arrays.asList(SELECT_READER_ARRAY));
|
||||||
|
|
||||||
|
private static final String [] PUNCTUATION_ARRAY = {"punctuation.COMMA", "punctuation.POINT"};
|
||||||
|
private static final ArrayList<String> PUNCTUATION = new ArrayList<>(Arrays.asList(PUNCTUATION_ARRAY));
|
||||||
|
|
||||||
private Collection<File> corpusFiles;
|
private Collection<File> corpusFiles;
|
||||||
private File selectedDirectory;
|
private File selectedDirectory;
|
||||||
|
|
||||||
|
@ -219,6 +233,23 @@ public class CorpusTab {
|
||||||
|
|
||||||
selectReaderCB.getSelectionModel().select(0);
|
selectReaderCB.getSelectionModel().select(0);
|
||||||
|
|
||||||
|
// comma / point choice
|
||||||
|
punctuationCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
|
if(newValue == null){
|
||||||
|
// newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
|
||||||
|
newValue = I18N.getTranslatedValue(oldValue, PUNCTUATION);
|
||||||
|
punctuationCB.getSelectionModel().select(newValue);
|
||||||
|
}
|
||||||
|
// System.out.println(oldValue);
|
||||||
|
// System.out.println(newValue);
|
||||||
|
punctuation = newValue;
|
||||||
|
if(corpus != null) {
|
||||||
|
corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
punctuationCB.getSelectionModel().select(0);
|
||||||
|
|
||||||
// add listeners
|
// add listeners
|
||||||
chooseCorpusLocationB.setOnAction(e -> chooseCorpusLocation());
|
chooseCorpusLocationB.setOnAction(e -> chooseCorpusLocation());
|
||||||
// chooseCorpusLocationB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_chooseCorpusLocationB")));
|
// chooseCorpusLocationB.setTooltip(new Tooltip(I18N.get("message.TOOLTIP_chooseCorpusLocationB")));
|
||||||
|
@ -301,12 +332,16 @@ public class CorpusTab {
|
||||||
readHeaderInfoL.textProperty().bind(I18N.createStringBinding("label.readHeaderInfo"));
|
readHeaderInfoL.textProperty().bind(I18N.createStringBinding("label.readHeaderInfo"));
|
||||||
selectReaderL.textProperty().bind(I18N.createStringBinding("label.selectReader"));
|
selectReaderL.textProperty().bind(I18N.createStringBinding("label.selectReader"));
|
||||||
outputNameL.textProperty().bind(I18N.createStringBinding("label.outputName"));
|
outputNameL.textProperty().bind(I18N.createStringBinding("label.outputName"));
|
||||||
|
punctuationL.textProperty().bind(I18N.createStringBinding("label.punctuation"));
|
||||||
|
|
||||||
addTooltipToImage(chooseCorpusLocationI, I18N.createStringBinding("label.corpusTab.chooseCorpusLocationH"));
|
addTooltipToImage(chooseCorpusLocationI, I18N.createStringBinding("label.corpusTab.chooseCorpusLocationH"));
|
||||||
addTooltipToImage(readHeaderInfoI, I18N.createStringBinding("label.corpusTab.readHeaderInfoH"));
|
addTooltipToImage(readHeaderInfoI, I18N.createStringBinding("label.corpusTab.readHeaderInfoH"));
|
||||||
addTooltipToImage(chooseResultsLocationI, I18N.createStringBinding("label.corpusTab.chooseResultsLocationH"));
|
addTooltipToImage(chooseResultsLocationI, I18N.createStringBinding("label.corpusTab.chooseResultsLocationH"));
|
||||||
addTooltipToImage(selectReaderI, I18N.createStringBinding("label.corpusTab.selectReaderH"));
|
addTooltipToImage(selectReaderI, I18N.createStringBinding("label.corpusTab.selectReaderH"));
|
||||||
addTooltipToImage(outputNameI, I18N.createStringBinding("label.corpusTab.outputNameH"));
|
addTooltipToImage(outputNameI, I18N.createStringBinding("label.corpusTab.outputNameH"));
|
||||||
|
addTooltipToImage(punctuationI, I18N.createStringBinding("label.corpusTab.punctuationH"));
|
||||||
|
|
||||||
|
punctuationCB.itemsProperty().bind(I18N.createObjectBinding(PUNCTUATION));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void togglePiAndSetCorpusWrapper(boolean piIsActive) {
|
private void togglePiAndSetCorpusWrapper(boolean piIsActive) {
|
||||||
|
@ -456,8 +491,9 @@ public class CorpusTab {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
System.out.println(outputName);
|
// System.out.println(outputName);
|
||||||
corpus.setCorpusName(outputName);
|
corpus.setCorpusName(outputName);
|
||||||
|
corpus.setPunctuation(I18N.getRootValue(punctuation, PUNCTUATION));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -167,6 +167,30 @@ public final class I18N {
|
||||||
// return MessageFormat.format(bundle.getString(key), args);
|
// return MessageFormat.format(bundle.getString(key), args);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String getRootValue(String oldValue, ArrayList<String> nGramComputeForLetters) {
|
||||||
|
Locale loc;
|
||||||
|
if(getLocale().equals(Locale.ENGLISH)) {
|
||||||
|
loc = new Locale.Builder().setLanguage("sl").setRegion("SI").build();
|
||||||
|
} else {
|
||||||
|
loc = Locale.ENGLISH;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (String el : nGramComputeForLetters){
|
||||||
|
if (oldValue.equals(getIndependent(el, loc))){
|
||||||
|
return el;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// in case translated language doesn't contain specified word, try original language
|
||||||
|
for (String el : nGramComputeForLetters){
|
||||||
|
if (oldValue.equals(get(el))){
|
||||||
|
return el;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
public static String getTranslatedValue(String oldValue, ArrayList<String> nGramComputeForLetters) {
|
public static String getTranslatedValue(String oldValue, ArrayList<String> nGramComputeForLetters) {
|
||||||
Locale loc;
|
Locale loc;
|
||||||
if(getLocale().equals(Locale.ENGLISH)) {
|
if(getLocale().equals(Locale.ENGLISH)) {
|
||||||
|
|
|
@ -210,13 +210,13 @@ public class OneWordAnalysisTab {
|
||||||
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
|
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
|
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
|
@ -300,7 +300,7 @@ public class OneWordAnalysisTab {
|
||||||
alsoVisualizeCCB.getItems().removeAll();
|
alsoVisualizeCCB.getItems().removeAll();
|
||||||
if (newValue.equals(CalculateFor.LEMMA.toString())) {
|
if (newValue.equals(CalculateFor.LEMMA.toString())) {
|
||||||
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
|
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
|
||||||
} else if (newValue.equals(CalculateFor.WORD.toString())) {
|
} else if (newValue.equals(CalculateFor.WORD.toString()) || newValue.equals(CalculateFor.LOWERCASE_WORD.toString())) {
|
||||||
if (corpus.getCorpusType() == CorpusType.GOS)
|
if (corpus.getCorpusType() == CorpusType.GOS)
|
||||||
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS));
|
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS));
|
||||||
else
|
else
|
||||||
|
|
|
@ -250,13 +250,13 @@ public class StringAnalysisTabNew2 {
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
// private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
|
||||||
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
|
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.MORPHOSYNTACTIC_SPECS", "calculateFor.NORMALIZED_WORD"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
|
@ -408,7 +408,7 @@ public class StringAnalysisTabNew2 {
|
||||||
alsoVisualizeCCB.getItems().removeAll();
|
alsoVisualizeCCB.getItems().removeAll();
|
||||||
if (newValue.equals(CalculateFor.LEMMA.toString())) {
|
if (newValue.equals(CalculateFor.LEMMA.toString())) {
|
||||||
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
|
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
|
||||||
} else if (newValue.equals(CalculateFor.WORD.toString())) {
|
} else if (newValue.equals(CalculateFor.WORD.toString()) || newValue.equals(CalculateFor.LOWERCASE_WORD.toString())) {
|
||||||
if (corpus.getCorpusType() == CorpusType.GOS)
|
if (corpus.getCorpusType() == CorpusType.GOS)
|
||||||
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS));
|
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS));
|
||||||
else
|
else
|
||||||
|
|
|
@ -1,260 +1,260 @@
|
||||||
package gui;
|
//package gui;
|
||||||
|
//
|
||||||
import static alg.XML_processing.*;
|
//import static alg.XML_processing.*;
|
||||||
import static gui.GUIController.*;
|
//import static gui.GUIController.*;
|
||||||
|
//
|
||||||
import java.io.File;
|
//import java.io.File;
|
||||||
import java.io.UnsupportedEncodingException;
|
//import java.io.UnsupportedEncodingException;
|
||||||
import java.util.*;
|
//import java.util.*;
|
||||||
|
//
|
||||||
import javafx.application.HostServices;
|
//import javafx.application.HostServices;
|
||||||
import javafx.scene.control.*;
|
//import javafx.scene.control.*;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
//import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.logging.log4j.LogManager;
|
//import org.apache.logging.log4j.LogManager;
|
||||||
import org.apache.logging.log4j.Logger;
|
//import org.apache.logging.log4j.Logger;
|
||||||
import org.controlsfx.control.CheckComboBox;
|
//import org.controlsfx.control.CheckComboBox;
|
||||||
|
//
|
||||||
import data.*;
|
//import data.*;
|
||||||
import javafx.collections.ListChangeListener;
|
//import javafx.collections.ListChangeListener;
|
||||||
import javafx.collections.ObservableList;
|
//import javafx.collections.ObservableList;
|
||||||
import javafx.concurrent.Task;
|
//import javafx.concurrent.Task;
|
||||||
import javafx.fxml.FXML;
|
//import javafx.fxml.FXML;
|
||||||
import javafx.scene.layout.AnchorPane;
|
//import javafx.scene.layout.AnchorPane;
|
||||||
|
//
|
||||||
@SuppressWarnings("Duplicates")
|
//@SuppressWarnings("Duplicates")
|
||||||
public class WordFormationTab {
|
//public class WordFormationTab {
|
||||||
public final static Logger logger = LogManager.getLogger(WordFormationTab.class);
|
// public final static Logger logger = LogManager.getLogger(WordFormationTab.class);
|
||||||
|
//
|
||||||
public AnchorPane wordAnalysisTabPane;
|
// public AnchorPane wordAnalysisTabPane;
|
||||||
|
//
|
||||||
@FXML
|
// @FXML
|
||||||
public Label selectedFiltersLabel;
|
// public Label selectedFiltersLabel;
|
||||||
@FXML
|
// @FXML
|
||||||
public Label solarFilters;
|
// public Label solarFilters;
|
||||||
|
//
|
||||||
@FXML
|
// @FXML
|
||||||
private CheckComboBox<String> taxonomyCCB;
|
// private CheckComboBox<String> taxonomyCCB;
|
||||||
private ArrayList<Taxonomy> taxonomy;
|
// private ArrayList<Taxonomy> taxonomy;
|
||||||
|
//
|
||||||
@FXML
|
// @FXML
|
||||||
private TextField minimalOccurrencesTF;
|
// private TextField minimalOccurrencesTF;
|
||||||
private Integer minimalOccurrences;
|
// private Integer minimalOccurrences;
|
||||||
|
//
|
||||||
@FXML
|
// @FXML
|
||||||
private TextField minimalTaxonomyTF;
|
// private TextField minimalTaxonomyTF;
|
||||||
private Integer minimalTaxonomy;
|
// private Integer minimalTaxonomy;
|
||||||
|
//
|
||||||
@FXML
|
// @FXML
|
||||||
private Button computeB;
|
// private Button computeB;
|
||||||
|
//
|
||||||
@FXML
|
// @FXML
|
||||||
public ProgressBar ngramProgressBar;
|
// public ProgressBar ngramProgressBar;
|
||||||
@FXML
|
// @FXML
|
||||||
public Label progressLabel;
|
// public Label progressLabel;
|
||||||
|
//
|
||||||
@FXML
|
// @FXML
|
||||||
private Hyperlink helpH;
|
// private Hyperlink helpH;
|
||||||
|
//
|
||||||
private Corpus corpus;
|
// private Corpus corpus;
|
||||||
private HashMap<String, HashSet<String>> solarFiltersMap;
|
// private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||||
private HostServices hostService;
|
// private HostServices hostService;
|
||||||
|
//
|
||||||
// after header scan
|
// // after header scan
|
||||||
private ObservableList<String> taxonomyCCBValues;
|
// private ObservableList<String> taxonomyCCBValues;
|
||||||
private CorpusType currentCorpusType;
|
// private CorpusType currentCorpusType;
|
||||||
private boolean useDb;
|
// private boolean useDb;
|
||||||
|
//
|
||||||
|
//
|
||||||
public void init() {
|
// public void init() {
|
||||||
// taxonomy
|
// // taxonomy
|
||||||
if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
// if (Tax.getCorpusTypesWithTaxonomy().contains(corpus.getCorpusType())) {
|
||||||
taxonomyCCB.getItems().removeAll();
|
// taxonomyCCB.getItems().removeAll();
|
||||||
taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
// taxonomyCCB.getItems().setAll(corpus.getObservableListTaxonomy());
|
||||||
taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
// taxonomyCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
|
||||||
taxonomy = new ArrayList<>();
|
// taxonomy = new ArrayList<>();
|
||||||
ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
// ObservableList<String> checkedItems = taxonomyCCB.getCheckModel().getCheckedItems();
|
||||||
ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems, corpus);
|
// ArrayList<Taxonomy> checkedItemsTaxonomy = Taxonomy.convertStringListToTaxonomyList(checkedItems, corpus);
|
||||||
taxonomy.addAll(checkedItemsTaxonomy);
|
// taxonomy.addAll(checkedItemsTaxonomy);
|
||||||
logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
|
// logger.info(String.format("Selected taxonomy: %s", StringUtils.join(checkedItems, ",")));
|
||||||
});
|
// });
|
||||||
taxonomyCCB.getCheckModel().clearChecks();
|
// taxonomyCCB.getCheckModel().clearChecks();
|
||||||
} else {
|
// } else {
|
||||||
taxonomyCCB.setDisable(true);
|
// taxonomyCCB.setDisable(true);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
// set default values
|
// // set default values
|
||||||
minimalOccurrencesTF.setText("1");
|
// minimalOccurrencesTF.setText("1");
|
||||||
minimalOccurrences = 1;
|
// minimalOccurrences = 1;
|
||||||
|
//
|
||||||
minimalTaxonomyTF.setText("1");
|
// minimalTaxonomyTF.setText("1");
|
||||||
minimalTaxonomy = 1;
|
// minimalTaxonomy = 1;
|
||||||
|
//
|
||||||
minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
// minimalOccurrencesTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
if (!newValue) {
|
// if (!newValue) {
|
||||||
// focus lost
|
// // focus lost
|
||||||
String value = minimalOccurrencesTF.getText();
|
// String value = minimalOccurrencesTF.getText();
|
||||||
if (!ValidationUtil.isEmpty(value)) {
|
// if (!ValidationUtil.isEmpty(value)) {
|
||||||
if (!ValidationUtil.isNumber(value)) {
|
// if (!ValidationUtil.isNumber(value)) {
|
||||||
logAlert("minimalOccurrencesTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
// logAlert("minimalOccurrencesTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
||||||
GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
||||||
} else {
|
// } else {
|
||||||
minimalOccurrences = Integer.parseInt(value);
|
// minimalOccurrences = Integer.parseInt(value);
|
||||||
}
|
// }
|
||||||
} else {
|
// } else {
|
||||||
minimalOccurrencesTF.setText("1");
|
// minimalOccurrencesTF.setText("1");
|
||||||
minimalOccurrences = 1;
|
// minimalOccurrences = 1;
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
});
|
// });
|
||||||
|
//
|
||||||
minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
// minimalTaxonomyTF.focusedProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
if (!newValue) {
|
// if (!newValue) {
|
||||||
// focus lost
|
// // focus lost
|
||||||
String value = minimalTaxonomyTF.getText();
|
// String value = minimalTaxonomyTF.getText();
|
||||||
if (!ValidationUtil.isEmpty(value)) {
|
// if (!ValidationUtil.isEmpty(value)) {
|
||||||
if (!ValidationUtil.isNumber(value)) {
|
// if (!ValidationUtil.isNumber(value)) {
|
||||||
logAlert("minimalTaxonomyTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
// logAlert("minimalTaxonomyTF: " + I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
||||||
GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
// GUIController.showAlert(Alert.AlertType.ERROR, I18N.get("message.WARNING_ONLY_NUMBERS_ALLOWED"));
|
||||||
} else {
|
// } else {
|
||||||
minimalTaxonomy = Integer.parseInt(value);
|
// minimalTaxonomy = Integer.parseInt(value);
|
||||||
}
|
// }
|
||||||
} else {
|
// } else {
|
||||||
minimalTaxonomyTF.setText("1");
|
// minimalTaxonomyTF.setText("1");
|
||||||
minimalTaxonomy = 1;
|
// minimalTaxonomy = 1;
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
});
|
// });
|
||||||
|
//
|
||||||
computeB.setOnAction(e -> {
|
// computeB.setOnAction(e -> {
|
||||||
compute();
|
// compute();
|
||||||
logger.info("compute button");
|
// logger.info("compute button");
|
||||||
});
|
// });
|
||||||
|
//
|
||||||
helpH.setOnAction(e -> openHelpWebsite());
|
// helpH.setOnAction(e -> openHelpWebsite());
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
private void compute() {
|
// private void compute() {
|
||||||
Filter filter = new Filter();
|
// Filter filter = new Filter();
|
||||||
filter.setNgramValue(1);
|
// filter.setNgramValue(1);
|
||||||
filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
|
// filter.setCalculateFor(CalculateFor.MORPHOSYNTACTIC_PROPERTY);
|
||||||
filter.setTaxonomy(taxonomy);
|
// filter.setTaxonomy(taxonomy);
|
||||||
filter.setAl(AnalysisLevel.STRING_LEVEL);
|
// filter.setAl(AnalysisLevel.STRING_LEVEL);
|
||||||
filter.setSkipValue(0);
|
// filter.setSkipValue(0);
|
||||||
filter.setMsd(new ArrayList<>());
|
// filter.setMsd(new ArrayList<>());
|
||||||
filter.setIsCvv(false);
|
// filter.setIsCvv(false);
|
||||||
filter.setSolarFilters(solarFiltersMap);
|
// filter.setSolarFilters(solarFiltersMap);
|
||||||
filter.setMinimalOccurrences(minimalOccurrences);
|
// filter.setMinimalOccurrences(minimalOccurrences);
|
||||||
filter.setMinimalTaxonomy(minimalTaxonomy);
|
// filter.setMinimalTaxonomy(minimalTaxonomy);
|
||||||
|
//
|
||||||
String message = Validation.validateForStringLevel(filter);
|
// String message = Validation.validateForStringLevel(filter);
|
||||||
if (message == null) {
|
// if (message == null) {
|
||||||
// no errors
|
// // no errors
|
||||||
logger.info("Executing: ", filter.toString());
|
// logger.info("Executing: ", filter.toString());
|
||||||
StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
// StatisticsNew statistic = new StatisticsNew(corpus, filter, useDb);
|
||||||
execute(statistic);
|
// execute(statistic);
|
||||||
} else {
|
// } else {
|
||||||
logAlert(message);
|
// logAlert(message);
|
||||||
showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
|
// showAlert(Alert.AlertType.ERROR, "Prosim izpolnite polja:", message);
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
private void openHelpWebsite(){
|
// private void openHelpWebsite(){
|
||||||
hostService.showDocument(Messages.HELP_URL);
|
// hostService.showDocument(Messages.HELP_URL);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
private void execute(StatisticsNew statistic) {
|
// private void execute(StatisticsNew statistic) {
|
||||||
logger.info("Started execution: ", statistic.getFilter());
|
// logger.info("Started execution: ", statistic.getFilter());
|
||||||
|
//
|
||||||
Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
// Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
|
||||||
|
//
|
||||||
final Task<Void> task = new Task<Void>() {
|
// final Task<Void> task = new Task<Void>() {
|
||||||
@SuppressWarnings("Duplicates")
|
// @SuppressWarnings("Duplicates")
|
||||||
@Override
|
// @Override
|
||||||
protected Void call() throws Exception {
|
// protected Void call() throws Exception {
|
||||||
int i = 0;
|
// int i = 0;
|
||||||
Date startTime = new Date();
|
// Date startTime = new Date();
|
||||||
Date previousTime = new Date();
|
// Date previousTime = new Date();
|
||||||
for (File f : corpusFiles) {
|
// for (File f : corpusFiles) {
|
||||||
readXML(f.toString(), statistic);
|
// readXML(f.toString(), statistic);
|
||||||
i++;
|
// i++;
|
||||||
this.updateProgress(i, corpusFiles.size());
|
// this.updateProgress(i, corpusFiles.size());
|
||||||
this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
|
// this.updateMessage(String.format(I18N.get("message.ONGOING_NOTIFICATION_ANALYZING_FILE_X_OF_Y"), i, corpusFiles.size(), f.getName()));
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
return null;
|
// return null;
|
||||||
}
|
// }
|
||||||
};
|
// };
|
||||||
|
//
|
||||||
ngramProgressBar.progressProperty().bind(task.progressProperty());
|
// ngramProgressBar.progressProperty().bind(task.progressProperty());
|
||||||
progressLabel.textProperty().bind(task.messageProperty());
|
// progressLabel.textProperty().bind(task.messageProperty());
|
||||||
|
//
|
||||||
task.setOnSucceeded(e -> {
|
// task.setOnSucceeded(e -> {
|
||||||
try {
|
// try {
|
||||||
// first, we have to recalculate all occurrences to detailed statistics
|
// // first, we have to recalculate all occurrences to detailed statistics
|
||||||
boolean successullySaved = statistic.recalculateAndSaveResultToDisk();
|
// boolean successullySaved = statistic.recalculateAndSaveResultToDisk();
|
||||||
|
//
|
||||||
if (successullySaved) {
|
// if (successullySaved) {
|
||||||
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
|
||||||
} else {
|
// } else {
|
||||||
showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
|
// showAlert(Alert.AlertType.INFORMATION, I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
|
||||||
}
|
// }
|
||||||
} catch (UnsupportedEncodingException e1) {
|
// } catch (UnsupportedEncodingException e1) {
|
||||||
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
|
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
|
||||||
logger.error("Error while saving", e1);
|
// logger.error("Error while saving", e1);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
ngramProgressBar.progressProperty().unbind();
|
// ngramProgressBar.progressProperty().unbind();
|
||||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
// ngramProgressBar.setStyle(Settings.FX_ACCENT_OK);
|
||||||
progressLabel.textProperty().unbind();
|
// progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
// progressLabel.setText("");
|
||||||
});
|
// });
|
||||||
|
//
|
||||||
task.setOnFailed(e -> {
|
// task.setOnFailed(e -> {
|
||||||
showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
|
// showAlert(Alert.AlertType.ERROR, I18N.get("message.ERROR_WHILE_EXECUTING"));
|
||||||
logger.error("Error while executing", e);
|
// logger.error("Error while executing", e);
|
||||||
ngramProgressBar.progressProperty().unbind();
|
// ngramProgressBar.progressProperty().unbind();
|
||||||
ngramProgressBar.setProgress(0.0);
|
// ngramProgressBar.setProgress(0.0);
|
||||||
ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
// ngramProgressBar.setStyle(Settings.FX_ACCENT_NOK);
|
||||||
progressLabel.textProperty().unbind();
|
// progressLabel.textProperty().unbind();
|
||||||
progressLabel.setText("");
|
// progressLabel.setText("");
|
||||||
});
|
// });
|
||||||
|
//
|
||||||
final Thread thread = new Thread(task, "task");
|
// final Thread thread = new Thread(task, "task");
|
||||||
thread.setDaemon(true);
|
// thread.setDaemon(true);
|
||||||
thread.start();
|
// thread.start();
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
private void logAlert(String alert) {
|
// private void logAlert(String alert) {
|
||||||
logger.info("alert: " + alert);
|
// logger.info("alert: " + alert);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
|
//
|
||||||
public void setCorpus(Corpus corpus) {
|
// public void setCorpus(Corpus corpus) {
|
||||||
this.corpus = corpus;
|
// this.corpus = corpus;
|
||||||
|
//
|
||||||
if (corpus.getCorpusType() != CorpusType.SOLAR) {
|
// if (corpus.getCorpusType() != CorpusType.SOLAR) {
|
||||||
setSelectedFiltersLabel(null);
|
// setSelectedFiltersLabel(null);
|
||||||
} else {
|
// } else {
|
||||||
setSelectedFiltersLabel("/");
|
// setSelectedFiltersLabel("/");
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
public void setSelectedFiltersLabel(String content) {
|
// public void setSelectedFiltersLabel(String content) {
|
||||||
if (content != null) {
|
// if (content != null) {
|
||||||
solarFilters.setVisible(true);
|
// solarFilters.setVisible(true);
|
||||||
selectedFiltersLabel.setVisible(true);
|
// selectedFiltersLabel.setVisible(true);
|
||||||
selectedFiltersLabel.setText(content);
|
// selectedFiltersLabel.setText(content);
|
||||||
} else {
|
// } else {
|
||||||
solarFilters.setVisible(false);
|
// solarFilters.setVisible(false);
|
||||||
selectedFiltersLabel.setVisible(false);
|
// selectedFiltersLabel.setVisible(false);
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
|
// public void setSolarFiltersMap(HashMap<String, HashSet<String>> solarFiltersMap) {
|
||||||
this.solarFiltersMap = solarFiltersMap;
|
// this.solarFiltersMap = solarFiltersMap;
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
public void setHostServices(HostServices hostServices){
|
// public void setHostServices(HostServices hostServices){
|
||||||
this.hostService = hostServices;
|
// this.hostService = hostServices;
|
||||||
}
|
// }
|
||||||
}
|
//}
|
||||||
|
|
|
@ -228,13 +228,13 @@ public class WordLevelTab {
|
||||||
|
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
|
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS = FXCollections.observableArrayList("lema", "različnica");
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_LETTERS = FXCollections.observableArrayList("lema", "različnica");
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ARRAY));
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = FXCollections.observableArrayList("različnica");
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_ORTH = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_ORTH_ARRAY));
|
||||||
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
|
// private static final ObservableList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = FXCollections.observableArrayList("lema", "različnica", "normalizirana različnica");
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LEMMA", "calculateFor.NORMALIZED_WORD"};
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.NORMALIZED_WORD"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
||||||
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
// private static final ObservableList<String> alsoVisualizeItemsLemma = FXCollections.observableArrayList("besedna vrsta", "oblikoskladenjska oznaka");
|
||||||
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
private static final String [] ALSO_VISUALIZE_ITEMS_LEMMA_ARRAY = {"calculateFor.WORD_TYPE", "calculateFor.MORPHOSYNTACTIC_SPECS"};
|
||||||
|
@ -320,7 +320,7 @@ public class WordLevelTab {
|
||||||
alsoVisualizeCCB.getItems().removeAll();
|
alsoVisualizeCCB.getItems().removeAll();
|
||||||
if (newValue.equals(CalculateFor.LEMMA.toString())) {
|
if (newValue.equals(CalculateFor.LEMMA.toString())) {
|
||||||
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
|
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_LEMMA));
|
||||||
} else if (newValue.equals(CalculateFor.WORD.toString())) {
|
} else if (newValue.equals(CalculateFor.WORD.toString()) || newValue.equals(CalculateFor.LOWERCASE_WORD.toString())) {
|
||||||
if (corpus.getCorpusType() == CorpusType.GOS)
|
if (corpus.getCorpusType() == CorpusType.GOS)
|
||||||
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS));
|
alsoVisualizeCCB.getItems().setAll(I18N.translatedObservableList(ALSO_VISUALIZE_ITEMS_WORDS_GOS));
|
||||||
else
|
else
|
||||||
|
|
|
@ -23,41 +23,41 @@ import data.Enums.WordLevelType;
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public class Export {
|
public class Export {
|
||||||
public static void SetToJSON(Set<Pair<String, Map<MultipleHMKeys, Long>>> set) {
|
// public static void SetToJSON(Set<Pair<String, Map<MultipleHMKeys, Long>>> set) {
|
||||||
JSONArray wrapper = new JSONArray();
|
// JSONArray wrapper = new JSONArray();
|
||||||
|
//
|
||||||
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
// for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
||||||
JSONArray data_wrapper = new JSONArray();
|
// JSONArray data_wrapper = new JSONArray();
|
||||||
JSONObject metric = new JSONObject();
|
// JSONObject metric = new JSONObject();
|
||||||
|
//
|
||||||
String title = p.getLeft();
|
// String title = p.getLeft();
|
||||||
Map<MultipleHMKeys, Long> map = p.getRight();
|
// Map<MultipleHMKeys, Long> map = p.getRight();
|
||||||
|
//
|
||||||
if (map.isEmpty())
|
// if (map.isEmpty())
|
||||||
continue;
|
// continue;
|
||||||
|
//
|
||||||
long total = Util.mapSumFrequencies(map);
|
// long total = Util.mapSumFrequencies(map);
|
||||||
|
//
|
||||||
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
|
// for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
|
||||||
JSONObject data_entry = new JSONObject();
|
// JSONObject data_entry = new JSONObject();
|
||||||
data_entry.put("word", e.getKey());
|
// data_entry.put("word", e.getKey());
|
||||||
data_entry.put("frequency", e.getValue());
|
// data_entry.put("frequency", e.getValue());
|
||||||
data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total));
|
// data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total));
|
||||||
|
//
|
||||||
data_wrapper.add(data_entry);
|
// data_wrapper.add(data_entry);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
metric.put("Title", title);
|
// metric.put("Title", title);
|
||||||
metric.put("data", data_wrapper);
|
// metric.put("data", data_wrapper);
|
||||||
wrapper.add(metric);
|
// wrapper.add(metric);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
try (FileWriter file = new FileWriter("statistics.json")) {
|
// try (FileWriter file = new FileWriter("statistics.json")) {
|
||||||
file.write(wrapper.toJSONString());
|
// file.write(wrapper.toJSONString());
|
||||||
} catch (IOException e) {
|
// } catch (IOException e) {
|
||||||
e.printStackTrace();
|
// e.printStackTrace();
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
public static String SetToCSV(Set<Pair<String, Map<MultipleHMKeys, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
|
||||||
StatisticsNew statistics, Filter filter) {
|
StatisticsNew statistics, Filter filter) {
|
||||||
|
@ -127,12 +127,6 @@ public class Export {
|
||||||
|
|
||||||
FILE_HEADER_AL.add(I18N.get("exportTable.totalRelativeFrequency"));
|
FILE_HEADER_AL.add(I18N.get("exportTable.totalRelativeFrequency"));
|
||||||
|
|
||||||
if (filter.getCollocability().size() > 0){
|
|
||||||
for (Collocability c : filter.getCollocability()) {
|
|
||||||
FILE_HEADER_AL.add(c.toHeaderString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (Taxonomy key : taxonomyResults.keySet()) {
|
for (Taxonomy key : taxonomyResults.keySet()) {
|
||||||
if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
|
if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
|
||||||
FILE_HEADER_AL.add(I18N.get("exportTable.absoluteFrequency") + " [" + key.toString() + "]");
|
FILE_HEADER_AL.add(I18N.get("exportTable.absoluteFrequency") + " [" + key.toString() + "]");
|
||||||
|
@ -141,6 +135,13 @@ public class Export {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (filter.getCollocability().size() > 0){
|
||||||
|
for (Collocability c : filter.getCollocability()) {
|
||||||
|
FILE_HEADER_AL.add(c.toHeaderString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (filter.getWriteMsdAtTheEnd()) {
|
if (filter.getWriteMsdAtTheEnd()) {
|
||||||
String msd = "";
|
String msd = "";
|
||||||
int maxMsdLength = 0;
|
int maxMsdLength = 0;
|
||||||
|
@ -280,14 +281,14 @@ public class Export {
|
||||||
|
|
||||||
|
|
||||||
dataEntry.add(e.getValue().toString());
|
dataEntry.add(e.getValue().toString());
|
||||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal())));
|
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(statistics.getCorpus().getTotal()), statistics.getCorpus().getPunctuation()));
|
||||||
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue()));
|
dataEntry.add(formatNumberForExport(((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(statistics.getCorpus().getTotal()).longValue(), statistics.getCorpus().getPunctuation()));
|
||||||
for (Taxonomy key : taxonomyResults.keySet()){
|
for (Taxonomy key : taxonomyResults.keySet()){
|
||||||
if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
|
if(!key.equals(statistics.getCorpus().getTotal()) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
|
||||||
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
|
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
|
||||||
dataEntry.add(frequency.toString());
|
dataEntry.add(frequency.toString());
|
||||||
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key)));
|
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key), statistics.getCorpus().getPunctuation()));
|
||||||
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue()));
|
dataEntry.add(formatNumberForExport(((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue(), statistics.getCorpus().getPunctuation()));
|
||||||
// dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences()));
|
// dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences()));
|
||||||
// dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences()));
|
// dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences()));
|
||||||
}
|
}
|
||||||
|
@ -296,7 +297,7 @@ public class Export {
|
||||||
|
|
||||||
if (filter.getCollocability().size() > 0){
|
if (filter.getCollocability().size() > 0){
|
||||||
for (Collocability c : filter.getCollocability()) {
|
for (Collocability c : filter.getCollocability()) {
|
||||||
dataEntry.add(statistics.getCollocability().get(c).get(e.getKey()));
|
dataEntry.add(formatNumberForLongExport(statistics.getCollocability().get(c).get(e.getKey()), statistics.getCorpus().getPunctuation()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -371,66 +372,66 @@ public class Export {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
// public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
||||||
//Delimiter used in CSV file
|
// //Delimiter used in CSV file
|
||||||
String NEW_LINE_SEPARATOR = "\n";
|
// String NEW_LINE_SEPARATOR = "\n";
|
||||||
|
//
|
||||||
//CSV file header
|
// //CSV file header
|
||||||
Object[] FILE_HEADER = {"word", "frequency", "percent"};
|
// Object[] FILE_HEADER = {"word", "frequency", "percent"};
|
||||||
|
//
|
||||||
String fileName = "";
|
// String fileName = "";
|
||||||
|
//
|
||||||
fileName = title.replace(": ", "-");
|
// fileName = title.replace(": ", "-");
|
||||||
fileName = fileName.replace(" ", "_").concat(".csv");
|
// fileName = fileName.replace(" ", "_").concat(".csv");
|
||||||
|
//
|
||||||
fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
// fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
||||||
|
//
|
||||||
OutputStreamWriter fileWriter = null;
|
// OutputStreamWriter fileWriter = null;
|
||||||
CSVPrinter csvFilePrinter = null;
|
// CSVPrinter csvFilePrinter = null;
|
||||||
|
//
|
||||||
//Create the CSVFormat object with "\n" as a record delimiter
|
// //Create the CSVFormat object with "\n" as a record delimiter
|
||||||
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
|
// CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
|
||||||
|
//
|
||||||
try {
|
// try {
|
||||||
//initialize FileWriter object
|
// //initialize FileWriter object
|
||||||
fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
|
// fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
|
||||||
|
//
|
||||||
//initialize CSVPrinter object
|
// //initialize CSVPrinter object
|
||||||
csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
|
// csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
|
||||||
|
//
|
||||||
// write info block
|
// // write info block
|
||||||
printHeaderInfo(csvFilePrinter, headerInfoBlock);
|
// printHeaderInfo(csvFilePrinter, headerInfoBlock);
|
||||||
|
//
|
||||||
//Create CSV file header
|
// //Create CSV file header
|
||||||
csvFilePrinter.printRecord(FILE_HEADER);
|
// csvFilePrinter.printRecord(FILE_HEADER);
|
||||||
|
//
|
||||||
for (Object[] resultEntry : result) {
|
// for (Object[] resultEntry : result) {
|
||||||
List dataEntry = new ArrayList<>();
|
// List dataEntry = new ArrayList<>();
|
||||||
dataEntry.add(resultEntry[0]);
|
// dataEntry.add(resultEntry[0]);
|
||||||
dataEntry.add(resultEntry[1]);
|
// dataEntry.add(resultEntry[1]);
|
||||||
dataEntry.add(formatNumberAsPercent(resultEntry[2]));
|
// dataEntry.add(formatNumberAsPercent(resultEntry[2]), statistics.getCorpus().getPunctuation());
|
||||||
csvFilePrinter.printRecord(dataEntry);
|
// csvFilePrinter.printRecord(dataEntry);
|
||||||
}
|
// }
|
||||||
} catch (Exception e) {
|
// } catch (Exception e) {
|
||||||
System.out.println("Error in CsvFileWriter!");
|
// System.out.println("Error in CsvFileWriter!");
|
||||||
e.printStackTrace();
|
// e.printStackTrace();
|
||||||
} finally {
|
// } finally {
|
||||||
try {
|
// try {
|
||||||
if (fileWriter != null) {
|
// if (fileWriter != null) {
|
||||||
fileWriter.flush();
|
// fileWriter.flush();
|
||||||
fileWriter.close();
|
// fileWriter.close();
|
||||||
}
|
// }
|
||||||
if (csvFilePrinter != null) {
|
// if (csvFilePrinter != null) {
|
||||||
csvFilePrinter.close();
|
// csvFilePrinter.close();
|
||||||
}
|
// }
|
||||||
} catch (IOException e) {
|
// } catch (IOException e) {
|
||||||
System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
|
// System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
|
||||||
e.printStackTrace();
|
// e.printStackTrace();
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
return fileName;
|
// return fileName;
|
||||||
}
|
// }
|
||||||
|
|
||||||
public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
||||||
//Delimiter used in CSV file
|
//Delimiter used in CSV file
|
||||||
|
|
|
@ -54,10 +54,31 @@ public class Util {
|
||||||
return "- invalid input format -";
|
return "- invalid input format -";
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String formatNumberAsPercent(Object o) {
|
public static String formatNumberAsPercent(Object o, String punctuation) {
|
||||||
return MessageFormat.format("{0,number,#.### %}", o).replace('.', ',');
|
if(punctuation.equals("punctuation.COMMA")) {
|
||||||
|
return MessageFormat.format("{0,number,#.### %}", o).replace('.', ',');
|
||||||
|
} else {
|
||||||
|
return MessageFormat.format("{0,number,#.### %}", o);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String formatNumberForExport(Object o, String punctuation) {
|
||||||
|
if(punctuation.equals("punctuation.COMMA")) {
|
||||||
|
return MessageFormat.format("{0,number,#.##}", o).replace('.', ',');
|
||||||
|
} else {
|
||||||
|
return MessageFormat.format("{0,number,#.##}", o);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String formatNumberForLongExport(Object o, String punctuation) {
|
||||||
|
if(punctuation.equals("punctuation.COMMA")) {
|
||||||
|
return MessageFormat.format("{0,number,#.########}", o).replace('.', ',');
|
||||||
|
} else {
|
||||||
|
return MessageFormat.format("{0,number,#.########}", o);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static boolean isInstanceOfInteger(Object o) {
|
private static boolean isInstanceOfInteger(Object o) {
|
||||||
Set<Class<?>> types = new HashSet<>();
|
Set<Class<?>> types = new HashSet<>();
|
||||||
types.add(Byte.class);
|
types.add(Byte.class);
|
||||||
|
|
|
@ -57,6 +57,13 @@
|
||||||
<Image url="questionmark.png" backgroundLoading="true"/>
|
<Image url="questionmark.png" backgroundLoading="true"/>
|
||||||
</ImageView>
|
</ImageView>
|
||||||
|
|
||||||
|
<Label fx:id="punctuationL" layoutX="10.0" layoutY="340.0" prefHeight="25.0" text="Decimalno znamenje"/>
|
||||||
|
<ComboBox fx:id="punctuationCB" layoutX="225.0" layoutY="340.0" minWidth="140.0" prefWidth="140.0"
|
||||||
|
visibleRowCount="5"/>
|
||||||
|
<ImageView fx:id="punctuationI" layoutX="370.0" layoutY="347.5" pickOnBounds="true" preserveRatio="true">
|
||||||
|
<Image url="questionmark.png" backgroundLoading="true"/>
|
||||||
|
</ImageView>
|
||||||
|
|
||||||
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0"/>
|
<Hyperlink fx:id="helpH" alignment="TOP_LEFT" layoutX="710.0" layoutY="16.0"/>
|
||||||
<Button fx:id="changeLanguageB" layoutX="710.0" layoutY="40.0" mnemonicParsing="false" prefWidth="50.0"/>
|
<Button fx:id="changeLanguageB" layoutX="710.0" layoutY="40.0" mnemonicParsing="false" prefWidth="50.0"/>
|
||||||
</children>
|
</children>
|
||||||
|
|
|
@ -17,18 +17,20 @@ tab.stringLevelTabNew2=Word sets
|
||||||
# corpus tab
|
# corpus tab
|
||||||
label.setCorpusLocation=Set corpus location
|
label.setCorpusLocation=Set corpus location
|
||||||
button.setCorpusLocation=Set location
|
button.setCorpusLocation=Set location
|
||||||
label.readHeaderInfo=Read info from headers
|
label.readHeaderInfo=Read tax. from corpus files
|
||||||
checkBox.readHeaderInfo=
|
checkBox.readHeaderInfo=
|
||||||
label.chooseResultsLocation=Choose result location
|
label.chooseResultsLocation=Choose result location
|
||||||
button.chooseResultsLocation=Set location
|
button.chooseResultsLocation=Set location
|
||||||
label.selectReader=Select reader
|
label.selectReader=Select reader
|
||||||
label.outputName=Output file name
|
label.outputName=Output file name
|
||||||
|
label.punctuation=Decimal separator
|
||||||
|
|
||||||
label.corpusTab.chooseCorpusLocationH=Select the folder which contains the corpus. The folder should only contain one corpus and should not contain files that are not part of the corpus.
|
label.corpusTab.chooseCorpusLocationH=Select the folder which contains the corpus. The folder should only contain one corpus and should not contain files that are not part of the corpus.
|
||||||
label.corpusTab.readHeaderInfoH=If you select this option, the taxonomy will be read separately. This might take a while.
|
label.corpusTab.readHeaderInfoH=The program will read the taxonomy from corpus files. This might take a while.
|
||||||
label.corpusTab.chooseResultsLocationH=Choose result location
|
label.corpusTab.chooseResultsLocationH=Choose result location
|
||||||
label.corpusTab.selectReaderH=Select reader
|
label.corpusTab.selectReaderH=Select reader
|
||||||
label.corpusTab.outputNameH=Output file name
|
label.corpusTab.outputNameH=Output file name
|
||||||
|
label.corpusTab.punctuationH=Select decimal separator used in export files.
|
||||||
|
|
||||||
# character analysis tab
|
# character analysis tab
|
||||||
label.stringLength=Number of characters
|
label.stringLength=Number of characters
|
||||||
|
@ -40,7 +42,7 @@ label.taxonomy=Filter by taxonomy
|
||||||
label.minimalOccurrences=Min. nr. occurrences
|
label.minimalOccurrences=Min. nr. occurrences
|
||||||
label.minimalTaxonomy=Min. nr. tax. branches
|
label.minimalTaxonomy=Min. nr. tax. branches
|
||||||
label.minimalRelFre=Min. rel. frequency
|
label.minimalRelFre=Min. rel. frequency
|
||||||
label.taxonomySetOperation=Filtriraj taksonomijo po
|
label.taxonomySetOperation=Filter taxonomy by
|
||||||
label.solarFilters=Selected filters:
|
label.solarFilters=Selected filters:
|
||||||
string.lemma=lemma
|
string.lemma=lemma
|
||||||
string.word=word
|
string.word=word
|
||||||
|
@ -52,7 +54,7 @@ label.letter.msdH=Character strings will be counted only in words with the provi
|
||||||
label.letter.taxonomyH=Character strings will be counted only in selected text types.
|
label.letter.taxonomyH=Character strings will be counted only in selected text types.
|
||||||
label.letter.minimalOccurrencesH=Character strings with fewer occurrences will not be included in the output.
|
label.letter.minimalOccurrencesH=Character strings with fewer occurrences will not be included in the output.
|
||||||
label.letter.minimalTaxonomyH=Character strings that occur in fewer taxonomy branches will not be included in the output.
|
label.letter.minimalTaxonomyH=Character strings that occur in fewer taxonomy branches will not be included in the output.
|
||||||
label.letter.taxonomySetOperationH=Izpisuj iz besedil, ki ustrezajo vsaj eni od izbranih vej (unija) ali vsem izbranim vejam (presek)
|
label.letter.taxonomySetOperationH=Extract information from texts that fit into at least one (union) or all (intersection) of the selected branches.
|
||||||
|
|
||||||
# word part tab
|
# word part tab
|
||||||
label.alsoVisualize=Also split by
|
label.alsoVisualize=Also split by
|
||||||
|
@ -101,8 +103,13 @@ label.wordSet.taxonomyH=Word sets will only be extracted from the selected taxon
|
||||||
label.wordSet.minimalOccurrencesH=Word sets with fewer occurrences will not be included in the output.
|
label.wordSet.minimalOccurrencesH=Word sets with fewer occurrences will not be included in the output.
|
||||||
label.wordSet.minimalTaxonomyH=Word sets that occur in fewer taxonomy branches will not be included in the output.
|
label.wordSet.minimalTaxonomyH=Word sets that occur in fewer taxonomy branches will not be included in the output.
|
||||||
|
|
||||||
|
# punctuations
|
||||||
|
punctuation.COMMA=comma (,)
|
||||||
|
punctuation.POINT=point (.)
|
||||||
|
|
||||||
# calculate for
|
# calculate for
|
||||||
calculateFor.WORD=word
|
calculateFor.WORD=word
|
||||||
|
calculateFor.LOWERCASE_WORD=lowercase word
|
||||||
calculateFor.NORMALIZED_WORD=normalized word
|
calculateFor.NORMALIZED_WORD=normalized word
|
||||||
calculateFor.LEMMA=lemma
|
calculateFor.LEMMA=lemma
|
||||||
calculateFor.MORPHOSYNTACTIC_SPECS=morphosyntactic tag
|
calculateFor.MORPHOSYNTACTIC_SPECS=morphosyntactic tag
|
||||||
|
@ -187,7 +194,7 @@ windowTitles.warning=Warning
|
||||||
windowTitles.confirmation=Confirmation
|
windowTitles.confirmation=Confirmation
|
||||||
|
|
||||||
# export header translations
|
# export header translations
|
||||||
exportHeader.corpus=Corpus:
|
exportHeader.corpus=Reader:
|
||||||
exportHeader.date=Date:
|
exportHeader.date=Date:
|
||||||
exportHeader.executionTime=Execution time:
|
exportHeader.executionTime=Execution time:
|
||||||
exportHeader.analysis=Analysis:
|
exportHeader.analysis=Analysis:
|
||||||
|
@ -212,6 +219,7 @@ exportHeader.msd=Morphosyntactic tag:
|
||||||
exportHeader.taxonomy=Filter by taxonomy:
|
exportHeader.taxonomy=Filter by taxonomy:
|
||||||
exportHeader.minOccurrences=Min. nr. occurrences:
|
exportHeader.minOccurrences=Min. nr. occurrences:
|
||||||
exportHeader.minTaxonomies=Min. nr. taxonomy branches:
|
exportHeader.minTaxonomies=Min. nr. taxonomy branches:
|
||||||
|
exportHeader.minRelFre=Min. rel. frequency nr.:
|
||||||
exportHeader.additionalFilters=Additional filters:
|
exportHeader.additionalFilters=Additional filters:
|
||||||
exportHeader.yes=yes
|
exportHeader.yes=yes
|
||||||
exportHeader.no=no
|
exportHeader.no=no
|
||||||
|
@ -231,6 +239,7 @@ exportTable.relativeFrequency=Relative frequency
|
||||||
exportTable.msd=msd
|
exportTable.msd=msd
|
||||||
# parts
|
# parts
|
||||||
exportTable.part.word=words:
|
exportTable.part.word=words:
|
||||||
|
exportTable.part.lowercaseWord=lowercase words:
|
||||||
exportTable.part.normalizedWord=normalized words:
|
exportTable.part.normalizedWord=normalized words:
|
||||||
exportTable.part.lemma=lemmas:
|
exportTable.part.lemma=lemmas:
|
||||||
exportTable.part.msd=msd:
|
exportTable.part.msd=msd:
|
||||||
|
@ -239,6 +248,7 @@ exportTable.part.wordType=word type:
|
||||||
|
|
||||||
exportTable.part.letterSet=character set
|
exportTable.part.letterSet=character set
|
||||||
exportTable.part.word2=word
|
exportTable.part.word2=word
|
||||||
|
exportTable.part.lowercaseWord2=lowercase word
|
||||||
exportTable.part.normalizedWord2=normalized word
|
exportTable.part.normalizedWord2=normalized word
|
||||||
exportTable.part.lemma2=lemma
|
exportTable.part.lemma2=lemma
|
||||||
exportTable.part.msd2=msd
|
exportTable.part.msd2=msd
|
||||||
|
@ -248,6 +258,7 @@ exportTable.part.wordType2=word type
|
||||||
exportTable.part.letterSet2=Share of total sum of all letter sets
|
exportTable.part.letterSet2=Share of total sum of all letter sets
|
||||||
exportTable.part.letterSet3=Letter set
|
exportTable.part.letterSet3=Letter set
|
||||||
exportTable.part.word3=Word
|
exportTable.part.word3=Word
|
||||||
|
exportTable.part.lowercaseWord3=Lowercase word
|
||||||
exportTable.part.normalizedWord3=Normalized word
|
exportTable.part.normalizedWord3=Normalized word
|
||||||
exportTable.part.lemma3=Lemma
|
exportTable.part.lemma3=Lemma
|
||||||
exportTable.part.msd3=Msd
|
exportTable.part.msd3=Msd
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# general
|
# general
|
||||||
window.title=Luščilnik
|
window.title=Korpusni luščilnik
|
||||||
|
|
||||||
hyperlink.help=Pomoč
|
hyperlink.help=Pomoč
|
||||||
button.language=EN
|
button.language=EN
|
||||||
|
@ -17,18 +17,20 @@ tab.stringLevelTabNew2=Besedni nizi
|
||||||
# corpus tab
|
# corpus tab
|
||||||
label.setCorpusLocation=Nastavi lokacijo korpusa
|
label.setCorpusLocation=Nastavi lokacijo korpusa
|
||||||
button.setCorpusLocation=Ponastavi
|
button.setCorpusLocation=Ponastavi
|
||||||
label.readHeaderInfo=Preberi info iz headerjev
|
label.readHeaderInfo=Preberi taks. iz korp. datotek
|
||||||
checkBox.readHeaderInfo=
|
checkBox.readHeaderInfo=
|
||||||
label.chooseResultsLocation=Nastavi lokacijo rezultatov
|
label.chooseResultsLocation=Nastavi lokacijo izpisov
|
||||||
button.chooseResultsLocation=Ponastavi
|
button.chooseResultsLocation=Ponastavi
|
||||||
label.selectReader=Izberi bralnik
|
label.selectReader=Izberi bralnik
|
||||||
label.outputName=Ime izhodne datoteke
|
label.outputName=Ime izhodne datoteke
|
||||||
|
label.punctuation=Decimalno znamenje
|
||||||
|
|
||||||
label.corpusTab.chooseCorpusLocationH=Izberite mapo, v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa.
|
label.corpusTab.chooseCorpusLocationH=Izberite mapo, v kateri se nahaja korpus. Program izbrano mapo preišče rekurzivno, zato bodite pozorni, da ne izberete mape z več korpusi ali z mnogo datotekami, ki niso del korpusa.
|
||||||
label.corpusTab.readHeaderInfoH=Če izberete to opcijo, se bo iz korpusnih datotek prebrala razpoložljiva taksonomija oz. filtri. Ta operacija lahko traja dlje časa, sploh če je korpus združen v eni sami datoteki.
|
label.corpusTab.readHeaderInfoH=Program bo iz korpusnih datotek prebral taksonomijo. Ta operacija lahko traja dlje časa.
|
||||||
label.corpusTab.chooseResultsLocationH=Nastavi lokacijo rezultatov
|
label.corpusTab.chooseResultsLocationH=Nastavi lokacijo rezultatov
|
||||||
label.corpusTab.selectReaderH=Izberi bralnik
|
label.corpusTab.selectReaderH=Izberi bralnik
|
||||||
label.corpusTab.outputNameH=Ime izhodne datoteke
|
label.corpusTab.outputNameH=Ime izhodne datoteke
|
||||||
|
label.corpusTab.punctuationH=Izberite želeno decimalno znamenje v izpisu.
|
||||||
|
|
||||||
# character analysis tab
|
# character analysis tab
|
||||||
label.stringLength=Dolžina znakovnih nizov
|
label.stringLength=Dolžina znakovnih nizov
|
||||||
|
@ -101,8 +103,13 @@ label.wordSet.taxonomyH=Besedni nizi bodo izpisani samo iz izbranih taksonomskih
|
||||||
label.wordSet.minimalOccurrencesH=Besedni nizi, ki se pojavijo redkeje, ne bodo vključeni v izpis.
|
label.wordSet.minimalOccurrencesH=Besedni nizi, ki se pojavijo redkeje, ne bodo vključeni v izpis.
|
||||||
label.wordSet.minimalTaxonomyH=Besedni nizi, ki so prisotni v manj vejah, ne bodo vključeni v izpis.
|
label.wordSet.minimalTaxonomyH=Besedni nizi, ki so prisotni v manj vejah, ne bodo vključeni v izpis.
|
||||||
|
|
||||||
|
# punctuations
|
||||||
|
punctuation.COMMA=vejica (,)
|
||||||
|
punctuation.POINT=pika (.)
|
||||||
|
|
||||||
# calculate for
|
# calculate for
|
||||||
calculateFor.WORD=oblike
|
calculateFor.WORD=oblike
|
||||||
|
calculateFor.LOWERCASE_WORD=oblike z malimi črkami
|
||||||
calculateFor.NORMALIZED_WORD=normalizirane oblike
|
calculateFor.NORMALIZED_WORD=normalizirane oblike
|
||||||
calculateFor.LEMMA=leme
|
calculateFor.LEMMA=leme
|
||||||
calculateFor.MORPHOSYNTACTIC_SPECS=oblikoskladenjske oznake
|
calculateFor.MORPHOSYNTACTIC_SPECS=oblikoskladenjske oznake
|
||||||
|
@ -187,7 +194,7 @@ windowTitles.warning=Opozorilo
|
||||||
windowTitles.confirmation=Potrdilo
|
windowTitles.confirmation=Potrdilo
|
||||||
|
|
||||||
# export header translations
|
# export header translations
|
||||||
exportHeader.corpus=Korpus:
|
exportHeader.corpus=Bralnik:
|
||||||
exportHeader.date=Datum:
|
exportHeader.date=Datum:
|
||||||
exportHeader.executionTime=Čas izvajanja:
|
exportHeader.executionTime=Čas izvajanja:
|
||||||
exportHeader.analysis=Analiza:
|
exportHeader.analysis=Analiza:
|
||||||
|
@ -212,6 +219,7 @@ exportHeader.msd=Oblikoskladenjska oznaka:
|
||||||
exportHeader.taxonomy=Filtriranje po taksonomiji:
|
exportHeader.taxonomy=Filtriranje po taksonomiji:
|
||||||
exportHeader.minOccurrences=Min. št. pojavitev:
|
exportHeader.minOccurrences=Min. št. pojavitev:
|
||||||
exportHeader.minTaxonomies=Min. št. taksonomskih vej:
|
exportHeader.minTaxonomies=Min. št. taksonomskih vej:
|
||||||
|
exportHeader.minRelFre=Min. rel. pogostost:
|
||||||
exportHeader.additionalFilters=Dodatni filtri:
|
exportHeader.additionalFilters=Dodatni filtri:
|
||||||
exportHeader.yes=da
|
exportHeader.yes=da
|
||||||
exportHeader.no=ne
|
exportHeader.no=ne
|
||||||
|
@ -231,6 +239,7 @@ exportTable.relativeFrequency=Relativna pogostost
|
||||||
exportTable.msd=msd
|
exportTable.msd=msd
|
||||||
# parts
|
# parts
|
||||||
exportTable.part.word=oblik:
|
exportTable.part.word=oblik:
|
||||||
|
exportTable.part.lowercaseWord=oblik z malimi črkami:
|
||||||
exportTable.part.normalizedWord=normaliziranih oblik:
|
exportTable.part.normalizedWord=normaliziranih oblik:
|
||||||
exportTable.part.lemma=lem:
|
exportTable.part.lemma=lem:
|
||||||
exportTable.part.msd=oblikoskladenjskih oznak:
|
exportTable.part.msd=oblikoskladenjskih oznak:
|
||||||
|
@ -239,6 +248,7 @@ exportTable.part.wordType=besednih vrst:
|
||||||
|
|
||||||
exportTable.part.letterSet=znakovnega niza
|
exportTable.part.letterSet=znakovnega niza
|
||||||
exportTable.part.word2=oblike
|
exportTable.part.word2=oblike
|
||||||
|
exportTable.part.lowercaseWord2=oblike z malimi črkami
|
||||||
exportTable.part.normalizedWord2=normalizirane oblike
|
exportTable.part.normalizedWord2=normalizirane oblike
|
||||||
exportTable.part.lemma2=leme
|
exportTable.part.lemma2=leme
|
||||||
exportTable.part.msd2=oblikoskladenjske oznake
|
exportTable.part.msd2=oblikoskladenjske oznake
|
||||||
|
@ -248,6 +258,7 @@ exportTable.part.wordType2=besedne vrste
|
||||||
exportTable.part.letterSet2=Delež glede na skupno vsoto vseh najdenih znakovnih nizov
|
exportTable.part.letterSet2=Delež glede na skupno vsoto vseh najdenih znakovnih nizov
|
||||||
exportTable.part.letterSet3=Znakovni niz
|
exportTable.part.letterSet3=Znakovni niz
|
||||||
exportTable.part.word3=Oblika
|
exportTable.part.word3=Oblika
|
||||||
|
exportTable.part.lowercaseWord3=Oblika z malimi črkami
|
||||||
exportTable.part.normalizedWord3=Normalizirana oblika
|
exportTable.part.normalizedWord3=Normalizirana oblika
|
||||||
exportTable.part.lemma3=Lema
|
exportTable.part.lemma3=Lema
|
||||||
exportTable.part.msd3=Oblikoskladenjska oznaka
|
exportTable.part.msd3=Oblikoskladenjska oznaka
|
||||||
|
|
Loading…
Reference in New Issue
Block a user