Reimplementation of other signs (,/*() etc.) in ngrams.
This commit is contained in:
		
							parent
							
								
									a8d147de52
								
							
						
					
					
						commit
						1c00f1a283
					
				@ -260,6 +260,12 @@ public class XML_processing {
 | 
				
			|||||||
						} else if (qName.equals("c3")) {
 | 
											} else if (qName.equals("c3")) {
 | 
				
			||||||
							String c3Content = eventReader.nextEvent().asCharacters().getData();
 | 
												String c3Content = eventReader.nextEvent().asCharacters().getData();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                            if(stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() &&
 | 
				
			||||||
 | 
					                                    stavek.size() > 0){
 | 
				
			||||||
 | 
					                                stavek.add(new Word(c3Content, c3Content, "/"));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							if (c3Content.equals(".") && includeThisBlock) {
 | 
												if (c3Content.equals(".") && includeThisBlock) {
 | 
				
			||||||
								// add sentence to corpus
 | 
													// add sentence to corpus
 | 
				
			||||||
								corpus.add(new Sentence(stavek, null));
 | 
													corpus.add(new Sentence(stavek, null));
 | 
				
			||||||
@ -276,9 +282,6 @@ public class XML_processing {
 | 
				
			|||||||
									// the data anymore
 | 
														// the data anymore
 | 
				
			||||||
									corpus.clear();
 | 
														corpus.clear();
 | 
				
			||||||
								}
 | 
													}
 | 
				
			||||||
							}
 | 
					 | 
				
			||||||
							else if(includeThisBlock){
 | 
					 | 
				
			||||||
                                inPunctuation = true;
 | 
					 | 
				
			||||||
							}
 | 
												}
 | 
				
			||||||
						} else if (headTags.contains(qName)) {
 | 
											} else if (headTags.contains(qName)) {
 | 
				
			||||||
							String tagContent = eventReader.nextEvent().asCharacters().getData();
 | 
												String tagContent = eventReader.nextEvent().asCharacters().getData();
 | 
				
			||||||
@ -296,16 +299,6 @@ public class XML_processing {
 | 
				
			|||||||
						if (in_word) {
 | 
											if (in_word) {
 | 
				
			||||||
							stavek.add(new Word(characters.getData(), lemma, msd));
 | 
												stavek.add(new Word(characters.getData(), lemma, msd));
 | 
				
			||||||
							in_word = false;
 | 
												in_word = false;
 | 
				
			||||||
						} else if(inPunctuation){
 | 
					 | 
				
			||||||
                            String punctuation = ",";
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                            if (stavek.size() > 0){
 | 
					 | 
				
			||||||
								stavek.get(stavek.size()-1).setWord(stavek.get(stavek.size()-1).getWord() + punctuation);
 | 
					 | 
				
			||||||
								stavek.get(stavek.size()-1).setLemma(stavek.get(stavek.size()-1).getLemma() + punctuation);
 | 
					 | 
				
			||||||
								stavek.get(stavek.size()-1).setMsd(stavek.get(stavek.size()-1).getMsd() + punctuation);
 | 
					 | 
				
			||||||
							}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
						    inPunctuation = false;
 | 
					 | 
				
			||||||
						}
 | 
											}
 | 
				
			||||||
						break;
 | 
											break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -548,13 +541,16 @@ public class XML_processing {
 | 
				
			|||||||
							inWord = false;
 | 
												inWord = false;
 | 
				
			||||||
						}
 | 
											}
 | 
				
			||||||
						if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
 | 
											if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
 | 
				
			||||||
//						    String punctuation = characters.getData();
 | 
											    String punctuation = characters.getData();
 | 
				
			||||||
						    String punctuation = ",";
 | 
												sentence.add(new Word(punctuation, punctuation, "/"));
 | 
				
			||||||
 | 
					 | 
				
			||||||
                            sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation);
 | 
					 | 
				
			||||||
                            sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation);
 | 
					 | 
				
			||||||
                            sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation);
 | 
					 | 
				
			||||||
							inPunctuation = false;
 | 
												inPunctuation = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//						    String punctuation = ",";
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//                            sentence.get(sentence.size() - 1).setWord(sentence.get(sentence.size() - 1).getWord() + punctuation);
 | 
				
			||||||
 | 
					//                            sentence.get(sentence.size() - 1).setLemma(sentence.get(sentence.size() - 1).getLemma() + punctuation);
 | 
				
			||||||
 | 
					//                            sentence.get(sentence.size() - 1).setMsd(sentence.get(sentence.size() - 1).getMsd() + punctuation);
 | 
				
			||||||
 | 
					//                            inPunctuation = false;
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
						break;
 | 
											break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -56,8 +56,8 @@ public class Ngrams {
 | 
				
			|||||||
//					String test = key;
 | 
					//					String test = key;
 | 
				
			||||||
//				}
 | 
					//				}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				if (stats.getFilter().getNotePunctuations())
 | 
					//				if (stats.getFilter().getNotePunctuations())
 | 
				
			||||||
					key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
 | 
					//					key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				MultipleHMKeys multipleKeys;
 | 
									MultipleHMKeys multipleKeys;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -68,28 +68,28 @@ public class Ngrams {
 | 
				
			|||||||
						break;
 | 
											break;
 | 
				
			||||||
					case 1:
 | 
										case 1:
 | 
				
			||||||
						String k1_2 = wordToString(ngramCandidate, otherKeys.get(0));
 | 
											String k1_2 = wordToString(ngramCandidate, otherKeys.get(0));
 | 
				
			||||||
						if (stats.getFilter().getNotePunctuations())
 | 
					//						if (stats.getFilter().getNotePunctuations())
 | 
				
			||||||
							k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
 | 
					//							k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
 | 
				
			||||||
						multipleKeys = new MultipleHMKeys2(key, k1_2);
 | 
											multipleKeys = new MultipleHMKeys2(key, k1_2);
 | 
				
			||||||
						break;
 | 
											break;
 | 
				
			||||||
					case 2:
 | 
										case 2:
 | 
				
			||||||
						String k2_2 = wordToString(ngramCandidate, otherKeys.get(0));
 | 
											String k2_2 = wordToString(ngramCandidate, otherKeys.get(0));
 | 
				
			||||||
						String k2_3 = wordToString(ngramCandidate, otherKeys.get(1));
 | 
											String k2_3 = wordToString(ngramCandidate, otherKeys.get(1));
 | 
				
			||||||
						if (stats.getFilter().getNotePunctuations()) {
 | 
					//						if (stats.getFilter().getNotePunctuations()) {
 | 
				
			||||||
							k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
 | 
					//							k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
 | 
				
			||||||
							k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
 | 
					//							k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
 | 
				
			||||||
						}
 | 
					//						}
 | 
				
			||||||
						multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
 | 
											multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
 | 
				
			||||||
						break;
 | 
											break;
 | 
				
			||||||
					case 3:
 | 
										case 3:
 | 
				
			||||||
						String k3_2 = wordToString(ngramCandidate, otherKeys.get(0));
 | 
											String k3_2 = wordToString(ngramCandidate, otherKeys.get(0));
 | 
				
			||||||
						String k3_3 = wordToString(ngramCandidate, otherKeys.get(1));
 | 
											String k3_3 = wordToString(ngramCandidate, otherKeys.get(1));
 | 
				
			||||||
						String k3_4 = wordToString(ngramCandidate, otherKeys.get(2));
 | 
											String k3_4 = wordToString(ngramCandidate, otherKeys.get(2));
 | 
				
			||||||
						if (stats.getFilter().getNotePunctuations()) {
 | 
					//						if (stats.getFilter().getNotePunctuations()) {
 | 
				
			||||||
							k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
 | 
					//							k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
 | 
				
			||||||
							k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
 | 
					//							k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
 | 
				
			||||||
							k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
 | 
					//							k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
 | 
				
			||||||
						}
 | 
					//						}
 | 
				
			||||||
						multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
 | 
											multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
 | 
				
			||||||
						break;
 | 
											break;
 | 
				
			||||||
					case 4:
 | 
										case 4:
 | 
				
			||||||
@ -97,12 +97,12 @@ public class Ngrams {
 | 
				
			|||||||
						String k4_3 = wordToString(ngramCandidate, otherKeys.get(1));
 | 
											String k4_3 = wordToString(ngramCandidate, otherKeys.get(1));
 | 
				
			||||||
						String k4_4 = wordToString(ngramCandidate, otherKeys.get(2));
 | 
											String k4_4 = wordToString(ngramCandidate, otherKeys.get(2));
 | 
				
			||||||
						String k4_5 = wordToString(ngramCandidate, otherKeys.get(3));
 | 
											String k4_5 = wordToString(ngramCandidate, otherKeys.get(3));
 | 
				
			||||||
						if (stats.getFilter().getNotePunctuations()) {
 | 
					//						if (stats.getFilter().getNotePunctuations()) {
 | 
				
			||||||
							k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
 | 
					//							k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
 | 
				
			||||||
							k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
 | 
					//							k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
 | 
				
			||||||
							k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
 | 
					//							k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
 | 
				
			||||||
							k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
 | 
					//							k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
 | 
				
			||||||
						}
 | 
					//						}
 | 
				
			||||||
						multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
 | 
											multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
 | 
				
			||||||
						break;
 | 
											break;
 | 
				
			||||||
					default:
 | 
										default:
 | 
				
			||||||
@ -241,22 +241,22 @@ public class Ngrams {
 | 
				
			|||||||
	 */
 | 
						 */
 | 
				
			||||||
	private static Word checkAndModifySkipgramPunctuation(List<Word> sentence, int i, int j, StatisticsNew stats){
 | 
						private static Word checkAndModifySkipgramPunctuation(List<Word> sentence, int i, int j, StatisticsNew stats){
 | 
				
			||||||
		// if punctuation checkbox selected and there words at indexes i and j are not next to each other
 | 
							// if punctuation checkbox selected and there words at indexes i and j are not next to each other
 | 
				
			||||||
		if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){
 | 
					//		if(stats.getFilter().getNotePunctuations() && j - i > 1 && sentence.get(i).getWord().charAt(sentence.get(i).getWord().length() - 1) != ','){
 | 
				
			||||||
			boolean middleWordsHavePunctuation = false;
 | 
					//			boolean middleWordsHavePunctuation = false;
 | 
				
			||||||
			for (int n = i + 1; n < j; n++){
 | 
					//			for (int n = i + 1; n < j; n++){
 | 
				
			||||||
				if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){
 | 
					//				if (sentence.get(n).getWord().charAt(sentence.get(n).getWord().length() - 1) == ','){
 | 
				
			||||||
					middleWordsHavePunctuation = true;
 | 
					//					middleWordsHavePunctuation = true;
 | 
				
			||||||
					break;
 | 
					//					break;
 | 
				
			||||||
				}
 | 
					//				}
 | 
				
			||||||
			}
 | 
					//			}
 | 
				
			||||||
			if (middleWordsHavePunctuation){
 | 
					//			if (middleWordsHavePunctuation){
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
				String punctuation = ",";
 | 
					//				String punctuation = ",";
 | 
				
			||||||
				return new Word(sentence.get(i).getWord() + punctuation,
 | 
					//				return new Word(sentence.get(i).getWord() + punctuation,
 | 
				
			||||||
						sentence.get(i).getLemma() + punctuation,
 | 
					//						sentence.get(i).getLemma() + punctuation,
 | 
				
			||||||
						sentence.get(i).getMsd() + punctuation);
 | 
					//						sentence.get(i).getMsd() + punctuation);
 | 
				
			||||||
			}
 | 
					//			}
 | 
				
			||||||
		}
 | 
					//		}
 | 
				
			||||||
		return sentence.get(i);
 | 
							return sentence.get(i);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@ -348,8 +348,8 @@ public class Ngrams {
 | 
				
			|||||||
//					String test = key;
 | 
					//					String test = key;
 | 
				
			||||||
//				}
 | 
					//				}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if (stats.getFilter().getNotePunctuations())
 | 
					//			if (stats.getFilter().getNotePunctuations())
 | 
				
			||||||
				key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
 | 
					//				key = (!key.equals("") && key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			MultipleHMKeys multipleKeys;
 | 
								MultipleHMKeys multipleKeys;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -360,28 +360,28 @@ public class Ngrams {
 | 
				
			|||||||
					break;
 | 
										break;
 | 
				
			||||||
				case 1:
 | 
									case 1:
 | 
				
			||||||
					String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0));
 | 
										String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0));
 | 
				
			||||||
					if (stats.getFilter().getNotePunctuations())
 | 
					//					if (stats.getFilter().getNotePunctuations())
 | 
				
			||||||
						k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
 | 
					//						k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
 | 
				
			||||||
					multipleKeys = new MultipleHMKeys2(key, k1_2);
 | 
										multipleKeys = new MultipleHMKeys2(key, k1_2);
 | 
				
			||||||
					break;
 | 
										break;
 | 
				
			||||||
				case 2:
 | 
									case 2:
 | 
				
			||||||
					String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0));
 | 
										String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0));
 | 
				
			||||||
					String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1));
 | 
										String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1));
 | 
				
			||||||
					if (stats.getFilter().getNotePunctuations()) {
 | 
					//					if (stats.getFilter().getNotePunctuations()) {
 | 
				
			||||||
						k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
 | 
					//						k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
 | 
				
			||||||
						k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
 | 
					//						k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
 | 
				
			||||||
					}
 | 
					//					}
 | 
				
			||||||
					multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
 | 
										multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
 | 
				
			||||||
					break;
 | 
										break;
 | 
				
			||||||
				case 3:
 | 
									case 3:
 | 
				
			||||||
					String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0));
 | 
										String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0));
 | 
				
			||||||
					String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1));
 | 
										String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1));
 | 
				
			||||||
					String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2));
 | 
										String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2));
 | 
				
			||||||
					if (stats.getFilter().getNotePunctuations()) {
 | 
					//					if (stats.getFilter().getNotePunctuations()) {
 | 
				
			||||||
						k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
 | 
					//						k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
 | 
				
			||||||
						k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
 | 
					//						k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
 | 
				
			||||||
						k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
 | 
					//						k3_4 = (!k3_4.equals("") && k3_4.charAt(k3_4.length() - 1) == ',') ? k3_4.substring(0, k3_4.length() - 1) : k3_4;
 | 
				
			||||||
					}
 | 
					//					}
 | 
				
			||||||
					multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
 | 
										multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
 | 
				
			||||||
					break;
 | 
										break;
 | 
				
			||||||
				case 4:
 | 
									case 4:
 | 
				
			||||||
@ -389,12 +389,12 @@ public class Ngrams {
 | 
				
			|||||||
					String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1));
 | 
										String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1));
 | 
				
			||||||
					String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2));
 | 
										String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2));
 | 
				
			||||||
					String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3));
 | 
										String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3));
 | 
				
			||||||
					if (stats.getFilter().getNotePunctuations()) {
 | 
					//					if (stats.getFilter().getNotePunctuations()) {
 | 
				
			||||||
						k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
 | 
					//						k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
 | 
				
			||||||
						k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
 | 
					//						k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
 | 
				
			||||||
						k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
 | 
					//						k4_4 = (!k4_4.equals("") && k4_4.charAt(k4_4.length() - 1) == ',') ? k4_4.substring(0, k4_4.length() - 1) : k4_4;
 | 
				
			||||||
						k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
 | 
					//						k4_5 = (!k4_5.equals("") && k4_5.charAt(k4_5.length() - 1) == ',') ? k4_5.substring(0, k4_5.length() - 1) : k4_5;
 | 
				
			||||||
					}
 | 
					//					}
 | 
				
			||||||
					multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
 | 
										multipleKeys = new MultipleHMKeys5(key, k4_2, k4_3, k4_4, k4_5);
 | 
				
			||||||
					break;
 | 
										break;
 | 
				
			||||||
				default:
 | 
									default:
 | 
				
			||||||
 | 
				
			|||||||
@ -45,6 +45,29 @@ public enum CalculateFor {
 | 
				
			|||||||
		return null;
 | 
							return null;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						public String toMetadataString() {
 | 
				
			||||||
 | 
							switch(this){
 | 
				
			||||||
 | 
								case WORD:
 | 
				
			||||||
 | 
									return "Skupna vsota vseh različnic:";
 | 
				
			||||||
 | 
								case NORMALIZED_WORD:
 | 
				
			||||||
 | 
									return "Skupna vsota vseh normaliziranih različnic:";
 | 
				
			||||||
 | 
								case LEMMA:
 | 
				
			||||||
 | 
									return "Skupna vsota vseh lem:";
 | 
				
			||||||
 | 
								case MORPHOSYNTACTIC_SPECS:
 | 
				
			||||||
 | 
									return "Skupna vsota vseh oblikoskladenjskih oznak:";
 | 
				
			||||||
 | 
								case MORPHOSYNTACTIC_PROPERTY:
 | 
				
			||||||
 | 
									return "Skupna vsota vseh oblikoskladenjskih lastnosti:";
 | 
				
			||||||
 | 
								case WORD_TYPE:
 | 
				
			||||||
 | 
									return "Skupna vsota vseh besednih vrst:";
 | 
				
			||||||
 | 
								case DIST_WORDS:
 | 
				
			||||||
 | 
									return "Skupna vsota vseh različnic:";
 | 
				
			||||||
 | 
								case DIST_LEMMAS:
 | 
				
			||||||
 | 
									return "Skupna vsota vseh lem:";
 | 
				
			||||||
 | 
								default:
 | 
				
			||||||
 | 
									return null;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	public String toHeaderString() {
 | 
						public String toHeaderString() {
 | 
				
			||||||
		switch(this){
 | 
							switch(this){
 | 
				
			||||||
			case WORD:
 | 
								case WORD:
 | 
				
			||||||
 | 
				
			|||||||
@ -25,6 +25,7 @@ public class Filter {
 | 
				
			|||||||
		DISPLAY_TAXONOMY,
 | 
							DISPLAY_TAXONOMY,
 | 
				
			||||||
		MSD,
 | 
							MSD,
 | 
				
			||||||
		HAS_MSD,
 | 
							HAS_MSD,
 | 
				
			||||||
 | 
							WRITE_MSD_AT_THE_END,
 | 
				
			||||||
		SOLAR_FILTERS,
 | 
							SOLAR_FILTERS,
 | 
				
			||||||
		MULTIPLE_KEYS,
 | 
							MULTIPLE_KEYS,
 | 
				
			||||||
		NOTE_PUNCTUATIONS,
 | 
							NOTE_PUNCTUATIONS,
 | 
				
			||||||
@ -34,6 +35,7 @@ public class Filter {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	public Filter() {
 | 
						public Filter() {
 | 
				
			||||||
		filter = new HashMap<>();
 | 
							filter = new HashMap<>();
 | 
				
			||||||
 | 
							filter.put(WRITE_MSD_AT_THE_END, false);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	public Filter(AnalysisLevel al, CalculateFor cf) {
 | 
						public Filter(AnalysisLevel al, CalculateFor cf) {
 | 
				
			||||||
@ -41,6 +43,7 @@ public class Filter {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
		filter.put(ANALYSIS_LEVEL, al);
 | 
							filter.put(ANALYSIS_LEVEL, al);
 | 
				
			||||||
		filter.put(CALCULATE_FOR, cf);
 | 
							filter.put(CALCULATE_FOR, cf);
 | 
				
			||||||
 | 
							filter.put(WRITE_MSD_AT_THE_END, false);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	public void setAl(AnalysisLevel al) {
 | 
						public void setAl(AnalysisLevel al) {
 | 
				
			||||||
@ -124,6 +127,14 @@ public class Filter {
 | 
				
			|||||||
		return (ArrayList<Pattern>) filter.get(MSD);
 | 
							return (ArrayList<Pattern>) filter.get(MSD);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						public void setWriteMsdAtTheEnd(boolean writeMsdAtTheEnd) {
 | 
				
			||||||
 | 
							filter.put(WRITE_MSD_AT_THE_END, writeMsdAtTheEnd);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						public boolean getWriteMsdAtTheEnd() {
 | 
				
			||||||
 | 
							return (boolean) filter.get(WRITE_MSD_AT_THE_END);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	public void setHasMsd(boolean hasMsd) {
 | 
						public void setHasMsd(boolean hasMsd) {
 | 
				
			||||||
		filter.put(HAS_MSD, hasMsd);
 | 
							filter.put(HAS_MSD, hasMsd);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
				
			|||||||
@ -8,7 +8,6 @@ import javafx.collections.ObservableList;
 | 
				
			|||||||
import javafx.concurrent.Task;
 | 
					import javafx.concurrent.Task;
 | 
				
			||||||
import javafx.fxml.FXML;
 | 
					import javafx.fxml.FXML;
 | 
				
			||||||
import javafx.scene.control.*;
 | 
					import javafx.scene.control.*;
 | 
				
			||||||
import javafx.scene.layout.Pane;
 | 
					 | 
				
			||||||
import org.apache.commons.lang3.StringUtils;
 | 
					import org.apache.commons.lang3.StringUtils;
 | 
				
			||||||
import org.apache.logging.log4j.LogManager;
 | 
					import org.apache.logging.log4j.LogManager;
 | 
				
			||||||
import org.apache.logging.log4j.Logger;
 | 
					import org.apache.logging.log4j.Logger;
 | 
				
			||||||
@ -17,6 +16,7 @@ import org.controlsfx.control.CheckComboBox;
 | 
				
			|||||||
import java.io.File;
 | 
					import java.io.File;
 | 
				
			||||||
import java.io.UnsupportedEncodingException;
 | 
					import java.io.UnsupportedEncodingException;
 | 
				
			||||||
import java.util.*;
 | 
					import java.util.*;
 | 
				
			||||||
 | 
					import java.util.concurrent.atomic.AtomicBoolean;
 | 
				
			||||||
import java.util.regex.Pattern;
 | 
					import java.util.regex.Pattern;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import static alg.XML_processing.readXML;
 | 
					import static alg.XML_processing.readXML;
 | 
				
			||||||
@ -49,6 +49,10 @@ public class OneWordAnalysisTab {
 | 
				
			|||||||
    private CheckBox displayTaxonomyChB;
 | 
					    private CheckBox displayTaxonomyChB;
 | 
				
			||||||
    private boolean displayTaxonomy;
 | 
					    private boolean displayTaxonomy;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @FXML
 | 
				
			||||||
 | 
					    private CheckBox writeMsdAtTheEndChB;
 | 
				
			||||||
 | 
					    private boolean writeMsdAtTheEnd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @FXML
 | 
					    @FXML
 | 
				
			||||||
    private ComboBox<String> calculateForCB;
 | 
					    private ComboBox<String> calculateForCB;
 | 
				
			||||||
    private CalculateFor calculateFor;
 | 
					    private CalculateFor calculateFor;
 | 
				
			||||||
@ -96,6 +100,7 @@ public class OneWordAnalysisTab {
 | 
				
			|||||||
    private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
 | 
					    private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
 | 
				
			||||||
    private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
 | 
					    private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
 | 
				
			||||||
    private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
 | 
					    private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
 | 
				
			||||||
 | 
					    private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
 | 
				
			||||||
    private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
 | 
					    private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // TODO: pass observables for taxonomy based on header scan
 | 
					    // TODO: pass observables for taxonomy based on header scan
 | 
				
			||||||
@ -107,6 +112,8 @@ public class OneWordAnalysisTab {
 | 
				
			|||||||
        currentMode = MODE.WORD;
 | 
					        currentMode = MODE.WORD;
 | 
				
			||||||
        toggleMode(currentMode);
 | 
					        toggleMode(currentMode);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        AtomicBoolean writeMsdAtTheEndEnableCalculateFor = new AtomicBoolean(false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // calculateForCB
 | 
					        // calculateForCB
 | 
				
			||||||
        calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
 | 
					        calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
 | 
				
			||||||
            calculateFor = CalculateFor.factory(newValue);
 | 
					            calculateFor = CalculateFor.factory(newValue);
 | 
				
			||||||
@ -121,9 +128,22 @@ public class OneWordAnalysisTab {
 | 
				
			|||||||
                    alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
 | 
					                    alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
 | 
				
			||||||
            } else if(newValue.equals("normalizirana različnica")) {
 | 
					            } else if(newValue.equals("normalizirana različnica")) {
 | 
				
			||||||
                alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
 | 
					                alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
 | 
				
			||||||
 | 
					            } else if(newValue.equals("oblikoskladenjska oznaka")) {
 | 
				
			||||||
 | 
					                writeMsdAtTheEndEnableCalculateFor.set(true);
 | 
				
			||||||
 | 
					                writeMsdAtTheEndChB.setDisable(false);
 | 
				
			||||||
 | 
					                alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd);
 | 
				
			||||||
            }else {
 | 
					            }else {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
 | 
					                alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if (!newValue.equals("oblikoskladenjska oznaka")){
 | 
				
			||||||
 | 
					                writeMsdAtTheEnd = false;
 | 
				
			||||||
 | 
					                writeMsdAtTheEndChB.setSelected(false);
 | 
				
			||||||
 | 
					                writeMsdAtTheEndChB.setDisable(true);
 | 
				
			||||||
 | 
					                writeMsdAtTheEndEnableCalculateFor.set(false);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
 | 
					            alsoVisualizeCCB.getCheckModel().getCheckedItems().addListener((ListChangeListener<String>) c -> {
 | 
				
			||||||
                alsoVisualize = new ArrayList<>();
 | 
					                alsoVisualize = new ArrayList<>();
 | 
				
			||||||
                ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
 | 
					                ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
 | 
				
			||||||
@ -177,6 +197,13 @@ public class OneWordAnalysisTab {
 | 
				
			|||||||
            alsoVisualize = new ArrayList<>();
 | 
					            alsoVisualize = new ArrayList<>();
 | 
				
			||||||
            ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
 | 
					            ObservableList<String> checkedItems = alsoVisualizeCCB.getCheckModel().getCheckedItems();
 | 
				
			||||||
            alsoVisualize.addAll(checkedItems);
 | 
					            alsoVisualize.addAll(checkedItems);
 | 
				
			||||||
 | 
					            if (checkedItems.contains("oblikoskladenjska oznaka") || writeMsdAtTheEndEnableCalculateFor.get()){
 | 
				
			||||||
 | 
					                writeMsdAtTheEndChB.setDisable(false);
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
 | 
					                writeMsdAtTheEnd = false;
 | 
				
			||||||
 | 
					                writeMsdAtTheEndChB.setSelected(false);
 | 
				
			||||||
 | 
					                writeMsdAtTheEndChB.setDisable(true);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
            logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
 | 
					            logger.info(String.format("Selected also visualize items: %s", StringUtils.join(checkedItems, ",")));
 | 
				
			||||||
        });
 | 
					        });
 | 
				
			||||||
        alsoVisualizeCCB.getCheckModel().clearChecks();
 | 
					        alsoVisualizeCCB.getCheckModel().clearChecks();
 | 
				
			||||||
@ -204,6 +231,15 @@ public class OneWordAnalysisTab {
 | 
				
			|||||||
        });
 | 
					        });
 | 
				
			||||||
        displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
 | 
					        displayTaxonomyChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        writeMsdAtTheEnd = false;
 | 
				
			||||||
 | 
					        writeMsdAtTheEndChB.setDisable(true);
 | 
				
			||||||
 | 
					        // set
 | 
				
			||||||
 | 
					        writeMsdAtTheEndChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
 | 
				
			||||||
 | 
					            writeMsdAtTheEnd = newValue;
 | 
				
			||||||
 | 
					            logger.info("write msd at the end: ", writeMsdAtTheEnd);
 | 
				
			||||||
 | 
					        });
 | 
				
			||||||
 | 
					//        writeMsdAtTheEndChB.setTooltip(new Tooltip(TOOLTIP_readDisplayTaxonomyChB));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // set default values
 | 
					        // set default values
 | 
				
			||||||
        minimalOccurrencesTF.setText("1");
 | 
					        minimalOccurrencesTF.setText("1");
 | 
				
			||||||
        minimalOccurrences = 1;
 | 
					        minimalOccurrences = 1;
 | 
				
			||||||
@ -390,6 +426,7 @@ public class OneWordAnalysisTab {
 | 
				
			|||||||
        filter.setMultipleKeys(alsoVisualize);
 | 
					        filter.setMultipleKeys(alsoVisualize);
 | 
				
			||||||
        filter.setMinimalOccurrences(minimalOccurrences);
 | 
					        filter.setMinimalOccurrences(minimalOccurrences);
 | 
				
			||||||
        filter.setMinimalTaxonomy(minimalTaxonomy);
 | 
					        filter.setMinimalTaxonomy(minimalTaxonomy);
 | 
				
			||||||
 | 
					        filter.setWriteMsdAtTheEnd(writeMsdAtTheEnd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        String message = Validation.validateForStringLevel(filter);
 | 
					        String message = Validation.validateForStringLevel(filter);
 | 
				
			||||||
        if (message == null) {
 | 
					        if (message == null) {
 | 
				
			||||||
 | 
				
			|||||||
@ -124,6 +124,7 @@ public class StringAnalysisTabNew2 {
 | 
				
			|||||||
    private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
 | 
					    private static final ObservableList<String> alsoVisualizeItemsWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
 | 
				
			||||||
    private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
 | 
					    private static final ObservableList<String> alsoVisualizeItemsWordGos = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka", "normalizirana različnica");
 | 
				
			||||||
    private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
 | 
					    private static final ObservableList<String> alsoVisualizeItemsNormalizedWord = FXCollections.observableArrayList("lema", "besedna vrsta", "oblikoskladenjska oznaka");
 | 
				
			||||||
 | 
					    private static final ObservableList<String> alsoVisualizeItemsMsd = FXCollections.observableArrayList("besedna vrsta");
 | 
				
			||||||
    private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
 | 
					    private static final ObservableList<String> alsoVisualizeItemsEmpty = FXCollections.observableArrayList();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -169,7 +170,7 @@ public class StringAnalysisTabNew2 {
 | 
				
			|||||||
        minimalTaxonomyTF.setText("1");
 | 
					        minimalTaxonomyTF.setText("1");
 | 
				
			||||||
        minimalTaxonomy = 1;
 | 
					        minimalTaxonomy = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        notePunctuations = true;
 | 
					        notePunctuations = false;
 | 
				
			||||||
        // set
 | 
					        // set
 | 
				
			||||||
        notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
 | 
					        notePunctuationsChB.selectedProperty().addListener((observable, oldValue, newValue) -> {
 | 
				
			||||||
            notePunctuations = newValue;
 | 
					            notePunctuations = newValue;
 | 
				
			||||||
@ -199,6 +200,8 @@ public class StringAnalysisTabNew2 {
 | 
				
			|||||||
                    alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
 | 
					                    alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsWord);
 | 
				
			||||||
            } else if(newValue.equals("normalizirana različnica")) {
 | 
					            } else if(newValue.equals("normalizirana različnica")) {
 | 
				
			||||||
                alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
 | 
					                alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsNormalizedWord);
 | 
				
			||||||
 | 
					            }else if(newValue.equals("oblikoskladenjska oznaka")) {
 | 
				
			||||||
 | 
					                alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsMsd);
 | 
				
			||||||
            }else {
 | 
					            }else {
 | 
				
			||||||
                alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
 | 
					                alsoVisualizeCCB.getItems().setAll(alsoVisualizeItemsEmpty);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
				
			|||||||
@ -90,6 +90,7 @@ public class Export {
 | 
				
			|||||||
		FILE_HEADER_AL.add(filter.getCalculateFor().toHeaderString());
 | 
							FILE_HEADER_AL.add(filter.getCalculateFor().toHeaderString());
 | 
				
			||||||
		if (filter.getCalculateFor().equals(CalculateFor.LEMMA))
 | 
							if (filter.getCalculateFor().equals(CalculateFor.LEMMA))
 | 
				
			||||||
			FILE_HEADER_AL.add("Lema male črke");
 | 
								FILE_HEADER_AL.add("Lema male črke");
 | 
				
			||||||
 | 
							headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//		if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi"))) {
 | 
					//		if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi"))) {
 | 
				
			||||||
//			if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
 | 
					//			if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
 | 
				
			||||||
@ -161,7 +162,7 @@ public class Export {
 | 
				
			|||||||
//		} else {
 | 
					//		} else {
 | 
				
			||||||
//			FILE_HEADER_AL.add("Delež glede na vse leme");
 | 
					//			FILE_HEADER_AL.add("Delež glede na vse leme");
 | 
				
			||||||
//		}
 | 
					//		}
 | 
				
			||||||
		FILE_HEADER_AL.add("Skupna relativna pogostost");
 | 
							FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)");
 | 
				
			||||||
		for (String key : taxonomyResults.keySet()) {
 | 
							for (String key : taxonomyResults.keySet()) {
 | 
				
			||||||
			if(!key.equals("Total")) {
 | 
								if(!key.equals("Total")) {
 | 
				
			||||||
				FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
 | 
									FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
 | 
				
			||||||
@ -213,8 +214,7 @@ public class Export {
 | 
				
			|||||||
				for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
 | 
									for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
 | 
				
			||||||
					List dataEntry = new ArrayList<>();
 | 
										List dataEntry = new ArrayList<>();
 | 
				
			||||||
					dataEntry.add(e.getKey().getK1());
 | 
										dataEntry.add(e.getKey().getK1());
 | 
				
			||||||
                    if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi")) &&
 | 
					                    if (filter.getCalculateFor().equals(CalculateFor.LEMMA)){
 | 
				
			||||||
                            headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")){
 | 
					 | 
				
			||||||
                        dataEntry.add(e.getKey().getK1().toLowerCase());
 | 
					                        dataEntry.add(e.getKey().getK1().toLowerCase());
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -255,16 +255,55 @@ public class Export {
 | 
				
			|||||||
//					}
 | 
					//					}
 | 
				
			||||||
					dataEntry.add(e.getValue().toString());
 | 
										dataEntry.add(e.getValue().toString());
 | 
				
			||||||
					dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
 | 
										dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
 | 
				
			||||||
					dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
 | 
										dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
 | 
				
			||||||
					for (String key : taxonomyResults.keySet()){
 | 
										for (String key : taxonomyResults.keySet()){
 | 
				
			||||||
						if(!key.equals("Total")) {
 | 
											if(!key.equals("Total")) {
 | 
				
			||||||
							AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
 | 
												AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
 | 
				
			||||||
							dataEntry.add(frequency.toString());
 | 
												dataEntry.add(frequency.toString());
 | 
				
			||||||
							dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
 | 
												dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
 | 
				
			||||||
							dataEntry.add(String.format("%.2f", ((double) frequency.get() * 10000) / num_taxonomy_frequencies.get(key)));
 | 
												dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
 | 
				
			||||||
						}
 | 
											}
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
										// Write msd separated per letters at the end of each line in csv
 | 
				
			||||||
 | 
										if (filter.getWriteMsdAtTheEnd()) {
 | 
				
			||||||
 | 
											String msd = "";
 | 
				
			||||||
 | 
											if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
 | 
				
			||||||
 | 
												msd = e.getKey().getK1();
 | 
				
			||||||
 | 
											} else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
 | 
				
			||||||
 | 
												i = 0;
 | 
				
			||||||
 | 
												for (CalculateFor otherKey : filter.getMultipleKeys()){
 | 
				
			||||||
 | 
													switch(i){
 | 
				
			||||||
 | 
														case 0:
 | 
				
			||||||
 | 
															if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
 | 
				
			||||||
 | 
																msd = e.getKey().getK2();
 | 
				
			||||||
 | 
															}
 | 
				
			||||||
 | 
															break;
 | 
				
			||||||
 | 
														case 1:
 | 
				
			||||||
 | 
															if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
 | 
				
			||||||
 | 
																msd = e.getKey().getK3();
 | 
				
			||||||
 | 
															}
 | 
				
			||||||
 | 
															break;
 | 
				
			||||||
 | 
														case 2:
 | 
				
			||||||
 | 
															if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
 | 
				
			||||||
 | 
																msd = e.getKey().getK4();
 | 
				
			||||||
 | 
															}
 | 
				
			||||||
 | 
															break;
 | 
				
			||||||
 | 
														case 3:
 | 
				
			||||||
 | 
															if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
 | 
				
			||||||
 | 
																msd = e.getKey().getK5();
 | 
				
			||||||
 | 
															}
 | 
				
			||||||
 | 
															break;
 | 
				
			||||||
 | 
													}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
													i++;
 | 
				
			||||||
 | 
												}
 | 
				
			||||||
 | 
											}
 | 
				
			||||||
 | 
											String [] charArray = msd.split("(?!^)");
 | 
				
			||||||
 | 
											dataEntry.addAll(Arrays.asList(charArray));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
										}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
					csvFilePrinter.printRecord(dataEntry);
 | 
										csvFilePrinter.printRecord(dataEntry);
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			} catch (Exception e) {
 | 
								} catch (Exception e) {
 | 
				
			||||||
 | 
				
			|||||||
@ -37,19 +37,22 @@
 | 
				
			|||||||
        <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
 | 
					        <Label layoutX="10.0" layoutY="100.0" prefHeight="25.0" text="Izpiši taksonomije" />
 | 
				
			||||||
        <CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" />
 | 
					        <CheckBox fx:id="displayTaxonomyChB" layoutX="263.0" layoutY="105.0" selected="false" />
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        <Label layoutX="10.0" layoutY="140.0" prefHeight="25.0" text="Izpiši razbit MSD" />
 | 
				
			||||||
 | 
					        <CheckBox fx:id="writeMsdAtTheEndChB" layoutX="263.0" layoutY="145.0" selected="false" />
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        <!-- MSD and Taxonomy separated -->
 | 
					        <!-- MSD and Taxonomy separated -->
 | 
				
			||||||
        <Label layoutX="10.0" layoutY="160.0" prefHeight="25.0" text="Omejitev podatkov" />
 | 
					        <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Omejitev podatkov" />
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        <Label layoutX="10.0" layoutY="200.0" prefHeight="25.0" text="Oznaka MSD"/>
 | 
					        <Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Oznaka MSD"/>
 | 
				
			||||||
        <TextField fx:id="msdTF" layoutX="185.0" layoutY="200.0" prefWidth="180.0"/>
 | 
					        <TextField fx:id="msdTF" layoutX="185.0" layoutY="240.0" prefWidth="180.0"/>
 | 
				
			||||||
        <Label layoutX="10.0" layoutY="240.0" prefHeight="25.0" text="Taksonomija"/>
 | 
					        <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Taksonomija"/>
 | 
				
			||||||
        <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="240.0" prefHeight="25.0" prefWidth="180.0"/>
 | 
					        <CheckComboBox fx:id="taxonomyCCB" layoutX="185.0" layoutY="280.0" prefHeight="25.0" prefWidth="180.0"/>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        <Label layoutX="10.0" layoutY="280.0" prefHeight="25.0" text="Min. št. pojavitev" />
 | 
					        <Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. pojavitev" />
 | 
				
			||||||
        <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="280.0" prefWidth="180.0" />
 | 
					        <TextField fx:id="minimalOccurrencesTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        <Label layoutX="10.0" layoutY="320.0" prefHeight="25.0" text="Min. št. taksonomij" />
 | 
					        <Label layoutX="10.0" layoutY="360.0" prefHeight="25.0" text="Min. št. taksonomij" />
 | 
				
			||||||
        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="320.0" prefWidth="180.0" />
 | 
					        <TextField fx:id="minimalTaxonomyTF" layoutX="185.0" layoutY="360.0" prefWidth="180.0" />
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false"
 | 
					        <Button fx:id="computeNgramsB" layoutX="10.0" layoutY="440.0" mnemonicParsing="false"
 | 
				
			||||||
                prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
 | 
					                prefHeight="25.0" prefWidth="250.0" text="Izračunaj"/>
 | 
				
			||||||
 | 
				
			|||||||
@ -70,7 +70,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        <Label layoutX="10.0" layoutY="220.0" prefHeight="25.0" text="Upoštevaj ločila" />
 | 
					        <Label layoutX="10.0" layoutY="220.0" prefHeight="25.0" text="Upoštevaj ločila" />
 | 
				
			||||||
        <CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="225.0" selected="true" />
 | 
					        <CheckBox fx:id="notePunctuationsChB" layoutX="263.0" layoutY="225.0" selected="false" />
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        <!-- MSD and Taxonomy separated -->
 | 
					        <!-- MSD and Taxonomy separated -->
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user