Added filter parameters to CSV + created names of columns for MSDs + [partly] fixed number of words parameter

2018-11-13 13:57:49 +01:00
parent a4df732678
commit cbfe3e6025
9 changed files with 502 additions and 219 deletions
--- a/src/main/java/data/MultipleHMKeys.java
+++ b/src/main/java/data/MultipleHMKeys.java
@@ -15,6 +15,42 @@ public interface MultipleHMKeys {

    default ArrayList<MultipleHMKeys> getSplittedMultipleHMKeys(){ return null; }

+    /**
+     * Returns the morphosyntactic description (MSD) stored in this key.
+     *
+     * When the filter calculates for MORPHOSYNTACTIC_SPECS the value is K1.
+     * Otherwise the filter's multiple keys are scanned in iteration order and
+     * positions 0-3 map to K2-K5; if the MSD key occurs more than once the
+     * last occurrence wins. Keys at position 4 or later are ignored.
+     *
+     * @param filter filter supplying the primary and additional keys
+     * @return the MSD component, or an empty string when the filter does not
+     *         reference MORPHOSYNTACTIC_SPECS within the supported positions
+     */
+    default String getMsd(Filter filter) {
+        if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
+            return getK1();
+        }
+
+        String found = "";
+        if (!filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
+            return found;
+        }
+
+        int position = 0;
+        for (CalculateFor candidate : filter.getMultipleKeys()) {
+            // only the first four additional keys have a backing K-slot
+            if (position <= 3 && candidate.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
+                if (position == 0) { found = getK2(); }
+                else if (position == 1) { found = getK3(); }
+                else if (position == 2) { found = getK4(); }
+                else { found = getK5(); }
+            }
+            position++;
+        }
+        return found;
+    }
+
    @Override
    int hashCode();

--- a/src/main/java/data/StatisticsNew.java
+++ b/src/main/java/data/StatisticsNew.java
@@ -5,6 +5,7 @@ import static gui.ValidationUtil.*;
 import java.io.UnsupportedEncodingException;
 import java.time.LocalDateTime;
 import java.time.format.DateTimeFormatter;
+import java.time.temporal.ChronoUnit;
 import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicLong;
@@ -39,8 +40,10 @@ public class StatisticsNew {
 	private boolean useDB;
 	private RDB db;
 	private boolean analysisProducedResults;
-	private LocalDateTime time;
+	private LocalDateTime timeBeginning;
+	private LocalDateTime timeEnding;
 	private Map<Collocability, Map<MultipleHMKeys, Double>> collocability;
+	private AtomicLong uniGramOccurrences;

 	public StatisticsNew(Corpus corpus, Filter filter, boolean useDB) {
 		this.corpus = corpus;
@@ -48,8 +51,10 @@ public class StatisticsNew {
 		this.taxonomyResult = new ConcurrentHashMap<>();
 		this.taxonomyResult.put("Total", new ConcurrentHashMap<>());
 		this.collocability = new ConcurrentHashMap<>();
+		this.uniGramOccurrences = new AtomicLong(0L);

-		// create table for counting word occurrences per taxonomies
+
+        // create table for counting word occurrences per taxonomies
 		if (this.corpus.getTaxonomy() != null && filter.getDisplayTaxonomy()) {
 			if (this.filter.getTaxonomy().isEmpty()) {
 				for (int i = 0; i < this.corpus.getTaxonomy().size(); i++) {
@@ -75,7 +80,9 @@ public class StatisticsNew {
 			result = new ConcurrentHashMap<>();
 		}

-		resultTitle = generateResultTitle();
+        this.timeBeginning = LocalDateTime.now();
+
+//		resultTitle = generateResultTitle();

 		logger.debug(toString());
 	}
@@ -94,7 +101,7 @@ public class StatisticsNew {
 	 *
 	 * @return
 	 */
-	private String generateResultTitle() {
+	public String generateResultTitle() {
 		String separator = "_";
 		StringBuilder sb = new StringBuilder();

@@ -108,12 +115,21 @@ public class StatisticsNew {
 						.append(filter.getCalculateFor())
 						.append(separator);
 			} else if(ngramLevel == 1) {
-				sb.append(corpus.getCorpusType().toString())
-						.append(separator)
-						.append("besede")
-						.append(separator)
-						.append(filter.getCalculateFor())
-						.append(separator);
+				if (filter.getSuffixLength() != null && filter.getSuffixList() != null && filter.getPrefixLength() != null && filter.getPrefixList() != null) {
+					sb.append(corpus.getCorpusType().toString())
+							.append(separator)
+							.append("besedni-deli")
+							.append(separator)
+							.append(filter.getCalculateFor())
+							.append(separator);
+				} else {
+					sb.append(corpus.getCorpusType().toString())
+							.append(separator)
+							.append("besede")
+							.append(separator)
+							.append(filter.getCalculateFor())
+							.append(separator);
+				}
 			}
 			else {
 				sb.append(filter.getAl().toString())
@@ -141,13 +157,20 @@ public class StatisticsNew {
 		// if taxonomy -> taxonomy
 		// if cvv -> cvv + dolžina

-		this.time = this.time != null ? this.time : LocalDateTime.now();

-		sb.append(time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy_hh.mm.ss")));
+		sb.append(getTimeEnding());
 		return sb.toString();

 	}

+	public void setTimeEnding(){ // records the current local date-time as the end timestamp
+        timeEnding = LocalDateTime.now();
+    }
+
+    public String getTimeEnding(){ // stamps "now" on first use if no end time was recorded yet (instead of NPE); HH = 24-hour clock
+        return (timeEnding != null ? timeEnding : (timeEnding = LocalDateTime.now())).format(DateTimeFormatter.ofPattern("dd.MM.yyyy_HH.mm.ss"));
+    }
+
 	public boolean isAnalysisProducedResults() {
 		return analysisProducedResults;
 	}
@@ -319,6 +342,14 @@ public class StatisticsNew {
 		return Util.sortByValue(Util.atomicInt2StringAndInt(map), limit);
 	}

+	public void updateUniGramOccurrences(int amount){
+        uniGramOccurrences.addAndGet(amount); // one atomic add; the former get()+set() pair could lose concurrent updates
+    }
+
+    public long getUniGramOccurrences(){ // current value of the uni-gram occurrence counter
+	    return uniGramOccurrences.get();
+    }
+
 	public void updateTaxonomyResults(MultipleHMKeys o, List<String> taxonomy) {
 		for (String key : taxonomyResult.keySet()) {
 			// first word should have the same taxonomy as others
@@ -423,22 +454,23 @@ public class StatisticsNew {
 		LinkedHashMap<String, String> info = new LinkedHashMap<>();

 		info.put("Korpus:", corpus.getCorpusType().toString());
-		info.put("Datum:", time.format(DateTimeFormatter.ofPattern("dd.MM.yyyy hh:mm")));
+		setTimeEnding();
+		info.put("Datum:", timeEnding.format(DateTimeFormatter.ofPattern("dd.MM.yyyy hh:mm")));
 		if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
 			Integer ngramLevel = filter.getNgramValue();
 			if (ngramLevel == 0)
-				info.put("Analiza", "Črke");
+				info.put("Analiza:", "Črke");
 			else if (ngramLevel == 1) {
 				// if suffixes or prefixes are not null print word parts
 				if (filter.getSuffixLength() != null || filter.getSuffixList() != null || filter.getPrefixLength() != null || filter.getPrefixList() != null) {
-					info.put("Analiza", "Besedni deli");
+					info.put("Analiza:", "Besedni deli");
 				} else {
-					info.put("Analiza", "Besede");
+					info.put("Analiza:", "Besede");
 				}
 			} else
-				info.put("Analiza", filter.getAl().toString());
+				info.put("Analiza:", filter.getAl().toString());
 		} else {
-			info.put("Analiza", filter.getAl().toString());
+			info.put("Analiza:", filter.getAl().toString());
 		}

 		if (filter.getAl() == AnalysisLevel.STRING_LEVEL) {
@@ -453,9 +485,68 @@ public class StatisticsNew {
 			if (ngramLevel > 1)
 				info.put("Skip:", isNotEmpty(filter.getSkipValue()) ? filter.getSkipValue().toString() : "0");

-			// izračunaj za
+			// calculate for
 			info.put("Izračunaj za:", filter.getCalculateFor().toString());

+			// also write
+            if (filter.getMultipleKeys().size() > 0){
+
+                StringBuilder mk = new StringBuilder();
+                for (CalculateFor s : filter.getMultipleKeys()) {
+                    mk.append(s.toString()).append("; ");
+                }
+                info.put("Izpiši tudi: ", String.join("; ", mk.substring(0, mk.length() - 2)));
+            }
+
+			// time elapsed
+//            setTimeEnding();
+            long seconds = ChronoUnit.MILLIS.between(timeBeginning, timeEnding) / 1000;
+			info.put("Čas izvajanja:", String.valueOf(seconds) + " s");
+
+            // data limitations
+            if (filter.getDisplayTaxonomy()){
+                info.put("Izpiši taksonomije: ", "Da");
+            } else {
+                info.put("Izpiši taksonomije: ", "Ne");
+            }
+
+            // note punctuations - ngram > 1
+            if(ngramLevel > 1) {
+                if (filter.getNotePunctuations()) {
+                    info.put("Upoštevaj ločila: ", "Da");
+                } else {
+                    info.put("Upoštevaj ločila: ", "Ne");
+                }
+            }
+
+            // collocability measures - n-gram > 1
+            if (ngramLevel > 1 && filter.getCollocability().size() > 0){
+                StringBuilder mk = new StringBuilder();
+                for (Collocability s : filter.getCollocability()) {
+                    mk.append(s.toString()).append("; ");
+                }
+                info.put("Kolokabilnost: ", String.join("; ", mk.substring(0, mk.length() - 2)));
+            }
+
+            // fragmented MSD - n-gram = 1
+            if (info.get("Analiza:").equals("Besede")){
+                if (filter.getWriteMsdAtTheEnd()){
+                    info.put("Izpiši razbit MSD: ", "Da");
+                } else {
+                    info.put("Izpiši razbit MSD: ", "Ne");
+                }
+            }
+
+            if (filter.getSuffixLength() != null || filter.getSuffixList() != null || filter.getPrefixLength() != null || filter.getPrefixList() != null) {
+                if (filter.getPrefixLength() > 0 || filter.getSuffixLength() > 0) {
+                    info.put("Dolžina predpone: ", String.valueOf(filter.getPrefixLength()));
+                    info.put("Dolžina pripone: ", String.valueOf(filter.getSuffixLength()));
+                } else {
+                    info.put("Seznam predpon: ", String.join("; ", filter.getPrefixList()));
+                    info.put("Seznam pripon: ", String.join("; ", filter.getSuffixList()));
+                }
+            }
+
 			// msd
 			if (!isEmpty(filter.getMsd())) {
 				StringBuilder msdPattern = new StringBuilder();
@@ -479,6 +570,9 @@ public class StatisticsNew {
 			}
 		}

+		info.put("Min. št. pojavitev: ", String.valueOf(filter.getMinimalOccurrences()));
+		info.put("Min. št. taksonomij: ", String.valueOf(filter.getMinimalTaxonomy()));
+
 		if (corpus.getCorpusType() == CorpusType.SOLAR) {
 			HashMap<String, ObservableList<String>> filters = corpus.getSolarFilters();