Reimplementation of the handling of other signs (punctuation and symbols such as , / * ( ) etc.) in n-grams.

This commit is contained in:
2018-08-28 11:41:19 +02:00
parent a8d147de52
commit 1c00f1a283
9 changed files with 203 additions and 91 deletions

View File

@@ -90,6 +90,7 @@ public class Export {
FILE_HEADER_AL.add(filter.getCalculateFor().toHeaderString());
if (filter.getCalculateFor().equals(CalculateFor.LEMMA))
FILE_HEADER_AL.add("Lema male črke");
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
// if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi"))) {
// if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) {
@@ -161,7 +162,7 @@ public class Export {
// } else {
// FILE_HEADER_AL.add("Delež glede na vse leme");
// }
FILE_HEADER_AL.add("Skupna relativna pogostost");
FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)");
for (String key : taxonomyResults.keySet()) {
if(!key.equals("Total")) {
FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]");
@@ -213,8 +214,7 @@ public class Export {
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
List dataEntry = new ArrayList<>();
dataEntry.add(e.getKey().getK1());
if (headerInfoBlock.containsKey("Analiza") && (headerInfoBlock.get("Analiza").equals("Besede") || headerInfoBlock.get("Analiza").equals("Besedni nizi")) &&
headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")){
if (filter.getCalculateFor().equals(CalculateFor.LEMMA)){
dataEntry.add(e.getKey().getK1().toLowerCase());
}
@@ -255,16 +255,55 @@ public class Export {
// }
dataEntry.add(e.getValue().toString());
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies));
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
for (String key : taxonomyResults.keySet()){
if(!key.equals("Total")) {
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
dataEntry.add(frequency.toString());
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 10000) / num_taxonomy_frequencies.get(key)));
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
}
}
// Append the MSD, split into single characters (one per column), at the end of each CSV record
if (filter.getWriteMsdAtTheEnd()) {
String msd = "";
if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
msd = e.getKey().getK1();
} else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
i = 0;
for (CalculateFor otherKey : filter.getMultipleKeys()){
switch(i){
case 0:
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
msd = e.getKey().getK2();
}
break;
case 1:
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
msd = e.getKey().getK3();
}
break;
case 2:
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
msd = e.getKey().getK4();
}
break;
case 3:
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
msd = e.getKey().getK5();
}
break;
}
i++;
}
}
String [] charArray = msd.split("(?!^)");
dataEntry.addAll(Arrays.asList(charArray));
}
csvFilePrinter.printRecord(dataEntry);
}
} catch (Exception e) {