Added fixes for the ssj500k functionality; fixed the prefix/suffix bug and some other bugs.
This commit is contained in:
@@ -64,27 +64,27 @@ public class Export {
|
||||
|
||||
//Delimiter used in CSV file
|
||||
String NEW_LINE_SEPARATOR = "\n";
|
||||
List<Object> FILE_HEADER_AL = new ArrayList<Object>();
|
||||
List<Object> FILE_HEADER_AL = new ArrayList<>();
|
||||
Object[] FILE_HEADER;
|
||||
|
||||
//Count frequencies
|
||||
long num_frequencies = 0;
|
||||
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
||||
Map<MultipleHMKeys, Long> map = p.getRight();
|
||||
if (map.isEmpty())
|
||||
continue;
|
||||
num_frequencies = Util.mapSumFrequencies(map);
|
||||
}
|
||||
|
||||
// Map<String, Long> num_taxonomy_frequencies = new ConcurrentHashMap<>();
|
||||
// for (String taxonomyKey : taxonomyResults.keySet()) {
|
||||
// num_taxonomy_frequencies.put(taxonomyKey, (long) 0);
|
||||
// for (AtomicLong value : taxonomyResults.get(taxonomyKey).values()){
|
||||
// long val = num_taxonomy_frequencies.get(taxonomyKey);
|
||||
// val += value.get();
|
||||
// num_taxonomy_frequencies.put(taxonomyKey, val);
|
||||
// }
|
||||
// long num_frequencies = 0;
|
||||
// for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
||||
// Map<MultipleHMKeys, Long> map = p.getRight();
|
||||
// if (map.isEmpty())
|
||||
// continue;
|
||||
// num_frequencies = Util.mapSumFrequencies(map);
|
||||
// }
|
||||
|
||||
Map<Taxonomy, Long> num_selected_taxonomy_frequencies = new ConcurrentHashMap<>();
|
||||
for (Taxonomy taxonomyKey : taxonomyResults.keySet()) {
|
||||
num_selected_taxonomy_frequencies.put(taxonomyKey, (long) 0);
|
||||
for (AtomicLong value : taxonomyResults.get(taxonomyKey).values()){
|
||||
long val = num_selected_taxonomy_frequencies.get(taxonomyKey);
|
||||
val += value.get();
|
||||
num_selected_taxonomy_frequencies.put(taxonomyKey, val);
|
||||
}
|
||||
}
|
||||
Map<Taxonomy, AtomicLong> num_taxonomy_frequencies = statistics.getUniGramOccurrences();
|
||||
|
||||
|
||||
@@ -92,32 +92,37 @@ public class Export {
|
||||
if (!ValidationUtil.isEmpty(filter.getSkipValue()) && filter.getSkipValue() > 0) {
|
||||
FILE_HEADER_AL.add("Izpuščene besede");
|
||||
}
|
||||
FILE_HEADER_AL.add(filter.getCalculateFor().toHeaderString());
|
||||
if (filter.getCalculateFor().equals(CalculateFor.LEMMA))
|
||||
FILE_HEADER_AL.add("Lema male črke");
|
||||
|
||||
FILE_HEADER_AL.add(filter.getCalculateFor().toHeaderString(filter.getNgramValue()));
|
||||
if (filter.getCalculateFor().equals(CalculateFor.LEMMA)) {
|
||||
if(filter.getNgramValue() == 0) {
|
||||
FILE_HEADER_AL.add("Črkovni niz (male črke)");
|
||||
} else if(filter.getNgramValue() >= 1) {
|
||||
FILE_HEADER_AL.add("Lema (male črke)");
|
||||
}
|
||||
}
|
||||
if (filter.getSuffixLength() != null && filter.getSuffixList() != null && filter.getPrefixLength() != null && filter.getPrefixList() != null) {
|
||||
if (filter.getPrefixLength() > 0 || filter.getPrefixList().size() > 0) {
|
||||
FILE_HEADER_AL.add("Predpona");
|
||||
FILE_HEADER_AL.add("Začetni del besede");
|
||||
}
|
||||
FILE_HEADER_AL.add("Preostali del besede");
|
||||
if (filter.getSuffixLength() > 0 || filter.getSuffixList().size() > 0) {
|
||||
FILE_HEADER_AL.add("Pripona");
|
||||
FILE_HEADER_AL.add("Končni del besede");
|
||||
}
|
||||
}
|
||||
|
||||
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(statistics.getUniGramOccurrences().get(Taxonomy.TOTAL).longValue()));
|
||||
headerInfoBlock.put(filter.getCalculateFor().totalSumString(filter.getNgramValue()), String.valueOf(num_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue()));
|
||||
headerInfoBlock.put(filter.getCalculateFor().foundSumString(filter.getNgramValue()), String.valueOf(num_selected_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue()));
|
||||
// headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
|
||||
|
||||
for (CalculateFor otherKey : filter.getMultipleKeys()) {
|
||||
FILE_HEADER_AL.add(otherKey.toHeaderString());
|
||||
FILE_HEADER_AL.add(otherKey.toHeaderString(filter.getNgramValue()));
|
||||
if (otherKey.equals(CalculateFor.LEMMA))
|
||||
FILE_HEADER_AL.add("Lema male črke");
|
||||
FILE_HEADER_AL.add("Lema (male črke)");
|
||||
}
|
||||
|
||||
|
||||
FILE_HEADER_AL.add("Skupna absolutna pogostost");
|
||||
FILE_HEADER_AL.add(filter.getCalculateFor().toPercentString());
|
||||
FILE_HEADER_AL.add(filter.getCalculateFor().totalAbsoluteFrequencyString(filter.getNgramValue()));
|
||||
FILE_HEADER_AL.add(filter.getCalculateFor().shareOfTotalString(filter.getNgramValue()));
|
||||
|
||||
FILE_HEADER_AL.add("Skupna relativna pogostost (na milijon pojavitev)");
|
||||
|
||||
@@ -216,6 +221,9 @@ public class Export {
|
||||
// real prefix
|
||||
String rpf = "";
|
||||
for(String pf : filter.getPrefixList()){
|
||||
if (key.length() < pf.length()) {
|
||||
continue;
|
||||
}
|
||||
if (pf.equals(key.substring(0, pf.length()))){
|
||||
rpf = pf;
|
||||
break;
|
||||
@@ -225,6 +233,9 @@ public class Export {
|
||||
// real suffix
|
||||
String rsf = "";
|
||||
for(String sf : filter.getSuffixList()){
|
||||
if (key.length() < sf.length()) {
|
||||
continue;
|
||||
}
|
||||
if (sf.equals(key.substring(key.length() - sf.length()))){
|
||||
rsf = sf;
|
||||
break;
|
||||
@@ -268,13 +279,13 @@ public class Export {
|
||||
|
||||
|
||||
dataEntry.add(e.getValue().toString());
|
||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
||||
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
|
||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_selected_taxonomy_frequencies.get(Taxonomy.TOTAL)));
|
||||
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_taxonomy_frequencies.get(Taxonomy.TOTAL).longValue()));
|
||||
for (Taxonomy key : taxonomyResults.keySet()){
|
||||
if(!key.equals(Taxonomy.TOTAL) && num_taxonomy_frequencies.containsKey(key) && num_taxonomy_frequencies.get(key).longValue() > 0) {
|
||||
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
|
||||
dataEntry.add(frequency.toString());
|
||||
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key).longValue()));
|
||||
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_selected_taxonomy_frequencies.get(key)));
|
||||
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key).longValue()));
|
||||
// dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences()));
|
||||
// dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences()));
|
||||
|
||||
Reference in New Issue
Block a user