You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

336 lines
11 KiB

package util;
import static util.Util.*;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicLong;
import data.Filter;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.lang3.tuple.Pair;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import data.Enums.WordLevelType;
/**
 * Static helpers that write frequency results to disk, either as a JSON file
 * or as semicolon-delimited, UTF-8 encoded CSV files.
 *
 * <p>All writers report I/O problems on standard output / standard error and
 * do not propagate them to the caller.
 */
public class Export {

    /** Record separator used in every CSV file produced by this class. */
    private static final String NEW_LINE_SEPARATOR = "\n";

    /** Shared CSV layout: default format, "\n" record separator, ';' delimiter. */
    private static final CSVFormat CSV_FILE_FORMAT =
            CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');

    /**
     * Writes all non-empty frequency maps to {@code statistics.json} in the
     * current working directory.
     *
     * <p>The output is a JSON array with one object per non-empty map, holding
     * the pair's title under {@code "Title"} and, under {@code "data"}, an array
     * of {@code word} / {@code frequency} / {@code percent} entries. The percent
     * is the entry's share of that map's total as formatted by
     * {@code formatNumberAsPercent}.
     *
     * <p>The file is written as UTF-8 so that non-ASCII words do not depend on
     * the platform default charset.
     *
     * @param set pairs of (title, word-to-frequency map); empty maps are skipped
     */
    // json-simple's JSONObject/JSONArray extend raw collections, so put/add are unchecked.
    @SuppressWarnings("unchecked")
    public static void SetToJSON(Set<Pair<String, Map<String, Long>>> set) {
        JSONArray wrapper = new JSONArray();

        for (Pair<String, Map<String, Long>> p : set) {
            Map<String, Long> map = p.getRight();
            if (map.isEmpty()) {
                continue;
            }

            long total = Util.mapSumFrequencies(map);
            JSONArray dataWrapper = new JSONArray();

            for (Map.Entry<String, Long> e : map.entrySet()) {
                JSONObject dataEntry = new JSONObject();
                dataEntry.put("word", e.getKey());
                dataEntry.put("frequency", e.getValue());
                dataEntry.put("percent", formatNumberAsPercent((double) e.getValue() / total));
                dataWrapper.add(dataEntry);
            }

            JSONObject metric = new JSONObject();
            metric.put("Title", p.getLeft());
            metric.put("data", dataWrapper);
            wrapper.add(metric);
        }

        try (Writer file = new OutputStreamWriter(new FileOutputStream("statistics.json"), StandardCharsets.UTF_8)) {
            file.write(wrapper.toJSONString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes one CSV file per non-empty frequency map into {@code resultsPath}.
     *
     * <p>Each file starts with the header info block, followed by two empty
     * records, the column header and one row per word. A row holds the word,
     * its absolute frequency, its share of the total and its frequency per
     * 10,000, followed by the same three figures for every key of
     * {@code taxonomyResults}. A word that has no counter for a taxonomy key is
     * reported with frequency 0 for that key.
     *
     * <p>Side effect: when the info block marks the analysis as "Besede", an
     * entry holding the total frequency is added to {@code headerInfoBlock}.
     *
     * @param set             pairs of (title, word-to-frequency map); the title determines the file name
     * @param resultsPath     directory the CSV files are written to
     * @param headerInfoBlock key/value lines printed at the top of every file; may be modified (see above)
     * @param taxonomyResults per-taxonomy word counters, keyed by taxonomy name
     * @return the path built for the last pair that was iterated (even if its map was empty and
     *         no file was written for it), or an empty string if {@code set} is empty
     */
    public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock,
                                  Map<String, Map<String, AtomicLong>> taxonomyResults) {
        // NOTE(review): the total is overwritten, not accumulated, so with several non-empty maps
        // every file uses the total of the last non-empty map. Kept as-is; confirm whether a
        // per-file total was intended.
        long num_frequencies = 0;
        for (Pair<String, Map<String, Long>> p : set) {
            Map<String, Long> map = p.getRight();
            if (!map.isEmpty()) {
                num_frequencies = Util.mapSumFrequencies(map);
            }
        }

        Map<String, Long> taxonomyTotals = sumTaxonomyFrequencies(taxonomyResults);
        Object[] fileHeader = buildFrequencyHeader(headerInfoBlock, taxonomyResults.keySet(), num_frequencies);

        String fileName = "";

        for (Pair<String, Map<String, Long>> p : set) {
            // The path is computed before the emptiness check so the return value matches
            // the last iterated pair, as callers have always received it.
            fileName = buildCsvPath(p.getLeft(), resultsPath);

            Map<String, Long> map = p.getRight();
            if (map.isEmpty()) {
                continue;
            }

            try (OutputStreamWriter fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
                 CSVPrinter csvFilePrinter = new CSVPrinter(fileWriter, CSV_FILE_FORMAT)) {
                printHeaderInfo(csvFilePrinter, headerInfoBlock);
                csvFilePrinter.printRecord(fileHeader);

                for (Map.Entry<String, Long> e : map.entrySet()) {
                    String word = e.getKey();
                    long frequency = e.getValue();

                    List<Object> dataEntry = new ArrayList<>();
                    dataEntry.add(word);
                    dataEntry.add(String.valueOf(frequency));
                    dataEntry.add(formatNumberAsPercent((double) frequency / num_frequencies));
                    dataEntry.add(String.format("%.2f", ((double) frequency * 10000) / num_frequencies));

                    for (Map.Entry<String, Map<String, AtomicLong>> taxonomy : taxonomyResults.entrySet()) {
                        // A word without a counter in this taxonomy counts as zero occurrences
                        // instead of aborting the whole file with a NullPointerException.
                        AtomicLong counter = taxonomy.getValue().get(word);
                        long taxonomyFrequency = counter == null ? 0L : counter.get();
                        long taxonomyTotal = taxonomyTotals.get(taxonomy.getKey());

                        dataEntry.add(String.valueOf(taxonomyFrequency));
                        dataEntry.add(formatNumberAsPercent((double) taxonomyFrequency / taxonomyTotal));
                        dataEntry.add(String.format("%.2f", ((double) taxonomyFrequency * 10000) / taxonomyTotal));
                    }

                    csvFilePrinter.printRecord(dataEntry);
                }
            } catch (Exception e) {
                // Best effort: report the failure and continue with the next file.
                System.out.println("Error in CsvFileWriter!");
                e.printStackTrace();
            }
        }

        return fileName;
    }

    /**
     * Writes a single CSV file with the columns {@code word}, {@code frequency}
     * and {@code percent}.
     *
     * @param title           used to derive the file name
     * @param result          rows to write; elements 0 and 1 of each row are written unchanged,
     *                        element 2 is passed through {@code formatNumberAsPercent}
     * @param resultsPath     directory the CSV file is written to
     * @param headerInfoBlock key/value lines printed at the top of the file
     * @return the path of the CSV file
     */
    public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
        Object[] fileHeader = {"word", "frequency", "percent"};
        String fileName = buildCsvPath(title, resultsPath);

        try (OutputStreamWriter fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
             CSVPrinter csvFilePrinter = new CSVPrinter(fileWriter, CSV_FILE_FORMAT)) {
            printHeaderInfo(csvFilePrinter, headerInfoBlock);
            csvFilePrinter.printRecord(fileHeader);

            for (Object[] resultEntry : result) {
                List<Object> dataEntry = new ArrayList<>();
                dataEntry.add(resultEntry[0]);
                dataEntry.add(resultEntry[1]);
                dataEntry.add(formatNumberAsPercent(resultEntry[2]));
                csvFilePrinter.printRecord(dataEntry);
            }
        } catch (Exception e) {
            System.out.println("Error in CsvFileWriter!");
            e.printStackTrace();
        }

        return fileName;
    }

    /**
     * Flattens a three-level result map into a single CSV file with the
     * columns {@code type}, {@code key}, {@code word} and {@code frequency}.
     *
     * @param title           used to derive the file name
     * @param result          frequencies keyed by word level type, then key, then word
     * @param resultsPath     directory the CSV file is written to
     * @param headerInfoBlock key/value lines printed at the top of the file
     * @return the path of the CSV file
     */
    public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
        Object[] fileHeader = {"type", "key", "word", "frequency"};
        String fileName = buildCsvPath(title, resultsPath);

        try (OutputStreamWriter fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
             CSVPrinter csvFilePrinter = new CSVPrinter(fileWriter, CSV_FILE_FORMAT)) {
            printHeaderInfo(csvFilePrinter, headerInfoBlock);
            csvFilePrinter.printRecord(fileHeader);

            for (Map.Entry<WordLevelType, Map<String, Map<String, Long>>> typeEntry : result.entrySet()) {
                for (Map.Entry<String, Map<String, Long>> keyWordEntry : typeEntry.getValue().entrySet()) {
                    for (Map.Entry<String, Long> calculationResults : keyWordEntry.getValue().entrySet()) {
                        List<Object> values = new ArrayList<>();
                        values.add(typeEntry.getKey().getName());
                        values.add(keyWordEntry.getKey());
                        values.add(calculationResults.getKey());
                        values.add(calculationResults.getValue());
                        csvFilePrinter.printRecord(values);
                    }
                }
            }
        } catch (Exception e) {
            System.out.println("Error in CsvFileWriter!");
            e.printStackTrace();
        }

        return fileName;
    }

    /**
     * Prints the info block (one key/value record per entry) followed by two
     * empty records.
     *
     * @throws IOException if the printer fails to write
     */
    private static void printHeaderInfo(CSVPrinter csvFilePrinter, LinkedHashMap<String, String> headerInfoBlock) throws IOException {
        for (Map.Entry<String, String> entry : headerInfoBlock.entrySet()) {
            List<Object> values = new ArrayList<>();
            values.add(entry.getKey());
            values.add(entry.getValue());
            csvFilePrinter.printRecord(values);
        }

        // 2 empty lines
        List<Object> emptyRecord = new ArrayList<>();
        csvFilePrinter.printRecord(emptyRecord);
        csvFilePrinter.printRecord(emptyRecord);
    }

    /**
     * Builds the CSV path for a title: ": " becomes "-", remaining spaces become
     * "_", ".csv" is appended and the result is placed inside {@code resultsPath}.
     */
    private static String buildCsvPath(String title, File resultsPath) {
        String fileName = title.replace(": ", "-").replace(" ", "_").concat(".csv");
        return resultsPath.toString().concat(File.separator).concat(fileName);
    }

    /** Sums all counters of every taxonomy; the result is keyed by taxonomy name. */
    private static Map<String, Long> sumTaxonomyFrequencies(Map<String, Map<String, AtomicLong>> taxonomyResults) {
        Map<String, Long> totals = new HashMap<>();
        for (Map.Entry<String, Map<String, AtomicLong>> taxonomy : taxonomyResults.entrySet()) {
            long sum = 0;
            for (AtomicLong value : taxonomy.getValue().values()) {
                sum += value.get();
            }
            totals.put(taxonomy.getKey(), sum);
        }
        return totals;
    }

    /**
     * Chooses the column header for the frequency CSV and, for the "Besede"
     * analysis, records the total frequency in {@code headerInfoBlock}.
     *
     * <p>NOTE(review): outside the "Besede" analysis the header has only three
     * columns although data rows carry more; kept unchanged.
     */
    private static Object[] buildFrequencyHeader(LinkedHashMap<String, String> headerInfoBlock, Set<String> taxonomyKeys, long totalFrequency) {
        if (!"Besede".equals(headerInfoBlock.get("Analiza"))) {
            return new Object[]{"word", "frequency", "percent"};
        }

        String calculateFor = headerInfoBlock.get("Izračunaj za:");
        String total = String.valueOf(totalFrequency);
        List<Object> header = new ArrayList<>();

        if ("različnica".equals(calculateFor)) {
            headerInfoBlock.put("Skupna vsota vseh različnic:", total);
            header.add("Različnica");
            header.add("Skupna absolutna pogostost");
            header.add("Delež glede na vse različnice");
        } else if ("lema".equals(calculateFor)) {
            headerInfoBlock.put("Skupna vsota vseh lem:", total);
            header.add("Lema");
            header.add("Skupna absolutna pogostost");
            header.add("Delež glede na vse leme");
        } else if ("oblikoskladenjska oznaka".equals(calculateFor)) {
            headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", total);
            header.add("Oblikoskladenjska oznaka");
            header.add("Skupna absolutna pogostost");
            header.add("Delež glede na vse oblikoskladenjske oznake");
        } else {
            // NOTE(review): the info-block label and the column names of this fallback
            // refer to different units; strings are kept exactly as they were.
            headerInfoBlock.put("Skupna vsota vseh različnic:", total);
            header.add("Lema");
            header.add("Skupna absolutna pogostost");
            header.add("Delež glede na vse leme");
        }

        header.add("Skupna relativna pogostost");

        for (String key : taxonomyKeys) {
            header.add("Absolutna pogostost [" + key + "]");
            header.add("Delež [" + key + "]");
            header.add("Relativna pogostost [" + key + "]");
        }

        return header.toArray();
    }
}