package util; import static util.Util.*; import java.io.*; import java.nio.charset.StandardCharsets; import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicLong; import data.Filter; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVPrinter; import org.apache.commons.lang3.tuple.Pair; import org.json.simple.JSONArray; import org.json.simple.JSONObject; import data.Enums.WordLevelType; @SuppressWarnings("unchecked") public class Export { public static void SetToJSON(Set>> set) { JSONArray wrapper = new JSONArray(); for (Pair> p : set) { JSONArray data_wrapper = new JSONArray(); JSONObject metric = new JSONObject(); String title = p.getLeft(); Map map = p.getRight(); if (map.isEmpty()) continue; long total = Util.mapSumFrequencies(map); for (Map.Entry e : map.entrySet()) { JSONObject data_entry = new JSONObject(); data_entry.put("word", e.getKey()); data_entry.put("frequency", e.getValue()); data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total)); data_wrapper.add(data_entry); } metric.put("Title", title); metric.put("data", data_wrapper); wrapper.add(metric); } try (FileWriter file = new FileWriter("statistics.json")) { file.write(wrapper.toJSONString()); } catch (IOException e) { e.printStackTrace(); } } public static String SetToCSV(Set>> set, File resultsPath, LinkedHashMap headerInfoBlock, Map> taxonomyResults) { //Delimiter used in CSV file String NEW_LINE_SEPARATOR = "\n"; List FILE_HEADER_AL = new ArrayList(); Object[] FILE_HEADER; //Count frequencies long num_frequencies = 0; for (Pair> p : set) { Map map = p.getRight(); if (map.isEmpty()) continue; num_frequencies = Util.mapSumFrequencies(map); } Map num_taxonomy_frequencies = new ConcurrentHashMap<>(); for (String taxonomyKey : taxonomyResults.keySet()) { num_taxonomy_frequencies.put(taxonomyKey, (long) 0); for (AtomicLong value : taxonomyResults.get(taxonomyKey).values()){ long val = num_taxonomy_frequencies.get(taxonomyKey); val += value.get(); num_taxonomy_frequencies.put(taxonomyKey, val); } } //CSV file header if (headerInfoBlock.containsKey("Analiza") && headerInfoBlock.get("Analiza").equals("Besede")) { if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("različnica")) { headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies)); FILE_HEADER_AL.add("Različnica"); FILE_HEADER_AL.add("Skupna absolutna pogostost"); FILE_HEADER_AL.add("Delež glede na vse različnice"); } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("lema")) { headerInfoBlock.put("Skupna vsota vseh lem:", String.valueOf(num_frequencies)); FILE_HEADER_AL.add("Lema"); FILE_HEADER_AL.add("Skupna absolutna pogostost"); FILE_HEADER_AL.add("Delež glede na vse leme"); } else if (headerInfoBlock.containsKey("Izračunaj za:") && headerInfoBlock.get("Izračunaj za:").equals("oblikoskladenjska oznaka")) { headerInfoBlock.put("Skupna vsota vseh oblikoskladenjskih oznak:", String.valueOf(num_frequencies)); FILE_HEADER_AL.add("Oblikoskladenjska oznaka"); FILE_HEADER_AL.add("Skupna absolutna pogostost"); FILE_HEADER_AL.add("Delež glede na vse oblikoskladenjske oznake"); } else { headerInfoBlock.put("Skupna vsota vseh različnic:", String.valueOf(num_frequencies)); FILE_HEADER_AL.add("Lema"); FILE_HEADER_AL.add("Skupna absolutna pogostost"); FILE_HEADER_AL.add("Delež glede na vse leme"); } FILE_HEADER_AL.add("Skupna relativna pogostost"); for (String key : taxonomyResults.keySet()) { FILE_HEADER_AL.add("Absolutna pogostost [" + key + "]"); FILE_HEADER_AL.add("Delež [" + key + "]"); FILE_HEADER_AL.add("Relativna pogostost [" + key + "]"); } FILE_HEADER = new String[ FILE_HEADER_AL.size() ]; FILE_HEADER_AL.toArray(FILE_HEADER); } else { FILE_HEADER = new Object[]{"word", "frequency", "percent"}; } String fileName = ""; for (Pair> p : set) { String title = p.getLeft(); fileName = title.replace(": ", "-"); fileName = fileName.replace(" ", "_").concat(".csv"); fileName = resultsPath.toString().concat(File.separator).concat(fileName); Map map = p.getRight(); if (map.isEmpty()) continue; // long total = Util.mapSumFrequencies(map); OutputStreamWriter fileWriter = null; CSVPrinter csvFilePrinter = null; //Create the CSVFormat object with "\n" as a record delimiter CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';'); try { //initialize FileWriter object fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8); //initialize CSVPrinter object csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat); // write info block printHeaderInfo(csvFilePrinter, headerInfoBlock); //Create CSV file header csvFilePrinter.printRecord(FILE_HEADER); for (Map.Entry e : map.entrySet()) { List dataEntry = new ArrayList<>(); dataEntry.add(e.getKey()); dataEntry.add(e.getValue().toString()); dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies)); dataEntry.add(String.format("%.2f", ((double) e.getValue() * 10000)/num_frequencies)); for (String key : taxonomyResults.keySet()){ AtomicLong frequency = taxonomyResults.get(key).get(e.getKey()); dataEntry.add(frequency.toString()); dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key))); dataEntry.add(String.format("%.2f", ((double) frequency.get() * 10000) / num_taxonomy_frequencies.get(key))); } csvFilePrinter.printRecord(dataEntry); } } catch (Exception e) { System.out.println("Error in CsvFileWriter!"); e.printStackTrace(); } finally { try { if (fileWriter != null) { fileWriter.flush(); fileWriter.close(); } if (csvFilePrinter != null) { csvFilePrinter.close(); } } catch (IOException e) { System.out.println("Error while flushing/closing fileWriter/csvPrinter!"); e.printStackTrace(); } } } return fileName; } public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap headerInfoBlock) { //Delimiter used in CSV file String NEW_LINE_SEPARATOR = "\n"; //CSV file header Object[] FILE_HEADER = {"word", "frequency", "percent"}; String fileName = ""; fileName = title.replace(": ", "-"); fileName = fileName.replace(" ", "_").concat(".csv"); fileName = resultsPath.toString().concat(File.separator).concat(fileName); OutputStreamWriter fileWriter = null; CSVPrinter csvFilePrinter = null; //Create the CSVFormat object with "\n" as a record delimiter CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';'); try { //initialize FileWriter object fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8); //initialize CSVPrinter object csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat); // write info block printHeaderInfo(csvFilePrinter, headerInfoBlock); //Create CSV file header csvFilePrinter.printRecord(FILE_HEADER); for (Object[] resultEntry : result) { List dataEntry = new ArrayList<>(); dataEntry.add(resultEntry[0]); dataEntry.add(resultEntry[1]); dataEntry.add(formatNumberAsPercent(resultEntry[2])); csvFilePrinter.printRecord(dataEntry); } } catch (Exception e) { System.out.println("Error in CsvFileWriter!"); e.printStackTrace(); } finally { try { if (fileWriter != null) { fileWriter.flush(); fileWriter.close(); } if (csvFilePrinter != null) { csvFilePrinter.close(); } } catch (IOException e) { System.out.println("Error while flushing/closing fileWriter/csvPrinter!"); e.printStackTrace(); } } return fileName; } public static String nestedMapToCSV(String title, Map>> result, File resultsPath, LinkedHashMap headerInfoBlock) { //Delimiter used in CSV file String NEW_LINE_SEPARATOR = "\n"; //CSV file header Object[] FILE_HEADER = {"type", "key", "word", "frequency"}; String fileName = ""; fileName = title.replace(": ", "-"); fileName = fileName.replace(" ", "_").concat(".csv"); fileName = resultsPath.toString().concat(File.separator).concat(fileName); OutputStreamWriter fileWriter = null; CSVPrinter csvFilePrinter = null; //Create the CSVFormat object with "\n" as a record delimiter CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';'); try { //initialize FileWriter object fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8); //initialize CSVPrinter object csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat); // write info block printHeaderInfo(csvFilePrinter, headerInfoBlock); //Create CSV file header csvFilePrinter.printRecord(FILE_HEADER); for (Map.Entry>> typeEntry : result.entrySet()) { for (Map.Entry> keyWordEntry : typeEntry.getValue().entrySet()) { for (Map.Entry calculationResults : keyWordEntry.getValue().entrySet()) { List values = new ArrayList(); values.add(typeEntry.getKey().getName()); values.add(keyWordEntry.getKey()); values.add(calculationResults.getKey()); values.add(calculationResults.getValue()); csvFilePrinter.printRecord(values); } } } } catch (Exception e) { System.out.println("Error in CsvFileWriter!"); e.printStackTrace(); } finally { try { if (fileWriter != null) { fileWriter.flush(); fileWriter.close(); } if (csvFilePrinter != null) { csvFilePrinter.close(); } } catch (IOException e) { System.out.println("Error while flushing/closing fileWriter/csvPrinter!"); e.printStackTrace(); } } return fileName; } private static void printHeaderInfo(CSVPrinter csvFilePrinter, LinkedHashMap headerInfoBlock) throws IOException { for (Map.Entry entry : headerInfoBlock.entrySet()) { List values = new ArrayList(); values.add(entry.getKey()); values.add(entry.getValue()); csvFilePrinter.printRecord(values); } // 2 empty lines List values = new ArrayList(); csvFilePrinter.printRecord(values); csvFilePrinter.printRecord(values); } }