|
|
|
@ -105,7 +105,8 @@ public class Export {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
|
|
|
|
|
headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(statistics.getUniGramOccurrences()));
|
|
|
|
|
// headerInfoBlock.put(filter.getCalculateFor().toMetadataString(), String.valueOf(num_frequencies));
|
|
|
|
|
|
|
|
|
|
for (CalculateFor otherKey : filter.getMultipleKeys()) {
|
|
|
|
|
FILE_HEADER_AL.add(otherKey.toHeaderString());
|
|
|
|
@ -132,55 +133,75 @@ public class Export {
|
|
|
|
|
FILE_HEADER_AL.add("Relativna pogostost [" + key + "]");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (filter.getWriteMsdAtTheEnd()) {
|
|
|
|
|
String msd = "";
|
|
|
|
|
int maxMsdLength = 0;
|
|
|
|
|
for(MultipleHMKeys key : set.iterator().next().getRight().keySet()){
|
|
|
|
|
msd = key.getMsd(filter);
|
|
|
|
|
if (msd.length() > maxMsdLength){
|
|
|
|
|
maxMsdLength = msd.length();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
for(int i = 0; i < maxMsdLength; i++){
|
|
|
|
|
FILE_HEADER_AL.add("msd" + String.format("%02d", i + 1));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
FILE_HEADER = new String[ FILE_HEADER_AL.size() ];
|
|
|
|
|
FILE_HEADER_AL.toArray(FILE_HEADER);
|
|
|
|
|
|
|
|
|
|
String fileName = "";
|
|
|
|
|
|
|
|
|
|
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
|
|
|
|
String title = p.getLeft();
|
|
|
|
|
fileName = title.replace(": ", "-");
|
|
|
|
|
fileName = fileName.replace(" ", "_").concat(".csv");
|
|
|
|
|
for (Pair<String, Map<MultipleHMKeys, Long>> p : set) {
|
|
|
|
|
String title = p.getLeft();
|
|
|
|
|
|
|
|
|
|
fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
|
|
|
|
// statistics.setTimeEnding();
|
|
|
|
|
title = statistics.generateResultTitle();
|
|
|
|
|
// statistics.
|
|
|
|
|
|
|
|
|
|
Map<MultipleHMKeys, Long> map = p.getRight();
|
|
|
|
|
fileName = title.replace(": ", "-");
|
|
|
|
|
fileName = fileName.replace(" ", "_").concat(".csv");
|
|
|
|
|
|
|
|
|
|
if (map.isEmpty())
|
|
|
|
|
continue;
|
|
|
|
|
fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
|
|
|
|
|
|
|
|
|
Map<MultipleHMKeys, Long> map = p.getRight();
|
|
|
|
|
|
|
|
|
|
if (map.isEmpty())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// long total = Util.mapSumFrequencies(map);
|
|
|
|
|
|
|
|
|
|
OutputStreamWriter fileWriter = null;
|
|
|
|
|
CSVPrinter csvFilePrinter = null;
|
|
|
|
|
OutputStreamWriter fileWriter = null;
|
|
|
|
|
CSVPrinter csvFilePrinter = null;
|
|
|
|
|
|
|
|
|
|
//Create the CSVFormat object with "\n" as a record delimiter it puts all words in braces
|
|
|
|
|
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';').withQuoteMode(QuoteMode.ALL);
|
|
|
|
|
//Create the CSVFormat object with "\n" as a record delimiter it puts all words in braces
|
|
|
|
|
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';').withQuoteMode(QuoteMode.ALL);
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
//initialize FileWriter object
|
|
|
|
|
fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
|
|
|
|
|
try {
|
|
|
|
|
//initialize FileWriter object
|
|
|
|
|
fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
|
|
|
|
|
|
|
|
|
|
//initialize CSVPrinter object
|
|
|
|
|
csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
|
|
|
|
|
//initialize CSVPrinter object
|
|
|
|
|
csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
|
|
|
|
|
|
|
|
|
|
// write info block
|
|
|
|
|
printHeaderInfo(csvFilePrinter, headerInfoBlock);
|
|
|
|
|
// write info block
|
|
|
|
|
printHeaderInfo(csvFilePrinter, headerInfoBlock);
|
|
|
|
|
|
|
|
|
|
//Create CSV file header
|
|
|
|
|
csvFilePrinter.printRecord(FILE_HEADER);
|
|
|
|
|
//Create CSV file header
|
|
|
|
|
csvFilePrinter.printRecord(FILE_HEADER);
|
|
|
|
|
|
|
|
|
|
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
|
|
|
|
|
List dataEntry = new ArrayList<>();
|
|
|
|
|
if (!ValidationUtil.isEmpty(filter.getSkipValue()) && filter.getSkipValue() > 0) {
|
|
|
|
|
dataEntry.add(e.getKey().getK1());
|
|
|
|
|
}
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK1(), filter));
|
|
|
|
|
for (Map.Entry<MultipleHMKeys, Long> e : map.entrySet()) {
|
|
|
|
|
List dataEntry = new ArrayList<>();
|
|
|
|
|
if (!ValidationUtil.isEmpty(filter.getSkipValue()) && filter.getSkipValue() > 0) {
|
|
|
|
|
dataEntry.add(e.getKey().getK1());
|
|
|
|
|
}
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK1(), filter));
|
|
|
|
|
if (filter.getCalculateFor().equals(CalculateFor.LEMMA)){
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK1().toLowerCase(), filter));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (filter.getSuffixLength() != null || filter.getSuffixList() != null || filter.getPrefixLength() != null || filter.getPrefixList() != null) {
|
|
|
|
|
if (filter.getSuffixLength() != null || filter.getSuffixList() != null || filter.getPrefixLength() != null || filter.getPrefixList() != null) {
|
|
|
|
|
if(filter.getSuffixLength() > 0 || filter.getPrefixLength() > 0) {
|
|
|
|
|
if (filter.getPrefixLength() > 0) {
|
|
|
|
|
dataEntry.add(((String) dataEntry.get(0)).substring(0, filter.getPrefixLength()));
|
|
|
|
@ -217,46 +238,48 @@ public class Export {
|
|
|
|
|
dataEntry.add(rsf);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int i = 0;
|
|
|
|
|
for (CalculateFor otherKey : filter.getMultipleKeys()){
|
|
|
|
|
switch(i){
|
|
|
|
|
case 0:
|
|
|
|
|
if (otherKey.equals(CalculateFor.LEMMA)){
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK2(), filter));
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK2().toLowerCase(), filter));
|
|
|
|
|
} else {
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK2(), filter));
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case 1:
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK3(), filter));
|
|
|
|
|
break;
|
|
|
|
|
case 2:
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK4(), filter));
|
|
|
|
|
break;
|
|
|
|
|
case 3:
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK5(), filter));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
i++;
|
|
|
|
|
}
|
|
|
|
|
switch(i){
|
|
|
|
|
case 0:
|
|
|
|
|
if (otherKey.equals(CalculateFor.LEMMA)){
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK2(), filter));
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK2().toLowerCase(), filter));
|
|
|
|
|
} else {
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK2(), filter));
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case 1:
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK3(), filter));
|
|
|
|
|
break;
|
|
|
|
|
case 2:
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK4(), filter));
|
|
|
|
|
break;
|
|
|
|
|
case 3:
|
|
|
|
|
dataEntry.add(eraseSkipgramStars(e.getKey().getK5(), filter));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
i++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
dataEntry.add(e.getValue().toString());
|
|
|
|
|
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
|
|
|
|
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
|
|
|
|
|
for (String key : taxonomyResults.keySet()){
|
|
|
|
|
if(!key.equals("Total") && num_taxonomy_frequencies.get(key) > 0) {
|
|
|
|
|
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
|
|
|
|
|
dataEntry.add(frequency.toString());
|
|
|
|
|
dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
|
|
|
|
|
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
dataEntry.add(e.getValue().toString());
|
|
|
|
|
dataEntry.add(formatNumberAsPercent((double) e.getValue() / num_frequencies));
|
|
|
|
|
dataEntry.add(String.format("%.2f", ((double) e.getValue() * 1000000)/num_frequencies));
|
|
|
|
|
for (String key : taxonomyResults.keySet()){
|
|
|
|
|
if(!key.equals("Total") && num_taxonomy_frequencies.get(key) > 0) {
|
|
|
|
|
AtomicLong frequency = taxonomyResults.get(key).get(e.getKey());
|
|
|
|
|
dataEntry.add(frequency.toString());
|
|
|
|
|
// dataEntry.add(formatNumberAsPercent((double) frequency.get() / num_taxonomy_frequencies.get(key)));
|
|
|
|
|
// dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / num_taxonomy_frequencies.get(key)));
|
|
|
|
|
dataEntry.add(formatNumberAsPercent((double) frequency.get() / statistics.getUniGramOccurrences()));
|
|
|
|
|
dataEntry.add(String.format("%.2f", ((double) frequency.get() * 1000000) / statistics.getUniGramOccurrences()));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (filter.getCollocability().size() > 0){
|
|
|
|
|
for (Collocability c : filter.getCollocability()) {
|
|
|
|
@ -264,67 +287,68 @@ public class Export {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Write msd separated per letters at the end of each line in csv
|
|
|
|
|
if (filter.getWriteMsdAtTheEnd()) {
|
|
|
|
|
String msd = "";
|
|
|
|
|
if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
|
|
|
msd = e.getKey().getK1();
|
|
|
|
|
} else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
|
|
|
|
|
i = 0;
|
|
|
|
|
for (CalculateFor otherKey : filter.getMultipleKeys()){
|
|
|
|
|
switch(i){
|
|
|
|
|
case 0:
|
|
|
|
|
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
|
|
|
msd = e.getKey().getK2();
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case 1:
|
|
|
|
|
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
|
|
|
msd = e.getKey().getK3();
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case 2:
|
|
|
|
|
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
|
|
|
msd = e.getKey().getK4();
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case 3:
|
|
|
|
|
if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
|
|
|
msd = e.getKey().getK5();
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
i++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
String [] charArray = msd.split("(?!^)");
|
|
|
|
|
dataEntry.addAll(Arrays.asList(charArray));
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
// Write msd separated per letters at the end of each line in csv
|
|
|
|
|
if (filter.getWriteMsdAtTheEnd()) {
|
|
|
|
|
// String msd = "";
|
|
|
|
|
//
|
|
|
|
|
// if (filter.getCalculateFor().equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
|
|
|
// msd = e.getKey().getK1();
|
|
|
|
|
// } else if (filter.getMultipleKeys().contains(CalculateFor.MORPHOSYNTACTIC_SPECS)) {
|
|
|
|
|
// i = 0;
|
|
|
|
|
// for (CalculateFor otherKey : filter.getMultipleKeys()){
|
|
|
|
|
// switch(i){
|
|
|
|
|
// case 0:
|
|
|
|
|
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
|
|
|
// msd = e.getKey().getK2();
|
|
|
|
|
// }
|
|
|
|
|
// break;
|
|
|
|
|
// case 1:
|
|
|
|
|
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
|
|
|
// msd = e.getKey().getK3();
|
|
|
|
|
// }
|
|
|
|
|
// break;
|
|
|
|
|
// case 2:
|
|
|
|
|
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
|
|
|
// msd = e.getKey().getK4();
|
|
|
|
|
// }
|
|
|
|
|
// break;
|
|
|
|
|
// case 3:
|
|
|
|
|
// if (otherKey.equals(CalculateFor.MORPHOSYNTACTIC_SPECS)){
|
|
|
|
|
// msd = e.getKey().getK5();
|
|
|
|
|
// }
|
|
|
|
|
// break;
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// i++;
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
String msd = e.getKey().getMsd(filter);
|
|
|
|
|
String [] charArray = msd.split("(?!^)");
|
|
|
|
|
dataEntry.addAll(Arrays.asList(charArray));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
csvFilePrinter.printRecord(dataEntry);
|
|
|
|
|
}
|
|
|
|
|
} catch (Exception e) {
|
|
|
|
|
System.out.println("Error in CsvFileWriter!");
|
|
|
|
|
e.printStackTrace();
|
|
|
|
|
} finally {
|
|
|
|
|
try {
|
|
|
|
|
if (fileWriter != null) {
|
|
|
|
|
fileWriter.flush();
|
|
|
|
|
fileWriter.close();
|
|
|
|
|
}
|
|
|
|
|
if (csvFilePrinter != null) {
|
|
|
|
|
csvFilePrinter.close();
|
|
|
|
|
}
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
|
|
|
|
|
e.printStackTrace();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
csvFilePrinter.printRecord(dataEntry);
|
|
|
|
|
}
|
|
|
|
|
} catch (Exception e) {
|
|
|
|
|
System.out.println("Error in CsvFileWriter!");
|
|
|
|
|
e.printStackTrace();
|
|
|
|
|
} finally {
|
|
|
|
|
try {
|
|
|
|
|
if (fileWriter != null) {
|
|
|
|
|
fileWriter.flush();
|
|
|
|
|
fileWriter.close();
|
|
|
|
|
}
|
|
|
|
|
if (csvFilePrinter != null) {
|
|
|
|
|
csvFilePrinter.close();
|
|
|
|
|
}
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
|
|
|
|
|
e.printStackTrace();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return fileName;
|
|
|
|
|
return fileName;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static String eraseSkipgramStars(String s, Filter filter){
|
|
|
|
|