Project copied
This commit is contained in:
25
src/main/java/util/ByteUtils.java
Normal file
25
src/main/java/util/ByteUtils.java
Normal file
@@ -0,0 +1,25 @@
|
||||
package util;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
public class ByteUtils {

    /**
     * Encodes a {@code long} into a freshly allocated array of {@link Long#BYTES} bytes,
     * using the byte order of a newly allocated {@link ByteBuffer}.
     * Approach taken from <a href="https://stackoverflow.com/a/4485196">StackOverflow</a>.
     *
     * @param x the value to encode
     * @return the backing array of the buffer the value was written to
     */
    public static byte[] longToBytes(long x) {
        return ByteBuffer.allocate(Long.BYTES).putLong(x).array();
    }

    /**
     * Decodes a {@code long} from the given bytes; the inverse of {@link #longToBytes(long)}.
     * Approach taken from <a href="https://stackoverflow.com/a/4485196">StackOverflow</a>.
     *
     * @param bytes the encoded value; it is copied into a buffer of {@link Long#BYTES} bytes,
     *              so the buffer rejects longer input and cannot yield a long from shorter input
     * @return the decoded value
     */
    public static long bytesToLong(byte[] bytes) {
        ByteBuffer scratch = ByteBuffer.allocate(Long.BYTES);
        scratch.put(bytes);
        // Switch from writing to reading: limit = bytes written, position = 0.
        scratch.flip();
        return scratch.getLong();
    }
}
|
||||
46
src/main/java/util/Combinations.java
Normal file
46
src/main/java/util/Combinations.java
Normal file
@@ -0,0 +1,46 @@
|
||||
package util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
public class Combinations {
    /** Collects the combinations produced by the most recent {@link #generateIndices(int)} call. */
    private static HashSet<HashSet<Integer>> result = new HashSet<>();

    /**
     * Recursively fills {@code data} with every combination of {@code combinationLength}
     * elements taken from {@code arr[start..end]} and stores each completed combination
     * as a set in the shared {@code result}.
     *
     * @param arr               input array
     * @param data              scratch array holding the combination being built
     * @param start             first index of {@code arr} that may still be used
     * @param end               last index of {@code arr} that may be used
     * @param index             position in {@code data} to fill next
     * @param combinationLength number of elements in a finished combination
     */
    static void combinationUtil(int[] arr, Integer[] data, int start, int end, int index, int combinationLength) {
        // All positions are filled: snapshot the scratch array into the result.
        if (index == combinationLength) {
            HashSet<Integer> snapshot = new HashSet<>();
            snapshot.addAll(Arrays.asList(data));
            result.add(snapshot);
            return;
        }

        // Try every candidate for the current position, but stop as soon as the elements
        // left in arr could no longer fill the remaining positions of the combination.
        int stillNeeded = combinationLength - index;
        int candidate = start;
        while (candidate <= end && end - candidate + 1 >= stillNeeded) {
            data[index] = arr[candidate];
            combinationUtil(arr, data, candidate + 1, end, index + 1, combinationLength);
            candidate++;
        }
    }

    /**
     * Builds the combinations of the numbers {@code 1 .. maxNOfIndices - 1} for every
     * combination length from {@code 1} up to (but excluding) {@code maxNOfIndices - 1},
     * plus one empty set.
     *
     * @param maxNOfIndices exclusive upper bound of the generated numbers
     * @return the set of generated combinations, always containing the empty set
     */
    public static HashSet<HashSet<Integer>> generateIndices(int maxNOfIndices) {
        result = new HashSet<>();

        int[] arr = IntStream.range(1, maxNOfIndices).toArray();
        int lastPosition = arr.length - 1;

        // Each length gets its own scratch array of exactly that size.
        IntStream.range(1, maxNOfIndices - 1)
                .forEach(length -> combinationUtil(arr, new Integer[length], 0, lastPosition, 0, length));

        // also add an empty one for X.... (all of this type)
        result.add(new HashSet<>());

        return result;
    }
}
|
||||
267
src/main/java/util/Export.java
Normal file
267
src/main/java/util/Export.java
Normal file
@@ -0,0 +1,267 @@
|
||||
package util;
|
||||
|
||||
import static util.Util.*;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
import org.apache.commons.csv.CSVPrinter;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.json.simple.JSONArray;
|
||||
import org.json.simple.JSONObject;
|
||||
|
||||
import data.Enums.WordLevelType;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public class Export {
|
||||
public static void SetToJSON(Set<Pair<String, Map<String, Long>>> set) {
|
||||
JSONArray wrapper = new JSONArray();
|
||||
|
||||
for (Pair<String, Map<String, Long>> p : set) {
|
||||
JSONArray data_wrapper = new JSONArray();
|
||||
JSONObject metric = new JSONObject();
|
||||
|
||||
String title = p.getLeft();
|
||||
Map<String, Long> map = p.getRight();
|
||||
|
||||
if (map.isEmpty())
|
||||
continue;
|
||||
|
||||
long total = Util.mapSumFrequencies(map);
|
||||
|
||||
for (Map.Entry<String, Long> e : map.entrySet()) {
|
||||
JSONObject data_entry = new JSONObject();
|
||||
data_entry.put("word", e.getKey());
|
||||
data_entry.put("frequency", e.getValue());
|
||||
data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total));
|
||||
|
||||
data_wrapper.add(data_entry);
|
||||
}
|
||||
|
||||
metric.put("Title", title);
|
||||
metric.put("data", data_wrapper);
|
||||
wrapper.add(metric);
|
||||
}
|
||||
|
||||
try (FileWriter file = new FileWriter("statistics.json")) {
|
||||
file.write(wrapper.toJSONString());
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
||||
//Delimiter used in CSV file
|
||||
String NEW_LINE_SEPARATOR = "\n";
|
||||
|
||||
//CSV file header
|
||||
Object[] FILE_HEADER = {"word", "frequency", "percent"};
|
||||
|
||||
String fileName = "";
|
||||
|
||||
for (Pair<String, Map<String, Long>> p : set) {
|
||||
String title = p.getLeft();
|
||||
fileName = title.replace(": ", "-");
|
||||
fileName = fileName.replace(" ", "_").concat(".csv");
|
||||
|
||||
fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
||||
|
||||
Map<String, Long> map = p.getRight();
|
||||
|
||||
if (map.isEmpty())
|
||||
continue;
|
||||
|
||||
long total = Util.mapSumFrequencies(map);
|
||||
|
||||
OutputStreamWriter fileWriter = null;
|
||||
CSVPrinter csvFilePrinter = null;
|
||||
|
||||
//Create the CSVFormat object with "\n" as a record delimiter
|
||||
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
|
||||
|
||||
try {
|
||||
//initialize FileWriter object
|
||||
fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
|
||||
|
||||
//initialize CSVPrinter object
|
||||
csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
|
||||
|
||||
// write info block
|
||||
printHeaderInfo(csvFilePrinter, headerInfoBlock);
|
||||
|
||||
//Create CSV file header
|
||||
csvFilePrinter.printRecord(FILE_HEADER);
|
||||
|
||||
for (Map.Entry<String, Long> e : map.entrySet()) {
|
||||
List dataEntry = new ArrayList<>();
|
||||
dataEntry.add(e.getKey());
|
||||
dataEntry.add(e.getValue().toString());
|
||||
dataEntry.add(formatNumberAsPercent((double) e.getValue() / total));
|
||||
csvFilePrinter.printRecord(dataEntry);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.out.println("Error in CsvFileWriter!");
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
if (fileWriter != null) {
|
||||
fileWriter.flush();
|
||||
fileWriter.close();
|
||||
}
|
||||
if (csvFilePrinter != null) {
|
||||
csvFilePrinter.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return fileName;
|
||||
}
|
||||
|
||||
public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
||||
//Delimiter used in CSV file
|
||||
String NEW_LINE_SEPARATOR = "\n";
|
||||
|
||||
//CSV file header
|
||||
Object[] FILE_HEADER = {"word", "frequency", "percent"};
|
||||
|
||||
String fileName = "";
|
||||
|
||||
fileName = title.replace(": ", "-");
|
||||
fileName = fileName.replace(" ", "_").concat(".csv");
|
||||
|
||||
fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
||||
|
||||
OutputStreamWriter fileWriter = null;
|
||||
CSVPrinter csvFilePrinter = null;
|
||||
|
||||
//Create the CSVFormat object with "\n" as a record delimiter
|
||||
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
|
||||
|
||||
try {
|
||||
//initialize FileWriter object
|
||||
fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
|
||||
|
||||
//initialize CSVPrinter object
|
||||
csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
|
||||
|
||||
// write info block
|
||||
printHeaderInfo(csvFilePrinter, headerInfoBlock);
|
||||
|
||||
//Create CSV file header
|
||||
csvFilePrinter.printRecord(FILE_HEADER);
|
||||
|
||||
for (Object[] resultEntry : result) {
|
||||
List dataEntry = new ArrayList<>();
|
||||
dataEntry.add(resultEntry[0]);
|
||||
dataEntry.add(resultEntry[1]);
|
||||
dataEntry.add(formatNumberAsPercent(resultEntry[2]));
|
||||
csvFilePrinter.printRecord(dataEntry);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.out.println("Error in CsvFileWriter!");
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
if (fileWriter != null) {
|
||||
fileWriter.flush();
|
||||
fileWriter.close();
|
||||
}
|
||||
if (csvFilePrinter != null) {
|
||||
csvFilePrinter.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
return fileName;
|
||||
}
|
||||
|
||||
public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
|
||||
//Delimiter used in CSV file
|
||||
String NEW_LINE_SEPARATOR = "\n";
|
||||
|
||||
//CSV file header
|
||||
Object[] FILE_HEADER = {"type", "key", "word", "frequency"};
|
||||
|
||||
String fileName = "";
|
||||
|
||||
fileName = title.replace(": ", "-");
|
||||
fileName = fileName.replace(" ", "_").concat(".csv");
|
||||
|
||||
fileName = resultsPath.toString().concat(File.separator).concat(fileName);
|
||||
|
||||
OutputStreamWriter fileWriter = null;
|
||||
CSVPrinter csvFilePrinter = null;
|
||||
|
||||
//Create the CSVFormat object with "\n" as a record delimiter
|
||||
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
|
||||
|
||||
try {
|
||||
//initialize FileWriter object
|
||||
fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
|
||||
|
||||
//initialize CSVPrinter object
|
||||
csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
|
||||
|
||||
// write info block
|
||||
printHeaderInfo(csvFilePrinter, headerInfoBlock);
|
||||
|
||||
//Create CSV file header
|
||||
csvFilePrinter.printRecord(FILE_HEADER);
|
||||
|
||||
for (Map.Entry<WordLevelType, Map<String, Map<String, Long>>> typeEntry : result.entrySet()) {
|
||||
for (Map.Entry<String, Map<String, Long>> keyWordEntry : typeEntry.getValue().entrySet()) {
|
||||
for (Map.Entry<String, Long> calculationResults : keyWordEntry.getValue().entrySet()) {
|
||||
List values = new ArrayList();
|
||||
values.add(typeEntry.getKey().getName());
|
||||
values.add(keyWordEntry.getKey());
|
||||
values.add(calculationResults.getKey());
|
||||
values.add(calculationResults.getValue());
|
||||
csvFilePrinter.printRecord(values);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.out.println("Error in CsvFileWriter!");
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
if (fileWriter != null) {
|
||||
fileWriter.flush();
|
||||
fileWriter.close();
|
||||
}
|
||||
if (csvFilePrinter != null) {
|
||||
csvFilePrinter.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.out.println("Error while flushing/closing fileWriter/csvPrinter!");
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
return fileName;
|
||||
}
|
||||
|
||||
private static void printHeaderInfo(CSVPrinter csvFilePrinter, LinkedHashMap<String, String> headerInfoBlock) throws IOException {
|
||||
for (Map.Entry<String, String> entry : headerInfoBlock.entrySet()) {
|
||||
List values = new ArrayList();
|
||||
values.add(entry.getKey());
|
||||
values.add(entry.getValue());
|
||||
csvFilePrinter.printRecord(values);
|
||||
}
|
||||
|
||||
// 2 empty lines
|
||||
List values = new ArrayList();
|
||||
csvFilePrinter.printRecord(values);
|
||||
csvFilePrinter.printRecord(values);
|
||||
|
||||
}
|
||||
}
|
||||
31
src/main/java/util/Key.java
Normal file
31
src/main/java/util/Key.java
Normal file
@@ -0,0 +1,31 @@
|
||||
package util;
|
||||
|
||||
/**
 * Placeholder type: the class currently declares no fields, constructors or methods.
 * The only content is a commented-out draft of a String-backed, comparable key.
 *
 * NOTE(review): the draft below is inactive and would need fixes before being re-enabled:
 * its equals() starts by calling this.equals(o), i.e. itself, so it would recurse without
 * terminating (an identity check "this == o" was presumably intended); its compareTo()
 * calls Objects.compare with two arguments, while java.util.Objects.compare takes a
 * Comparator as third argument; and its hashCode() returns the constant 0 for every instance.
 */
public class Key /*implements Comparable<Key> */ {
//    private final String value;
//
//    Key(String value) {
//        this.value = value;
//    }
//
//    @Override
//    public int compareTo(Key o) {
//        return Objects.compare(this.value, o.value);
//    }
//
//    @Override
//    public boolean equals(Object o) {
//        if (this.equals(o)) {
//            return true;
//        }
//        if (o == null || getClass() != o.getClass()) {
//            return false;
//        }
//        Key key = (Key) o;
//        return Objects.equals(value, key.value);
//    }
//
//    @Override
//    public int hashCode() {
//        return 0;
//    }
}
|
||||
63
src/main/java/util/TimeWatch.java
Normal file
63
src/main/java/util/TimeWatch.java
Normal file
@@ -0,0 +1,63 @@
|
||||
package util;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
 * Minimal stopwatch based on {@link System#nanoTime()}; it starts running when created
 * through {@link #start()}.
 *
 * Adapted from http://memorynotfound.com/calculating-elapsed-time-java/
 */
public class TimeWatch {

    /** {@link System#nanoTime()} reading taken when the watch was (re)started. */
    private long starts;

    private TimeWatch() {
        reset();
    }

    /**
     * Creates a watch that starts measuring immediately.
     *
     * @return a new, running watch
     */
    public static TimeWatch start() {
        return new TimeWatch();
    }

    /** Restarts the measurement from the current instant. */
    private TimeWatch reset() {
        starts = System.nanoTime();
        return this;
    }

    /** Returns the nanoseconds elapsed since the watch was (re)started. */
    private long time() {
        long ends = System.nanoTime();
        return ends - starts;
    }

    /** Returns the elapsed time converted (truncating) to the given unit. */
    private long time(TimeUnit unit) {
        return unit.convert(time(), TimeUnit.NANOSECONDS);
    }

    /** Formats the elapsed time as whole minutes plus the remaining whole seconds. */
    private String toMinuteSeconds() {
        // Read the clock once so both components describe the same instant.
        long elapsed = time();
        long minutes = TimeUnit.NANOSECONDS.toMinutes(elapsed);
        // Subtract the minutes expressed in seconds, not the raw minute count.
        long seconds = TimeUnit.NANOSECONDS.toSeconds(elapsed) - TimeUnit.MINUTES.toSeconds(minutes);

        return String.format("%d min, %d sec", minutes, seconds);
    }

    /**
     * Formats the elapsed time broken down into hours, minutes, seconds and milliseconds.
     *
     * @return a string of the form {@code "H h, M min, S s, MS ms"}
     */
    public String toFullTime() {
        // Read the clock once; sampling it per component could let the components disagree.
        long elapsed = time();

        long hours = TimeUnit.NANOSECONDS.toHours(elapsed);
        long minutes = TimeUnit.NANOSECONDS.toMinutes(elapsed) - TimeUnit.HOURS.toMinutes(hours);
        long seconds = TimeUnit.NANOSECONDS.toSeconds(elapsed) - TimeUnit.HOURS.toSeconds(hours) - TimeUnit.MINUTES.toSeconds(minutes);
        long milliseconds = TimeUnit.NANOSECONDS.toMillis(elapsed) - TimeUnit.HOURS.toMillis(hours) - TimeUnit.MINUTES.toMillis(minutes) - TimeUnit.SECONDS.toMillis(seconds);

        return String.format("%d h, %d min, %d s, %d ms", hours, minutes, seconds, milliseconds);
    }

    /**
     * Returns the elapsed time in nanoseconds, prefixed with a label.
     */
    @Override
    public String toString() {
        return "Elapsed Time in nano seconds: " + time();
    }

    /** Shows how the watch is meant to be used; not called by the class itself. */
    private void exampleUsage() {
        TimeWatch watch = TimeWatch.start();

        // do something...

        System.out.println("Elapsed Time custom format: " + watch.toMinuteSeconds());
        System.out.println("Elapsed Time in seconds: " + watch.time(TimeUnit.SECONDS));
        System.out.println("Elapsed Time in nano seconds: " + watch.time());
    }
}
|
||||
225
src/main/java/util/Util.java
Normal file
225
src/main/java/util/Util.java
Normal file
@@ -0,0 +1,225 @@
|
||||
package util;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLDecoder;
|
||||
import java.text.MessageFormat;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import data.Settings;
|
||||
import gui.GUIController;
|
||||
import gui.ValidationUtil;
|
||||
|
||||
public class Util {
|
||||
public final static Logger logger = LogManager.getLogger(Util.class);
|
||||
|
||||
|
||||
public static String toReadableTime(long time) {
|
||||
long hours = time(TimeUnit.HOURS, time);
|
||||
long minutes = time(TimeUnit.MINUTES, time) - TimeUnit.HOURS.toMinutes(hours);
|
||||
long seconds = time(TimeUnit.SECONDS, time) - TimeUnit.HOURS.toSeconds(hours) - TimeUnit.MINUTES.toSeconds(minutes);
|
||||
long milliseconds = time(TimeUnit.MILLISECONDS, time) - TimeUnit.HOURS.toMillis(hours) - TimeUnit.MINUTES.toMillis(minutes) - TimeUnit.SECONDS.toMillis(seconds);
|
||||
long microseconds = time(TimeUnit.MICROSECONDS, time) - TimeUnit.HOURS.toMicros(hours) - TimeUnit.MINUTES.toMicros(minutes) - TimeUnit.SECONDS.toMicros(seconds) - TimeUnit.MILLISECONDS.toMicros(milliseconds);
|
||||
long nanoseconds = time(TimeUnit.NANOSECONDS, time) - TimeUnit.HOURS.toNanos(hours) - TimeUnit.MINUTES.toNanos(minutes) - TimeUnit.SECONDS.toNanos(seconds) - TimeUnit.MILLISECONDS.toNanos(milliseconds) - TimeUnit.MICROSECONDS.toNanos(microseconds);
|
||||
|
||||
return String.format("%d h, %d min, %d s, %d ms, %d µs, %d ns", hours, minutes, seconds, milliseconds, microseconds, nanoseconds);
|
||||
}
|
||||
|
||||
private static long time(TimeUnit unit, long t) {
|
||||
return unit.convert(t, TimeUnit.NANOSECONDS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a number to a more readable format.
|
||||
* 12345 -> 12.345
|
||||
* 12345,678 -> 12.345,67
|
||||
*
|
||||
* @param o byte, double, float, int,long, short
|
||||
*
|
||||
* @return number formatted with thousands separator and 2 decimal places (floats)
|
||||
*/
|
||||
private static String formatNumberReadable(Object o) {
|
||||
if (isInstanceOfInteger(o))
|
||||
return String.format("%,d", o);
|
||||
else if (isInstanceOfFloat(o))
|
||||
return String.format("%,.2f", o);
|
||||
else
|
||||
return "- invalid input format -";
|
||||
}
|
||||
|
||||
public static String formatNumberAsPercent(Object o) {
|
||||
return MessageFormat.format("{0,number,#.###%}", o);
|
||||
}
|
||||
|
||||
private static boolean isInstanceOfInteger(Object o) {
|
||||
Set<Class<?>> types = new HashSet<>();
|
||||
types.add(Byte.class);
|
||||
types.add(Short.class);
|
||||
types.add(Integer.class);
|
||||
types.add(Long.class);
|
||||
|
||||
return types.contains(o.getClass());
|
||||
}
|
||||
|
||||
private static boolean isInstanceOfFloat(Object o) {
|
||||
Set<Class<?>> types = new HashSet<>();
|
||||
types.add(Float.class);
|
||||
types.add(Double.class);
|
||||
|
||||
return types.contains(o.getClass());
|
||||
}
|
||||
|
||||
public static <K, V> void printMap(Map<K, V> map) {
|
||||
System.out.println("\nkey: value");
|
||||
map.forEach((k, v) -> System.out.print(String.format("%s:\t %,8d%n", k, v)));
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic map converter -> since AtomicLongs aren't as comparable.
|
||||
* Converts ConcurrentHashMap<K, AtomicLong> to HashMap<K, Long>
|
||||
*/
|
||||
public static <K, V> Map<String, Long> atomicInt2StringAndInt(Map<K, V> map) {
|
||||
Map m = new HashMap<String, Long>();
|
||||
|
||||
for (Map.Entry<K, V> e : map.entrySet()) {
|
||||
m.put(e.getKey().toString(), ((AtomicLong) e.getValue()).longValue());
|
||||
}
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts a map in a descending order by value.
|
||||
*/
|
||||
public static <K, V extends Comparable<? super V>> Map<K, V> sortByValue(Map<K, V> map, int limit) {
|
||||
/*
|
||||
sorted() in itself is O(1), since it's an intermediate operation that
|
||||
doesn't consume the stream, but simply adds an operation to the pipeline.
|
||||
Once the stream is consumed by a terminal operation, the sort happens and
|
||||
either
|
||||
- it doesn't do anything (O(1)) because the stream knows that the
|
||||
elements are already sorted (because they come from a SortedSet, for example)
|
||||
- or the stream is not parallel, and it delegates to Arrays.sort() (O(n log n))
|
||||
- or the stream is parallel, and it delegates to Arrays.parallelSort() (O(n log n))
|
||||
|
||||
As of JDK 8, the main sorting algorithm which is also used in standard
|
||||
stream API implementation for sequential sorting is TimSort. Its worst
|
||||
case is O(n log n), but it works incredibly fast (with O(n) and quite
|
||||
small constant) if data is presorted (in forward or reverse direction)
|
||||
or partially presorted (for example, if you concatenate two sorted lists
|
||||
and sort them again).
|
||||
*/
|
||||
// if limit is set to 0 or less, we take that to mean no limit at all
|
||||
if (limit <= 0) {
|
||||
limit = map.size();
|
||||
}
|
||||
|
||||
Map<K, V> result = new LinkedHashMap<>();
|
||||
TimeWatch watch = TimeWatch.start();
|
||||
|
||||
Stream<Map.Entry<K, V>> st = map.entrySet().stream();
|
||||
|
||||
st.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).limit(limit)
|
||||
.forEachOrdered(e -> result.put(e.getKey(), e.getValue()));
|
||||
|
||||
if (Settings.PRINT_LOG) {
|
||||
System.out.println(String.format("Elapsed time for sorting %s items: %s",
|
||||
formatNumberReadable(result.size()),
|
||||
watch.toFullTime()));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static <K, V> void printMap(Map<K, Integer> map, String title, int number_of_words) {
|
||||
System.out.println(String.format("\n%s\n------------\nkey: value\tpercent", title));
|
||||
map.forEach((k, v) ->
|
||||
System.out.println(String.format("%s:\t %s\t %s%%",
|
||||
k,
|
||||
Util.formatNumberReadable(v),
|
||||
Util.formatNumberReadable((double) v / number_of_words * 100))));
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
static long mapSumFrequencies(Map<String, Long> map) {
|
||||
long sum = 0;
|
||||
|
||||
for (long value : map.values()) {
|
||||
sum += value;
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Used for passing optional integer values for sorting.
|
||||
*/
|
||||
public static int getValidInt(int... i) {
|
||||
if (i == null || i.length < 1 || i[0] <= 0) {
|
||||
return 0;
|
||||
} else {
|
||||
return i[0];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether a map is empty. It also considers an edge case where map's keys are lists to check if those lists are empty.
|
||||
*/
|
||||
public static <K, V> boolean isMapEmpty(Map<K, V> map) {
|
||||
if (map.isEmpty()) {
|
||||
// default
|
||||
return true;
|
||||
}
|
||||
|
||||
// otherwise check if keys map to values that are empty
|
||||
for (V v : map.values()) {
|
||||
// todo: generalize to all collections if/when needed
|
||||
ArrayList<String> vl = new ArrayList((List<String>) v);
|
||||
if (!vl.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the location of the main class if possible, otherwise null
|
||||
*/
|
||||
public static File getWorkingDirectory() {
|
||||
// get location of the currently executing class
|
||||
String path = GUIController.class.getProtectionDomain().getCodeSource().getLocation().getPath();
|
||||
|
||||
logger.info("working dir path: ", path);
|
||||
|
||||
String decodedPath = null;
|
||||
try {
|
||||
decodedPath = URLDecoder.decode(path, "UTF-8");
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
logger.error("decoding: ", e);
|
||||
// e.printStackTrace();
|
||||
}
|
||||
|
||||
if (decodedPath != null) {
|
||||
File workingDirectory = new File(decodedPath);
|
||||
|
||||
// in case it's a file (class is packaged inside a jar), select its parent folder
|
||||
workingDirectory = workingDirectory.isFile() ? workingDirectory.getParentFile() : workingDirectory;
|
||||
|
||||
if (ValidationUtil.isReadableDirectory(workingDirectory)) {
|
||||
logger.info("working dir is ok: ", workingDirectory.getAbsolutePath());
|
||||
return workingDirectory;
|
||||
}
|
||||
}
|
||||
|
||||
logger.info("working dir returing null");
|
||||
return null;
|
||||
}
|
||||
}
|
||||
132
src/main/java/util/db/RDB.java
Normal file
132
src/main/java/util/db/RDB.java
Normal file
@@ -0,0 +1,132 @@
|
||||
package util.db;
|
||||
|
||||
import static util.ByteUtils.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.rocksdb.*;
|
||||
|
||||
import util.TimeWatch;
|
||||
|
||||
public class RDB {
|
||||
|
||||
private RocksDB db;
|
||||
private String path;
|
||||
private static final String UTF_8 = "UTF-8";
|
||||
|
||||
public RDB() {
|
||||
// different dbs i ncase of concurrent calculations
|
||||
this.path = System.getProperty("java.io.tmpdir")
|
||||
.concat(File.separator)
|
||||
.concat(String.format("corpusAnalyzer_db%d", LocalDateTime.now().toString().hashCode()));
|
||||
|
||||
this.db = createDB();
|
||||
}
|
||||
|
||||
|
||||
private RocksDB createDB() {
|
||||
RocksDB.loadLibrary();
|
||||
|
||||
// the Options class contains a set of configurable DB options
|
||||
// that determines the behaviour of the database.
|
||||
try (final Options options = new Options()) {
|
||||
options.setCreateIfMissing(true);
|
||||
|
||||
// a factory method that returns a RocksDB instance
|
||||
try (final RocksDB rdb = RocksDB.open(options, path)) {
|
||||
if (db != null) {
|
||||
return rdb;
|
||||
} else {
|
||||
this.db = rdb;
|
||||
}
|
||||
}
|
||||
} catch (RocksDBException e) {
|
||||
// do some error handling
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public void writeBatch(Map<String, AtomicLong> results) throws UnsupportedEncodingException {
|
||||
RocksDB.loadLibrary();
|
||||
|
||||
// a factory method that returns a RocksDB instance
|
||||
try (final RocksDB rdb = RocksDB.open(new Options(), path)) {
|
||||
final WriteBatch wb = new WriteBatch();
|
||||
|
||||
for (Map.Entry<String, AtomicLong> entry : results.entrySet()) {
|
||||
byte[] key = entry.getKey().getBytes(UTF_8);
|
||||
long resultValue = entry.getValue().longValue();
|
||||
|
||||
try {
|
||||
final byte[] dbValue = rdb.get(key);
|
||||
if (dbValue != null) {
|
||||
// value == null if key does not exist in db.
|
||||
wb.put(key, longToBytes(bytesToLong(dbValue) + resultValue));
|
||||
} else {
|
||||
wb.put(key, longToBytes(entry.getValue().longValue()));
|
||||
}
|
||||
} catch (RocksDBException e) {
|
||||
// TODO: error handling
|
||||
}
|
||||
}
|
||||
TimeWatch watch = TimeWatch.start();
|
||||
rdb.write(new WriteOptions(), wb);
|
||||
System.out.println(String.format("Writing %d entries took: %s", wb.count(), watch.toFullTime()));
|
||||
} catch (RocksDBException e) {
|
||||
// do some error handling
|
||||
}
|
||||
}
|
||||
|
||||
// public byte[] atomicIntToByteArray(final AtomicLong i) {
|
||||
// BigInteger bigInt = BigInteger.valueOf(i.intValue());
|
||||
//
|
||||
// return bigInt.toByteArray();
|
||||
// }
|
||||
|
||||
public RocksDB getDb() {
|
||||
return db;
|
||||
}
|
||||
|
||||
public Map<String, AtomicLong> getDump() throws UnsupportedEncodingException {
|
||||
Map<String, AtomicLong> dump = new HashMap<>();
|
||||
RocksDB.loadLibrary();
|
||||
|
||||
// the Options class contains a set of configurable DB options
|
||||
// that determines the behaviour of the database.
|
||||
// a factory method that returns a RocksDB instance
|
||||
try (final RocksDB rdb = RocksDB.open(new Options(), path)) {
|
||||
try (RocksIterator it = rdb.newIterator()) {
|
||||
it.seekToFirst();
|
||||
// it.next();
|
||||
|
||||
while (it.isValid()) {
|
||||
byte[] key = it.key();
|
||||
byte[] value = it.value();
|
||||
|
||||
dump.put(new String(key, UTF_8), new AtomicLong(bytesToLong(value)));
|
||||
|
||||
it.next();
|
||||
}
|
||||
}
|
||||
} catch (RocksDBException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
return dump;
|
||||
}
|
||||
|
||||
public void delete() {
|
||||
try {
|
||||
FileUtils.deleteDirectory(new File(path));
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user