Project copied

This commit is contained in:
2018-06-19 09:15:37 +02:00
commit a18e52a599
94 changed files with 87092 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
package util;
import java.nio.ByteBuffer;
/**
 * Helpers for converting between {@code long} values and their fixed 8-byte
 * big-endian representations (the form stored as RocksDB values elsewhere
 * in this project).
 *
 * Originally taken from <a href="https://stackoverflow.com/a/4485196">StackOverflow</a>.
 */
public class ByteUtils {

    /**
     * Encodes the given value as exactly {@link Long#BYTES} bytes in
     * big-endian order.
     *
     * @param x value to encode
     * @return a new 8-byte array holding the big-endian encoding of {@code x}
     */
    public static byte[] longToBytes(long x) {
        byte[] encoded = new byte[Long.BYTES];
        // wrap() writes straight into the backing array; big-endian is the
        // ByteBuffer default, matching the original allocate/putLong version
        ByteBuffer.wrap(encoded).putLong(x);
        return encoded;
    }

    /**
     * Decodes an 8-byte big-endian array back into a {@code long}.
     *
     * @param bytes exactly {@link Long#BYTES} bytes, as produced by {@link #longToBytes(long)}
     * @return the decoded value
     * @throws IllegalArgumentException if {@code bytes} is null or not exactly 8 bytes
     *         (previously this surfaced as an opaque BufferUnderflow/OverflowException)
     */
    public static long bytesToLong(byte[] bytes) {
        if (bytes == null || bytes.length != Long.BYTES) {
            throw new IllegalArgumentException(
                    "expected exactly " + Long.BYTES + " bytes, got "
                            + (bytes == null ? "null" : bytes.length));
        }
        return ByteBuffer.wrap(bytes).getLong();
    }
}

View File

@@ -0,0 +1,46 @@
package util;
import java.util.Arrays;
import java.util.HashSet;
import java.util.stream.IntStream;
/**
 * Generates all index combinations used to build masked word patterns
 * (e.g. "X...." with no fixed positions is represented by the empty set).
 */
public class Combinations {

    // Accumulator shared by the recursive helper; reset on every
    // generateIndices() call. NOTE(review): not safe for concurrent callers.
    private static HashSet<HashSet<Integer>> result = new HashSet<>();

    /**
     * Recursively fills {@code data} with combinations of {@code combinationLength}
     * elements drawn from {@code arr[start..end]}, adding each completed
     * combination to {@link #result}.
     */
    static void combinationUtil(int arr[], Integer data[], int start, int end, int index, int combinationLength) {
        if (index == combinationLength) {
            // All slots filled: record this combination.
            result.add(new HashSet<>(Arrays.asList(data)));
            return;
        }
        // Try each remaining candidate, but only while enough elements are
        // left to fill the slots still open:
        //   (end - i + 1) remaining elements must cover
        //   (combinationLength - index) unfilled positions.
        int i = start;
        while (i <= end && end - i + 1 >= combinationLength - index) {
            data[index] = arr[i];
            combinationUtil(arr, data, i + 1, end, index + 1, combinationLength);
            i++;
        }
    }

    /**
     * Builds every combination of the indices 1..maxNOfIndices-1 of size
     * 1..maxNOfIndices-2, plus the empty combination.
     *
     * @param maxNOfIndices exclusive upper bound on the index values
     * @return set of index sets (includes the empty set)
     */
    public static HashSet<HashSet<Integer>> generateIndices(int maxNOfIndices) {
        result = new HashSet<>();
        int[] candidates = IntStream.range(1, maxNOfIndices).toArray();
        for (int size = 1; size < maxNOfIndices - 1; size++) {
            // Scratch array reused by the recursion for the current size.
            combinationUtil(candidates, new Integer[size], 0, candidates.length - 1, 0, size);
        }
        // The empty set stands for patterns with no fixed index at all ("X....").
        result.add(new HashSet<>());
        return result;
    }
}

View File

@@ -0,0 +1,267 @@
package util;
import static util.Util.*;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.lang3.tuple.Pair;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import data.Enums.WordLevelType;
@SuppressWarnings("unchecked")
public class Export {

    /** Record separator used by all generated CSV files. */
    private static final String NEW_LINE_SEPARATOR = "\n";

    /** Column header shared by the word-frequency exports. */
    private static final Object[] FILE_HEADER = {"word", "frequency", "percent"};

    /**
     * Writes every (title, word->frequency) pair as a JSON object into
     * "statistics.json" in the working directory. Pairs with empty maps are
     * skipped. IO errors are reported to stderr, not rethrown.
     */
    public static void SetToJSON(Set<Pair<String, Map<String, Long>>> set) {
        JSONArray wrapper = new JSONArray();
        for (Pair<String, Map<String, Long>> p : set) {
            String title = p.getLeft();
            Map<String, Long> map = p.getRight();
            if (map.isEmpty())
                continue;
            long total = Util.mapSumFrequencies(map);
            JSONArray data_wrapper = new JSONArray();
            for (Map.Entry<String, Long> e : map.entrySet()) {
                JSONObject data_entry = new JSONObject();
                data_entry.put("word", e.getKey());
                data_entry.put("frequency", e.getValue());
                data_entry.put("percent", formatNumberAsPercent((double) e.getValue() / total));
                data_wrapper.add(data_entry);
            }
            JSONObject metric = new JSONObject();
            metric.put("Title", title);
            metric.put("data", data_wrapper);
            wrapper.add(metric);
        }
        try (FileWriter file = new FileWriter("statistics.json")) {
            file.write(wrapper.toJSONString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes one CSV file per (title, word->frequency) pair into resultsPath.
     * Pairs with empty maps are skipped (after the filename is derived, as before).
     *
     * @return the path of the last file name derived ("" if the set was empty)
     */
    public static String SetToCSV(Set<Pair<String, Map<String, Long>>> set, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
        String fileName = "";
        for (Pair<String, Map<String, Long>> p : set) {
            fileName = buildFileName(p.getLeft(), resultsPath);
            Map<String, Long> map = p.getRight();
            if (map.isEmpty())
                continue;
            long total = Util.mapSumFrequencies(map);
            // try-with-resources closes printer then writer even on failure
            // (previously a failed construction leaked the stream, and the
            // printer was closed after its underlying writer)
            try (OutputStreamWriter fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
                 CSVPrinter csvFilePrinter = new CSVPrinter(fileWriter, csvFormat())) {
                printHeaderInfo(csvFilePrinter, headerInfoBlock);
                csvFilePrinter.printRecord(FILE_HEADER);
                for (Map.Entry<String, Long> e : map.entrySet()) {
                    csvFilePrinter.printRecord(Arrays.asList(
                            e.getKey(),
                            e.getValue().toString(),
                            formatNumberAsPercent((double) e.getValue() / total)));
                }
            } catch (Exception e) {
                System.out.println("Error in CsvFileWriter!");
                e.printStackTrace();
            }
        }
        return fileName;
    }

    /**
     * Writes a precomputed result table (rows of word/frequency/percent) to a
     * single CSV file named after the title.
     *
     * @return the full path of the written file
     */
    public static String SetToCSV(String title, Object[][] result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
        String fileName = buildFileName(title, resultsPath);
        try (OutputStreamWriter fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
             CSVPrinter csvFilePrinter = new CSVPrinter(fileWriter, csvFormat())) {
            printHeaderInfo(csvFilePrinter, headerInfoBlock);
            csvFilePrinter.printRecord(FILE_HEADER);
            for (Object[] resultEntry : result) {
                csvFilePrinter.printRecord(Arrays.asList(
                        resultEntry[0],
                        resultEntry[1],
                        formatNumberAsPercent(resultEntry[2])));
            }
        } catch (Exception e) {
            System.out.println("Error in CsvFileWriter!");
            e.printStackTrace();
        }
        return fileName;
    }

    /**
     * Flattens a type -> key -> (word -> frequency) nesting into one CSV with
     * columns type/key/word/frequency.
     *
     * @return the full path of the written file
     */
    public static String nestedMapToCSV(String title, Map<WordLevelType, Map<String, Map<String, Long>>> result, File resultsPath, LinkedHashMap<String, String> headerInfoBlock) {
        // This export has its own header layout, distinct from FILE_HEADER.
        Object[] fileHeader = {"type", "key", "word", "frequency"};
        String fileName = buildFileName(title, resultsPath);
        try (OutputStreamWriter fileWriter = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
             CSVPrinter csvFilePrinter = new CSVPrinter(fileWriter, csvFormat())) {
            printHeaderInfo(csvFilePrinter, headerInfoBlock);
            csvFilePrinter.printRecord(fileHeader);
            for (Map.Entry<WordLevelType, Map<String, Map<String, Long>>> typeEntry : result.entrySet()) {
                for (Map.Entry<String, Map<String, Long>> keyWordEntry : typeEntry.getValue().entrySet()) {
                    for (Map.Entry<String, Long> calculationResults : keyWordEntry.getValue().entrySet()) {
                        csvFilePrinter.printRecord(Arrays.asList(
                                typeEntry.getKey().getName(),
                                keyWordEntry.getKey(),
                                calculationResults.getKey(),
                                calculationResults.getValue()));
                    }
                }
            }
        } catch (Exception e) {
            System.out.println("Error in CsvFileWriter!");
            e.printStackTrace();
        }
        return fileName;
    }

    /** Derives "resultsPath/<title>.csv", replacing ": " with "-" and spaces with "_". */
    private static String buildFileName(String title, File resultsPath) {
        String fileName = title.replace(": ", "-").replace(" ", "_").concat(".csv");
        return resultsPath.toString().concat(File.separator).concat(fileName);
    }

    /** CSV format shared by all exports: default rules, ';' delimiter, '\n' records. */
    private static CSVFormat csvFormat() {
        return CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR).withDelimiter(';');
    }

    /**
     * Emits each header key/value pair as its own record, followed by two
     * empty records separating the info block from the data table.
     */
    private static void printHeaderInfo(CSVPrinter csvFilePrinter, LinkedHashMap<String, String> headerInfoBlock) throws IOException {
        for (Map.Entry<String, String> entry : headerInfoBlock.entrySet()) {
            csvFilePrinter.printRecord(Arrays.asList(entry.getKey(), entry.getValue()));
        }
        csvFilePrinter.printRecord(Collections.emptyList());
        csvFilePrinter.printRecord(Collections.emptyList());
    }
}

View File

@@ -0,0 +1,31 @@
package util;
/**
 * Placeholder for a dedicated map-key type; currently unused and empty.
 *
 * NOTE(review): the commented-out draft previously kept here was defective —
 * its equals() called {@code this.equals(o)} (infinite recursion) and its
 * hashCode() returned a constant 0. The dead code has been removed; if this
 * type is ever needed, implement it with {@link java.util.Objects#equals}
 * and {@link java.util.Objects#hash} (or convert it to a record/final value
 * class) and make {@code compareTo} consistent with {@code equals}.
 */
public class Key /*implements Comparable<Key> */ {
}

View File

@@ -0,0 +1,63 @@
package util;
import java.util.concurrent.TimeUnit;
/**
* Adapted from http://memorynotfound.com/calculating-elapsed-time-java/
*/
/**
 * Simple elapsed-time stopwatch based on {@link System#nanoTime()}.
 * Adapted from http://memorynotfound.com/calculating-elapsed-time-java/
 */
public class TimeWatch {

    // nanoTime() timestamp captured when the watch was started/reset
    private long starts;

    private TimeWatch() {
        reset();
    }

    /** Creates a watch that starts measuring immediately. */
    public static TimeWatch start() {
        return new TimeWatch();
    }

    /** Restarts measurement from "now"; returns this for chaining. */
    private TimeWatch reset() {
        starts = System.nanoTime();
        return this;
    }

    /** @return elapsed nanoseconds since start/reset */
    private long time() {
        long ends = System.nanoTime();
        return ends - starts;
    }

    /** @return total elapsed time truncated to the given unit */
    private long time(TimeUnit unit) {
        return unit.convert(time(), TimeUnit.NANOSECONDS);
    }

    /** @return elapsed time as "m min, s sec" with seconds reduced modulo the minutes */
    private String toMinuteSeconds() {
        long minutes = time(TimeUnit.MINUTES);
        // BUGFIX: previously subtracted the raw minute count from the second
        // count (125 s printed as "2 min, 123 sec"); the minutes must be
        // converted to seconds before subtracting.
        long seconds = time(TimeUnit.SECONDS) - TimeUnit.MINUTES.toSeconds(minutes);
        return String.format("%d min, %d sec", minutes, seconds);
    }

    /** @return elapsed time broken down into hours, minutes, seconds and milliseconds */
    public String toFullTime() {
        long hours = time(TimeUnit.HOURS);
        long minutes = time(TimeUnit.MINUTES) - TimeUnit.HOURS.toMinutes(hours);
        long seconds = time(TimeUnit.SECONDS) - TimeUnit.HOURS.toSeconds(hours) - TimeUnit.MINUTES.toSeconds(minutes);
        long milliseconds = time(TimeUnit.MILLISECONDS) - TimeUnit.HOURS.toMillis(hours) - TimeUnit.MINUTES.toMillis(minutes) - TimeUnit.SECONDS.toMillis(seconds);
        return String.format("%d h, %d min, %d s, %d ms", hours, minutes, seconds, milliseconds);
    }

    @Override
    public String toString() {
        // BUGFIX: previously returned only the label without the value.
        return "Elapsed Time in nano seconds: " + time();
    }

    /** Demonstrates the intended usage of this class (not called in production). */
    private void exampleUsage() {
        TimeWatch watch = TimeWatch.start();
        // do something...
        System.out.println("Elapsed Time custom format: " + watch.toMinuteSeconds());
        System.out.println("Elapsed Time in seconds: " + watch.time(TimeUnit.SECONDS));
        System.out.println("Elapsed Time in nano seconds: " + watch.time());
    }
}

View File

@@ -0,0 +1,225 @@
package util;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.text.MessageFormat;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Stream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import data.Settings;
import gui.GUIController;
import gui.ValidationUtil;
/**
 * Assorted static helpers: time/number formatting, map utilities and
 * working-directory discovery.
 */
public class Util {

    public final static Logger logger = LogManager.getLogger(Util.class);

    /**
     * Formats a nanosecond duration as "h, min, s, ms, µs, ns", each field
     * reduced modulo the larger units.
     *
     * @param time duration in nanoseconds
     */
    public static String toReadableTime(long time) {
        long hours = time(TimeUnit.HOURS, time);
        long minutes = time(TimeUnit.MINUTES, time) - TimeUnit.HOURS.toMinutes(hours);
        long seconds = time(TimeUnit.SECONDS, time) - TimeUnit.HOURS.toSeconds(hours) - TimeUnit.MINUTES.toSeconds(minutes);
        long milliseconds = time(TimeUnit.MILLISECONDS, time) - TimeUnit.HOURS.toMillis(hours) - TimeUnit.MINUTES.toMillis(minutes) - TimeUnit.SECONDS.toMillis(seconds);
        long microseconds = time(TimeUnit.MICROSECONDS, time) - TimeUnit.HOURS.toMicros(hours) - TimeUnit.MINUTES.toMicros(minutes) - TimeUnit.SECONDS.toMicros(seconds) - TimeUnit.MILLISECONDS.toMicros(milliseconds);
        long nanoseconds = time(TimeUnit.NANOSECONDS, time) - TimeUnit.HOURS.toNanos(hours) - TimeUnit.MINUTES.toNanos(minutes) - TimeUnit.SECONDS.toNanos(seconds) - TimeUnit.MILLISECONDS.toNanos(milliseconds) - TimeUnit.MICROSECONDS.toNanos(microseconds);
        return String.format("%d h, %d min, %d s, %d ms, %d µs, %d ns", hours, minutes, seconds, milliseconds, microseconds, nanoseconds);
    }

    /** Converts a nanosecond value to the given unit (truncating). */
    private static long time(TimeUnit unit, long t) {
        return unit.convert(t, TimeUnit.NANOSECONDS);
    }

    /**
     * Converts a number to a more readable format.
     * 12345 -> 12.345 (locale-dependent thousands separator)
     * 12345,678 -> 12.345,67
     *
     * @param o byte, double, float, int, long or short (boxed)
     * @return number formatted with thousands separator and, for floats,
     *         2 decimal places; a fixed error marker for anything else
     *         (including null, which previously threw a NullPointerException)
     */
    private static String formatNumberReadable(Object o) {
        if (isInstanceOfInteger(o))
            return String.format("%,d", o);
        else if (isInstanceOfFloat(o))
            return String.format("%,.2f", o);
        else
            return "- invalid input format -";
    }

    /** Formats a ratio (e.g. 0.123) as a percentage with up to 3 decimals. */
    public static String formatNumberAsPercent(Object o) {
        return MessageFormat.format("{0,number,#.###%}", o);
    }

    /** @return true if o is a boxed integral type (Byte/Short/Integer/Long); false for null */
    private static boolean isInstanceOfInteger(Object o) {
        return o instanceof Byte || o instanceof Short || o instanceof Integer || o instanceof Long;
    }

    /** @return true if o is a boxed floating-point type (Float/Double); false for null */
    private static boolean isInstanceOfFloat(Object o) {
        return o instanceof Float || o instanceof Double;
    }

    /**
     * Dumps a map to stdout as "key: value" lines.
     * NOTE(review): the "%,8d" conversion assumes the values are integral
     * numbers; other value types will throw at format time — confirm callers.
     */
    public static <K, V> void printMap(Map<K, V> map) {
        System.out.println("\nkey: value");
        map.forEach((k, v) -> System.out.print(String.format("%s:\t %,8d%n", k, v)));
        System.out.println();
    }

    /**
     * Generic map converter — AtomicLongs are not comparable to each other,
     * so convert a Map&lt;K, AtomicLong&gt; into a Map&lt;String, Long&gt;
     * keyed by K.toString().
     */
    public static <K, V> Map<String, Long> atomicInt2StringAndInt(Map<K, V> map) {
        // typed map instead of the former raw Map (unchecked warning)
        Map<String, Long> m = new HashMap<>();
        for (Map.Entry<K, V> e : map.entrySet()) {
            m.put(e.getKey().toString(), ((AtomicLong) e.getValue()).longValue());
        }
        return m;
    }

    /**
     * Sorts a map in descending order by value, keeping at most {@code limit}
     * entries (limit <= 0 means "no limit"). Returns a LinkedHashMap so the
     * sorted order is preserved on iteration.
     *
     * Sorting is delegated to the stream pipeline (TimSort under the hood,
     * O(n log n) worst case, close to O(n) on mostly-sorted data).
     */
    public static <K, V extends Comparable<? super V>> Map<K, V> sortByValue(Map<K, V> map, int limit) {
        // if limit is set to 0 or less, we take that to mean no limit at all
        if (limit <= 0) {
            limit = map.size();
        }
        Map<K, V> result = new LinkedHashMap<>();
        TimeWatch watch = TimeWatch.start();
        Stream<Map.Entry<K, V>> st = map.entrySet().stream();
        st.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).limit(limit)
                .forEachOrdered(e -> result.put(e.getKey(), e.getValue()));
        if (Settings.PRINT_LOG) {
            System.out.println(String.format("Elapsed time for sorting %s items: %s",
                    formatNumberReadable(result.size()),
                    watch.toFullTime()));
        }
        return result;
    }

    /**
     * Prints a titled frequency table with each value's share of
     * {@code number_of_words} as a percentage.
     */
    public static <K, V> void printMap(Map<K, Integer> map, String title, int number_of_words) {
        System.out.println(String.format("\n%s\n------------\nkey: value\tpercent", title));
        map.forEach((k, v) ->
                System.out.println(String.format("%s:\t %s\t %s%%",
                        k,
                        Util.formatNumberReadable(v),
                        Util.formatNumberReadable((double) v / number_of_words * 100))));
        System.out.println();
    }

    /** Sums all frequency values of the map. */
    static long mapSumFrequencies(Map<String, Long> map) {
        long sum = 0;
        for (long value : map.values()) {
            sum += value;
        }
        return sum;
    }

    /**
     * Used for passing optional integer values for sorting.
     *
     * @param i optional varargs; only the first element is considered
     * @return the first element if present and positive, otherwise 0
     */
    public static int getValidInt(int... i) {
        if (i == null || i.length < 1 || i[0] <= 0) {
            return 0;
        } else {
            return i[0];
        }
    }

    /**
     * Check whether a map is empty. Also considers the edge case where the
     * map's values are lists, treating a map of only-empty lists as empty.
     */
    public static <K, V> boolean isMapEmpty(Map<K, V> map) {
        if (map.isEmpty()) {
            // default
            return true;
        }
        // otherwise check if keys map to values that are empty
        for (V v : map.values()) {
            // todo: generalize to all collections if/when needed
            // NOTE(review): unchecked cast — assumes every value is a
            // List<String>; a non-list value will throw ClassCastException.
            ArrayList<String> vl = new ArrayList((List<String>) v);
            if (!vl.isEmpty()) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the location of the main class if possible, otherwise null.
     * When the class is packaged inside a jar, the jar's parent directory
     * is returned instead of the jar file itself.
     */
    public static File getWorkingDirectory() {
        // get location of the currently executing class
        String path = GUIController.class.getProtectionDomain().getCodeSource().getLocation().getPath();
        // BUGFIX: log4j needs a {} placeholder, otherwise the argument is dropped
        logger.info("working dir path: {}", path);
        String decodedPath = null;
        try {
            decodedPath = URLDecoder.decode(path, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            logger.error("decoding: ", e);
        }
        if (decodedPath != null) {
            File workingDirectory = new File(decodedPath);
            // in case it's a file (class is packaged inside a jar), select its parent folder
            workingDirectory = workingDirectory.isFile() ? workingDirectory.getParentFile() : workingDirectory;
            if (ValidationUtil.isReadableDirectory(workingDirectory)) {
                logger.info("working dir is ok: {}", workingDirectory.getAbsolutePath());
                return workingDirectory;
            }
        }
        logger.info("working dir returning null");
        return null;
    }
}

View File

@@ -0,0 +1,132 @@
package util.db;
import static util.ByteUtils.*;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.io.FileUtils;
import org.rocksdb.*;
import util.TimeWatch;
public class RDB {
private RocksDB db;
private String path;
private static final String UTF_8 = "UTF-8";
public RDB() {
// different dbs i ncase of concurrent calculations
this.path = System.getProperty("java.io.tmpdir")
.concat(File.separator)
.concat(String.format("corpusAnalyzer_db%d", LocalDateTime.now().toString().hashCode()));
this.db = createDB();
}
private RocksDB createDB() {
RocksDB.loadLibrary();
// the Options class contains a set of configurable DB options
// that determines the behaviour of the database.
try (final Options options = new Options()) {
options.setCreateIfMissing(true);
// a factory method that returns a RocksDB instance
try (final RocksDB rdb = RocksDB.open(options, path)) {
if (db != null) {
return rdb;
} else {
this.db = rdb;
}
}
} catch (RocksDBException e) {
// do some error handling
}
return null;
}
public void writeBatch(Map<String, AtomicLong> results) throws UnsupportedEncodingException {
RocksDB.loadLibrary();
// a factory method that returns a RocksDB instance
try (final RocksDB rdb = RocksDB.open(new Options(), path)) {
final WriteBatch wb = new WriteBatch();
for (Map.Entry<String, AtomicLong> entry : results.entrySet()) {
byte[] key = entry.getKey().getBytes(UTF_8);
long resultValue = entry.getValue().longValue();
try {
final byte[] dbValue = rdb.get(key);
if (dbValue != null) {
// value == null if key does not exist in db.
wb.put(key, longToBytes(bytesToLong(dbValue) + resultValue));
} else {
wb.put(key, longToBytes(entry.getValue().longValue()));
}
} catch (RocksDBException e) {
// TODO: error handling
}
}
TimeWatch watch = TimeWatch.start();
rdb.write(new WriteOptions(), wb);
System.out.println(String.format("Writing %d entries took: %s", wb.count(), watch.toFullTime()));
} catch (RocksDBException e) {
// do some error handling
}
}
// public byte[] atomicIntToByteArray(final AtomicLong i) {
// BigInteger bigInt = BigInteger.valueOf(i.intValue());
//
// return bigInt.toByteArray();
// }
public RocksDB getDb() {
return db;
}
public Map<String, AtomicLong> getDump() throws UnsupportedEncodingException {
Map<String, AtomicLong> dump = new HashMap<>();
RocksDB.loadLibrary();
// the Options class contains a set of configurable DB options
// that determines the behaviour of the database.
// a factory method that returns a RocksDB instance
try (final RocksDB rdb = RocksDB.open(new Options(), path)) {
try (RocksIterator it = rdb.newIterator()) {
it.seekToFirst();
// it.next();
while (it.isValid()) {
byte[] key = it.key();
byte[] value = it.value();
dump.put(new String(key, UTF_8), new AtomicLong(bytesToLong(value)));
it.next();
}
}
} catch (RocksDBException e) {
e.printStackTrace();
}
return dump;
}
public void delete() {
try {
FileUtils.deleteDirectory(new File(path));
} catch (IOException e) {
e.printStackTrace();
}
}
}