You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
171 lines
5.1 KiB
171 lines
5.1 KiB
6 years ago
|
package alg.inflectedJOS;
|
||
|
|
||
|
import java.util.ArrayList;
|
||
|
import java.util.HashMap;
|
||
|
import java.util.List;
|
||
|
|
||
|
import org.apache.commons.lang3.StringUtils;
|
||
|
|
||
|
import alg.Common;
|
||
|
import data.Sentence;
|
||
|
import data.Statistics;
|
||
|
import data.StatisticsNew;
|
||
|
import data.Word;
|
||
|
|
||
|
public class InflectedJOSCount {
|
||
|
|
||
|
public static HashMap<Integer, ArrayList<ArrayList<Integer>>> indices;
|
||
|
|
||
|
// static {
|
||
|
// // calculate all possible combinations of indices we will substitute with a '-' for substring statistics
|
||
|
// indices = new HashMap<>();
|
||
|
// for (int i = 5; i <= 8; i++) {
|
||
|
// indices.put(i, calculateCombinations(i));
|
||
|
// }
|
||
|
// }
|
||
|
//
|
||
|
// private static List<Integer> calculateCombinations(int i) {
|
||
|
// int arr[] = {1, 2, 3, 4, 5};
|
||
|
// int r = 3;
|
||
|
// int n = arr.length;
|
||
|
// ArrayList<ArrayList<Integer>> result = new ArrayList<>();
|
||
|
//
|
||
|
// return printCombination(arr, n, r);
|
||
|
// }
|
||
|
//
|
||
|
// /* arr[] ---> Input Array
|
||
|
// data[] ---> Temporary array to store current combination
|
||
|
// start & end ---> Staring and Ending indexes in arr[]
|
||
|
// index ---> Current index in data[]
|
||
|
// r ---> Size of a combination to be printed */
|
||
|
// static void combinationUtil(int arr[], int data[], int start,
|
||
|
// int end, int index, int r, ArrayList<ArrayList<Integer>> result) {
|
||
|
// // Current combination is ready to be printed, print it
|
||
|
// ArrayList<Integer> tmpResult = new ArrayList<>();
|
||
|
//
|
||
|
// if (index == r) {
|
||
|
// ArrayList<Integer> tmpResult = new ArrayList<>();
|
||
|
// for (int j = 0; j < r; j++)
|
||
|
// System.out.print(data[j] + " ");
|
||
|
// System.out.println("");
|
||
|
// return;
|
||
|
// }
|
||
|
//
|
||
|
// // replace index with all possible elements. The condition
|
||
|
// // "end-i+1 >= r-index" makes sure that including one element
|
||
|
// // at index will make a combination with remaining elements
|
||
|
// // at remaining positions
|
||
|
// for (int i = start; i <= end && end - i + 1 >= r - index; i++) {
|
||
|
// data[index] = arr[i];
|
||
|
// combinationUtil(arr, data, i + 1, end, index + 1, r);
|
||
|
// }
|
||
|
// }
|
||
|
//
|
||
|
// // The main function that prints all combinations of size r
|
||
|
// // in arr[] of size n. This function mainly uses combinationUtil()
|
||
|
// static void printCombination(int arr[], int n, int r) {
|
||
|
// // A temporary array to store all combination one by one
|
||
|
// int data[] = new int[r];
|
||
|
//
|
||
|
// // Print all combination using temprary array 'data[]'
|
||
|
// combinationUtil(arr, data, 0, n - 1, 0, r);
|
||
|
// }
|
||
|
|
||
|
// public static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
|
||
|
// for (Sentence s : corpus) {
|
||
|
// // disregard if wrong taxonomy
|
||
|
// if (!(s.getTaxonomy().startsWith(taxonomy))) {
|
||
|
// continue;
|
||
|
// }
|
||
|
//
|
||
|
// calculateCommon(s, stats.result);
|
||
|
//
|
||
|
// for (Word word : s.getWords()) {
|
||
|
// // skip if current word is not inflected
|
||
|
// if (!(word.getMsd().length() > 0)) {
|
||
|
// continue;
|
||
|
// }
|
||
|
//
|
||
|
// String msd = word.getMsd();
|
||
|
//
|
||
|
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||
|
//
|
||
|
// for (int i = 1; i < msd.length(); i++) {
|
||
|
// entry.setCharAt(i, msd.charAt(i));
|
||
|
// Common.updateMap(stats.result, entry.toString());
|
||
|
// entry.setCharAt(i, '-');
|
||
|
// }
|
||
|
// }
|
||
|
// }
|
||
|
// }
|
||
|
|
||
|
// public static void calculateForAll(List<Sentence> corpus, Statistics stats) {
|
||
|
// for (Sentence s : corpus) {
|
||
|
// for (Word word : s.getWords()) {
|
||
|
// if (!(word.getMsd().length() > 0)) {
|
||
|
// continue;
|
||
|
// }
|
||
|
//
|
||
|
// String msd = word.getMsd();
|
||
|
//
|
||
|
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||
|
//
|
||
|
// for (int i = 1; i < msd.length(); i++) {
|
||
|
// entry.setCharAt(i, msd.charAt(i));
|
||
|
// Common.updateMap(stats.result, entry.toString());
|
||
|
// entry.setCharAt(i, '-');
|
||
|
// }
|
||
|
// }
|
||
|
// }
|
||
|
// }
|
||
|
|
||
|
static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
|
||
|
for (Sentence s : corpus) {
|
||
|
// disregard if wrong taxonomy
|
||
|
if (taxonomy != null && !(s.getTaxonomy().startsWith(taxonomy))) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
for (Word word : s.getWords()) {
|
||
|
// skip if current word is not inflected
|
||
|
if (!(word.getMsd().length() > 0)) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
String msd = word.getMsd();
|
||
|
|
||
|
StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||
|
|
||
|
for (int i = 1; i < msd.length(); i++) {
|
||
|
entry.setCharAt(i, msd.charAt(i));
|
||
|
Common.updateMap(stats.result, entry.toString());
|
||
|
entry.setCharAt(i, '-');
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats, String taxonomy) {
|
||
|
for (Sentence s : corpus) {
|
||
|
|
||
|
for (Word word : s.getWords()) {
|
||
|
// skip if current word is not inflected
|
||
|
// // TODO: if has defined msd and is of correct type (create a set)
|
||
|
// if (!(word.getMsd().length() > 0)) {
|
||
|
// continue;
|
||
|
// }
|
||
|
|
||
|
String msd = word.getMsd();
|
||
|
|
||
|
StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||
|
|
||
|
for (int i = 1; i < msd.length(); i++) {
|
||
|
entry.setCharAt(i, msd.charAt(i));
|
||
|
stats.updateResults(entry.toString());
|
||
|
entry.setCharAt(i, '-');
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|