You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

171 lines
5.1 KiB

package alg.inflectedJOS;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import alg.Common;
import data.Sentence;
import data.Statistics;
import data.StatisticsNew;
import data.Word;
/**
 * Counts "single attribute" patterns derived from the MSD string of every word in a corpus.
 *
 * <p>For an MSD string such as {@code "Ncfsn"} the following patterns are produced, in this
 * order: {@code "Nc---"}, {@code "N-f--"}, {@code "N--s-"}, {@code "N---n"}. Each pattern keeps
 * the first character and exactly one other position of the MSD and masks every remaining
 * position with {@code '-'}. An MSD of length 1 yields no patterns.
 *
 * <p>NOTE(review): "MSD" is presumably a morphosyntactic description tag; confirm against
 * {@code data.Word}.
 */
public class InflectedJOSCount {
    /**
     * Intended to hold precomputed index combinations keyed by length.
     *
     * <p>Nothing in this class assigns or reads this field. An unfinished, commented-out draft
     * that was meant to populate it (combinations of indices to be replaced with {@code '-'}
     * for substring statistics) did not compile and has been removed. The field is kept only
     * because it is public and may be referenced from elsewhere.
     */
    public static HashMap<Integer, ArrayList<ArrayList<Integer>>> indices;

    /**
     * Updates {@code stats.result} with the single-attribute patterns of every word in the corpus.
     *
     * @param corpus   sentences to process
     * @param stats    statistics holder; its {@code result} map is updated through
     *                 {@link Common#updateMap}
     * @param taxonomy taxonomy prefix a sentence must start with to be counted, or {@code null}
     *                 to count every sentence
     */
    static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
        for (Sentence s : corpus) {
            // disregard if wrong taxonomy
            if (taxonomy != null && !(s.getTaxonomy().startsWith(taxonomy))) {
                continue;
            }
            for (Word word : s.getWords()) {
                // Words with a null or empty MSD produce no patterns and are skipped.
                for (String pattern : singleAttributePatterns(word.getMsd())) {
                    Common.updateMap(stats.result, pattern);
                }
            }
        }
    }

    /**
     * Reports the single-attribute patterns of every word in the corpus to
     * {@link StatisticsNew#updateResults}.
     *
     * <p>Words whose MSD is {@code null} or empty are skipped. Previously such a word made
     * {@code msd.charAt(0)} throw and aborted the whole run.
     *
     * @param corpus   sentences to process
     * @param stats    statistics holder that receives every generated pattern
     * @param taxonomy currently unused by this overload; no taxonomy filtering is applied here
     *                 (NOTE(review): confirm whether filtering is done by the caller)
     */
    public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats, String taxonomy) {
        for (Sentence s : corpus) {
            for (Word word : s.getWords()) {
                // TODO: if has defined msd and is of correct type (create a set)
                for (String pattern : singleAttributePatterns(word.getMsd())) {
                    stats.updateResults(pattern);
                }
            }
        }
    }

    /**
     * Builds the single-attribute patterns for one MSD string.
     *
     * @param msd MSD string of a word; may be {@code null} or empty
     * @return one pattern per position {@code 1..msd.length()-1}, in ascending position order;
     *         an empty list when {@code msd} is {@code null}, empty, or a single character
     */
    private static List<String> singleAttributePatterns(String msd) {
        List<String> patterns = new ArrayList<>();
        if (msd == null || msd.isEmpty()) {
            return patterns;
        }

        // Template: first MSD character followed by '-' for every remaining position.
        StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', msd.length() - 1));
        for (int i = 1; i < msd.length(); i++) {
            // Reveal position i, record the pattern, then mask the position again.
            entry.setCharAt(i, msd.charAt(i));
            patterns.add(entry.toString());
            entry.setCharAt(i, '-');
        }
        return patterns;
    }
}