package alg.inflectedJOS;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.apache.commons.lang3.StringUtils;

import alg.Common;
import data.Sentence;
import data.Statistics;
import data.StatisticsNew;
import data.Word;

/**
 * Counts single-attribute patterns of the MSD tags attached to words.
 *
 * <p>For every word, the tag returned by {@link Word#getMsd()} is expanded into one masked
 * entry per position after the first: the first character is always kept, exactly one other
 * position keeps its original character, and every remaining position is replaced by
 * {@code '-'}. For example the tag {@code "Ncmsn"} yields {@code "Nc---"}, {@code "N-m--"},
 * {@code "N--s-"} and {@code "N---n"}. Each entry is then handed to the statistics object.
 *
 * <p>NOTE(review): "MSD" is presumably a morphosyntactic description tag; confirm against
 * the {@code data.Word} documentation.
 */
public class InflectedJOSCount {

    /**
     * Placeholder for precomputed index combinations used for substring statistics.
     *
     * <p>Nothing in this class assigns or reads this field, so it is {@code null} unless
     * code elsewhere sets it.
     */
    public static HashMap<Integer, ArrayList<ArrayList<Integer>>> indices;

    /**
     * Updates {@code stats.result} with the single-attribute masks of every tagged word in
     * the sentences whose taxonomy starts with {@code taxonomy}.
     *
     * @param corpus   sentences to process
     * @param stats    statistics holder whose {@code result} map is updated through
     *                 {@link Common#updateMap}
     * @param taxonomy taxonomy prefix a sentence must start with to be counted; when
     *                 {@code null}, every sentence is counted
     */
    static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
        for (Sentence s : corpus) {
            // disregard if wrong taxonomy
            if (taxonomy != null && !(s.getTaxonomy().startsWith(taxonomy))) {
                continue;
            }

            for (Word word : s.getWords()) {
                // words without a tag produce no masks and are therefore skipped
                for (String mask : singleAttributeMasks(word.getMsd())) {
                    Common.updateMap(stats.result, mask);
                }
            }
        }
    }

    /**
     * Reports the single-attribute masks of every tagged word in {@code corpus} to
     * {@code stats} via {@link StatisticsNew#updateResults}.
     *
     * <p>Words whose tag is {@code null} or empty are skipped; previously an empty tag
     * caused a {@link StringIndexOutOfBoundsException}.
     *
     * @param corpus   sentences to process
     * @param stats    statistics collector that receives one call per mask
     * @param taxonomy not used by this overload; no taxonomy filtering is applied here
     */
    public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats, String taxonomy) {
        // TODO: if has defined msd and is of correct type (create a set)
        for (Sentence s : corpus) {
            for (Word word : s.getWords()) {
                for (String mask : singleAttributeMasks(word.getMsd())) {
                    stats.updateResults(mask);
                }
            }
        }
    }

    /**
     * Builds the masked variants of a tag, one per position after the first.
     *
     * @param msd tag to expand; may be {@code null} or empty
     * @return the masks in position order; empty when {@code msd} is {@code null}, empty,
     *         or a single character long
     */
    private static List<String> singleAttributeMasks(String msd) {
        List<String> masks = new ArrayList<>();
        if (msd == null || msd.isEmpty()) {
            return masks;
        }

        // start from "<first char>----" and reveal one position at a time
        StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', msd.length() - 1));
        for (int i = 1; i < msd.length(); i++) {
            entry.setCharAt(i, msd.charAt(i));
            masks.add(entry.toString());
            // re-mask the position before moving on to the next one
            entry.setCharAt(i, '-');
        }
        return masks;
    }
}