Added some optimizations and new taxonomy names
This commit is contained in:
@@ -262,7 +262,7 @@ public class XML_processing {
|
||||
|
||||
if(stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() &&
|
||||
stavek.size() > 0){
|
||||
stavek.add(new Word(c3Content, c3Content, "/"));
|
||||
stavek.add(createWord(c3Content, c3Content, "/", "", stats.getFilter()));
|
||||
|
||||
}
|
||||
|
||||
@@ -297,7 +297,7 @@ public class XML_processing {
|
||||
|
||||
// "word" node value
|
||||
if (in_word) {
|
||||
stavek.add(new Word(characters.getData(), lemma, msd));
|
||||
stavek.add(createWord(characters.getData(), lemma, msd, "", stats.getFilter()));
|
||||
in_word = false;
|
||||
}
|
||||
break;
|
||||
@@ -537,12 +537,12 @@ public class XML_processing {
|
||||
// "word" node value
|
||||
if (inWord) {
|
||||
String word = characters.getData();
|
||||
sentence.add(new Word(word, lemma, msd));
|
||||
sentence.add(createWord(word, lemma, msd, word, stats.getFilter()));
|
||||
inWord = false;
|
||||
}
|
||||
if (stats.getFilter().getNgramValue() > 1 && stats.getFilter().getNotePunctuations() && inPunctuation && sentence.size() > 0) {
|
||||
String punctuation = characters.getData();
|
||||
sentence.add(new Word(punctuation, punctuation, "/"));
|
||||
sentence.add(createWord(punctuation, punctuation, "/", punctuation, stats.getFilter()));
|
||||
inPunctuation = false;
|
||||
|
||||
// String punctuation = ",";
|
||||
@@ -761,7 +761,7 @@ public class XML_processing {
|
||||
// GOSCorpusHM.put(GOSCorpusHMKey, sentence);
|
||||
String word = "";
|
||||
Characters characters = event.asCharacters();
|
||||
sentence.add(new Word(characters.getData(), "", ""));
|
||||
sentence.add(createWord(characters.getData(), "", "", "", stats.getFilter()));
|
||||
// if algorithm is in normalized part find orthodox word and add other info to it
|
||||
} else {
|
||||
Characters characters = event.asCharacters();
|
||||
@@ -769,15 +769,16 @@ public class XML_processing {
|
||||
// System.out.println(GOSCorpusHMKey + " " + lemma + " " + wordIndex);
|
||||
if (wordIndex < GOSCorpusHM.get(GOSCorpusHMKey).size()) {
|
||||
Word currentWord = GOSCorpusHM.get(GOSCorpusHMKey).get(wordIndex);
|
||||
currentWord.setLemma(lemma);
|
||||
currentWord.setMsd(msd);
|
||||
currentWord.setNormalizedWord(characters.getData());
|
||||
currentWord.setLemma(lemma, stats.getFilter().getWordParts());
|
||||
currentWord.setMsd(msd, stats.getFilter().getWordParts());
|
||||
currentWord.setNormalizedWord(characters.getData(), stats.getFilter().getWordParts());
|
||||
|
||||
wordIndex += 1;
|
||||
|
||||
// when a word is separated from one to many we have to create these duplicates
|
||||
if (inSeparatedWord){
|
||||
GOSCorpusHM.get(GOSCorpusHMKey).add(wordIndex, new Word(currentWord.getWord(), "", ""));
|
||||
GOSCorpusHM.get(GOSCorpusHMKey).add(wordIndex, createWord(currentWord.getWord(stats.getFilter().getWordParts()),
|
||||
"", "", "", stats.getFilter()));
|
||||
}
|
||||
} //else {
|
||||
// System.out.println("Error");
|
||||
@@ -893,8 +894,8 @@ public class XML_processing {
|
||||
|
||||
// if we're calculating values for letters, omit words that are shorter than string length
|
||||
if (filter.getNgramValue() == 0) {
|
||||
sentence.removeIf(w -> (filter.getCalculateFor() == CalculateFor.WORD && w.getWord().length() < filter.getStringLength())
|
||||
|| (filter.getCalculateFor() == CalculateFor.LEMMA && w.getLemma().length() < filter.getStringLength()));
|
||||
sentence.removeIf(w -> (filter.getCalculateFor() == CalculateFor.WORD && w.getWord(filter.getWordParts()).length() < filter.getStringLength())
|
||||
|| (filter.getCalculateFor() == CalculateFor.LEMMA && w.getLemma(filter.getWordParts()).length() < filter.getStringLength()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -912,4 +913,38 @@ public class XML_processing {
|
||||
|
||||
return atts;
|
||||
}
|
||||
|
||||
private static Word createWord(String word, String lemma, String msd, String normalizedWord, Filter f){
|
||||
List<String> wString = new ArrayList<>();
|
||||
if (f.getWordParts().contains(CalculateFor.WORD))
|
||||
wString.add(word);
|
||||
if (f.getWordParts().contains(CalculateFor.LEMMA))
|
||||
wString.add(lemma);
|
||||
if (f.getWordParts().contains(CalculateFor.MORPHOSYNTACTIC_SPECS))
|
||||
wString.add(msd);
|
||||
if (f.getWordParts().contains(CalculateFor.NORMALIZED_WORD))
|
||||
wString.add(normalizedWord);
|
||||
|
||||
// find appropriate strings and put them in word
|
||||
Word w;
|
||||
|
||||
switch (f.getWordParts().size()) {
|
||||
case 1:
|
||||
w = new Word1(wString.get(0));
|
||||
break;
|
||||
case 2:
|
||||
w = new Word2(wString.get(0), wString.get(1));
|
||||
break;
|
||||
case 3:
|
||||
w = new Word3(wString.get(0), wString.get(1), wString.get(2));
|
||||
break;
|
||||
case 4:
|
||||
w = new Word4(wString.get(0), wString.get(1), wString.get(2), wString.get(3));
|
||||
break;
|
||||
default:
|
||||
w = null;
|
||||
|
||||
}
|
||||
return w;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,67 +1,67 @@
|
||||
package alg.inflectedJOS;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.concurrent.RecursiveAction;
|
||||
|
||||
import data.Sentence;
|
||||
import data.Statistics;
|
||||
|
||||
public class ForkJoin extends RecursiveAction {
|
||||
private static final long serialVersionUID = -1260951004477299634L;
|
||||
|
||||
private static final int ACCEPTABLE_SIZE = 1000;
|
||||
private List<Sentence> corpus;
|
||||
private Statistics stats;
|
||||
private int start;
|
||||
private int end;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for subproblems.
|
||||
*/
|
||||
private ForkJoin(List<Sentence> corpus, int start, int end, Statistics stats) {
|
||||
this.corpus = corpus;
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
this.stats = stats;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor for the initial problem
|
||||
*/
|
||||
public ForkJoin(List<Sentence> corpus, Statistics stats) {
|
||||
this.corpus = corpus;
|
||||
this.start = 0;
|
||||
this.end = corpus.size();
|
||||
this.stats = stats;
|
||||
}
|
||||
|
||||
private void computeDirectly() {
|
||||
List<Sentence> subCorpus = corpus.subList(start, end);
|
||||
|
||||
if (stats.isTaxonomySet()) {
|
||||
InflectedJOSCount.calculateForAll(subCorpus, stats, stats.getInflectedJosTaxonomy());
|
||||
} else {
|
||||
InflectedJOSCount.calculateForAll(subCorpus, stats, null);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void compute() {
|
||||
int subCorpusSize = end - start;
|
||||
|
||||
if (subCorpusSize < ACCEPTABLE_SIZE) {
|
||||
computeDirectly();
|
||||
} else {
|
||||
int mid = start + subCorpusSize / 2;
|
||||
ForkJoin left = new ForkJoin(corpus, start, mid, stats);
|
||||
ForkJoin right = new ForkJoin(corpus, mid, end, stats);
|
||||
|
||||
// fork (push to queue)-> compute -> join
|
||||
left.fork();
|
||||
right.fork();
|
||||
left.join();
|
||||
right.join();
|
||||
}
|
||||
}
|
||||
}
|
||||
//package alg.inflectedJOS;
|
||||
//
|
||||
//import java.util.List;
|
||||
//import java.util.concurrent.RecursiveAction;
|
||||
//
|
||||
//import data.Sentence;
|
||||
//import data.Statistics;
|
||||
//
|
||||
//public class ForkJoin extends RecursiveAction {
|
||||
// private static final long serialVersionUID = -1260951004477299634L;
|
||||
//
|
||||
// private static final int ACCEPTABLE_SIZE = 1000;
|
||||
// private List<Sentence> corpus;
|
||||
// private Statistics stats;
|
||||
// private int start;
|
||||
// private int end;
|
||||
//
|
||||
//
|
||||
// /**
|
||||
// * Constructor for subproblems.
|
||||
// */
|
||||
// private ForkJoin(List<Sentence> corpus, int start, int end, Statistics stats) {
|
||||
// this.corpus = corpus;
|
||||
// this.start = start;
|
||||
// this.end = end;
|
||||
// this.stats = stats;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Default constructor for the initial problem
|
||||
// */
|
||||
// public ForkJoin(List<Sentence> corpus, Statistics stats) {
|
||||
// this.corpus = corpus;
|
||||
// this.start = 0;
|
||||
// this.end = corpus.size();
|
||||
// this.stats = stats;
|
||||
// }
|
||||
//
|
||||
// private void computeDirectly() {
|
||||
// List<Sentence> subCorpus = corpus.subList(start, end);
|
||||
//
|
||||
// if (stats.isTaxonomySet()) {
|
||||
// InflectedJOSCount.calculateForAll(subCorpus, stats, stats.getInflectedJosTaxonomy());
|
||||
// } else {
|
||||
// InflectedJOSCount.calculateForAll(subCorpus, stats, null);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// protected void compute() {
|
||||
// int subCorpusSize = end - start;
|
||||
//
|
||||
// if (subCorpusSize < ACCEPTABLE_SIZE) {
|
||||
// computeDirectly();
|
||||
// } else {
|
||||
// int mid = start + subCorpusSize / 2;
|
||||
// ForkJoin left = new ForkJoin(corpus, start, mid, stats);
|
||||
// ForkJoin right = new ForkJoin(corpus, mid, end, stats);
|
||||
//
|
||||
// // fork (push to queue)-> compute -> join
|
||||
// left.fork();
|
||||
// right.fork();
|
||||
// left.join();
|
||||
// right.join();
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
|
||||
@@ -1,170 +1,170 @@
|
||||
package alg.inflectedJOS;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import alg.Common;
|
||||
import data.Sentence;
|
||||
import data.Statistics;
|
||||
import data.StatisticsNew;
|
||||
import data.Word;
|
||||
|
||||
public class InflectedJOSCount {
|
||||
|
||||
public static HashMap<Integer, ArrayList<ArrayList<Integer>>> indices;
|
||||
|
||||
// static {
|
||||
// // calculate all possible combinations of indices we will substitute with a '-' for substring statistics
|
||||
// indices = new HashMap<>();
|
||||
// for (int i = 5; i <= 8; i++) {
|
||||
// indices.put(i, calculateCombinations(i));
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// private static List<Integer> calculateCombinations(int i) {
|
||||
// int arr[] = {1, 2, 3, 4, 5};
|
||||
// int r = 3;
|
||||
// int n = arr.length;
|
||||
// ArrayList<ArrayList<Integer>> result = new ArrayList<>();
|
||||
//
|
||||
// return printCombination(arr, n, r);
|
||||
// }
|
||||
//
|
||||
// /* arr[] ---> Input Array
|
||||
// data[] ---> Temporary array to store current combination
|
||||
// start & end ---> Staring and Ending indexes in arr[]
|
||||
// index ---> Current index in data[]
|
||||
// r ---> Size of a combination to be printed */
|
||||
// static void combinationUtil(int arr[], int data[], int start,
|
||||
// int end, int index, int r, ArrayList<ArrayList<Integer>> result) {
|
||||
// // Current combination is ready to be printed, print it
|
||||
// ArrayList<Integer> tmpResult = new ArrayList<>();
|
||||
//
|
||||
// if (index == r) {
|
||||
// ArrayList<Integer> tmpResult = new ArrayList<>();
|
||||
// for (int j = 0; j < r; j++)
|
||||
// System.out.print(data[j] + " ");
|
||||
// System.out.println("");
|
||||
// return;
|
||||
// }
|
||||
//
|
||||
// // replace index with all possible elements. The condition
|
||||
// // "end-i+1 >= r-index" makes sure that including one element
|
||||
// // at index will make a combination with remaining elements
|
||||
// // at remaining positions
|
||||
// for (int i = start; i <= end && end - i + 1 >= r - index; i++) {
|
||||
// data[index] = arr[i];
|
||||
// combinationUtil(arr, data, i + 1, end, index + 1, r);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // The main function that prints all combinations of size r
|
||||
// // in arr[] of size n. This function mainly uses combinationUtil()
|
||||
// static void printCombination(int arr[], int n, int r) {
|
||||
// // A temporary array to store all combination one by one
|
||||
// int data[] = new int[r];
|
||||
//
|
||||
// // Print all combination using temprary array 'data[]'
|
||||
// combinationUtil(arr, data, 0, n - 1, 0, r);
|
||||
// }
|
||||
|
||||
// public static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
|
||||
// for (Sentence s : corpus) {
|
||||
// // disregard if wrong taxonomy
|
||||
// if (!(s.getTaxonomy().startsWith(taxonomy))) {
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// calculateCommon(s, stats.result);
|
||||
//
|
||||
// for (Word word : s.getWords()) {
|
||||
// // skip if current word is not inflected
|
||||
// if (!(word.getMsd().length() > 0)) {
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// String msd = word.getMsd();
|
||||
//
|
||||
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
//
|
||||
// for (int i = 1; i < msd.length(); i++) {
|
||||
// entry.setCharAt(i, msd.charAt(i));
|
||||
// Common.updateMap(stats.result, entry.toString());
|
||||
// entry.setCharAt(i, '-');
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// public static void calculateForAll(List<Sentence> corpus, Statistics stats) {
|
||||
// for (Sentence s : corpus) {
|
||||
// for (Word word : s.getWords()) {
|
||||
// if (!(word.getMsd().length() > 0)) {
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// String msd = word.getMsd();
|
||||
//
|
||||
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
//
|
||||
// for (int i = 1; i < msd.length(); i++) {
|
||||
// entry.setCharAt(i, msd.charAt(i));
|
||||
// Common.updateMap(stats.result, entry.toString());
|
||||
// entry.setCharAt(i, '-');
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
|
||||
for (Sentence s : corpus) {
|
||||
// disregard if wrong taxonomy
|
||||
// if (taxonomy != null && !(s.getTaxonomy().startsWith(taxonomy))) {
|
||||
// continue;
|
||||
//package alg.inflectedJOS;
|
||||
//
|
||||
//import java.util.ArrayList;
|
||||
//import java.util.HashMap;
|
||||
//import java.util.List;
|
||||
//
|
||||
//import org.apache.commons.lang3.StringUtils;
|
||||
//
|
||||
//import alg.Common;
|
||||
//import data.Sentence;
|
||||
//import data.Statistics;
|
||||
//import data.StatisticsNew;
|
||||
//import data.Word;
|
||||
//
|
||||
//public class InflectedJOSCount {
|
||||
//
|
||||
// public static HashMap<Integer, ArrayList<ArrayList<Integer>>> indices;
|
||||
//
|
||||
// // static {
|
||||
// // // calculate all possible combinations of indices we will substitute with a '-' for substring statistics
|
||||
// // indices = new HashMap<>();
|
||||
// // for (int i = 5; i <= 8; i++) {
|
||||
// // indices.put(i, calculateCombinations(i));
|
||||
// // }
|
||||
// // }
|
||||
// //
|
||||
// // private static List<Integer> calculateCombinations(int i) {
|
||||
// // int arr[] = {1, 2, 3, 4, 5};
|
||||
// // int r = 3;
|
||||
// // int n = arr.length;
|
||||
// // ArrayList<ArrayList<Integer>> result = new ArrayList<>();
|
||||
// //
|
||||
// // return printCombination(arr, n, r);
|
||||
// // }
|
||||
// //
|
||||
// // /* arr[] ---> Input Array
|
||||
// // data[] ---> Temporary array to store current combination
|
||||
// // start & end ---> Staring and Ending indexes in arr[]
|
||||
// // index ---> Current index in data[]
|
||||
// // r ---> Size of a combination to be printed */
|
||||
// // static void combinationUtil(int arr[], int data[], int start,
|
||||
// // int end, int index, int r, ArrayList<ArrayList<Integer>> result) {
|
||||
// // // Current combination is ready to be printed, print it
|
||||
// // ArrayList<Integer> tmpResult = new ArrayList<>();
|
||||
// //
|
||||
// // if (index == r) {
|
||||
// // ArrayList<Integer> tmpResult = new ArrayList<>();
|
||||
// // for (int j = 0; j < r; j++)
|
||||
// // System.out.print(data[j] + " ");
|
||||
// // System.out.println("");
|
||||
// // return;
|
||||
// // }
|
||||
// //
|
||||
// // // replace index with all possible elements. The condition
|
||||
// // // "end-i+1 >= r-index" makes sure that including one element
|
||||
// // // at index will make a combination with remaining elements
|
||||
// // // at remaining positions
|
||||
// // for (int i = start; i <= end && end - i + 1 >= r - index; i++) {
|
||||
// // data[index] = arr[i];
|
||||
// // combinationUtil(arr, data, i + 1, end, index + 1, r);
|
||||
// // }
|
||||
// // }
|
||||
// //
|
||||
// // // The main function that prints all combinations of size r
|
||||
// // // in arr[] of size n. This function mainly uses combinationUtil()
|
||||
// // static void printCombination(int arr[], int n, int r) {
|
||||
// // // A temporary array to store all combination one by one
|
||||
// // int data[] = new int[r];
|
||||
// //
|
||||
// // // Print all combination using temprary array 'data[]'
|
||||
// // combinationUtil(arr, data, 0, n - 1, 0, r);
|
||||
// // }
|
||||
//
|
||||
// // public static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
|
||||
// // for (Sentence s : corpus) {
|
||||
// // // disregard if wrong taxonomy
|
||||
// // if (!(s.getTaxonomy().startsWith(taxonomy))) {
|
||||
// // continue;
|
||||
// // }
|
||||
// //
|
||||
// // calculateCommon(s, stats.result);
|
||||
// //
|
||||
// // for (Word word : s.getWords()) {
|
||||
// // // skip if current word is not inflected
|
||||
// // if (!(word.getMsd().length() > 0)) {
|
||||
// // continue;
|
||||
// // }
|
||||
// //
|
||||
// // String msd = word.getMsd();
|
||||
// //
|
||||
// // StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
// //
|
||||
// // for (int i = 1; i < msd.length(); i++) {
|
||||
// // entry.setCharAt(i, msd.charAt(i));
|
||||
// // Common.updateMap(stats.result, entry.toString());
|
||||
// // entry.setCharAt(i, '-');
|
||||
// // }
|
||||
// // }
|
||||
// // }
|
||||
// // }
|
||||
//
|
||||
// // public static void calculateForAll(List<Sentence> corpus, Statistics stats) {
|
||||
// // for (Sentence s : corpus) {
|
||||
// // for (Word word : s.getWords()) {
|
||||
// // if (!(word.getMsd().length() > 0)) {
|
||||
// // continue;
|
||||
// // }
|
||||
// //
|
||||
// // String msd = word.getMsd();
|
||||
// //
|
||||
// // StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
// //
|
||||
// // for (int i = 1; i < msd.length(); i++) {
|
||||
// // entry.setCharAt(i, msd.charAt(i));
|
||||
// // Common.updateMap(stats.result, entry.toString());
|
||||
// // entry.setCharAt(i, '-');
|
||||
// // }
|
||||
// // }
|
||||
// // }
|
||||
// // }
|
||||
//
|
||||
// static void calculateForAll(List<Sentence> corpus, Statistics stats, String taxonomy) {
|
||||
// for (Sentence s : corpus) {
|
||||
// // disregard if wrong taxonomy
|
||||
//// if (taxonomy != null && !(s.getTaxonomy().startsWith(taxonomy))) {
|
||||
//// continue;
|
||||
//// }
|
||||
//
|
||||
// for (Word word : s.getWords()) {
|
||||
// // skip if current word is not inflected
|
||||
// if (!(word.getMsd().length() > 0)) {
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// String msd = word.getMsd();
|
||||
//
|
||||
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
//
|
||||
// for (int i = 1; i < msd.length(); i++) {
|
||||
// entry.setCharAt(i, msd.charAt(i));
|
||||
// Common.updateMap(stats.result, entry.toString());
|
||||
// entry.setCharAt(i, '-');
|
||||
// }
|
||||
// }
|
||||
|
||||
for (Word word : s.getWords()) {
|
||||
// skip if current word is not inflected
|
||||
if (!(word.getMsd().length() > 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String msd = word.getMsd();
|
||||
|
||||
StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
|
||||
for (int i = 1; i < msd.length(); i++) {
|
||||
entry.setCharAt(i, msd.charAt(i));
|
||||
Common.updateMap(stats.result, entry.toString());
|
||||
entry.setCharAt(i, '-');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats, String taxonomy) {
|
||||
for (Sentence s : corpus) {
|
||||
|
||||
for (Word word : s.getWords()) {
|
||||
// skip if current word is not inflected
|
||||
// // TODO: if has defined msd and is of correct type (create a set)
|
||||
// if (!(word.getMsd().length() > 0)) {
|
||||
// continue;
|
||||
// }
|
||||
|
||||
String msd = word.getMsd();
|
||||
|
||||
StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
|
||||
for (int i = 1; i < msd.length(); i++) {
|
||||
entry.setCharAt(i, msd.charAt(i));
|
||||
stats.updateResults(entry.toString());
|
||||
entry.setCharAt(i, '-');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats, String taxonomy) {
|
||||
// for (Sentence s : corpus) {
|
||||
//
|
||||
// for (Word word : s.getWords()) {
|
||||
// // skip if current word is not inflected
|
||||
// // // TODO: if has defined msd and is of correct type (create a set)
|
||||
// // if (!(word.getMsd().length() > 0)) {
|
||||
// // continue;
|
||||
// // }
|
||||
//
|
||||
// String msd = word.getMsd();
|
||||
//
|
||||
// StringBuilder entry = new StringBuilder(msd.charAt(0) + StringUtils.repeat('-', (msd.length() - 1)));
|
||||
//
|
||||
// for (int i = 1; i < msd.length(); i++) {
|
||||
// entry.setCharAt(i, msd.charAt(i));
|
||||
// stats.updateResults(entry.toString());
|
||||
// entry.setCharAt(i, '-');
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
|
||||
@@ -43,12 +43,12 @@ public class Ngrams {
|
||||
List<Word> ngramCandidate = s.getSublist(i, i + stats.getFilter().getNgramValue());
|
||||
|
||||
// if msd regex is set and this candidate doesn't pass it, skip this iteration
|
||||
if (stats.getFilter().hasMsd() && !passesRegex(ngramCandidate, stats.getFilter().getMsd())) {
|
||||
if (stats.getFilter().hasMsd() && !passesRegex(ngramCandidate, stats.getFilter().getMsd(), stats.getFilter().getWordParts())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// generate proper MultipleHMKeys depending on filter data
|
||||
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor());
|
||||
String key = wordToString(ngramCandidate, stats.getFilter().getCalculateFor(), stats.getFilter().getWordParts());
|
||||
|
||||
// if last letter is ',' erase it
|
||||
|
||||
@@ -67,14 +67,14 @@ public class Ngrams {
|
||||
multipleKeys = new MultipleHMKeys1(key);
|
||||
break;
|
||||
case 1:
|
||||
String k1_2 = wordToString(ngramCandidate, otherKeys.get(0));
|
||||
String k1_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations())
|
||||
// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length()-1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
||||
multipleKeys = new MultipleHMKeys2(key, k1_2);
|
||||
break;
|
||||
case 2:
|
||||
String k2_2 = wordToString(ngramCandidate, otherKeys.get(0));
|
||||
String k2_3 = wordToString(ngramCandidate, otherKeys.get(1));
|
||||
String k2_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||
String k2_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
||||
// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
||||
@@ -82,9 +82,9 @@ public class Ngrams {
|
||||
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
|
||||
break;
|
||||
case 3:
|
||||
String k3_2 = wordToString(ngramCandidate, otherKeys.get(0));
|
||||
String k3_3 = wordToString(ngramCandidate, otherKeys.get(1));
|
||||
String k3_4 = wordToString(ngramCandidate, otherKeys.get(2));
|
||||
String k3_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||
String k3_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||
String k3_4 = wordToString(ngramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
||||
// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
||||
@@ -93,10 +93,10 @@ public class Ngrams {
|
||||
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
|
||||
break;
|
||||
case 4:
|
||||
String k4_2 = wordToString(ngramCandidate, otherKeys.get(0));
|
||||
String k4_3 = wordToString(ngramCandidate, otherKeys.get(1));
|
||||
String k4_4 = wordToString(ngramCandidate, otherKeys.get(2));
|
||||
String k4_5 = wordToString(ngramCandidate, otherKeys.get(3));
|
||||
String k4_2 = wordToString(ngramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||
String k4_3 = wordToString(ngramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||
String k4_4 = wordToString(ngramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
||||
String k4_5 = wordToString(ngramCandidate, otherKeys.get(3), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
||||
// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
||||
@@ -137,7 +137,7 @@ public class Ngrams {
|
||||
/**
|
||||
* Checks whether an ngram candidate passes specified regex filter.
|
||||
*/
|
||||
private static boolean passesRegex(List<Word> ngramCandidate, ArrayList<Pattern> regex) {
|
||||
private static boolean passesRegex(List<Word> ngramCandidate, ArrayList<Pattern> regex, ArrayList<CalculateFor> wordParts) {
|
||||
if (ngramCandidate.size() != regex.size()) {
|
||||
logger.error("ngramCandidate.size() & msd.size() mismatch"); // should not occur anyway
|
||||
return false;
|
||||
@@ -145,7 +145,7 @@ public class Ngrams {
|
||||
|
||||
for (int i = 0; i < regex.size(); i++) {
|
||||
//if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern())) {
|
||||
if (!ngramCandidate.get(i).getMsd().matches(regex.get(i).pattern() + ".*")) {
|
||||
if (!ngramCandidate.get(i).getMsd(wordParts).matches(regex.get(i).pattern() + ".*")) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -153,33 +153,33 @@ public class Ngrams {
|
||||
return true;
|
||||
}
|
||||
|
||||
private static String wordToString(List<Word> ngramCandidate, CalculateFor calculateFor) {
|
||||
private static String wordToString(List<Word> ngramCandidate, CalculateFor calculateFor, ArrayList<CalculateFor> wordParts) {
|
||||
ArrayList<String> candidate = new ArrayList<>(ngramCandidate.size());
|
||||
|
||||
switch (calculateFor) {
|
||||
case LEMMA:
|
||||
candidate.addAll(ngramCandidate
|
||||
.stream()
|
||||
.map(Word::getLemma)
|
||||
.map(w -> w.getLemma(wordParts))
|
||||
.collect(Collectors.toList()));
|
||||
return StringUtils.join(candidate, " ");
|
||||
case WORD:
|
||||
candidate.addAll(ngramCandidate
|
||||
.stream()
|
||||
.map(Word::getWord)
|
||||
.map(w -> w.getWord(wordParts))
|
||||
.collect(Collectors.toList()));
|
||||
return StringUtils.join(candidate, " ");
|
||||
case MORPHOSYNTACTIC_SPECS:
|
||||
case MORPHOSYNTACTIC_PROPERTY:
|
||||
candidate.addAll(ngramCandidate
|
||||
.stream()
|
||||
.map(Word::getMsd)
|
||||
.map(w -> w.getMsd(wordParts))
|
||||
.collect(Collectors.toList()));
|
||||
return StringUtils.join(candidate, " ");
|
||||
case WORD_TYPE:
|
||||
candidate.addAll(ngramCandidate
|
||||
.stream()
|
||||
.map(w -> Character.toString(w.getMsd().charAt(0)))
|
||||
.map(w -> Character.toString(w.getMsd(wordParts).charAt(0)))
|
||||
.collect(Collectors.toList()));
|
||||
// candidate.addAll(ngramCandidate
|
||||
// .stream()
|
||||
@@ -190,7 +190,7 @@ public class Ngrams {
|
||||
case NORMALIZED_WORD:
|
||||
candidate.addAll(ngramCandidate
|
||||
.stream()
|
||||
.map(Word::getNormalizedWord)
|
||||
.map(w -> w.getNormalizedWord(wordParts))
|
||||
.collect(Collectors.toList()));
|
||||
return StringUtils.join(candidate, " ");
|
||||
}
|
||||
@@ -208,14 +208,14 @@ public class Ngrams {
|
||||
for (Sentence s : corpus) {
|
||||
for (Word w : s.getWords()) {
|
||||
List<String> taxonomy = s.getTaxonomy();
|
||||
String word = w.getForCf(stats.getFilter().getCalculateFor(), stats.getFilter().isCvv());
|
||||
String word = w.getForCf(stats.getFilter().getCalculateFor(), stats.getFilter().isCvv(), stats.getFilter().getWordParts());
|
||||
|
||||
// skip this iteration if:
|
||||
// - word doesn't contain a proper version (missing lemma for example)
|
||||
// - msd regex is given but this word's msd doesn't match it, skip this iteration
|
||||
// - given substring length is larger than the word length
|
||||
if (ValidationUtil.isEmpty(word)
|
||||
|| stats.getFilter().hasMsd() && !w.getMsd().matches(stats.getFilter().getMsd().get(0).pattern())
|
||||
|| stats.getFilter().hasMsd() && !w.getMsd(stats.getFilter().getWordParts()).matches(stats.getFilter().getMsd().get(0).pattern())
|
||||
|| word.length() < stats.getFilter().getStringLength()) {
|
||||
continue;
|
||||
}
|
||||
@@ -331,7 +331,7 @@ public class Ngrams {
|
||||
|
||||
private static void validateAndCountSkipgramCandidate(ArrayList<Word> skipgramCandidate, StatisticsNew stats, List<String> taxonomy) {
|
||||
// count if no regex is set or if it is & candidate passes it
|
||||
if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd())) {
|
||||
if (!stats.getFilter().hasMsd() || passesRegex(skipgramCandidate, stats.getFilter().getMsd(), stats.getFilter().getWordParts())) {
|
||||
// String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
|
||||
// key = (key.charAt(key.length()-1) == ',') ? key.substring(0, key.length() - 1) : key;
|
||||
// stats.updateTaxonomyResults(new MultipleHMKeys1(key),
|
||||
@@ -340,7 +340,7 @@ public class Ngrams {
|
||||
|
||||
ArrayList<CalculateFor> otherKeys = stats.getFilter().getMultipleKeys();
|
||||
|
||||
String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor());
|
||||
String key = wordToString(skipgramCandidate, stats.getFilter().getCalculateFor(), stats.getFilter().getWordParts());
|
||||
|
||||
// if last letter is ',' erase it
|
||||
|
||||
@@ -359,14 +359,14 @@ public class Ngrams {
|
||||
multipleKeys = new MultipleHMKeys1(key);
|
||||
break;
|
||||
case 1:
|
||||
String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0));
|
||||
String k1_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations())
|
||||
// k1_2 = (!k1_2.equals("") && k1_2.charAt(k1_2.length() - 1) == ',') ? k1_2.substring(0, k1_2.length() - 1) : k1_2;
|
||||
multipleKeys = new MultipleHMKeys2(key, k1_2);
|
||||
break;
|
||||
case 2:
|
||||
String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0));
|
||||
String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1));
|
||||
String k2_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||
String k2_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k2_2 = (!k2_2.equals("") && k2_2.charAt(k2_2.length() - 1) == ',') ? k2_2.substring(0, k2_2.length() - 1) : k2_2;
|
||||
// k2_3 = (!k2_3.equals("") && k2_3.charAt(k2_3.length() - 1) == ',') ? k2_3.substring(0, k2_3.length() - 1) : k2_3;
|
||||
@@ -374,9 +374,9 @@ public class Ngrams {
|
||||
multipleKeys = new MultipleHMKeys3(key, k2_2, k2_3);
|
||||
break;
|
||||
case 3:
|
||||
String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0));
|
||||
String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1));
|
||||
String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2));
|
||||
String k3_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||
String k3_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||
String k3_4 = wordToString(skipgramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k3_2 = (!k3_2.equals("") && k3_2.charAt(k3_2.length() - 1) == ',') ? k3_2.substring(0, k3_2.length() - 1) : k3_2;
|
||||
// k3_3 = (!k3_3.equals("") && k3_3.charAt(k3_3.length() - 1) == ',') ? k3_3.substring(0, k3_3.length() - 1) : k3_3;
|
||||
@@ -385,10 +385,10 @@ public class Ngrams {
|
||||
multipleKeys = new MultipleHMKeys4(key, k3_2, k3_3, k3_4);
|
||||
break;
|
||||
case 4:
|
||||
String k4_2 = wordToString(skipgramCandidate, otherKeys.get(0));
|
||||
String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1));
|
||||
String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2));
|
||||
String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3));
|
||||
String k4_2 = wordToString(skipgramCandidate, otherKeys.get(0), stats.getFilter().getWordParts());
|
||||
String k4_3 = wordToString(skipgramCandidate, otherKeys.get(1), stats.getFilter().getWordParts());
|
||||
String k4_4 = wordToString(skipgramCandidate, otherKeys.get(2), stats.getFilter().getWordParts());
|
||||
String k4_5 = wordToString(skipgramCandidate, otherKeys.get(3), stats.getFilter().getWordParts());
|
||||
// if (stats.getFilter().getNotePunctuations()) {
|
||||
// k4_2 = (!k4_2.equals("") && k4_2.charAt(k4_2.length() - 1) == ',') ? k4_2.substring(0, k4_2.length() - 1) : k4_2;
|
||||
// k4_3 = (!k4_3.equals("") && k4_3.charAt(k4_3.length() - 1) == ',') ? k4_3.substring(0, k4_3.length() - 1) : k4_3;
|
||||
|
||||
@@ -10,84 +10,84 @@ import data.Sentence;
|
||||
import data.Statistics;
|
||||
import data.Word;
|
||||
|
||||
class WordCount {
|
||||
private static void calculateNoFilter(List<Sentence> corpus, Statistics stats) {
|
||||
for (Sentence s : corpus) {
|
||||
List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
|
||||
if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
sentence.addAll(s.getWords()
|
||||
.stream()
|
||||
.map(Word::getLemma)
|
||||
.collect(Collectors.toList()));
|
||||
} else if (stats.getCf() == CalculateFor.WORD) {
|
||||
sentence.addAll(s.getWords()
|
||||
.stream()
|
||||
.map(Word::getWord)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
for (String word : sentence) {
|
||||
Common.updateMap(stats.result, word);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void calculateVCC(List<Sentence> corpus, Statistics stats) {
|
||||
for (Sentence s : corpus) {
|
||||
List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
|
||||
if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
sentence.addAll(s.getWords()
|
||||
.stream()
|
||||
.map(Word::getCVVLemma)
|
||||
.collect(Collectors.toList()));
|
||||
} else if (stats.getCf() == CalculateFor.WORD) {
|
||||
sentence.addAll(s.getWords()
|
||||
.stream()
|
||||
.map(Word::getCVVWord)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
for (String word : sentence) {
|
||||
if (word.length() > stats.getSubstringLength()) {
|
||||
for (int i = 0; i <= word.length() - stats.getSubstringLength(); i++) {
|
||||
String substring = word.substring(i, i + stats.getSubstringLength());
|
||||
Common.updateMap(stats.result, substring);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void calculateForJosType(List<Sentence> corpus, Statistics stats) {
|
||||
for (Sentence s : corpus) {
|
||||
List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
List<Word> filteredWords = new ArrayList<>();
|
||||
|
||||
for (Word word : s.getWords()) {
|
||||
if (word.getMsd() != null && word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {
|
||||
filteredWords.add(word);
|
||||
}
|
||||
}
|
||||
|
||||
if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
sentence.addAll(filteredWords
|
||||
.stream()
|
||||
.map(Word::getLemma)
|
||||
.collect(Collectors.toList()));
|
||||
} else if (stats.getCf() == CalculateFor.WORD) {
|
||||
sentence.addAll(filteredWords
|
||||
.stream()
|
||||
.map(Word::getWord)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
for (String word : sentence) {
|
||||
Common.updateMap(stats.result, word);
|
||||
}
|
||||
}
|
||||
}
|
||||
//class WordCount {
|
||||
// private static void calculateNoFilter(List<Sentence> corpus, Statistics stats) {
|
||||
// for (Sentence s : corpus) {
|
||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
//
|
||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
// sentence.addAll(s.getWords()
|
||||
// .stream()
|
||||
// .map(Word::getLemma)
|
||||
// .collect(Collectors.toList()));
|
||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
||||
// sentence.addAll(s.getWords()
|
||||
// .stream()
|
||||
// .map(Word::getWord)
|
||||
// .collect(Collectors.toList()));
|
||||
// }
|
||||
//
|
||||
// for (String word : sentence) {
|
||||
// Common.updateMap(stats.result, word);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// private static void calculateVCC(List<Sentence> corpus, Statistics stats) {
|
||||
// for (Sentence s : corpus) {
|
||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
//
|
||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
// sentence.addAll(s.getWords()
|
||||
// .stream()
|
||||
// .map(Word::getCVVLemma)
|
||||
// .collect(Collectors.toList()));
|
||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
||||
// sentence.addAll(s.getWords()
|
||||
// .stream()
|
||||
// .map(Word::getCVVWord)
|
||||
// .collect(Collectors.toList()));
|
||||
// }
|
||||
//
|
||||
// for (String word : sentence) {
|
||||
// if (word.length() > stats.getSubstringLength()) {
|
||||
// for (int i = 0; i <= word.length() - stats.getSubstringLength(); i++) {
|
||||
// String substring = word.substring(i, i + stats.getSubstringLength());
|
||||
// Common.updateMap(stats.result, substring);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// private static void calculateForJosType(List<Sentence> corpus, Statistics stats) {
|
||||
// for (Sentence s : corpus) {
|
||||
// List<String> sentence = new ArrayList<>(s.getWords().size());
|
||||
// List<Word> filteredWords = new ArrayList<>();
|
||||
//
|
||||
// for (Word word : s.getWords()) {
|
||||
// if (word.getMsd() != null && word.getMsd().charAt(0) == stats.getDistributionJosWordType()) {
|
||||
// filteredWords.add(word);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (stats.getCf() == CalculateFor.LEMMA) {
|
||||
// sentence.addAll(filteredWords
|
||||
// .stream()
|
||||
// .map(Word::getLemma)
|
||||
// .collect(Collectors.toList()));
|
||||
// } else if (stats.getCf() == CalculateFor.WORD) {
|
||||
// sentence.addAll(filteredWords
|
||||
// .stream()
|
||||
// .map(Word::getWord)
|
||||
// .collect(Collectors.toList()));
|
||||
// }
|
||||
//
|
||||
// for (String word : sentence) {
|
||||
// Common.updateMap(stats.result, word);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// private static void calculateForTaxonomyAndJosType(List<Sentence> corpus, Statistics stats) {
|
||||
// for (Sentence s : corpus) {
|
||||
@@ -164,4 +164,4 @@ class WordCount {
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
}
|
||||
//}
|
||||
@@ -34,8 +34,8 @@ public class WordLevel {
|
||||
public static void calculateForAll(List<Sentence> corpus, StatisticsNew stats) {
|
||||
for (Sentence s : corpus) {
|
||||
for (Word word : s.getWords()) {
|
||||
calculateForSuffixes(word.getWord(), stats);
|
||||
calculateForPrefixes(word.getWord(), stats);
|
||||
calculateForSuffixes(word.getWord(stats.getFilter().getWordParts()), stats);
|
||||
calculateForPrefixes(word.getWord(stats.getFilter().getWordParts()), stats);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user