Added GOS normalized words analysis in letter extraction + Fixed normalized words bugs with anonymous names in GOS (when extracting data within collocability)
This commit is contained in:
parent
e103bfa38d
commit
edcd8062bc
|
@ -27,6 +27,10 @@ public final class MultipleHMKeys2 implements MultipleHMKeys {
|
||||||
String[] splitedK1 = k1.split("\\s+");
|
String[] splitedK1 = k1.split("\\s+");
|
||||||
String[] splitedK2 = k2.split("\\s+");
|
String[] splitedK2 = k2.split("\\s+");
|
||||||
for(int i = 0; i < splitedK1.length; i ++){
|
for(int i = 0; i < splitedK1.length; i ++){
|
||||||
|
// in GOS words and normalized words may not both have specific word due to anon
|
||||||
|
if(!(i < splitedK2.length)){
|
||||||
|
continue;
|
||||||
|
}
|
||||||
MultipleHMKeys search = new MultipleHMKeys2(splitedK1[i], splitedK2[i]);
|
MultipleHMKeys search = new MultipleHMKeys2(splitedK1[i], splitedK2[i]);
|
||||||
r.add(search);
|
r.add(search);
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,10 @@ public final class MultipleHMKeys3 implements MultipleHMKeys {
|
||||||
String[] splitedK2 = k2.split("\\s+");
|
String[] splitedK2 = k2.split("\\s+");
|
||||||
String[] splitedK3 = k3.split("\\s+");
|
String[] splitedK3 = k3.split("\\s+");
|
||||||
for(int i = 0; i < splitedK1.length; i ++){
|
for(int i = 0; i < splitedK1.length; i ++){
|
||||||
|
// in GOS words and normalized words may not both have specific word due to anon
|
||||||
|
if(!(i < splitedK2.length && i < splitedK3.length)){
|
||||||
|
continue;
|
||||||
|
}
|
||||||
MultipleHMKeys search = new MultipleHMKeys3(splitedK1[i], splitedK2[i], splitedK3[i]);
|
MultipleHMKeys search = new MultipleHMKeys3(splitedK1[i], splitedK2[i], splitedK3[i]);
|
||||||
r.add(search);
|
r.add(search);
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,10 @@ public final class MultipleHMKeys4 implements MultipleHMKeys {
|
||||||
String[] splitedK3 = k3.split("\\s+");
|
String[] splitedK3 = k3.split("\\s+");
|
||||||
String[] splitedK4 = k4.split("\\s+");
|
String[] splitedK4 = k4.split("\\s+");
|
||||||
for(int i = 0; i < splitedK1.length; i ++){
|
for(int i = 0; i < splitedK1.length; i ++){
|
||||||
|
// in GOS words and normalized words may not both have specific word due to anon
|
||||||
|
if(!(i < splitedK2.length && i < splitedK3.length && i < splitedK4.length)){
|
||||||
|
continue;
|
||||||
|
}
|
||||||
MultipleHMKeys search = new MultipleHMKeys4(splitedK1[i], splitedK2[i], splitedK3[i], splitedK4[i]);
|
MultipleHMKeys search = new MultipleHMKeys4(splitedK1[i], splitedK2[i], splitedK3[i], splitedK4[i]);
|
||||||
r.add(search);
|
r.add(search);
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,6 +45,10 @@ public final class MultipleHMKeys5 implements MultipleHMKeys {
|
||||||
String[] splitedK4 = k4.split("\\s+");
|
String[] splitedK4 = k4.split("\\s+");
|
||||||
String[] splitedK5 = k5.split("\\s+");
|
String[] splitedK5 = k5.split("\\s+");
|
||||||
for(int i = 0; i < splitedK1.length; i ++){
|
for(int i = 0; i < splitedK1.length; i ++){
|
||||||
|
// in GOS words and normalized words may not both have specific word due to anon
|
||||||
|
if(!(i < splitedK2.length && i < splitedK3.length && i < splitedK4.length && i < splitedK5.length)){
|
||||||
|
continue;
|
||||||
|
}
|
||||||
MultipleHMKeys search = new MultipleHMKeys5(splitedK1[i], splitedK2[i], splitedK3[i], splitedK4[i], splitedK5[i]);
|
MultipleHMKeys search = new MultipleHMKeys5(splitedK1[i], splitedK2[i], splitedK3[i], splitedK4[i], splitedK5[i]);
|
||||||
r.add(search);
|
r.add(search);
|
||||||
}
|
}
|
||||||
|
|
|
@ -108,7 +108,14 @@ public interface Word {
|
||||||
if (cvv) {
|
if (cvv) {
|
||||||
returnValue = (calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LOWERCASE_WORD) ? getCVVWord(cf) : getCVVLemma(cf);
|
returnValue = (calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LOWERCASE_WORD) ? getCVVWord(cf) : getCVVLemma(cf);
|
||||||
} else {
|
} else {
|
||||||
returnValue = (calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LOWERCASE_WORD) ? getWord(cf) : getLemma(cf);
|
if (calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LOWERCASE_WORD){
|
||||||
|
returnValue = getWord(cf);
|
||||||
|
} else if (calculateFor == CalculateFor.LEMMA) {
|
||||||
|
returnValue = getLemma(cf);
|
||||||
|
} else if (calculateFor == CalculateFor.NORMALIZED_WORD){
|
||||||
|
returnValue = getNormalizedWord(cf);
|
||||||
|
}
|
||||||
|
// returnValue = (calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LOWERCASE_WORD) ? getWord(cf) : getLemma(cf);
|
||||||
}
|
}
|
||||||
|
|
||||||
return returnValue;
|
return returnValue;
|
||||||
|
|
|
@ -161,6 +161,7 @@ public class CharacterAnalysisTab {
|
||||||
private HashMap<String, HashSet<String>> solarFiltersMap;
|
private HashMap<String, HashSet<String>> solarFiltersMap;
|
||||||
private HostServices hostService;
|
private HostServices hostService;
|
||||||
private ListChangeListener<String> taxonomyListener;
|
private ListChangeListener<String> taxonomyListener;
|
||||||
|
private ChangeListener<String> calculateForListener;
|
||||||
private ChangeListener<Boolean> msdListener;
|
private ChangeListener<Boolean> msdListener;
|
||||||
private ChangeListener<Boolean> minimalOccurrencesListener;
|
private ChangeListener<Boolean> minimalOccurrencesListener;
|
||||||
private ChangeListener<Boolean> minimalTaxonomyListener;
|
private ChangeListener<Boolean> minimalTaxonomyListener;
|
||||||
|
@ -169,6 +170,9 @@ public class CharacterAnalysisTab {
|
||||||
private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
|
private static final String [] N_GRAM_COMPUTE_FOR_LETTERS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA"};
|
||||||
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY));
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_LETTERS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_LETTERS_ARRAY));
|
||||||
|
|
||||||
|
private static final String [] N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY = {"calculateFor.WORD", "calculateFor.LOWERCASE_WORD", "calculateFor.LEMMA", "calculateFor.NORMALIZED_WORD"};
|
||||||
|
private static final ArrayList<String> N_GRAM_COMPUTE_FOR_WORDS_GOS = new ArrayList<>(Arrays.asList(N_GRAM_COMPUTE_FOR_WORDS_GOS_ARRAY));
|
||||||
|
|
||||||
private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"};
|
private static final String [] TAXONOMY_SET_OPERATION_ARRAY = {"taxonomySetOperation.UNION", "taxonomySetOperation.INTERSECTION"};
|
||||||
private static final ArrayList<String> TAXONOMY_SET_OPERATION = new ArrayList<>(Arrays.asList(TAXONOMY_SET_OPERATION_ARRAY));
|
private static final ArrayList<String> TAXONOMY_SET_OPERATION = new ArrayList<>(Arrays.asList(TAXONOMY_SET_OPERATION_ARRAY));
|
||||||
|
|
||||||
|
@ -184,14 +188,46 @@ public class CharacterAnalysisTab {
|
||||||
currentMode = MODE.LETTER;
|
currentMode = MODE.LETTER;
|
||||||
toggleMode(currentMode);
|
toggleMode(currentMode);
|
||||||
|
|
||||||
calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
if (calculateForListener != null){
|
||||||
if(newValue == null){
|
calculateForCB.valueProperty().removeListener(calculateForListener);
|
||||||
newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
|
}
|
||||||
calculateForCB.getSelectionModel().select(newValue);
|
|
||||||
|
// calculateForCB
|
||||||
|
calculateForListener = new ChangeListener<String>() {
|
||||||
|
boolean ignoreCode = false;
|
||||||
|
@Override
|
||||||
|
public void changed(ObservableValue<? extends String> observable, String oldValue, String newValue) {
|
||||||
|
if (ignoreCode) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
boolean languageChanged = newValue == null;
|
||||||
|
if (languageChanged) {
|
||||||
|
ignoreCode = true;
|
||||||
|
if (corpus.getCorpusType() == CorpusType.GOS) {
|
||||||
|
newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_WORDS_GOS);
|
||||||
|
calculateForCB.getSelectionModel().select(newValue);
|
||||||
|
} else {
|
||||||
|
newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
|
||||||
|
calculateForCB.getSelectionModel().select(newValue);
|
||||||
|
}
|
||||||
|
ignoreCode = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
calculateFor = CalculateFor.factory(newValue);
|
||||||
|
logger.info("calculateForCB:", calculateFor.toString());
|
||||||
}
|
}
|
||||||
calculateFor = CalculateFor.factory(newValue);
|
};
|
||||||
logger.info("calculateForCB:", calculateFor.toString());
|
|
||||||
});
|
calculateForCB.valueProperty().addListener(calculateForListener);
|
||||||
|
|
||||||
|
// calculateForCB.valueProperty().addListener((observable, oldValue, newValue) -> {
|
||||||
|
// if(newValue == null){
|
||||||
|
// newValue = I18N.getTranslatedValue(oldValue, N_GRAM_COMPUTE_FOR_LETTERS);
|
||||||
|
// calculateForCB.getSelectionModel().select(newValue);
|
||||||
|
// }
|
||||||
|
// calculateFor = CalculateFor.factory(newValue);
|
||||||
|
// logger.info("calculateForCB:", calculateFor.toString());
|
||||||
|
// });
|
||||||
|
|
||||||
calculateForCB.getSelectionModel().select(0);
|
calculateForCB.getSelectionModel().select(0);
|
||||||
|
|
||||||
|
@ -497,12 +533,19 @@ public class CharacterAnalysisTab {
|
||||||
stringLengthTF.setText(String.valueOf(stringLength));
|
stringLengthTF.setText(String.valueOf(stringLength));
|
||||||
}
|
}
|
||||||
|
|
||||||
// if calculateFor was selected for something other than a word or a lemma -> reset
|
// // if calculateFor was selected for something other than a word or a lemma -> reset
|
||||||
if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA || calculateFor == CalculateFor.LOWERCASE_WORD)) {
|
// if (!(calculateFor == CalculateFor.WORD || calculateFor == CalculateFor.LEMMA || calculateFor == CalculateFor.LOWERCASE_WORD)) {
|
||||||
// if the user selected something else before selecting ngram for letters, reset that choice
|
// // if the user selected something else before selecting ngram for letters, reset that choice
|
||||||
calculateFor = CalculateFor.WORD;
|
// calculateFor = CalculateFor.WORD;
|
||||||
|
//
|
||||||
calculateForCB.getSelectionModel().select(0);
|
// calculateForCB.getSelectionModel().select(0);
|
||||||
|
// }
|
||||||
|
if (corpus.getCorpusType() == CorpusType.GOS) {
|
||||||
|
calculateForCB.itemsProperty().unbind();
|
||||||
|
calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_WORDS_GOS));
|
||||||
|
} else {
|
||||||
|
calculateForCB.itemsProperty().unbind();
|
||||||
|
calculateForCB.itemsProperty().bind(I18N.createObjectBinding(N_GRAM_COMPUTE_FOR_LETTERS));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user