You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
361 lines
8.9 KiB
361 lines
8.9 KiB
package data;
|
|
|
|
import static data.Filter.filterName.*;
|
|
|
|
import java.util.*;
|
|
import java.util.regex.Pattern;
|
|
|
|
import gui.ValidationUtil;
|
|
|
|
@SuppressWarnings("unchecked")
|
|
public class Filter implements Cloneable {
|
|
private HashMap<filterName, Object> filter;
|
|
|
|
public enum filterName {
|
|
ANALYSIS_LEVEL,
|
|
CALCULATE_FOR,
|
|
WORD_PARTS,
|
|
NGRAM_VALUE,
|
|
SKIP_VALUE,
|
|
IS_CVV,
|
|
STRING_LENGTH,
|
|
TAXONOMY,
|
|
DISPLAY_TAXONOMY,
|
|
MSD,
|
|
HAS_MSD,
|
|
WRITE_MSD_AT_THE_END,
|
|
SOLAR_FILTERS,
|
|
MULTIPLE_KEYS,
|
|
NOTE_PUNCTUATIONS,
|
|
MINIMAL_OCCURRENCES,
|
|
MINIMAL_TAXONOMY,
|
|
MINIMAL_REL_FRE,
|
|
IS_MINIMAL_REL_FRE_SCRAPER,
|
|
TAXONOMY_SET_OPERATION,
|
|
COLLOCABILITY,
|
|
PREFIX_LENGTH,
|
|
SUFFIX_LENGTH,
|
|
PREFIX_LIST,
|
|
SUFFIX_LIST
|
|
}
|
|
|
|
public Filter() {
|
|
filter = new HashMap<>();
|
|
filter.put(WRITE_MSD_AT_THE_END, false);
|
|
filter.put(WORD_PARTS, new ArrayList<CalculateFor>());
|
|
filter.put(IS_MINIMAL_REL_FRE_SCRAPER, false);
|
|
}
|
|
|
|
public Filter(AnalysisLevel al, CalculateFor cf) {
|
|
filter = new HashMap<>();
|
|
|
|
filter.put(ANALYSIS_LEVEL, al);
|
|
filter.put(CALCULATE_FOR, cf);
|
|
|
|
filter.put(WORD_PARTS, new ArrayList<CalculateFor>());
|
|
addWordPart(cf);
|
|
|
|
filter.put(WRITE_MSD_AT_THE_END, false);
|
|
}
|
|
|
|
public void setAl(AnalysisLevel al) {
|
|
filter.put(ANALYSIS_LEVEL, al);
|
|
}
|
|
|
|
public AnalysisLevel getAl() {
|
|
return (AnalysisLevel) filter.get(ANALYSIS_LEVEL);
|
|
}
|
|
|
|
public void setCalculateFor(CalculateFor cf) {
|
|
filter.put(CALCULATE_FOR, cf);
|
|
filter.put(WORD_PARTS, new ArrayList<CalculateFor>());
|
|
addWordPart(cf);
|
|
}
|
|
|
|
public CalculateFor getCalculateFor() {
|
|
return (CalculateFor) filter.get(CALCULATE_FOR);
|
|
}
|
|
|
|
public void setNgramValue(Integer ngramValue) {
|
|
filter.put(NGRAM_VALUE, ngramValue);
|
|
}
|
|
|
|
public Integer getNgramValue() {
|
|
return (Integer) filter.get(NGRAM_VALUE);
|
|
}
|
|
|
|
public void setSkipValue(Integer skipValue) {
|
|
filter.put(SKIP_VALUE, skipValue);
|
|
}
|
|
|
|
public Integer getSkipValue() {
|
|
return (Integer) filter.get(SKIP_VALUE);
|
|
}
|
|
|
|
public void setIsCvv(boolean isCvv) {
|
|
filter.put(IS_CVV, isCvv);
|
|
}
|
|
|
|
public boolean isCvv() {
|
|
return filter.containsKey(IS_CVV) && (boolean) filter.get(IS_CVV);
|
|
}
|
|
|
|
public void setStringLength(int stringLength) {
|
|
filter.put(STRING_LENGTH, stringLength);
|
|
}
|
|
|
|
public Integer getStringLength() {
|
|
return (Integer) filter.get(STRING_LENGTH);
|
|
}
|
|
|
|
public void setTaxonomySetOperation(String taxonomySetOperation) {
|
|
filter.put(TAXONOMY_SET_OPERATION, taxonomySetOperation);
|
|
}
|
|
|
|
public String getTaxonomySetOperation() {
|
|
return (String) filter.get(TAXONOMY_SET_OPERATION);
|
|
}
|
|
|
|
public void setTaxonomy(ArrayList<Taxonomy> taxonomy) {
|
|
filter.put(TAXONOMY, taxonomy);
|
|
}
|
|
|
|
public ArrayList<Taxonomy> getTaxonomy() {
|
|
if (filter.containsKey(TAXONOMY) && filter.get(TAXONOMY) != null) {
|
|
return (ArrayList<Taxonomy>) filter.get(TAXONOMY);
|
|
} else {
|
|
return new ArrayList<>();
|
|
}
|
|
}
|
|
|
|
public void setDisplayTaxonomy(boolean displayTaxonomy) {
|
|
filter.put(DISPLAY_TAXONOMY, displayTaxonomy);
|
|
}
|
|
|
|
public boolean getDisplayTaxonomy() {
|
|
return (boolean) filter.get(DISPLAY_TAXONOMY);
|
|
}
|
|
|
|
public void setMsd(ArrayList<Pattern> msd) {
|
|
filter.put(MSD, msd);
|
|
if (!ValidationUtil.isEmpty(msd)) {
|
|
setHasMsd(true);
|
|
} else {
|
|
setHasMsd(false);
|
|
}
|
|
}
|
|
|
|
public ArrayList<Pattern> getMsd() {
|
|
return (ArrayList<Pattern>) filter.get(MSD);
|
|
}
|
|
|
|
public void setWriteMsdAtTheEnd(boolean writeMsdAtTheEnd) {
|
|
filter.put(WRITE_MSD_AT_THE_END, writeMsdAtTheEnd);
|
|
}
|
|
|
|
public boolean getWriteMsdAtTheEnd() {
|
|
return (boolean) filter.get(WRITE_MSD_AT_THE_END);
|
|
}
|
|
|
|
public void setHasMsd(boolean hasMsd) {
|
|
filter.put(HAS_MSD, hasMsd);
|
|
if (hasMsd && !((ArrayList<CalculateFor>) filter.get(MULTIPLE_KEYS)).contains(CalculateFor.MORPHOSYNTACTIC_SPECS))
|
|
addWordPart(CalculateFor.MORPHOSYNTACTIC_SPECS);
|
|
}
|
|
|
|
public boolean hasMsd() {
|
|
return filter.containsKey(HAS_MSD) && (boolean) filter.get(HAS_MSD);
|
|
}
|
|
|
|
public String toString() {
|
|
String newLine = "\n\t- ";
|
|
StringBuilder sb = new StringBuilder();
|
|
|
|
sb.append(newLine).append("Filter:");
|
|
for (Map.Entry<filterName, Object> entry : filter.entrySet()) {
|
|
sb.append(newLine)
|
|
.append(entry.getKey().toString())
|
|
.append(": ")
|
|
.append(entry.getValue() != null ? entry.getValue().toString() : "null");
|
|
}
|
|
|
|
return sb.toString();
|
|
}
|
|
|
|
public void setSolarFilters(HashMap<String, HashSet<String>> filters) {
|
|
filter.put(SOLAR_FILTERS, filters);
|
|
}
|
|
|
|
public HashMap<String, HashSet<String>> getSolarFilters() {
|
|
return (HashMap<String, HashSet<String>>) filter.get(SOLAR_FILTERS);
|
|
}
|
|
|
|
public void setMultipleKeys(ArrayList<String> keys) {
|
|
ArrayList<CalculateFor> newKeys = new ArrayList<>();
|
|
if (keys != null) {
|
|
for (String key : keys) {
|
|
CalculateFor cf = CalculateFor.factory(key);
|
|
newKeys.add(cf);
|
|
addWordPart(cf);
|
|
}
|
|
}
|
|
|
|
filter.put(MULTIPLE_KEYS, newKeys);
|
|
}
|
|
|
|
public void setCollocability(ArrayList<Collocability> keys) {
|
|
ArrayList<Collocability> newKeys = new ArrayList<>();
|
|
if (keys != null) {
|
|
newKeys.addAll(keys);
|
|
}
|
|
|
|
filter.put(COLLOCABILITY, newKeys);
|
|
}
|
|
|
|
public ArrayList<Collocability> getCollocability() {
|
|
if (filter.containsKey(COLLOCABILITY) && filter.get(COLLOCABILITY) != null) {
|
|
return (ArrayList<Collocability>) filter.get(COLLOCABILITY);
|
|
} else {
|
|
return new ArrayList<>();
|
|
}
|
|
}
|
|
|
|
public ArrayList<CalculateFor> getMultipleKeys() {
|
|
if (filter.containsKey(MULTIPLE_KEYS) && filter.get(MULTIPLE_KEYS) != null) {
|
|
return (ArrayList<CalculateFor>) filter.get(MULTIPLE_KEYS);
|
|
} else {
|
|
return new ArrayList<>();
|
|
}
|
|
}
|
|
|
|
public ArrayList<CalculateFor> getWordParts() {
|
|
if (filter.containsKey(WORD_PARTS) && filter.get(WORD_PARTS) != null) {
|
|
return (ArrayList<CalculateFor>) filter.get(WORD_PARTS);
|
|
} else {
|
|
return new ArrayList<>();
|
|
}
|
|
}
|
|
|
|
public void setNotePunctuations(boolean notePunctuations) {
|
|
filter.put(NOTE_PUNCTUATIONS, notePunctuations);
|
|
}
|
|
|
|
public boolean getNotePunctuations() {
|
|
return filter.containsKey(NOTE_PUNCTUATIONS) && (boolean) filter.get(NOTE_PUNCTUATIONS);
|
|
}
|
|
|
|
public void setMinimalOccurrences(Integer minOccurrences) {
|
|
filter.put(MINIMAL_OCCURRENCES, minOccurrences);
|
|
}
|
|
|
|
public Integer getMinimalOccurrences() {
|
|
return (Integer) filter.get(MINIMAL_OCCURRENCES);
|
|
}
|
|
|
|
|
|
public void setMinimalTaxonomy(Integer minTaxonomy) {
|
|
filter.put(MINIMAL_TAXONOMY, minTaxonomy);
|
|
}
|
|
|
|
public Integer getMinimalTaxonomy() {
|
|
return (Integer) filter.get(MINIMAL_TAXONOMY);
|
|
}
|
|
|
|
|
|
public void setMinimalRelFre(Integer minimalRelFre) {
|
|
filter.put(MINIMAL_REL_FRE, minimalRelFre);
|
|
}
|
|
|
|
public Integer getMinimalRelFre() {
|
|
return (Integer) filter.get(MINIMAL_REL_FRE);
|
|
}
|
|
|
|
|
|
public void setIsMinimalRelFreScraper(boolean isMinimalRelFreScraper) {
|
|
filter.put(IS_MINIMAL_REL_FRE_SCRAPER, isMinimalRelFreScraper);
|
|
}
|
|
|
|
public boolean getIsMinimalRelFreScraper() {
|
|
return (boolean) filter.get(IS_MINIMAL_REL_FRE_SCRAPER);
|
|
}
|
|
|
|
// PREFIX_LENGTH,
|
|
// SUFFIX_LENGTH,
|
|
// PREFIX_LIST,
|
|
// SUFFIX_LIST
|
|
|
|
public void setPrefixLength(Integer v) {
|
|
filter.put(PREFIX_LENGTH, v);
|
|
}
|
|
|
|
public Integer getPrefixLength() {
|
|
return (Integer) filter.get(PREFIX_LENGTH);
|
|
}
|
|
|
|
public void setSuffixLength(Integer v) {
|
|
filter.put(SUFFIX_LENGTH, v);
|
|
}
|
|
|
|
public Integer getSuffixLength() {
|
|
return (Integer) filter.get(SUFFIX_LENGTH);
|
|
}
|
|
|
|
public void setPrefixList(ArrayList<String> v) {
|
|
filter.put(PREFIX_LIST, v);
|
|
}
|
|
|
|
public ArrayList<String> getPrefixList() {
|
|
return (ArrayList<String>) filter.get(PREFIX_LIST);
|
|
}
|
|
|
|
public void setSuffixList(ArrayList<String> v) {
|
|
filter.put(SUFFIX_LIST, v);
|
|
}
|
|
|
|
public ArrayList<String> getSuffixList() {
|
|
return (ArrayList<String>) filter.get(SUFFIX_LIST);
|
|
}
|
|
|
|
private void addWordPart(CalculateFor wp){
|
|
ArrayList<CalculateFor> oldWp = ((ArrayList<CalculateFor>) filter.get(WORD_PARTS));
|
|
|
|
switch (wp) {
|
|
case WORD:
|
|
case DIST_WORDS:
|
|
if (!oldWp.contains(CalculateFor.WORD))
|
|
oldWp.add(CalculateFor.WORD);
|
|
break;
|
|
case LEMMA:
|
|
case DIST_LEMMAS:
|
|
if (!oldWp.contains(CalculateFor.LEMMA))
|
|
oldWp.add(CalculateFor.LEMMA);
|
|
break;
|
|
case MORPHOSYNTACTIC_PROPERTY:
|
|
case MORPHOSYNTACTIC_SPECS:
|
|
case WORD_TYPE:
|
|
if (!oldWp.contains(CalculateFor.MORPHOSYNTACTIC_SPECS))
|
|
oldWp.add(CalculateFor.MORPHOSYNTACTIC_SPECS);
|
|
break;
|
|
case NORMALIZED_WORD:
|
|
if (!oldWp.contains(CalculateFor.NORMALIZED_WORD))
|
|
oldWp.add(CalculateFor.NORMALIZED_WORD);
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public Object clone() throws CloneNotSupportedException{
|
|
Filter f = null;
|
|
try {
|
|
f = (Filter) super.clone();
|
|
} catch (CloneNotSupportedException e) {
|
|
f = new Filter();
|
|
}
|
|
f.filter = (HashMap<filterName, Object>) f.filter.clone();
|
|
|
|
return f;
|
|
}
|
|
}
|