package nogui;

import alg.XML_processing;
import data.*;
import gui.GUIController;
import gui.I18N;
import javafx.scene.control.Alert;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.simple.JSONArray;

import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;

import static gui.GUIController.showAlert;

/**
 * Static helpers for running an analysis without the GUI: conversion of JSON
 * configuration arrays into lists, and sequential drivers that feed every
 * detected corpus file through {@link XML_processing#readXML} and save the result.
 *
 * <p>No progress is reported from this class; both {@code updateProgress}
 * overloads are empty.
 */
public class Utils {
    // NOTE(review): the logger is registered under GUIController, not Utils. Left
    // unchanged because logging configuration may depend on that category name.
    public final static Logger logger = LogManager.getLogger(GUIController.class);

    /**
     * Converts a JSON array of taxonomy names into taxonomy objects.
     *
     * @param taxonomyArray JSON array whose elements are cast to {@code String}
     * @param corpus        corpus passed through to {@code Taxonomy.modifyingTaxonomy}
     * @return whatever {@code Taxonomy.modifyingTaxonomy} returns for an empty
     *         starting list and the collected names
     */
    public static ArrayList<Taxonomy> getTaxonomy(JSONArray taxonomyArray, Corpus corpus) {
        // Copy the JSON array into a list of strings.
        ArrayList<String> checkedItems = new ArrayList<>();
        for (Object o : taxonomyArray) {
            checkedItems.add((String) o);
        }

        return Taxonomy.modifyingTaxonomy(new ArrayList<>(), checkedItems, corpus);
    }

    /**
     * Converts a JSON array of collocability names into collocability objects.
     *
     * @param collocabilityArray JSON array whose elements are cast to {@code String}
     * @return one {@code Collocability.factory(name)} result per element, in order
     */
    public static ArrayList<Collocability> getCollocability(JSONArray collocabilityArray) {
        ArrayList<Collocability> checkedItems = new ArrayList<>();
        for (Object o : collocabilityArray) {
            checkedItems.add(Collocability.factory((String) o));
        }
        return checkedItems;
    }

    /**
     * Copies a JSON array of strings into an {@code ArrayList}.
     *
     * @param array JSON array whose elements are cast to {@code String}
     * @return the elements in their original order
     */
    public static ArrayList<String> getArrayList(JSONArray array) {
        ArrayList<String> arrayList = new ArrayList<>();
        for (Object o : array) {
            arrayList.add((String) o);
        }
        return arrayList;
    }

    /**
     * Copies a JSON array of I18N keys into a list of the looked-up strings.
     *
     * @param array JSON array whose elements are cast to {@code String} and passed
     *              to {@code I18N.get}
     * @return the {@code I18N.get} result for every element, in order
     */
    public static ArrayList<String> getAlsoVisualizeList(JSONArray array) {
        ArrayList<String> arrayList = new ArrayList<>();
        for (Object o : array) {
            arrayList.add(I18N.get((String) o));
        }
        return arrayList;
    }

    /**
     * Compiles a space-separated list of regular expressions.
     *
     * @param stringMsd regular expressions separated by single spaces; {@code null}
     *                  or the empty string yields an empty list
     * @return one compiled {@link Pattern} per token, in order
     * @throws java.util.regex.PatternSyntaxException if a token is not a valid regex
     */
    public static ArrayList<Pattern> getMsd(String stringMsd) {
        ArrayList<Pattern> msd = new ArrayList<>();
        // "".split(" ") would yield a single empty token, so bail out early.
        if (stringMsd == null || stringMsd.isEmpty()) {
            return msd;
        }

        for (String msdToken : stringMsd.split(" ")) {
            msd.add(Pattern.compile(msdToken));
        }
        return msd;
    }

    /** Intentionally empty: no progress is reported here. */
    public static void updateProgress(int i, int size, String format) {
    }

    /** Intentionally empty: no progress is reported here. */
    public static void updateProgress(double i, int size, String format) {
    }

    /**
     * Runs a preliminary pass over the corpus with a cloned filter flagged via
     * {@code setIsMinimalRelFreScraper(true)}, feeds the collected counts to
     * {@code updateMinimalRelFre}, resets the pass's counters and then starts
     * {@link #prepareMainTask}.
     *
     * <p>If the filter cannot be cloned the error is logged and nothing else runs.
     *
     * @param statistic statistics object of the main analysis
     * @param corpus    corpus used to build the temporary statistics object
     */
    public static void prepareTaskForMinRelFre(StatisticsNew statistic, Corpus corpus) {
        Filter filter = statistic.getFilter();
        logger.info("Started execution: {}", filter);

        try {
            Filter scraperFilter = (Filter) filter.clone();
            scraperFilter.setIsMinimalRelFreScraper(true);
            StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, scraperFilter, false);

            Collection<File> corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles();
            for (File corpusFile : corpusFiles) {
                XML_processing xmlProcessing = new XML_processing();
                xmlProcessing.readXML(corpusFile.toString(), statisticsMinRelFre);
            }

            // add remaining minRelFre results
            if (statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) {
                long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences()
                        .get(statisticsMinRelFre.getCorpus().getTotal()).longValue();
                // Minimal relative frequency (per million) scaled by the counted unigrams.
                double absToRelFactor =
                        (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;

                statisticsMinRelFre.updateMinimalRelFre(
                        statisticsMinRelFre.getTaxonomyResult()
                                .get(statisticsMinRelFre.getCorpus().getTotal()).entrySet(),
                        absToRelFactor);

                // reset all values
                for (Taxonomy taxonomy : statisticsMinRelFre.getTaxonomyResult().keySet()) {
                    statisticsMinRelFre.getTaxonomyResult().put(taxonomy, new ConcurrentHashMap<>());
                }
                for (Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()) {
                    statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
                }
            }

            prepareMainTask(statistic, corpus);
        } catch (CloneNotSupportedException e) {
            logger.error("Could not clone the filter; analysis was not started", e);
        }
    }

    /**
     * Reads every detected corpus file into {@code statistic}, optionally drops
     * entries below the minimal relative frequency, and then either continues with
     * {@link #prepareTaskForCollocability} (when the filter requests collocability
     * measures) or saves the result to disk.
     *
     * @param statistic statistics object that accumulates and saves the result
     * @param corpus    corpus used to build the one-gram statistics for collocability
     */
    public static void prepareMainTask(StatisticsNew statistic, Corpus corpus) {
        Filter filter = statistic.getFilter();
        logger.info("Started execution: {}", filter);

        Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();
        for (File corpusFile : corpusFiles) {
            XML_processing xmlProcessing = new XML_processing();
            xmlProcessing.isCancelled = false;
            xmlProcessing.readXML(corpusFile.toString(), statistic);
        }

        // if getMinimalRelFre > 1 erase all words that have lower occurrences at the end of processing
        if (statistic.getFilter().getMinimalRelFre() > 1) {
            long countFor1MWords = statistic.getUniGramOccurrences()
                    .get(statistic.getCorpus().getTotal()).longValue();
            double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;

            // removeIf avoids removing from the map while a for-each iterates over it.
            statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet()
                    .removeIf(entry -> entry.getValue().longValue() < absToRelFactor);

            statistic.updateMinimalRelFre(
                    statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet(),
                    absToRelFactor);
        }

        if (filter.getCollocability().size() > 0) {
            try {
                // Collocability measures are computed against one-gram statistics.
                Filter oneGramFilter = (Filter) filter.clone();
                oneGramFilter.setNgramValue(1);
                StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, oneGramFilter, false);
                prepareTaskForCollocability(statistic, statisticsOneGrams);
            } catch (CloneNotSupportedException e) {
                logger.error("Could not clone the filter; collocability was not computed and nothing was saved", e);
            }
        } else {
            try {
                boolean successfullySaved = statistic.saveResultToDisk();
                if (successfullySaved) {
                    logger.info(I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
                } else {
                    logger.info(I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
                }
            } catch (UnsupportedEncodingException e1) {
                logger.error(I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
                logger.error("Error while saving", e1);
            } catch (OutOfMemoryError e1) {
                // Same message key as the out-of-memory handler in prepareTaskForCollocability.
                logger.error(I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
                logger.error("Out of memory error", e1);
            }
        }
    }

    /**
     * Reads every detected corpus file into {@code statisticsOneGrams} with the
     * reader's {@code isCollocability} flag set, then lets {@code statistic}
     * compute its collocability measures from those one-gram counts and saves the
     * result to disk.
     *
     * @param statistic          statistics object of the main analysis; it is the one saved
     * @param statisticsOneGrams statistics object that receives the one-gram pass
     */
    public static void prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
        Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();

        for (File corpusFile : corpusFiles) {
            XML_processing xmlProcessing = new XML_processing();

            if (xmlProcessing.progressBarListener != null) {
                xmlProcessing.progressProperty().removeListener(xmlProcessing.progressBarListener);
            }

            xmlProcessing.isCollocability = true;
            xmlProcessing.readXML(corpusFile.toString(), statisticsOneGrams);
            xmlProcessing.isCollocability = false;
        }

        try {
            // NOTE(review): prints the statistics object to stdout; looks like leftover
            // debug output. Confirm nothing consumes it before removing.
            System.out.print(statistic);
            statistic.updateCalculateCollocabilities(statisticsOneGrams);

            boolean successfullySaved = statistic.saveResultToDisk();
            if (successfullySaved) {
                logger.info(I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
            } else {
                logger.info(I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
            }
        } catch (UnsupportedEncodingException e1) {
            logger.error(I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
            logger.error("Error while saving", e1);
        } catch (OutOfMemoryError e1) {
            logger.error(I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
            logger.error("Out of memory error", e1);
        }
    }
}