package nogui;

import alg.XML_processing;
import data.*;
import gui.I18N;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.simple.JSONArray;

import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
public class Utils {

    public final static Logger logger = LogManager.getLogger(Utils.class);
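
    /**
     * Converts a JSONArray of taxonomy names into {@link Taxonomy} objects
     * resolved against the given corpus.
     */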
    public static ArrayList<Taxonomy> getTaxonomy(JSONArray taxonomyArray, Corpus corpus) {
        // convert the JSONArray to a list of taxonomy names
        ArrayList<String> checkedItems = new ArrayList<>();
        for (Object o : taxonomyArray) {
            checkedItems.add((String) o);
        }

        return Taxonomy.modifyingTaxonomy(new ArrayList<>(), checkedItems, corpus);
    }
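
    /**
     * Converts a JSONArray of collocability-measure names into
     * {@link Collocability} values via {@code Collocability.factory}.
     */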
    public static ArrayList<Collocability> getCollocability(JSONArray collocabilityArray) {
        // convert the JSONArray to a list of collocability measures
        ArrayList<Collocability> checkedItems = new ArrayList<>();
        for (Object o : collocabilityArray) {
            checkedItems.add(Collocability.factory((String) o));
        }
        return checkedItems;
    }
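
    /**
     * Converts a JSONArray of strings into a plain ArrayList.
     */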
    public static ArrayList<String> getArrayList(JSONArray array) {
        // convert the JSONArray to a list of strings
        ArrayList<String> arrayList = new ArrayList<>();
        for (Object o : array) {
            arrayList.add((String) o);
        }
        return arrayList;
    }
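
    /**
     * Converts a JSONArray of i18n message keys into their localized strings.
     */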
    public static ArrayList<String> getAlsoVisualizeList(JSONArray array) {
        // convert the JSONArray of i18n keys to their localized labels
        ArrayList<String> arrayList = new ArrayList<>();
        for (Object o : array) {
            arrayList.add(I18N.get((String) o));
        }
        return arrayList;
    }
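
    /**
     * Compiles a space-separated list of MSD (morphosyntactic description)
     * expressions into regex patterns; an empty string yields an empty list.
     * For example, {@code getMsd("N.* V.*")} returns two compiled patterns.
     */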
    public static ArrayList<Pattern> getMsd(String stringMsd) {
        ArrayList<Pattern> msd = new ArrayList<>();
        if (stringMsd.isEmpty()) {
            return msd;
        }
        for (String msdToken : stringMsd.split(" ")) {
            msd.add(Pattern.compile(msdToken));
        }
        return msd;
    }
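
    // Intentional no-ops: the nogui build reports no progress. The signatures
    // appear to mirror the GUI counterparts so shared callers keep compiling.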
    public static void updateProgress(int i, int size, String format) {
    }

    public static void updateProgress(double i, int size, String format) {
    }
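
    /**
     * Preliminary pass for the minimal-relative-frequency filter: sweeps the
     * corpus with a cloned filter flagged as a scraper, derives the absolute
     * threshold from the resulting counts, resets the intermediate results,
     * and then hands off to {@link #prepareMainTask(StatisticsNew, Corpus)}.
     */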
    public static void prepareTaskForMinRelFre(StatisticsNew statistic, Corpus corpus) {
        Filter fi = statistic.getFilter();
        logger.info("Started execution: {}", fi);

        try {
            Filter f2 = (Filter) fi.clone();
            f2.setIsMinimalRelFreScraper(true);
            StatisticsNew statisticsMinRelFre = new StatisticsNew(corpus, f2, false);

            Collection<File> corpusFiles = statisticsMinRelFre.getCorpus().getDetectedCorpusFiles();

            final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statisticsMinRelFre.getCorpus().getCorpusType());

            Date startTime = new Date();
            Date previousTime = new Date();
            int remainingSeconds = -1;
            int i = 0;
            // this pre-pass is the first of two sweeps over the corpus, or of
            // three when a collocability pass will follow
            int corpusSize = statistic.getFilter().getCollocability().size() > 0
                    ? corpusFiles.size() * 3
                    : corpusFiles.size() * 2;
            for (File f : corpusFiles) {
                XML_processing xml_processing = new XML_processing();
                i++;
                if (multipleFiles) {
                    // remaining-time estimate, refreshed at most twice a second;
                    // retained from the GUI build, nothing consumes it here
                    if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1) {
                        remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0 / i) * (corpusSize - i) / 1000);
                        previousTime = new Date();
                    }
                }
                xml_processing.readXML(f.toString(), statisticsMinRelFre);
            }

            // derive the absolute-frequency threshold from the scraped counts,
            // record it, then reset the intermediate results
            if (statisticsMinRelFre.getFilter().getIsMinimalRelFreScraper()) {
                long countFor1MWords = statisticsMinRelFre.getUniGramOccurrences().get(statisticsMinRelFre.getCorpus().getTotal()).longValue();
                double absToRelFactor = (statisticsMinRelFre.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;

                statisticsMinRelFre.updateMinimalRelFre(statisticsMinRelFre.getTaxonomyResult().get(statisticsMinRelFre.getCorpus().getTotal()).entrySet(), absToRelFactor);

                // reset all values
                for (Taxonomy taxonomy : statisticsMinRelFre.getTaxonomyResult().keySet()) {
                    statisticsMinRelFre.getTaxonomyResult().put(taxonomy, new ConcurrentHashMap<>());
                }
                for (Taxonomy taxonomy : statisticsMinRelFre.getUniGramOccurrences().keySet()) {
                    statisticsMinRelFre.getUniGramOccurrences().put(taxonomy, new AtomicLong(0));
                }
            }

            prepareMainTask(statistic, corpus);
        } catch (CloneNotSupportedException e) {
            logger.error("Filter could not be cloned", e);
        }
    }
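
    /**
     * Main pass: sweeps the corpus, prunes results below the
     * minimal-relative-frequency threshold when one is set, then either
     * chains into the collocability pass or saves the results to disk.
     */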
    public static void prepareMainTask(StatisticsNew statistic, Corpus corpus) {
        Filter f = statistic.getFilter();
        logger.info("Started execution: {}", f);

        Collection<File> corpusFiles = statistic.getCorpus().getDetectedCorpusFiles();

        final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
        Date startTime = new Date();
        Date previousTime = new Date();
        int remainingSeconds = -1;
        int corpusSize;
        int i;
        int taskIndex = 0;
        // i and corpusSize place this pass within the whole run: a minRelFre
        // pre-pass (if any) has already swept the corpus once, and a
        // collocability pass (if any) will sweep it once more afterwards
        if (statistic.getFilter().getCollocability().size() > 0 && statistic.getFilter().getMinimalRelFre() > 1) {
            i = corpusFiles.size();
            corpusSize = corpusFiles.size() * 3;
        } else if (statistic.getFilter().getMinimalRelFre() > 1) {
            i = corpusFiles.size();
            corpusSize = corpusFiles.size() * 2;
        } else if (statistic.getFilter().getCollocability().size() > 0) {
            i = 0;
            corpusSize = corpusFiles.size() * 2;
        } else {
            i = 0;
            corpusSize = corpusFiles.size();
        }
        for (File fi : corpusFiles) {
            XML_processing xml_processing = new XML_processing();
            xml_processing.isCancelled = false;
            i++;
            taskIndex++;
            if (multipleFiles) {
                // remaining-time estimate, refreshed at most twice a second;
                // the GUI progress message that consumed it is not shown here
                if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1) {
                    remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0 / taskIndex) * (corpusSize - i) / 1000);
                    previousTime = new Date();
                }
            }
            // (the per-file JavaFX progress-bar listener used by the GUI build is omitted here)
            xml_processing.readXML(fi.toString(), statistic);
        }

        // if minimalRelFre > 1, erase all words with fewer occurrences at the end of processing
        if (statistic.getFilter().getMinimalRelFre() > 1) {
            long countFor1MWords = statistic.getUniGramOccurrences().get(statistic.getCorpus().getTotal()).longValue();
            double absToRelFactor = (statistic.getFilter().getMinimalRelFre() / 1000000.0) * countFor1MWords;

            statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet()
                    .removeIf(entry -> entry.getValue().longValue() < absToRelFactor);
            statistic.updateMinimalRelFre(statistic.getTaxonomyResult().get(statistic.getCorpus().getTotal()).entrySet(), absToRelFactor);
        }

        if (f.getCollocability().size() > 0) {
            try {
                Filter f2 = (Filter) f.clone();
                f2.setNgramValue(1);
                StatisticsNew statisticsOneGrams = new StatisticsNew(corpus, f2, false);
                prepareTaskForCollocability(statistic, statisticsOneGrams);
            } catch (CloneNotSupportedException e) {
                logger.error("Filter could not be cloned", e);
            }
        } else {
            try {
                boolean successfullySaved = statistic.saveResultToDisk();
                if (successfullySaved) {
                    logger.info(I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
                } else {
                    logger.info(I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
                }
            } catch (UnsupportedEncodingException e1) {
                logger.error(I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
                logger.error("Error while saving", e1);
            } catch (OutOfMemoryError e1) {
                logger.error(I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
                logger.error("Out of memory error", e1);
            }
        }
    }
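
    /**
     * Final one-gram pass used for collocability: sweeps the corpus once more,
     * merges the collocability statistics into the main results, and saves
     * them to disk.
     */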
    public static void prepareTaskForCollocability(StatisticsNew statistic, StatisticsNew statisticsOneGrams) {
        Collection<File> corpusFiles = statisticsOneGrams.getCorpus().getDetectedCorpusFiles();

        final boolean multipleFiles = CorpusType.multipleFilesCorpuses().contains(statistic.getCorpus().getCorpusType());
        Date startTime = new Date();
        Date previousTime = new Date();
        int remainingSeconds = -1;

        int corpusSize;
        int i;
        int taskIndex = 0;
        // this is always the last pass; one or two full sweeps have already run
        if (statistic.getFilter().getMinimalRelFre() > 1) {
            i = corpusFiles.size() * 2;
            corpusSize = corpusFiles.size() * 3;
        } else {
            i = corpusFiles.size();
            corpusSize = corpusFiles.size() * 2;
        }

        for (File f : corpusFiles) {
            XML_processing xml_processing = new XML_processing();
            i++;
            taskIndex++;
            if (xml_processing.progressBarListener != null) {
                xml_processing.progressProperty().removeListener(xml_processing.progressBarListener);
            }
            if (multipleFiles) {
                // remaining-time estimate, refreshed at most twice a second;
                // the GUI progress message that consumed it is not shown here
                if ((new Date()).getTime() - previousTime.getTime() > 500 || remainingSeconds == -1) {
                    remainingSeconds = (int) (((new Date()).getTime() - startTime.getTime()) * (1.0 / taskIndex) * (corpusSize - i) / 1000);
                    previousTime = new Date();
                }
            }
            // (the per-file JavaFX progress-bar listener used by the GUI build is omitted here)
            xml_processing.isCollocability = true;
            xml_processing.readXML(f.toString(), statisticsOneGrams);
            xml_processing.isCollocability = false;
        }

        try {
            logger.debug("Calculating collocabilities for: {}", statistic);
            statistic.updateCalculateCollocabilities(statisticsOneGrams);
            boolean successfullySaved = statistic.saveResultToDisk();
            if (successfullySaved) {
                logger.info(I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED"));
            } else {
                logger.info(I18N.get("message.NOTIFICATION_ANALYSIS_COMPLETED_NO_RESULTS"));
            }
        } catch (UnsupportedEncodingException e1) {
            logger.error(I18N.get("message.ERROR_WHILE_SAVING_RESULTS_TO_CSV"));
            logger.error("Error while saving", e1);
        } catch (OutOfMemoryError e1) {
            logger.error(I18N.get("message.ERROR_NOT_ENOUGH_MEMORY"));
            logger.error("Out of memory error", e1);
        }
    }
}