/*
 * NGramCorrelationForm.java
 *
 * Created on 24 ??????? 2006, 1:43 ??
 */

package gr.demokritos.iit.jinsect.gui;

import gr.demokritos.iit.conceptualIndex.LocalWordNetMeaningExtractor;
import gr.demokritos.iit.conceptualIndex.structs.Concatenation;
import gr.demokritos.iit.conceptualIndex.documentModel.DistributionDocument;
import gr.demokritos.iit.conceptualIndex.documentModel.SemanticIndex;
import gr.demokritos.iit.conceptualIndex.documentModel.SymbolicGraph;
import gr.demokritos.iit.conceptualIndex.structs.Union;
import gr.demokritos.iit.conceptualIndex.structs.Distribution;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.StreamTokenizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import javax.swing.JFileChooser;
import javax.swing.JOptionPane;
import javax.swing.SwingUtilities;
import gr.demokritos.iit.jinsect.structs.CategorizedFileEntry;
import gr.demokritos.iit.jinsect.IMatching;
import gr.demokritos.iit.jinsect.structs.DocumentSet;
import gr.demokritos.iit.jinsect.algorithms.estimators.DistanceEstimator;
import gr.demokritos.iit.jinsect.algorithms.estimators.NGramSizeEstimator;
import gr.demokritos.iit.jinsect.algorithms.statistics.statisticalCalculation;
import gr.demokritos.iit.jinsect.console.StatusConsole;
import gr.demokritos.iit.jinsect.console.StreamOutputConsole;
import gr.demokritos.iit.jinsect.documentModel.documentTypes.NGramDocument;
import gr.demokritos.iit.jinsect.structs.IntegerPair;
import gr.demokritos.iit.jinsect.supportUtils.linguistic.WordDefinition;
import gr.demokritos.iit.jinsect.threading.ThreadList;
import gr.demokritos.iit.jinsect.utils;
import gr.demokritos.iit.summarization.analysis.EntropyChunker;
import salvo.jesus.graph.Vertex;
import salvo.jesus.graph.VertexImpl;
import salvo.jesus.graph.WeightedEdge;

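/**
 * Swing frame titled "NGram Correlation Experiments". It lets the user pick a
 * training corpus and test files, and exposes buttons that create n-gram
 * graphs, look up or break down terms, chunk text and compare two files.
 * Long-running actions are started on background threads.
 *
 * @author  ggianna
 */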

public class NGramCorrelationForm extends javax.swing.JFrame implements IMatching {
    // NOTE(review): not referenced in the visible part of this file; presumably
    // the distribution documents built from the training corpus - confirm
    // against the graph-creation code.
    DistributionDocument[] cdDoc;
    // NOTE(review): only referenced from commented-out code in the visible part
    // of this file (getCommonSubnode lookups); presumably the graph built over
    // the whole corpus - confirm.
    SymbolicGraph sgOverallGraph;
    // Index queried for the meaning of a substring vertex (getMeaning) and used
    // to compare word definitions when two files are compared.
    SemanticIndex siIndex;
    // NOTE(review): initialised to the construction time; its readers are not
    // visible here - presumably throttles log/status refreshes, confirm.
    private Date dLastUpdate = new Date();
    // Background threads currently running an action. Entries are added by
    // registerThread, removed by unregisterThread, and interrupted/removed by
    // the "Cancel All Processes" handler.
    ArrayList lActiveActions = new ArrayList();
    // NOTE(review): not used in the visible part of this file; presumably the
    // delimiters entered in DelimitersEdt - confirm.
    TreeMap Delims = new TreeMap();
    // Splits raw file text into chunks (chunkString) before substring analysis.
    EntropyChunker Chunker = new EntropyChunker();
    // NOTE(review): not used in the visible part of this file; presumably marks
    // the input text as right-to-left - confirm.
    boolean RightToLeftText = false;
    
    // NOTE(review): not used in the visible part of this file; presumably the
    // lowest graph level handled - confirm.
    public static final int MinLevel = 0;
    //public static final int Levels = 10;
    
    /**
     * Creates the form and builds all of its widgets by delegating to
     * {@link #initComponents()}. The frame is not made visible here.
     */
    public NGramCorrelationForm() {
        super();
        initComponents();
    }
    
    /**
     * Builds and lays out every widget of the form; called from within the
     * constructor.
     * <p>
     * Instantiates the text fields, sliders, labels, buttons and the log text
     * area, adds them to the content pane under a GridBagLayout, wires each
     * button to its matching {@code ...ActionPerformed} handler and finally
     * packs the frame. Closing the frame exits the application
     * (EXIT_ON_CLOSE).
     * <p>
     * NOTE(review): the three path fields default to a hard-coded absolute
     * path, and three different buttons share the component name
     * "SelectInputFileBtn"; both must be changed in the Form Editor, not here.
     * <p>
     * WARNING: Do NOT modify this code. The content of this method is
     * always regenerated by the Form Editor.
     */
    // <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents
    private void initComponents() {
        java.awt.GridBagConstraints gridBagConstraints;

        FilePathEdt = new javax.swing.JTextField();
        SelectInputFileBtn = new javax.swing.JButton();
        CreateNGramGraphBtn = new javax.swing.JButton();
        NGramSizeSldr = new javax.swing.JSlider();
        jLabel1 = new javax.swing.JLabel();
        jScrollPane1 = new javax.swing.JScrollPane();
        LogMemo = new javax.swing.JTextArea();
        jLabel2 = new javax.swing.JLabel();
        TermEdt = new javax.swing.JTextField();
        jLabel3 = new javax.swing.JLabel();
        LookUpTermBtn = new javax.swing.JButton();
        BreakDownBtn = new javax.swing.JButton();
        FindPathBtn = new javax.swing.JButton();
        jLabel4 = new javax.swing.JLabel();
        CorpusPercentSld = new javax.swing.JSlider();
        ClearGraphsBtn = new javax.swing.JButton();
        jLabel5 = new javax.swing.JLabel();
        SelectInputFileEdt = new javax.swing.JTextField();
        SelectTestFileBtn = new javax.swing.JButton();
        BreakFileDownBtn = new javax.swing.JButton();
        jButton1 = new javax.swing.JButton();
        SecondInputFileLbl = new javax.swing.JLabel();
        SelectSecondInputFileEdt = new javax.swing.JTextField();
        SelectSecondTestFileBtn = new javax.swing.JButton();
        CompareFilesBtn = new javax.swing.JButton();
        CancelAllBtn = new javax.swing.JButton();
        AnalyseCorpusBtn = new javax.swing.JButton();
        jLabel6 = new javax.swing.JLabel();
        DelimitersEdt = new javax.swing.JTextField();

        setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
        setTitle("NGram Correlation Experiments");
        setName("NGramCorrelationFrame"); // NOI18N
        getContentPane().setLayout(new java.awt.GridBagLayout());

        FilePathEdt.setText("/home/ggianna/Documents/JApplications/JInsect/conceptualCorpus");
        FilePathEdt.setName("FilePathEd"); // NOI18N
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 1;
        gridBagConstraints.gridy = 0;
        gridBagConstraints.gridwidth = 4;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(FilePathEdt, gridBagConstraints);

        SelectInputFileBtn.setText("Select input file");
        SelectInputFileBtn.setName("SelectInputFileBtn"); // NOI18N
        SelectInputFileBtn.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                SelectInputFileBtnActionPerformed(evt);
            }
        });
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 5;
        gridBagConstraints.gridy = 0;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(SelectInputFileBtn, gridBagConstraints);

        CreateNGramGraphBtn.setText("Create NGram Graphs");
        CreateNGramGraphBtn.setName("CreateGraphBtn"); // NOI18N
        CreateNGramGraphBtn.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                CreateNGramGraphBtnActionPerformed(evt);
            }
        });
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridy = 14;
        getContentPane().add(CreateNGramGraphBtn, gridBagConstraints);

        NGramSizeSldr.setMajorTickSpacing(5);
        NGramSizeSldr.setMaximum(22);
        NGramSizeSldr.setMinimum(2);
        NGramSizeSldr.setMinorTickSpacing(1);
        NGramSizeSldr.setPaintLabels(true);
        NGramSizeSldr.setPaintTicks(true);
        NGramSizeSldr.setValue(9);
        NGramSizeSldr.setName("NGramSizeSldr"); // NOI18N
        NGramSizeSldr.setValueIsAdjusting(true);
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 1;
        gridBagConstraints.gridy = 1;
        gridBagConstraints.gridwidth = java.awt.GridBagConstraints.REMAINDER;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(NGramSizeSldr, gridBagConstraints);

        jLabel1.setText("Max NGram Size");
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 1;
        getContentPane().add(jLabel1, gridBagConstraints);

        jScrollPane1.setMaximumSize(null);
        jScrollPane1.setMinimumSize(null);
        jScrollPane1.setPreferredSize(new java.awt.Dimension(300, 400));

        LogMemo.setColumns(20);
        LogMemo.setRows(5);
        LogMemo.setPreferredSize(null);
        jScrollPane1.setViewportView(LogMemo);

        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 6;
        gridBagConstraints.gridwidth = java.awt.GridBagConstraints.REMAINDER;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(jScrollPane1, gridBagConstraints);

        jLabel2.setText("Graph training corpus");
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 0;
        getContentPane().add(jLabel2, gridBagConstraints);

        TermEdt.setMinimumSize(new java.awt.Dimension(200, 19));
        TermEdt.setPreferredSize(new java.awt.Dimension(100, 19));
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 1;
        gridBagConstraints.gridy = 3;
        gridBagConstraints.gridwidth = 3;
        getContentPane().add(TermEdt, gridBagConstraints);

        jLabel3.setText("Select Term to Look up");
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 3;
        getContentPane().add(jLabel3, gridBagConstraints);

        LookUpTermBtn.setText("Look up Term");
        LookUpTermBtn.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                LookUpTermBtnActionPerformed(evt);
            }
        });
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 2;
        gridBagConstraints.gridy = 5;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(LookUpTermBtn, gridBagConstraints);

        BreakDownBtn.setText("Break term down");
        BreakDownBtn.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                BreakDownBtnActionPerformed(evt);
            }
        });
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 5;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(BreakDownBtn, gridBagConstraints);

        FindPathBtn.setText("Test Search");
        FindPathBtn.setEnabled(false);
        FindPathBtn.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                FindPathBtnActionPerformed(evt);
            }
        });
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 1;
        gridBagConstraints.gridy = 5;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(FindPathBtn, gridBagConstraints);

        jLabel4.setText("Select % of Corpus to Use");
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 2;
        getContentPane().add(jLabel4, gridBagConstraints);

        CorpusPercentSld.setMajorTickSpacing(10);
        CorpusPercentSld.setMinorTickSpacing(5);
        CorpusPercentSld.setPaintLabels(true);
        CorpusPercentSld.setPaintTicks(true);
        CorpusPercentSld.setValue(100);
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 1;
        gridBagConstraints.gridy = 2;
        gridBagConstraints.gridwidth = java.awt.GridBagConstraints.REMAINDER;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(CorpusPercentSld, gridBagConstraints);

        ClearGraphsBtn.setText("Clear Graphs");
        ClearGraphsBtn.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                ClearGraphsBtnActionPerformed(evt);
            }
        });
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridy = 14;
        getContentPane().add(ClearGraphsBtn, gridBagConstraints);

        jLabel5.setText("Select Input File");
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridy = 8;
        getContentPane().add(jLabel5, gridBagConstraints);

        SelectInputFileEdt.setText("/home/ggianna/Documents/JApplications/JInsect/conceptualCorpus");
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridy = 8;
        gridBagConstraints.gridwidth = 4;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(SelectInputFileEdt, gridBagConstraints);

        SelectTestFileBtn.setText("Select test file");
        SelectTestFileBtn.setName("SelectInputFileBtn"); // NOI18N
        SelectTestFileBtn.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                SelectTestFileBtnActionPerformed(evt);
            }
        });
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridy = 8;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(SelectTestFileBtn, gridBagConstraints);

        BreakFileDownBtn.setText("Break file down");
        BreakFileDownBtn.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                BreakFileDownBtnActionPerformed(evt);
            }
        });
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridy = 8;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(BreakFileDownBtn, gridBagConstraints);

        jButton1.setText("Chunk String");
        jButton1.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                jButton1ActionPerformed(evt);
            }
        });
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 4;
        gridBagConstraints.gridy = 5;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(jButton1, gridBagConstraints);

        SecondInputFileLbl.setText("Select Second File");
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridy = 9;
        getContentPane().add(SecondInputFileLbl, gridBagConstraints);

        SelectSecondInputFileEdt.setText("/home/ggianna/Documents/JApplications/JInsect/conceptualCorpus");
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridy = 9;
        gridBagConstraints.gridwidth = 4;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(SelectSecondInputFileEdt, gridBagConstraints);

        SelectSecondTestFileBtn.setText("Select second file");
        SelectSecondTestFileBtn.setName("SelectInputFileBtn"); // NOI18N
        SelectSecondTestFileBtn.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                SelectSecondTestFileBtnActionPerformed(evt);
            }
        });
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridy = 9;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(SelectSecondTestFileBtn, gridBagConstraints);

        CompareFilesBtn.setText("Compare Files");
        CompareFilesBtn.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                CompareFilesBtnActionPerformed(evt);
            }
        });
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 6;
        gridBagConstraints.gridy = 9;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(CompareFilesBtn, gridBagConstraints);

        CancelAllBtn.setText("Cancel All Processes");
        CancelAllBtn.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                CancelAllBtnActionPerformed(evt);
            }
        });
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridy = 15;
        getContentPane().add(CancelAllBtn, gridBagConstraints);

        AnalyseCorpusBtn.setText("Analyse Corpus");
        AnalyseCorpusBtn.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                AnalyseCorpusBtnActionPerformed(evt);
            }
        });
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridy = 14;
        getContentPane().add(AnalyseCorpusBtn, gridBagConstraints);

        jLabel6.setText("Delimiters");
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridy = 13;
        getContentPane().add(jLabel6, gridBagConstraints);
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridy = 13;
        gridBagConstraints.gridwidth = 2;
        gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
        getContentPane().add(DelimitersEdt, gridBagConstraints);

        pack();
    }// </editor-fold>//GEN-END:initComponents

    /**
     * Handles a click on the "Analyse Corpus" button by running
     * DoAnalyseCorpus() on a background thread with slightly raised priority.
     * The thread is registered before it starts and always unregisters itself
     * when it ends, even if the analysis terminates with an exception.
     *
     * @param evt the button's action event (unused)
     */
    private void AnalyseCorpusBtnActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_AnalyseCorpusBtnActionPerformed
        Thread t = new Thread() {
            @Override
            public void run() {
                try {
                    DoAnalyseCorpus();
                } finally {
                    // Deregister on every exit path; otherwise a failed run
                    // would leave a dead thread in the active-action list.
                    unregisterThread(this);
                }
            }
        };
        registerThread(t);
        t.setPriority(Math.min(Thread.MAX_PRIORITY, t.getPriority() + 1)); // Increased priority
        t.start();

    }//GEN-LAST:event_AnalyseCorpusBtnActionPerformed

    /**
     * Adds a worker thread to the list of active actions and prints a debug
     * line with its id and name to standard output.
     *
     * @param t the thread to record
     */
    private synchronized void registerThread(Thread t) {
        lActiveActions.add(t);
        // DEBUG LINES
        final String sThreadTag = t.getId() + ":" + t.getName();
        System.out.println("Added " + sThreadTag);
        //////////////
    }
    /**
     * Removes a worker thread from the list of active actions and prints a
     * debug line with its id and name to standard output.
     *
     * @param t the thread to forget
     */
    private synchronized void unregisterThread(Thread t) {
        lActiveActions.remove(t);
        // DEBUG LINES
        final String sThreadTag = t.getId() + ":" + t.getName();
        System.out.println("Removed " + sThreadTag);
        //////////////
    }
    
    /**
     * Handles a click on the "Cancel All Processes" button. After the user
     * confirms, every registered worker thread is interrupted and the list of
     * active actions is emptied.
     * <p>
     * The list is copied and cleared while holding this form's monitor, the
     * same lock registerThread and unregisterThread use, so a worker that
     * finishes at the same moment cannot modify the list while it is being
     * walked.
     *
     * @param evt the button's action event (unused)
     */
    private void CancelAllBtnActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_CancelAllBtnActionPerformed
        if (JOptionPane.showConfirmDialog(this, "Are you sure you want to stop running processes?", 
                "Cancelling running processes", JOptionPane.YES_NO_OPTION) != JOptionPane.YES_OPTION) {
            return;
        }

        // Take a snapshot under the lock, then interrupt outside of it so the
        // workers are never blocked on the monitor while being cancelled.
        final List lToCancel;
        synchronized (this) {
            lToCancel = new ArrayList(lActiveActions);
            lActiveActions.clear();
        }

        Iterator iIter = lToCancel.iterator();
        while (iIter.hasNext()) {
            Object oNext = iIter.next();
            if (oNext instanceof Thread)
                ((Thread)oNext).interrupt();
        }

    }//GEN-LAST:event_CancelAllBtnActionPerformed

    /**
     * Keeps only the sets with the fewest elements, without any restriction on
     * the length of the contained n-grams.
     *
     * @param lSets a list whose elements are themselves lists
     * @return a new list holding the minimum-sized elements of lSets
     */
    private List KeepLeastSizeSets(List lSets) {
        return KeepLeastSizeSets(lSets, 0);
    }
    
    /**
     * Keeps only the sets with the fewest elements and, optionally, drops
     * those that contain an n-gram shorter than a given length.
     * <p>
     * The minimum size is computed over all of lSets; the length filter is
     * then applied to the minimum-sized sets, so the result may be empty.
     * The input list is never modified.
     *
     * @param lSets a list whose elements are lists of strings
     * @param iMinContainedNGramSize minimum length every string of a kept set
     *        must have; values of 0 or less disable the check
     * @return a new list holding the qualifying elements of lSets, in their
     *         original order
     */
    private List KeepLeastSizeSets(List lSets, int iMinContainedNGramSize) {
        ArrayList lRes = new ArrayList();
        // First pass - Get minimum size
        int iMinSize = Integer.MAX_VALUE;
        Iterator iIter = lSets.iterator();
        while (iIter.hasNext()) {
            List lNext = (List)iIter.next();
            iMinSize = Math.min(iMinSize, lNext.size());
        }

        // Second pass - Keep only minimum-sized sets that also satisfy the
        // n-gram length requirement (when one was requested)
        iIter = lSets.iterator();
        while (iIter.hasNext()) {
            List lNext = (List)iIter.next();
            if (lNext.size() != iMinSize)
                continue;

            boolean bIsOK = true;
            if (iMinContainedNGramSize > 0) {
                Iterator iNGrams = lNext.iterator();
                while (iNGrams.hasNext())
                    if (((String)iNGrams.next()).length() < iMinContainedNGramSize) {
                        bIsOK = false;
                        break;
                    }
            }
            if (bIsOK)
                lRes.add(lNext);
        }

        return lRes;
    }
    
    /**
     * Compares the two files named in SelectInputFileEdt and
     * SelectSecondInputFileEdt and appends the resulting similarity to the log.
     * <p>
     * Each file is read fully, split into chunks by the Chunker, and every
     * chunk is analysed into candidate substring sets. For every distinct
     * candidate set the semantic index is asked for the meaning of each
     * substring; the meanings found are collected per file. The similarity is
     * the sum, over the first file's meanings, of the best match among the
     * second file's meanings, multiplied by two and divided by the total
     * number of meanings. When no meanings are found at all the similarity is
     * reported as 0.0 instead of NaN.
     * <p>
     * A status frame is shown while the first file is processed and is always
     * hidden again, even if processing fails. A file that cannot be read is
     * treated as empty (the stack trace is printed).
     */
    private void DoCompareFiles() {
        // First file
        appendToLog("*** First file\n");
        
        ArrayList Meanings1 = new ArrayList();
        
        // Read and chunk text
        List<String> saSubtexts = Chunker.chunkString(
                readFileForComparison(SelectInputFileEdt.getText()));
        
        final gr.demokritos.iit.jinsect.gui.StatusFrame fStatus = 
                new gr.demokritos.iit.jinsect.gui.StatusFrame();
        fStatus.setVisible(true);
        try {
            int iCnt = 0;
            final int iMax = saSubtexts.size();
            for (String sStr : saSubtexts) {
                fStatus.setStatus("Checking: " + sStr, (double)iCnt++/iMax);
                // Analyse chunk in substrings
                List lSubStrings = utils.getSubStrings(sStr, sStr.length(), this);
                if (lSubStrings.size() == 0)
                    continue;
                appendToLog(utils.printList(lSubStrings));
                collectMeaningsForComparison(lSubStrings, Meanings1);
            }
        }
        finally {
            // Never leave the status window behind when processing fails
            fStatus.setVisible(false);
        }
        
        // Second file        
        appendToLog("*** Second file\n");
        
        ArrayList Meanings2 = new ArrayList();        
        
        // Read and chunk text
        saSubtexts = Chunker.chunkString(
                readFileForComparison(SelectSecondInputFileEdt.getText()));
        for (String sStr : saSubtexts) {
            // Analyse chunk in substrings; unlike the first file, the substring
            // analysis is given twice the selected n-gram size as extra argument
            List lSubStrings = utils.getSubStrings(sStr, sStr.length(), this, 2 * NGramSizeSldr.getValue());
            appendToLog(utils.printList(lSubStrings));
            collectMeaningsForComparison(lSubStrings, Meanings2);
        }
        
        // Calc similarity
        // TODO: DEBUG
        // For every meaning in Meanings1
        double dRes = 0.0;
        Iterator iIter1 = Meanings1.iterator();
        while (iIter1.hasNext()) {
            double dMaxSim = 0.0;
            WordDefinition d1 = (WordDefinition)iIter1.next();
            
            // For every meaning in Meanings2
            Iterator iIter2 = Meanings2.iterator();
            while (iIter2.hasNext()) {
                WordDefinition d2 = (WordDefinition)iIter2.next();
                // Keep max similarity
                dMaxSim = Math.max(dMaxSim, siIndex.compareWordDefinitions(d1, d2));
            }
            // DEBUG LINES
            appendToLog("Concluded similarity of " + dMaxSim);
            //////////////
            dRes += dMaxSim;
        }
        // Guard against 0/0 (NaN) when neither file yielded any meaning
        final int iMeaningCount = Meanings1.size() + Meanings2.size();
        dRes = (iMeaningCount == 0) ? 0.0 : 2 * dRes / iMeaningCount;
        
        appendToLog("Final Similarity : " + dRes);
    }

    /**
     * Reads a whole file into a string using the platform default charset.
     * The stream is always closed.
     *
     * @param sPath path of the file to read
     * @return the file contents, or an empty string if the file could not be
     *         read completely
     */
    private String readFileForComparison(String sPath) {
        ByteArrayOutputStream bsOut = new ByteArrayOutputStream();
        FileInputStream fiIn = null;
        try {
            fiIn = new FileInputStream(sPath);
            byte[] baBuffer = new byte[8192];
            int iRead;
            while ((iRead = fiIn.read(baBuffer)) > -1)
                bsOut.write(baBuffer, 0, iRead);
        }
        catch (IOException ioe) {
            ioe.printStackTrace();
            // Best effort: an unreadable file is handled as empty text
            return "";
        }
        finally {
            if (fiIn != null) {
                try {
                    fiIn.close();
                }
                catch (IOException ioe) {
                    ioe.printStackTrace();
                }
            }
        }
        return bsOut.toString();
    }

    /**
     * Looks up the meaning of every substring in every distinct candidate set
     * and appends the meanings found to the given list.
     *
     * @param lSubStrings candidate sets of one chunk; each element is either a
     *        list of substrings or a single substring
     * @param lMeanings list that receives every non-null meaning returned by
     *        the semantic index
     */
    private void collectMeaningsForComparison(List lSubStrings, List lMeanings) {
        // Candidate sets already handled for this chunk, keyed by their string form
        HashMap hSubstringSet = new HashMap();

        Iterator iIter = lSubStrings.iterator();
        while (iIter.hasNext()) {
            Object oNext = iIter.next();
            List lNext;
            if (oNext instanceof List) {
                lNext = (List)oNext;
            }
            else 
            {
                lNext = new ArrayList();
                lNext.add(oNext);
            }
            if (hSubstringSet.containsKey(lNext.toString()))
                continue;

            appendToLog("Case " + utils.printList(lNext));
            hSubstringSet.put(lNext.toString(),1);

            // Attempt retrieval of the meaning of every substring
            Iterator iSubstrings = lNext.iterator();
            while (iSubstrings.hasNext()) {
                // The lookup key is the string form of a vertex wrapping the
                // substring, exactly as in the previous implementation
                String sCur = new VertexImpl(iSubstrings.next()).toString();
                Object oTxt = siIndex.getMeaning(new VertexImpl(sCur));
                if (oTxt != null)
                    lMeanings.add(oTxt);    // Add meaning to list
                else
                    appendToLog("No meaning found...");
            }
        }
    }
    /**
     * Handles a click on the "Compare Files" button by running
     * DoCompareFiles() on a background thread. The thread is registered
     * before it starts and always unregisters itself when it ends, even if
     * the comparison terminates with an exception.
     *
     * @param evt the button's action event (unused)
     */
    private void CompareFilesBtnActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_CompareFilesBtnActionPerformed
        Thread t = new Thread() {
            @Override
            public void run() {
                try {
                    DoCompareFiles();
                } finally {
                    // Deregister on every exit path; otherwise a failed run
                    // would leave a dead thread in the active-action list.
                    unregisterThread(this);
                }
            }
        };
        registerThread(t);
        t.start();
    }//GEN-LAST:event_CompareFilesBtnActionPerformed

    /**
     * Handles a click on the "Select second file" button: opens a file
     * chooser (files only) and, if the user approves, writes the absolute
     * path of the chosen file into SelectSecondInputFileEdt.
     * <p>
     * The chooser starts in the path held by FilePathEdt, or in the working
     * directory when that field is empty.
     *
     * @param evt the button's action event (unused)
     */
    private void SelectSecondTestFileBtnActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SelectSecondTestFileBtnActionPerformed
        JFileChooser fcChooser = new JFileChooser();

        // Decide where browsing starts
        final java.io.File fStartDir;
        if (FilePathEdt.getText().length() == 0)
            fStartDir = new java.io.File(".");
        else
            fStartDir = new java.io.File(FilePathEdt.getText());
        fcChooser.setCurrentDirectory(fStartDir);
        fcChooser.setSelectedFile(fcChooser.getCurrentDirectory());
        fcChooser.setFileSelectionMode(JFileChooser.FILES_ONLY);

        // Nothing to do unless the user confirmed a selection
        if (fcChooser.showOpenDialog(this) != JFileChooser.APPROVE_OPTION)
            return;
        SelectSecondInputFileEdt.setText(fcChooser.getSelectedFile().getAbsolutePath());
        
    }//GEN-LAST:event_SelectSecondTestFileBtnActionPerformed

    /**
     * Handles a click on the "Chunk String" button: runs getSymbolsByEntropy
     * on the text of TermEdt (its return value is discarded), then chunks the
     * same text with chunkString and appends the printed chunk list to the log.
     *
     * @param evt the button's action event (unused)
     */
    private void jButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton1ActionPerformed
        final String sTerm = TermEdt.getText();
        getSymbolsByEntropy(sTerm, 3);
        // Sample String:
        // Is this, actual, text a test in deed? Yes! It should be...
        // Check if current point belongs to a ''normal'' sequence of symbols 
        // <= This would only mean that we 
        // have met such a sequence in the training set. What did I just say? Who cares...
        appendToLog(
                utils.printList(
                        Arrays.asList(chunkString(sTerm))));
    }//GEN-LAST:event_jButton1ActionPerformed

    private void DoBreakFileDown() {
        String sDataString = "";
        try {
            ByteArrayOutputStream bsOut = new ByteArrayOutputStream();
            FileInputStream fiIn = new FileInputStream(SelectInputFileEdt.getText());
            int iData = 0;
            while ((iData = fiIn.read()) > -1)
                bsOut.write(iData);
            sDataString = bsOut.toString();
        }
        catch (IOException ioe) {
            ioe.printStackTrace();
        }
        
        //String [] saSubtexts = chunkString(sDataString);
        List saSubtexts = Chunker.chunkString(sDataString);
        // Show status window
        final gr.demokritos.iit.jinsect.gui.StatusFrame fStatus = 
                new gr.demokritos.iit.jinsect.gui.StatusFrame();
        fStatus.setVisible(true);
        int iCnt = 0;
        try {
            //Iterator iStrIter = Arrays.asList(saSubtexts).iterator();
            Iterator iStrIter = saSubtexts.iterator();
            while (iStrIter.hasNext())
            {
                String sStr = (String)iStrIter.next();
                fStatus.setStatus("Extracting meanings...", (double)(++iCnt) / 
                        saSubtexts.size());
                // DEBUG LINES
                // appendToLog("\nCHECKING\n" + sStr + "\n");
                //////////////

                // Analyse chunk in substrings
                List lSubStrings = utils.getSubStrings(sStr, sStr.length(), this);
                if (lSubStrings.size() == 0)
                    continue;
                // Keep most promising substrings, i.e. with the minimal parts
                // lSubStrings = KeepLeastSizeSets(lSubStrings, 2);
                appendToLog(utils.printList(lSubStrings));
                ArrayList lOptions = new ArrayList();
                lOptions.addAll(lSubStrings);

                Iterator iIter = lOptions.iterator();

                HashMap hSubstringSet = new HashMap();

                while (iIter.hasNext()) {
                    Object oNext = iIter.next();
                    List lNext;
                    if (oNext instanceof List) {
                        lNext = (List)oNext;
                    }
                    else 
                    {
                        lNext = new ArrayList();
                        lNext.add(oNext);
                    }
                    if (hSubstringSet.containsKey(lNext.toString()))
                        continue;


                    appendToLog("Case " + utils.printList(lNext));
                    hSubstringSet.put(lNext.toString(),1);
                    // Create vertex list
                    List lNodes = new ArrayList();
                    Iterator iSubstrings = lNext.iterator();
                    while (iSubstrings.hasNext()) {
                        lNodes.add(new VertexImpl(iSubstrings.next()));
                    }

                    // Attempt retrieval of union meanings
                    Iterator iNodes = lNodes.iterator();
                    String sUnionMeaning = "";
                    while (iNodes.hasNext()) {
                        String sCur = ((Vertex)iNodes.next()).toString();
                        Object oTxt = siIndex.getMeaning(new VertexImpl(sCur));
                        if (oTxt != null)
                            sUnionMeaning += "-" + siIndex.meaningToString(oTxt) + "-";
                        else
                            appendToLog("No meaning found...");
                    }

//                    if (sUnionMeaning.length() > 0)
//                    {
//                        appendToLog("MEANING: \n" + sUnionMeaning + "\nEND MEANING\n");
//
//    //                    int iAnswer = JOptionPane.showConfirmDialog(this, "Do you want to look up constituent nodes?");
//    //                    if (iAnswer == JOptionPane.NO_OPTION)
//    //                        continue;
//    //                    if (iAnswer == JOptionPane.CANCEL_OPTION)
//    //                        return;
//                        continue;
//                    }
//                    else
//                        continue;



                    // Search common subnode
    //                Vertex vRes = sgOverallGraph.getCommonSubnode(lNodes);
    //
    //                boolean bFound;
    //                if (vRes != null)
    //                {
    //                    appendToLog(vRes.toString());
    //                    Object oTxt = siIndex.getMeaning(vRes);
    //                    if (oTxt != null)
    //                        appendToLog(oTxt.toString());
    //                    else
    //                        appendToLog("No meaning found...");
    //                }
    //                else
    //                    appendToLog("No common child node found... Backtracking...");
    //
    //                bFound = vRes != null;
    //                int iSize = lNodes.size();
    //
    //                while (!bFound) {
    //                    // While not a single node list
    //                    while (--iSize > 1) {
    //                        Union uPossible = jinsect.utils.getCombinationsBy(lNodes, iSize);
    //                        Iterator iPossibleIter = uPossible.iterator();
    //                        while (iPossibleIter.hasNext()) {
    //                            Concatenation cCur = (Concatenation)iPossibleIter.next();
    //                            vRes = sgOverallGraph.getCommonSubnode(cCur);
    //                            if (vRes != null) {
    //                                appendToLog(vRes.toString());
    //                                bFound = true;
    //                                Object oTxt = siIndex.getMeaning(vRes);
    //                                if (oTxt != null)
    //                                    appendToLog(oTxt.toString());
    //                            }
    //                        }
    //                        if (bFound) // Found children. Exit loop.
    //                            break;
    //                    }
    //                    if (iSize <= 1)
    //                        break;
    //                }
    //                if (!bFound)
    //                    appendToLog("No common child node found...");
                }

            }
        }
        finally
        {
            fStatus.setVisible(false);
        }
        
                
    }
    
    /**
     * Action handler of {@code BreakFileDownBtn}: runs {@code DoBreakFileDown()}
     * on a newly created thread. The thread is registered before it starts and
     * unregisters itself when the work ends.
     *
     * @param evt The triggering event (not used).
     */
    private void BreakFileDownBtnActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_BreakFileDownBtnActionPerformed
        Thread t = new Thread() {
            @Override
            public void run() {
                try {
                    DoBreakFileDown();
                }
                finally {
                    // Unregister even when the break-down fails with an
                    // unchecked exception, so no finished thread stays registered.
                    unregisterThread(this);
                }
            }
        };
        registerThread(t);
        t.start();

    }//GEN-LAST:event_BreakFileDownBtnActionPerformed

    /**
     * Action handler of {@code SelectTestFileBtn}: shows an open-file dialog
     * (files only) and, when the user approves the selection, writes the
     * absolute path of the chosen file into {@code SelectInputFileEdt}.
     *
     * @param evt The triggering event (not used).
     */
    private void SelectTestFileBtnActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SelectTestFileBtnActionPerformed
        JFileChooser fcChooser = new JFileChooser();

        // Start browsing at the path typed in FilePathEdt, or at the working
        // directory when that field is empty
        java.io.File fStart;
        if (FilePathEdt.getText().length() == 0) {
            fStart = new java.io.File(".");
        }
        else {
            fStart = new java.io.File(FilePathEdt.getText());
        }
        fcChooser.setCurrentDirectory(fStart);
        fcChooser.setSelectedFile(fcChooser.getCurrentDirectory());
        fcChooser.setFileSelectionMode(JFileChooser.FILES_ONLY);

        // Anything but an approval leaves the input field untouched
        if (fcChooser.showOpenDialog(this) != JFileChooser.APPROVE_OPTION) {
            return;
        }
        SelectInputFileEdt.setText(fcChooser.getSelectedFile().getAbsolutePath());

    }//GEN-LAST:event_SelectTestFileBtnActionPerformed

    /**
     * Action handler of {@code ClearGraphsBtn}: replaces the overall symbolic
     * graph with a fresh one, clears the document graph of every entry of
     * {@code cdDoc} and creates a new semantic index over the new graph.
     *
     * @param evt The triggering event (not used).
     */
    private void ClearGraphsBtnActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_ClearGraphsBtnActionPerformed
        // New graph; the second constructor argument follows the n-gram size
        // slider but never goes below 2
        int iLevelCount = Math.max(NGramSizeSldr.getValue(), 2);
        sgOverallGraph = new SymbolicGraph(1, iLevelCount);

        // Reset the graph of every document
        for (Object oDoc : cdDoc) {
            ((DistributionDocument)oDoc).clearDocumentGraph();
        }

        // New index on top of the new graph
        siIndex = new SemanticIndex(sgOverallGraph);
        try {
            siIndex.MeaningExtractor = new LocalWordNetMeaningExtractor();
        }
        catch (IOException ioeNoWordNet) {
            // The WordNet-based extractor could not be created
            siIndex.MeaningExtractor = null; // Use default
        }

    }//GEN-LAST:event_ClearGraphsBtnActionPerformed

    /**
     * Action handler of {@code FindPathBtn}. Currently a test harness: it
     * builds a graph over the data string {@code "abcdefg"}, asks for a common
     * subnode of the vertices "c", "e" and "h" and logs the outcome. When the
     * full list has no common subnode, smaller combinations of the vertices are
     * tried, from the largest size down to pairs.
     *
     * @param evt The triggering event (not used).
     */
    private void FindPathBtnActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_FindPathBtnActionPerformed
        // TODO: Implement correctly
        // TEST - OK
        /*
        SymbolicGraph g = new SymbolicGraph(1, 9);
        try {
            g.addEdge(new VertexImpl("a"), new VertexImpl("b"));
            g.addEdge(new VertexImpl("a"), new VertexImpl("c"));
            g.addEdge(new VertexImpl("b"), new VertexImpl("d"));
            g.addEdge(new VertexImpl("b"), new VertexImpl("e"));
            g.addEdge(new VertexImpl("e"), new VertexImpl("g"));
            g.addEdge(new VertexImpl("f"), new VertexImpl("d"));
            g.addEdge(new VertexImpl("f"), new VertexImpl("g"));
            g.addEdge(new VertexImpl("g"), new VertexImpl("f"));
            g.addEdge(new VertexImpl("f"), new VertexImpl("a"));
            List lRes = g.getPathBetween(new VertexImpl("f"), new VertexImpl("f"));
            if (lRes != null)
                appendToLog(lRes.toString());
            else
                appendToLog("Not found...");
            
            ArrayList alNodes = new ArrayList();
            alNodes.add(new VertexImpl("a"));
            alNodes.add(new VertexImpl("f"));
            Vertex vRes = g.getCommonSubnode(alNodes);
            if (vRes != null)
                appendToLog(vRes.toString());
            else
                appendToLog("No common child node found...");
            
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        */
        
        // TODO: Check for other interpretations
        
        
        SymbolicGraph g = new SymbolicGraph(1, 9);
        g.setDataString("abcdefg");
        ArrayList alNodes = new ArrayList();
        alNodes.add(new VertexImpl("c"));
        alNodes.add(new VertexImpl("e"));
        alNodes.add(new VertexImpl("h"));

        // Search common subnode of the complete list first
        Vertex vRes = g.getCommonSubnode(alNodes);
        boolean bFound = vRes != null;
        if (bFound)
            appendToLog(vRes.toString());
        else
            appendToLog("No common child node found... Backtracking...");

        // Backtrack over ever smaller combinations, stopping at pairs. The
        // size bound in the loop condition guarantees termination when no
        // combination has a common subnode (previously the search never ended
        // in that case).
        for (int iSize = alNodes.size() - 1; !bFound && iSize > 1; iSize--) {
            Union uPossible = gr.demokritos.iit.jinsect.utils.getCombinationsBy(alNodes, iSize);
            Iterator iPossibleIter = uPossible.iterator();
            // Every combination of the current size is checked and logged,
            // even after the first hit
            while (iPossibleIter.hasNext()) {
                Concatenation cCur = (Concatenation)iPossibleIter.next();
                vRes = g.getCommonSubnode(cCur);
                if (vRes != null) {
                    appendToLog(vRes.toString());
                    bFound = true;
                }
            }
        }
        
        if (!bFound)
            appendToLog("No common child node found...");

    }//GEN-LAST:event_FindPathBtnActionPerformed

    /**
     * Tells whether a string is accepted as a match. Strings shorter than two
     * characters, or longer than the number of available documents in
     * {@code cdDoc}, are rejected. Otherwise the string is accepted when the
     * normality reported by the document at index {@code length - 1} is at
     * least 0.5.
     *
     * @param o The candidate; it is cast to {@link String}.
     * @return {@code true} if the candidate is accepted, {@code false} otherwise.
     */
    public final boolean match(Object o) {
        String sCandidate = (String)o;
        int iLength = sCandidate.length();

        if (iLength < 2)
            return false; // TODO: Check if true or false

        // Only a full match counts: a document for exactly this length must exist
        if (cdDoc.length < iLength)
            return false;

        // DEBUG LINES
        // System.out.println("Matched " + o.toString() + " for a normality of " + dNorm + ".");
        //////////////
        double dNorm = cdDoc[iLength - 1].normality(sCandidate);
        return dNorm >= 0.5;
    }

    /**
     * Action handler of {@code BreakDownBtn}: breaks the term typed in
     * {@code TermEdt} into alternative substring decompositions and, for each
     * distinct decomposition the user confirms, looks for a common subnode of
     * its parts in the overall graph and logs the subnode and its meaning.
     * When the complete decomposition has no common subnode, smaller
     * combinations of its parts are tried, down to pairs.
     *
     * @param evt The triggering event (not used).
     */
    private void BreakDownBtnActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_BreakDownBtnActionPerformed
        String sStr = TermEdt.getText();
        List lSubStrings = utils.getSubStrings(sStr, sStr.length(), this);
        appendToLog(utils.printList(lSubStrings));

        // String forms of the decompositions already handled (used as a set)
        HashMap hSubstringSet = new HashMap();

        Iterator iIter = lSubStrings.iterator();
        while (iIter.hasNext()) {
            // Tolerate entries that are not lists by wrapping them in a
            // single-element list, instead of failing with a ClassCastException
            Object oNext = iIter.next();
            List lNext;
            if (oNext instanceof List) {
                lNext = (List)oNext;
            }
            else {
                lNext = new ArrayList();
                lNext.add(oNext);
            }

            // Skip decompositions that were already examined
            if (hSubstringSet.containsKey(lNext.toString()))
                continue;

            appendToLog("Case " + utils.printList(lNext));
            hSubstringSet.put(lNext.toString(),1);

            // Create vertex list
            List lNodes = new ArrayList();
            Iterator iSubstrings = lNext.iterator();
            while (iSubstrings.hasNext()) {
                lNodes.add(new VertexImpl(iSubstrings.next()));
            }

            int iAnswer = JOptionPane.showConfirmDialog(this, "Do you want to look up constituent nodes?");
            if (iAnswer == JOptionPane.NO_OPTION)
                continue;
            if (iAnswer == JOptionPane.CANCEL_OPTION)
                return;

            // Search common subnode of the complete decomposition first
            Vertex vRes = sgOverallGraph.getCommonSubnode(lNodes);
            boolean bFound = vRes != null;
            if (bFound)
            {
                appendToLog(vRes.toString());
                Object oTxt = siIndex.getMeaning(vRes);
                appendToLog(siIndex.meaningToString(oTxt));
            }
            else
                appendToLog("No common child node found... Backtracking...");

            // Backtrack over ever smaller combinations, stopping at pairs.
            // All combinations of the first successful size are reported.
            for (int iSize = lNodes.size() - 1; !bFound && iSize > 1; iSize--) {
                Union uPossible = gr.demokritos.iit.jinsect.utils.getCombinationsBy(lNodes, iSize);
                Iterator iPossibleIter = uPossible.iterator();
                while (iPossibleIter.hasNext()) {
                    Concatenation cCur = (Concatenation)iPossibleIter.next();
                    vRes = sgOverallGraph.getCommonSubnode(cCur);
                    if (vRes != null) {
                        appendToLog(vRes.toString());
                        bFound = true;
                        Object oTxt = siIndex.getMeaning(vRes);
                        appendToLog(siIndex.meaningToString(oTxt));
                    }
                }
            }
            if (!bFound)
                appendToLog("No common child node found...");
        }
        

    }//GEN-LAST:event_BreakDownBtnActionPerformed

    /**
     * Action handler of {@code LookUpTermBtn}: logs the normality of the term
     * typed in {@code TermEdt} against each document of {@code cdDoc}, one
     * line per level. The number of levels reported is limited by both the
     * number of documents and the length of the term.
     *
     * @param evt The triggering event (not used).
     */
    private void LookUpTermBtnActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_LookUpTermBtnActionPerformed
        
        String sTerm = TermEdt.getText();
        appendToLog("Looking up term:" + sTerm);
        appendToLog("===============");

        // Levels are reported in ascending order, starting from level 0
        int iLevelCount = Math.min(cdDoc.length, sTerm.length());
        for (int iLevel = 0; iLevel < iLevelCount; iLevel++) {
            // Display distribution similarity
            appendToLog("Level " + iLevel + ":" + cdDoc[iLevel].normality(sTerm));
        }
        appendToLog("===DONE========\n");
        
    }//GEN-LAST:event_LookUpTermBtnActionPerformed

    /**
     * Queues a log message: the text is appended (followed by a newline) to
     * {@code LogMemo} and printed to standard error by a task submitted through
     * {@link SwingUtilities#invokeLater(Runnable)}. The time of the request is
     * stored in {@code dLastUpdate}.
     *
     * @param s The message to log.
     */
    private void appendToLog(final String s) {
        // Throttling to one update per second is currently switched off; only
        // the time of the latest request is recorded.
        dLastUpdate = new Date();

        try {
            SwingUtilities.invokeLater(new Runnable() {
                public void run() {
                    LogMemo.append(s + "\n");
                    System.err.println(s);
                }
            });
        }
        catch (Exception e) {
            // Ignore
        }

    }
    
    /** Extract language symbols from candidate text, based on entropy 
     * calculation.
     * <p>
     * For every n-gram of the text with a size from 1 up to (but not including)
     * {@code iMaxSymbolSize} that exists in the overall graph and has outgoing
     * edges, the entropy of the normalized edge-weight distribution is
     * calculated. NOTE: the selection of symbols based on these entropies is
     * not implemented yet, so the returned list is currently always empty.
     * 
     * @param sStr The string from which symbols are extracted.
     * @param iMaxSymbolSize The exclusive upper bound of the n-gram sizes
     * examined.
     * @return A {@link List} indicating the extracted symbols (currently empty).
     */
    private List getSymbolsByEntropy(String sStr, int iMaxSymbolSize) {
        ArrayList alRes = new ArrayList();
        // Entropy of the children distribution => n-gram. N-grams that share
        // an entropy value replace each other.
        TreeMap tmRes = new TreeMap();
        
        for (int iNGramSize = 1; iNGramSize < iMaxSymbolSize; iNGramSize++) {
            int iLastStart = sStr.length() - iNGramSize;
            for (int iStart = 0; iStart <= iLastStart; iStart++) {
                String sNGram = sStr.substring(iStart, iStart + iNGramSize);

                // Ignore n-grams that are not part of the graph
                Vertex vStrNode = gr.demokritos.iit.jinsect.utils.locateVertexInGraph(sgOverallGraph, sNGram);
                if (vStrNode == null)
                    continue;

                // Ignore n-grams without outgoing edges
                List lEdges = gr.demokritos.iit.jinsect.utils.getOutgoingEdges(sgOverallGraph, vStrNode);
                if (lEdges.size() == 0)
                    continue;

                // Build the distribution of edge weights over the children
                Distribution dDist = new Distribution();
                for (Iterator iEdgeIter = lEdges.iterator(); iEdgeIter.hasNext(); ) {
                    WeightedEdge weCur = (WeightedEdge)iEdgeIter.next();
                    dDist.setValue(weCur.toString(), weCur.getWeight());
                }

                // Determine entropy of children distribution
                dDist.normalizeToSum();
                double dEntropy = statisticalCalculation.entropy(dDist);
                tmRes.put(dEntropy, sNGram);
            }
        }
        
        // TODO: Select most promising ones
        // TODO: Add to symbol pool
        
        return alRes;
    }
    
    /**
     * Chunks a string using an n-gram size of 1; shorthand for
     * {@code chunkString(sStr, 1)}.
     *
     * @param sStr The string to chunk.
     * @return An array of substrings, which are chunks of the original string.
     */
    private String[] chunkString(String sStr) {
        final int iUnigramSize = 1;
        return chunkString(sStr, iUnigramSize);
    }

    /**
     * Collects the n-grams of a given size that appear in a string and exist
     * in the overall graph, keyed by the entropy of their next character.
     * Because the entropy is the map key, n-grams that share an entropy value
     * replace each other.
     *
     * @param sStr The string to scan.
     * @param iNGramSize The size of the n-grams to examine.
     * @return A {@link TreeMap} from entropy to n-gram.
     */
    private TreeMap identifyCandidateDelimiters(String sStr, int iNGramSize) {
        TreeMap tmCandidates = new TreeMap();
        
        int iLastStart = sStr.length() - iNGramSize;
        for (int iStart = 0; iStart <= iLastStart; iStart++) {
            String sNGram = sStr.substring(iStart, iStart + iNGramSize);

            // Ignore n-grams already listed, as well as n-grams that do not
            // exist in the graph
            boolean bUsable = !tmCandidates.containsValue(sNGram)
                    && gr.demokritos.iit.jinsect.utils.locateVertexInGraph(sgOverallGraph, sNGram) != null;
            if (bUsable) {
                // Plain (not normalized) entropy is used as the key
                double dEntropy = getEntropyOfNextChar(sNGram, false);
                tmCandidates.put(dEntropy, sNGram);
            }
        }
        
        return tmCandidates;
    }
    
    /**
     * Returns the plain (not normalized) entropy of the next character for a
     * given string; shorthand for {@code getEntropyOfNextChar(sStr, false)}.
     *
     * @param sStr The string to look up in the overall graph.
     * @return The entropy calculated for the string.
     */
    private double getEntropyOfNextChar(String sStr) {
        final boolean bNormalized = false;
        return getEntropyOfNextChar(sStr, bNormalized);
    }
    
    /**
     * Calculates the entropy of the outgoing-edge weight distribution of the
     * vertex that corresponds to a string in the overall graph. A warning is
     * printed to standard error for every edge weight, and for a final result,
     * that is not a number.
     *
     * @param sStr The string to look up in the overall graph.
     * @param bNormalized If {@code true}, the entropy is divided by the base-2
     * logarithm of the total of the distribution values.
     * @return The entropy, or 0.0 if the string has no vertex in the graph or
     * the vertex has no outgoing edges.
     */
    private final double getEntropyOfNextChar(String sStr, boolean bNormalized) {        
        // Ignore inexistent symbols
        Vertex vStrNode = gr.demokritos.iit.jinsect.utils.locateVertexInGraph(sgOverallGraph, sStr);
        if (vStrNode == null)
            return 0.0;

        // Nothing to measure when the vertex has no outgoing edges
        List lEdges = gr.demokritos.iit.jinsect.utils.getOutgoingEdges(sgOverallGraph, vStrNode);
        if (lEdges.size() == 0)
            return 0.0;

        // Distribution of weights over the outgoing edges
        Distribution dDist = new Distribution();
        for (Iterator iEdgeIter = lEdges.iterator(); iEdgeIter.hasNext(); ) {
            WeightedEdge weCur = (WeightedEdge)iEdgeIter.next();
            if (Double.isNaN(weCur.getWeight()))
                System.err.println("WARNING: Not a number edge weight for edge:" + weCur.toString());
            dDist.setValue(weCur.toString(), weCur.getWeight());
        }
        dDist.normalizeToSum();

        double dRes;
        if (!bNormalized) {
            dRes = statisticalCalculation.entropy(dDist);
        }
        else {
            // NOTE(review): the total is read AFTER normalizeToSum(); if the
            // normalization makes the values sum to 1, this logarithm is 0 and
            // the normalized result is not finite - confirm against Distribution.
            double dLogOccurences = (Math.log(dDist.calcTotalValues()) / Math.log(2));
            dRes = statisticalCalculation.entropy(dDist) / dLogOccurences;
        }

        if (Double.isNaN(dRes))
            System.err.println("WARNING: Not a number entropy for symbol:" + vStrNode);
        return dRes;
    }
    
    /** Returns an estimate of the probability of a prefix followed by a given
     * suffix, based on the data string and the alphabet of the Symbolic Graph,
     * under the assumption that the suffix is generated randomly.
     * <p>
     * The prefix occurrences are counted in the data string. The count of the
     * full string is estimated as the prefix count multiplied by the chance of
     * drawing the suffix symbols at random from the alphabet, rounded up.
     * NOTE(review): the divisors {@code length(text) / length(string)} are
     * written as a quotient of two lengths, which truncates if both are
     * integers - confirm that this is intended.
     *
     *@param sPrefix The prefix, the occurrences of which are counted.
     *@param sSuffix The suffix considered to be randomly generated.
     *@return 1.0 if the prefix is empty, 0.0 if the prefix does not occur in the
     * data string, otherwise p(sPrefix) * p(sPrefix, sSuffix).
     */
    private final double getProbabilityOfStringInRandomText(String sPrefix, String sSuffix) {
        if (sPrefix.length() == 0) // Check for empty string
            return 1.0;
        
        // Count prefix occurrences (overlapping ones included) directly in the
        // data string of the symbolic graph
        int iPrefixCount = 0;
        for (int iOccurence = sgOverallGraph.getDataString().indexOf(sPrefix, 0);
                iOccurence > -1;
                iOccurence = sgOverallGraph.getDataString().indexOf(sPrefix, iOccurence + 1))
            iPrefixCount++;
        
        // Consider that the occurence of a random suffix is a proportion of the prefix occurences,
        // as indicated by the probability of the given suffix, as a random selection process of n
        // symbols from the symbol set.
        double dRandomSuffixProb = Math.pow(1.0 / sgOverallGraph.getAlphabet().size(), sSuffix.length());
        int iFullStringCount = (int)Math.ceil((double)iPrefixCount * dRandomSuffixProb);

        // Consider p(sPrefix) = N(sPrefix) / (length(Text) / length(sPrefix))
        double pPrefix = (double) iPrefixCount / 
                (sgOverallGraph.getDataString().length() / sPrefix.length());
        
        String sFullString = sPrefix + sSuffix;
        double pJoined = (double) iFullStringCount / 
                (sgOverallGraph.getDataString().length() / sFullString.length());

        // Return p(sPrefix) * p(sPrefix, sSuffix)
        if (iPrefixCount == 0)
            return 0.0;
        return pPrefix * pJoined;
    }
    
    /** Returns an estimate of the probability of a prefix followed by a given
     * suffix, based on actual occurrence counts within the data string of the
     * Symbolic Graph.
     * <p>
     * NOTE(review): the divisors {@code length(text) / length(string)} are
     * written as a quotient of two lengths, which truncates if both are
     * integers - confirm that this is intended.
     *
     *@param sPrefix The prefix required.
     *@param sSuffix The suffix that follows the prefix.
     *@return 1.0 if the prefix is empty, 0.0 if the prefix does not occur in the
     * data string, otherwise p(sPrefix) * p(sPrefix, sSuffix).
     */
    private final double getProbabilityOfStringInText(String sPrefix, String sSuffix) {
        if (sPrefix.length() == 0) // Check for empty string
            return 1.0;
        
        // Count prefix occurrences (overlapping ones included) directly in the
        // data string of the symbolic graph
        int iPrefixCount = 0;
        for (int iOccurence = sgOverallGraph.getDataString().indexOf(sPrefix, 0);
                iOccurence > -1;
                iOccurence = sgOverallGraph.getDataString().indexOf(sPrefix, iOccurence + 1))
            iPrefixCount++;
        
        // Count the occurrences of prefix and suffix together in the same way
        String sFullString = sPrefix + sSuffix;
        int iFullStringCount = 0;
        for (int iOccurence = sgOverallGraph.getDataString().indexOf(sFullString, 0);
                iOccurence > -1;
                iOccurence = sgOverallGraph.getDataString().indexOf(sFullString, iOccurence + 1))
            iFullStringCount++;
        
        // Consider p(sPrefix) = N(sPrefix) / (length(Text) / length(sPrefix))
        double pPrefix = (double) iPrefixCount / 
                (sgOverallGraph.getDataString().length() / sPrefix.length());
        double pJoined = (double) iFullStringCount / 
                (sgOverallGraph.getDataString().length() / sFullString.length());

        // Return p(sPrefix) * p(sPrefix, sSuffix)
        if (iPrefixCount == 0)
            return 0.0;
        return pPrefix * pJoined;
        
    }
    
    /**
     * Returns the indices of possible split points of a string, given an
     * array of delimiter characters. Every character is turned into a
     * one-character delimiter string, keyed by its array index, and the work is
     * delegated to the {@link SortedMap}-based overload.
     *
     * @param sStr The string to examine.
     * @param lDelimiters The delimiter characters.
     * @return The split points, as returned by the map-based overload.
     */
    private Integer[] splitPointsByDelimiterList(String sStr, char[] lDelimiters) {
        TreeMap tmDels = new TreeMap();
        int iKey = 0;
        for (char cDelimiter : lDelimiters) {
            tmDels.put(iKey++, String.valueOf(cDelimiter));
        }
        
        return splitPointsByDelimiterList(sStr, tmDels);
    }
    
    /**
     * Returns the indices of possible split points of a string, given a map of
     * candidate delimiters. Every occurrence of a delimiter (overlapping ones
     * included) adds one split point, located right after the end of the
     * occurrence. The collected points are passed through
     * {@code utils.bubbleSortArray} before they are returned; points produced
     * by different delimiters may coincide and are not de-duplicated.
     *
     * @param sStr The string to examine.
     * @param lDelimiters A map with the delimiter strings as values. Null or
     * empty delimiters are ignored.
     * @return The split points found.
     */
    private Integer[] splitPointsByDelimiterList(String sStr, SortedMap lDelimiters) {
        ArrayList alRes = new ArrayList();
        // Work on a copy, so that the given map is left untouched
        TreeMap lLocal = new TreeMap();
        lLocal.putAll(lDelimiters);
        
        // For every candidate delimiter, from the last key to the first
        while (lLocal.size() > 0) {
            Object oNext = lLocal.lastKey();
            String sDelimiter = (String)lLocal.remove(oNext);

            // An empty delimiter matches at every index, including the end of
            // the string, so the scan below would never terminate for it
            if (sDelimiter == null || sDelimiter.length() == 0)
                continue;

            // Get all split points
            int iNextSplit;
            int iLastSplit = 0;
            while ((iNextSplit = sStr.indexOf(sDelimiter, iLastSplit)) > -1)
            {
                // TODO : Check
                alRes.add(Integer.valueOf(iNextSplit + sDelimiter.length()));
                iLastSplit = iNextSplit + 1;
            }
        }
        Integer [] iaRes = new Integer[alRes.size()];
        alRes.toArray(iaRes);
        gr.demokritos.iit.jinsect.utils.bubbleSortArray(iaRes);
        
        return iaRes;
    }
    
    /**
     * Chunks a string at the split points determined by the delimiters that
     * {@code getDelimiters()} returns, and logs the number of resulting chunks.
     *
     *@param sStr   The string to chunk.
     *@param iNGramSize The n-gram size to take into account for delimiter
     * analysis. NOTE: this value is currently not used by the method.
     *@return An array of substrings, which are chunks of the original string.
     */
    private String[] chunkString(String sStr, int iNGramSize) {        
        // Locate the separation points and cut the string there
        String[] saChunks = splitStringByDelimiterPoints(sStr,
                splitPointsByDelimiterList(sStr, getDelimiters()));
        
        // DEBUG LINES
        appendToLog("Text splitted into "+ saChunks.length + " chunks.");
        // appendToLog(jinsect.utils.printList(Arrays.asList(sRes), "\n*"));
        //appendToLog("*************");
        //////////////
        return saChunks;
    }
    
    /**
     * Calculates a weighted mean of the entropy change caused by the symbol at
     * a given position, over context sizes from {@code iMinDistance}
     * (inclusive) to {@code iMaxDistance} (exclusive). For every context size
     * the entropy of the next character is measured for the context with and
     * without the symbol, and the difference is weighted by
     * {@code iMaxDistance - distance + 1}, so that the closest contexts have
     * the maximum impact. The weighted sum is divided by the total of the
     * weights that were applied.
     *
     * @param sStr The text containing the candidate symbol.
     * @param iPos The position of the candidate symbol; a position equal to the
     * length of the text is tolerated and yields zero differences.
     * @param iMaxDistance The exclusive upper bound of the context sizes.
     * @param iMinDistance The inclusive lower bound of the context sizes.
     * @param bBefore If {@code true} the context precedes the symbol, otherwise
     * it follows the symbol.
     * @return The weighted mean, or NaN when the range of context sizes is empty.
     */
    private double DetermineWMeanMutualInformationOf(String sStr, int iPos, 
            int iMaxDistance, int iMinDistance, boolean bBefore) {
        // Index right after the candidate symbol, clamped so that a candidate
        // at the very end of the text does not overrun the string
        int iAfterPos = Math.min(iPos + 1, sStr.length());
    
        double dWeightedSum = 0.0;
        double dTotalWeight = 0.0;
        for (int iCurDist = iMinDistance; iCurDist < iMaxDistance; iCurDist++) {
            // Should check if the delimiter is part of the adjacent sequence of symbols            
            // Using pointwise mutual information
            int iSubStrStart = Math.max(iPos - iCurDist, 0);
            int iSubStrEnd = Math.min(iPos + iCurDist, sStr.length());
            double dPrvEntropy, dCurEntropy;
            if (bBefore) {
                // Get uncertainty of next symbol, given the preceding substring
                dPrvEntropy = getEntropyOfNextChar(sStr.substring(iSubStrStart, iPos));
                // Get uncertainty of next symbol, given the preceding substring PLUS the delimiter
                dCurEntropy = getEntropyOfNextChar(sStr.substring(iSubStrStart, iAfterPos));
                // Small decrease or increase in the entropy, shows that the candidate is not very important to the decision of the next
                // symbol and thus, is probably part of an existing word.
            }
            else {
                // Get uncertainty of next symbol, given the following substring
                dPrvEntropy = getEntropyOfNextChar(
                        sStr.substring(Math.min(iAfterPos, iSubStrEnd), iSubStrEnd));
                // Get uncertainty of next symbol, given the following substring PLUS the delimiter
                dCurEntropy = getEntropyOfNextChar(sStr.substring(iPos, iSubStrEnd));                
            }
            double dMutualInformation = dCurEntropy - dPrvEntropy;                
            // Closest tests should have maximum impact
            double dWeight = iMaxDistance - iCurDist + 1;
            dWeightedSum += dWeight * dMutualInformation;
            dTotalWeight += dWeight;
        }
        
        // Normalize by the weights actually applied (the sum of the distances,
        // which was used before, does not match those weights)
        return dWeightedSum / dTotalWeight;
    }
    
    /**
     * Calculates the mean entropy change caused by the symbol at a given
     * position, over context sizes from {@code iMinDistance} (inclusive) to
     * {@code iMaxDistance} (exclusive). For every context size the entropy of
     * the next character is measured for the context with and without the
     * symbol; the differences are summed and divided by
     * {@code iMaxDistance - iMinDistance}.
     *
     * @param sStr The text containing the candidate symbol.
     * @param iPos The position of the candidate symbol; a position equal to the
     * length of the text is tolerated and yields zero differences.
     * @param iMaxDistance The exclusive upper bound of the context sizes.
     * @param iMinDistance The inclusive lower bound of the context sizes.
     * @param bBefore If {@code true} the context precedes the symbol, otherwise
     * it follows the symbol.
     * @return The mean difference; NaN when both bounds are equal.
     */
    private double DetermineMeanMutualInformationOf(String sStr, int iPos, 
            int iMaxDistance, int iMinDistance, boolean bBefore) {
        // Index right after the candidate symbol, clamped so that a candidate
        // at the very end of the text does not overrun the string
        int iAfterPos = Math.min(iPos + 1, sStr.length());
    
        double dMeanMutualInfo = 0.0;
        for (int iCurDist = iMinDistance; iCurDist < iMaxDistance; iCurDist++) {
            // Should check if the delimiter is part of the adjacent sequence of symbols            
            // Using pointwise mutual information
            int iSubStrStart = Math.max(iPos - iCurDist, 0);
            int iSubStrEnd = Math.min(iPos + iCurDist, sStr.length());
            double dPrvEntropy, dCurEntropy;
            if (bBefore) {
                // Get uncertainty of next symbol, given the preceding substring
                dPrvEntropy = getEntropyOfNextChar(sStr.substring(iSubStrStart, iPos));
                // Get uncertainty of next symbol, given the preceding substring PLUS the delimiter
                dCurEntropy = getEntropyOfNextChar(sStr.substring(iSubStrStart, iAfterPos));
                // Small change in the entropy, shows that the candidate is not very important to the decision of the next
                // symbol and thus, is probably part of an existing word.
            }
            else {
                // Get uncertainty of next symbol, given the following substring
                dPrvEntropy = getEntropyOfNextChar(
                        sStr.substring(Math.min(iAfterPos, iSubStrEnd), iSubStrEnd));
                // Get uncertainty of next symbol, given the following substring PLUS the delimiter
                dCurEntropy = getEntropyOfNextChar(sStr.substring(iPos, iSubStrEnd));                
            }
            dMeanMutualInfo += dCurEntropy - dPrvEntropy;
        }
        
        dMeanMutualInfo /= iMaxDistance - iMinDistance; // Get mean
        return dMeanMutualInfo;
    }
    
    /**
     * Calculates, for every context size from {@code iMinDistance} (inclusive)
     * to {@code iMaxDistance} (exclusive), the entropy change caused by the
     * symbol at a given position. For every context size the entropy of the
     * next character is measured for the context with and without the symbol,
     * and the difference is stored in a {@link Distribution} under the context
     * size.
     *
     * @param sStr The text containing the candidate symbol.
     * @param iPos The position of the candidate symbol; a position equal to the
     * length of the text is tolerated and yields zero differences.
     * @param iMaxDistance The exclusive upper bound of the context sizes.
     * @param iMinDistance The inclusive lower bound of the context sizes.
     * @param bBefore If {@code true} the context precedes the symbol, otherwise
     * it follows the symbol.
     * @return The values of the distribution's tree map, as an array.
     */
    private Double[] DetermineMutualInformationDistributionOf(String sStr,
            int iPos, int iMaxDistance, int iMinDistance, boolean bBefore) {
        Distribution dRes = new Distribution();
        // Index right after the candidate symbol, clamped so that a candidate
        // at the very end of the text does not overrun the string
        int iAfterPos = Math.min(iPos + 1, sStr.length());
        
        for (int iCurDist = iMinDistance; iCurDist < iMaxDistance; iCurDist++) {
            // Should check if the delimiter is part of the adjacent sequence of symbols            
            // Using pointwise mutual information
            int iSubStrStart = Math.max(iPos - iCurDist, 0);
            int iSubStrEnd = Math.min(iPos + iCurDist, sStr.length());
            double dPrvEntropy, dCurEntropy;
            if (bBefore) {
                // Get uncertainty of next symbol, given the preceding substring
                dPrvEntropy = getEntropyOfNextChar(sStr.substring(iSubStrStart, iPos));
                // Get uncertainty of next symbol, given the preceding substring PLUS the delimiter
                dCurEntropy = getEntropyOfNextChar(sStr.substring(iSubStrStart, iAfterPos));
                // Small change in the entropy, shows that the candidate is not very important to the decision of the next
                // symbol and thus, is probably part of an existing word.
            }
            else {
                // Get uncertainty of next symbol, given the following substring
                dPrvEntropy = getEntropyOfNextChar(
                        sStr.substring(Math.min(iAfterPos, iSubStrEnd), iSubStrEnd));
                // Get uncertainty of next symbol, given the following substring PLUS the delimiter
                dCurEntropy = getEntropyOfNextChar(sStr.substring(iPos, iSubStrEnd));                
            }
            double dMutualInformation = dCurEntropy - dPrvEntropy;                
            dRes.setValue(iCurDist, dMutualInformation);
        }
        Double[] dblRes = new Double[dRes.asTreeMap().size()];
        dRes.asTreeMap().values().toArray(dblRes);
        return dblRes;
    }
    
    private Integer [] evalAndSelectActualDelimiterPoints(Integer[] iaDelimPoints, String sStr) {
        int iCurDelim = 0;
        ArrayList alList = new ArrayList();
        
        // DEBUG LINES
        try {
            FileWriter fOut = new FileWriter("train.arff");
            HashMap hDelims = new HashMap();
            for (int iCnt = 0; iCnt < iaDelimPoints.length; iCnt++) {
                String sChar = sStr.substring(iaDelimPoints[iCnt], Math.min(iaDelimPoints[iCnt] + 1, sStr.length()));
                hDelims.put(String.valueOf(sChar.hashCode()), sChar);
            }
            
            fOut.write("@relation train"+ System.getProperty("line.separator")
                + System.getProperty("line.separator"));
            // Write delimiters
            fOut.write("@attribute delimChar {");
            
            String sLegend = "";
                Iterator iIter = hDelims.keySet().iterator();
            while (iIter.hasNext()) {
                String sNext = (String)iIter.next();
                fOut.write(sNext);
                 if (Character.isISOControl(sNext.charAt(0)) || 
                        Character.isWhitespace(sNext.charAt(0)))
                     sLegend += "% " + sNext + ": chr(" + sNext + ")" + System.getProperty("line.separator");
                 else
                     sLegend += "% " + sNext + ": chr(" + hDelims.get(sNext) + ")" + System.getProperty("line.separator");
                if (iIter.hasNext())
                    fOut.write(",");
            }
            fOut.write("}" + System.getProperty("line.separator"));
            //////////////////
            fOut.write("@attribute class {delim,nondelim}" + System.getProperty("line.separator"));
            for (int iCnt=0; iCnt < NGramSizeSldr.getValue(); iCnt++) {
                fOut.write("@attribute entropyBefore" + iCnt + " real" + System.getProperty("line.separator"));
            }
            
            for (int iCnt=0; iCnt < NGramSizeSldr.getValue(); iCnt++) {
                fOut.write("@attribute entropyAfter" + iCnt + " real" + System.getProperty("line.separator"));
            }
            fOut.write(System.getProperty("line.separator") + System.getProperty("line.separator"));
            fOut.write("@data" + System.getProperty("line.separator"));
            
            // For all delimiter points
            while (iCurDelim < iaDelimPoints.length) {

                // DO NOT USE 
                // Check if current point belongs to a ''normal'' sequence of symbols <= This would only mean that we
                // have met such a sequence in the training set.
                // int iSubStrStart = Math.max(iaDelimPoints[iCurDelim] - (NGramSizeSldr.getValue() / 2), 0);
                // int iSubStrEnd = Math.min(iaDelimPoints[iCurDelim] + (NGramSizeSldr.getValue() / 2), sStr.length());
                // If not in a normal pattern.
                // if (!match(sStr.substring(iSubStrStart, iSubStrEnd)))
                //    alList.add(iaDelimPoints[iCurDelim]);

                // Should check if the delimiter is part of the adjacent sequence of symbols            
                // Using pointwise mutual information
                double dMutualInformationBefore = DetermineMeanMutualInformationOf(sStr, iaDelimPoints[iCurDelim], 
                        Math.max(NGramSizeSldr.getValue(), 1), 1, true);
                double dMutualInformationAfter = DetermineMeanMutualInformationOf(sStr, iaDelimPoints[iCurDelim], 
                        Math.max(NGramSizeSldr.getValue(), 1), 1, false);

                int iSubStrStart = Math.max(iaDelimPoints[iCurDelim] - NGramSizeSldr.getValue(), 0);
                int iSubStrEnd = Math.min(iaDelimPoints[iCurDelim] + NGramSizeSldr.getValue(), sStr.length());
//                appendToLog("Symbol '" + sStr.substring(iaDelimPoints[iCurDelim], Math.min(iaDelimPoints[iCurDelim] + 1, sStr.length())) + 
//                        "' in '" + sStr.substring(iSubStrStart, Math.min(iSubStrEnd + 1, sStr.length())) + "' has mutual information pair of (" + 
//                        String.format("%6.4f", dMutualInformationBefore) + "," + 
//                        String.format("%6.4f", dMutualInformationAfter) + ")\n");                
                //////////////
                // Output category
                fOut.write(sStr.substring(iaDelimPoints[iCurDelim], 
                        Math.min(iaDelimPoints[iCurDelim] + 1, sStr.length())).hashCode() + ",");
                if (" ()-\n".indexOf(sStr.substring(iaDelimPoints[iCurDelim], Math.min(iaDelimPoints[iCurDelim] + 1, sStr.length()))) > -1)
                    fOut.write("delim,");
                else
                    fOut.write("nondelim,");
                // Output features
                Double[] dBefore = 
                    DetermineMutualInformationDistributionOf(sStr, iaDelimPoints[iCurDelim], 
                        Math.max(NGramSizeSldr.getValue(), 1), 1, true);
                Double[] dAfter = DetermineMutualInformationDistributionOf(sStr, iaDelimPoints[iCurDelim], 
                        Math.max(NGramSizeSldr.getValue(), 1), 1, false);
                
                String sMutualInformationBefore = "";
                // Pad with zeroes
                if (dBefore.length < NGramSizeSldr.getValue())
                    for (int iCnt = dBefore.length; iCnt < NGramSizeSldr.getValue(); iCnt++)
                        sMutualInformationBefore += "0.0,";
                for (int iCnt = 0; iCnt < dBefore.length; iCnt++) {
                    sMutualInformationBefore += dBefore[iCnt];
                    if (iCnt < dBefore.length - 1)
                     sMutualInformationBefore +=  ",";
                }
                
                String sMutualInformationAfter = "";
                // Pad with zeroes
                if (dAfter.length < NGramSizeSldr.getValue())
                    for (int iCnt = dAfter.length; iCnt < NGramSizeSldr.getValue(); iCnt++)
                        sMutualInformationAfter += "0.0,";
                for (int iCnt = 0; iCnt < dAfter.length; iCnt++) {
                    sMutualInformationAfter += dAfter[iCnt];
                    if (iCnt < dAfter.length - 1)
                     sMutualInformationAfter +=  ",";
                }
                
                fOut.write(sMutualInformationBefore + "," + 
                        sMutualInformationAfter + System.getProperty("line.separator"));
                fOut.flush();

                if ((dMutualInformationBefore > 1) && (dMutualInformationAfter < 1)) {
                    // DEBUG LINES
                    // appendToLog("DELIMITER");
                    //////////////
                    alList.add(iaDelimPoints[iCurDelim]);
                }
                ++iCurDelim;
            }
            
            fOut.close();
        }
        catch (IOException ioe) {
            System.out.println("Output failed.");
        }
        
        Integer[] iaRes = new Integer[alList.size()];
        iaRes = (Integer[])alList.toArray(iaRes);
        return iaRes;
    }
    
    /**
     * Determines how many of the candidate delimiters should be considered important.
     * <p>
     * The keys of the given map are walked in map order. Every key that has both a
     * predecessor and a successor is given a peak score
     * {@code key * |2 * key - predecessor - successor|}. The key with the highest score is
     * then used as the threshold passed to {@link #getDelimiterIndexByThreshold}.
     * <p>
     * NaN keys are skipped entirely: they are neither scored nor used as a neighbour of
     * another key (the original code only printed a warning and then used them anyway).
     *
     * @param smMap sorted map whose keys are {@code Double} values; the values are not read here
     * @return the index computed by {@code getDelimiterIndexByThreshold} for the peak key
     */
    private int determineImportantDelimiters(SortedMap smMap) {
        Distribution dDist = new Distribution();    // key -> peak score
        Distribution dReverse = new Distribution(); // peak score -> key

        // NEGATIVE_INFINITY marks "no such neighbour seen yet"
        Double dPrv = Double.NEGATIVE_INFINITY;
        Double dTwoPrv = Double.NEGATIVE_INFINITY;

        // Create corresponding distribution
        Iterator iIter = smMap.keySet().iterator();
        while (iIter.hasNext()) {
            Double oNext = (Double)iIter.next();
            if (oNext.isNaN()) {
                System.err.println("WARNING: Encountered NaN. Ignoring...");
                continue; // Really ignore: do not score with it, do not shift the window
            }

            if ((dPrv != Double.NEGATIVE_INFINITY) && (dTwoPrv != Double.NEGATIVE_INFINITY)) {
                // Detect peaks: weigh the key by the magnitude of its discrete second difference
                double dPeak = dPrv * Math.abs(dPrv-dTwoPrv-oNext+dPrv);
                dDist.setValue(dPrv, dPeak);
                dReverse.setValue(dPeak, dPrv);
            }

            dTwoPrv = dPrv;
            dPrv = oNext;
        }

        // NOTE(review): with fewer than three usable keys dDist stays empty; what
        // maxValue() yields in that case depends on Distribution - confirm.
        return getDelimiterIndexByThreshold(smMap, dReverse.getValue(dDist.maxValue()));
    }

    /**
     * Translates a threshold on the map keys into an index counted from the end of the map.
     * <p>
     * Keys are visited in map order and counted until the first key strictly greater than
     * the threshold is met.
     *
     * @param smMap sorted map with {@code Double} keys
     * @param dThreshold the key value at which counting stops
     * @return {@code smMap.size()} minus the number of leading keys that do not exceed the
     *         threshold, plus one
     */
    private int getDelimiterIndexByThreshold(SortedMap smMap, double dThreshold) {
        int iNotAbove = 0;
        for (Object oKey : smMap.keySet()) {
            double dKey = ((Double)oKey).doubleValue();
            if (dKey > dThreshold)
                break; // First key above the threshold ends the count
            iNotAbove++;
        }
        return smMap.size() - iNotAbove + 1;
    }
    /**
     * Determines corpus complexity: runs {@link #analyseCorpus} with a freshly created
     * status frame as its progress displayer.
     * <p>
     * The frame is disposed when the analysis ends, whether it returns normally or
     * throws, so a failed analysis does not leave a stale status window behind.
     */
    private void DoAnalyseCorpus() {
        gr.demokritos.iit.jinsect.gui.StatusFrame fStatus = new gr.demokritos.iit.jinsect.gui.StatusFrame();
        try {
            analyseCorpus(fStatus);
        } finally {
            fStatus.dispose();
        }
    }
    
    /**
     * Estimates a distance from the sizes of the chunks obtained when the overall data
     * string is split at its important delimiters.
     * <p>
     * Steps: identify candidate delimiters in the data string of {@code sgOverallGraph},
     * keep only the tail of the candidate map that starts at the important ones, make sure
     * the EOF character is among the delimiters, split the data string, and build a
     * histogram of chunk lengths.
     *
     * @return {@code average(false) + standardDeviation(false)} of the chunk-length
     *         histogram (the meaning of the boolean flag is defined by {@code Distribution})
     */
    private double determineDistanceDeviation() {
        SortedMap smDelims = identifyCandidateDelimiters(sgOverallGraph.getDataString(), 1);
        int iImportant = determineImportantDelimiters(smDelims);

        // Skip the leading entries that are not considered important
        Iterator iIter = smDelims.keySet().iterator();
        int iCnt = 0;
        while (iIter.hasNext() && (iCnt++ < smDelims.size() - iImportant))
            iIter.next();

        smDelims = smDelims.tailMap(iIter.next());

        // The map values are delimiter strings, so the EOF check must use the EOF *string*.
        // Testing against the int constant TT_EOF (boxed to an Integer) could never match
        // and caused the EOF entry to be added unconditionally.
        String sEOF = String.valueOf((char)StreamTokenizer.TT_EOF);
        if (!smDelims.containsValue(sEOF)) {
            // Add EOF char, keyed just above the current last key
            smDelims.put((Double)smDelims.lastKey() + 0.1, sEOF);
        }

        String[] saChunks = splitStringByDelimiterPoints(sgOverallGraph.getDataString(),
                splitPointsByDelimiterList(sgOverallGraph.getDataString(), smDelims));

        // Histogram: chunk length -> number of chunks with that length
        Distribution dSizes = new Distribution();
        for (iCnt=0;iCnt < saChunks.length; iCnt++) {
            dSizes.setValue((double)saChunks[iCnt].length(),
                    dSizes.getValue((double)saChunks[iCnt].length()) + 1);
        }

        return dSizes.average(false) + dSizes.standardDeviation(false);
    }
    
    /**
     * Analyses the corpus under the path typed in {@code FilePathEdt} and logs a proposed
     * n-gram size range together with an optimal distance.
     * <p>
     * The work is organised in three passes:
     * <ol>
     *   <li>train the chunker, then load every training file into the per-level
     *       distribution documents and into the overall symbolic graph;</li>
     *   <li>obtain the symbol set and count, per n-gram size, how many extracted
     *       n-grams are symbols and how many are not;</li>
     *   <li>hand those counts to {@code NGramSizeEstimator} and {@code DistanceEstimator}
     *       and log their proposals.</li>
     * </ol>
     * The symbol set and the distributions are cached in the files {@code Symbols.tmp} and
     * {@code Distros.tmp} (relative paths). An existing cache file is reused as-is; nothing
     * here checks that it was produced from the same corpus.
     *
     * @param fStatus displayer that is made visible and updated with progress information
     */
    private void analyseCorpus(final IStatusDisplayer fStatus) {
        // Constants
        final String sDistrosFile = "Distros.tmp";
        final String sSymbolsFile = "Symbols.tmp";
        // Chunker re-init
        Chunker = new EntropyChunker();
        // Vars
        int Levels = 2; // Analyze 2-grams
        sgOverallGraph = new SymbolicGraph(1, Levels); // Init graph with a min of 2
        siIndex = new SemanticIndex(sgOverallGraph);
        try {
            siIndex.MeaningExtractor = new LocalWordNetMeaningExtractor();
        }
        catch (IOException ioe) {
            siIndex.MeaningExtractor = null; // Use default
        }
        
        try 
        {
            // Get categories and files from disk
            DocumentSet dsSet = new DocumentSet(FilePathEdt.getText(), 1.0);
            dsSet.createSets(true, (double)100 / 100);
            int iCurCnt, iTotal;
            String sFile = "";
            Iterator iIter = dsSet.getTrainingSet().iterator();
            iTotal = dsSet.getTrainingSet().size();
            if (iTotal == 0) // No documents to import
            {
                appendToLog("No input documents.\n");
                appendToLog("======DONE=====\n");
                return;
            }
            appendToLog("Training chunker...");
            Chunker.train(dsSet.toFilenameSet(DocumentSet.FROM_WHOLE_SET));
            appendToLog("Setting delimiters...");
            setDelimiters(Chunker.getDelimiters());
            iCurCnt = 0;

            // Create overall document
            cdDoc = new DistributionDocument[Levels];
            for (int iCnt=0; iCnt < Levels; iCnt++)
                cdDoc[iCnt] = new DistributionDocument(1, MinLevel + iCnt); // For all windows

            fStatus.setVisible(true);
            ThreadList t = new ThreadList(Runtime.getRuntime().availableProcessors() + 1);
            
            ///////////////////////////////////////////////////////////////
            // Pass 1: load every file into the documents and the graph
            ///////////////////////////////////////////////////////////////
            appendToLog("(Pass 1/3) Loading files..." + sFile);
            while (iIter.hasNext())
            {
                sFile = ((CategorizedFileEntry)iIter.next()).getFileName();
                fStatus.setStatus("(Pass 1/3) Loading file..." + sFile, (double)iCurCnt / iTotal);
                // Thread arguments
                final DistributionDocument[] cdDocArg = cdDoc;
                final String sFileArg = sFile;
                
                for (int iCnt=0; iCnt < cdDoc.length; iCnt++) {
                    final int iCntArg = iCnt;
                    // Keep offering the task until the thread list accepts it
                    while (!t.addThreadFor(new Runnable() {
                        public void run() {
                            if (!RightToLeftText)
                                cdDocArg[iCntArg].loadDataStringFromFile(sFileArg,false);
                            else
                            {
                                // Use reverse
                                cdDocArg[iCntArg].setDataString(
                                        utils.reverseString(utils.loadFileToString(sFileArg)),
                                        iCntArg, false);
                            }
                        }
                    }))
                        Thread.yield();
                }
                try {
                    t.waitUntilCompletion();
                } catch (InterruptedException ex) {
                    ex.printStackTrace(System.err);
                    appendToLog("Interrupted...");
                    
                    sgOverallGraph.removeNotificationListener();
                    return;
                }
                
                // Construct the datastring by dividing text with an EOF char.
                sgOverallGraph.setDataString(((new StringBuffer().append((char)StreamTokenizer.TT_EOF))).toString());
                sgOverallGraph.loadFromFile(sFile);
                fStatus.setStatus("Loaded file..." + sFile, (double)++iCurCnt / iTotal);
                Thread.yield();
            }
            
            ///////////////////////////////////////////////////////////////
            // Symbol set: reuse the cached one if possible, else extract
            ///////////////////////////////////////////////////////////////
            Set sSymbols = null;
            File fPreviousSymbols = new File(sSymbolsFile);
            boolean bSymbolsLoadedOK = false;
            if (fPreviousSymbols.exists()) {
                System.err.println("ATTENTION: Using previous symbols...");
                // NOTE(review): native Java deserialization of a file in the working
                // directory; acceptable only while that file is trusted.
                FileInputStream fisSymbols = null;
                ObjectInputStream oisSymbols = null;
                try {
                    fisSymbols = new FileInputStream(fPreviousSymbols);
                    oisSymbols = new ObjectInputStream(fisSymbols);

                    sSymbols = (Set)oisSymbols.readObject();
                    bSymbolsLoadedOK = true;
                } catch (FileNotFoundException ex) {
                    ex.printStackTrace(System.err);
                } catch (IOException ex) {
                    ex.printStackTrace(System.err);
                } catch (ClassNotFoundException ex) {
                    ex.printStackTrace(System.err);
                } finally {
                    // Always release the file handle, even when reading failed
                    try {
                        if (oisSymbols != null)
                            oisSymbols.close();
                        else if (fisSymbols != null)
                            fisSymbols.close();
                    } catch (IOException ex) {
                        ex.printStackTrace(System.err);
                    }
                }
            }
            
            if (!bSymbolsLoadedOK)
                // If failed to load, extract...
                sSymbols = getSymbolsByProbabilities(sgOverallGraph.getDataString(), fStatus);
            
            int iMinSymbolSize = Integer.MAX_VALUE;
            int iMaxSymbolSize = Integer.MIN_VALUE;
            
            // Determine min, max symbol length
            Iterator iSymbol = sSymbols.iterator();
            while (iSymbol.hasNext()) {
                String sCurSymbol = (String)iSymbol.next();
                if (iMaxSymbolSize < sCurSymbol.length())
                    iMaxSymbolSize = sCurSymbol.length();
                if (iMinSymbolSize > sCurSymbol.length())
                    iMinSymbolSize = sCurSymbol.length();
            }
            
            // Save symbols to file for future use
            FileOutputStream fosSymbols = null;
            ObjectOutputStream oosSymbols = null;
            try {
                fosSymbols = new FileOutputStream(sSymbolsFile);
                oosSymbols = new ObjectOutputStream(fosSymbols);

                oosSymbols.writeObject(sSymbols);
            } catch (FileNotFoundException ex) {
                ex.printStackTrace(System.err);
            } catch (IOException ex) {
                ex.printStackTrace(System.err);
            } finally {
                try {
                    if (oosSymbols != null)
                        oosSymbols.close();
                    else if (fosSymbols != null)
                        fosSymbols.close();
                } catch (IOException ex) {
                    ex.printStackTrace(System.err);
                }
            }
            
            ///////////////////////////////////////////////////////////////
            // Pass 2: symbol / non-symbol counts per n-gram size
            ///////////////////////////////////////////////////////////////
            appendToLog("(Pass 2/3) Determining symbol distros per n-gram size...");
            
            // Run through all files again to determine n-gram sizes
            iIter = dsSet.getTrainingSet().iterator();            
            iTotal = dsSet.getTrainingSet().size();
            if (iTotal == 0) // No documents to import
            {
                appendToLog("No input documents.\n");
                appendToLog("======DONE=====\n");
                return;
            }
            
            iCurCnt = 0;
            Distribution dSymbolsPerSize = new Distribution();
            Distribution dNonSymbolsPerSize = new Distribution();
            Distribution dSymbolSizes = new Distribution();

            // Use previous distros if available
            File fPreviousRun = new File(sDistrosFile);
            boolean bDistrosLoadedOK = false;
            if (fPreviousRun.exists()) {
                System.err.println("ATTENTION: Using previous distros...");
                FileInputStream fisDistros = null;
                ObjectInputStream oisDistros = null;
                try {
                    fisDistros = new FileInputStream(fPreviousRun);
                    oisDistros = new ObjectInputStream(fisDistros);

                    dSymbolsPerSize = (Distribution)oisDistros.readObject();
                    dNonSymbolsPerSize = (Distribution)oisDistros.readObject();
                    dSymbolSizes = (Distribution)oisDistros.readObject();

                    bDistrosLoadedOK = true;
                } catch (FileNotFoundException ex) {
                    ex.printStackTrace(System.err);
                } catch (IOException ex) {
                    ex.printStackTrace(System.err);
                    // Reset distros: some of them may have been read before the failure
                    dSymbolsPerSize = new Distribution();
                    dNonSymbolsPerSize = new Distribution();
                    dSymbolSizes = new Distribution();
                } catch (ClassNotFoundException ex) {
                    ex.printStackTrace(System.err);
                    // Reset distros: some of them may have been read before the failure
                    dSymbolsPerSize = new Distribution();
                    dNonSymbolsPerSize = new Distribution();
                    dSymbolSizes = new Distribution();
                } finally {
                    try {
                        if (oisDistros != null)
                            oisDistros.close();
                        else if (fisDistros != null)
                            fisDistros.close();
                    } catch (IOException ex) {
                        ex.printStackTrace(System.err);
                    }
                }
            }
            
            // If distros not loaded, compute them from the files and cache the result
            if (!bDistrosLoadedOK) {
                while (iIter.hasNext())
                {
                    // Fetch the name first, so that the status line names the file being parsed
                    sFile = ((CategorizedFileEntry)iIter.next()).getFileName();
                    fStatus.setStatus("(Pass 2/3) Parsing file..." + sFile, (double)iCurCnt++ / iTotal);

                    // Load the whole file; on failure the file counts as empty text
                    String sDataString = "";
                    FileInputStream fiIn = null;
                    try {
                        ByteArrayOutputStream bsOut = new ByteArrayOutputStream();
                        fiIn = new FileInputStream(sFile);
                        int iData = 0;
                        while ((iData = fiIn.read()) > -1)
                            bsOut.write(iData);
                        sDataString = bsOut.toString();
                    }
                    catch (IOException ioe) {
                        ioe.printStackTrace(System.err);
                    }
                    finally {
                        // One stream is opened per corpus file: close it to avoid leaking handles
                        if (fiIn != null) {
                            try {
                                fiIn.close();
                            } catch (IOException ioe) {
                                ioe.printStackTrace(System.err);
                            }
                        }
                    }
                    // Thread args
                    final Distribution dSymbolsPerSizeArg = dSymbolsPerSize;
                    final Distribution dNonSymbolsPerSizeArg = dNonSymbolsPerSize;
                    final Distribution dSymbolSizesArg = dSymbolSizes;
                    final String sDataStringArg = sDataString;
                    final Set sSymbolsArg = sSymbols;
                    
                    // One task per candidate n-gram size
                    for (int iSymbolSize = iMinSymbolSize; iSymbolSize <= iMaxSymbolSize; iSymbolSize++) {
                        final int iSymbolSizeArg = iSymbolSize;
                        while (!t.addThreadFor(new Runnable() {
                            public void run() {
                                NGramDocument ndCur = new NGramDocument(iSymbolSizeArg, iSymbolSizeArg,
                                        1, iSymbolSizeArg, iSymbolSizeArg);
                                ndCur.setDataString(sDataStringArg);
                                int iSymbolCnt = 0;
                                int iNonSymbolCnt = 0;
                                
                                Iterator iExtracted = 
                                        ndCur.getDocumentGraph().getGraphLevel(0).getVertexSet().iterator();
                                while (iExtracted.hasNext()) {
                                    String sCur = ((Vertex)iExtracted.next()).toString();
                                    if (sSymbolsArg.contains(sCur)) {
                                        iSymbolCnt++;
                                        // Increase occurences of this symbol length
                                        synchronized (dSymbolSizesArg) {
                                            dSymbolSizesArg.setValue(sCur.length(), 
                                                    dSymbolSizesArg.getValue(sCur.length()) + 1.0);
                                        }
                                    }
                                    else
                                        iNonSymbolCnt++;
                                }
                                
                                // Update the distributions shared between the tasks
                                synchronized (dSymbolsPerSizeArg) {
                                    dSymbolsPerSizeArg.setValue(iSymbolSizeArg, 
                                            dSymbolsPerSizeArg.getValue(iSymbolSizeArg) + iSymbolCnt);
                                }
                                synchronized (dNonSymbolsPerSizeArg) {
                                    dNonSymbolsPerSizeArg.setValue(iSymbolSizeArg, 
                                            dNonSymbolsPerSizeArg.getValue(iSymbolSizeArg) + iNonSymbolCnt);
                                }
                            }
                        }))
                            Thread.yield();
                    }
                }

                try {
                    t.waitUntilCompletion();
                } catch (InterruptedException ex) {
                    appendToLog("Interrupted...");
                    sgOverallGraph.removeNotificationListener();
                    return;
                }

                // Save distros to file for future use
                FileOutputStream fosDistros = null;
                ObjectOutputStream oosDistros = null;
                try {
                    fosDistros = new FileOutputStream(sDistrosFile);
                    oosDistros = new ObjectOutputStream(fosDistros);

                    oosDistros.writeObject(dSymbolsPerSize);
                    oosDistros.writeObject(dNonSymbolsPerSize);
                    oosDistros.writeObject(dSymbolSizes);
                } catch (FileNotFoundException ex) {
                    ex.printStackTrace(System.err);
                } catch (IOException ex) {
                    ex.printStackTrace(System.err);
                } finally {
                    try {
                        if (oosDistros != null)
                            oosDistros.close();
                        else if (fosDistros != null)
                            fosDistros.close();
                    } catch (IOException ex) {
                        ex.printStackTrace(System.err);
                    }
                }
            }

            ///////////////////////////////////////////////////////////////
            // Pass 3: let the estimators propose n-gram range and distance
            ///////////////////////////////////////////////////////////////
            appendToLog("\n(Pass 3/3) Determining optimal n-gram range...\n");
            
            NGramSizeEstimator nseEstimator = new NGramSizeEstimator(dSymbolsPerSize, dNonSymbolsPerSize);
            IntegerPair p = nseEstimator.getOptimalRange();
            
            // Log proposed value
            appendToLog("\nProposed n-gram sizes:" + p.first() + "," +
                    p.second());
             
            fStatus.setStatus("Determining optimal distance...", 0.0);
            DistanceEstimator de = new DistanceEstimator(dSymbolsPerSize, dNonSymbolsPerSize, nseEstimator);
            
            int iBestDist = de.getOptimalDistance(1, nseEstimator.getMaxRank()*2,
                    p.first(), p.second());
            fStatus.setStatus("Determining optimal distance...", 1.0);
            appendToLog("\nOptimal distance:" + iBestDist);
            appendToLog("======DONE=====\n");            
        } finally {
            sgOverallGraph.removeNotificationListener();
        }
    }
    
    /**
     * Builds the overall symbolic graph and the per-level distribution documents
     * from the training part of the corpus located under the path in FilePathEdt.
     * Progress is reported both to the log and to a temporary status frame, which
     * is disposed when the method finishes (normally or not).
     */
    private void DoCreateNGramGraph() {
        final gr.demokritos.iit.jinsect.gui.StatusFrame fStatus = new gr.demokritos.iit.jinsect.gui.StatusFrame();
        // Never use fewer than two levels, whatever the slider is set to.
        final int iLevels = Math.max(NGramSizeSldr.getValue(), 2);
        sgOverallGraph = new SymbolicGraph(1, iLevels);
        siIndex = new SemanticIndex(sgOverallGraph);
        try {
            siIndex.MeaningExtractor = new LocalWordNetMeaningExtractor();
        } catch (IOException ioe) {
            siIndex.MeaningExtractor = null; // Use default
        }

        // NOTE(review): a debugging notification listener used to be registered on the
        // graph at this point and has been disabled; the finally clause below still
        // removes any listener - confirm whether that call is still required.
        try {
            // Get categories and files from disk
            DocumentSet dsCorpus = new DocumentSet(FilePathEdt.getText(), 1.0);
            dsCorpus.createSets(true, (double)CorpusPercentSld.getValue() / 100);
            Iterator iTrainingIter = dsCorpus.getTrainingSet().iterator();
            final int iTotal = dsCorpus.getTrainingSet().size();
            if (iTotal == 0) {
                // No documents to import
                appendToLog("No input documents.\n");
                appendToLog("======DONE=====\n");
                return;
            }

            // Create one overall document per level (i.e. for all windows)
            cdDoc = new DistributionDocument[iLevels];
            for (int iLevel = 0; iLevel < iLevels; iLevel++) {
                cdDoc[iLevel] = new DistributionDocument(1, MinLevel + iLevel);
            }

            fStatus.setVisible(true);
            int iLoaded = 0;
            while (iTrainingIter.hasNext()) {
                CategorizedFileEntry cfeCur = (CategorizedFileEntry)iTrainingIter.next();
                String sCurFile = cfeCur.getFileName();

                appendToLog("Loading file..." + sCurFile);
                fStatus.setStatus("Loading file..." + sCurFile, (double)iLoaded / iTotal);
                // Feed the file to every level document, then to the graph itself.
                for (int iLevel = 0; iLevel < cdDoc.length; iLevel++) {
                    cdDoc[iLevel].loadDataStringFromFile(sCurFile, false);
                }
                sgOverallGraph.loadFromFile(sCurFile);

                iLoaded++;
                appendToLog("Loaded file..." + sCurFile);
                fStatus.setStatus("Loaded file..." + sCurFile, (double)iLoaded / iTotal);
                Thread.yield();
            }

            appendToLog("======DONE=====\n");
        } finally {
            sgOverallGraph.removeNotificationListener();
            fStatus.dispose();
        }
    }
    
    /**
     * Handles the "create n-gram graph" button: runs DoCreateNGramGraph() on a
     * separate, registered thread with slightly increased priority, so that this
     * handler returns without waiting for the graph to be built.
     */
    private void CreateNGramGraphBtnActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_CreateNGramGraphBtnActionPerformed
        Thread t = new Thread() {
            @Override
            public void run() {
                try {
                    DoCreateNGramGraph();
                } finally {
                    // Unregister even when graph creation dies with an unchecked
                    // exception; otherwise the finished thread would stay registered.
                    unregisterThread(this);
                }
            }
        };
        registerThread(t);
        t.setPriority(Math.min(Thread.MAX_PRIORITY, t.getPriority() + 1)); // Increased priority
        t.start();
    }//GEN-LAST:event_CreateNGramGraphBtnActionPerformed

    /**
     * Handles the "select input" button: lets the user pick a directory and, if
     * the choice is approved, stores its absolute path in the file path field.
     */
    private void SelectInputFileBtnActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SelectInputFileBtnActionPerformed
        // Start browsing from the path already typed in, or from the working
        // directory when the field is empty.
        final String sTypedPath = FilePathEdt.getText();
        final java.io.File fStartDir;
        if (sTypedPath.length() == 0) {
            fStartDir = new java.io.File(".");
        } else {
            fStartDir = new java.io.File(sTypedPath);
        }

        JFileChooser fcDirChooser = new JFileChooser();
        fcDirChooser.setCurrentDirectory(fStartDir);
        fcDirChooser.setSelectedFile(fcDirChooser.getCurrentDirectory());
        fcDirChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);

        if (fcDirChooser.showOpenDialog(this) != JFileChooser.APPROVE_OPTION) {
            return; // Dialog cancelled or failed: keep the current path
        }
        FilePathEdt.setText(fcDirChooser.getSelectedFile().getAbsolutePath());
    }//GEN-LAST:event_SelectInputFileBtnActionPerformed
    
    /**
     * Cuts a string at the given positions.
     *
     * @param sStr the string to cut.
     * @param iRes the cut positions, as indices into sStr, in ascending order.
     * @return the consecutive pieces of sStr; n cut positions give n+1 pieces, so
     * an empty position array yields the whole string as the only piece.
     */
    private static String[] splitStringByDelimiterPoints(String sStr, Integer[] iRes) {
        // n split points => n+1 string parts
        String[] sParts = new String[iRes.length + 1];

        int iPartStart = 0;
        for (int iPart = 0; iPart < iRes.length; iPart++) {
            int iPartEnd = iRes[iPart];
            sParts[iPart] = sStr.substring(iPartStart, iPartEnd);
            iPartStart = iPartEnd; // Next piece begins where this one ended
        }
        // Whatever follows the last split point (or everything, if there was none)
        sParts[iRes.length] = sStr.substring(iPartStart);

        return sParts;
    }
    
    /**
     * Replaces the current delimiters with the given ones and shows them in the
     * delimiters edit field.
     *
     * @param smDelims the new delimiters. The string representations of its
     * values are concatenated in reverse key order to form the displayed text.
     */
    private synchronized final void setDelimiters(SortedMap smDelims) {
        // values() iterates in ascending key order; prepending each value
        // therefore produces the reversed sequence.
        StringBuilder sbRes = new StringBuilder();
        Iterator iIter = smDelims.values().iterator();
        while (iIter.hasNext()) {
            sbRes.insert(0, iIter.next().toString()); // Reverse
        }

        try {
            DelimitersEdt.setText(sbRes.toString());
        }
        catch (Exception e) {
            System.err.println("Cannot update edit (probably due to strange encoding). Continuing...");
        }

        // getDelimiters() hands out the Delims map itself. If the caller passes
        // that very map back, clearing it first would also empty the argument
        // and all delimiters would be lost, so leave it alone in that case.
        if (smDelims != Delims) {
            Delims.clear();
            Delims.putAll(smDelims);
        }
    }
    
    /**
     * Returns the current delimiters.
     *
     * @return the delimiter map held by this form. It is the internal map itself,
     * not a copy, so changes made to it by the caller affect this form.
     */
    public synchronized final SortedMap getDelimiters() {
        return this.Delims;
    }
    /**
     * Program entry point. Without arguments the form is shown as a GUI.
     * With arguments, the switches are parsed and either the symbols of a stored
     * symbol file are printed (when outputSymbolsOfMinSize is positive), or the
     * corpus in the directory given by the dir switch is analysed.
     *
     * @param args the command line arguments
     */
    public static void main(String args[]) {
        if (args.length == 0) {
            // No arguments: interactive mode
            java.awt.EventQueue.invokeLater(new Runnable() {
                public void run() {
                    new NGramCorrelationForm().setVisible(true);
                }
            });
            return;
        }

        StatusConsole scConsole = new StatusConsole(80);
        Hashtable hSwitches = gr.demokritos.iit.jinsect.utils.parseCommandLineSwitches(args);

        // Read all switches up front
        String sMinSize = gr.demokritos.iit.jinsect.utils.getSwitch(hSwitches,
                "outputSymbolsOfMinSize", "-1");
        int iOutputNGramMinSize = Integer.parseInt(sMinSize);
        // The maximum size defaults to ten more than the minimum size
        String sMaxSize = gr.demokritos.iit.jinsect.utils.getSwitch(hSwitches,
                "outputSymbolsOfMaxSize", String.valueOf(iOutputNGramMinSize + 10));
        int iOutputNGramMaxSize = Integer.parseInt(sMaxSize);
        String sSymbolFile = gr.demokritos.iit.jinsect.utils.getSwitch(hSwitches, "symbolFile",
                "Symbols.tmp");
        String sRightToLeft = gr.demokritos.iit.jinsect.utils.getSwitch(hSwitches, "rightToLeft",
                String.valueOf(false));
        boolean bRightToLeftText = Boolean.parseBoolean(sRightToLeft);

        if (iOutputNGramMinSize > 0) {
            // Print out symbols
            StreamOutputConsole socOut = new StreamOutputConsole(System.out, false);
            int iCount = printOutSymbolsOfSize(iOutputNGramMinSize, iOutputNGramMaxSize,
                    sSymbolFile, socOut);
            System.out.println("Total: " + iCount);
            return;
        }

        // Otherwise execute the extraction
        if (bRightToLeftText) {
            System.err.println("Performing right to left analysis...");
        }
        NGramCorrelationForm nfMain = new NGramCorrelationForm();
        nfMain.RightToLeftText = bRightToLeftText;
        String sInputDir = gr.demokritos.iit.jinsect.utils.getSwitch(hSwitches, "dir", "./DUC/reducedModels2005/");
        nfMain.FilePathEdt.setText(sInputDir);
        nfMain.analyseCorpus(scConsole);
        nfMain.dispose();
    }
    
    /**
     * Breaks a text into symbols. Characters are accumulated into the current
     * symbol for as long as the probability of the next character following the
     * accumulated string in the text is higher than in random text; otherwise the
     * current symbol is closed and a new one is started with that character.
     *
     * @param sText the text to break into symbols.
     * @param fStatus receives a progress message, including an estimate of the
     * remaining time, after every character.
     * @return the sorted set of distinct symbols (strings) found.
     */
    private Set getSymbolsByProbabilities(String sText, IStatusDisplayer fStatus) {
        StringBuilder sbSubStr = new StringBuilder();
        TreeSet tsRes = new TreeSet();
        final int iTextLen = sText.length();
        final long lStartMillis = System.currentTimeMillis();

        // For every character
        for (int iCnt = 0; iCnt < iTextLen; iCnt++) {
            String sNextChar = sText.substring(iCnt, iCnt+1);
            // If the probability of a suffix char given a prefix is higher than
            // random, the suffix is considered part of the prefix.
            if ((sbSubStr.length() == 0) ||
                    (getProbabilityOfStringInText(sbSubStr.toString(), sNextChar) >
                    getProbabilityOfStringInRandomText(sbSubStr.toString(), sNextChar)))
                sbSubStr.append(sNextChar);
            else
                // else end the existing symbol, adding it to the returned set and start a new one
            {
                tsRes.add(sbSubStr.toString());
                sbSubStr = new StringBuilder(sNextChar);
            }

            // Remaining time = elapsed time scaled by (characters left / characters done).
            // The division is by the number of characters done (at least 1), and the
            // scaling happens in floating point before the single cast to long, so
            // sub-millisecond per-character costs are not truncated to zero.
            int iProcessed = iCnt + 1;
            long lElapsed = System.currentTimeMillis() - lStartMillis;
            long lRemaining = (long)((double)lElapsed * (iTextLen - iProcessed) / iProcessed);
            String sRemaining = String.format(" - Remaining: %40s\r",
                    gr.demokritos.iit.jinsect.utils.millisToMinSecString(lRemaining));
            fStatus.setStatus("Determining corpus symbols..." + sRemaining, (double)iCnt / iTextLen);
        }
        // Add the final symbol, if not empty.
        if (sbSubStr.length() > 0)
            tsRes.add(sbSubStr.toString());
        return tsRes;
    }
    
    /**
     * Reports, through the given status displayer, every symbol of a stored symbol
     * set whose length lies within the given bounds.
     *
     * @param iMinSize the minimum symbol length (inclusive).
     * @param iMaxSize the maximum symbol length (inclusive).
     * @param sSymbolsFile the name of the file holding the serialized symbol set.
     * @param isStatus receives each matching symbol, together with the number of
     * matches reported so far divided by the total number of symbols.
     * @return the number of symbols reported; 0 if the file does not exist or the
     * symbol set could not be read from it.
     */
    static public int printOutSymbolsOfSize(int iMinSize, int iMaxSize, String sSymbolsFile,
            IStatusDisplayer isStatus) {
        File fPreviousSymbols = new File(sSymbolsFile);
        if (!fPreviousSymbols.exists())
            return 0;

        // Get symbol set
        // NOTE(review): this uses Java native deserialization; the symbol file must
        // come from a trusted source.
        Set sSymbols = null;
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(fPreviousSymbols);
            ObjectInputStream ois = new ObjectInputStream(fis);
            sSymbols = (Set)ois.readObject();
        } catch (FileNotFoundException ex) {
            ex.printStackTrace(System.err);
        } catch (IOException ex) {
            ex.printStackTrace(System.err);
        } catch (ClassNotFoundException ex) {
            ex.printStackTrace(System.err);
        } finally {
            // Release the file whether or not reading succeeded
            if (fis != null) {
                try {
                    fis.close();
                } catch (IOException ex) {
                    ex.printStackTrace(System.err);
                }
            }
        }

        // Loading failed (already reported above) or the file held no set:
        // there is nothing to print.
        if (sSymbols == null)
            return 0;

        int iCurCnt = 0;
        int iTotalSize = sSymbols.size();
        Iterator iSymbolIter = sSymbols.iterator();
        while (iSymbolIter.hasNext()) {
            String sCur = (String)iSymbolIter.next();
            int iLen = sCur.length();
            if ((iLen >= iMinSize) && (iLen <= iMaxSize)) {
                isStatus.setStatus(sCur,(double)iCurCnt / iTotalSize);
                iCurCnt++;
            }
        }

        return iCurCnt;
    }
    // Variables declaration - do not modify//GEN-BEGIN:variables
    private javax.swing.JButton AnalyseCorpusBtn;
    private javax.swing.JButton BreakDownBtn;
    private javax.swing.JButton BreakFileDownBtn;
    private javax.swing.JButton CancelAllBtn;
    private javax.swing.JButton ClearGraphsBtn;
    private javax.swing.JButton CompareFilesBtn;
    private javax.swing.JSlider CorpusPercentSld;
    private javax.swing.JButton CreateNGramGraphBtn;
    private javax.swing.JTextField DelimitersEdt;
    private javax.swing.JTextField FilePathEdt;
    private javax.swing.JButton FindPathBtn;
    private javax.swing.JTextArea LogMemo;
    private javax.swing.JButton LookUpTermBtn;
    private javax.swing.JSlider NGramSizeSldr;
    private javax.swing.JLabel SecondInputFileLbl;
    private javax.swing.JButton SelectInputFileBtn;
    private javax.swing.JTextField SelectInputFileEdt;
    private javax.swing.JTextField SelectSecondInputFileEdt;
    private javax.swing.JButton SelectSecondTestFileBtn;
    private javax.swing.JButton SelectTestFileBtn;
    private javax.swing.JTextField TermEdt;
    private javax.swing.JButton jButton1;
    private javax.swing.JLabel jLabel1;
    private javax.swing.JLabel jLabel2;
    private javax.swing.JLabel jLabel3;
    private javax.swing.JLabel jLabel4;
    private javax.swing.JLabel jLabel5;
    private javax.swing.JLabel jLabel6;
    private javax.swing.JScrollPane jScrollPane1;
    // End of variables declaration//GEN-END:variables
    
}