package packt;

import com.aliasi.chunk.Chunk;
import com.aliasi.chunk.Chunker;
import com.aliasi.chunk.Chunking;
import com.aliasi.dict.DictionaryEntry;
import com.aliasi.dict.MapDictionary;
import com.aliasi.dict.TrieDictionary;
import com.aliasi.dict.Dictionary;
import com.aliasi.dict.ExactDictionaryChunker;

import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;

import java.util.Iterator;
import java.util.Set;

/**
 * Command-line demo of dictionary-based chunking with LingPipe's
 * {@link ExactDictionaryChunker}.
 *
 * <p>A small in-memory dictionary of names, places and one organization is
 * built, then four chunkers are created over it, one for every combination
 * of the two boolean constructor flags. Each sample sentence is run through
 * all four chunkers and every chunk found is printed to standard output.
 */
public class DictionaryChunker {
    /** Sample sentences that every chunker is applied to, in this order. */
    private static final String[] sentences = {
        "Joe was the last person to see Fred. ",
        "He saw him in Boston at McKenzie's pub at 3:00 where he paid "
            + "$2.45 for an ale. ",
        "Joe wanted to go to Vermont for the day to visit a cousin who "
            + "works at IBM, but Sally and he had to look for Fred"
    };

    /** Score assigned to every dictionary entry. */
    static final double CHUNK_SCORE = 1.0;

    /**
     * Builds the dictionary and the four chunkers, prints the dictionary,
     * then prints the chunks each chunker finds in each sample sentence.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        MapDictionary<String> dictionary = buildDictionary();

        // Nested iteration over {true, false} yields the flag pairs in the
        // order (true,true), (true,false), (false,true), (false,false).
        // Per the LingPipe API the third constructor argument is the
        // "return all matches" flag and the fourth is "case sensitive".
        boolean[] flagValues = {true, false};
        ExactDictionaryChunker[] chunkers
            = new ExactDictionaryChunker[flagValues.length * flagValues.length];
        int slot = 0;
        for (boolean allMatches : flagValues) {
            for (boolean caseSensitive : flagValues) {
                chunkers[slot++]
                    = new ExactDictionaryChunker(dictionary,
                                                 IndoEuropeanTokenizerFactory.INSTANCE,
                                                 allMatches, caseSensitive);
            }
        }

        System.out.println("\nDICTIONARY\n" + dictionary);

        for (String text : sentences) {
            System.out.println("\n\nTEXT=" + text);
            for (ExactDictionaryChunker chunker : chunkers) {
                chunk(chunker, text);
            }
        }

    }

    /**
     * Creates the demo dictionary: each phrase is registered under its
     * category with a score of {@link #CHUNK_SCORE}.
     *
     * @return a new dictionary holding the seven demo entries
     */
    private static MapDictionary<String> buildDictionary() {
        // {phrase, category} pairs, added in the listed order.
        String[][] phraseAndCategory = {
            {"Joe", "PERSON"},
            {"Fred", "PERSON"},
            {"Boston", "PLACE"},
            {"pub", "PLACE"},
            {"Vermont", "PLACE"},
            {"IBM", "ORGANIZATION"},
            {"Sally", "PERSON"},
        };

        MapDictionary<String> result = new MapDictionary<String>();
        for (String[] pair : phraseAndCategory) {
            result.addEntry(new DictionaryEntry<String>(pair[0], pair[1], CHUNK_SCORE));
        }
        return result;
    }

    /**
     * Runs one chunker over one piece of text and prints the outcome.
     *
     * <p>A header line reporting the chunker's two flags is printed first,
     * followed by one line per chunk giving the matched substring of
     * {@code text}, its start and end offsets, its type and its score.
     *
     * @param chunker the chunker to apply
     * @param text    the text to chunk; also the source of each printed phrase
     */
    static void chunk(ExactDictionaryChunker chunker, String text) {
        StringBuilder header = new StringBuilder("\nChunker.");
        header.append(" All matches=").append(chunker.returnAllMatches());
        header.append(" Case sensitive=").append(chunker.caseSensitive());
        System.out.println(header);

        Chunking found = chunker.chunk(text);
        for (Chunk match : found.chunkSet()) {
            int from = match.start();
            int to = match.end();
            String category = match.type();
            double weight = match.score();
            // Offsets index into the original text, so the phrase is recovered from it.
            String matched = text.substring(from, to);

            StringBuilder line = new StringBuilder("     phrase=|");
            line.append(matched).append("|");
            line.append(" start=").append(from);
            line.append(" end=").append(to);
            line.append(" type=").append(category);
            line.append(" score=").append(weight);
            System.out.println(line);
        }
    }

}