package packt;

import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import opennlp.tools.cmdline.postag.POSModelLoader;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSSample;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.Span;

/**
 * Small, self-contained demonstrations of basic text-processing tasks using
 * plain Java, Apache OpenNLP, Stanford CoreNLP and LingPipe.
 *
 * <p>Each example is a private static method that prints its result to
 * {@code System.out}. {@link #main(String[])} selects which example runs.
 * The OpenNLP examples load model files from hard-coded directories (see the
 * constants below); adjust those constants to match the local installation.
 */
public class Chapter1 {

    /** Directory from which the OpenNLP tokenizer and name-finder models are loaded. */
    private static final String OPENNLP_MODEL_DIR = "C:\\OpenNLP Models";

    /** Directory from which the OpenNLP part-of-speech model is loaded. */
    private static final String POS_MODEL_DIR = "C:/Current Books/NLP and Java/Models/";

    /** Sample sentence used by the two tokenizer-library examples. */
    private static final String TOKENIZER_SENTENCE = "He lives at 1511 W. Randolph.";

    /** Sample sentence used by the plain-Java string examples. */
    private static final String ADDRESS_SENTENCE = "Mr. Smith went to 123 Washington avenue.";

    /**
     * Runs one of the examples. Swap the active call with one of the
     * commented-out alternatives to run a different example.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
//        apacheOpenNLPExample();
//        stanfordNLPExample();
        lingpipeExamples();
//        findingPartsOfText();
//        findingSentences();
//        findingPeopleAndThings();
//        nameFinderExample();
//        detectingPartsOfSpeechExample();
//        extractingRelationshipsExample();
    }

    /**
     * Tokenizes a sample sentence with an OpenNLP {@code TokenizerME} built
     * from the {@code en-token.bin} model and prints every token wrapped in
     * square brackets on a single line.
     */
    private static void apacheOpenNLPExample() {
        File modelFile = new File(OPENNLP_MODEL_DIR, "en-token.bin");
        try (InputStream modelStream = new FileInputStream(modelFile)) {
            Tokenizer tokenizer = new TokenizerME(new TokenizerModel(modelStream));
            for (String token : tokenizer.tokenize(TOKENIZER_SENTENCE)) {
                System.out.print("[" + token + "] ");
            }
            System.out.println();
        } catch (IOException ex) {
            // FileNotFoundException is an IOException, so a single handler
            // covers both a missing model file and a read failure.
            ex.printStackTrace();
        }
    }

    /**
     * Tokenizes a sample sentence with the Stanford {@code PTBTokenizer} and
     * prints each token on its own line.
     */
    private static void stanfordNLPExample() {
        // The wildcard avoids a raw type; the element type is inferred from
        // the CoreLabelTokenFactory argument and is only printed here.
        PTBTokenizer<?> ptb = new PTBTokenizer<>(
                new StringReader(TOKENIZER_SENTENCE),
                new CoreLabelTokenFactory(), null);
        while (ptb.hasNext()) {
            System.out.println(ptb.next());
        }
    }

    /**
     * Tokenizes a sample text containing a newline and a tab with LingPipe's
     * {@code IndoEuropeanTokenizerFactory} and prints the tokens separated by
     * single spaces on one line.
     */
    private static void lingpipeExamples() {
        String text = "A sample sentence processed \nby \tthe "
                + "LingPipe tokenizer.";
        List<String> tokenList = new ArrayList<>();
        // Second list is required by tokenize(...); its contents are not used here.
        List<String> whiteList = new ArrayList<>();

        com.aliasi.tokenizer.Tokenizer tokenizer
                = IndoEuropeanTokenizerFactory.INSTANCE.tokenizer(
                        text.toCharArray(), 0, text.length());
        tokenizer.tokenize(tokenList, whiteList);

        for (String token : tokenList) {
            System.out.print(token + " ");
        }
        System.out.println();
    }

    /**
     * Splits a sample sentence on runs of whitespace using
     * {@link String#split(String)} and prints each piece on its own line.
     */
    private static void splitMethodDemonstration() {
        for (String token : ADDRESS_SENTENCE.split("\\s+")) {
            System.out.println(token);
        }
    }

    /**
     * Prints the whitespace-separated parts of a sample sentence, one per
     * line. Identical in effect to {@link #splitMethodDemonstration()}, to
     * which it delegates.
     */
    private static void findingPartsOfText() {
        splitMethodDemonstration();
    }

    /**
     * Splits a two-sentence paragraph into sentences with the Stanford
     * {@code DocumentPreprocessor} and prints each sentence on its own line.
     * Every token is followed by a single space, so each printed sentence
     * ends with a trailing space.
     */
    private static void findingSentences() {
        String paragraph = "The first sentence. The second sentence.";
        Reader reader = new StringReader(paragraph);
        DocumentPreprocessor documentPreprocessor
                = new DocumentPreprocessor(reader);

        List<String> sentenceList = new ArrayList<>();
        // Each element yielded by the preprocessor is one sentence as a token list.
        for (List<HasWord> sentenceTokens : documentPreprocessor) {
            StringBuilder sentence = new StringBuilder();
            for (HasWord token : sentenceTokens) {
                sentence.append(token).append(" ");
            }
            sentenceList.add(sentence.toString());
        }

        for (String sentence : sentenceList) {
            System.out.println(sentence);
        }
    }

    /**
     * Prints the character index at which the word "Washington" first occurs
     * in a sample sentence, as reported by {@link String#indexOf(String)}.
     */
    private static void findingPeopleAndThings() {
        String target = "Washington";
        System.out.println(ADDRESS_SENTENCE.indexOf(target));
    }

    /**
     * Finds person names in a sample text with an OpenNLP {@code NameFinderME}
     * built from the {@code en-ner-person.bin} model, and prints the names
     * found in each input string as an array.
     */
    private static void nameFinderExample() {
        String[] sentences = {
            "Tim was a good neighbor. Perhaps not as good a Bob "
            + "Haywood, but still pretty good. Of course Mr. Adam "
            + "took the cake!"};
        try {
            Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
            TokenNameFinderModel model = new TokenNameFinderModel(
                    new File(OPENNLP_MODEL_DIR, "en-ner-person.bin"));
            NameFinderME finder = new NameFinderME(model);

            for (String sentence : sentences) {
                // The name finder works on tokens, not on raw text.
                String[] tokens = tokenizer.tokenize(sentence);

                // Each Span marks a run of tokens identified as a name.
                Span[] nameSpans = finder.find(tokens);

                // Convert the spans back to the text they cover and print them.
                System.out.println(Arrays.toString(
                        Span.spansToStrings(nameSpans, tokens)));
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Tags a sample sentence with an OpenNLP {@code POSTaggerME} built from
     * the {@code en-pos-maxent.bin} model and prints the result twice: first
     * as {@code token - tag} pairs read back from a {@code POSSample}, then as
     * {@code token[tag]} pairs taken directly from the tagger output. Pairs
     * are separated by a space and each listing ends with a newline.
     */
    private static void detectingPartsOfSpeechExample() {
        String sentence = "POS processing is useful for enhancing the "
                + "quality of data sent to other elements of a pipeline.";

        POSModel model = new POSModelLoader()
                .load(new File(POS_MODEL_DIR, "en-pos-maxent.bin"));
        POSTaggerME tagger = new POSTaggerME(model);

        String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(sentence);
        String[] tags = tagger.tag(tokens);

        POSSample sample = new POSSample(tokens, tags);
        String[] posTokens = sample.getSentence();
        String[] posTags = sample.getTags();
        for (int i = 0; i < posTokens.length; i++) {
            // Trailing space keeps consecutive pairs from running together.
            System.out.print(posTokens[i] + " - " + posTags[i] + " ");
        }
        System.out.println();

        for (int i = 0; i < tokens.length; i++) {
            System.out.print(tokens[i] + "[" + tags[i] + "] ");
        }
        System.out.println();
    }

    /**
     * Runs a Stanford CoreNLP pipeline configured with the
     * {@code tokenize, ssplit, parse} annotators over a sample sentence and
     * pretty-prints the resulting annotation to {@code System.out}.
     */
    private static void extractingRelationshipsExample() {
        Properties properties = new Properties();
        properties.setProperty("annotators", "tokenize, ssplit, parse");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);

        Annotation annotation = new Annotation(
                "The meaning and purpose of life is plain to see.");
        pipeline.annotate(annotation);
        pipeline.prettyPrint(annotation, System.out);
    }
}