package org.xbib.elasticsearch.plugin.bundle.test.index.analysis.autophrase;

import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.CharArraySet;
import org.elasticsearch.test.ESTestCase;
import org.xbib.elasticsearch.plugin.bundle.index.analysis.autophrase.AutoPhrasingTokenFilter;

import java.io.StringReader;
import java.util.Arrays;

/**
 * Tests for {@link AutoPhrasingTokenFilter}.
 *
 * Every test feeds a whitespace-tokenized sentence through the filter, configured with a
 * set of known phrases and {@code '_'} as the whitespace replacement character, and checks
 * the leading terms that the filter emits. Each phrase set is exercised both with and
 * without the "emit single tokens" flag.
 */
public class AutoPhrasingTokenFilterTests extends ESTestCase {

    public void testAutoPhrase() throws Exception {
        assertAutoPhrased(taxPhrases(), false,
                "what is my income tax refund this year now that my property tax is so high",
                "what", "is", "my", "income_tax", "tax_refund", "this", "year", "now", "that",
                "my", "property_tax", "is", "so", "high");
    }

    public void testAutoPhraseEmitSingle() throws Exception {
        assertAutoPhrased(taxPhrases(), true,
                "what is my income tax refund this year now that my property tax is so high",
                "what", "is", "my", "income", "income_tax", "tax", "tax_refund", "refund",
                "this", "year", "now", "that", "my", "property", "property_tax", "tax",
                "is", "so", "high");
    }

    public void testOverlappingAtBeginning() throws Exception {
        assertAutoPhrased(newYorkPhrases(), false,
                "new york city is great",
                "new_york_city", "is", "great");
    }

    public void testOverlappingAtBeginningEmitSingle() throws Exception {
        assertAutoPhrased(newYorkPhrases(), true,
                "new york city is great",
                "new", "york", "new_york", "new_york_city", "city", "is", "great");
    }

    public void testOverlappingAtEndEmitSingle() throws Exception {
        assertAutoPhrased(newYorkPhrases(), true,
                "the great city of new york",
                "the", "great", "city", "of", "new", "york", "city_of_new_york", "new_york");
    }

    public void testOverlappingAtEnd() throws Exception {
        assertAutoPhrased(newYorkPhrases(), false,
                "the great city of new york",
                "the", "great", "city_of_new_york");
    }

    public void testIncompletePhrase() throws Exception {
        final CharArraySet phrases = new CharArraySet(Arrays.asList(
                "big apple", "new york city", "property tax", "three word phrase"), false);
        // "new york" is only a prefix of "new york city", so no phrase may be emitted.
        assertAutoPhrased(phrases, false,
                "some new york",
                "some", "new", "york");
    }

    /**
     * Builds the case-sensitive phrase set shared by the income/property tax tests.
     *
     * @return a fresh set containing "income tax", "tax refund" and "property tax"
     */
    private CharArraySet taxPhrases() {
        return new CharArraySet(Arrays.asList("income tax", "tax refund", "property tax"), false);
    }

    /**
     * Builds the case-sensitive phrase set shared by the overlapping-phrase tests.
     *
     * @return a fresh set containing "new york", "new york city" and "city of new york"
     */
    private CharArraySet newYorkPhrases() {
        return new CharArraySet(Arrays.asList("new york", "new york city", "city of new york"), false);
    }

    /**
     * Runs {@code input} through a whitespace tokenizer wrapped in an
     * {@link AutoPhrasingTokenFilter} and asserts that the filter emits
     * {@code expectedTerms}, in order, as its first tokens.
     *
     * Only the listed terms are verified; the stream is not required to be exhausted
     * after the last expected term.
     *
     * @param phrases          phrases the filter should recognize
     * @param emitSingleTokens value passed as the filter's third constructor argument
     * @param input            text to analyze
     * @param expectedTerms    terms the filter must produce, in emission order
     * @throws Exception if the token stream fails while being read or closed
     */
    private void assertAutoPhrased(CharArraySet phrases, boolean emitSingleTokens, String input,
                                   String... expectedTerms) throws Exception {
        final WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader(input));

        // try-with-resources closes the filter (and with it the wrapped tokenizer and reader)
        // even when an assertion fails part-way through the stream.
        try (AutoPhrasingTokenFilter filter =
                     new AutoPhrasingTokenFilter(tokenizer, phrases, emitSingleTokens)) {
            filter.setReplaceWhitespaceWith('_');
            final CharTermAttribute term = filter.addAttribute(CharTermAttribute.class);
            filter.reset();

            for (int i = 0; i < expectedTerms.length; i++) {
                assertTrue("stream ended before token #" + i + " ('" + expectedTerms[i] + "')",
                        filter.incrementToken());
                assertEquals("unexpected term at token #" + i, expectedTerms[i], term.toString());
            }
        }
    }

}