package org.elasticsearch.index.analysis;

import static com.hankcs.hanlp.tokenizer.NLPTokenizer.ANALYZER;
import static org.elasticsearch.plugin.analysis.AnalysisHanLPPlugin.defaultStopWordDictionary;

import com.hankcs.hanlp.seg.CRF.CRFSegment;
import com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment;
import com.hankcs.hanlp.seg.NShort.NShortSegment;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.tokenizer.IndexTokenizer;
import com.hankcs.hanlp.tokenizer.SpeedTokenizer;
import com.hankcs.hanlp.tokenizer.StandardTokenizer;
import com.hankcs.lucene.HanLPTokenizer;

import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;

/**
 * Tokenizer factory that produces {@link HanLPTokenizer} instances backed by one of HanLP's
 * segmenters.
 *
 * <p>Instances are obtained through the static {@code createXxx} methods; each one selects a
 * different {@link Segment}. Every tokenizer built by this factory receives
 * {@code AnalysisHanLPPlugin.defaultStopWordDictionary} and the factory's
 * {@link #enablePorterStemming} flag.
 *
 * <p>NOTE(review): the variants that use a shared {@code SEGMENT}/{@code ANALYZER} constant hand
 * the same segment object to every tokenizer they create; this assumes {@link HanLPTokenizer}
 * does not reconfigure the segment it is given — confirm.
 */
public abstract class HanLPTokenizerFactory extends AbstractTokenizerFactory {

    /** Key of the boolean tokenizer setting that is forwarded to {@link HanLPTokenizer}. */
    private static final String PORTER_STEMMING_SETTING = "enablePorterStemming";

    /**
     * Value of the {@code enablePorterStemming} tokenizer setting ({@code false} when absent).
     * Read once at construction time and passed unchanged to every tokenizer created.
     */
    protected final boolean enablePorterStemming;

    /**
     * Private so that the only subclasses are the anonymous ones declared in this file.
     *
     * @param indexSettings settings of the index this tokenizer belongs to
     * @param name          name of the tokenizer
     * @param settings      tokenizer settings; only {@code enablePorterStemming} is read here
     */
    private HanLPTokenizerFactory(IndexSettings indexSettings, String name, Settings settings) {
        super(indexSettings, name, settings);
        this.enablePorterStemming = settings.getAsBoolean(PORTER_STEMMING_SETTING, false);
    }

    /**
     * Supplies the segmenter for the next tokenizer. Called once per {@link #create()} call, so an
     * implementation may return either a shared instance or a freshly configured one.
     *
     * @return the segment to hand to {@link HanLPTokenizer}
     */
    protected abstract Segment resolveSegment();

    /**
     * Builds a tokenizer from {@link #resolveSegment()}, the plugin's default stop-word
     * dictionary and the {@link #enablePorterStemming} flag.
     *
     * @return a new {@link HanLPTokenizer}
     */
    @Override
    public Tokenizer create() {
        // The segment is resolved first and the stop-word dictionary is read on every call,
        // exactly as each variant did when it built the tokenizer itself.
        return new HanLPTokenizer(resolveSegment(), defaultStopWordDictionary, enablePorterStemming);
    }

    /**
     * Creates a factory whose tokenizers use the shared {@code StandardTokenizer.SEGMENT}.
     *
     * @param indexSettings settings of the owning index
     * @param environment   not used by this factory
     * @param name          tokenizer name
     * @param settings      tokenizer settings
     * @return the factory
     */
    public static HanLPTokenizerFactory createStandard(IndexSettings indexSettings,
                                                       Environment environment,
                                                       String name,
                                                       Settings settings) {
        return new HanLPTokenizerFactory(indexSettings, name, settings) {
            @Override
            protected Segment resolveSegment() {
                return StandardTokenizer.SEGMENT;
            }
        };
    }

    /**
     * Creates a factory whose tokenizers use the shared {@code NLPTokenizer.ANALYZER}.
     *
     * @param indexSettings settings of the owning index
     * @param environment   not used by this factory
     * @param name          tokenizer name
     * @param settings      tokenizer settings
     * @return the factory
     */
    public static HanLPTokenizerFactory createNLP(IndexSettings indexSettings,
                                                  Environment environment,
                                                  String name,
                                                  Settings settings) {
        return new HanLPTokenizerFactory(indexSettings, name, settings) {
            @Override
            protected Segment resolveSegment() {
                return ANALYZER;
            }
        };
    }

    /**
     * Creates a factory whose tokenizers use the shared {@code IndexTokenizer.SEGMENT}.
     *
     * @param indexSettings settings of the owning index
     * @param environment   not used by this factory
     * @param name          tokenizer name
     * @param settings      tokenizer settings
     * @return the factory
     */
    public static HanLPTokenizerFactory createIndex(IndexSettings indexSettings,
                                                    Environment environment,
                                                    String name,
                                                    Settings settings) {
        return new HanLPTokenizerFactory(indexSettings, name, settings) {
            @Override
            protected Segment resolveSegment() {
                return IndexTokenizer.SEGMENT;
            }
        };
    }

    /**
     * Creates a factory whose tokenizers each get a new {@link NShortSegment} with the custom
     * dictionary disabled and place and organization recognition enabled.
     *
     * @param indexSettings settings of the owning index
     * @param environment   not used by this factory
     * @param name          tokenizer name
     * @param settings      tokenizer settings
     * @return the factory
     */
    public static HanLPTokenizerFactory createNShort(IndexSettings indexSettings,
                                                     Environment environment,
                                                     String name,
                                                     Settings settings) {
        return new HanLPTokenizerFactory(indexSettings, name, settings) {
            @Override
            protected Segment resolveSegment() {
                return new NShortSegment().enableCustomDictionary(false)
                                          .enablePlaceRecognize(true)
                                          .enableOrganizationRecognize(true);
            }
        };
    }

    /**
     * Creates a factory whose tokenizers each get a new {@link DijkstraSegment} with the custom
     * dictionary disabled and place and organization recognition enabled.
     *
     * @param indexSettings settings of the owning index
     * @param environment   not used by this factory
     * @param name          tokenizer name
     * @param settings      tokenizer settings
     * @return the factory
     */
    public static HanLPTokenizerFactory createShortest(IndexSettings indexSettings,
                                                       Environment environment,
                                                       String name,
                                                       Settings settings) {
        return new HanLPTokenizerFactory(indexSettings, name, settings) {
            @Override
            protected Segment resolveSegment() {
                return new DijkstraSegment().enableCustomDictionary(false)
                                            .enablePlaceRecognize(true)
                                            .enableOrganizationRecognize(true);
            }
        };
    }

    /**
     * Creates a factory whose tokenizers each get a new {@link CRFSegment} with part-of-speech
     * tagging enabled.
     *
     * @param indexSettings settings of the owning index
     * @param environment   not used by this factory
     * @param name          tokenizer name
     * @param settings      tokenizer settings
     * @return the factory
     */
    public static HanLPTokenizerFactory createCRF(IndexSettings indexSettings,
                                                  Environment environment,
                                                  String name,
                                                  Settings settings) {
        return new HanLPTokenizerFactory(indexSettings, name, settings) {
            @Override
            protected Segment resolveSegment() {
                return new CRFSegment().enablePartOfSpeechTagging(true);
            }
        };
    }

    /**
     * Creates a factory whose tokenizers use the shared {@code SpeedTokenizer.SEGMENT}.
     *
     * @param indexSettings settings of the owning index
     * @param environment   not used by this factory
     * @param name          tokenizer name
     * @param settings      tokenizer settings
     * @return the factory
     */
    public static HanLPTokenizerFactory createSpeed(IndexSettings indexSettings,
                                                    Environment environment,
                                                    String name,
                                                    Settings settings) {
        return new HanLPTokenizerFactory(indexSettings, name, settings) {
            @Override
            protected Segment resolveSegment() {
                return SpeedTokenizer.SEGMENT;
            }
        };
    }

}