package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.url.URLTokenizer;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Factory which constructs {@link URLTokenizer} instances and configures them
 * according to the tokenizer settings found in the index's analysis
 * configuration.
 *
 * Joe Linn
 * 8/1/2015
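 *
 * <p>A minimal sketch of index settings which would exercise this factory. The
 * index, tokenizer, and analyzer names are placeholders, and the {@code "url"}
 * type assumes the name under which the plugin registers this factory:
 * <pre>{@code
 * PUT /example_index
 * {
 *   "settings": {
 *     "analysis": {
 *       "tokenizer": {
 *         "url_tokenizer": {
 *           "type": "url",
 *           "part": ["host", "path"],
 *           "url_decode": true,
 *           "allow_malformed": true
 *         }
 *       },
 *       "analyzer": {
 *         "url_analyzer": {
 *           "tokenizer": "url_tokenizer"
 *         }
 *       }
 *     }
 *   }
 * }
 * }</pre>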
 */
public class URLTokenizerFactory extends AbstractTokenizerFactory {
    private List<URLPart> parts;
    private boolean urlDecode;
    private boolean tokenizeHost;
    private boolean tokenizePath;
    private boolean tokenizeQuery;
    private boolean allowMalformed;
    private boolean tokenizeMalformed;


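    /**
     * @param indexSettings settings for the index to which this tokenizer belongs
     * @param environment the node environment (not used by this factory)
     * @param name the name given to this tokenizer in the analysis configuration
     * @param settings the settings of this tokenizer, from which its configuration is read
     */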
    public URLTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
        super(indexSettings, name, settings);

        // The "part" setting may name one or more URL parts (e.g. "host",
        // "path") to be emitted as tokens. When absent, parts is left null and
        // URLTokenizer falls back to its default behavior.
        String[] partSettings = settings.getAsArray("part");
        if (partSettings != null && partSettings.length > 0) {
            this.parts = Arrays.stream(partSettings)
                    .map(URLPart::fromString)
                    .collect(Collectors.toList());
        }
        this.urlDecode = settings.getAsBoolean("url_decode", false);               // URL decode emitted tokens
        this.tokenizeHost = settings.getAsBoolean("tokenize_host", true);          // further tokenize the host into sub-domains
        this.tokenizePath = settings.getAsBoolean("tokenize_path", true);          // further tokenize the path
        this.tokenizeQuery = settings.getAsBoolean("tokenize_query", true);        // split the query string into parameters
        this.allowMalformed = settings.getAsBoolean("allow_malformed", false);     // do not fail on un-parseable URLs
        this.tokenizeMalformed = settings.getAsBoolean("tokenize_malformed", false);   // attempt to tokenize malformed URLs
    }


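    /**
     * Builds a new {@link URLTokenizer} and applies the configuration gathered
     * in the constructor. A fresh tokenizer is returned on each call, as the
     * analysis framework expects.
     */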
    @Override
    public Tokenizer create() {
        URLTokenizer tokenizer = new URLTokenizer();
        tokenizer.setParts(parts);
        tokenizer.setUrlDecode(urlDecode);
        tokenizer.setTokenizeHost(tokenizeHost);
        tokenizer.setTokenizePath(tokenizePath);
        tokenizer.setTokenizeQuery(tokenizeQuery);
        tokenizer.setAllowMalformed(allowMalformed);
        tokenizer.setTokenizeMalformed(tokenizeMalformed);
        return tokenizer;
    }
}