/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.luke.models.analysis;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.lang.reflect.Modifier;
import java.net.URL;
import java.net.URLClassLoader;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.luke.models.LukeException;
import org.apache.lucene.luke.util.reflection.ClassScanner;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.IOUtils;

/** Default implementation of {@link Analysis} */
public final class AnalysisImpl implements Analysis {

  private List<Class<? extends Analyzer>> presetAnalyzerTypes;

  private Analyzer analyzer;

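  // Usage sketch (the jar path is hypothetical): external analysis modules must be
  // registered before their factories show up in the getAvailable* lists or can be
  // used by buildCustomAnalyzer.
  //
  //   analysis.addExternalJars(List.of("/path/to/custom-analysis.jar"));
  //   analysis.getAvailableTokenizers(); // now includes tokenizers from the jar
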
  @Override
  public void addExternalJars(List<String> jarFiles) {
    List<URL> urls = new ArrayList<>();

    for (String jarFile : jarFiles) {
      Path path = FileSystems.getDefault().getPath(jarFile);
      if (!Files.exists(path) || !jarFile.endsWith(".jar")) {
        throw new LukeException(String.format(Locale.ENGLISH, "Invalid jar file path: %s", jarFile));
      }
      try {
        URL url = path.toUri().toURL();
        urls.add(url);
      } catch (IOException e) {
        throw new LukeException(e.getMessage(), e);
      }
    }

    // reload available tokenizers, charfilters, and tokenfilters
    URLClassLoader classLoader = new URLClassLoader(
        urls.toArray(new URL[0]), this.getClass().getClassLoader());
    CharFilterFactory.reloadCharFilters(classLoader);
    TokenizerFactory.reloadTokenizers(classLoader);
    TokenFilterFactory.reloadTokenFilters(classLoader);
  }

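  // Scans the org.apache.lucene.analysis packages for concrete Analyzer subclasses
  // and keeps only those with a no-arg constructor, so that every preset can be
  // instantiated via createAnalyzerFromClassName.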
  @Override
  public Collection<Class<? extends Analyzer>> getPresetAnalyzerTypes() {
    if (Objects.isNull(presetAnalyzerTypes)) {
      List<Class<? extends Analyzer>> types = new ArrayList<>();
      for (Class<? extends Analyzer> clazz : getInstantiableSubTypesBuiltIn(Analyzer.class)) {
        try {
          // add to presets only if a no-arg constructor is available
          clazz.getConstructor();
          types.add(clazz);
        } catch (NoSuchMethodException e) {
          // expected: skip analyzers that cannot be instantiated without arguments
        }
      }
      presetAnalyzerTypes = List.copyOf(types);
    }
    return presetAnalyzerTypes;
  }

  @Override
  public Collection<String> getAvailableCharFilters() {
    return CharFilterFactory.availableCharFilters().stream().sorted().collect(Collectors.toList());
  }

  @Override
  public Collection<String> getAvailableTokenizers() {
    return TokenizerFactory.availableTokenizers().stream().sorted().collect(Collectors.toList());
  }

  @Override
  public Collection<String> getAvailableTokenFilters() {
    return TokenFilterFactory.availableTokenFilters().stream().sorted().collect(Collectors.toList());
  }

  private <T> List<Class<? extends T>> getInstantiableSubTypesBuiltIn(Class<T> superType) {
    ClassScanner scanner = new ClassScanner("org.apache.lucene.analysis", getClass().getClassLoader());
    Set<Class<? extends T>> types = scanner.scanSubTypes(superType);
    return types.stream()
        .filter(type -> !Modifier.isAbstract(type.getModifiers()))
        .filter(type -> !type.getSimpleName().startsWith("Mock"))
        .sorted(Comparator.comparing(Class::getName))
        .collect(Collectors.toList());
  }

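  // Example (assumes an analyzer has already been set):
  //
  //   analysis.createAnalyzerFromClassName("org.apache.lucene.analysis.standard.StandardAnalyzer");
  //   List<Token> tokens = analysis.analyze("Lucene in Action");
  //   // each Token holds the term text plus the reflected attribute values,
  //   // e.g. the term "lucene" with its offset and position increment attributes
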
  @Override
  public List<Token> analyze(String text) {
    Objects.requireNonNull(text);

    if (analyzer == null) {
      throw new LukeException("Analyzer is not set.");
    }

    // try-with-resources ensures the stream is closed even if analysis fails
    try (TokenStream stream = analyzer.tokenStream("", text)) {
      List<Token> result = new ArrayList<>();
      stream.reset();

      CharTermAttribute charAtt = stream.getAttribute(CharTermAttribute.class);

      // iterate tokens, copying each token's attribute values as we go
      while (stream.incrementToken()) {
        List<TokenAttribute> attributes = copyAttributes(stream);
        result.add(new Token(charAtt.toString(), attributes));
      }
      // signal end-of-stream so end-of-stream attributes (e.g. final offset) are set
      stream.end();

      return result;
    } catch (IOException e) {
      throw new LukeException(e.getMessage(), e);
    }
  }

  private List<TokenAttribute> copyAttributes(TokenStream tokenStream) {
    List<TokenAttribute> attributes = new ArrayList<>();
    Iterator<AttributeImpl> itr = tokenStream.getAttributeImplsIterator();
    while (itr.hasNext()) {
      AttributeImpl att = itr.next();
      Map<String, String> attValues = new LinkedHashMap<>();
      // reflect the attribute's key/value pairs into a plain string map
      att.reflectWith((attClass, key, value) -> {
        if (value != null) {
          attValues.put(key, value.toString());
        }
      });
      attributes.add(new TokenAttribute(att.getClass().getSimpleName(), attValues));
    }
    return attributes;
  }

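  // Example: select a preset analyzer by its fully qualified class name. The class
  // must be on the classpath (or in a jar added via addExternalJars) and must have
  // a no-arg constructor.
  //
  //   Analyzer analyzer =
  //       analysis.createAnalyzerFromClassName("org.apache.lucene.analysis.standard.StandardAnalyzer");
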
  @Override
  public Analyzer createAnalyzerFromClassName(String analyzerType) {
    Objects.requireNonNull(analyzerType);

    try {
      Class<? extends Analyzer> clazz = Class.forName(analyzerType).asSubclass(Analyzer.class);
      this.analyzer = clazz.getConstructor().newInstance();
      return analyzer;
    } catch (ReflectiveOperationException e) {
      throw new LukeException(String.format(Locale.ENGLISH, "Failed to instantiate class: %s", analyzerType), e);
    }
  }

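  // The component names in CustomAnalyzerConfig are the SPI names reported by the
  // getAvailable* methods. A minimal sketch of the equivalent chain built directly
  // with Lucene's CustomAnalyzer builder (component names here are illustrative;
  // use the exact names the getAvailable* methods report):
  //
  //   CustomAnalyzer.builder()
  //       .withTokenizer("whitespace")
  //       .addCharFilter("htmlstrip")
  //       .addTokenFilter("lowercase")
  //       .build();
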
  @Override
  public Analyzer buildCustomAnalyzer(CustomAnalyzerConfig config) {
    Objects.requireNonNull(config);
    try {
      // create builder
      CustomAnalyzer.Builder builder = config.getConfigDir()
          .map(path -> CustomAnalyzer.builder(FileSystems.getDefault().getPath(path)))
          .orElseGet(CustomAnalyzer::builder);

      // set tokenizer
      builder.withTokenizer(config.getTokenizerConfig().getName(), config.getTokenizerConfig().getParams());

      // add char filters
      for (CustomAnalyzerConfig.ComponentConfig cfConf : config.getCharFilterConfigs()) {
        builder.addCharFilter(cfConf.getName(), cfConf.getParams());
      }

      // add token filters
      for (CustomAnalyzerConfig.ComponentConfig tfConf : config.getTokenFilterConfigs()) {
        builder.addTokenFilter(tfConf.getName(), tfConf.getParams());
      }

      // build analyzer
      this.analyzer = builder.build();
      return analyzer;
    } catch (Exception e) {
      throw new LukeException("Failed to build custom analyzer.", e);
    }
  }

  @Override
  public Analyzer currentAnalyzer() {
    if (analyzer == null) {
      throw new LukeException("Analyzer is not set.");
    }
    return analyzer;
  }

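  // Example: for a custom analyzer configured with one char filter, a tokenizer,
  // and one token filter, the result contains a single CharfilteredText (the text
  // after the char filter) and one NamedTokens entry per tokenizer/filter stage,
  // each keyed by the stage's SPI name via findSPIName.
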
  @Override
  public StepByStepResult analyzeStepByStep(String text) {
    Objects.requireNonNull(text);
    if (analyzer == null) {
      throw new LukeException("Analyzer is not set.");
    }

    if (!(analyzer instanceof CustomAnalyzer)) {
      throw new LukeException("Analyzer is not CustomAnalyzer.");
    }

    List<NamedTokens> namedTokens = new ArrayList<>();
    List<CharfilteredText> charfilteredTexts = new ArrayList<>();

    try {
      CustomAnalyzer customAnalyzer = (CustomAnalyzer) analyzer;
      final List<CharFilterFactory> charFilterFactories = customAnalyzer.getCharFilterFactories();
      Reader reader = new StringReader(text);
      String charFilteredSource = text;
      if (!charFilterFactories.isEmpty()) {
        Reader cs = reader;
        for (CharFilterFactory charFilterFactory : charFilterFactories) {
          // chain each char filter over the previous one so the tokenizer
          // receives the fully filtered input
          cs = charFilterFactory.create(cs);
          // separately re-apply the filter to the accumulated text to record
          // this stage's intermediate output
          Reader readerForWriteOut = new StringReader(charFilteredSource);
          readerForWriteOut = charFilterFactory.create(readerForWriteOut);
          charFilteredSource = writeCharStream(readerForWriteOut);
          charfilteredTexts.add(new CharfilteredText(
              CharFilterFactory.findSPIName(charFilterFactory.getClass()), charFilteredSource));
        }
        reader = cs;
      }

      final TokenizerFactory tokenizerFactory = customAnalyzer.getTokenizerFactory();
      final List<TokenFilterFactory> tokenFilterFactories = customAnalyzer.getTokenFilterFactories();

      TokenStream tokenStream = tokenizerFactory.create();
      ((Tokenizer) tokenStream).setReader(reader);
      List<Token> tokens = new ArrayList<>();
      List<AttributeSource> attributeSources = analyzeTokenStream(tokenStream, tokens);
      namedTokens.add(new NamedTokens(TokenizerFactory.findSPIName(tokenizerFactory.getClass()), tokens));

      ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokenStream, attributeSources);
      for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
        tokenStream = tokenFilterFactory.create(listBasedTokenStream);
        tokens = new ArrayList<>();
        attributeSources = analyzeTokenStream(tokenStream, tokens);
        namedTokens.add(new NamedTokens(TokenFilterFactory.findSPIName(tokenFilterFactory.getClass()), tokens));
        // close the previous stage's stream, ignoring close failures
        IOUtils.closeWhileHandlingException(listBasedTokenStream);
        listBasedTokenStream = new ListBasedTokenStream(listBasedTokenStream, attributeSources);
      }
      IOUtils.closeWhileHandlingException(listBasedTokenStream, reader);
      return new StepByStepResult(charfilteredTexts, namedTokens);
    } catch (Exception e) {
      throw new LukeException(e.getMessage(), e);
    }
  }

  /**
   * Analyzes the given TokenStream, appending a {@link Token} for each term to {@code result},
   * and collects a clone of the stream's attribute state for every token so the token sequence
   * can be replayed as the input of the next stage (see {@link ListBasedTokenStream}).
   *
   * @param tokenStream TokenStream to analyze
   * @param result list that receives the tokens produced from the TokenStream
   * @return per-token attribute states, cloned in token order
   */
  private List<AttributeSource> analyzeTokenStream(TokenStream tokenStream, List<Token> result) {
    final List<AttributeSource> tokens = new ArrayList<>();
    try {
      tokenStream.reset();
      CharTermAttribute charAtt = tokenStream.getAttribute(CharTermAttribute.class);
      while (tokenStream.incrementToken()) {
        tokens.add(tokenStream.cloneAttributes());
        List<TokenAttribute> attributes = copyAttributes(tokenStream);
        result.add(new Token(charAtt.toString(), attributes));
      }
      tokenStream.end();
    } catch (IOException ioe) {
      throw new RuntimeException("Error occurred while iterating over TokenStream", ioe);
    } finally {
      IOUtils.closeWhileHandlingException(tokenStream);
    }
    return tokens;
  }

  /**
   * TokenStream that replays a list of pre-existing tokens.
   * Adapted from {@code org.apache.solr.handler.AnalysisRequestHandlerBase.ListBasedTokenStream}.
   */
  static final class ListBasedTokenStream extends TokenStream {
    private final List<AttributeSource> tokens;
    private Iterator<AttributeSource> tokenIterator;

    /**
     * Creates a new ListBasedTokenStream which uses the given tokens as its token source.
     *
     * @param attributeSource source of the attribute factory and attribute impls
     * @param tokens Source of tokens to be used
     */
    ListBasedTokenStream(AttributeSource attributeSource, List<AttributeSource> tokens) {
      super(attributeSource.getAttributeFactory());
      this.tokens = tokens;
      // Make sure all the attributes of the source are here too
      addAttributes(attributeSource);
    }

    @Override
    public void reset() throws IOException {
      super.reset();
      tokenIterator = tokens.iterator();
    }

    @Override
    public boolean incrementToken() {
      if (tokenIterator.hasNext()) {
        clearAttributes();
        AttributeSource next = tokenIterator.next();
        addAttributes(next);
        next.copyTo(this);
        return true;
      } else {
        return false;
      }
    }

    void addAttributes(AttributeSource attributeSource) {
      Iterator<AttributeImpl> atts = attributeSource.getAttributeImplsIterator();
      while (atts.hasNext()) {
        addAttributeImpl(atts.next()); // adds both impl & interfaces
      }
    }
  }

  private static String writeCharStream(Reader input) {
    final int BUFFER_SIZE = 1024;
    char[] buf = new char[BUFFER_SIZE];
    StringBuilder sb = new StringBuilder();
    int len;
    try {
      // read until EOF; a Reader may legally return fewer chars than requested
      // before the end of the stream, so loop until read() returns -1
      while ((len = input.read(buf, 0, BUFFER_SIZE)) != -1) {
        sb.append(buf, 0, len);
      }
    } catch (IOException e) {
      throw new RuntimeException("Error occurred while reading the char filtered source", e);
    }
    return sb.toString();
  }

}