package de.mpg.mpi_inf.ambiversenlu.nlu.trie;

import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Set;

/**
 * Utility for building a Lucene {@link FST} that maps strings to consecutive numeric outputs.
 */
public class TrieBuilder {

  private static final Logger logger = LoggerFactory.getLogger(TrieBuilder.class);

  /**
   * Builds a byte-based FST from the given strings.
   *
   * <p>Each string is converted to bytes via {@link BytesRefBuilder#copyChars(CharSequence)} and
   * added with an output equal to its zero-based position in the iteration order of
   * {@code sortedStrings}.
   *
   * <p>The FST builder requires keys to arrive in non-decreasing order of their byte
   * representation. That order is not necessarily the same as Java's natural {@code String}
   * order, so every key is checked explicitly against the last accepted key. A key that sorts
   * strictly before it is skipped (and logged) instead of being handed to the builder; this
   * check does not depend on JVM assertions being enabled. A skipped key still consumes its
   * position, so the outputs of the remaining keys always equal their iteration index.
   *
   * @param sortedStrings the strings to index, iterated in the order the set provides; expected
   *     to be sorted
   * @return the finished FST; NOTE(review): Lucene documents that {@code Builder.finish()} may
   *     return {@code null} when no input was accepted - confirm for the Lucene version in use
   * @throws IOException if the underlying FST builder fails
   */
  public static FST<Long> buildTrie(Set<String> sortedStrings) throws IOException {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
    BytesRefBuilder currentBytes = new BytesRefBuilder();
    BytesRefBuilder previousBytes = new BytesRefBuilder();
    IntsRefBuilder scratchInts = new IntsRefBuilder();
    // An empty string is a legal first key, so "no previous key yet" is tracked with a flag
    // rather than inferred from previousBytes being empty.
    boolean hasPrevious = false;
    long skipped = 0;
    long outputValue = 0;
    for (String mention : sortedStrings) {
      // The position is consumed even when the mention is skipped below, so that outputs keep
      // matching the iteration index of the input set.
      long position = outputValue++;
      currentBytes.copyChars(mention);
      // Same ordering rule the builder itself relies on (unsigned byte-wise comparison), but
      // enforced unconditionally instead of through a JVM assertion.
      if (hasPrevious && currentBytes.get().compareTo(previousBytes.get()) < 0) {
        skipped++;
        logger.debug("Skipping out-of-order mention '{}' at position {}", mention, position);
        continue;
      }
      builder.add(Util.toIntsRef(currentBytes.get(), scratchInts), position);
      previousBytes.copyBytes(currentBytes.get());
      hasPrevious = true;
    }
    if (skipped > 0) {
      logger.warn("Skipped {} mention(s) that were not in byte-sorted order while building trie",
          skipped);
    }
    return builder.finish();
  }
}