/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.joshua.tools;

import static org.apache.joshua.decoder.ff.tm.packed.PackedGrammar.VOCABULARY_FILENAME;

import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.TreeMap;

import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
import org.apache.joshua.decoder.ff.tm.format.MosesFormatReader;
import org.apache.joshua.util.FormatUtils;
import org.apache.joshua.util.encoding.EncoderConfiguration;
import org.apache.joshua.util.encoding.FeatureTypeAnalyzer;
import org.apache.joshua.util.encoding.IntEncoder;
import org.apache.joshua.util.io.LineReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


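/**
 * Packs a grammar (hierarchical or phrase-based) into the binary format read by
 * {@code PackedGrammar}. A minimal usage sketch (file names are hypothetical):
 *
 * <pre>{@code
 * GrammarPacker packer = new GrammarPacker(
 *     "grammar.gz",       // grammar to pack
 *     null,               // no config file: auto-detect feature types
 *     "grammar.packed",   // output directory
 *     null,               // no separate alignments file
 *     null,               // no feature dump
 *     false,              // grammar lines do not carry alignments
 *     1000000);           // approximate maximum slice size, in rules
 * packer.pack();
 * }</pre>
 */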
public class GrammarPacker {

  private static final Logger LOG = LoggerFactory.getLogger(GrammarPacker.class);

  /**
   * The packed grammar version number. Increment this any time you add new features, and update
   * the documentation.
   *
   * Version history:
   * 
   * - 2. The default version.
   *
   * - 3 (May 2016). This was the first version to be explicitly marked. It removed the special
   * phrase-table packing that packed phrases without the [X,1] on the source and target sides,
   * which then required special handling in the decoder for phrase-based decoding. In this
   * version, [X,1] is required to be present on the source and target sides, and phrase-based
   * decoding is implemented as a left-branching grammar.
   *
   * - 4 (August 2016). Phrase-based decoding was rewritten to represent phrases without a builtin
   * nonterminal. Instead, cost-less glue rules are used in phrase-based decoding. This eliminates
   * the need for special handling of phrase grammars (except for having to add a LHS), and lets
   * phrase grammars be used in both hierarchical and phrase-based decoding without conversion.
   *
   */
  public static final int VERSION = 4;

  // Size limit for slice in bytes.
  private static final int DATA_SIZE_LIMIT = (int) (Integer.MAX_VALUE * 0.8);
  // Estimated average number of feature entries for one rule.
  private static final int DATA_SIZE_ESTIMATE = 20;

  private static final String SOURCE_WORDS_SEPARATOR = " ||| ";

  // Output directory name.
  private final String output;

  // Input grammar to be packed.
  private final String grammar;

  public String getGrammar() {
    return grammar;
  }

  public String getOutputDirectory() {
    return output;
  }

  // Approximate maximum size of a slice in number of rules
  private int approximateMaximumSliceSize;

  private final boolean labeled;

  private final boolean packAlignments;
  private final boolean grammarAlignments;
  private final String alignments;

  private final FeatureTypeAnalyzer types;
  private EncoderConfiguration encoderConfig;

  private final String dump;

  private int max_source_len;

  public GrammarPacker(String grammar_filename, String config_filename, String output_filename,
      String alignments_filename, String featuredump_filename, boolean grammar_alignments,
      int approximateMaximumSliceSize)
      throws IOException {
    this.labeled = true;
    this.grammar = grammar_filename;
    this.output = output_filename;
    this.dump = featuredump_filename;
    this.grammarAlignments = grammar_alignments;
    this.approximateMaximumSliceSize = approximateMaximumSliceSize;
    this.max_source_len = 0;

    // TODO: Always open encoder config? This is debatable.
    this.types = new FeatureTypeAnalyzer(true);

    this.alignments = alignments_filename;
    packAlignments = grammarAlignments || (alignments != null);
    if (!packAlignments) {
      LOG.info("No alignments file or grammar specified, skipping.");
    } else if (alignments != null && !new File(alignments_filename).exists()) {
      throw new RuntimeException("Alignments file does not exist: " + alignments);
    }

    if (config_filename != null) {
      readConfig(config_filename);
      types.readConfig(config_filename);
    } else {
      LOG.info("No config specified. Attempting auto-detection of feature types.");
    }
    LOG.info("Approximate maximum slice size (in # of rules) set to {}", approximateMaximumSliceSize);

    File working_dir = new File(output);
    working_dir.mkdir();
    if (!working_dir.exists()) {
      throw new RuntimeException("Failed creating output directory.");
    }
  }

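  /**
   * Reads packer options from a config file. Each line has the form {@code key value};
   * currently only {@code slice_size} is recognized, and text after '#' is treated as
   * a comment. An illustrative line: {@code slice_size 2000000} caps slices at roughly
   * two million rules.
   *
   * @param config_filename path to the config file
   * @throws IOException if the config file cannot be read
   */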
  private void readConfig(String config_filename) throws IOException {
    try(LineReader reader = new LineReader(config_filename)) {
      while (reader.hasNext()) {
        // Clean up line, chop comments off and skip if the result is empty.
        String line = reader.next().trim();
        if (line.indexOf('#') != -1)
          line = line.substring(0, line.indexOf('#'));
        if (line.isEmpty())
          continue;
        String[] fields = line.split("[\\s]+");

        if (fields.length < 2) {
          throw new RuntimeException("Incomplete line in config.");
        }
        if ("slice_size".equals(fields[0])) {
          // Approximate maximum number of rules per slice (held in memory at once).
          approximateMaximumSliceSize = Integer.parseInt(fields[1]);
        }
      }
    }
  }

  /**
   * Executes the packing.
   *
   * @throws IOException if there is an error reading the grammar
   */
  public void pack() throws IOException {
    LOG.info("Beginning exploration pass.");

    // Explore pass. Learn vocabulary and feature value histograms.
    LOG.info("Exploring: {}", grammar);

    HieroFormatReader grammarReader = getGrammarReader();
    explore(grammarReader);

    LOG.info("Exploration pass complete. Freezing vocabulary and finalizing encoders.");
    if (dump != null) {
      try (PrintWriter dump_writer = new PrintWriter(dump)) {
        dump_writer.println(types.toString());
      }
    }

    types.inferTypes(this.labeled);
    LOG.info("Type inference complete.");

    LOG.info("Finalizing encoding.");

    LOG.info("Writing encoding.");
    types.write(output + File.separator + "encoding");

    writeVocabulary();

    String configFile = output + File.separator + "config";
    LOG.info("Writing config to '{}'", configFile);
    // Write config options.
    try (FileWriter config = new FileWriter(configFile)) {
      config.write(String.format("version = %d\n", VERSION));
      config.write(String.format("max-source-len = %d\n", max_source_len));
    }

    // Read the previously written encoder configuration to match up with the
    // final vocabulary ids.
    LOG.info("Reading encoding.");
    encoderConfig = new EncoderConfiguration();
    encoderConfig.load(output + File.separator + "encoding");

    LOG.info("Beginning packing pass.");
    // Actual binarization pass. Slice and pack source, target and data.
    grammarReader = getGrammarReader();
    LineReader alignment_reader = null;
    if (packAlignments && !grammarAlignments)
      alignment_reader = new LineReader(alignments);
    try {
      binarize(grammarReader, alignment_reader);
    } finally {
      if (alignment_reader != null)
        alignment_reader.close();
    }
    LOG.info("Packing complete.");

    LOG.info("Packed grammar in: {}", output);
    LOG.info("Done.");
  }

  /**
   * Returns a reader that turns whatever file format is found into Hiero grammar rules.
   *
   * @return a Hiero format reader
   * @throws IOException if the grammar file cannot be opened or read
   */
  private HieroFormatReader getGrammarReader() throws IOException {
    try (LineReader reader = new LineReader(grammar)) {
      if (!reader.hasNext()) {
        throw new RuntimeException("Grammar file is empty: " + grammar);
      }
      String line = reader.next();
      if (line.startsWith("[")) {
        return new HieroFormatReader(grammar);
      } else {
        return new MosesFormatReader(grammar);
      }
    }
  }

  /**
   * First pass over the grammar: records the maximum source side length and adds
   * symbols and feature names to the vocabulary, passing each feature value
   * through the type analyzer.
   *
   * @param reader the Hiero format reader
   */
  private void explore(HieroFormatReader reader) {

    // We always assume a labeled grammar. Unlabeled features are assumed to be dense and to always
    // appear in the same order. They are assigned numeric names in order of appearance.
    this.types.setLabeled(true);

    for (Rule rule: reader) {

      max_source_len = Math.max(max_source_len, rule.getFrench().length);

      /* Add symbols to vocabulary.
       * NOTE: In case of nonterminals, we add both stripped versions ("[X]")
       * and "[X,1]" to the vocabulary.
       *
       * TODO: MJP May 2016: Is it necessary to add [X,1]? This is currently being done in
       * {@link HieroFormatReader}, which is called by {@link MosesFormatReader}.
       */
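      /* Illustrative example: in the feature string "p(e|f)=0.5 0.2", the first
       * entry is labeled ("p(e|f)"), while the unlabeled "0.2" is assigned the
       * numeric name "0" in order of appearance. */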

      // Add feature names to vocabulary and pass the value through the
      // appropriate encoder.
      int feature_counter = 0;
      String[] features = rule.getFeatureString().split("\\s+");
      for (String feature : features) {
        if (feature.contains("=")) {
          String[] fe = feature.split("=");
          if (fe[0].equals("Alignment"))
            continue;
          types.observe(Vocabulary.id(fe[0]), Float.parseFloat(fe[1]));
        } else {
          types.observe(Vocabulary.id(String.valueOf(feature_counter++)),
              Float.parseFloat(feature));
        }
      }
    }
  }

  /**
   * Returns a String encoding the first two source words.
   * If there is only one source word, the empty string is used for the second.
   */
  private String getFirstTwoSourceWords(final String[] source_words) {
    return source_words[0] + SOURCE_WORDS_SEPARATOR + ((source_words.length > 1) ? source_words[1] : "");
  }
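  /*
   * Illustrative example (hypothetical rules): the two rules
   *   [X] ||| el gato ||| the cat ||| ...
   *   [X] ||| el gato negro ||| the black cat ||| ...
   * share the key "el ||| gato", so they must land in the same slice;
   * binarize() below only flushes once this first-two-words key changes.
   */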

  private void binarize(HieroFormatReader grammarReader, LineReader alignment_reader) throws IOException {
    int counter = 0;
    int slice_counter = 0;
    int num_slices = 0;

    boolean ready_to_flush = false;
    // to determine when flushing is possible
    String prev_first_two_source_words = null;

    PackingTrie<SourceValue> source_trie = new PackingTrie<>();
    PackingTrie<TargetValue> target_trie = new PackingTrie<>();
    FeatureBuffer feature_buffer = new FeatureBuffer();

    AlignmentBuffer alignment_buffer = null;
    if (packAlignments)
      alignment_buffer = new AlignmentBuffer();

    TreeMap<Integer, Float> features = new TreeMap<>();
    for (Rule rule: grammarReader) {
      counter++;
      slice_counter++;

      String lhs_word = Vocabulary.word(rule.getLHS());
      String[] source_words = rule.getFrenchWords().split("\\s+");
      String[] target_words = rule.getEnglishWords().split("\\s+");
      String[] feature_entries = rule.getFeatureString().split("\\s+");

      // Reached slice limit size, indicate that we're closing up.
      if (!ready_to_flush
          && (slice_counter > approximateMaximumSliceSize
              || feature_buffer.overflowing()
              || (packAlignments && alignment_buffer.overflowing()))) {
        ready_to_flush = true;
        // store the first two source words when slice size limit was reached
        prev_first_two_source_words = getFirstTwoSourceWords(source_words);
      }
      // ready to flush
      if (ready_to_flush) {
        final String first_two_source_words = getFirstTwoSourceWords(source_words);
        // the grammar can only be partitioned at the level of first two source word changes.
        // Thus, we can only flush if the current first two source words differ from the ones
        // when the slice size limit was reached.
        if (!first_two_source_words.equals(prev_first_two_source_words)) {
          LOG.warn("ready to flush and first two words have changed ({} vs. {})",
              prev_first_two_source_words, first_two_source_words);
          LOG.info("flushing {} rules to slice.", slice_counter);
          flush(source_trie, target_trie, feature_buffer, alignment_buffer, num_slices);
          source_trie.clear();
          target_trie.clear();
          feature_buffer.clear();
          if (packAlignments)
            alignment_buffer.clear();

          num_slices++;
          slice_counter = 0;
          ready_to_flush = false;
        }
      }

      int alignment_index = -1;
      // If present, process alignments.
      if (packAlignments) {
        String alignment_line;
        if (grammarAlignments) {
          alignment_line = rule.getAlignmentString();
        } else {
          if (!alignment_reader.hasNext()) {
            LOG.error("No more alignments starting in line {}", counter);
            throw new RuntimeException("No more alignments starting in line " + counter);
          }
          alignment_line = alignment_reader.next().trim();
        }
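        // E.g. (illustrative): the alignment line "0-1 1-0" is parsed into the
        // byte array {0, 1, 1, 0}; AlignmentBuffer.add() later prefixes it with
        // the number of alignment points (2).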
        // An empty alignment line yields zero alignment points.
        byte[] alignments;
        if (alignment_line.isEmpty()) {
          alignments = new byte[0];
        } else {
          String[] alignment_entries = alignment_line.split("\\s+");
          alignments = new byte[alignment_entries.length * 2];
          for (int i = 0; i < alignment_entries.length; i++) {
            String[] parts = alignment_entries[i].split("-");
            alignments[2 * i] = Byte.parseByte(parts[0]);
            alignments[2 * i + 1] = Byte.parseByte(parts[1]);
          }
        }
        alignment_index = alignment_buffer.add(alignments);
      }

      // Process features.
      // Implicitly sort via TreeMap, write to data buffer, remember position
      // to pass on to the source trie node.
      features.clear();
      int feature_count = 0;
      for (String feature_entry : feature_entries) {
        int feature_id;
        float feature_value;
        if (feature_entry.contains("=")) {
          String[] parts = feature_entry.split("=");
          if (parts[0].equals("Alignment"))
            continue;
          feature_id = Vocabulary.id(parts[0]);
          feature_value = Float.parseFloat(parts[1]);
        } else {
          feature_id = Vocabulary.id(String.valueOf(feature_count++));
          feature_value = Float.parseFloat(feature_entry);
        }
        if (feature_value != 0)
          features.put(encoderConfig.innerId(feature_id), feature_value);
      }
      int features_index = feature_buffer.add(features);

      // Sanity check on the data block index.
      if (packAlignments && features_index != alignment_index) {
        LOG.error("Block index mismatch between features ({}) and alignments ({}).",
            features_index, alignment_index);
        throw new RuntimeException("Data block index mismatch.");
      }

      // Process source side.
      SourceValue sv = new SourceValue(Vocabulary.id(lhs_word), features_index);
      int[] source = new int[source_words.length];
      for (int i = 0; i < source_words.length; i++) {
        if (FormatUtils.isNonterminal(source_words[i]))
          source[i] = Vocabulary.id(FormatUtils.stripNonTerminalIndex(source_words[i]));
        else
          source[i] = Vocabulary.id(source_words[i]);
      }
      source_trie.add(source, sv);

      // Process target side.
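      // Note: the target side is inserted in reverse order, matching the
      // upward-pointing target trie, and nonterminals are stored as negative
      // co-indexes (e.g. [X,1] becomes -1) rather than vocabulary ids.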
      TargetValue tv = new TargetValue(sv);
      int[] target = new int[target_words.length];
      for (int i = 0; i < target_words.length; i++) {
        if (FormatUtils.isNonterminal(target_words[i])) {
          target[target_words.length - (i + 1)] = -FormatUtils.getNonterminalIndex(target_words[i]);
        } else {
          target[target_words.length - (i + 1)] = Vocabulary.id(target_words[i]);
        }
      }
      target_trie.add(target, tv);
    }
    // flush last slice and clear buffers
    flush(source_trie, target_trie, feature_buffer, alignment_buffer, num_slices);
  }

  /**
   * Serializes the source, target, and feature data structures into interlinked binary files.
   * The target trie is written first, as a skeletal (nodes don't carry any data) upward-pointing
   * trie, updating the linking source trie nodes with each position once it is known. Source and
   * feature data are written simultaneously: the source structure is written as a
   * downward-pointing trie and stores each rule's lhs as well as links into the target and
   * feature streams; the feature buffer is prompted to write out a data block each time a source
   * value is serialized.
   *
   * @param source_trie the source trie
   * @param target_trie the target trie
   * @param feature_buffer the feature buffer
   * @param alignment_buffer the alignment buffer, or null if alignments are not packed
   * @param id the id of the slice to flush
   * @throws IOException if writing a slice file fails
   */
  private void flush(PackingTrie<SourceValue> source_trie,
      PackingTrie<TargetValue> target_trie, FeatureBuffer feature_buffer,
      AlignmentBuffer alignment_buffer, int id) throws IOException {
    // Make a slice object for this piece of the grammar.
    PackingFileTuple slice = new PackingFileTuple("slice_" + String.format("%05d", id));
    // Pull out the streams for source, target and data output.
    DataOutputStream source_stream = slice.getSourceOutput();
    DataOutputStream target_stream = slice.getTargetOutput();
    DataOutputStream target_lookup_stream = slice.getTargetLookupOutput();
    DataOutputStream feature_stream = slice.getFeatureOutput();
    DataOutputStream alignment_stream = slice.getAlignmentOutput();

    Queue<PackingTrie<TargetValue>> target_queue;
    Queue<PackingTrie<SourceValue>> source_queue;

    // The number of integers both written into the source stream and
    // buffered in the source queue.
    int source_position;
    // The number of integers written into the target stream.
    int target_position;

    // Add the target trie root to the queue and start writing at position 0.
    target_queue = new LinkedList<>();
    target_queue.add(target_trie);
    target_position = 0;

    // Target lookup table for trie levels.
    int current_level_size = 1;
    int next_level_size = 0;
    ArrayList<Integer> target_lookup = new ArrayList<>();

    // Packing loop for upwards-pointing target trie.
    while (!target_queue.isEmpty()) {
      // Pop top of queue.
      PackingTrie<TargetValue> node = target_queue.poll();
      // Register that this is where we're writing the node to.
      node.address = target_position;
      // Tell source nodes that we're writing to this position in the file.
      for (TargetValue tv : node.values)
        tv.parent.target = node.address;
      // Write link to parent.
      if (node.parent != null)
        target_stream.writeInt(node.parent.address);
      else
        target_stream.writeInt(-1);
      target_stream.writeInt(node.symbol);
      // Enqueue children.
      for (int k : node.children.descendingKeySet()) {
        PackingTrie<TargetValue> child = node.children.get(k);
        target_queue.add(child);
      }
      target_position += node.size(false, true);
      next_level_size += node.children.descendingKeySet().size();

      current_level_size--;
      if (current_level_size == 0) {
        target_lookup.add(target_position);
        current_level_size = next_level_size;
        next_level_size = 0;
      }
    }
    target_lookup_stream.writeInt(target_lookup.size());
    for (int i : target_lookup)
      target_lookup_stream.writeInt(i);
    target_lookup_stream.close();

    // Setting up for source and data writing.
    source_queue = new LinkedList<>();
    source_queue.add(source_trie);
    source_position = source_trie.size(true, false);
    source_trie.address = target_position;

    // Ready data buffers for writing.
    feature_buffer.initialize();
    if (packAlignments)
      alignment_buffer.initialize();

    // Packing loop for downwards-pointing source trie.
    while (!source_queue.isEmpty()) {
      // Pop top of queue.
      PackingTrie<SourceValue> node = source_queue.poll();
      // Write number of children.
      source_stream.writeInt(node.children.size());
      // Write links to children.
      for (int k : node.children.descendingKeySet()) {
        PackingTrie<SourceValue> child = node.children.get(k);
        // Enqueue child.
        source_queue.add(child);
        // The child's address is the current cumulated size (the end of the
        // region written or buffered so far).
        child.address = source_position;
        // Advance cumulated size by child's size.
        source_position += child.size(true, false);
        // Write the link.
        source_stream.writeInt(k);
        source_stream.writeInt(child.address);
      }
      // Write number of data items.
      source_stream.writeInt(node.values.size());
      // Write lhs and links to target and data.
      for (SourceValue sv : node.values) {
        int feature_block_index = feature_buffer.write(sv.data);
        if (packAlignments) {
          int alignment_block_index = alignment_buffer.write(sv.data);
          if (alignment_block_index != feature_block_index) {
            LOG.error("Block index mismatch.");
            throw new RuntimeException("Block index mismatch: alignment (" + alignment_block_index
                + ") and features (" + feature_block_index + ") don't match.");
          }
        }
        source_stream.writeInt(sv.lhs);
        source_stream.writeInt(sv.target);
        source_stream.writeInt(feature_block_index);
      }
    }
    // Flush the data stream.
    feature_buffer.flush(feature_stream);
    if (packAlignments)
      alignment_buffer.flush(alignment_stream);

    target_stream.close();
    source_stream.close();
    feature_stream.close();
    if (packAlignments)
      alignment_stream.close();
  }

  public void writeVocabulary() throws IOException {
    final String vocabularyFilename = output + File.separator + VOCABULARY_FILENAME;
    LOG.info("Writing vocabulary to {}", vocabularyFilename);
    Vocabulary.write(vocabularyFilename);
  }

  /**
   * Integer-labeled, doubly-linked trie with some provisions for packing.
   *
   * @author Juri Ganitkevitch
   *
   * @param <D> The trie's value type.
   */
  class PackingTrie<D extends PackingTrieValue> {
    int symbol;
    PackingTrie<D> parent;

    final TreeMap<Integer, PackingTrie<D>> children;
    final List<D> values;

    int address;

    PackingTrie() {
      address = -1;

      symbol = 0;
      parent = null;

      children = new TreeMap<>();
      values = new ArrayList<>();
    }

    PackingTrie(PackingTrie<D> parent, int symbol) {
      this();
      this.parent = parent;
      this.symbol = symbol;
    }

    void add(int[] path, D value) {
      add(path, 0, value);
    }

    private void add(int[] path, int index, D value) {
      if (index == path.length)
        this.values.add(value);
      else {
        PackingTrie<D> child = children.get(path[index]);
        if (child == null) {
          child = new PackingTrie<>(this, path[index]);
          children.put(path[index], child);
        }
        child.add(path, index + 1, value);
      }
    }

    /**
     * Calculate the size (in ints) of a packed trie node. Distinguishes downwards pointing (parent
     * points to children) from upwards pointing (children point to parent) tries, as well as
     * skeletal (no data, just the labeled links) and non-skeletal (nodes have a data block)
     * packing.
     *
     * @param downwards Are we packing into a downwards-pointing trie?
     * @param skeletal Are we packing into a skeletal trie?
     *
     * @return Number of integers the trie node occupies when packed.
     */
    int size(boolean downwards, boolean skeletal) {
      int size = 0;
      if (downwards) {
        // Number of children and links to children.
        size = 1 + 2 * children.size();
      } else {
        // Link to parent.
        size += 2;
      }
      // Non-skeletal packing: number of data items.
      if (!skeletal)
        size += 1;
      // Non-skeletal packing: write size taken up by data items.
      if (!skeletal && !values.isEmpty())
        size += values.size() * values.get(0).size();

      return size;
    }
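    // Worked example (illustrative): a downward-pointing, non-skeletal node with
    // two children and one SourceValue occupies (1 + 2*2) + 1 + 1*3 = 9 ints.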

    void clear() {
      children.clear();
      values.clear();
    }
  }

  interface PackingTrieValue {
    int size();
  }

  class SourceValue implements PackingTrieValue {
    int lhs;
    int data;
    int target;

    public SourceValue() {
    }

    SourceValue(int lhs, int data) {
      this.lhs = lhs;
      this.data = data;
    }

    void setTarget(int target) {
      this.target = target;
    }

    @Override
    public int size() {
      return 3;
    }
  }

  class TargetValue implements PackingTrieValue {
    final SourceValue parent;

    TargetValue(SourceValue parent) {
      this.parent = parent;
    }

    @Override
    public int size() {
      return 0;
    }
  }

  abstract class PackingBuffer<T> {
    private byte[] backing;
    protected ByteBuffer buffer;

    protected final ArrayList<Integer> memoryLookup;
    protected int totalSize;
    protected final ArrayList<Integer> onDiskOrder;

    PackingBuffer() {
      allocate();
      memoryLookup = new ArrayList<>();
      onDiskOrder = new ArrayList<>();
      totalSize = 0;
    }

    abstract int add(T item);

    // Allocate a reasonably-sized buffer for the feature data.
    private void allocate() {
      backing = new byte[approximateMaximumSliceSize * DATA_SIZE_ESTIMATE];
      buffer = ByteBuffer.wrap(backing);
    }

    // Reallocate the backing array and buffer, copies data over.
    protected void reallocate() {
      if (backing.length == Integer.MAX_VALUE)
        return;
      long attempted_length = backing.length * 2L;
      int new_length;
      // Detect overflow.
      if (attempted_length >= Integer.MAX_VALUE)
        new_length = Integer.MAX_VALUE;
      else
        new_length = (int) attempted_length;
      byte[] new_backing = new byte[new_length];
      System.arraycopy(backing, 0, new_backing, 0, backing.length);
      int old_position = buffer.position();
      ByteBuffer new_buffer = ByteBuffer.wrap(new_backing);
      new_buffer.position(old_position);
      buffer = new_buffer;
      backing = new_backing;
    }

    /**
     * Prepare the data buffer for disk writing.
     */
    void initialize() {
      onDiskOrder.clear();
    }

    /**
     * Enqueue a data block for later writing.
     *
     * @param block_index The index of the data block to add to the writing queue.
     * @return The to-be-written block's output index.
     */
    int write(int block_index) {
      onDiskOrder.add(block_index);
      return onDiskOrder.size() - 1;
    }
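    // Example (illustrative): after initialize(), write(3) followed by write(0)
    // returns output indices 0 and 1; flush() then writes block 3 before block 0,
    // right after the header.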

    /**
     * Performs the actual writing to disk in the order specified by calls to write() since the last
     * call to initialize().
     *
     * @param out the stream to write the queued blocks to
     * @throws IOException if writing fails
     */
    void flush(DataOutputStream out) throws IOException {
      writeHeader(out);
      int size;
      int block_address;
      for (int block_index : onDiskOrder) {
        block_address = memoryLookup.get(block_index);
        size = blockSize(block_index);
        out.write(backing, block_address, size);
      }
    }

    void clear() {
      buffer.clear();
      memoryLookup.clear();
      onDiskOrder.clear();
    }

    boolean overflowing() {
      return (buffer.position() >= DATA_SIZE_LIMIT);
    }

    private void writeHeader(DataOutputStream out) throws IOException {
      if (out.size() == 0) {
        out.writeInt(onDiskOrder.size());
        out.writeInt(totalSize);
        int disk_position = headerSize();
        for (int block_index : onDiskOrder) {
          out.writeInt(disk_position);
          disk_position += blockSize(block_index);
        }
      } else {
        throw new RuntimeException("Got a used stream for header writing.");
      }
    }
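    /*
     * Resulting file layout (sketch, assuming two blocks are enqueued):
     *   int    number of blocks (2)
     *   int    total size of the buffered block data, in bytes
     *   int    offset of block 0 = headerSize() = 4 * (2 + 2) = 16
     *   int    offset of block 1
     * ...followed by the raw bytes of each block in onDiskOrder.
     */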

    private int headerSize() {
      // One integer for each data block, plus number of blocks and total size.
      return 4 * (onDiskOrder.size() + 2);
    }

    private int blockSize(int block_index) {
      int block_address = memoryLookup.get(block_index);
      return (block_index < memoryLookup.size() - 1 ? memoryLookup.get(block_index + 1) : totalSize)
          - block_address;
    }
  }

  class FeatureBuffer extends PackingBuffer<TreeMap<Integer, Float>> {

    private IntEncoder idEncoder;

    FeatureBuffer() throws IOException {
      super();
      idEncoder = types.getIdEncoder();
      LOG.info("Encoding feature ids in: {}", idEncoder.getKey());
    }

    /**
     * Add a block of features to the buffer.
     *
     * @param features TreeMap with the features for one rule.
     * @return The index of the resulting data block.
     */
    @Override
    int add(TreeMap<Integer, Float> features) {
      int data_position = buffer.position();

      // Over-estimate how much room this addition will need: for each
      // feature (ID_SIZE for label, "upper bound" of 4 for the value), plus ID_SIZE for
      // the number of features. If this won't fit, reallocate the buffer.
      int size_estimate = (4 + EncoderConfiguration.ID_SIZE) * features.size()
          + EncoderConfiguration.ID_SIZE;
      if (buffer.capacity() - buffer.position() <= size_estimate)
        reallocate();

      // Write features to buffer.
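      // Block layout (sketch): [count][id_0][value_0]...[id_n-1][value_n-1], where
      // the count and ids use the id encoder and each value uses the encoder
      // configured for that feature type; zero-valued features are skipped.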
      idEncoder.write(buffer, features.size());
      for (Integer k : features.descendingKeySet()) {
        float v = features.get(k);
        // Sparse features.
        if (v != 0.0) {
          idEncoder.write(buffer, k);
          encoderConfig.encoder(k).write(buffer, v);
        }
      }
      // Store position the block was written to.
      memoryLookup.add(data_position);
      // Update total size (in bytes).
      totalSize = buffer.position();

      // Return block index.
      return memoryLookup.size() - 1;
    }
  }

  class AlignmentBuffer extends PackingBuffer<byte[]> {

    AlignmentBuffer() throws IOException {
      super();
    }

    /**
     * Add a rule's alignments to the buffer.
     *
     * @param alignments a byte array with the alignment points for one rule.
     * @return The index of the resulting data block.
     */
    @Override
    int add(byte[] alignments) {
      int data_position = buffer.position();
      int size_estimate = alignments.length + 1;
      if (buffer.capacity() - buffer.position() <= size_estimate)
        reallocate();

      // Write alignment points to buffer.
      buffer.put((byte) (alignments.length / 2));
      buffer.put(alignments);

      // Store position the block was written to.
      memoryLookup.add(data_position);
      // Update total size (in bytes).
      totalSize = buffer.position();
      // Return block index.
      return memoryLookup.size() - 1;
    }
  }

  class PackingFileTuple implements Comparable<PackingFileTuple> {
    private final File sourceFile;
    private final File targetLookupFile;
    private final File targetFile;

    private final File featureFile;
    private File alignmentFile;

    PackingFileTuple(String prefix) {
      sourceFile = new File(output + File.separator + prefix + ".source");
      targetFile = new File(output + File.separator + prefix + ".target");
      targetLookupFile = new File(output + File.separator + prefix + ".target.lookup");
      featureFile = new File(output + File.separator + prefix + ".features");

      alignmentFile = null;
      if (packAlignments)
        alignmentFile = new File(output + File.separator + prefix + ".alignments");

      LOG.info("Allocated slice: {}", sourceFile.getAbsolutePath());
    }

    DataOutputStream getSourceOutput() throws IOException {
      return getOutput(sourceFile);
    }

    DataOutputStream getTargetOutput() throws IOException {
      return getOutput(targetFile);
    }

    DataOutputStream getTargetLookupOutput() throws IOException {
      return getOutput(targetLookupFile);
    }

    DataOutputStream getFeatureOutput() throws IOException {
      return getOutput(featureFile);
    }

    DataOutputStream getAlignmentOutput() throws IOException {
      if (alignmentFile != null)
        return getOutput(alignmentFile);
      return null;
    }

    private DataOutputStream getOutput(File file) throws IOException {
      if (file.createNewFile()) {
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(file)));
      } else {
        throw new RuntimeException("File doesn't exist: " + file.getName());
      }
    }

    long getSize() {
      return sourceFile.length() + targetFile.length() + featureFile.length();
    }

    @Override
    public int compareTo(PackingFileTuple o) {
      // Sort larger slices first.
      return Long.compare(o.getSize(), getSize());
    }
  }
}