package org.seqdoop.hadoop_bam.util;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.io.compress.SplitCompressionInputStream;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;

/**
 * A Hadoop {@link CompressionCodec} for the
 * <a href="https://samtools.github.io/hts-specs/SAMv1.pdf">BGZF compression format</a>,
 * which reads and writes files with a <code>.bgz</code> suffix. There is no standard
 * suffix for BGZF-compressed files, and in fact <code>.gz</code> is commonly used, in
 * which case {@link BGZFEnhancedGzipCodec} should be used instead of this class.
 * <p>
 * To use BGZFCodec, set it on the configuration object as follows.
 * </p>
 * <pre>{@code
 * conf.set("io.compression.codecs", BGZFCodec.class.getCanonicalName())
 * }</pre>
 * <p>
 * Output is always written through {@link BGZFCompressionOutputStream}; Hadoop
 * {@link Compressor} instances are never used by this codec, so the compressor-related
 * methods ignore their compressor argument or return {@code null}. Only the split-aware
 * {@code createInputStream} overload is overridden here; the overloads that take no
 * start position are inherited unchanged from {@link GzipCodec}.
 * </p>
 * @see BGZFEnhancedGzipCodec
 */
public class BGZFCodec extends GzipCodec implements SplittableCompressionCodec {

  /** File name extension reported by {@link #getDefaultExtension()}. */
  public static final String DEFAULT_EXTENSION = ".bgz";

  /**
   * Wraps the given stream in a {@link BGZFCompressionOutputStream}.
   *
   * @param out the stream that receives the compressed bytes
   * @return a new {@link BGZFCompressionOutputStream} writing to {@code out}
   * @throws IOException declared by the overridden method
   */
  @Override
  public CompressionOutputStream createOutputStream(OutputStream out) throws IOException {
    return new BGZFCompressionOutputStream(out);
  }

  /**
   * Same as {@link #createOutputStream(OutputStream)}; the compressor is not used.
   *
   * @param out the stream that receives the compressed bytes
   * @param compressor ignored
   * @return a new {@link BGZFCompressionOutputStream} writing to {@code out}
   * @throws IOException declared by the overridden method
   */
  @Override
  public CompressionOutputStream createOutputStream(OutputStream out,
      Compressor compressor) throws IOException {
    return createOutputStream(out); // compressors are not used, so ignore
  }

  /**
   * @return always {@code null}, because this codec does not use compressors
   */
  @Override
  public Class<? extends Compressor> getCompressorType() {
    return null; // compressors are not used, so return null
  }

  /**
   * @return always {@code null}, because this codec does not use compressors
   */
  @Override
  public Compressor createCompressor() {
    return null; // compressors are not used, so return null
  }

  /**
   * Creates an input stream for one split of a BGZF file. The requested {@code start} is
   * replaced by the position reported by
   * {@link BGZFSplitGuesser#guessNextBGZFBlockStart(long, long)} for the range
   * {@code [start, end]} (presumably the next BGZF block boundary; see that class for the
   * exact contract), the underlying stream is positioned there, and a
   * {@link BGZFSplitCompressionInputStream} covering the adjusted range is returned.
   *
   * @param seekableIn the stream to read compressed bytes from; must also implement
   *     {@link Seekable}
   * @param decompressor ignored
   * @param start the requested start offset of the split
   * @param end the requested end offset of the split
   * @param readMode ignored
   * @return a stream over the split, starting at the adjusted start position
   * @throws IOException if {@code seekableIn} is not a {@link Seekable}, or if guessing
   *     the block start or seeking fails
   */
  @Override
  public SplitCompressionInputStream createInputStream(InputStream seekableIn,
      Decompressor decompressor, long start, long end, READ_MODE readMode) throws IOException {
    // Fail fast with a descriptive error rather than a ClassCastException from the cast
    // below; this also rejects a null stream before any work is done.
    if (!(seekableIn instanceof Seekable)) {
      throw new IOException("seekableIn must be an instance of " + Seekable.class.getName());
    }
    BGZFSplitGuesser splitGuesser = new BGZFSplitGuesser(seekableIn);
    long adjustedStart = splitGuesser.guessNextBGZFBlockStart(start, end);
    // The guesser may have moved the stream; position it explicitly at the adjusted start.
    ((Seekable) seekableIn).seek(adjustedStart);
    return new BGZFSplitCompressionInputStream(seekableIn, adjustedStart, end);
  }

  /**
   * @return {@link #DEFAULT_EXTENSION}
   */
  @Override
  public String getDefaultExtension() {
    return DEFAULT_EXTENSION;
  }
}