Java Code Examples for org.apache.hadoop.io.compress.CompressionCodec#createOutputStream()

The following examples show how to use org.apache.hadoop.io.compress.CompressionCodec#createOutputStream(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: IFile.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the output stream chain on top of {@code rawOut}.
 *
 * <p>{@code rawOut} is always wrapped in an {@link IFileOutputStream}
 * (kept in {@code checksumOut}). When a codec is supplied and a compressor
 * can be obtained from the {@link CodecPool}, a compression stream is layered
 * on top and {@code compressOutput} is set; otherwise {@code out} writes
 * directly to {@code checksumOut}.
 *
 * @param codec codec to compress with, or {@code null} for no compression
 * @throws IOException propagated from stream construction
 */
void setupOutputStream(CompressionCodec codec) throws IOException {
  this.checksumOut = new IFileOutputStream(this.rawOut);

  if (codec == null) {
    // No codec requested: write straight to the IFile stream.
    this.out = new FSDataOutputStream(checksumOut, null);
    return;
  }

  this.compressor = CodecPool.getCompressor(codec);
  if (this.compressor == null) {
    // Pool could not supply a compressor: fall back to uncompressed output.
    LOG.warn("Could not obtain compressor from CodecPool");
    this.out = new FSDataOutputStream(checksumOut, null);
    return;
  }

  // Pooled compressors may carry state from a previous user.
  this.compressor.reset();
  this.compressedOut = codec.createOutputStream(checksumOut, compressor);
  this.out = new FSDataOutputStream(this.compressedOut, null);
  this.compressOutput = true;
}
 
Example 2
Source File: Anonymizer.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Creates {@code path} and returns a pretty-printing, UTF-8 JSON generator
 * that writes to it.
 *
 * <p>The codec is looked up from the path via {@link CompressionCodecFactory};
 * when one is found the file is written through that codec using a compressor
 * taken from the {@link CodecPool}.
 *
 * @param conf configuration used to resolve the file system and the codec
 * @param path file to create
 * @return a generator with the default pretty printer enabled
 * @throws IOException if the file or the streams cannot be created
 */
private JsonGenerator createJsonGenerator(Configuration conf, Path path)
throws IOException {
  final FileSystem targetFs = path.getFileSystem(conf);
  final CompressionCodec codec =
    new CompressionCodecFactory(conf).getCodec(path);

  final OutputStream sink;
  if (codec == null) {
    sink = targetFs.create(path);
  } else {
    // NOTE(review): the pooled compressor is not handed back to the
    // CodecPool anywhere in this method -- confirm the owner releases it.
    Compressor pooled = CodecPool.getCompressor(codec);
    sink = codec.createOutputStream(targetFs.create(path), pooled);
  }

  JsonGenerator generator =
    outFactory.createJsonGenerator(sink, JsonEncoding.UTF8);
  generator.useDefaultPrettyPrinter();
  return generator;
}
 
Example 3
Source File: CompressionEmulationUtil.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Returns an {@link OutputStream} for a file that might need compression.
 *
 * <p>When output compression is enabled in the job configuration, the
 * codec's default extension is appended to the file name. The returned
 * stream is wrapped by the codec only if compression emulation is also
 * enabled; otherwise a plain stream to the (possibly renamed) file is
 * returned. The file is created with overwrite disabled.
 *
 * @param file target file; the codec extension may be appended to it
 * @param conf configuration consulted for compression settings
 * @return a stream writing to the target file
 * @throws IOException if the file cannot be created
 */
static OutputStream getPossiblyCompressedOutputStream(Path file,
                                                      Configuration conf)
throws IOException {
  final FileSystem fs = file.getFileSystem(conf);
  final JobConf jobConf = new JobConf(conf);

  if (!org.apache.hadoop.mapred.FileOutputFormat.getCompressOutput(jobConf)) {
    return fs.create(file, false);
  }

  // Resolve the configured codec class, defaulting to gzip.
  final Class<? extends CompressionCodec> codecType =
    org.apache.hadoop.mapred.FileOutputFormat
                            .getOutputCompressorClass(jobConf,
                                                      GzipCodec.class);
  final CompressionCodec codec = ReflectionUtils.newInstance(codecType, conf);

  // The extension is added whenever compression is on, even if emulation
  // turns out to be disabled below.
  final Path target = file.suffix(codec.getDefaultExtension());

  if (!isCompressionEmulationEnabled(conf)) {
    return fs.create(target, false);
  }

  final FSDataOutputStream rawOut = fs.create(target, false);
  return new DataOutputStream(codec.createOutputStream(rawOut));
}
 
Example 4
Source File: CompressedFileWriteFromLocal.java    From hiped2 with Apache License 2.0 6 votes vote down vote up
/**
 * Copies a local file into the default file system, compressing it with
 * the given codec.
 *
 * @param args {@code args[0]} codec class name, {@code args[1]} local source
 *             path, {@code args[2]} destination path (the codec's default
 *             extension is appended)
 * @throws Exception on any failure loading the codec or copying the data
 */
public static void main(String... args) throws Exception {
  Configuration conf = new Configuration();
  FileSystem targetFs = FileSystem.get(conf);

  // Instantiate the codec reflectively from its class name.
  CompressionCodec codec = (CompressionCodec)
      ReflectionUtils.newInstance(Class.forName(args[0]), conf);

  InputStream source = FileSystem.getLocal(conf).open(new Path(args[1]));
  Path target = new Path(args[2] + codec.getDefaultExtension());
  OutputStream rawSink = targetFs.create(target);
  OutputStream compressedSink = codec.createOutputStream(rawSink);

  // The trailing 'true' asks copyBytes to close both streams when done.
  IOUtils.copyBytes(source, compressedSink, conf, true);

  IOUtils.closeStream(rawSink);
  IOUtils.closeStream(source);
}
 
Example 5
Source File: LineBreakAccessor.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Creates {@code file} (without overwriting) and points {@code dos} at it,
 * going through the codec's compression stream when a codec is given.
 *
 * @param file  file to create
 * @param codec codec to compress with, or {@code null} for plain output
 * @throws IOException if the file or the compression stream cannot be created
 */
private void createOutputStream(Path file, CompressionCodec codec)
        throws IOException {
    fsdos = fs.create(file, false);
    dos = (codec == null)
            ? fsdos
            : new DataOutputStream(codec.createOutputStream(fsdos));
}
 
Example 6
Source File: FsShell.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Compress a file.
 *
 * <p>The codec is chosen from the destination path via
 * {@link CompressionCodecFactory}. The source is then streamed through the
 * codec into the destination, and both streams are closed by the copy.
 *
 * @param argv {@code argv[0]} command name (its first character is dropped
 *             in the error message), {@code argv[1]} source path,
 *             {@code argv[2]} destination path
 * @param conf configuration used for codec lookup and for the copy
 * @return 0 on success, 1 if no codec matches the destination path
 * @throws IOException if opening, creating or copying fails
 */
private int compress(String argv[], Configuration conf) throws IOException {
  int i = 0;
  String cmd = argv[i++];
  String srcf = argv[i++];
  String dstf = argv[i++];

  Path srcPath = new Path(srcf);
  FileSystem srcFs = srcPath.getFileSystem(getConf());
  Path dstPath = new Path(dstf);
  FileSystem dstFs = dstPath.getFileSystem(getConf());

  // Create codec
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  CompressionCodec codec = factory.getCodec(dstPath);
  if (codec == null) {
    System.err.println(cmd.substring(1) + ": cannot find compression codec for "
        + dstf);
    return 1;
  }

  InputStream in = null;
  OutputStream out = null;
  boolean streamsReady = false;
  try {
    // open input stream
    in = srcFs.open(srcPath);

    // Create compression stream
    out = dstFs.create(dstPath);
    out = codec.createOutputStream(out);
    streamsReady = true;
  } finally {
    if (!streamsReady) {
      // Setup failed part-way: release whatever was already opened so the
      // handles do not leak. 'out' is still the raw stream if the codec
      // wrap is what failed.
      IOUtils.closeStream(out);
      IOUtils.closeStream(in);
    }
  }

  // copyBytes closes both streams itself (last argument is true).
  IOUtils.copyBytes(in, out, conf, true);

  return 0;
}
 
Example 7
Source File: AbstractFileOutputOperatorTest.java    From attic-apex-malhar with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a 16-byte string 1,048,576 times (16 MB) through a
 * {@code SnappyFilterStream}, then reads the start of the file back through
 * the codec and checks that the first 16 bytes match the string.
 */
@Test
public void testSnappyCompressionSimple() throws IOException
{
  // NOTE(review): a true result skips the test -- presumably it signals that
  // native Snappy is unavailable; confirm against checkNativeSnappy().
  if (checkNativeSnappy()) {
    return;
  }

  final File target = new File(testMeta.getDir(), "snappyTestFile.snappy");
  final BufferedOutputStream rawOut = new BufferedOutputStream(new FileOutputStream(target));
  final Configuration conf = new Configuration();
  final CompressionCodec codec = (CompressionCodec)ReflectionUtils.newInstance(SnappyCodec.class, conf);
  final FilterStreamCodec.SnappyFilterStream snappyOut =
      new FilterStreamCodec.SnappyFilterStream(codec.createOutputStream(rawOut));

  final int repetitions = 1024 * 1024;
  final String payload = "TestSnap-16bytes";

  // 1M repetitions of a 16-byte payload: 16 MB in total.
  int written = 0;
  while (written < repetitions) {
    snappyOut.write(payload.getBytes());
    written++;
  }
  snappyOut.flush();
  snappyOut.close();

  final CompressionInputStream compressedIn = codec.createInputStream(new FileInputStream(target));
  final byte[] firstChunk = new byte[payload.length()];
  int bytesRead = compressedIn.read(firstChunk);
  compressedIn.close();

  assertEquals(payload, new String(firstChunk));
}
 
Example 8
Source File: IFile.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a writer over an already-open output stream.
 *
 * <p>The stream is wrapped in an {@link IFileOutputStream} and, when a codec
 * is given and a compressor is available from the {@link CodecPool}, in a
 * compression stream on top of that. The header is then written via
 * {@code writeHeader(outputStream)}. Serializers are only set up when
 * {@code keyClass} is non-null.
 *
 * @param conf                   configuration for the serialization factory
 * @param outputStream           destination stream; its current position is
 *                               recorded as {@code start}
 * @param keyClass               key type, or {@code null} to skip serializer setup
 * @param valueClass             value type
 * @param codec                  compression codec, or {@code null} for none
 * @param writesCounter          stored as {@code writtenRecordsCounter}
 * @param serializedBytesCounter stored as {@code serializedUncompressedBytes}
 * @param rle                    stored as the {@code rle} flag
 * @throws IOException propagated from stream setup or header writing
 */
public Writer(Configuration conf, FSDataOutputStream outputStream,
    Class keyClass, Class valueClass,
    CompressionCodec codec, TezCounter writesCounter, TezCounter serializedBytesCounter,
    boolean rle) throws IOException {
  this.rawOut = outputStream;
  this.writtenRecordsCounter = writesCounter;
  this.serializedUncompressedBytes = serializedBytesCounter;
  this.checksumOut = new IFileOutputStream(outputStream);
  this.start = this.rawOut.getPos();
  this.rle = rle;

  if (codec == null) {
    // No compression requested.
    this.out = new FSDataOutputStream(checksumOut, null);
  } else {
    this.compressor = CodecPool.getCompressor(codec);
    if (this.compressor == null) {
      // Pool had nothing to offer: write uncompressed instead.
      LOG.warn("Could not obtain compressor from CodecPool");
      this.out = new FSDataOutputStream(checksumOut, null);
    } else {
      this.compressor.reset();
      this.compressedOut = codec.createOutputStream(checksumOut, compressor);
      this.out = new FSDataOutputStream(this.compressedOut, null);
      this.compressOutput = true;
    }
  }

  writeHeader(outputStream);

  this.keyClass = keyClass;
  this.valueClass = valueClass;
  if (keyClass == null) {
    // Without a key class no serializers are opened.
    return;
  }

  SerializationFactory serializers = new SerializationFactory(conf);
  this.keySerializer = serializers.getSerializer(keyClass);
  this.keySerializer.open(buffer);
  this.valueSerializer = serializers.getSerializer(valueClass);
  this.valueSerializer.open(buffer);
}
 
Example 9
Source File: FreightStreamer.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Compress a file.
 *
 * <p>The codec is chosen from the destination path via
 * {@link CompressionCodecFactory}. The source is then streamed through the
 * codec into the destination, and both streams are closed by the copy.
 *
 * @param argv {@code argv[0]} command name (its first character is dropped
 *             in the error message), {@code argv[1]} source path,
 *             {@code argv[2]} destination path
 * @param conf configuration used for codec lookup and for the copy
 * @return 0 on success, 1 if no codec matches the destination path
 * @throws IOException if opening, creating or copying fails
 */
private int compress(String argv[], Configuration conf) throws IOException {
  int i = 0;
  String cmd = argv[i++];
  String srcf = argv[i++];
  String dstf = argv[i++];

  Path srcPath = new Path(srcf);
  FileSystem srcFs = srcPath.getFileSystem(getConf());
  Path dstPath = new Path(dstf);
  FileSystem dstFs = dstPath.getFileSystem(getConf());

  // Create codec
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  CompressionCodec codec = factory.getCodec(dstPath);
  if (codec == null) {
    System.err.println(cmd.substring(1) + ": cannot find compression codec for "
        + dstf);
    return 1;
  }

  InputStream in = null;
  OutputStream out = null;
  boolean streamsReady = false;
  try {
    // open input stream
    in = srcFs.open(srcPath);

    // Create compression stream
    out = dstFs.create(dstPath);
    out = codec.createOutputStream(out);
    streamsReady = true;
  } finally {
    if (!streamsReady) {
      // Setup failed part-way: release whatever was already opened so the
      // handles do not leak. 'out' is still the raw stream if the codec
      // wrap is what failed.
      IOUtils.closeStream(out);
      IOUtils.closeStream(in);
    }
  }

  // copyBytes closes both streams itself (last argument is true).
  IOUtils.copyBytes(in, out, conf, true);

  return 0;
}
 
Example 10
Source File: IFile.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a writer over an already-open output stream.
 *
 * <p>The stream is wrapped in an {@link IFileOutputStream}; when a codec is
 * given, a compressor is taken from the {@link CodecPool}, reset, and used to
 * layer a compression stream on top. Key and value serializers are always
 * created and opened on {@code buffer}.
 *
 * @param conf          configuration for the serialization factory
 * @param out           destination stream; its current position is recorded
 *                      as {@code start}
 * @param keyClass      key type
 * @param valueClass    value type
 * @param codec         compression codec, or {@code null} for none
 * @param writesCounter stored as {@code writtenRecordsCounter}
 * @throws IOException propagated from stream or serializer setup
 */
public Writer(Configuration conf, FSDataOutputStream out, 
    Class<K> keyClass, Class<V> valueClass,
    CompressionCodec codec, Counters.Counter writesCounter)
    throws IOException {
  this.writtenRecordsCounter = writesCounter;
  this.checksumOut = new IFileOutputStream(out);
  this.rawOut = out;
  this.start = this.rawOut.getPos();

  if (codec == null) {
    // No compression requested.
    this.out = new FSDataOutputStream(checksumOut, null);
  } else {
    // NOTE(review): the pooled compressor is used without a null check here.
    this.compressor = CodecPool.getCompressor(codec);
    this.compressor.reset();
    this.compressedOut = codec.createOutputStream(checksumOut, compressor);
    this.out = new FSDataOutputStream(this.compressedOut, null);
    this.compressOutput = true;
  }

  this.keyClass = keyClass;
  this.valueClass = valueClass;

  SerializationFactory serializers = new SerializationFactory(conf);
  this.keySerializer = serializers.getSerializer(keyClass);
  this.keySerializer.open(buffer);
  this.valueSerializer = serializers.getSerializer(valueClass);
  this.valueSerializer.open(buffer);
}
 
Example 11
Source File: Compression.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a compression stream without any additional wrapping into
 * buffering streams.
 *
 * <p>Before the stream is created, {@code io.file.buffer.size} is set to
 * 32 KB (32 * 1024) on the codec's own configuration.
 *
 * @param downStream stream that receives the compressed bytes
 * @param compressor compressor handed to the codec
 * @return the codec's compression stream over {@code downStream}
 * @throws IOException if the codec cannot create the stream
 */
public CompressionOutputStream createPlainCompressionStream(
    OutputStream downStream, Compressor compressor) throws IOException {
  final CompressionCodec codec = getCodec(conf);
  final Configurable configurableCodec = (Configurable)codec;
  configurableCodec.getConf().setInt("io.file.buffer.size", 32 * 1024);
  return codec.createOutputStream(downStream, compressor);
}
 
Example 12
Source File: IFile.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a writer over an already-open output stream.
 *
 * <p>The stream is wrapped in an {@link IFileOutputStream} and, when a codec
 * is given and a compressor is available from the {@link CodecPool}, in a
 * compression stream on top of that. Serializers are only set up when
 * {@code keyClass} is non-null.
 *
 * @param conf            configuration for the serialization factory
 * @param out             destination stream; its current position is
 *                        recorded as {@code start}
 * @param keyClass        key type, or {@code null} to skip serializer setup
 * @param valueClass      value type
 * @param codec           compression codec, or {@code null} for none
 * @param writesCounter   stored as {@code writtenRecordsCounter}
 * @param ownOutputStream stored as the {@code ownOutputStream} flag
 * @throws IOException propagated from stream or serializer setup
 */
public Writer(Configuration conf, FSDataOutputStream out, 
    Class<K> keyClass, Class<V> valueClass,
    CompressionCodec codec, Counters.Counter writesCounter,
    boolean ownOutputStream)
    throws IOException {
  this.writtenRecordsCounter = writesCounter;
  this.checksumOut = new IFileOutputStream(out);
  this.rawOut = out;
  this.start = this.rawOut.getPos();

  if (codec == null) {
    // No compression requested.
    this.out = new FSDataOutputStream(checksumOut, null);
  } else {
    this.compressor = CodecPool.getCompressor(codec);
    if (this.compressor == null) {
      // Pool had nothing to offer: write uncompressed instead.
      LOG.warn("Could not obtain compressor from CodecPool");
      this.out = new FSDataOutputStream(checksumOut, null);
    } else {
      this.compressor.reset();
      this.compressedOut = codec.createOutputStream(checksumOut, compressor);
      this.out = new FSDataOutputStream(this.compressedOut, null);
      this.compressOutput = true;
    }
  }

  this.keyClass = keyClass;
  this.valueClass = valueClass;

  if (keyClass != null) {
    SerializationFactory serializers = new SerializationFactory(conf);
    this.keySerializer = serializers.getSerializer(keyClass);
    this.keySerializer.open(buffer);
    this.valueSerializer = serializers.getSerializer(valueClass);
    this.valueSerializer.open(buffer);
  }

  this.ownOutputStream = ownOutputStream;
}
 
Example 13
Source File: TestCombineTextInputFormat.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code contents} to a newly created file, optionally compressed.
 *
 * <p>The stream is always closed, including when wrapping it in the codec
 * or writing the bytes fails, so no file handle is left open on error.
 *
 * @param fs       file system to create the file on
 * @param name     path of the file to create
 * @param codec    codec to compress with, or {@code null} for plain output
 * @param contents text to write, encoded with the platform default charset
 * @throws IOException if creating, writing or closing the file fails
 */
private static void writeFile(FileSystem fs, Path name,
                              CompressionCodec codec,
                              String contents) throws IOException {
  OutputStream stm = fs.create(name);
  try {
    if (codec != null) {
      // If this throws, 'stm' still refers to the raw stream and is closed below.
      stm = codec.createOutputStream(stm);
    }
    stm.write(contents.getBytes());
  } finally {
    stm.close();
  }
}
 
Example 14
Source File: DefaultOutputter.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Opens {@code path} for writing and creates the JSON writer on top of it.
 *
 * <p>When {@link CompressionCodecFactory} finds a codec for the path, the
 * file is written through that codec with a compressor from the
 * {@link CodecPool}, which is kept in the {@code compressor} field. Pretty
 * printing is controlled by {@code rumen.output.pretty.print} (default true).
 *
 * @param path output file
 * @param conf configuration used for file system, codec and writer settings
 * @throws IOException if the file or the streams cannot be created
 */
@Override
public void init(Path path, Configuration conf) throws IOException {
  final FileSystem fs = path.getFileSystem(conf);
  final CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);

  final OutputStream sink;
  if (codec == null) {
    sink = fs.create(path);
  } else {
    compressor = CodecPool.getCompressor(codec);
    sink = codec.createOutputStream(fs.create(path), compressor);
  }

  final boolean prettyPrint = conf.getBoolean("rumen.output.pretty.print", true);
  writer = new JsonObjectMapperWriter<T>(sink, prettyPrint);
}
 
Example 15
Source File: IFile.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a writer over an already-open output stream.
 *
 * <p>The stream is wrapped in an {@link IFileOutputStream} and, when a codec
 * is given and a compressor is available from the {@link CodecPool}, in a
 * compression stream on top of that. Serializers are only set up when
 * {@code keyClass} is non-null.
 *
 * @param conf            configuration for the serialization factory
 * @param out             destination stream; its current position is
 *                        recorded as {@code start}
 * @param keyClass        key type, or {@code null} to skip serializer setup
 * @param valueClass      value type
 * @param codec           compression codec, or {@code null} for none
 * @param writesCounter   stored as {@code writtenRecordsCounter}
 * @param ownOutputStream stored as the {@code ownOutputStream} flag
 * @throws IOException propagated from stream or serializer setup
 */
public Writer(Configuration conf, FSDataOutputStream out, 
    Class<K> keyClass, Class<V> valueClass,
    CompressionCodec codec, Counters.Counter writesCounter,
    boolean ownOutputStream)
    throws IOException {
  this.writtenRecordsCounter = writesCounter;
  this.checksumOut = new IFileOutputStream(out);
  this.rawOut = out;
  this.start = this.rawOut.getPos();

  if (codec == null) {
    // No compression requested.
    this.out = new FSDataOutputStream(checksumOut, null);
  } else {
    this.compressor = CodecPool.getCompressor(codec);
    if (this.compressor == null) {
      // Pool had nothing to offer: write uncompressed instead.
      LOG.warn("Could not obtain compressor from CodecPool");
      this.out = new FSDataOutputStream(checksumOut, null);
    } else {
      this.compressor.reset();
      this.compressedOut = codec.createOutputStream(checksumOut, compressor);
      this.out = new FSDataOutputStream(this.compressedOut, null);
      this.compressOutput = true;
    }
  }

  this.keyClass = keyClass;
  this.valueClass = valueClass;

  if (keyClass != null) {
    SerializationFactory serializers = new SerializationFactory(conf);
    this.keySerializer = serializers.getSerializer(keyClass);
    this.keySerializer.open(buffer);
    this.valueSerializer = serializers.getSerializer(valueClass);
    this.valueSerializer.open(buffer);
  }

  this.ownOutputStream = ownOutputStream;
}
 
Example 16
Source File: TestCombineTextInputFormat.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code contents} to a newly created file, optionally compressed.
 *
 * <p>The stream is always closed, including when wrapping it in the codec
 * or writing the bytes fails, so no file handle is left open on error.
 *
 * @param fs       file system to create the file on
 * @param name     path of the file to create
 * @param codec    codec to compress with, or {@code null} for plain output
 * @param contents text to write, encoded with the platform default charset
 * @throws IOException if creating, writing or closing the file fails
 */
private static void writeFile(FileSystem fs, Path name,
                              CompressionCodec codec,
                              String contents) throws IOException {
  OutputStream stm = fs.create(name);
  try {
    if (codec != null) {
      // If this throws, 'stm' still refers to the raw stream and is closed below.
      stm = codec.createOutputStream(stm);
    }
    stm.write(contents.getBytes());
  } finally {
    stm.close();
  }
}
 
Example 17
Source File: TestCombineTextInputFormat.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code contents} to a newly created file, optionally compressed.
 *
 * <p>The stream is always closed, including when wrapping it in the codec
 * or writing the bytes fails, so no file handle is left open on error.
 *
 * @param fs       file system to create the file on
 * @param name     path of the file to create
 * @param codec    codec to compress with, or {@code null} for plain output
 * @param contents text to write, encoded with the platform default charset
 * @throws IOException if creating, writing or closing the file fails
 */
private static void writeFile(FileSystem fs, Path name,
                              CompressionCodec codec,
                              String contents) throws IOException {
  OutputStream stm = fs.create(name);
  try {
    if (codec != null) {
      // If this throws, 'stm' still refers to the raw stream and is closed below.
      stm = codec.createOutputStream(stm);
    }
    stm.write(contents.getBytes());
  } finally {
    stm.close();
  }
}
 
Example 18
Source File: TestExport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 5 votes vote down vote up
/**
 * Create a data file that gets exported to the db.
 *
 * <p>The file is named {@code part<fileNum>.txt} (plus {@code .gz} when
 * gzipped) under the table path, and holds one record line per id in
 * {@code [fileNum * numRecords, fileNum * numRecords + numRecords)}.
 * Gzipped files are passed to {@code verifyCompressedFile} after writing.
 *
 * @param fileNum the number of the file (for multi-file export)
 * @param numRecords how many records to write to the file.
 * @param gzip is true if the file should be gzipped.
 * @param extraCols extra column generators forwarded to {@code getRecordLine}
 * @throws IOException if the file cannot be created or written
 */
protected void createTextFile(int fileNum, int numRecords, boolean gzip,
    ColumnGenerator... extraCols) throws IOException {
  final int firstId = fileNum * numRecords;
  final String suffix = gzip ? ".txt.gz" : ".txt";

  final Path tableDir = getTablePath();
  final Path dataFile = new Path(tableDir, "part" + fileNum + suffix);

  final Configuration conf = new Configuration();
  if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
    // Off-cluster runs write to the local file system.
    conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
  }
  final FileSystem fs = FileSystem.get(conf);
  fs.mkdirs(tableDir);

  OutputStream sink = fs.create(dataFile);
  if (gzip) {
    // The codec is resolved from the file name.
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(dataFile);
    sink = codec.createOutputStream(sink);
  }

  BufferedWriter lines = new BufferedWriter(new OutputStreamWriter(sink));
  int written = 0;
  while (written < numRecords) {
    lines.write(getRecordLine(firstId + written, extraCols));
    written++;
  }
  lines.close();
  sink.close();

  if (gzip) {
    verifyCompressedFile(dataFile, numRecords);
  }
}
 
Example 19
Source File: PushdownLargeFieldedListsVisitor.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an FST from {@code values}, saves it to a new file under
 * {@code fstHdfsUri} and returns that file's URI.
 *
 * <p>When a compression codec class name is configured, the codec is loaded
 * through the context class loader (or this class's loader if there is
 * none), its default extension is appended to the file name, and the FST is
 * written through it. The file is created without overwriting. The output
 * stream is closed in all cases, including when wrapping it in the codec or
 * saving the FST fails.
 *
 * @param values the values to store in the FST
 * @return URI of the file the FST was written to
 * @throws IOException            if the file cannot be created, written or closed
 * @throws ClassNotFoundException if the configured codec class cannot be found
 * @throws InstantiationException if the codec cannot be instantiated
 * @throws IllegalAccessException if the codec's constructor is not accessible
 */
protected URI createFst(SortedSet<String> values) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
    FST fst = DatawaveFieldIndexListIteratorJexl.getFST(values);
    
    // now serialize to our file system
    CompressionCodec codec = null;
    String extension = "";
    if (config.getHdfsFileCompressionCodec() != null) {
        ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
        if (classLoader == null) {
            classLoader = this.getClass().getClassLoader();
        }
        Class<? extends CompressionCodec> clazz = Class.forName(config.getHdfsFileCompressionCodec(), true, classLoader).asSubclass(CompressionCodec.class);
        codec = clazz.newInstance();
        extension = codec.getDefaultExtension();
    }
    int fstCount = config.getFstCount().incrementAndGet();
    Path fstFile = new Path(fstHdfsUri, "PushdownLargeFileFst." + fstCount + ".fst" + extension);
    
    OutputStream fstFileOut = new BufferedOutputStream(fs.create(fstFile, false));
    try {
        if (codec != null) {
            // If this throws, fstFileOut still refers to the buffered stream and is closed below.
            fstFileOut = codec.createOutputStream(fstFileOut);
        }
        
        // Closing fstFileOut directly stands in for closing the data-output
        // wrapper, whose close() simply closes the wrapped stream.
        fst.save(new OutputStreamDataOutput(fstFileOut));
    } finally {
        fstFileOut.close();
    }
    
    return fstFile.toUri();
}
 
Example 20
Source File: RubixRecordWriter.java    From Cubert with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a record writer over {@code out}.
 *
 * <p>The JSON metadata is read from the configuration and extended with the
 * key class, value class and serialization type. The value serializer is a
 * {@code CompactSerializer} when compact serialization is enabled in the
 * configuration and the schema is flat; otherwise it comes from the
 * serialization factory. The key serializer writes to
 * {@code keySectionStream}; the value serializer writes to {@code out},
 * through the codec's compression stream when a codec is given.
 *
 * @param conf       configuration holding the JSON metadata and settings
 * @param out        destination stream
 * @param keyClass   key type
 * @param valueClass value type
 * @param codec      compression codec for values, or {@code null} for none
 * @throws IOException if the metadata cannot be parsed or a stream cannot be set up
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public RubixRecordWriter(Configuration conf,
                         FSDataOutputStream out,
                         Class keyClass,
                         Class valueClass,
                         CompressionCodec codec) throws IOException
{
    this.out = out;

    final SerializationFactory serializers = new SerializationFactory(conf);
    keySerializer = serializers.getSerializer(keyClass);

    final ObjectMapper jsonMapper = new ObjectMapper();
    metadataJson =
            jsonMapper.readValue(conf.get(CubertStrings.JSON_METADATA), JsonNode.class);

    // All metadata additions go through the same ObjectNode view.
    final ObjectNode metadata = (ObjectNode) metadataJson;
    metadata.put("keyClass", keyClass.getCanonicalName());
    metadata.put("valueClass", valueClass.getCanonicalName());

    final BlockSchema schema = new BlockSchema(metadataJson.get("schema"));

    // Compact serialization needs both the config switch and a flat schema.
    final boolean useCompact =
            conf.getBoolean(CubertStrings.USE_COMPACT_SERIALIZATION, false)
                    && schema.isFlatSchema();
    if (!useCompact)
    {
        valueSerializer = serializers.getSerializer(valueClass);
        metadata.put("serializationType",
                     BlockSerializationType.DEFAULT.toString());
    }
    else
    {
        valueSerializer = new CompactSerializer<V>(schema);
        metadata.put("serializationType",
                     BlockSerializationType.COMPACT.toString());
    }

    keySerializer.open(keySectionStream);

    if (codec != null)
    {
        compressedStream = codec.createOutputStream(out);
        valueSerializer.open(compressedStream);
    }
    else
    {
        valueSerializer.open(out);
        compressedStream = null;
    }

}