org.apache.hadoop.io.compress.zlib.ZlibFactory Java Examples

The following examples show how to use org.apache.hadoop.io.compress.zlib.ZlibFactory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestNativeCodeLoader.java    From big-c with Apache License 2.0 6 votes vote down vote up
@Test
public void testNativeCodeLoaded() {
  if (requireTestJni() == false) {
    LOG.info("TestNativeCodeLoader: libhadoop.so testing is not required.");
    return;
  }
  if (!NativeCodeLoader.isNativeCodeLoaded()) {
    fail("TestNativeCodeLoader: libhadoop.so testing was required, but " +
        "libhadoop.so was not loaded.");
  }
  assertFalse(NativeCodeLoader.getLibraryName().isEmpty());
  // library names are depended on platform and build envs
  // so just check names are available
  assertFalse(ZlibFactory.getLibraryName().isEmpty());
  if (NativeCodeLoader.buildSupportsSnappy()) {
    assertFalse(SnappyCodec.getLibraryName().isEmpty());
  }
  if (NativeCodeLoader.buildSupportsOpenssl()) {
    assertFalse(OpensslCipher.getLibraryName().isEmpty());
  }
  assertFalse(Lz4Codec.getLibraryName().isEmpty());
  LOG.info("TestNativeCodeLoader: libhadoop.so is loaded.");
}
 
Example #2
Source File: SequenceFile.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Construct the preferred type of 'raw' SequenceFile Writer.
 * @param conf The configuration.
 * @param out The stream on top which the writer is to be constructed.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compressionType The compression type.
 * @param codec The compression codec.
 * @param metadata The metadata of the file.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer
  createWriter(Configuration conf, FSDataOutputStream out, 
               Class keyClass, Class valClass, CompressionType compressionType,
               CompressionCodec codec, Metadata metadata)
  throws IOException {
  if ((codec instanceof GzipCodec) && 
      !NativeCodeLoader.isNativeCodeLoaded() && 
      !ZlibFactory.isNativeZlibLoaded(conf)) {
    throw new IllegalArgumentException("SequenceFile doesn't work with " +
                                       "GzipCodec without native-hadoop code!");
  }

  Writer writer = null;

  if (compressionType == CompressionType.NONE) {
    writer = new Writer(conf, out, keyClass, valClass, metadata);
  } else if (compressionType == CompressionType.RECORD) {
    writer = new RecordCompressWriter(conf, out, keyClass, valClass, codec, metadata);
  } else if (compressionType == CompressionType.BLOCK){
    writer = new BlockCompressWriter(conf, out, keyClass, valClass, codec, metadata);
  }
  
  return writer;
}
 
Example #3
Source File: SequenceFile.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Construct the preferred type of 'raw' SequenceFile Writer.
 * @param out The stream on top which the writer is to be constructed.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compress Compress data?
 * @param blockCompress Compress blocks?
 * @param metadata The metadata of the file.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
private static Writer
  createWriter(Configuration conf, FSDataOutputStream out, 
               Class keyClass, Class valClass, boolean compress, boolean blockCompress,
               CompressionCodec codec, Metadata metadata)
  throws IOException {
  if (codec != null && (codec instanceof GzipCodec) && 
      !NativeCodeLoader.isNativeCodeLoaded() && 
      !ZlibFactory.isNativeZlibLoaded(conf)) {
    throw new IllegalArgumentException("SequenceFile doesn't work with " +
                                       "GzipCodec without native-hadoop code!");
  }

  Writer writer = null;

  if (!compress) {
    writer = new Writer(conf, out, keyClass, valClass, metadata);
  } else if (compress && !blockCompress) {
    writer = new RecordCompressWriter(conf, out, keyClass, valClass, codec, metadata);
  } else {
    writer = new BlockCompressWriter(conf, out, keyClass, valClass, codec, metadata);
  }
  
  return writer;
}
 
Example #4
Source File: SequenceFile.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Construct the preferred type of 'raw' SequenceFile Writer.
 * @param conf The configuration.
 * @param out The stream on top which the writer is to be constructed.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compressionType The compression type.
 * @param codec The compression codec.
 * @param metadata The metadata of the file.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer
  createWriter(Configuration conf, FSDataOutputStream out, 
               Class keyClass, Class valClass, CompressionType compressionType,
               CompressionCodec codec, Metadata metadata)
  throws IOException {
  if ((codec instanceof GzipCodec) && 
      !NativeCodeLoader.isNativeCodeLoaded() && 
      !ZlibFactory.isNativeZlibLoaded(conf)) {
    throw new IllegalArgumentException("SequenceFile doesn't work with " +
                                       "GzipCodec without native-hadoop code!");
  }

  Writer writer = null;

  if (compressionType == CompressionType.NONE) {
    writer = new Writer(conf, out, keyClass, valClass, metadata);
  } else if (compressionType == CompressionType.RECORD) {
    writer = new RecordCompressWriter(conf, out, keyClass, valClass, codec, metadata);
  } else if (compressionType == CompressionType.BLOCK){
    writer = new BlockCompressWriter(conf, out, keyClass, valClass, codec, metadata);
  }
  
  return writer;
}
 
Example #5
Source File: SequenceFile.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Construct the preferred type of 'raw' SequenceFile Writer.
 * @param out The stream on top which the writer is to be constructed.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compress Compress data?
 * @param blockCompress Compress blocks?
 * @param metadata The metadata of the file.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
private static Writer
  createWriter(Configuration conf, FSDataOutputStream out, 
               Class keyClass, Class valClass, boolean compress, boolean blockCompress,
               CompressionCodec codec, Metadata metadata)
  throws IOException {
  if (codec != null && (codec instanceof GzipCodec) && 
      !NativeCodeLoader.isNativeCodeLoaded() && 
      !ZlibFactory.isNativeZlibLoaded(conf)) {
    throw new IllegalArgumentException("SequenceFile doesn't work with " +
                                       "GzipCodec without native-hadoop code!");
  }

  Writer writer = null;

  if (!compress) {
    writer = new Writer(conf, out, keyClass, valClass, metadata);
  } else if (compress && !blockCompress) {
    writer = new RecordCompressWriter(conf, out, keyClass, valClass, codec, metadata);
  } else {
    writer = new BlockCompressWriter(conf, out, keyClass, valClass, codec, metadata);
  }
  
  return writer;
}
 
Example #6
Source File: TestCodec.java    From big-c with Apache License 2.0 6 votes vote down vote up
@Test
public void testCodecPoolGzipReuse() throws Exception {
  Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);
  if (!ZlibFactory.isNativeZlibLoaded(conf)) {
    LOG.warn("testCodecPoolGzipReuse skipped: native libs not loaded");
    return;
  }
  GzipCodec gzc = ReflectionUtils.newInstance(GzipCodec.class, conf);
  DefaultCodec dfc = ReflectionUtils.newInstance(DefaultCodec.class, conf);
  Compressor c1 = CodecPool.getCompressor(gzc);
  Compressor c2 = CodecPool.getCompressor(dfc);
  CodecPool.returnCompressor(c1);
  CodecPool.returnCompressor(c2);
  assertTrue("Got mismatched ZlibCompressor", c2 != CodecPool.getCompressor(gzc));
}
 
Example #7
Source File: CompressDecompressTester.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Method for compressor availability check
 */
private static <T extends Compressor, E extends Decompressor> boolean isAvailable(TesterPair<T, E> pair) {
  Compressor compressor = pair.compressor;

  if (compressor.getClass().isAssignableFrom(Lz4Compressor.class)
          && (NativeCodeLoader.isNativeCodeLoaded()))
    return true;

  else if (compressor.getClass().isAssignableFrom(BuiltInZlibDeflater.class)
          && NativeCodeLoader.isNativeCodeLoaded())
    return true;

  else if (compressor.getClass().isAssignableFrom(ZlibCompressor.class)) {
    return ZlibFactory.isNativeZlibLoaded(new Configuration());
  }              
  else if (compressor.getClass().isAssignableFrom(SnappyCompressor.class)
          && isNativeSnappyLoadable())
    return true;
  
  return false;      
}
 
Example #8
Source File: TestCodec.java    From hadoop with Apache License 2.0 6 votes vote down vote up
@Test
public void testCodecPoolGzipReuse() throws Exception {
  Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);
  if (!ZlibFactory.isNativeZlibLoaded(conf)) {
    LOG.warn("testCodecPoolGzipReuse skipped: native libs not loaded");
    return;
  }
  GzipCodec gzc = ReflectionUtils.newInstance(GzipCodec.class, conf);
  DefaultCodec dfc = ReflectionUtils.newInstance(DefaultCodec.class, conf);
  Compressor c1 = CodecPool.getCompressor(gzc);
  Compressor c2 = CodecPool.getCompressor(dfc);
  CodecPool.returnCompressor(c1);
  CodecPool.returnCompressor(c2);
  assertTrue("Got mismatched ZlibCompressor", c2 != CodecPool.getCompressor(gzc));
}
 
Example #9
Source File: CompressDecompressTester.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Method for compressor availability check
 */
private static <T extends Compressor, E extends Decompressor> boolean isAvailable(TesterPair<T, E> pair) {
  Compressor compressor = pair.compressor;

  if (compressor.getClass().isAssignableFrom(Lz4Compressor.class)
          && (NativeCodeLoader.isNativeCodeLoaded()))
    return true;

  else if (compressor.getClass().isAssignableFrom(BuiltInZlibDeflater.class)
          && NativeCodeLoader.isNativeCodeLoaded())
    return true;

  else if (compressor.getClass().isAssignableFrom(ZlibCompressor.class)) {
    return ZlibFactory.isNativeZlibLoaded(new Configuration());
  }              
  else if (compressor.getClass().isAssignableFrom(SnappyCompressor.class)
          && isNativeSnappyLoadable())
    return true;
  
  return false;      
}
 
Example #10
Source File: TestNativeCodeLoader.java    From hadoop with Apache License 2.0 6 votes vote down vote up
@Test
public void testNativeCodeLoaded() {
  if (requireTestJni() == false) {
    LOG.info("TestNativeCodeLoader: libhadoop.so testing is not required.");
    return;
  }
  if (!NativeCodeLoader.isNativeCodeLoaded()) {
    fail("TestNativeCodeLoader: libhadoop.so testing was required, but " +
        "libhadoop.so was not loaded.");
  }
  assertFalse(NativeCodeLoader.getLibraryName().isEmpty());
  // library names are depended on platform and build envs
  // so just check names are available
  assertFalse(ZlibFactory.getLibraryName().isEmpty());
  if (NativeCodeLoader.buildSupportsSnappy()) {
    assertFalse(SnappyCodec.getLibraryName().isEmpty());
  }
  if (NativeCodeLoader.buildSupportsOpenssl()) {
    assertFalse(OpensslCipher.getLibraryName().isEmpty());
  }
  assertFalse(Lz4Codec.getLibraryName().isEmpty());
  LOG.info("TestNativeCodeLoader: libhadoop.so is loaded.");
}
 
Example #11
Source File: TestCodec.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testGzipCodecWithParam() throws IOException {
  Configuration conf = new Configuration(this.conf);
  ZlibFactory.setCompressionLevel(conf, CompressionLevel.BEST_COMPRESSION);
  ZlibFactory.setCompressionStrategy(conf, CompressionStrategy.HUFFMAN_ONLY);
  codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.GzipCodec");
  codecTest(conf, seed, count, "org.apache.hadoop.io.compress.GzipCodec");
}
 
Example #12
Source File: TestCodec.java    From hadoop with Apache License 2.0 5 votes vote down vote up
@Test
public void testGzipCodecWithParam() throws IOException {
  Configuration conf = new Configuration(this.conf);
  ZlibFactory.setCompressionLevel(conf, CompressionLevel.BEST_COMPRESSION);
  ZlibFactory.setCompressionStrategy(conf, CompressionStrategy.HUFFMAN_ONLY);
  codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.GzipCodec");
  codecTest(conf, seed, count, "org.apache.hadoop.io.compress.GzipCodec");
}
 
Example #13
Source File: TestCodec.java    From big-c with Apache License 2.0 5 votes vote down vote up
private static void gzipReinitTest(Configuration conf, CompressionCodec codec)
    throws IOException {
  // Add codec to cache
  ZlibFactory.setCompressionLevel(conf, CompressionLevel.BEST_COMPRESSION);
  ZlibFactory.setCompressionStrategy(conf,
      CompressionStrategy.DEFAULT_STRATEGY);
  Compressor c1 = CodecPool.getCompressor(codec);
  CodecPool.returnCompressor(c1);
  // reset compressor's compression level to perform no compression
  ZlibFactory.setCompressionLevel(conf, CompressionLevel.NO_COMPRESSION);
  Compressor c2 = CodecPool.getCompressor(codec, conf);
  // ensure same compressor placed earlier
  assertTrue("Got mismatched ZlibCompressor", c1 == c2);
  ByteArrayOutputStream bos = new ByteArrayOutputStream();
  CompressionOutputStream cos = null;
  // write trivially compressable data
  byte[] b = new byte[1 << 15];
  Arrays.fill(b, (byte) 43);
  try {
    cos = codec.createOutputStream(bos, c2);
    cos.write(b);
  } finally {
    if (cos != null) {
      cos.close();
    }
    CodecPool.returnCompressor(c2);
  }
  byte[] outbytes = bos.toByteArray();
  // verify data were not compressed
  assertTrue("Compressed bytes contrary to configuration",
             outbytes.length >= b.length);
}
 
Example #14
Source File: TestCodec.java    From big-c with Apache License 2.0 5 votes vote down vote up
private static void codecTestWithNOCompression (Configuration conf,
                    String codecClass) throws IOException {
  // Create a compressor with NO_COMPRESSION and make sure that
  // output is not compressed by comparing the size with the
  // original input

  CompressionCodec codec = null;
  ZlibFactory.setCompressionLevel(conf, CompressionLevel.NO_COMPRESSION);
  try {
    codec = (CompressionCodec)
      ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf);
  } catch (ClassNotFoundException cnfe) {
    throw new IOException("Illegal codec!");
  }
  Compressor c = codec.createCompressor();
  // ensure same compressor placed earlier
  ByteArrayOutputStream bos = new ByteArrayOutputStream();
  CompressionOutputStream cos = null;
  // write trivially compressable data
  byte[] b = new byte[1 << 15];
  Arrays.fill(b, (byte) 43);
  try {
    cos = codec.createOutputStream(bos, c);
    cos.write(b);
  } finally {
    if (cos != null) {
      cos.close();
    }
  }
  byte[] outbytes = bos.toByteArray();
  // verify data were not compressed
  assertTrue("Compressed bytes contrary to configuration(NO_COMPRESSION)",
             outbytes.length >= b.length);
}
 
Example #15
Source File: TestCodec.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testCodecPoolCompressorReinit() throws Exception {
  Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);
  if (ZlibFactory.isNativeZlibLoaded(conf)) {
    GzipCodec gzc = ReflectionUtils.newInstance(GzipCodec.class, conf);
    gzipReinitTest(conf, gzc);
  } else {
    LOG.warn("testCodecPoolCompressorReinit skipped: native libs not loaded");
  }
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, false);
  DefaultCodec dfc = ReflectionUtils.newInstance(DefaultCodec.class, conf);
  gzipReinitTest(conf, dfc);
}
 
Example #16
Source File: TestCodec.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testNativeGzipConcat() throws IOException {
  Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);
  if (!ZlibFactory.isNativeZlibLoaded(conf)) {
    LOG.warn("skipped: native libs not loaded");
    return;
  }
  GzipConcatTest(conf, GzipCodec.GzipZlibDecompressor.class);
}
 
Example #17
Source File: TestCodec.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testGzipCodecRead() throws IOException {
  // Create a gzipped file and try to read it back, using a decompressor
  // from the CodecPool.

  // Don't use native libs for this test.
  Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, false);
  assertFalse("ZlibFactory is using native libs against request",
      ZlibFactory.isNativeZlibLoaded(conf));

  // Ensure that the CodecPool has a BuiltInZlibInflater in it.
  Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
  assertNotNull("zlibDecompressor is null!", zlibDecompressor);
  assertTrue("ZlibFactory returned unexpected inflator",
      zlibDecompressor instanceof BuiltInZlibInflater);
  CodecPool.returnDecompressor(zlibDecompressor);

  // Now create a GZip text file.
  String tmpDir = System.getProperty("test.build.data", "/tmp/");
  Path f = new Path(new Path(tmpDir), "testGzipCodecRead.txt.gz");
  BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
    new GZIPOutputStream(new FileOutputStream(f.toString()))));
  final String msg = "This is the message in the file!";
  bw.write(msg);
  bw.close();

  // Now read it back, using the CodecPool to establish the
  // decompressor to use.
  CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
  CompressionCodec codec = ccf.getCodec(f);
  Decompressor decompressor = CodecPool.getDecompressor(codec);
  FileSystem fs = FileSystem.getLocal(conf);
  InputStream is = fs.open(f);
  is = codec.createInputStream(is, decompressor);
  BufferedReader br = new BufferedReader(new InputStreamReader(is));
  String line = br.readLine();
  assertEquals("Didn't get the same message back!", msg, line);
  br.close();
}
 
Example #18
Source File: TestCompressionStorages.java    From tajo with Apache License 2.0 5 votes vote down vote up
@Test
public void testGzipCodecCompressionData() throws IOException {
  if (dataFormat.equalsIgnoreCase(BuiltinStorages.RCFILE)) {
    if( ZlibFactory.isNativeZlibLoaded(conf)) {
      storageCompressionTest(dataFormat, GzipCodec.class);
    }
  } else if (dataFormat.equalsIgnoreCase(BuiltinStorages.SEQUENCE_FILE)) {
    if( ZlibFactory.isNativeZlibLoaded(conf)) {
      storageCompressionTest(dataFormat, GzipCodec.class);
    }
  } else {
    storageCompressionTest(dataFormat, GzipCodec.class);
  }
}
 
Example #19
Source File: ReusableStreamGzipCodec.java    From hbase with Apache License 2.0 5 votes vote down vote up
@Override
public CompressionOutputStream createOutputStream(OutputStream out)
    throws IOException {
  if (ZlibFactory.isNativeZlibLoaded(getConf())) {
    return super.createOutputStream(out);
  }
  return new ReusableGzipOutputStream(out);
}
 
Example #20
Source File: TestCompressionStorages.java    From incubator-tajo with Apache License 2.0 5 votes vote down vote up
@Test
public void testGzipCodecCompressionData() throws IOException {
  if (storeType == StoreType.RCFILE) {
    if( ZlibFactory.isNativeZlibLoaded(conf)) {
      storageCompressionTest(storeType, GzipCodec.class);
    }
  } else {
    storageCompressionTest(storeType, GzipCodec.class);
  }
}
 
Example #21
Source File: TestCodec.java    From RDFS with Apache License 2.0 5 votes vote down vote up
public void testCodecPoolGzipReuse() throws Exception {
  Configuration conf = new Configuration();
  conf.setBoolean("hadoop.native.lib", true);
  if (!ZlibFactory.isNativeZlibLoaded(conf)) {
    LOG.warn("testCodecPoolGzipReuse skipped: native libs not loaded");
    return;
  }
  GzipCodec gzc = ReflectionUtils.newInstance(GzipCodec.class, conf);
  DefaultCodec dfc = ReflectionUtils.newInstance(DefaultCodec.class, conf);
  Compressor c1 = CodecPool.getCompressor(gzc);
  Compressor c2 = CodecPool.getCompressor(dfc);
  CodecPool.returnCompressor(c1);
  CodecPool.returnCompressor(c2);
  assertTrue("Got mismatched ZlibCompressor", c2 != CodecPool.getCompressor(gzc));
}
 
Example #22
Source File: SequenceFile.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Construct the preferred type of SequenceFile Writer.
 * @param fs The configured filesystem.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param bufferSize buffer size for the underlaying outputstream.
 * @param replication replication factor for the file.
 * @param blockSize block size for the file.
 * @param compressionType The compression type.
 * @param codec The compression codec.
 * @param progress The Progressable object to track progress.
 * @param metadata The metadata of the file.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer
  createWriter(FileSystem fs, Configuration conf, Path name,
               Class keyClass, Class valClass, int bufferSize,
               short replication, long blockSize,
               CompressionType compressionType, CompressionCodec codec,
               Progressable progress, Metadata metadata) throws IOException {
  if ((codec instanceof GzipCodec) &&
      !NativeCodeLoader.isNativeCodeLoaded() &&
      !ZlibFactory.isNativeZlibLoaded(conf)) {
    throw new IllegalArgumentException("SequenceFile doesn't work with " +
                                       "GzipCodec without native-hadoop code!");
  }

  Writer writer = null;

  if (compressionType == CompressionType.NONE) {
    writer = new Writer(fs, conf, name, keyClass, valClass,
                        bufferSize, replication, blockSize,
                        progress, metadata);
  } else if (compressionType == CompressionType.RECORD) {
    writer = new RecordCompressWriter(fs, conf, name, keyClass, valClass,
                                      bufferSize, replication, blockSize,
                                      codec, progress, metadata);
  } else if (compressionType == CompressionType.BLOCK){
    writer = new BlockCompressWriter(fs, conf, name, keyClass, valClass,
                                     bufferSize, replication, blockSize,
                                     codec, progress, metadata);
  }

  return writer;
}
 
Example #23
Source File: SequenceFile.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Construct the preferred type of SequenceFile Writer.
 * @param fs The configured filesystem.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param bufferSize buffer size for the underlaying outputstream.
 * @param replication replication factor for the file.
 * @param blockSize block size for the file.
 * @param createParent create parent directory if non-existent
 * @param compressionType The compression type.
 * @param codec The compression codec.
 * @param progress The Progressable object to track progress.
 * @param metadata The metadata of the file.
 * @param forceSync set the forceSync flag for this file
 * @param doParallelWrites write replicas in parallel
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer
  createWriter(FileSystem fs, Configuration conf, Path name,
               Class keyClass, Class valClass, int bufferSize,
               short replication, long blockSize, boolean createParent,
               CompressionType compressionType, CompressionCodec codec,
               Metadata metadata, boolean forceSync,
               boolean doParallelWrites) throws IOException {
  if ((codec instanceof GzipCodec) &&
      !NativeCodeLoader.isNativeCodeLoaded() &&
      !ZlibFactory.isNativeZlibLoaded(conf)) {
    throw new IllegalArgumentException("SequenceFile doesn't work with " +
                                       "GzipCodec without native-hadoop code!");
  }

  switch (compressionType) {
  case NONE:
    return new Writer(conf,
        fs.createNonRecursive(name, FsPermission.getDefault(),true,
            bufferSize, replication, blockSize, null,forceSync, doParallelWrites),
        keyClass, valClass, metadata).ownStream();
  case RECORD:
    return new RecordCompressWriter(conf,
        fs.createNonRecursive(name, FsPermission.getDefault(), true,
            bufferSize, replication, blockSize, null,forceSync, doParallelWrites),
        keyClass, valClass, codec, metadata).ownStream();
  case BLOCK:
    return new BlockCompressWriter(conf,
        fs.createNonRecursive(name, FsPermission.getDefault(), true,
            bufferSize, replication, blockSize, null, forceSync, doParallelWrites),
        keyClass, valClass, codec, metadata).ownStream();
  default:
    return null;
  }
}
 
Example #24
Source File: SequenceFile.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
   * Construct the preferred type of 'raw' SequenceFile Writer.
   * @param fs The configured filesystem. 
   * @param conf The configuration.
   * @param file The name of the file. 
   * @param keyClass The 'key' type.
   * @param valClass The 'value' type.
   * @param compress Compress data?
   * @param blockCompress Compress blocks?
   * @param codec The compression codec.
   * @param progress
   * @param metadata The metadata of the file.
   * @return Returns the handle to the constructed SequenceFile Writer.
   * @throws IOException
   */
  private static Writer
  createWriter(FileSystem fs, Configuration conf, Path file, 
               Class keyClass, Class valClass, 
               boolean compress, boolean blockCompress,
               CompressionCodec codec, Progressable progress, Metadata metadata)
  throws IOException {
  if (codec != null && (codec instanceof GzipCodec) && 
      !NativeCodeLoader.isNativeCodeLoaded() && 
      !ZlibFactory.isNativeZlibLoaded(conf)) {
    throw new IllegalArgumentException("SequenceFile doesn't work with " +
                                       "GzipCodec without native-hadoop code!");
  }

  Writer writer = null;

  if (!compress) {
    writer = new Writer(fs, conf, file, keyClass, valClass, progress, metadata);
  } else if (compress && !blockCompress) {
    writer = new RecordCompressWriter(fs, conf, file, keyClass, valClass, 
                                      codec, progress, metadata);
  } else {
    writer = new BlockCompressWriter(fs, conf, file, keyClass, valClass, 
                                     codec, progress, metadata);
  }
  
  return writer;
}
 
Example #25
Source File: TestCodec.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
public void testCodecPoolGzipReuse() throws Exception {
  Configuration conf = new Configuration();
  conf.setBoolean("hadoop.native.lib", true);
  if (!ZlibFactory.isNativeZlibLoaded(conf)) {
    LOG.warn("testCodecPoolGzipReuse skipped: native libs not loaded");
    return;
  }
  GzipCodec gzc = ReflectionUtils.newInstance(GzipCodec.class, conf);
  DefaultCodec dfc = ReflectionUtils.newInstance(DefaultCodec.class, conf);
  Compressor c1 = CodecPool.getCompressor(gzc);
  Compressor c2 = CodecPool.getCompressor(dfc);
  CodecPool.returnCompressor(c1);
  CodecPool.returnCompressor(c2);
  assertTrue("Got mismatched ZlibCompressor", c2 != CodecPool.getCompressor(gzc));
}
 
Example #26
Source File: SequenceFile.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/**
 * Construct the preferred type of SequenceFile Writer.
 * @param fs The configured filesystem.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param bufferSize buffer size for the underlaying outputstream.
 * @param replication replication factor for the file.
 * @param blockSize block size for the file.
 * @param compressionType The compression type.
 * @param codec The compression codec.
 * @param progress The Progressable object to track progress.
 * @param metadata The metadata of the file.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer
  createWriter(FileSystem fs, Configuration conf, Path name,
               Class keyClass, Class valClass, int bufferSize,
               short replication, long blockSize,
               CompressionType compressionType, CompressionCodec codec,
               Progressable progress, Metadata metadata) throws IOException {
  if ((codec instanceof GzipCodec) &&
      !NativeCodeLoader.isNativeCodeLoaded() &&
      !ZlibFactory.isNativeZlibLoaded(conf)) {
    throw new IllegalArgumentException("SequenceFile doesn't work with " +
                                       "GzipCodec without native-hadoop code!");
  }

  Writer writer = null;

  if (compressionType == CompressionType.NONE) {
    writer = new Writer(fs, conf, name, keyClass, valClass,
                        bufferSize, replication, blockSize,
                        progress, metadata);
  } else if (compressionType == CompressionType.RECORD) {
    writer = new RecordCompressWriter(fs, conf, name, keyClass, valClass,
                                      bufferSize, replication, blockSize,
                                      codec, progress, metadata);
  } else if (compressionType == CompressionType.BLOCK){
    writer = new BlockCompressWriter(fs, conf, name, keyClass, valClass,
                                     bufferSize, replication, blockSize,
                                     codec, progress, metadata);
  }

  return writer;
}
 
Example #27
Source File: SequenceFile.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/**
   * Construct the preferred type of 'raw' SequenceFile Writer.
   * @param fs The configured filesystem. 
   * @param conf The configuration.
   * @param file The name of the file. 
   * @param keyClass The 'key' type.
   * @param valClass The 'value' type.
   * @param compress Compress data?
   * @param blockCompress Compress blocks?
   * @param codec The compression codec.
   * @param progress
   * @param metadata The metadata of the file.
   * @return Returns the handle to the constructed SequenceFile Writer.
   * @throws IOException
   */
  private static Writer
  createWriter(FileSystem fs, Configuration conf, Path file, 
               Class keyClass, Class valClass, 
               boolean compress, boolean blockCompress,
               CompressionCodec codec, Progressable progress, Metadata metadata)
  throws IOException {
  if (codec != null && (codec instanceof GzipCodec) && 
      !NativeCodeLoader.isNativeCodeLoaded() && 
      !ZlibFactory.isNativeZlibLoaded(conf)) {
    throw new IllegalArgumentException("SequenceFile doesn't work with " +
                                       "GzipCodec without native-hadoop code!");
  }

  Writer writer = null;

  if (!compress) {
    writer = new Writer(fs, conf, file, keyClass, valClass, progress, metadata);
  } else if (compress && !blockCompress) {
    writer = new RecordCompressWriter(fs, conf, file, keyClass, valClass, 
                                      codec, progress, metadata);
  } else {
    writer = new BlockCompressWriter(fs, conf, file, keyClass, valClass, 
                                     codec, progress, metadata);
  }
  
  return writer;
}
 
Example #28
Source File: TestCodec.java    From hadoop with Apache License 2.0 5 votes vote down vote up
private static void gzipReinitTest(Configuration conf, CompressionCodec codec)
    throws IOException {
  // Add codec to cache
  ZlibFactory.setCompressionLevel(conf, CompressionLevel.BEST_COMPRESSION);
  ZlibFactory.setCompressionStrategy(conf,
      CompressionStrategy.DEFAULT_STRATEGY);
  Compressor c1 = CodecPool.getCompressor(codec);
  CodecPool.returnCompressor(c1);
  // reset compressor's compression level to perform no compression
  ZlibFactory.setCompressionLevel(conf, CompressionLevel.NO_COMPRESSION);
  Compressor c2 = CodecPool.getCompressor(codec, conf);
  // ensure same compressor placed earlier
  assertTrue("Got mismatched ZlibCompressor", c1 == c2);
  ByteArrayOutputStream bos = new ByteArrayOutputStream();
  CompressionOutputStream cos = null;
  // write trivially compressable data
  byte[] b = new byte[1 << 15];
  Arrays.fill(b, (byte) 43);
  try {
    cos = codec.createOutputStream(bos, c2);
    cos.write(b);
  } finally {
    if (cos != null) {
      cos.close();
    }
    CodecPool.returnCompressor(c2);
  }
  byte[] outbytes = bos.toByteArray();
  // verify data were not compressed
  assertTrue("Compressed bytes contrary to configuration",
             outbytes.length >= b.length);
}
 
Example #29
Source File: TestCodec.java    From hadoop with Apache License 2.0 5 votes vote down vote up
private static void codecTestWithNOCompression (Configuration conf,
                    String codecClass) throws IOException {
  // Create a compressor with NO_COMPRESSION and make sure that
  // output is not compressed by comparing the size with the
  // original input

  CompressionCodec codec = null;
  ZlibFactory.setCompressionLevel(conf, CompressionLevel.NO_COMPRESSION);
  try {
    codec = (CompressionCodec)
      ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf);
  } catch (ClassNotFoundException cnfe) {
    throw new IOException("Illegal codec!");
  }
  Compressor c = codec.createCompressor();
  // ensure same compressor placed earlier
  ByteArrayOutputStream bos = new ByteArrayOutputStream();
  CompressionOutputStream cos = null;
  // write trivially compressable data
  byte[] b = new byte[1 << 15];
  Arrays.fill(b, (byte) 43);
  try {
    cos = codec.createOutputStream(bos, c);
    cos.write(b);
  } finally {
    if (cos != null) {
      cos.close();
    }
  }
  byte[] outbytes = bos.toByteArray();
  // verify data were not compressed
  assertTrue("Compressed bytes contrary to configuration(NO_COMPRESSION)",
             outbytes.length >= b.length);
}
 
Example #30
Source File: TestCodec.java    From hadoop with Apache License 2.0 5 votes vote down vote up
@Test
public void testCodecPoolCompressorReinit() throws Exception {
  Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);
  if (ZlibFactory.isNativeZlibLoaded(conf)) {
    GzipCodec gzc = ReflectionUtils.newInstance(GzipCodec.class, conf);
    gzipReinitTest(conf, gzc);
  } else {
    LOG.warn("testCodecPoolCompressorReinit skipped: native libs not loaded");
  }
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, false);
  DefaultCodec dfc = ReflectionUtils.newInstance(DefaultCodec.class, conf);
  gzipReinitTest(conf, dfc);
}