Java Code Examples for org.apache.hadoop.io.compress.CompressionCodec

The following examples show how to use org.apache.hadoop.io.compress.CompressionCodec. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: big-c   Source File: FileOutputFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the {@link CompressionCodec} class used to compress the job outputs.
 * @param job the {@link Job} whose configuration is consulted
 * @param defaultValue the {@link CompressionCodec} class handed back when the
 *        configuration does not name a codec
 * @return the {@link CompressionCodec} class to be used to compress the
 *         job outputs
 * @throws IllegalArgumentException if a codec class is configured but cannot
 *         be found
 */
public static Class<? extends CompressionCodec> 
getOutputCompressorClass(JobContext job, 
                       Class<? extends CompressionCodec> defaultValue) {
  Configuration conf = job.getConfiguration();
  String codecName = conf.get(FileOutputFormat.COMPRESS_CODEC);
  if (codecName == null) {
    // Nothing configured: the caller-supplied default wins.
    return defaultValue;
  }
  try {
    return conf.getClassByName(codecName).asSubclass(CompressionCodec.class);
  } catch (ClassNotFoundException e) {
    throw new IllegalArgumentException("Compression codec " + codecName + 
                                       " was not found.", e);
  }
}
 
Example 2
Source Project: incubator-tez   Source File: TezMerger.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code MergeQueue} over the supplied segments and returns the
 * iterator produced by its {@code merge} call.
 *
 * All arguments are forwarded unchanged; the queue is additionally
 * constructed with a trailing {@code false} flag.
 *
 * @throws IOException if the underlying merge raises it
 */
public static <K extends Object, V extends Object>
  TezRawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class keyClass, Class valueClass,
                          CompressionCodec codec,
                          List<Segment> segments,
                          int mergeFactor, int inMemSegments, Path tmpDir,
                          RawComparator comparator, Progressable reporter,
                          boolean sortSegments,
                          TezCounter readsCounter,
                          TezCounter writesCounter,
                          TezCounter bytesReadCounter,
                          Progress mergePhase)
    throws IOException {
  MergeQueue queue = new MergeQueue(conf, fs, segments, comparator, reporter,
                                    sortSegments, codec, false);
  return queue.merge(keyClass, valueClass,
                     mergeFactor, inMemSegments,
                     tmpDir,
                     readsCounter, writesCounter,
                     bytesReadCounter,
                     mergePhase);
}
 
Example 3
@Test
 public void readBitcoinRawBlockInputFormatGzipCompressed() throws IOException, InterruptedException {
   // Reads a gzip-compressed block file and expects exactly one split containing one block.
   Configuration conf = new Configuration(defaultConf);
   Job job = Job.getInstance(conf);
   CompressionCodec gzip = new GzipCodec();
   ReflectionUtils.setConf(gzip, conf);
   ClassLoader classLoader = getClass().getClassLoader();
   String fileName="version4comp.blk.gz";
   String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
   Path file = new Path(fileNameBlock);
   FileInputFormat.setInputPaths(job, file);
   BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
   List<InputSplit> splits = format.getSplits(job);
   TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
   assertEquals( 1, splits.size(),"Only one split generated for compressed block");
   RecordReader<BytesWritable, BytesWritable> reader = format.createRecordReader(splits.get(0), context);
   assertNotNull( reader,"Format returned  null RecordReader");
   // Close the reader even when initialization or an assertion fails,
   // so a failing test does not leak the underlying stream.
   try {
     reader.initialize(splits.get(0),context);
     assertTrue( reader.nextKeyValue(),"Input Split for block version contains at least one block");
     BytesWritable block = reader.getCurrentValue();
     assertEquals( 998039, block.getLength(),"Compressed block must have a size of 998.039 bytes");
     assertFalse( reader.nextKeyValue(),"No further blocks in compressed block");
   } finally {
     reader.close();
   }
 }
 
Example 4
Source Project: big-c   Source File: InMemoryMapOutput.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a map output that is held in a bounded in-memory buffer.
 *
 * @param conf configuration stored on this instance
 * @param mapId passed to the superclass constructor
 * @param merger merge manager stored on this instance
 * @param size capacity of the bounded buffer; also passed (as a long) to the
 *        superclass constructor
 * @param codec compression codec, or {@code null}; a decompressor is only
 *        obtained from {@link CodecPool} when this is non-null
 * @param primaryMapOutput passed to the superclass constructor
 */
public InMemoryMapOutput(Configuration conf, TaskAttemptID mapId,
                         MergeManagerImpl<K, V> merger,
                         int size, CompressionCodec codec,
                         boolean primaryMapOutput) {
  super(mapId, (long)size, primaryMapOutput);
  this.conf = conf;
  this.merger = merger;
  this.codec = codec;
  byteStream = new BoundedByteArrayOutputStream(size);
  memory = byteStream.getBuffer();
  decompressor = (codec == null) ? null : CodecPool.getDecompressor(codec);
}
 
Example 5
@Test
 public void readBitcoinRawBlockInputFormatBzip2Compressed() throws IOException, InterruptedException {
   // Reads a bzip2-compressed block file and expects exactly one split containing one block.
   Configuration conf = new Configuration(defaultConf);
   Job job = Job.getInstance(conf);
   CompressionCodec bzip2 = new BZip2Codec();
   ReflectionUtils.setConf(bzip2, conf);
   ClassLoader classLoader = getClass().getClassLoader();
   String fileName="version4comp.blk.bz2";
   String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
   Path file = new Path(fileNameBlock);
   FileInputFormat.setInputPaths(job, file);
   BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
   List<InputSplit> splits = format.getSplits(job);
   TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
   assertEquals( 1, splits.size(),"Only one split generated for compressed block");
   RecordReader<BytesWritable, BytesWritable> reader = format.createRecordReader(splits.get(0), context);
   assertNotNull( reader,"Format returned  null RecordReader");
   // Close the reader even when initialization or an assertion fails,
   // so a failing test does not leak the underlying stream.
   try {
     reader.initialize(splits.get(0),context);
     assertTrue( reader.nextKeyValue(),"Input Split for block version contains at least one block");
     BytesWritable block = reader.getCurrentValue();
     assertEquals( 998039, block.getLength(),"Compressed block must have a size of 998.039 bytes");
     assertFalse( reader.nextKeyValue(),"No further blocks in compressed block");
   } finally {
     reader.close();
   }
 }
 
Example 6
Source Project: hadoop-gpu   Source File: SequenceFile.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link Writer} for {@code outputFile} that reuses the compression
 * attributes (compressed flag, block-compressed flag and codec) read from
 * {@code inputFile}.
 * @param inputFile the path of the input file whose attributes should be 
 * cloned
 * @param outputFile the path of the output file 
 * @param prog the Progressable handed to the created writer
 * @return the newly created Writer
 * @throws IOException if reading the input attributes or creating the
 * writer raises it
 */
public Writer cloneFileAttributes(Path inputFile, Path outputFile, 
                                  Progressable prog) 
throws IOException {
  // The source is opened only long enough to read its compression attributes.
  Reader source =
      new Reader(inputFile.getFileSystem(conf), inputFile, 4096, conf, true);
  boolean isCompressed = source.isCompressed();
  boolean isBlockCompressed = source.isBlockCompressed();
  CompressionCodec sourceCodec = source.getCompressionCodec();
  source.close();

  FileSystem destFileSys = outputFile.getFileSystem(conf);
  return createWriter(destFileSys, conf,
                      outputFile, keyClass, valClass, isCompressed,
                      isBlockCompressed, sourceCodec, prog,
                      new Metadata());
}
 
Example 7
Source Project: hiped2   Source File: SequenceFileStoreFunc.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Configures the job's output key/value classes, its output compression
 * (only when both compression fields are set) and its output path.
 *
 * @param location output location, wrapped in a {@link Path}
 * @param job the job being configured
 * @throws IOException if one of the configuration calls raises it
 */
@Override
public void setStoreLocation(String location, Job job)
    throws IOException {
  job.setOutputKeyClass(keyClass);
  job.setOutputValueClass(valueClass);

  boolean compressionRequested =
      compressionType != null && compressionCodecClass != null;
  if (compressionRequested) {
    // NOTE(review): compressionCodecClass is only null-checked here; the codec
    // itself comes from the job configuration (DefaultCodec as fallback).
    // Confirm that this is intended.
    SequenceFileOutputFormat.setOutputCompressorClass(job,
        FileOutputFormat.getOutputCompressorClass(job, DefaultCodec.class));
    SequenceFileOutputFormat.setOutputCompressionType(job,
        SequenceFile.CompressionType.valueOf(compressionType));
  }

  FileOutputFormat.setOutputPath(job, new Path(location));
}
 
Example 8
Source Project: RDFS   Source File: TestSequenceFile.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Deletes {@code file} if present and writes {@code count} generated
 * key/value records to it as a SequenceFile.
 *
 * @param fs file system holding the file
 * @param count number of records to append
 * @param seed seed for the {@code RandomDatum.Generator}
 * @param file path of the SequenceFile to (re)create
 * @param compressionType compression type passed to the writer
 * @param codec compression codec passed to the writer
 * @throws IOException if deleting, creating, appending or closing fails
 */
private static void writeTest(FileSystem fs, int count, int seed, Path file, 
                              CompressionType compressionType, CompressionCodec codec)
  throws IOException {
  fs.delete(file, true);
  LOG.info("creating " + count + " records with " + compressionType +
           " compression");
  SequenceFile.Writer writer = 
    SequenceFile.createWriter(fs, conf, file, 
                              RandomDatum.class, RandomDatum.class, compressionType, codec);
  // Close the writer even when an append fails so the file handle is not leaked.
  try {
    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
    for (int i = 0; i < count; i++) {
      generator.next();
      RandomDatum key = generator.getKey();
      RandomDatum value = generator.getValue();

      writer.append(key, value);
    }
  } finally {
    writer.close();
  }
}
 
Example 9
Source Project: hadoop-gpu   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Borrows a {@link Compressor} for this algorithm's codec from the
 * {@link CodecPool} and resets it before handing it out.
 *
 * @return the reset compressor, or {@code null} when there is no codec or
 *         the pool returns {@code null}
 * @throws IOException propagated from the calls made here, if any raises it
 */
public Compressor getCompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec == null) {
    return null;
  }
  Compressor compressor = CodecPool.getCompressor(codec);
  if (compressor == null) {
    return null;
  }
  if (compressor.finished()) {
    // Somebody returns the compressor to CodecPool but is still using
    // it.
    LOG.warn("Compressor obtained from CodecPool already finished()");
  } else if (LOG.isDebugEnabled()) {
    // Guarded so the message string is only built when debug logging is on.
    LOG.debug("Got a compressor: " + compressor.hashCode());
  }
  // Following statement is necessary to get around bugs in 0.18 where a
  // compressor is referenced after returned back to the codec pool.
  compressor.reset();
  return compressor;
}
 
Example 10
Source Project: big-c   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Obtains a {@link Compressor} for this algorithm's codec from the
 * {@link CodecPool}, resetting it before it is returned.
 *
 * @return the reset compressor, or {@code null} when there is no codec or
 *         the pool returns {@code null}
 * @throws IOException propagated from the calls made here, if any raises it
 */
public Compressor getCompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec == null) {
    return null;
  }

  Compressor pooled = CodecPool.getCompressor(codec);
  if (pooled == null) {
    return null;
  }

  boolean alreadyFinished = pooled.finished();
  if (alreadyFinished) {
    // The compressor was handed back to CodecPool while someone still uses it.
    LOG.warn("Compressor obtained from CodecPool already finished()");
  } else if (LOG.isDebugEnabled()) {
    LOG.debug("Got a compressor: " + pooled.hashCode());
  }

  // Resetting works around bugs in 0.18 where a compressor is referenced
  // after it was returned back to the codec pool.
  pooled.reset();
  return pooled;
}
 
Example 11
Source Project: incubator-tez   Source File: TestIFile.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Appends every pair in {@code data} to {@code writer}, closes the writer and
 * logs its raw and compressed lengths.
 *
 * When {@code repeatKeys} is set and a key compares equal to the previous
 * one, {@code IFile.REPEAT_KEY} is appended in place of the key.
 * The {@code rle} and {@code codec} parameters are not read by this method.
 *
 * @return the (closed) writer that was passed in
 * @throws IOException if appending or closing fails
 */
private Writer writeTestFile(IFile.Writer writer, boolean rle, boolean repeatKeys,
    List<KVPair> data, CompressionCodec codec) throws IOException {
  assertNotNull(writer);

  Text lastKey = null;
  for (KVPair pair : data) {
    boolean sameKeyAsBefore = repeatKeys
        && lastKey != null
        && lastKey.compareTo(pair.getKey()) == 0;
    if (sameKeyAsBefore) {
      // RLE is enabled in IFile when IFile.REPEAT_KEY is set
      writer.append(IFile.REPEAT_KEY, pair.getvalue());
    } else {
      writer.append(pair.getKey(), pair.getvalue());
    }
    lastKey = pair.getKey();
  }

  writer.close();

  LOG.info("Uncompressed: " + writer.getRawLength());
  LOG.info("CompressedSize: " + writer.getCompressedLength());

  return writer;
}
 
Example 12
Source Project: tajo   Source File: TestInsertQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the DDL and query for this test, then checks that every file under
 * the table location resolves to a {@link DeflateCodec}.
 */
@Test
public final void testInsertOverwriteWithCompression() throws Exception {
  String tableName = IdentifierUtil.normalizeIdentifier("testInsertOverwriteWithCompression");
  executeFile("testInsertOverwriteWithCompression_ddl.sql").close();

  CatalogService catalog = testingCluster.getMaster().getCatalog();
  assertTrue(catalog.existsTable(getCurrentDatabase(), tableName));

  executeQuery().close();

  TableDesc desc = catalog.getTableDesc(getCurrentDatabase(), tableName);
  // The row-count statistic is only checked when the Hive catalog store is not running.
  if (!testingCluster.isHiveCatalogStoreRunning()) {
    assertEquals(2, desc.getStats().getNumRows().intValue());
  }

  FileSystem fs = FileSystem.get(testingCluster.getConfiguration());
  Path tablePath = new Path(desc.getUri());
  assertTrue(fs.exists(tablePath));

  CompressionCodecFactory codecFactory =
      new CompressionCodecFactory(testingCluster.getConfiguration());
  for (FileStatus dataFile : fs.listStatus(tablePath)) {
    assertTrue(codecFactory.getCodec(dataFile.getPath()) instanceof DeflateCodec);
  }

  executeString("DROP TABLE " + tableName + " PURGE");
}
 
Example 13
Source Project: hadoop   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reports whether the LZO codec could be loaded, attempting the load once.
 *
 * The codec class name is taken from the configuration key
 * {@code CONF_LZO_CLASS} when set, otherwise from the system property of the
 * same name, otherwise from {@code defaultClazz}. Previously a configured
 * value was discarded and the default class was used instead.
 *
 * @return {@code true} if a codec instance is available
 */
@Override
public synchronized boolean isSupported() {
  if (!checked) {
    checked = true;
    // Configuration takes precedence over the system property.
    String extClazz = conf.get(CONF_LZO_CLASS);
    if (extClazz == null) {
      extClazz = System.getProperty(CONF_LZO_CLASS);
    }
    String clazz = (extClazz != null) ? extClazz : defaultClazz;
    try {
      LOG.info("Trying to load Lzo codec class: " + clazz);
      codec =
          (CompressionCodec) ReflectionUtils.newInstance(Class
              .forName(clazz), conf);
    } catch (ClassNotFoundException ignored) {
      // that is okay: the codec stays null and the method reports "unsupported"
    }
  }
  return codec != null;
}
 
Example 14
Source Project: tez   Source File: IFile.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a writer on top of an already opened output stream.
 *
 * Records the stream's current position as the start offset, sets up the
 * (possibly compressed) output via {@code setupOutputStream}, writes the
 * header, and opens key/value serializers when a key class is supplied.
 *
 * @param conf configuration used to build the serialization factory
 * @param outputStream the raw output stream
 * @param keyClass key class, or {@code null} to skip serializer creation
 * @param valueClass value class used for the value serializer
 * @param codec codec passed to {@code setupOutputStream}
 * @param writesCounter counter stored as the written-records counter
 * @param serializedBytesCounter counter stored as the serialized
 *        uncompressed bytes counter
 * @param rle flag stored on this writer
 * @throws IOException if stream setup, header writing or serializer opening fails
 */
public Writer(Configuration conf, FSDataOutputStream outputStream,
    Class keyClass, Class valueClass,
    CompressionCodec codec, TezCounter writesCounter, TezCounter serializedBytesCounter,
    boolean rle) throws IOException {
  this.rawOut = outputStream;
  this.writtenRecordsCounter = writesCounter;
  this.serializedUncompressedBytes = serializedBytesCounter;
  this.start = this.rawOut.getPos();
  this.rle = rle;

  setupOutputStream(codec);
  writeHeader(outputStream);

  // Serializers are only owned (and flagged for closing) when a key class is given.
  this.closeSerializers = (keyClass != null);
  if (keyClass != null) {
    SerializationFactory serializers = new SerializationFactory(conf);
    this.keySerializer = serializers.getSerializer(keyClass);
    this.keySerializer.open(buffer);
    this.valueSerializer = serializers.getSerializer(valueClass);
    this.valueSerializer.open(buffer);
  }
}
 
Example 15
Source Project: hadoop-gpu   Source File: SequenceFile.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Construct the preferred type of 'raw' SequenceFile Writer.
 * @param conf The configuration.
 * @param out The stream on top which the writer is to be constructed.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compress Compress data?
 * @param blockCompress Compress blocks?
 * @param codec The compression codec.
 * @param metadata The metadata of the file.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 * @throws IllegalArgumentException if a GzipCodec is requested while neither
 *         native code nor native zlib is loaded
 */
private static Writer
  createWriter(Configuration conf, FSDataOutputStream out, 
               Class keyClass, Class valClass, boolean compress, boolean blockCompress,
               CompressionCodec codec, Metadata metadata)
  throws IOException {
  boolean gzipRequested = codec instanceof GzipCodec;
  if (gzipRequested
      && !NativeCodeLoader.isNativeCodeLoaded()
      && !ZlibFactory.isNativeZlibLoaded(conf)) {
    throw new IllegalArgumentException("SequenceFile doesn't work with " +
                                       "GzipCodec without native-hadoop code!");
  }

  // Uncompressed output gets the plain writer.
  if (!compress) {
    return new Writer(conf, out, keyClass, valClass, metadata);
  }
  // Compressed output: block compression when requested, record compression otherwise.
  if (blockCompress) {
    return new BlockCompressWriter(conf, out, keyClass, valClass, codec, metadata);
  }
  return new RecordCompressWriter(conf, out, keyClass, valClass, codec, metadata);
}
 
Example 16
/**
 * Advances to the next path of the split.
 *
 * The key becomes the path's string form and the value becomes an open
 * stream on the file, wrapped in a decompressing stream when a codec is
 * found for the path.
 *
 * @return {@code false} once all paths of the split have been consumed
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    boolean exhausted = currentPath >= split.getNumPaths();
    if (exhausted) {
        return false;
    }

    Path nextFile = split.getPath(currentPath);
    currentPath++;

    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(nextFile);
    key = nextFile.toString();
    FSDataInputStream rawStream = fs.open(nextFile);

    if (codec == null) {
        value = rawStream;
    } else {
        value = codec.createInputStream(rawStream);
    }
    return true;
}
 
Example 17
Source Project: hadoop-gpu   Source File: SequenceFile.java    License: Apache License 2.0 6 votes vote down vote up
/** Create the named file with write-progress reporter. */
public BlockCompressWriter(FileSystem fs, Configuration conf, Path name,
                           Class keyClass, Class valClass,
                           int bufferSize, short replication, long blockSize,
                           CompressionCodec codec,
                           Progressable progress, Metadata metadata)
  throws IOException {
  super.init(name, conf,
             fs.create(name, true, bufferSize, replication, blockSize, progress),
             keyClass, valClass, true, codec, metadata);
  init(conf.getInt("io.seqfile.compress.blocksize", 1000000));

  initializeFileHeader();
  writeFileHeader();
  finalizeFileHeader();
}
 
Example 18
Source Project: hadoop   Source File: Merger.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code MergeQueue} over the supplied segments and returns the
 * iterator produced by its {@code merge} call. All arguments are forwarded
 * unchanged.
 *
 * @throws IOException if the underlying merge raises it
 */
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          CompressionCodec codec,
                          List<Segment<K, V>> segments,
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          boolean sortSegments,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase,
                          TaskType taskType)
    throws IOException {
  MergeQueue<K, V> queue =
      new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
                           sortSegments, codec,
                           taskType);
  return queue.merge(keyClass, valueClass,
                     mergeFactor, tmpDir,
                     readsCounter, writesCounter,
                     mergePhase);
}
 
Example 19
Source Project: hadoop   Source File: CompressionEmulationUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns a {@link OutputStream} for a file that might need 
 * compression.
 *
 * When output compression is enabled in the configuration, the codec's
 * default extension is appended to the file name; the stream itself is only
 * wrapped in the codec's compressing stream when compression emulation is
 * enabled as well. The file is created without overwriting.
 */
static OutputStream getPossiblyCompressedOutputStream(Path file, 
                                                      Configuration conf)
throws IOException {
  FileSystem fs = file.getFileSystem(conf);
  JobConf jobConf = new JobConf(conf);

  boolean compressOutput =
    org.apache.hadoop.mapred.FileOutputFormat.getCompressOutput(jobConf);
  if (!compressOutput) {
    return fs.create(file, false);
  }

  // Resolve the codec class (gzip by default) and instantiate it.
  Class<? extends CompressionCodec> codecClass =
    org.apache.hadoop.mapred.FileOutputFormat
                            .getOutputCompressorClass(jobConf, 
                                                      GzipCodec.class);
  CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);

  // The extension is added whether or not emulation is enabled.
  Path target = file.suffix(codec.getDefaultExtension());

  if (isCompressionEmulationEnabled(conf)) {
    FSDataOutputStream rawOut = fs.create(target, false);
    return new DataOutputStream(codec.createOutputStream(rawOut));
  }
  return fs.create(target, false);
}
 
Example 20
Source Project: incubator-tez   Source File: ConfigUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the codec class named by
 * {@code TezJobConfig.TEZ_RUNTIME_COMPRESS_CODEC}.
 *
 * @param conf configuration to read the codec class name from
 * @param defaultValue class returned when no codec name is configured
 * @return the configured codec class, or {@code defaultValue}
 * @throws IllegalArgumentException if a codec class is configured but cannot
 *         be found
 */
public static Class<? extends CompressionCodec> getIntermediateOutputCompressorClass(
    Configuration conf, Class<DefaultCodec> defaultValue) {
  String codecName = conf.get(TezJobConfig.TEZ_RUNTIME_COMPRESS_CODEC);
  if (codecName == null) {
    return defaultValue;
  }
  try {
    return conf.getClassByName(codecName).asSubclass(CompressionCodec.class);
  } catch (ClassNotFoundException e) {
    throw new IllegalArgumentException("Compression codec " + codecName
        + " was not found.", e);
  }
}
 
Example 21
Source Project: big-c   Source File: TestDFSIO.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Configures the mapper and, when {@code test.io.compression.class} is set,
 * instantiates that compression codec.
 *
 * @param conf the job configuration passed on to the superclass
 * @throws RuntimeException if the configured class cannot be loaded as a
 *         {@link CompressionCodec}; the message names the offending class
 */
@Override // Mapper
public void configure(JobConf conf) {
  super.configure(conf);

  // grab compression
  String compression = getConf().get("test.io.compression.class", null);
  if (compression == null) {
    // No codec configured: compressionCodec is left as it was.
    return;
  }

  // try to initialize codec
  Class<? extends CompressionCodec> codec;
  try {
    codec = Class.forName(compression).asSubclass(CompressionCodec.class);
  } catch(Exception e) {
    // Name the class so the failure can be diagnosed from the message alone.
    throw new RuntimeException("Compression codec not found: " + compression, e);
  }

  compressionCodec = (CompressionCodec)
      ReflectionUtils.newInstance(codec, getConf());
}
 
Example 22
Source Project: RDFS   Source File: FileOutputFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the {@link CompressionCodec} class used to compress the job outputs.
 * @param job the {@link Job} whose configuration is consulted
 * @param defaultValue the {@link CompressionCodec} class handed back when
 *        {@code mapred.output.compression.codec} is not set
 * @return the {@link CompressionCodec} class to be used to compress the
 *         job outputs
 * @throws IllegalArgumentException if a codec class is configured but cannot
 *         be found
 */
public static Class<? extends CompressionCodec> 
getOutputCompressorClass(JobContext job, 
                       Class<? extends CompressionCodec> defaultValue) {
  Configuration conf = job.getConfiguration();
  String configuredName = conf.get("mapred.output.compression.codec");
  if (configuredName == null) {
    // No codec configured: use the caller's default.
    return defaultValue;
  }
  try {
    return conf.getClassByName(configuredName).asSubclass(CompressionCodec.class);
  } catch (ClassNotFoundException e) {
    throw new IllegalArgumentException("Compression codec " + configuredName + 
                                       " was not found.", e);
  }
}
 
Example 23
Source Project: hadoop   Source File: Merger.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code MergeQueue} over the given input paths and returns the
 * iterator produced by its {@code merge} call.
 *
 * The queue is always created with {@code TaskType.REDUCE} and a
 * {@code null} argument in the position before it.
 *
 * @throws IOException if queue construction or the merge raises it
 */
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass, 
                          CompressionCodec codec,
                          Path[] inputs, boolean deleteInputs, 
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase)
throws IOException {
  MergeQueue<K, V> queue =
    new MergeQueue<K, V>(conf, fs, inputs, deleteInputs, codec, comparator, 
                         reporter, null,
                         TaskType.REDUCE);
  return queue.merge(keyClass, valueClass,
                     mergeFactor, tmpDir,
                     readsCounter, writesCounter, 
                     mergePhase);
}
 
Example 24
Source Project: Kylin   Source File: HiveColumnCardinalityUpdateJob.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads every file directly under {@code location} (skipping names that start
 * with "_") and returns their contents split on newline characters.
 *
 * Files for which a compression codec is found are decompressed while
 * reading. Each file's stream is closed once it has been read.
 *
 * @param location directory (or file) to list
 * @param conf configuration used for the file system and the codec factory
 * @return all lines in listing order; empty when the listing is {@code null}
 * @throws Exception if listing, opening, reading or closing fails
 */
private static List<String> readLines(Path location, Configuration conf) throws Exception {
    FileSystem fileSystem = FileSystem.get(location.toUri(), conf);
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    FileStatus[] items = fileSystem.listStatus(location);
    if (items == null)
        return new ArrayList<String>();
    List<String> results = new ArrayList<String>();
    for (FileStatus item : items) {

        // ignoring files like _SUCCESS
        if (item.getPath().getName().startsWith("_")) {
            continue;
        }

        CompressionCodec codec = factory.getCodec(item.getPath());
        InputStream stream = fileSystem.open(item.getPath());
        // The stream was never closed before; close it on every path so
        // handles are not leaked across the files of the directory.
        try {
            // check if we have a compression codec we need to use
            if (codec != null) {
                stream = codec.createInputStream(stream);
            }

            StringWriter writer = new StringWriter();
            IOUtils.copy(stream, writer, "UTF-8");
            String raw = writer.toString();
            for (String str : raw.split("\n")) {
                results.add(str);
            }
        } finally {
            stream.close();
        }
    }
    return results;
}
 
Example 25
Source Project: hbase   Source File: Compression.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the cached bzip codec, building it under {@code lock} on first use.
 *
 * NOTE(review): the unsynchronized read is only safe if {@code bzipCodec} is
 * declared volatile; its declaration is not visible here, so confirm.
 *
 * @param conf configuration handed to {@code buildCodec} when the codec has
 *        to be created
 * @return the codec instance
 */
@Override
CompressionCodec getCodec(Configuration conf) {
  CompressionCodec cached = bzipCodec;
  if (cached != null) {
    return cached;
  }
  synchronized (lock) {
    // Re-check under the lock: another thread may have built it meanwhile.
    if (bzipCodec == null) {
      bzipCodec = buildCodec(conf);
    }
    return bzipCodec;
  }
}
 
Example 26
Source Project: RDFS   Source File: SequenceFile.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Construct the preferred type of SequenceFile Writer.
 * @param fs The configured filesystem.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param bufferSize buffer size for the underlying output stream.
 * @param replication replication factor for the file.
 * @param blockSize block size for the file.
 * @param createParent create parent directory if non-existent; note that
 *        this method's body does not read the flag and always creates the
 *        file through {@code createNonRecursive}
 * @param compressionType The compression type.
 * @param codec The compression codec.
 * @param metadata The metadata of the file.
 * @param forceSync set the forceSync flag for this file
 * @param doParallelWrites write replicas in parallel
 * @return Returns the handle to the constructed SequenceFile Writer, or
 *         {@code null} for a compression type other than NONE, RECORD or BLOCK.
 * @throws IOException
 * @throws IllegalArgumentException if a GzipCodec is requested while neither
 *         native code nor native zlib is loaded
 */
public static Writer
  createWriter(FileSystem fs, Configuration conf, Path name,
               Class keyClass, Class valClass, int bufferSize,
               short replication, long blockSize, boolean createParent,
               CompressionType compressionType, CompressionCodec codec,
               Metadata metadata, boolean forceSync,
               boolean doParallelWrites) throws IOException {
  boolean gzipRequested = codec instanceof GzipCodec;
  if (gzipRequested &&
      !NativeCodeLoader.isNativeCodeLoaded() &&
      !ZlibFactory.isNativeZlibLoaded(conf)) {
    throw new IllegalArgumentException("SequenceFile doesn't work with " +
                                       "GzipCodec without native-hadoop code!");
  }

  // The output file is only created inside the matching case, so an
  // unrecognized compression type creates nothing.
  final Writer writer;
  switch (compressionType) {
  case NONE:
    writer = new Writer(conf,
        fs.createNonRecursive(name, FsPermission.getDefault(), true,
            bufferSize, replication, blockSize, null, forceSync, doParallelWrites),
        keyClass, valClass, metadata).ownStream();
    break;
  case RECORD:
    writer = new RecordCompressWriter(conf,
        fs.createNonRecursive(name, FsPermission.getDefault(), true,
            bufferSize, replication, blockSize, null, forceSync, doParallelWrites),
        keyClass, valClass, codec, metadata).ownStream();
    break;
  case BLOCK:
    writer = new BlockCompressWriter(conf,
        fs.createNonRecursive(name, FsPermission.getDefault(), true,
            bufferSize, replication, blockSize, null, forceSync, doParallelWrites),
        keyClass, valClass, codec, metadata).ownStream();
    break;
  default:
    writer = null;
    break;
  }
  return writer;
}
 
Example 27
Source Project: Hadoop-BAM   Source File: QseqInputFormat.java    License: MIT License 5 votes vote down vote up
/**
 * Opens the split's file and prepares a line reader on it.
 *
 * For an uncompressed file the stream is positioned at the first record of
 * the split. A compressed file must start at offset 0 and is read to its end.
 * If setup fails after the file was opened, the file stream is closed
 * before the exception propagates.
 *
 * @param conf configuration used for the file system and codec lookup
 * @param split the file split to read
 * @throws IOException if opening or positioning the file fails
 * @throws RuntimeException if a compressed file's split does not start at 0
 */
public QseqRecordReader(Configuration conf, FileSplit split) throws IOException
{
	setConf(conf);
	file = split.getPath();
	start = split.getStart();
	end = start + split.getLength();

	FileSystem fs = file.getFileSystem(conf);
	FSDataInputStream fileIn = fs.open(file);

	// Tracks whether setup completed, so a failed constructor does not leak fileIn.
	boolean initialized = false;
	try
	{
		CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
		CompressionCodec        codec        = codecFactory.getCodec(file);

		if (codec == null) // no codec.  Uncompressed file.
		{
			positionAtFirstRecord(fileIn);
			inputStream = fileIn;
		}
		else
		{ // compressed file
			if (start != 0)
				throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");

			inputStream = codec.createInputStream(fileIn);
			end = Long.MAX_VALUE; // read until the end of the file
		}

		lineReader = new LineReader(inputStream);
		initialized = true;
	}
	finally
	{
		if (!initialized)
			fileIn.close();
	}
}
 
Example 28
Source Project: tajo   Source File: TestInsertQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Inserts one row into a location with DeflateCodec compression and checks
 * that a single deflate-compressed file with the expected fields is written.
 */
@Test
public final void testInsertOverwritePathWithNonFromQuery() throws Exception {
  executeString("insert overwrite into location " +
      "'/tajo-data/testInsertOverwritePathWithNonFromQuery' " +
      "USING text WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
      "select 1::INT4, 2.1::FLOAT4, 'test'").close();

  FileSystem fs = FileSystem.get(testingCluster.getConfiguration());
  Path outputDir = new Path("/tajo-data/testInsertOverwritePathWithNonFromQuery");
  assertTrue(fs.exists(outputDir));
  assertEquals(1, fs.listStatus(outputDir).length);

  CompressionCodecFactory codecFactory =
      new CompressionCodecFactory(testingCluster.getConfiguration());
  Path dataFile = fs.listStatus(outputDir)[0].getPath();
  CompressionCodec codec = codecFactory.getCodec(dataFile);
  assertTrue(codec instanceof DeflateCodec);

  // Decompress the single output file and verify its first line.
  try (BufferedReader reader = new BufferedReader(
          new InputStreamReader(codec.createInputStream(fs.open(dataFile))))) {
    String firstLine = reader.readLine();
    assertNotNull(firstLine);

    String[] fields = firstLine.split("\\|");

    assertEquals(3, fields.length);
    assertEquals("1", fields[0]);
    assertEquals("2.1", fields[1]);
    assertEquals("test", fields[2]);
  }
}
 
Example 29
Source Project: Flink-CEPplus   Source File: SequenceFileWriterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the given stream in a Hadoop output stream and creates a
 * {@link SequenceFileWriter} writing to it with the configured key class,
 * value class, compression type and codec.
 *
 * @param out the stream to write the sequence file to
 * @return a writer backed by a new {@code SequenceFile.Writer}
 * @throws IOException if creating the Hadoop stream or writer fails
 */
@Override
public SequenceFileWriter<K, V> create(FSDataOutputStream out) throws IOException {
	org.apache.hadoop.fs.FSDataOutputStream hadoopStream =
		new org.apache.hadoop.fs.FSDataOutputStream(out, null);
	CompressionCodec codec =
		getCompressionCodec(serializableHadoopConfig.get(), compressionCodecName);
	return new SequenceFileWriter<>(
		SequenceFile.createWriter(
			serializableHadoopConfig.get(),
			SequenceFile.Writer.stream(hadoopStream),
			SequenceFile.Writer.keyClass(keyClass),
			SequenceFile.Writer.valueClass(valueClass),
			SequenceFile.Writer.compression(compressionType, codec)));
}
 
Example 30
Source Project: hbase   Source File: NettyRpcDuplexHandler.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the handler and stores its collaborators.
 *
 * @param conn the connection this handler belongs to
 * @param cellBlockBuilder builder stored for cell block handling
 * @param codec cell codec; may be whatever the caller passes, it is stored as is
 * @param compressor compression codec; stored as is
 */
public NettyRpcDuplexHandler(NettyRpcConnection conn, CellBlockBuilder cellBlockBuilder,
    Codec codec, CompressionCodec compressor) {
  this.compressor = compressor;
  this.codec = codec;
  this.cellBlockBuilder = cellBlockBuilder;
  this.conn = conn;
}