org.apache.hadoop.io.compress.DefaultCodec Java Examples

The following examples show how to use org.apache.hadoop.io.compress.DefaultCodec. They are drawn from open source projects; each example notes its source file, originating project, and license.
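DefaultCodec is Hadoop's built-in DEFLATE (zlib) codec: you obtain a CompressionCodec, wrap an OutputStream to compress, and wrap an InputStream to decompress. Before the project examples, here is a minimal round-trip sketch; it is illustrative rather than taken from any project below, and the class name DefaultCodecRoundTrip is ours.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DefaultCodecRoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // DefaultCodec is Configurable; ReflectionUtils.newInstance calls
    // setConf(conf) for us, much as the examples below do.
    CompressionCodec codec = ReflectionUtils.newInstance(DefaultCodec.class, conf);

    byte[] original = "hello, DefaultCodec".getBytes("UTF-8");

    // Compress by wrapping an ordinary OutputStream.
    ByteArrayOutputStream compressed = new ByteArrayOutputStream();
    CompressionOutputStream out = codec.createOutputStream(compressed);
    out.write(original);
    out.close();

    // Decompress by wrapping an ordinary InputStream.
    CompressionInputStream in =
        codec.createInputStream(new ByteArrayInputStream(compressed.toByteArray()));
    ByteArrayOutputStream restored = new ByteArrayOutputStream();
    IOUtils.copyBytes(in, restored, 4096, true);

    System.out.println(new String(restored.toByteArray(), "UTF-8"));
  }
}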
Example #1
Source File: TestIFile.java    From big-c with Apache License 2.0
@Test
/** Same as above but create a reader. */
public void testIFileReaderWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem)localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  // GzipCodec extends DefaultCodec; the tests use it because no native
  // compression libraries are available when running the tests.
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  FSDataOutputStream out = rfs.create(path);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, out, Text.class, Text.class,
                                   codec, null);
  writer.close();
  FSDataInputStream in = rfs.open(path);
  IFile.Reader<Text, Text> reader =
    new IFile.Reader<Text, Text>(conf, in, rfs.getFileStatus(path).getLen(),
        codec, null);
  reader.close();
  
  // test checksum
  byte[] ab = new byte[100];
  int readed = reader.checksumIn.readWithChecksum(ab, 0, ab.length);
  assertEquals(readed, reader.checksumIn.getChecksum().length);
}
 
Example #2
Source File: TestIFile.java    From big-c with Apache License 2.0
@Test
/**
 * Create an IFile.Writer using GzipCodec since this code does not
 * have a compressor when run via the tests (ie no native libraries).
 */
public void testIFileWriterWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem)localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  IFile.Writer<Text, Text> writer =
    new IFile.Writer<Text, Text>(conf, rfs.create(path), Text.class, Text.class,
                                 codec, null);
  writer.close();
}
 
Example #3
Source File: SequenceFileStoreFunc.java    From hiped2 with Apache License 2.0
@Override
public void setStoreLocation(String location, Job job)
    throws IOException {
  job.setOutputKeyClass(keyClass);
  job.setOutputValueClass(valueClass);
  if (compressionType != null && compressionCodecClass != null) {
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job,
            DefaultCodec.class);
    SequenceFileOutputFormat.
        setOutputCompressorClass(job, codecClass);
    SequenceFileOutputFormat.setOutputCompressionType(job,
        SequenceFile.CompressionType.valueOf(compressionType));
  }
  FileOutputFormat.setOutputPath(job, new Path(location));
}
 
Example #4
Source File: FileRegistryTest.java    From secor with Apache License 2.0
private FileWriter createWriter() throws Exception {
    PowerMockito.mockStatic(FileUtil.class);

    PowerMockito.mockStatic(ReflectionUtil.class);
    FileWriter writer = Mockito.mock(FileWriter.class);
    Mockito.when(
            ReflectionUtil.createFileWriter(
                    Mockito.any(String.class),
                    Mockito.any(LogFilePath.class),
                    Mockito.any(CompressionCodec.class),
                    Mockito.any(SecorConfig.class)
            ))
            .thenReturn(writer);

    Mockito.when(writer.getLength()).thenReturn(123L);

    FileWriter createdWriter = mRegistry.getOrCreateWriter(
            mLogFilePath, new DefaultCodec());
    assertTrue(createdWriter == writer);

    return writer;
}
 
Example #5
Source File: TestIFile.java    From hadoop with Apache License 2.0
@Test
/**
 * Create an IFile.Writer using GzipCodec since this code does not
 * have a compressor when run via the tests (ie no native libraries).
 */
public void testIFileWriterWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem)localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  IFile.Writer<Text, Text> writer =
    new IFile.Writer<Text, Text>(conf, rfs.create(path), Text.class, Text.class,
                                 codec, null);
  writer.close();
}
 
Example #6
Source File: TestIFile.java    From hadoop with Apache License 2.0
@Test
/** Same as above but create a reader. */
public void testIFileReaderWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem)localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  FSDataOutputStream out = rfs.create(path);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, out, Text.class, Text.class,
                                   codec, null);
  writer.close();
  FSDataInputStream in = rfs.open(path);
  IFile.Reader<Text, Text> reader =
    new IFile.Reader<Text, Text>(conf, in, rfs.getFileStatus(path).getLen(),
        codec, null);
  reader.close();
  
  // test checksum
  byte[] ab = new byte[100];
  int readed = reader.checksumIn.readWithChecksum(ab, 0, ab.length);
  assertEquals(readed, reader.checksumIn.getChecksum().length);
}
 
Example #7
Source File: ConfigUtils.java    From tez with Apache License 2.0
public static Class<? extends CompressionCodec> getIntermediateInputCompressorClass(
    Configuration conf, Class<DefaultCodec> defaultValue) {
  Class<? extends CompressionCodec> codecClass = defaultValue;
  String name = conf
      .get(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC);
  if (name != null) {
    try {
      codecClass = conf.getClassByName(name).asSubclass(
          CompressionCodec.class);
    } catch (ClassNotFoundException e) {
      throw new IllegalArgumentException("Compression codec " + name
          + " was not found.", e);
    }
  }
  return codecClass;
}
 
Example #8
Source File: ReduceTask.java    From RDFS with Apache License 2.0
public MapOutputCopier(JobConf job, Reporter reporter) {
  setName("MapOutputCopier " + reduceTask.getTaskID() + "." + id);
  LOG.debug(getName() + " created");
  this.reporter = reporter;

  shuffleConnectionTimeout =
    job.getInt("mapreduce.reduce.shuffle.connect.timeout", STALLED_COPY_TIMEOUT);
  shuffleReadTimeout =
    job.getInt("mapreduce.reduce.shuffle.read.timeout", DEFAULT_READ_TIMEOUT);

  if (job.getCompressMapOutput()) {
    Class<? extends CompressionCodec> codecClass =
      job.getMapOutputCompressorClass(DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
    decompressor = CodecPool.getDecompressor(codec);
  }
  setDaemon(true);
}
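
The copier above only consumes the codec setting. As a sketch of the producer side (our assumption of what a driver for these old-API examples would do, not code from RDFS), the job enables map-output compression via JobConf, and getMapOutputCompressorClass then returns the same class:

import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical driver-side helper (not from the project above).
public static JobConf configureMapOutputCompression(JobConf job) {
  // Enable compression of intermediate map outputs and select DefaultCodec;
  // consumers like MapOutputCopier then recover it via
  // job.getMapOutputCompressorClass(DefaultCodec.class).
  job.setCompressMapOutput(true);
  job.setMapOutputCompressorClass(DefaultCodec.class);
  return job;
}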
 
Example #9
Source File: ParquetAsTextOutputFormat.java    From iow-hadoop-streaming with Apache License 2.0
private static CompressionCodecName getCodec(JobConf conf) {

    CompressionCodecName codec;

    if (ParquetOutputFormat.isCompressionSet(conf)) { // explicit parquet config
        codec = ParquetOutputFormat.getCompression(conf);
    } else if (getCompressOutput(conf)) { // from hadoop config
        // find the right codec
        Class<?> codecClass = getOutputCompressorClass(conf, DefaultCodec.class);
        LOG.info("Compression set through hadoop codec: " + codecClass.getName());
        codec = CompressionCodecName.fromCompressionCodec(codecClass);
    } else {
        codec = CompressionCodecName.UNCOMPRESSED;
    }

    LOG.info("Compression: " + codec.name());
    return codec;
}
 
Example #10
Source File: TestUnorderedPartitionedKVWriter.java    From tez with Apache License 2.0
private Configuration createConfiguration(OutputContext outputContext,
    Class<? extends Writable> keyClass, Class<? extends Writable> valClass,
    boolean shouldCompress, int maxSingleBufferSizeBytes,
    Class<? extends Partitioner> partitionerClass) {
  Configuration conf = new Configuration(false);
  conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
  conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, outputContext.getWorkDirs());
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, keyClass.getName());
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, valClass.getName());
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS, partitionerClass.getName());
  if (maxSingleBufferSizeBytes >= 0) {
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_MAX_PER_BUFFER_SIZE_BYTES,
        maxSingleBufferSizeBytes);
  }
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, shouldCompress);
  if (shouldCompress) {
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC,
        DefaultCodec.class.getName());
  }
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_REPORT_PARTITION_STATS,
      reportPartitionStats.getType());
  return conf;
}
 
Example #11
Source File: ReduceTask.java    From RDFS with Apache License 2.0
private CompressionCodec initCodec() {
  // check if map-outputs are to be compressed
  if (conf.getCompressMapOutput()) {
    Class<? extends CompressionCodec> codecClass =
      conf.getMapOutputCompressorClass(DefaultCodec.class);
    return ReflectionUtils.newInstance(codecClass, conf);
  }

  return null;
}
 
Example #12
Source File: MapFileOutputFormat.java    From RDFS with Apache License 2.0
public RecordWriter<WritableComparable, Writable> getRecordWriter(FileSystem ignored, JobConf job,
                                    String name, Progressable progress)
  throws IOException {
  // get the path of the temporary output file 
  Path file = FileOutputFormat.getTaskOutputPath(job, name);
  
  FileSystem fs = file.getFileSystem(job);
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(job)) {
    // find the kind of compression to do
    compressionType = SequenceFileOutputFormat.getOutputCompressionType(job);

    // find the right codec
    Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  
  // ignore the progress parameter, since MapFile is local
  final MapFile.Writer out =
    new MapFile.Writer(job, fs, file.toString(),
                       job.getOutputKeyClass().asSubclass(WritableComparable.class),
                       job.getOutputValueClass().asSubclass(Writable.class),
                       compressionType, codec,
                       progress);

  return new RecordWriter<WritableComparable, Writable>() {

      public void write(WritableComparable key, Writable value)
        throws IOException {

        out.append(key, value);
      }

      public void close(Reporter reporter) throws IOException { out.close();}
    };
}
 
Example #13
Source File: TestExtractor.java    From sqoop-on-spark with Apache License 2.0
@DataProvider(name="test-hdfs-extractor")
public static Object[][] data() {
  List<Object[]> parameters = new ArrayList<Object[]>();
  for (Class<?> compressionClass : new Class<?>[]{null, DefaultCodec.class, BZip2Codec.class}) {
    for (Object outputFileType : new Object[]{TEXT_FILE, SEQUENCE_FILE}) {
      parameters.add(new Object[]{outputFileType, compressionClass});
    }
  }
  return parameters.toArray(new Object[0][]);
}
 
Example #14
Source File: AbstractHdfsConnector.java    From pulsar with Apache License 2.0
protected CompressionCodec getCompressionCodec() {
  if (connectorConfig.getCompression() == null) {
    return null;
  }

  CompressionCodec codec = getCompressionCodecFactory()
      .getCodecByName(connectorConfig.getCompression().name());

  return (codec != null) ? codec : new DefaultCodec();
}
 
Example #15
Source File: MapFileOutputFormat.java    From hadoop-gpu with Apache License 2.0
public RecordWriter<WritableComparable, Writable> getRecordWriter(FileSystem ignored, JobConf job,
                                    String name, Progressable progress)
  throws IOException {
  // get the path of the temporary output file 
  Path file = FileOutputFormat.getTaskOutputPath(job, name);
  
  FileSystem fs = file.getFileSystem(job);
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(job)) {
    // find the kind of compression to do
    compressionType = SequenceFileOutputFormat.getOutputCompressionType(job);

    // find the right codec
    Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  
  // ignore the progress parameter, since MapFile is local
  final MapFile.Writer out =
    new MapFile.Writer(job, fs, file.toString(),
                       job.getOutputKeyClass().asSubclass(WritableComparable.class),
                       job.getOutputValueClass().asSubclass(Writable.class),
                       compressionType, codec,
                       progress);

  return new RecordWriter<WritableComparable, Writable>() {

      public void write(WritableComparable key, Writable value)
        throws IOException {

        out.append(key, value);
      }

      public void close(Reporter reporter) throws IOException { out.close();}
    };
}
 
Example #16
Source File: MapFile.java    From RDFS with Apache License 2.0
/** Create the named map using the named key comparator. */
public Writer(Configuration conf, FileSystem fs, String dirName,
              WritableComparator comparator, Class valClass,
              SequenceFile.CompressionType compress,
              Progressable progress)
  throws IOException {
  this(conf, fs, dirName, comparator, valClass, 
       compress, new DefaultCodec(), progress);
}
 
Example #17
Source File: MapFileOutputFormat.java    From hadoop with Apache License 2.0
public RecordWriter<WritableComparable, Writable> getRecordWriter(FileSystem ignored, JobConf job,
                                    String name, Progressable progress)
  throws IOException {
  // get the path of the temporary output file 
  Path file = FileOutputFormat.getTaskOutputPath(job, name);
  
  FileSystem fs = file.getFileSystem(job);
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(job)) {
    // find the kind of compression to do
    compressionType = SequenceFileOutputFormat.getOutputCompressionType(job);

    // find the right codec
    Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  
  // ignore the progress parameter, since MapFile is local
  final MapFile.Writer out =
    new MapFile.Writer(job, fs, file.toString(),
                       job.getOutputKeyClass().asSubclass(WritableComparable.class),
                       job.getOutputValueClass().asSubclass(Writable.class),
                       compressionType, codec,
                       progress);

  return new RecordWriter<WritableComparable, Writable>() {

      public void write(WritableComparable key, Writable value)
        throws IOException {

        out.append(key, value);
      }

      public void close(Reporter reporter) throws IOException { out.close();}
    };
}
 
Example #18
Source File: TestReduceTask.java    From RDFS with Apache License 2.0
public void testValueIteratorWithCompression() throws Exception {
  Path tmpDir = new Path("build/test/test.reduce.task.compression");
  Configuration conf = new Configuration();
  DefaultCodec codec = new DefaultCodec();
  codec.setConf(conf);
  for (Pair[] testCase: testCases) {
    runValueIterator(tmpDir, testCase, conf, codec);
  }
}
 
Example #19
Source File: TestReduceTask.java    From big-c with Apache License 2.0
public void testValueIteratorWithCompression() throws Exception {
  Path tmpDir = new Path("build/test/test.reduce.task.compression");
  Configuration conf = new Configuration();
  DefaultCodec codec = new DefaultCodec();
  codec.setConf(conf);
  for (Pair[] testCase: testCases) {
    runValueIterator(tmpDir, testCase, conf, codec);
  }
}
 
Example #20
Source File: HdfsMrsPyramidOutputFormatTest.java    From mrgeo with Apache License 2.0
@Before
public void setup() throws Exception
{
  defaultCodec = DefaultCodec.class.newInstance();
  defaultCompressionType = SequenceFile.CompressionType.BLOCK;
  mockWriter = mock(MapFile.Writer.class);
  outputPath = new Path(outputPathString);
}
 
Example #21
Source File: SequenceFileOutputFormat.java    From RDFS with Apache License 2.0
public RecordWriter<K, V> getRecordWriter(
                                        FileSystem ignored, JobConf job,
                                        String name, Progressable progress)
  throws IOException {
  // get the path of the temporary output file 
  Path file = FileOutputFormat.getTaskOutputPath(job, name);
  
  FileSystem fs = file.getFileSystem(job);
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(job)) {
    // find the kind of compression to do
    compressionType = getOutputCompressionType(job);

    // find the right codec
    Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  final SequenceFile.Writer out = 
    SequenceFile.createWriter(fs, job, file,
                              job.getOutputKeyClass(),
                              job.getOutputValueClass(),
                              compressionType,
                              codec,
                              progress);

  return new RecordWriter<K, V>() {

      public void write(K key, V value)
        throws IOException {

        out.append(key, value);
      }

      public void close(Reporter reporter) throws IOException { out.close();}
    };
}
 
Example #22
Source File: TestReduceTask.java    From hadoop-gpu with Apache License 2.0
public void testValueIteratorWithCompression() throws Exception {
  Path tmpDir = new Path("build/test/test.reduce.task.compression");
  Configuration conf = new Configuration();
  DefaultCodec codec = new DefaultCodec();
  codec.setConf(conf);
  for (Pair[] testCase: testCases) {
    runValueIterator(tmpDir, testCase, conf, codec);
  }
}
 
Example #23
Source File: Compression.java    From hbase with Apache License 2.0
@Override
DefaultCodec getCodec(Configuration conf) {
  if (codec == null) {
    synchronized (lock) {
      if (codec == null) {
        codec = buildCodec(conf);
      }
    }
  }

  return codec;
}
 
Example #24
Source File: FsStateStore.java    From incubator-gobblin with Apache License 2.0
/**
 * See {@link StateStore#put(String, String, T)}.
 *
 * <p>
 *   This implementation does not support putting the state object into an existing store,
 *   as append is yet to be supported by Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
@Override
public void put(String storeName, String tableName, T state) throws IOException {
  String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
  Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);

  if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
    throw new IOException("Failed to create a state file for table " + tmpTableName);
  }

  Closer closer = Closer.create();
  try {
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
        Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
    writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }

  if (this.useTmpFileForPut) {
    Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
    renamePath(tmpTablePath, tablePath);
  }
}
 
Example #25
Source File: FsStateStore.java    From incubator-gobblin with Apache License 2.0
/**
 * See {@link StateStore#putAll(String, String, Collection)}.
 *
 * <p>
 *   This implementation does not support putting the state objects into an existing store,
 *   as append is yet to be supported by Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
@Override
public void putAll(String storeName, String tableName, Collection<T> states) throws IOException {
  String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
  Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);

  if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
    throw new IOException("Failed to create a state file for table " + tmpTableName);
  }

  Closer closer = Closer.create();
  try {
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
        Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
    for (T state : states) {
      writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
    }
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }

  if (this.useTmpFileForPut) {
    Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
    renamePath(tmpTablePath, tablePath);
  }
}
 
Example #26
Source File: SequenceFileProtobufMapReduce.java    From hiped2 with Apache License 2.0
/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);
  job.setJarByClass(SequenceFileProtobufMapReduce.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Stock.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  job.setMapperClass(PbMapper.class);
  job.setReducerClass(PbReducer.class);

  SequenceFileOutputFormat.setCompressOutput(job, true);
  SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
  SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);

  ProtobufSerialization.register(job.getConfiguration());

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
 
Example #27
Source File: SequenceFileProtobufWriter.java    From hiped2 with Apache License 2.0
/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  File inputFile = new File(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  ProtobufSerialization.register(conf);

  SequenceFile.Writer writer =
      SequenceFile.createWriter(conf,
          SequenceFile.Writer.file(outputPath),
          SequenceFile.Writer.keyClass(Text.class),
          SequenceFile.Writer.valueClass(Stock.class),
          SequenceFile.Writer.compression(
              SequenceFile.CompressionType.BLOCK,
              new DefaultCodec()));
  try {
    Text key = new Text();

    for (Stock stock : StockUtils.fromCsvFile(inputFile)) {
      key.set(stock.getSymbol());
      writer.append(key, stock);
    }
  } finally {
    writer.close();
  }
  return 0;
}
 
Example #28
Source File: SequenceFileOutputFormat.java    From hadoop-gpu with Apache License 2.0
public RecordWriter<K, V> getRecordWriter(
                                        FileSystem ignored, JobConf job,
                                        String name, Progressable progress)
  throws IOException {
  // get the path of the temporary output file 
  Path file = FileOutputFormat.getTaskOutputPath(job, name);
  
  FileSystem fs = file.getFileSystem(job);
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(job)) {
    // find the kind of compression to do
    compressionType = getOutputCompressionType(job);

    // find the right codec
    Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  final SequenceFile.Writer out = 
    SequenceFile.createWriter(fs, job, file,
                              job.getOutputKeyClass(),
                              job.getOutputValueClass(),
                              compressionType,
                              codec,
                              progress);

  return new RecordWriter<K, V>() {

      public void write(K key, V value)
        throws IOException {

        out.append(key, value);
      }

      public void close(Reporter reporter) throws IOException { out.close();}
    };
}
 
Example #29
Source File: ReduceTask.java    From hadoop-gpu with Apache License 2.0
public MapOutputCopier(JobConf job, Reporter reporter) {
  setName("MapOutputCopier " + reduceTask.getTaskID() + "." + id);
  LOG.debug(getName() + " created");
  this.reporter = reporter;
  
  if (job.getCompressMapOutput()) {
    Class<? extends CompressionCodec> codecClass =
      job.getMapOutputCompressorClass(DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
    decompressor = CodecPool.getDecompressor(codec);
  }
}