org.apache.hadoop.io.compress.SnappyCodec Java Examples

The following examples show how to use org.apache.hadoop.io.compress.SnappyCodec. Each example notes its original project, author, source file, and license so you can trace it back to the source.
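
Before the project-specific examples, here is a minimal, self-contained sketch of the pattern most of them share: instantiate SnappyCodec, hand it a Configuration, and wrap plain streams with createOutputStream and createInputStream. It assumes the native Hadoop and Snappy libraries are loaded (see the NativeCodeLoader and NativeLibraryChecker examples below); the class name and payload are illustrative, not part of any project above.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.SnappyCodec;

public class SnappyCodecRoundTrip {
  public static void main(String[] args) throws Exception {
    // SnappyCodec is Configurable; it needs a Configuration before use.
    SnappyCodec codec = new SnappyCodec();
    codec.setConf(new Configuration());

    // Compress a small payload into an in-memory buffer.
    byte[] payload = "hello snappy".getBytes("UTF-8");
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    CompressionOutputStream out = codec.createOutputStream(buffer);
    out.write(payload);
    out.close();

    // Decompress it again (a single read suffices for this tiny payload).
    CompressionInputStream in =
        codec.createInputStream(new ByteArrayInputStream(buffer.toByteArray()));
    byte[] recovered = new byte[payload.length];
    int n = in.read(recovered);
    in.close();

    System.out.println(new String(recovered, 0, n, "UTF-8"));
  }
}
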
Example #1
Source Project: hadoop   Author: naver   File: TestNativeCodeLoader.java    License: Apache License 2.0
@Test
public void testNativeCodeLoaded() {
  if (!requireTestJni()) {
    LOG.info("TestNativeCodeLoader: libhadoop.so testing is not required.");
    return;
  }
  if (!NativeCodeLoader.isNativeCodeLoaded()) {
    fail("TestNativeCodeLoader: libhadoop.so testing was required, but " +
        "libhadoop.so was not loaded.");
  }
  assertFalse(NativeCodeLoader.getLibraryName().isEmpty());
  // library names depend on the platform and build environment,
  // so just check that the names are available
  assertFalse(ZlibFactory.getLibraryName().isEmpty());
  if (NativeCodeLoader.buildSupportsSnappy()) {
    assertFalse(SnappyCodec.getLibraryName().isEmpty());
  }
  if (NativeCodeLoader.buildSupportsOpenssl()) {
    assertFalse(OpensslCipher.getLibraryName().isEmpty());
  }
  assertFalse(Lz4Codec.getLibraryName().isEmpty());
  LOG.info("TestNativeCodeLoader: libhadoop.so is loaded.");
}
 
Example #2
Source Project: big-c   Author: yncxcw   File: TestNativeCodeLoader.java    License: Apache License 2.0
@Test
public void testNativeCodeLoaded() {
  if (!requireTestJni()) {
    LOG.info("TestNativeCodeLoader: libhadoop.so testing is not required.");
    return;
  }
  if (!NativeCodeLoader.isNativeCodeLoaded()) {
    fail("TestNativeCodeLoader: libhadoop.so testing was required, but " +
        "libhadoop.so was not loaded.");
  }
  assertFalse(NativeCodeLoader.getLibraryName().isEmpty());
  // library names depend on the platform and build environment,
  // so just check that the names are available
  assertFalse(ZlibFactory.getLibraryName().isEmpty());
  if (NativeCodeLoader.buildSupportsSnappy()) {
    assertFalse(SnappyCodec.getLibraryName().isEmpty());
  }
  if (NativeCodeLoader.buildSupportsOpenssl()) {
    assertFalse(OpensslCipher.getLibraryName().isEmpty());
  }
  assertFalse(Lz4Codec.getLibraryName().isEmpty());
  LOG.info("TestNativeCodeLoader: libhadoop.so is loaded.");
}
 
Example #3
Source Project: ignite   Author: apache   File: HadoopWordCount2.java    License: Apache License 2.0
/**
 * Sets the task classes and related settings on the given job, if requested.
 *
 * @param job Job to configure.
 * @param setMapper Option to set the mapper and input format classes.
 * @param setCombiner Option to set the combiner class.
 * @param setReducer Option to set the reducer and output format classes.
 * @param outputCompression Option to write Snappy-compressed SequenceFile output.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer,
        boolean outputCompression) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }

    if (setCombiner)
        job.setCombinerClass(HadoopWordCount2Combiner.class);

    if (setReducer) {
        job.setReducerClass(HadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }

    if (outputCompression) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

        SequenceFileOutputFormat.setCompressOutput(job, true);

        job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
    }
}
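
As a usage note, a hypothetical driver fragment might call the helper above with output compression enabled; the class name is assumed from the file name, and the job name and paths are illustrative only.

// Hypothetical driver fragment; job name and paths are illustrative.
Job job = Job.getInstance(new Configuration(), "wordcount-snappy");
HadoopWordCount2.setTasksClasses(job, true, true, true, true); // mapper, combiner, reducer, Snappy output
FileInputFormat.addInputPath(job, new Path("/input/words"));
FileOutputFormat.setOutputPath(job, new Path("/output/words"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
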
 
Example #4
Source Project: incubator-retired-blur   Author: apache   File: CsvBlurDriverTest.java    License: Apache License 2.0
@Test
public void testCsvBlurDriverTest3() throws Exception {
  Configuration configurationSetup = new Configuration();
  ControllerPool controllerPool = new CsvBlurDriver.ControllerPool() {
    @Override
    public Iface getClient(String controllerConnectionStr) {
      return getMockIface();
    }
  };
  AtomicReference<Callable<Void>> ref = new AtomicReference<Callable<Void>>();
  Job job = CsvBlurDriver.setupJob(configurationSetup, controllerPool, ref, "-c", "host:40010", "-d", "family1",
      "col1", "col2", "-d", "family2", "col3", "col4", "-t", "table1", "-i", _path1.toString(), "-i",
      _path2.toString(), "-S", "-C", "1000000", "2000000", "-p", "SNAPPY");
  assertNotNull(job);
  Configuration configuration = job.getConfiguration();
  TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(configuration);
  assertEquals(tableDescriptor.getName(), "table1");
  Collection<String> inputs = configuration.getStringCollection("mapred.input.dir");
  assertEquals(2, inputs.size());
  Map<String, List<String>> familyAndColumnNameMap = CsvBlurMapper.getFamilyAndColumnNameMap(configuration);
  assertEquals(2, familyAndColumnNameMap.size());
  assertEquals("true", configuration.get(CsvBlurDriver.MAPRED_COMPRESS_MAP_OUTPUT));
  assertEquals(SnappyCodec.class.getName(), configuration.get(CsvBlurDriver.MAPRED_MAP_OUTPUT_COMPRESSION_CODEC));
}
 
Example #5
Source Project: pentaho-hadoop-shims   Author: pentaho   File: CommonSnappyShim.java    License: Apache License 2.0
/**
 * Gets an InputStream that uses the snappy codec and wraps the supplied base input stream.
 *
 * @param bufferSize the buffer size for the codec to use (in bytes)
 * @param in  the base input stream to wrap around
 * @return an InputStream that uses the Snappy codec
 * @throws Exception if snappy is not available or an error occurs during reflection
 */
public InputStream getSnappyInputStream( int bufferSize, InputStream in ) throws Exception {
  if ( !isHadoopSnappyAvailable() ) {
    throw new Exception( "Hadoop-snappy does not seem to be available" );
  }

  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    SnappyCodec c = new SnappyCodec();
    Configuration newConf = new Configuration();
    newConf.set( IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, "" + bufferSize );
    c.setConf( newConf );
    return c.createInputStream( in );
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
 
Example #6
Source Project: pentaho-hadoop-shims   Author: pentaho   File: CommonSnappyShim.java    License: Apache License 2.0
/**
 * Gets an OutputStream that uses the snappy codec and wraps the supplied base output stream.
 *
 * @param bufferSize the buffer size for the codec to use (in bytes)
 * @param out the base output stream to wrap around
 * @return an OutputStream that uses the Snappy codec
 * @throws Exception if snappy is not available or an error occurs during reflection
 */
public OutputStream getSnappyOutputStream( int bufferSize, OutputStream out ) throws Exception {
  if ( !isHadoopSnappyAvailable() ) {
    throw new Exception( "Hadoop-snappy does not seem to be available" );
  }

  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    SnappyCodec c = new SnappyCodec();
    Configuration newConf = new Configuration();
    newConf.set( IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, "" + bufferSize );
    c.setConf( newConf );
    return c.createOutputStream( out );
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
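
The two shim methods above pair naturally for a compress/decompress round trip. A minimal sketch follows, assuming an available CommonSnappyShim instance, a 64 KB buffer size, and the usual java.io imports (all assumptions for illustration).

// Sketch: round-trip a payload through the shim methods above.
// The shim instance, buffer size, and payload are illustrative assumptions.
static byte[] snappyRoundTrip(CommonSnappyShim shim, byte[] original) throws Exception {
  ByteArrayOutputStream compressed = new ByteArrayOutputStream();
  OutputStream snappyOut = shim.getSnappyOutputStream(64 * 1024, compressed);
  snappyOut.write(original);
  snappyOut.close();

  InputStream snappyIn =
      shim.getSnappyInputStream(64 * 1024, new ByteArrayInputStream(compressed.toByteArray()));
  byte[] recovered = new byte[original.length];
  int read = snappyIn.read(recovered); // a single read suffices for small payloads
  snappyIn.close();
  return java.util.Arrays.copyOf(recovered, Math.max(read, 0));
}
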
 
Example #7
Source Project: localization_nifi   Author: wangrenlei   File: AbstractHadoopProcessor.java    License: Apache License 2.0
@Override
public String toString() {
    switch (this) {
        case NONE: return "NONE";
        case DEFAULT: return DefaultCodec.class.getName();
        case BZIP: return BZip2Codec.class.getName();
        case GZIP: return GzipCodec.class.getName();
        case LZ4: return Lz4Codec.class.getName();
        case SNAPPY: return SnappyCodec.class.getName();
        case AUTOMATIC: return "Automatically Detected";
    }
    return null;
}
 
Example #8
Source Project: sylph   Author: harbby   File: HdfsSink2.java    License: Apache License 2.0
public HdfsSink2(Hdfs2SinkConfig config)
        throws ClassNotFoundException
{
    this.batchSize = config.getBatchBufferSize();
    this.writerDir = config.getWriteDir();
    switch (config.getZipType().trim().toLowerCase()) {
        case "lzo":
            codecClass = (Class<? extends CompressionCodec>) Class.forName("com.hadoop.compression.lzo.LzopCodec");
            break;
        case "lz4":
            codecClass = Lz4Codec.class;
            break;
        case "snappy":
            codecClass = SnappyCodec.class;
            break;
        case "gzip":
            codecClass = GzipCodec.class;
            break;
        case "bzip2":
            codecClass = BZip2Codec.class;
            break;
        case "default":
            codecClass = DefaultCodec.class;
            break;
        default:
            codecClass = NoneCodec.class;
    }
}
 
Example #9
Source Project: hadoop   Author: naver   File: TestSnappyCompressorDecompressor.java    License: Apache License 2.0
@Test
public void testSnappyDirectBlockCompression() {
  int[] size = { 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 };    
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
  try {
    for (int i = 0; i < size.length; i++) {
      compressDecompressLoop(size[i]);
    }
  } catch (IOException ex) {
    fail("testSnappyDirectBlockCompression ex !!!" + ex);
  }
}
 
Example #10
Source Project: big-c   Author: yncxcw   File: TestSnappyCompressorDecompressor.java    License: Apache License 2.0
@Test
public void testSnappyDirectBlockCompression() {
  int[] size = { 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 };    
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
  try {
    for (int i = 0; i < size.length; i++) {
      compressDecompressLoop(size[i]);
    }
  } catch (IOException ex) {
    fail("testSnappyDirectBlockCompression ex !!!" + ex);
  }
}
 
Example #11
Source Project: dkpro-c4corpus   Author: dkpro   File: Phase2ExactMatchDeDuplication.java    License: Apache License 2.0
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());
    //set from the command line

    job.setJarByClass(Phase2ExactMatchDeDuplication.class);
    job.setJobName(Phase2ExactMatchDeDuplication.class.getName());

    // mapper
    job.setMapperClass(ExactMatchDetectionMapper.class);

    // we will compress the mapper's output (use fast Snappy compressor)
    job.getConfiguration().setBoolean(Job.MAP_OUTPUT_COMPRESS, true);
    job.getConfiguration()
            .setClass(Job.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

    // reducer
    job.setReducerClass(UniqueWarcWriterReducer.class);
    // no combiner, as the output classes in mapper and reducer are different!

    // input-output is warc
    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(WARCOutputFormat.class);

    // mapper output data
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(WARCWritable.class);

    // set output compression to GZip
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
 
Example #12
Source Project: dkpro-c4corpus   Author: dkpro   File: Phase1FullJob.java    License: Apache License 2.0
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());
    // set from the command line

    job.setJarByClass(Phase1FullJob.class);
    job.setJobName(Phase1FullJob.class.getName());

    // mapper
    job.setMapperClass(MapperClass.class);

    // we will compress the mapper's output (use fast Snappy compressor)
    job.getConfiguration().setBoolean(Job.MAP_OUTPUT_COMPRESS, true);
    job.getConfiguration()
            .setClass(Job.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

    // reducer
    job.setReducerClass(SimpleWarcWriterReducer.class);

    // input-output is warc
    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(WARCOutputFormat.class);

    // mapper output data
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(WARCWritable.class);

    // set output compression to GZip
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
 
Example #13
Source Project: attic-apex-malhar   Author: apache   File: FilterStreamCodec.java    License: Apache License 2.0
public SnappyFilterStreamContext(OutputStream outputStream) throws IOException
{
  SnappyCodec codec = new SnappyCodec();
  codec.setConf(new Configuration());
  try {
    filterStream = new SnappyFilterStream(
        codec.createOutputStream(outputStream, new SnappyCompressor(bufferSize)));
  } catch (IOException e) {
    throw e;
  }
}
 
Example #14
Source Project: attic-apex-malhar   Author: apache   File: AbstractFileOutputOperatorTest.java    License: Apache License 2.0
private boolean checkNativeSnappy()
{
  try {
    SnappyCodec.checkNativeCodeLoaded();
  } catch (UnsatisfiedLinkError u) {
    LOG.error("WARNING: Skipping Snappy compression test since native libraries were not found.");
    return true;
  } catch (RuntimeException e) {
    LOG.error("WARNING: Skipping Snappy compression test since native libraries were not found.");
    return true;
  }
  return false;
}
 
Example #15
Source Project: attic-apex-malhar   Author: apache   File: AbstractFileOutputOperatorTest.java    License: Apache License 2.0
@Test
public void testSnappyCompressionSimple() throws IOException
{
  if (checkNativeSnappy()) {
    return;
  }

  File snappyFile = new File(testMeta.getDir(), "snappyTestFile.snappy");

  BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(snappyFile));
  Configuration conf = new Configuration();
  CompressionCodec codec = (CompressionCodec)ReflectionUtils.newInstance(SnappyCodec.class, conf);
  FilterStreamCodec.SnappyFilterStream filterStream = new FilterStreamCodec.SnappyFilterStream(
      codec.createOutputStream(os));

  int ONE_MB = 1024 * 1024;

  String testStr = "TestSnap-16bytes";
  for (int i = 0; i < ONE_MB; i++) { // write the 16-byte string 1M times (16 MB total)
    filterStream.write(testStr.getBytes());
  }
  filterStream.flush();
  filterStream.close();

  CompressionInputStream is = codec.createInputStream(new FileInputStream(snappyFile));

  byte[] recovered = new byte[testStr.length()];
  int bytesRead = is.read(recovered);
  is.close();
  assertEquals(testStr, new String(recovered));
}
 
Example #16
Source Project: hiped2   Author: alexholmes   File: BloomFilterCreator.java    License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  JobConf job = new JobConf(conf);
  job.setJarByClass(BloomFilterCreator.class);

  job.set(AvroJob.OUTPUT_SCHEMA, AvroBytesRecord.SCHEMA.toString());
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setInputFormat(KeyValueTextInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);

  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(BloomFilter.class);

  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(BloomFilter.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}
 
Example #17
Source Project: secor   Author: pinterest   File: JsonORCFileReaderWriterFactory.java    License: Apache License 2.0
/**
 * Maps a Hadoop compression codec to the compression kind used in ORC.
 *
 * @param codec the Hadoop compression codec in use
 * @return the corresponding ORC CompressionKind
 */
private CompressionKind resolveCompression(CompressionCodec codec) {
    if (codec instanceof Lz4Codec)
        return CompressionKind.LZ4;
    else if (codec instanceof SnappyCodec)
        return CompressionKind.SNAPPY;
    // although GZip and ZLIB are not the same thing,
    // there is no better-named codec for this case,
    // so use the Hadoop Gzip codec to enable ORC ZLIB compression
    else if (codec instanceof GzipCodec)
        return CompressionKind.ZLIB;
    else
        return CompressionKind.NONE;
}
 
Example #18
Source Project: pentaho-hadoop-shims   Author: pentaho   File: SnappyShimImpl.java    License: Apache License 2.0
/**
 * Tests whether hadoop-snappy (not to be confused with other java-based snappy implementations such as jsnappy or
 * snappy-java) plus the native snappy libraries are available.
 *
 * @return true if hadoop-snappy is available on the classpath
 */
public boolean isHadoopSnappyAvailable() {
  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    return SnappyCodec.isNativeCodeLoaded();
  } catch ( Throwable t ) {
    return false;
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
 
Example #19
Source Project: presto   Author: prestosql   File: RcFileTester.java    License: Apache License 2.0
@Override
Optional<String> getCodecName()
{
    return Optional.of(SnappyCodec.class.getName());
}
 
Example #20
Source Project: hadoop   Author: naver   File: NativeLibraryChecker.java    License: Apache License 2.0
/**
 * A tool to test native library availability, 
 */
public static void main(String[] args) {
  String usage = "NativeLibraryChecker [-a|-h]\n"
      + "  -a  use -a to check all libraries are available\n"
      + "      by default just check hadoop library (and\n"
      + "      winutils.exe on Windows OS) is available\n"
      + "      exit with error code 1 if check failed\n"
      + "  -h  print this message\n";
  if (args.length > 1 ||
      (args.length == 1 &&
          !(args[0].equals("-a") || args[0].equals("-h")))) {
    System.err.println(usage);
    ExitUtil.terminate(1);
  }
  boolean checkAll = false;
  if (args.length == 1) {
    if (args[0].equals("-h")) {
      System.out.println(usage);
      return;
    }
    checkAll = true;
  }
  Configuration conf = new Configuration();
  boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded();
  boolean zlibLoaded = false;
  boolean snappyLoaded = false;
  // lz4 is linked within libhadoop
  boolean lz4Loaded = nativeHadoopLoaded;
  boolean bzip2Loaded = Bzip2Factory.isNativeBzip2Loaded(conf);
  boolean openSslLoaded = false;
  boolean winutilsExists = false;

  String openSslDetail = "";
  String hadoopLibraryName = "";
  String zlibLibraryName = "";
  String snappyLibraryName = "";
  String lz4LibraryName = "";
  String bzip2LibraryName = "";
  String winutilsPath = null;

  if (nativeHadoopLoaded) {
    hadoopLibraryName = NativeCodeLoader.getLibraryName();
    zlibLoaded = ZlibFactory.isNativeZlibLoaded(conf);
    if (zlibLoaded) {
      zlibLibraryName = ZlibFactory.getLibraryName();
    }
    snappyLoaded = NativeCodeLoader.buildSupportsSnappy() &&
        SnappyCodec.isNativeCodeLoaded();
    if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) {
      snappyLibraryName = SnappyCodec.getLibraryName();
    }
    if (OpensslCipher.getLoadingFailureReason() != null) {
      openSslDetail = OpensslCipher.getLoadingFailureReason();
      openSslLoaded = false;
    } else {
      openSslDetail = OpensslCipher.getLibraryName();
      openSslLoaded = true;
    }
    if (lz4Loaded) {
      lz4LibraryName = Lz4Codec.getLibraryName();
    }
    if (bzip2Loaded) {
      bzip2LibraryName = Bzip2Factory.getLibraryName(conf);
    }
  }

  // winutils.exe is required on Windows
  winutilsPath = Shell.getWinUtilsPath();
  if (winutilsPath != null) {
    winutilsExists = true;
  } else {
    winutilsPath = "";
  }

  System.out.println("Native library checking:");
  System.out.printf("hadoop:  %b %s%n", nativeHadoopLoaded, hadoopLibraryName);
  System.out.printf("zlib:    %b %s%n", zlibLoaded, zlibLibraryName);
  System.out.printf("snappy:  %b %s%n", snappyLoaded, snappyLibraryName);
  System.out.printf("lz4:     %b %s%n", lz4Loaded, lz4LibraryName);
  System.out.printf("bzip2:   %b %s%n", bzip2Loaded, bzip2LibraryName);
  System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail);
  if (Shell.WINDOWS) {
    System.out.printf("winutils: %b %s%n", winutilsExists, winutilsPath);
  }

  if ((!nativeHadoopLoaded) || (Shell.WINDOWS && (!winutilsExists)) ||
      (checkAll && !(zlibLoaded && snappyLoaded && lz4Loaded && bzip2Loaded))) {
    // return 1 to indicated check failed
    ExitUtil.terminate(1);
  }
}
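
In a Hadoop distribution this checker is exposed as the hadoop checknative [-a|-h] command, which prints the same per-library report produced by the printf calls above.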
 
Example #21
Source Project: hadoop   Author: naver   File: TestSnappyCompressorDecompressor.java    License: Apache License 2.0
@Before
public void before() {
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
}
 
Example #22
Source Project: big-c   Author: yncxcw   File: NativeLibraryChecker.java    License: Apache License 2.0
/**
 * A tool to test native library availability, 
 */
public static void main(String[] args) {
  String usage = "NativeLibraryChecker [-a|-h]\n"
      + "  -a  use -a to check all libraries are available\n"
      + "      by default just check hadoop library (and\n"
      + "      winutils.exe on Windows OS) is available\n"
      + "      exit with error code 1 if check failed\n"
      + "  -h  print this message\n";
  if (args.length > 1 ||
      (args.length == 1 &&
          !(args[0].equals("-a") || args[0].equals("-h")))) {
    System.err.println(usage);
    ExitUtil.terminate(1);
  }
  boolean checkAll = false;
  if (args.length == 1) {
    if (args[0].equals("-h")) {
      System.out.println(usage);
      return;
    }
    checkAll = true;
  }
  Configuration conf = new Configuration();
  boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded();
  boolean zlibLoaded = false;
  boolean snappyLoaded = false;
  // lz4 is linked within libhadoop
  boolean lz4Loaded = nativeHadoopLoaded;
  boolean bzip2Loaded = Bzip2Factory.isNativeBzip2Loaded(conf);
  boolean openSslLoaded = false;
  boolean winutilsExists = false;

  String openSslDetail = "";
  String hadoopLibraryName = "";
  String zlibLibraryName = "";
  String snappyLibraryName = "";
  String lz4LibraryName = "";
  String bzip2LibraryName = "";
  String winutilsPath = null;

  if (nativeHadoopLoaded) {
    hadoopLibraryName = NativeCodeLoader.getLibraryName();
    zlibLoaded = ZlibFactory.isNativeZlibLoaded(conf);
    if (zlibLoaded) {
      zlibLibraryName = ZlibFactory.getLibraryName();
    }
    snappyLoaded = NativeCodeLoader.buildSupportsSnappy() &&
        SnappyCodec.isNativeCodeLoaded();
    if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) {
      snappyLibraryName = SnappyCodec.getLibraryName();
    }
    if (OpensslCipher.getLoadingFailureReason() != null) {
      openSslDetail = OpensslCipher.getLoadingFailureReason();
      openSslLoaded = false;
    } else {
      openSslDetail = OpensslCipher.getLibraryName();
      openSslLoaded = true;
    }
    if (lz4Loaded) {
      lz4LibraryName = Lz4Codec.getLibraryName();
    }
    if (bzip2Loaded) {
      bzip2LibraryName = Bzip2Factory.getLibraryName(conf);
    }
  }

  // winutils.exe is required on Windows
  winutilsPath = Shell.getWinUtilsPath();
  if (winutilsPath != null) {
    winutilsExists = true;
  } else {
    winutilsPath = "";
  }

  System.out.println("Native library checking:");
  System.out.printf("hadoop:  %b %s%n", nativeHadoopLoaded, hadoopLibraryName);
  System.out.printf("zlib:    %b %s%n", zlibLoaded, zlibLibraryName);
  System.out.printf("snappy:  %b %s%n", snappyLoaded, snappyLibraryName);
  System.out.printf("lz4:     %b %s%n", lz4Loaded, lz4LibraryName);
  System.out.printf("bzip2:   %b %s%n", bzip2Loaded, bzip2LibraryName);
  System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail);
  if (Shell.WINDOWS) {
    System.out.printf("winutils: %b %s%n", winutilsExists, winutilsPath);
  }

  if ((!nativeHadoopLoaded) || (Shell.WINDOWS && (!winutilsExists)) ||
      (checkAll && !(zlibLoaded && snappyLoaded && lz4Loaded && bzip2Loaded))) {
    // return 1 to indicated check failed
    ExitUtil.terminate(1);
  }
}
 
Example #23
Source Project: big-c   Author: yncxcw   File: TestSnappyCompressorDecompressor.java    License: Apache License 2.0
@Before
public void before() {
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
}
 
Example #24
Source Project: hiped2   Author: alexholmes   File: DBImportMapReduce.java    License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.OutputFileOption.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path output = new Path(cli.getArgValueAsString(CliCommonOpts.OutputFileOption.OUTPUT));

  Configuration conf = super.getConf();

  DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
      "jdbc:mysql://localhost/sqoop_test" +
          "?user=hip_sqoop_user&password=password");

  JobConf job = new JobConf(conf);
  job.setJarByClass(DBImportMapReduce.class);

  job.setInputFormat(DBInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);
  AvroJob.setOutputSchema(job, Stock.SCHEMA$);
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setMapperClass(Map.class);

  job.setNumMapTasks(4);
  job.setNumReduceTasks(0);

  job.setMapOutputKeyClass(AvroWrapper.class);
  job.setMapOutputValueClass(NullWritable.class);

  job.setOutputKeyClass(AvroWrapper.class);
  job.setOutputValueClass(NullWritable.class);

  FileOutputFormat.setOutputPath(job, output);

  DBInputFormat.setInput(
      job,
      StockDbWritable.class,
      "select * from stocks",
      "SELECT COUNT(id) FROM stocks");

  RunningJob runningJob = JobClient.runJob(job);

  return runningJob.isSuccessful() ? 0 : 1;
}
 
Example #25
Source Project: HBase-ToHDFS   Author: tmalaska   File: ExportHBaseTableToAvro.java    License: Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println("ExportHBaseTableToAvro {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowKeyColumn.Optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String rowKeyColumn = "";
  
  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToAvro.class);
  job.setJobName("ExportHBaseTableToAvro ");

  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
  
  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                        // MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  AvroKeyOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();

  FileSystem fs = FileSystem.get(job.getConfiguration());

  AvroJob.setOutputKeySchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
 
Example #26
Source Project: HBase-ToHDFS   Author: tmalaska   File: ExportHBaseTableToDelimiteredSeq.java    License: Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out
        .println("ExportHBaseTableToDelimiteredSeq {tableName} {ColumnFamily} {outputPath} {compressionCodec} {schemaLocationOnLocal} {delimiter} {rowKeyColumn.optional");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String delimiter = args[5];

  String rowKeyColumn = "";
  if (args.length > 6) {
    rowKeyColumn = args[6];
  }
  
  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  
  HBaseConfiguration.addHbaseResources(job.getConfiguration());
  
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
  job.getConfiguration().set(OUTPUT_PATH_CONF, outputPath);
  job.getConfiguration().set(DELIMITER_CONF, delimiter);

  job.setJarByClass(ExportHBaseTableToDelimiteredSeq.class);
  job.setJobName("ExportHBaseTableToDelimiteredSeq ");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                        // MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(SequenceFileOutputFormat.class); 
  SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
  
  if (compressionCodec.equals("snappy")) {
    SequenceFileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    //nothing
  }
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);
  
  job.setNumReduceTasks(0);
  
  boolean b = job.waitForCompletion(true);
}
 
Example #27
Source Project: HBase-ToHDFS   Author: tmalaska   File: ExportHBaseTableToParquet.java    License: Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out
        .println("ExportHBaseTableToParquet {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowkey.column.optional");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];

  String rowKeyColumn = "";
  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToParquet.class);
  job.setJobName("ExportHBaseTableToParquet ");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                        // MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  AvroParquetOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();

  FileSystem fs = FileSystem.get(job.getConfiguration());
  AvroParquetOutputFormat.setSchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
 
Example #28
Source Project: hiped2   Author: alexholmes   File: AvroMixedMapReduce.java    License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {


  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  JobConf job = new JobConf(conf);
  job.setJarByClass(AvroMixedMapReduce.class);

  job.set(AvroJob.INPUT_SCHEMA, Stock.SCHEMA$.toString());
  job.set(AvroJob.OUTPUT_SCHEMA, StockAvg.SCHEMA$.toString());
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setInputFormat(AvroInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);

  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(DoubleWritable.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}
 
Example #29
Source Project: hiped2   Author: alexholmes   File: BloomFilterCreator.java    License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(ReplicatedJoin.UserOptions.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path usersPath = new Path(cli.getArgValueAsString(ReplicatedJoin.UserOptions.USERS));
  Path outputPath = new Path(cli.getArgValueAsString(ReplicatedJoin.UserOptions.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);

  job.setJarByClass(BloomFilterCreator.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  AvroJob.setOutputKeySchema(job, AvroBytesRecord.SCHEMA);
  job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setOutputFormatClass(AvroKeyOutputFormat.class);

  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(BloomFilter.class);

  FileInputFormat.setInputPaths(job, usersPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.setNumReduceTasks(1);

  return job.waitForCompletion(true) ? 0 : 1;
}