Java Code Examples for org.apache.hadoop.io.compress.SnappyCodec

The following examples show how to use org.apache.hadoop.io.compress.SnappyCodec. These examples are extracted from open source projects; the source project and file are noted above each example.
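
As a quick orientation before the project examples, here is a minimal compress/decompress round trip with SnappyCodec. This is a sketch assuming a Hadoop build whose native libhadoop was compiled with Snappy support; the class name SnappyRoundTrip and the sample payload are illustrative only.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.SnappyCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class SnappyRoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // instantiate the codec the way Hadoop does internally, so setConf is called
    CompressionCodec codec = ReflectionUtils.newInstance(SnappyCodec.class, conf);

    // compress a small payload into an in-memory buffer
    ByteArrayOutputStream compressed = new ByteArrayOutputStream();
    try (OutputStream out = codec.createOutputStream(compressed)) {
      out.write("hello snappy".getBytes("UTF-8"));
    }

    // decompress it back and print the original text
    try (InputStream in = codec.createInputStream(new ByteArrayInputStream(compressed.toByteArray()))) {
      byte[] buf = new byte[64];
      int n = in.read(buf);
      System.out.println(new String(buf, 0, n, "UTF-8"));
    }
  }
}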
Example 1
Source Project: hadoop   Source File: TestNativeCodeLoader.java    License: Apache License 2.0
@Test
public void testNativeCodeLoaded() {
  if (!requireTestJni()) {
    LOG.info("TestNativeCodeLoader: libhadoop.so testing is not required.");
    return;
  }
  if (!NativeCodeLoader.isNativeCodeLoaded()) {
    fail("TestNativeCodeLoader: libhadoop.so testing was required, but " +
        "libhadoop.so was not loaded.");
  }
  assertFalse(NativeCodeLoader.getLibraryName().isEmpty());
  // library names depend on the platform and build environment,
  // so just check that the names are available
  assertFalse(ZlibFactory.getLibraryName().isEmpty());
  if (NativeCodeLoader.buildSupportsSnappy()) {
    assertFalse(SnappyCodec.getLibraryName().isEmpty());
  }
  if (NativeCodeLoader.buildSupportsOpenssl()) {
    assertFalse(OpensslCipher.getLibraryName().isEmpty());
  }
  assertFalse(Lz4Codec.getLibraryName().isEmpty());
  LOG.info("TestNativeCodeLoader: libhadoop.so is loaded.");
}
 
Example 2
Source Project: big-c   Source File: TestNativeCodeLoader.java    License: Apache License 2.0
@Test
public void testNativeCodeLoaded() {
  if (!requireTestJni()) {
    LOG.info("TestNativeCodeLoader: libhadoop.so testing is not required.");
    return;
  }
  if (!NativeCodeLoader.isNativeCodeLoaded()) {
    fail("TestNativeCodeLoader: libhadoop.so testing was required, but " +
        "libhadoop.so was not loaded.");
  }
  assertFalse(NativeCodeLoader.getLibraryName().isEmpty());
  // library names depend on the platform and build environment,
  // so just check that the names are available
  assertFalse(ZlibFactory.getLibraryName().isEmpty());
  if (NativeCodeLoader.buildSupportsSnappy()) {
    assertFalse(SnappyCodec.getLibraryName().isEmpty());
  }
  if (NativeCodeLoader.buildSupportsOpenssl()) {
    assertFalse(OpensslCipher.getLibraryName().isEmpty());
  }
  assertFalse(Lz4Codec.getLibraryName().isEmpty());
  LOG.info("TestNativeCodeLoader: libhadoop.so is loaded.");
}
 
Example 3
Source Project: ignite   Source File: HadoopWordCount2.java    License: Apache License 2.0
/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param job Configuration to change.
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 * @param outputCompression Option to compress the job output with the Snappy codec.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer,
        boolean outputCompression) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }

    if (setCombiner)
        job.setCombinerClass(HadoopWordCount2Combiner.class);

    if (setReducer) {
        job.setReducerClass(HadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }

    if (outputCompression) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

        SequenceFileOutputFormat.setCompressOutput(job, true);

        job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
    }
}
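
Setting FileOutputFormat.COMPRESS_CODEC to the codec's class name, as above, is equivalent to calling FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class), except that the helper also enables the compress-output flag, which this example sets separately via SequenceFileOutputFormat.setCompressOutput.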
 
Example 4
Source Project: incubator-retired-blur   Source File: CsvBlurDriverTest.java    License: Apache License 2.0
@Test
public void testCsvBlurDriverTest3() throws Exception {
  Configuration configurationSetup = new Configuration();
  ControllerPool controllerPool = new CsvBlurDriver.ControllerPool() {
    @Override
    public Iface getClient(String controllerConnectionStr) {
      return getMockIface();
    }
  };
  AtomicReference<Callable<Void>> ref = new AtomicReference<Callable<Void>>();
  Job job = CsvBlurDriver.setupJob(configurationSetup, controllerPool, ref, "-c", "host:40010", "-d", "family1",
      "col1", "col2", "-d", "family2", "col3", "col4", "-t", "table1", "-i", _path1.toString(), "-i",
      _path2.toString(), "-S", "-C", "1000000", "2000000", "-p", "SNAPPY");
  assertNotNull(job);
  Configuration configuration = job.getConfiguration();
  TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(configuration);
  assertEquals(tableDescriptor.getName(), "table1");
  Collection<String> inputs = configuration.getStringCollection("mapred.input.dir");
  assertEquals(2, inputs.size());
  Map<String, List<String>> familyAndColumnNameMap = CsvBlurMapper.getFamilyAndColumnNameMap(configuration);
  assertEquals(2, familyAndColumnNameMap.size());
  assertEquals("true", configuration.get(CsvBlurDriver.MAPRED_COMPRESS_MAP_OUTPUT));
  assertEquals(SnappyCodec.class.getName(), configuration.get(CsvBlurDriver.MAPRED_MAP_OUTPUT_COMPRESSION_CODEC));
}
 
Example 5
Source Project: pentaho-hadoop-shims   Source File: CommonSnappyShim.java    License: Apache License 2.0
/**
 * Gets an InputStream that uses the snappy codec and wraps the supplied base input stream.
 *
 * @param bufferSize the buffer size for the codec to use (in bytes)
 * @param in  the base input stream to wrap around
 * @return an InputStream that uses the Snappy codec
 * @throws Exception if snappy is not available or an error occurs during reflection
 */
public InputStream getSnappyInputStream( int bufferSize, InputStream in ) throws Exception {
  if ( !isHadoopSnappyAvailable() ) {
    throw new Exception( "Hadoop-snappy does not seem to be available" );
  }

  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    SnappyCodec c = new SnappyCodec();
    Configuration newConf = new Configuration();
    newConf.set( IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, "" + bufferSize );
    c.setConf( newConf );
    return c.createInputStream( in );
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
 
Example 6
Source Project: pentaho-hadoop-shims   Source File: CommonSnappyShim.java    License: Apache License 2.0
/**
 * Gets an OutputStream that uses the snappy codec and wraps the supplied base output stream.
 *
 * @param bufferSize the buffer size for the codec to use (in bytes)
 * @param out the base output stream to wrap around
 * @return an OutputStream that uses the Snappy codec
 * @throws Exception if snappy is not available or an error occurs during reflection
 */
public OutputStream getSnappyOutputStream( int bufferSize, OutputStream out ) throws Exception {
  if ( !isHadoopSnappyAvailable() ) {
    throw new Exception( "Hadoop-snappy does not seem to be available" );
  }

  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    SnappyCodec c = new SnappyCodec();
    Configuration newConf = new Configuration();
    newConf.set( IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, "" + bufferSize );
    c.setConf( newConf );
    return c.createOutputStream( out );
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
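
A hypothetical caller of the two shim methods above might look like the following. This is a sketch for illustration only; the no-arg CommonSnappyShim construction, buffer size, and file path are assumptions, since the examples show only the shim's internals.

// minimal usage sketch, assuming CommonSnappyShim has an accessible no-arg constructor
public void snappyShimRoundTrip() throws Exception {
  CommonSnappyShim shim = new CommonSnappyShim();
  int bufferSize = 256 * 1024; // 256 KB codec buffer

  // compress a payload to a file
  try (java.io.OutputStream out =
           shim.getSnappyOutputStream(bufferSize, new java.io.FileOutputStream("/tmp/data.snappy"))) {
    out.write("payload".getBytes("UTF-8"));
  }

  // read it back, decompressed
  try (java.io.InputStream in =
           shim.getSnappyInputStream(bufferSize, new java.io.FileInputStream("/tmp/data.snappy"))) {
    byte[] buf = new byte[64];
    int n = in.read(buf);
    System.out.println(new String(buf, 0, n, "UTF-8"));
  }
}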
 
Example 7
Source Project: localization_nifi   Source File: AbstractHadoopProcessor.java    License: Apache License 2.0
@Override
public String toString() {
    switch (this) {
        case NONE: return "NONE";
        case DEFAULT: return DefaultCodec.class.getName();
        case BZIP: return BZip2Codec.class.getName();
        case GZIP: return GzipCodec.class.getName();
        case LZ4: return Lz4Codec.class.getName();
        case SNAPPY: return SnappyCodec.class.getName();
        case AUTOMATIC: return "Automatically Detected";
    }
    return null;
}
 
Example 8
Source Project: sylph   Source File: HdfsSink2.java    License: Apache License 2.0
public HdfsSink2(Hdfs2SinkConfig config)
        throws ClassNotFoundException
{
    this.batchSize = config.getBatchBufferSize();
    this.writerDir = config.getWriteDir();
    switch (config.getZipType().trim().toLowerCase()) {
        case "lzo":
            codecClass = (Class<? extends CompressionCodec>) Class.forName("com.hadoop.compression.lzo.LzopCodec");
            break;
        case "lz4":
            codecClass = Lz4Codec.class;
            break;
        case "snappy":
            codecClass = SnappyCodec.class;
            break;
        case "gzip":
            codecClass = GzipCodec.class;
            break;
        case "bzip2":
            codecClass = BZip2Codec.class;
            break;
        case "default":
            codecClass = DefaultCodec.class;
            break;
        default:
            codecClass = NoneCodec.class;
    }
}
 
Example 9
Source Project: hadoop   Source File: TestSnappyCompressorDecompressor.java    License: Apache License 2.0
@Test
public void testSnappyDirectBlockCompression() {
  int[] size = { 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 };    
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
  try {
    for (int i = 0; i < size.length; i++) {
      compressDecompressLoop(size[i]);
    }
  } catch (IOException ex) {
    fail("testSnappyDirectBlockCompression ex !!!" + ex);
  }
}
 
Example 10
Source Project: big-c   Source File: TestSnappyCompressorDecompressor.java    License: Apache License 2.0
@Test
public void testSnappyDirectBlockCompression() {
  int[] size = { 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 };    
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
  try {
    for (int i = 0; i < size.length; i++) {
      compressDecompressLoop(size[i]);
    }
  } catch (IOException ex) {
    fail("testSnappyDirectBlockCompression ex !!!" + ex);
  }
}
 
Example 11
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());
    //set from the command line

    job.setJarByClass(Phase2ExactMatchDeDuplication.class);
    job.setJobName(Phase2ExactMatchDeDuplication.class.getName());

    // mapper
    job.setMapperClass(ExactMatchDetectionMapper.class);

    // we will compress the mapper's output (use fast Snappy compressor)
    job.getConfiguration().setBoolean(Job.MAP_OUTPUT_COMPRESS, true);
    job.getConfiguration()
            .setClass(Job.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

    // reducer
    job.setReducerClass(UniqueWarcWriterReducer.class);
    // no combiner, as the output classes in mapper and reducer are different!

    // input-output is warc
    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(WARCOutputFormat.class);

    // mapper output data
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(WARCWritable.class);

    // set output compression to GZip
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
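
Note the deliberate split here: Snappy, which trades compression ratio for speed, is applied only to the short-lived intermediate map output, while the final WARC output written to disk uses GZip for a better ratio. Example 12 below follows the same pattern.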
 
Example 12
Source Project: dkpro-c4corpus   Source File: Phase1FullJob.java    License: Apache License 2.0
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());
    // set from the command line

    job.setJarByClass(Phase1FullJob.class);
    job.setJobName(Phase1FullJob.class.getName());

    // mapper
    job.setMapperClass(MapperClass.class);

    // we will compress the mapper's output (use fast Snappy compressor)
    job.getConfiguration().setBoolean(Job.MAP_OUTPUT_COMPRESS, true);
    job.getConfiguration()
            .setClass(Job.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

    // reducer
    job.setReducerClass(SimpleWarcWriterReducer.class);

    // input-output is warc
    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(WARCOutputFormat.class);

    // mapper output data
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(WARCWritable.class);

    // set output compression to GZip
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
 
Example 13
Source Project: attic-apex-malhar   Source File: FilterStreamCodec.java    License: Apache License 2.0
public SnappyFilterStreamContext(OutputStream outputStream) throws IOException
{
  SnappyCodec codec = new SnappyCodec();
  codec.setConf(new Configuration());
  // createOutputStream may throw IOException, which simply propagates to the caller
  filterStream = new SnappyFilterStream(
      codec.createOutputStream(outputStream, new SnappyCompressor(bufferSize)));
}
 
Example 14
/** Returns true if the Snappy test should be skipped because the native libraries are unavailable. */
private boolean checkNativeSnappy()
{
  try {
    SnappyCodec.checkNativeCodeLoaded();
  } catch (UnsatisfiedLinkError | RuntimeException e) {
    LOG.error("WARNING: Skipping Snappy compression test since native libraries were not found.");
    return true;
  }
  return false;
}
 
Example 15
@Test
public void testSnappyCompressionSimple() throws IOException
{
  if (checkNativeSnappy()) {
    return;
  }

  File snappyFile = new File(testMeta.getDir(), "snappyTestFile.snappy");

  BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(snappyFile));
  Configuration conf = new Configuration();
  CompressionCodec codec = (CompressionCodec)ReflectionUtils.newInstance(SnappyCodec.class, conf);
  FilterStreamCodec.SnappyFilterStream filterStream = new FilterStreamCodec.SnappyFilterStream(
      codec.createOutputStream(os));

  int ONE_MB = 1024 * 1024;

  String testStr = "TestSnap-16bytes";
  for (int i = 0; i < ONE_MB; i++) { // write 16 MBs
    filterStream.write(testStr.getBytes());
  }
  filterStream.flush();
  filterStream.close();

  CompressionInputStream is = codec.createInputStream(new FileInputStream(snappyFile));

  byte[] recovered = new byte[testStr.length()];
  int bytesRead = is.read(recovered);
  is.close();
  assertEquals(testStr, new String(recovered));
}
 
Example 16
Source Project: hiped2   Source File: BloomFilterCreator.java    License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  JobConf job = new JobConf(conf);
  job.setJarByClass(BloomFilterCreator.class);

  job.set(AvroJob.OUTPUT_SCHEMA, AvroBytesRecord.SCHEMA.toString());
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setInputFormat(KeyValueTextInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);

  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(BloomFilter.class);

  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(BloomFilter.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}
 
Example 17
Source Project: secor   Source File: JsonORCFileReaderWriterFactory.java    License: Apache License 2.0
/**
 * Used for returning the compression kind used in ORC
 *
 * @param codec
 * @return
 */
private CompressionKind resolveCompression(CompressionCodec codec) {
    if (codec instanceof Lz4Codec)
        return CompressionKind.LZ4;
    else if (codec instanceof SnappyCodec)
        return CompressionKind.SNAPPY;
    // although GZip and ZLIB are not the same thing,
    // there is no better-named codec for this case, so
    // use the Hadoop Gzip codec to enable ORC ZLIB compression
    else if (codec instanceof GzipCodec)
        return CompressionKind.ZLIB;
    else
        return CompressionKind.NONE;
}
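
Note the fall-through: any codec other than Lz4Codec, SnappyCodec, or GzipCodec resolves to CompressionKind.NONE, so an unrecognized codec silently yields uncompressed ORC output rather than an error.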
 
Example 18
Source Project: pentaho-hadoop-shims   Source File: SnappyShimImpl.java    License: Apache License 2.0
/**
 * Tests whether hadoop-snappy (not to be confused with other java-based snappy implementations such as jsnappy or
 * snappy-java) plus the native snappy libraries are available.
 *
 * @return true if hadoop-snappy is available on the classpath
 */
public boolean isHadoopSnappyAvailable() {
  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    return SnappyCodec.isNativeCodeLoaded();
  } catch ( Throwable t ) {
    return false;
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
 
Example 19
Source Project: presto   Source File: RcFileTester.java    License: Apache License 2.0
@Override
Optional<String> getCodecName()
{
    return Optional.of(SnappyCodec.class.getName());
}
 
Example 20
Source Project: hadoop   Source File: NativeLibraryChecker.java    License: Apache License 2.0
/**
 * A tool to test native library availability.
 */
public static void main(String[] args) {
  String usage = "NativeLibraryChecker [-a|-h]\n"
      + "  -a  use -a to check all libraries are available\n"
      + "      by default just check hadoop library (and\n"
      + "      winutils.exe on Windows OS) is available\n"
      + "      exit with error code 1 if check failed\n"
      + "  -h  print this message\n";
  if (args.length > 1 ||
      (args.length == 1 &&
          !(args[0].equals("-a") || args[0].equals("-h")))) {
    System.err.println(usage);
    ExitUtil.terminate(1);
  }
  boolean checkAll = false;
  if (args.length == 1) {
    if (args[0].equals("-h")) {
      System.out.println(usage);
      return;
    }
    checkAll = true;
  }
  Configuration conf = new Configuration();
  boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded();
  boolean zlibLoaded = false;
  boolean snappyLoaded = false;
  // lz4 is linked within libhadoop
  boolean lz4Loaded = nativeHadoopLoaded;
  boolean bzip2Loaded = Bzip2Factory.isNativeBzip2Loaded(conf);
  boolean openSslLoaded = false;
  boolean winutilsExists = false;

  String openSslDetail = "";
  String hadoopLibraryName = "";
  String zlibLibraryName = "";
  String snappyLibraryName = "";
  String lz4LibraryName = "";
  String bzip2LibraryName = "";
  String winutilsPath = null;

  if (nativeHadoopLoaded) {
    hadoopLibraryName = NativeCodeLoader.getLibraryName();
    zlibLoaded = ZlibFactory.isNativeZlibLoaded(conf);
    if (zlibLoaded) {
      zlibLibraryName = ZlibFactory.getLibraryName();
    }
    snappyLoaded = NativeCodeLoader.buildSupportsSnappy() &&
        SnappyCodec.isNativeCodeLoaded();
    if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) {
      snappyLibraryName = SnappyCodec.getLibraryName();
    }
    if (OpensslCipher.getLoadingFailureReason() != null) {
      openSslDetail = OpensslCipher.getLoadingFailureReason();
      openSslLoaded = false;
    } else {
      openSslDetail = OpensslCipher.getLibraryName();
      openSslLoaded = true;
    }
    if (lz4Loaded) {
      lz4LibraryName = Lz4Codec.getLibraryName();
    }
    if (bzip2Loaded) {
      bzip2LibraryName = Bzip2Factory.getLibraryName(conf);
    }
  }

  // winutils.exe is required on Windows
  winutilsPath = Shell.getWinUtilsPath();
  if (winutilsPath != null) {
    winutilsExists = true;
  } else {
    winutilsPath = "";
  }

  System.out.println("Native library checking:");
  System.out.printf("hadoop:  %b %s%n", nativeHadoopLoaded, hadoopLibraryName);
  System.out.printf("zlib:    %b %s%n", zlibLoaded, zlibLibraryName);
  System.out.printf("snappy:  %b %s%n", snappyLoaded, snappyLibraryName);
  System.out.printf("lz4:     %b %s%n", lz4Loaded, lz4LibraryName);
  System.out.printf("bzip2:   %b %s%n", bzip2Loaded, bzip2LibraryName);
  System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail);
  if (Shell.WINDOWS) {
    System.out.printf("winutils: %b %s%n", winutilsExists, winutilsPath);
  }

  if ((!nativeHadoopLoaded) || (Shell.WINDOWS && (!winutilsExists)) ||
      (checkAll && !(zlibLoaded && snappyLoaded && lz4Loaded && bzip2Loaded))) {
    // exit with code 1 to indicate the check failed
    ExitUtil.terminate(1);
  }
}
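
This class backs the hadoop checknative shell command; running hadoop checknative -a exercises the check-all path shown above.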
 
Example 21
Source Project: hadoop   Source File: TestSnappyCompressorDecompressor.java    License: Apache License 2.0
@Before
public void before() {
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
}
 
Example 22
Source Project: big-c   Source File: NativeLibraryChecker.java    License: Apache License 2.0
/**
 * A tool to test native library availability.
 */
public static void main(String[] args) {
  String usage = "NativeLibraryChecker [-a|-h]\n"
      + "  -a  use -a to check all libraries are available\n"
      + "      by default just check hadoop library (and\n"
      + "      winutils.exe on Windows OS) is available\n"
      + "      exit with error code 1 if check failed\n"
      + "  -h  print this message\n";
  if (args.length > 1 ||
      (args.length == 1 &&
          !(args[0].equals("-a") || args[0].equals("-h")))) {
    System.err.println(usage);
    ExitUtil.terminate(1);
  }
  boolean checkAll = false;
  if (args.length == 1) {
    if (args[0].equals("-h")) {
      System.out.println(usage);
      return;
    }
    checkAll = true;
  }
  Configuration conf = new Configuration();
  boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded();
  boolean zlibLoaded = false;
  boolean snappyLoaded = false;
  // lz4 is linked within libhadoop
  boolean lz4Loaded = nativeHadoopLoaded;
  boolean bzip2Loaded = Bzip2Factory.isNativeBzip2Loaded(conf);
  boolean openSslLoaded = false;
  boolean winutilsExists = false;

  String openSslDetail = "";
  String hadoopLibraryName = "";
  String zlibLibraryName = "";
  String snappyLibraryName = "";
  String lz4LibraryName = "";
  String bzip2LibraryName = "";
  String winutilsPath = null;

  if (nativeHadoopLoaded) {
    hadoopLibraryName = NativeCodeLoader.getLibraryName();
    zlibLoaded = ZlibFactory.isNativeZlibLoaded(conf);
    if (zlibLoaded) {
      zlibLibraryName = ZlibFactory.getLibraryName();
    }
    snappyLoaded = NativeCodeLoader.buildSupportsSnappy() &&
        SnappyCodec.isNativeCodeLoaded();
    if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) {
      snappyLibraryName = SnappyCodec.getLibraryName();
    }
    if (OpensslCipher.getLoadingFailureReason() != null) {
      openSslDetail = OpensslCipher.getLoadingFailureReason();
      openSslLoaded = false;
    } else {
      openSslDetail = OpensslCipher.getLibraryName();
      openSslLoaded = true;
    }
    if (lz4Loaded) {
      lz4LibraryName = Lz4Codec.getLibraryName();
    }
    if (bzip2Loaded) {
      bzip2LibraryName = Bzip2Factory.getLibraryName(conf);
    }
  }

  // winutils.exe is required on Windows
  winutilsPath = Shell.getWinUtilsPath();
  if (winutilsPath != null) {
    winutilsExists = true;
  } else {
    winutilsPath = "";
  }

  System.out.println("Native library checking:");
  System.out.printf("hadoop:  %b %s%n", nativeHadoopLoaded, hadoopLibraryName);
  System.out.printf("zlib:    %b %s%n", zlibLoaded, zlibLibraryName);
  System.out.printf("snappy:  %b %s%n", snappyLoaded, snappyLibraryName);
  System.out.printf("lz4:     %b %s%n", lz4Loaded, lz4LibraryName);
  System.out.printf("bzip2:   %b %s%n", bzip2Loaded, bzip2LibraryName);
  System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail);
  if (Shell.WINDOWS) {
    System.out.printf("winutils: %b %s%n", winutilsExists, winutilsPath);
  }

  if ((!nativeHadoopLoaded) || (Shell.WINDOWS && (!winutilsExists)) ||
      (checkAll && !(zlibLoaded && snappyLoaded && lz4Loaded && bzip2Loaded))) {
    // exit with code 1 to indicate the check failed
    ExitUtil.terminate(1);
  }
}
 
Example 23
Source Project: big-c   Source File: TestSnappyCompressorDecompressor.java    License: Apache License 2.0
@Before
public void before() {
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
}
 
Example 24
Source Project: hiped2   Source File: DBImportMapReduce.java    License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.OutputFileOption.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path output = new Path(cli.getArgValueAsString(CliCommonOpts.OutputFileOption.OUTPUT));

  Configuration conf = super.getConf();

  DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
      "jdbc:mysql://localhost/sqoop_test" +
          "?user=hip_sqoop_user&password=password");

  JobConf job = new JobConf(conf);
  job.setJarByClass(DBImportMapReduce.class);

  job.setInputFormat(DBInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);
  AvroJob.setOutputSchema(job, Stock.SCHEMA$);
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setMapperClass(Map.class);

  job.setNumMapTasks(4);
  job.setNumReduceTasks(0);

  job.setMapOutputKeyClass(AvroWrapper.class);
  job.setMapOutputValueClass(NullWritable.class);

  job.setOutputKeyClass(AvroWrapper.class);
  job.setOutputValueClass(NullWritable.class);

  FileOutputFormat.setOutputPath(job, output);

  DBInputFormat.setInput(
      job,
      StockDbWritable.class,
      "select * from stocks",
      "SELECT COUNT(id) FROM stocks");

  RunningJob runningJob = JobClient.runJob(job);

  return runningJob.isSuccessful() ? 0 : 1;
}
 
Example 25
Source Project: HBase-ToHDFS   Source File: ExportHBaseTableToAvro.java    License: Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println("ExportHBaseTableToAvro {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowKeyColumn.Optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String rowKeyColumn = "";
  
  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToAvro.class);
  job.setJobName("ExportHBaseTableToAvro ");

  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
  
  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                        // MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  AvroKeyOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();

  FileSystem fs = FileSystem.get(job.getConfiguration());

  AvroJob.setOutputKeySchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
 
Example 26
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out
        .println("ExportHBaseTableToDelimiteredSeq {tableName} {ColumnFamily} {outputPath} {compressionCodec} {schemaLocationOnLocal} {delimiter} {rowKeyColumn.optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String delimiter = args[5];

  String rowKeyColumn = "";
  if (args.length > 6) {
    rowKeyColumn = args[6];
  }
  
  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  
  HBaseConfiguration.addHbaseResources(job.getConfiguration());
  
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
  job.getConfiguration().set(OUTPUT_PATH_CONF, outputPath);
  job.getConfiguration().set(DELIMITER_CONF, delimiter);

  job.setJarByClass(ExportHBaseTableToDelimiteredSeq.class);
  job.setJobName("ExportHBaseTableToDelimiteredSeq ");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                        // MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(SequenceFileOutputFormat.class); 
  SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
  
  if (compressionCodec.equals("snappy")) {
    SequenceFileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    //nothing
  }
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);
  
  job.setNumReduceTasks(0);
  
  boolean b = job.waitForCompletion(true);
}
 
Example 27
Source Project: HBase-ToHDFS   Source File: ExportHBaseTableToParquet.java    License: Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out
        .println("ExportHBaseTableToParquet {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowkey.column.optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];

  String rowKeyColumn = "";
  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToParquet.class);
  job.setJobName("ExportHBaseTableToParquet ");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                        // MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  AvroParquetOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();

  FileSystem fs = FileSystem.get(job.getConfiguration());
  AvroParquetOutputFormat.setSchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
 
Example 28
Source Project: hiped2   Source File: AvroMixedMapReduce.java    License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {


  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  JobConf job = new JobConf(conf);
  job.setJarByClass(AvroMixedMapReduce.class);

  job.set(AvroJob.INPUT_SCHEMA, Stock.SCHEMA$.toString());
  job.set(AvroJob.OUTPUT_SCHEMA, StockAvg.SCHEMA$.toString());
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setInputFormat(AvroInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);

  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(DoubleWritable.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}
 
Example 29
Source Project: hiped2   Source File: BloomFilterCreator.java    License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(ReplicatedJoin.UserOptions.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path usersPath = new Path(cli.getArgValueAsString(ReplicatedJoin.UserOptions.USERS));
  Path outputPath = new Path(cli.getArgValueAsString(ReplicatedJoin.UserOptions.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);

  job.setJarByClass(BloomFilterCreator.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  AvroJob.setOutputKeySchema(job, AvroBytesRecord.SCHEMA);
  job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setOutputFormatClass(AvroKeyOutputFormat.class);

  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(BloomFilter.class);

  FileInputFormat.setInputPaths(job, usersPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.setNumReduceTasks(1);

  return job.waitForCompletion(true) ? 0 : 1;
}