org.apache.hadoop.io.compress.SnappyCodec Java Examples

The following examples show how to use org.apache.hadoop.io.compress.SnappyCodec. Each example notes its original project, author, source file, and license so you can trace it back to the source.
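
Before the project-specific examples, here is a minimal, self-contained sketch of the pattern most of them share: instantiate SnappyCodec, hand it a Configuration, and wrap plain streams with createOutputStream and createInputStream. It assumes the native Hadoop and Snappy libraries are loaded (see the NativeCodeLoader and NativeLibraryChecker examples below); the class name and payload are illustrative, not part of any project above.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.SnappyCodec;

public class SnappyCodecRoundTrip {
  public static void main(String[] args) throws Exception {
    // SnappyCodec is Configurable; it needs a Configuration before use.
    SnappyCodec codec = new SnappyCodec();
    codec.setConf(new Configuration());

    // Compress a small payload into an in-memory buffer.
    byte[] payload = "hello snappy".getBytes("UTF-8");
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    CompressionOutputStream out = codec.createOutputStream(buffer);
    out.write(payload);
    out.close();

    // Decompress it again (a single read suffices for this tiny payload).
    CompressionInputStream in =
        codec.createInputStream(new ByteArrayInputStream(buffer.toByteArray()));
    byte[] recovered = new byte[payload.length];
    int n = in.read(recovered);
    in.close();

    System.out.println(new String(recovered, 0, n, "UTF-8"));
  }
}
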
Example #1
Source Project: hadoop   Author: naver   File: TestNativeCodeLoader.java    License: Apache License 2.0
@Test
public void testNativeCodeLoaded() {
  if (!requireTestJni()) {
    LOG.info("TestNativeCodeLoader: libhadoop.so testing is not required.");
    return;
  }
  if (!NativeCodeLoader.isNativeCodeLoaded()) {
    fail("TestNativeCodeLoader: libhadoop.so testing was required, but " +
        "libhadoop.so was not loaded.");
  }
  assertFalse(NativeCodeLoader.getLibraryName().isEmpty());
  // library names depend on the platform and build environment,
  // so just check that the names are available
  assertFalse(ZlibFactory.getLibraryName().isEmpty());
  if (NativeCodeLoader.buildSupportsSnappy()) {
    assertFalse(SnappyCodec.getLibraryName().isEmpty());
  }
  if (NativeCodeLoader.buildSupportsOpenssl()) {
    assertFalse(OpensslCipher.getLibraryName().isEmpty());
  }
  assertFalse(Lz4Codec.getLibraryName().isEmpty());
  LOG.info("TestNativeCodeLoader: libhadoop.so is loaded.");
}
 
Example #2
Source Project: big-c   Author: yncxcw   File: TestNativeCodeLoader.java    License: Apache License 2.0
@Test
public void testNativeCodeLoaded() {
  if (!requireTestJni()) {
    LOG.info("TestNativeCodeLoader: libhadoop.so testing is not required.");
    return;
  }
  if (!NativeCodeLoader.isNativeCodeLoaded()) {
    fail("TestNativeCodeLoader: libhadoop.so testing was required, but " +
        "libhadoop.so was not loaded.");
  }
  assertFalse(NativeCodeLoader.getLibraryName().isEmpty());
  // library names depend on the platform and build environment,
  // so just check that the names are available
  assertFalse(ZlibFactory.getLibraryName().isEmpty());
  if (NativeCodeLoader.buildSupportsSnappy()) {
    assertFalse(SnappyCodec.getLibraryName().isEmpty());
  }
  if (NativeCodeLoader.buildSupportsOpenssl()) {
    assertFalse(OpensslCipher.getLibraryName().isEmpty());
  }
  assertFalse(Lz4Codec.getLibraryName().isEmpty());
  LOG.info("TestNativeCodeLoader: libhadoop.so is loaded.");
}
 
Example #3
Source Project: ignite   Author: apache   File: HadoopWordCount2.java    License: Apache License 2.0
/**
 * Sets the task classes and related settings on the given job, if requested.
 *
 * @param job Job to configure.
 * @param setMapper Option to set the mapper and input format classes.
 * @param setCombiner Option to set the combiner class.
 * @param setReducer Option to set the reducer and output format classes.
 * @param outputCompression Option to write Snappy-compressed SequenceFile output.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer,
        boolean outputCompression) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }

    if (setCombiner)
        job.setCombinerClass(HadoopWordCount2Combiner.class);

    if (setReducer) {
        job.setReducerClass(HadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }

    if (outputCompression) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

        SequenceFileOutputFormat.setCompressOutput(job, true);

        job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
    }
}
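
As a usage note, a hypothetical driver fragment might call the helper above with output compression enabled; the class name is assumed from the file name, and the job name and paths are illustrative only.

// Hypothetical driver fragment; job name and paths are illustrative.
Job job = Job.getInstance(new Configuration(), "wordcount-snappy");
HadoopWordCount2.setTasksClasses(job, true, true, true, true); // mapper, combiner, reducer, Snappy output
FileInputFormat.addInputPath(job, new Path("/input/words"));
FileOutputFormat.setOutputPath(job, new Path("/output/words"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
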
 
Example #4
Source Project: incubator-retired-blur   Author: apache   File: CsvBlurDriverTest.java    License: Apache License 2.0
@Test
public void testCsvBlurDriverTest3() throws Exception {
  Configuration configurationSetup = new Configuration();
  ControllerPool controllerPool = new CsvBlurDriver.ControllerPool() {
    @Override
    public Iface getClient(String controllerConnectionStr) {
      return getMockIface();
    }
  };
  AtomicReference<Callable<Void>> ref = new AtomicReference<Callable<Void>>();
  Job job = CsvBlurDriver.setupJob(configurationSetup, controllerPool, ref, "-c", "host:40010", "-d", "family1",
      "col1", "col2", "-d", "family2", "col3", "col4", "-t", "table1", "-i", _path1.toString(), "-i",
      _path2.toString(), "-S", "-C", "1000000", "2000000", "-p", "SNAPPY");
  assertNotNull(job);
  Configuration configuration = job.getConfiguration();
  TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(configuration);
  assertEquals(tableDescriptor.getName(), "table1");
  Collection<String> inputs = configuration.getStringCollection("mapred.input.dir");
  assertEquals(2, inputs.size());
  Map<String, List<String>> familyAndColumnNameMap = CsvBlurMapper.getFamilyAndColumnNameMap(configuration);
  assertEquals(2, familyAndColumnNameMap.size());
  assertEquals("true", configuration.get(CsvBlurDriver.MAPRED_COMPRESS_MAP_OUTPUT));
  assertEquals(SnappyCodec.class.getName(), configuration.get(CsvBlurDriver.MAPRED_MAP_OUTPUT_COMPRESSION_CODEC));
}
 
Example #5
Source Project: pentaho-hadoop-shims   Author: pentaho   File: CommonSnappyShim.java    License: Apache License 2.0
/**
 * Gets an InputStream that uses the snappy codec and wraps the supplied base input stream.
 *
 * @param bufferSize the buffer size for the codec to use (in bytes)
 * @param in  the base input stream to wrap around
 * @return an InputStream that uses the Snappy codec
 * @throws Exception if snappy is not available or an error occurs during reflection
 */
public InputStream getSnappyInputStream( int bufferSize, InputStream in ) throws Exception {
  if ( !isHadoopSnappyAvailable() ) {
    throw new Exception( "Hadoop-snappy does not seem to be available" );
  }

  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    SnappyCodec c = new SnappyCodec();
    Configuration newConf = new Configuration();
    newConf.set( IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, "" + bufferSize );
    c.setConf( newConf );
    return c.createInputStream( in );
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
 
Example #6
Source Project: pentaho-hadoop-shims   Author: pentaho   File: CommonSnappyShim.java    License: Apache License 2.0
/**
 * Gets an OutputStream that uses the snappy codec and wraps the supplied base output stream.
 *
 * @param bufferSize the buffer size for the codec to use (in bytes)
 * @param out the base output stream to wrap around
 * @return an OutputStream that uses the Snappy codec
 * @throws Exception if snappy is not available or an error occurs during reflection
 */
public OutputStream getSnappyOutputStream( int bufferSize, OutputStream out ) throws Exception {
  if ( !isHadoopSnappyAvailable() ) {
    throw new Exception( "Hadoop-snappy does not seem to be available" );
  }

  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    SnappyCodec c = new SnappyCodec();
    Configuration newConf = new Configuration();
    newConf.set( IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, "" + bufferSize );
    c.setConf( newConf );
    return c.createOutputStream( out );
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
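
The two shim methods above pair naturally for a compress/decompress round trip. A minimal sketch follows, assuming an available CommonSnappyShim instance, a 64 KB buffer size, and the usual java.io imports (all assumptions for illustration).

// Sketch: round-trip a payload through the shim methods above.
// The shim instance, buffer size, and payload are illustrative assumptions.
static byte[] snappyRoundTrip(CommonSnappyShim shim, byte[] original) throws Exception {
  ByteArrayOutputStream compressed = new ByteArrayOutputStream();
  OutputStream snappyOut = shim.getSnappyOutputStream(64 * 1024, compressed);
  snappyOut.write(original);
  snappyOut.close();

  InputStream snappyIn =
      shim.getSnappyInputStream(64 * 1024, new ByteArrayInputStream(compressed.toByteArray()));
  byte[] recovered = new byte[original.length];
  int read = snappyIn.read(recovered); // a single read suffices for small payloads
  snappyIn.close();
  return java.util.Arrays.copyOf(recovered, Math.max(read, 0));
}
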
 
Example #7
Source Project: localization_nifi   Author: wangrenlei   File: AbstractHadoopProcessor.java    License: Apache License 2.0
@Override
public String toString() {
    switch (this) {
        case NONE: return "NONE";
        case DEFAULT: return DefaultCodec.class.getName();
        case BZIP: return BZip2Codec.class.getName();
        case GZIP: return GzipCodec.class.getName();
        case LZ4: return Lz4Codec.class.getName();
        case SNAPPY: return SnappyCodec.class.getName();
        case AUTOMATIC: return "Automatically Detected";
    }
    return null;
}
 
Example #8
Source Project: sylph   Author: harbby   File: HdfsSink2.java    License: Apache License 2.0
public HdfsSink2(Hdfs2SinkConfig config)
        throws ClassNotFoundException
{
    this.batchSize = config.getBatchBufferSize();
    this.writerDir = config.getWriteDir();
    switch (config.getZipType().trim().toLowerCase()) {
        case "lzo":
            codecClass = (Class<? extends CompressionCodec>) Class.forName("com.hadoop.compression.lzo.LzopCodec");
            break;
        case "lz4":
            codecClass = Lz4Codec.class;
            break;
        case "snappy":
            codecClass = SnappyCodec.class;
            break;
        case "gzip":
            codecClass = GzipCodec.class;
            break;
        case "bzip2":
            codecClass = BZip2Codec.class;
            break;
        case "default":
            codecClass = DefaultCodec.class;
            break;
        default:
            codecClass = NoneCodec.class;
    }
}
 
Example #9
Source Project: hadoop   Author: naver   File: TestSnappyCompressorDecompressor.java    License: Apache License 2.0
@Test
public void testSnappyDirectBlockCompression() {
  int[] size = { 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 };    
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
  try {
    for (int i = 0; i < size.length; i++) {
      compressDecompressLoop(size[i]);
    }
  } catch (IOException ex) {
    fail("testSnappyDirectBlockCompression ex !!!" + ex);
  }
}
 
Example #10
Source Project: big-c   Author: yncxcw   File: TestSnappyCompressorDecompressor.java    License: Apache License 2.0
@Test
public void testSnappyDirectBlockCompression() {
  int[] size = { 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 };    
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
  try {
    for (int i = 0; i < size.length; i++) {
      compressDecompressLoop(size[i]);
    }
  } catch (IOException ex) {
    fail("testSnappyDirectBlockCompression ex !!!" + ex);
  }
}
 
Example #11
Source Project: dkpro-c4corpus   Author: dkpro   File: Phase2ExactMatchDeDuplication.java    License: Apache License 2.0
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());
    //set from the command line

    job.setJarByClass(Phase2ExactMatchDeDuplication.class);
    job.setJobName(Phase2ExactMatchDeDuplication.class.getName());

    // mapper
    job.setMapperClass(ExactMatchDetectionMapper.class);

    // we will compress the mapper's output (use fast Snappy compressor)
    job.getConfiguration().setBoolean(Job.MAP_OUTPUT_COMPRESS, true);
    job.getConfiguration()
            .setClass(Job.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

    // reducer
    job.setReducerClass(UniqueWarcWriterReducer.class);
    // no combiner, as the output classes in mapper and reducer are different!

    // input-output is warc
    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(WARCOutputFormat.class);

    // mapper output data
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(WARCWritable.class);

    // set output compression to GZip
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
 
Example #12
Source Project: dkpro-c4corpus   Author: dkpro   File: Phase1FullJob.java    License: Apache License 2.0
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());
    // set from the command line

    job.setJarByClass(Phase1FullJob.class);
    job.setJobName(Phase1FullJob.class.getName());

    // mapper
    job.setMapperClass(MapperClass.class);

    // we will compress the mapper's output (use fast Snappy compressor)
    job.getConfiguration().setBoolean(Job.MAP_OUTPUT_COMPRESS, true);
    job.getConfiguration()
            .setClass(Job.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

    // reducer
    job.setReducerClass(SimpleWarcWriterReducer.class);

    // input-output is warc
    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(WARCOutputFormat.class);

    // mapper output data
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(WARCWritable.class);

    // set output compression to GZip
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
 
Example #13
Source Project: attic-apex-malhar   Author: apache   File: FilterStreamCodec.java    License: Apache License 2.0
public SnappyFilterStreamContext(OutputStream outputStream) throws IOException
{
  SnappyCodec codec = new SnappyCodec();
  codec.setConf(new Configuration());
  try {
    filterStream = new SnappyFilterStream(
        codec.createOutputStream(outputStream, new SnappyCompressor(bufferSize)));
  } catch (IOException e) {
    throw e;
  }
}
 
Example #14
Source Project: attic-apex-malhar   Author: apache   File: AbstractFileOutputOperatorTest.java    License: Apache License 2.0
private boolean checkNativeSnappy()
{
  try {
    SnappyCodec.checkNativeCodeLoaded();
  } catch (UnsatisfiedLinkError u) {
    LOG.error("WARNING: Skipping Snappy compression test since native libraries were not found.");
    return true;
  } catch (RuntimeException e) {
    LOG.error("WARNING: Skipping Snappy compression test since native libraries were not found.");
    return true;
  }
  return false;
}
 
Example #15
Source Project: attic-apex-malhar   Author: apache   File: AbstractFileOutputOperatorTest.java    License: Apache License 2.0
@Test
public void testSnappyCompressionSimple() throws IOException
{
  if (checkNativeSnappy()) {
    return;
  }

  File snappyFile = new File(testMeta.getDir(), "snappyTestFile.snappy");

  BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(snappyFile));
  Configuration conf = new Configuration();
  CompressionCodec codec = (CompressionCodec)ReflectionUtils.newInstance(SnappyCodec.class, conf);
  FilterStreamCodec.SnappyFilterStream filterStream = new FilterStreamCodec.SnappyFilterStream(
      codec.createOutputStream(os));

  int ONE_MB = 1024 * 1024;

  String testStr = "TestSnap-16bytes";
  for (int i = 0; i < ONE_MB; i++) { // write the 16-byte string 1M times (16 MB total)
    filterStream.write(testStr.getBytes());
  }
  filterStream.flush();
  filterStream.close();

  CompressionInputStream is = codec.createInputStream(new FileInputStream(snappyFile));

  byte[] recovered = new byte[testStr.length()];
  int bytesRead = is.read(recovered);
  is.close();
  assertEquals(testStr, new String(recovered));
}
 
Example #16
Source Project: hiped2   Author: alexholmes   File: BloomFilterCreator.java    License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  JobConf job = new JobConf(conf);
  job.setJarByClass(BloomFilterCreator.class);

  job.set(AvroJob.OUTPUT_SCHEMA, AvroBytesRecord.SCHEMA.toString());
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setInputFormat(KeyValueTextInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);

  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(BloomFilter.class);

  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(BloomFilter.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}
 
Example #17
Source Project: secor   Author: pinterest   File: JsonORCFileReaderWriterFactory.java    License: Apache License 2.0
/**
 * Maps a Hadoop compression codec to the compression kind used in ORC.
 *
 * @param codec the Hadoop compression codec in use
 * @return the corresponding ORC CompressionKind
 */
private CompressionKind resolveCompression(CompressionCodec codec) {
    if (codec instanceof Lz4Codec)
        return CompressionKind.LZ4;
    else if (codec instanceof SnappyCodec)
        return CompressionKind.SNAPPY;
    // although GZip and ZLIB are not the same thing,
    // there is no better-named codec for this case,
    // so use the Hadoop Gzip codec to enable ORC ZLIB compression
    else if (codec instanceof GzipCodec)
        return CompressionKind.ZLIB;
    else
        return CompressionKind.NONE;
}
 
Example #18
Source Project: pentaho-hadoop-shims   Author: pentaho   File: SnappyShimImpl.java    License: Apache License 2.0
/**
 * Tests whether hadoop-snappy (not to be confused with other java-based snappy implementations such as jsnappy or
 * snappy-java) plus the native snappy libraries are available.
 *
 * @return true if hadoop-snappy is available on the classpath
 */
public boolean isHadoopSnappyAvailable() {
  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    return SnappyCodec.isNativeCodeLoaded();
  } catch ( Throwable t ) {
    return false;
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
 
Example #19
Source Project: presto   Author: prestosql   File: RcFileTester.java    License: Apache License 2.0
@Override
Optional<String> getCodecName()
{
    return Optional.of(SnappyCodec.class.getName());
}
 
Example #20
Source Project: hadoop   Author: naver   File: NativeLibraryChecker.java    License: Apache License 2.0
/**
 * A tool to test native library availability, 
 */
public static void main(String[] args) {
  String usage = "NativeLibraryChecker [-a|-h]\n"
      + "  -a  use -a to check all libraries are available\n"
      + "      by default just check hadoop library (and\n"
      + "      winutils.exe on Windows OS) is available\n"
      + "      exit with error code 1 if check failed\n"
      + "  -h  print this message\n";
  if (args.length > 1 ||
      (args.length == 1 &&
          !(args[0].equals("-a") || args[0].equals("-h")))) {
    System.err.println(usage);
    ExitUtil.terminate(1);
  }
  boolean checkAll = false;
  if (args.length == 1) {
    if (args[0].equals("-h")) {
      System.out.println(usage);
      return;
    }
    checkAll = true;
  }
  Configuration conf = new Configuration();
  boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded();
  boolean zlibLoaded = false;
  boolean snappyLoaded = false;
  // lz4 is linked within libhadoop
  boolean lz4Loaded = nativeHadoopLoaded;
  boolean bzip2Loaded = Bzip2Factory.isNativeBzip2Loaded(conf);
  boolean openSslLoaded = false;
  boolean winutilsExists = false;

  String openSslDetail = "";
  String hadoopLibraryName = "";
  String zlibLibraryName = "";
  String snappyLibraryName = "";
  String lz4LibraryName = "";
  String bzip2LibraryName = "";
  String winutilsPath = null;

  if (nativeHadoopLoaded) {
    hadoopLibraryName = NativeCodeLoader.getLibraryName();
    zlibLoaded = ZlibFactory.isNativeZlibLoaded(conf);
    if (zlibLoaded) {
      zlibLibraryName = ZlibFactory.getLibraryName();
    }
    snappyLoaded = NativeCodeLoader.buildSupportsSnappy() &&
        SnappyCodec.isNativeCodeLoaded();
    if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) {
      snappyLibraryName = SnappyCodec.getLibraryName();
    }
    if (OpensslCipher.getLoadingFailureReason() != null) {
      openSslDetail = OpensslCipher.getLoadingFailureReason();
      openSslLoaded = false;
    } else {
      openSslDetail = OpensslCipher.getLibraryName();
      openSslLoaded = true;
    }
    if (lz4Loaded) {
      lz4LibraryName = Lz4Codec.getLibraryName();
    }
    if (bzip2Loaded) {
      bzip2LibraryName = Bzip2Factory.getLibraryName(conf);
    }
  }

  // winutils.exe is required on Windows
  winutilsPath = Shell.getWinUtilsPath();
  if (winutilsPath != null) {
    winutilsExists = true;
  } else {
    winutilsPath = "";
  }

  System.out.println("Native library checking:");
  System.out.printf("hadoop:  %b %s%n", nativeHadoopLoaded, hadoopLibraryName);
  System.out.printf("zlib:    %b %s%n", zlibLoaded, zlibLibraryName);
  System.out.printf("snappy:  %b %s%n", snappyLoaded, snappyLibraryName);
  System.out.printf("lz4:     %b %s%n", lz4Loaded, lz4LibraryName);
  System.out.printf("bzip2:   %b %s%n", bzip2Loaded, bzip2LibraryName);
  System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail);
  if (Shell.WINDOWS) {
    System.out.printf("winutils: %b %s%n", winutilsExists, winutilsPath);
  }

  if ((!nativeHadoopLoaded) || (Shell.WINDOWS && (!winutilsExists)) ||
      (checkAll && !(zlibLoaded && snappyLoaded && lz4Loaded && bzip2Loaded))) {
    // return 1 to indicated check failed
    ExitUtil.terminate(1);
  }
}
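
In a Hadoop distribution this checker is exposed as the hadoop checknative [-a|-h] command, which prints the same per-library report produced by the printf calls above.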
 
Example #21
Source Project: hadoop   Author: naver   File: TestSnappyCompressorDecompressor.java    License: Apache License 2.0
@Before
public void before() {
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
}
 
Example #22
Source Project: big-c   Author: yncxcw   File: NativeLibraryChecker.java    License: Apache License 2.0
/**
 * A tool to test native library availability, 
 */
public static void main(String[] args) {
  String usage = "NativeLibraryChecker [-a|-h]\n"
      + "  -a  use -a to check all libraries are available\n"
      + "      by default just check hadoop library (and\n"
      + "      winutils.exe on Windows OS) is available\n"
      + "      exit with error code 1 if check failed\n"
      + "  -h  print this message\n";
  if (args.length > 1 ||
      (args.length == 1 &&
          !(args[0].equals("-a") || args[0].equals("-h")))) {
    System.err.println(usage);
    ExitUtil.terminate(1);
  }
  boolean checkAll = false;
  if (args.length == 1) {
    if (args[0].equals("-h")) {
      System.out.println(usage);
      return;
    }
    checkAll = true;
  }
  Configuration conf = new Configuration();
  boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded();
  boolean zlibLoaded = false;
  boolean snappyLoaded = false;
  // lz4 is linked within libhadoop
  boolean lz4Loaded = nativeHadoopLoaded;
  boolean bzip2Loaded = Bzip2Factory.isNativeBzip2Loaded(conf);
  boolean openSslLoaded = false;
  boolean winutilsExists = false;

  String openSslDetail = "";
  String hadoopLibraryName = "";
  String zlibLibraryName = "";
  String snappyLibraryName = "";
  String lz4LibraryName = "";
  String bzip2LibraryName = "";
  String winutilsPath = null;

  if (nativeHadoopLoaded) {
    hadoopLibraryName = NativeCodeLoader.getLibraryName();
    zlibLoaded = ZlibFactory.isNativeZlibLoaded(conf);
    if (zlibLoaded) {
      zlibLibraryName = ZlibFactory.getLibraryName();
    }
    snappyLoaded = NativeCodeLoader.buildSupportsSnappy() &&
        SnappyCodec.isNativeCodeLoaded();
    if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) {
      snappyLibraryName = SnappyCodec.getLibraryName();
    }
    if (OpensslCipher.getLoadingFailureReason() != null) {
      openSslDetail = OpensslCipher.getLoadingFailureReason();
      openSslLoaded = false;
    } else {
      openSslDetail = OpensslCipher.getLibraryName();
      openSslLoaded = true;
    }
    if (lz4Loaded) {
      lz4LibraryName = Lz4Codec.getLibraryName();
    }
    if (bzip2Loaded) {
      bzip2LibraryName = Bzip2Factory.getLibraryName(conf);
    }
  }

  // winutils.exe is required on Windows
  winutilsPath = Shell.getWinUtilsPath();
  if (winutilsPath != null) {
    winutilsExists = true;
  } else {
    winutilsPath = "";
  }

  System.out.println("Native library checking:");
  System.out.printf("hadoop:  %b %s%n", nativeHadoopLoaded, hadoopLibraryName);
  System.out.printf("zlib:    %b %s%n", zlibLoaded, zlibLibraryName);
  System.out.printf("snappy:  %b %s%n", snappyLoaded, snappyLibraryName);
  System.out.printf("lz4:     %b %s%n", lz4Loaded, lz4LibraryName);
  System.out.printf("bzip2:   %b %s%n", bzip2Loaded, bzip2LibraryName);
  System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail);
  if (Shell.WINDOWS) {
    System.out.printf("winutils: %b %s%n", winutilsExists, winutilsPath);
  }

  if ((!nativeHadoopLoaded) || (Shell.WINDOWS && (!winutilsExists)) ||
      (checkAll && !(zlibLoaded && snappyLoaded && lz4Loaded && bzip2Loaded))) {
    // return 1 to indicated check failed
    ExitUtil.terminate(1);
  }
}
 
Example #23
Source Project: big-c   Author: yncxcw   File: TestSnappyCompressorDecompressor.java    License: Apache License 2.0
@Before
public void before() {
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
}
 
Example #24
Source Project: hiped2   Author: alexholmes   File: DBImportMapReduce.java    License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.OutputFileOption.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path output = new Path(cli.getArgValueAsString(CliCommonOpts.OutputFileOption.OUTPUT));

  Configuration conf = super.getConf();

  DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
      "jdbc:mysql://localhost/sqoop_test" +
          "?user=hip_sqoop_user&password=password");

  JobConf job = new JobConf(conf);
  job.setJarByClass(DBImportMapReduce.class);

  job.setInputFormat(DBInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);
  AvroJob.setOutputSchema(job, Stock.SCHEMA$);
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setMapperClass(Map.class);

  job.setNumMapTasks(4);
  job.setNumReduceTasks(0);

  job.setMapOutputKeyClass(AvroWrapper.class);
  job.setMapOutputValueClass(NullWritable.class);

  job.setOutputKeyClass(AvroWrapper.class);
  job.setOutputValueClass(NullWritable.class);

  FileOutputFormat.setOutputPath(job, output);

  DBInputFormat.setInput(
      job,
      StockDbWritable.class,
      "select * from stocks",
      "SELECT COUNT(id) FROM stocks");

  RunningJob runningJob = JobClient.runJob(job);

  return runningJob.isSuccessful() ? 0 : 1;
}
 
Example #25
Source Project: HBase-ToHDFS   Author: tmalaska   File: ExportHBaseTableToAvro.java    License: Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println("ExportHBaseTableToAvro {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowKeyColumn.Optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String rowKeyColumn = "";
  
  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToAvro.class);
  job.setJobName("ExportHBaseTableToAvro ");

  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
  
  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                        // MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  AvroKeyOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();

  FileSystem fs = FileSystem.get(job.getConfiguration());

  AvroJob.setOutputKeySchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
 
Example #26
Source Project: HBase-ToHDFS   Author: tmalaska   File: ExportHBaseTableToDelimiteredSeq.java    License: Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out
        .println("ExportHBaseTableToDelimiteredSeq {tableName} {ColumnFamily} {outputPath} {compressionCodec} {schemaLocationOnLocal} {delimiter} {rowKeyColumn.optional");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String delimiter = args[5];

  String rowKeyColumn = "";
  if (args.length > 6) {
    rowKeyColumn = args[6];
  }
  
  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  
  HBaseConfiguration.addHbaseResources(job.getConfiguration());
  
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
  job.getConfiguration().set(OUTPUT_PATH_CONF, outputPath);
  job.getConfiguration().set(DELIMITER_CONF, delimiter);

  job.setJarByClass(ExportHBaseTableToDelimiteredSeq.class);
  job.setJobName("ExportHBaseTableToDelimiteredSeq ");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                        // MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(SequenceFileOutputFormat.class); 
  SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
  
  if (compressionCodec.equals("snappy")) {
    SequenceFileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    //nothing
  }
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);
  
  job.setNumReduceTasks(0);
  
  boolean b = job.waitForCompletion(true);
}
 
Example #27
Source Project: HBase-ToHDFS   Author: tmalaska   File: ExportHBaseTableToParquet.java    License: Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out
        .println("ExportHBaseTableToParquet {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowkey.column.optional");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];

  String rowKeyColumn = "";
  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToParquet.class);
  job.setJobName("ExportHBaseTableToParquet ");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                        // MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  AvroParquetOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();

  FileSystem fs = FileSystem.get(job.getConfiguration());
  AvroParquetOutputFormat.setSchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
 
Example #28
Source Project: hiped2   Author: alexholmes   File: AvroMixedMapReduce.java    License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {


  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  JobConf job = new JobConf(conf);
  job.setJarByClass(AvroMixedMapReduce.class);

  job.set(AvroJob.INPUT_SCHEMA, Stock.SCHEMA$.toString());
  job.set(AvroJob.OUTPUT_SCHEMA, StockAvg.SCHEMA$.toString());
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setInputFormat(AvroInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);

  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(DoubleWritable.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}
 
Example #29
Source Project: hiped2   Author: alexholmes   File: BloomFilterCreator.java    License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(ReplicatedJoin.UserOptions.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path usersPath = new Path(cli.getArgValueAsString(ReplicatedJoin.UserOptions.USERS));
  Path outputPath = new Path(cli.getArgValueAsString(ReplicatedJoin.UserOptions.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);

  job.setJarByClass(BloomFilterCreator.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  AvroJob.setOutputKeySchema(job, AvroBytesRecord.SCHEMA);
  job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setOutputFormatClass(AvroKeyOutputFormat.class);

  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(BloomFilter.class);

  FileInputFormat.setInputPaths(job, usersPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.setNumReduceTasks(1);

  return job.waitForCompletion(true) ? 0 : 1;
}