org.apache.hadoop.io.compress.SnappyCodec Java Examples
The following examples show how to use org.apache.hadoop.io.compress.SnappyCodec.
Each example comes from an open-source project; the project, author, file, and license are listed above each snippet.
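Before the project examples, here is a minimal sketch of the pattern most of them share: obtain the codec through ReflectionUtils so it receives a Configuration, then wrap ordinary streams with createOutputStream and createInputStream. The class name, file name, and payload below are illustrative only, and the sketch assumes Snappy support is available in your Hadoop build (several examples below guard on SnappyCodec.isNativeCodeLoaded() for exactly this reason).

import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.SnappyCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class SnappyCodecSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Instantiate via ReflectionUtils so the codec picks up the Configuration,
    // as done in the AbstractFileOutputOperatorTest example below.
    SnappyCodec codec = ReflectionUtils.newInstance(SnappyCodec.class, conf);

    // Compress a small payload to a file ("data.snappy" is an illustrative name).
    try (CompressionOutputStream out =
        codec.createOutputStream(new FileOutputStream("data.snappy"))) {
      out.write("hello snappy".getBytes("UTF-8"));
    }

    // Read the compressed file back through the codec.
    byte[] buf = new byte[64];
    try (CompressionInputStream in =
        codec.createInputStream(new FileInputStream("data.snappy"))) {
      int n = in.read(buf);
      System.out.println(new String(buf, 0, n, "UTF-8"));
    }
  }
}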
Example #1
Source Project: hadoop Author: naver File: TestNativeCodeLoader.java License: Apache License 2.0
@Test
public void testNativeCodeLoaded() {
  if (requireTestJni() == false) {
    LOG.info("TestNativeCodeLoader: libhadoop.so testing is not required.");
    return;
  }
  if (!NativeCodeLoader.isNativeCodeLoaded()) {
    fail("TestNativeCodeLoader: libhadoop.so testing was required, but " +
        "libhadoop.so was not loaded.");
  }
  assertFalse(NativeCodeLoader.getLibraryName().isEmpty());
  // library names depend on the platform and build environment,
  // so just check that the names are available
  assertFalse(ZlibFactory.getLibraryName().isEmpty());
  if (NativeCodeLoader.buildSupportsSnappy()) {
    assertFalse(SnappyCodec.getLibraryName().isEmpty());
  }
  if (NativeCodeLoader.buildSupportsOpenssl()) {
    assertFalse(OpensslCipher.getLibraryName().isEmpty());
  }
  assertFalse(Lz4Codec.getLibraryName().isEmpty());
  LOG.info("TestNativeCodeLoader: libhadoop.so is loaded.");
}
Example #2
Source Project: big-c Author: yncxcw File: TestNativeCodeLoader.java License: Apache License 2.0
@Test
public void testNativeCodeLoaded() {
  if (requireTestJni() == false) {
    LOG.info("TestNativeCodeLoader: libhadoop.so testing is not required.");
    return;
  }
  if (!NativeCodeLoader.isNativeCodeLoaded()) {
    fail("TestNativeCodeLoader: libhadoop.so testing was required, but " +
        "libhadoop.so was not loaded.");
  }
  assertFalse(NativeCodeLoader.getLibraryName().isEmpty());
  // library names depend on the platform and build environment,
  // so just check that the names are available
  assertFalse(ZlibFactory.getLibraryName().isEmpty());
  if (NativeCodeLoader.buildSupportsSnappy()) {
    assertFalse(SnappyCodec.getLibraryName().isEmpty());
  }
  if (NativeCodeLoader.buildSupportsOpenssl()) {
    assertFalse(OpensslCipher.getLibraryName().isEmpty());
  }
  assertFalse(Lz4Codec.getLibraryName().isEmpty());
  LOG.info("TestNativeCodeLoader: libhadoop.so is loaded.");
}
Example #3
Source Project: ignite Author: apache File: HadoopWordCount2.java License: Apache License 2.0
/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param job Configuration to change.
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 * @param outputCompression Option to compress the job output.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer,
    boolean outputCompression) {
  if (setMapper) {
    job.setMapperClass(HadoopWordCount2Mapper.class);
    job.setInputFormatClass(TextInputFormat.class);
  }

  if (setCombiner)
    job.setCombinerClass(HadoopWordCount2Combiner.class);

  if (setReducer) {
    job.setReducerClass(HadoopWordCount2Reducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
  }

  if (outputCompression) {
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    SequenceFileOutputFormat.setCompressOutput(job, true);

    job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
  }
}
Example #4
Source Project: incubator-retired-blur Author: apache File: CsvBlurDriverTest.java License: Apache License 2.0
@Test
public void testCsvBlurDriverTest3() throws Exception {
  Configuration configurationSetup = new Configuration();
  ControllerPool controllerPool = new CsvBlurDriver.ControllerPool() {
    @Override
    public Iface getClient(String controllerConnectionStr) {
      return getMockIface();
    }
  };
  AtomicReference<Callable<Void>> ref = new AtomicReference<Callable<Void>>();
  Job job = CsvBlurDriver.setupJob(configurationSetup, controllerPool, ref, "-c", "host:40010", "-d", "family1",
      "col1", "col2", "-d", "family2", "col3", "col4", "-t", "table1", "-i", _path1.toString(), "-i",
      _path2.toString(), "-S", "-C", "1000000", "2000000", "-p", "SNAPPY");
  assertNotNull(job);
  Configuration configuration = job.getConfiguration();
  TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(configuration);
  assertEquals(tableDescriptor.getName(), "table1");
  Collection<String> inputs = configuration.getStringCollection("mapred.input.dir");
  assertEquals(2, inputs.size());
  Map<String, List<String>> familyAndColumnNameMap = CsvBlurMapper.getFamilyAndColumnNameMap(configuration);
  assertEquals(2, familyAndColumnNameMap.size());
  assertEquals("true", configuration.get(CsvBlurDriver.MAPRED_COMPRESS_MAP_OUTPUT));
  assertEquals(SnappyCodec.class.getName(), configuration.get(CsvBlurDriver.MAPRED_MAP_OUTPUT_COMPRESSION_CODEC));
}
Example #5
Source Project: pentaho-hadoop-shims Author: pentaho File: CommonSnappyShim.java License: Apache License 2.0
/**
 * Gets an InputStream that uses the snappy codec and wraps the supplied base input stream.
 *
 * @param bufferSize the buffer size for the codec to use (in bytes)
 * @param in the base input stream to wrap around
 * @return an InputStream that uses the Snappy codec
 * @throws Exception if snappy is not available or an error occurs during reflection
 */
public InputStream getSnappyInputStream( int bufferSize, InputStream in ) throws Exception {
  if ( !isHadoopSnappyAvailable() ) {
    throw new Exception( "Hadoop-snappy does not seem to be available" );
  }

  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    SnappyCodec c = new SnappyCodec();
    Configuration newConf = new Configuration();
    newConf.set( IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, "" + bufferSize );
    c.setConf( newConf );
    return c.createInputStream( in );
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
Example #6
Source Project: pentaho-hadoop-shims Author: pentaho File: CommonSnappyShim.java License: Apache License 2.0
/**
 * Gets an OutputStream that uses the snappy codec and wraps the supplied base output stream.
 *
 * @param bufferSize the buffer size for the codec to use (in bytes)
 * @param out the base output stream to wrap around
 * @return an OutputStream that uses the Snappy codec
 * @throws Exception if snappy is not available or an error occurs during reflection
 */
public OutputStream getSnappyOutputStream( int bufferSize, OutputStream out ) throws Exception {
  if ( !isHadoopSnappyAvailable() ) {
    throw new Exception( "Hadoop-snappy does not seem to be available" );
  }

  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    SnappyCodec c = new SnappyCodec();
    Configuration newConf = new Configuration();
    newConf.set( IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, "" + bufferSize );
    c.setConf( newConf );
    return c.createOutputStream( out );
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
Example #7
Source Project: localization_nifi Author: wangrenlei File: AbstractHadoopProcessor.java License: Apache License 2.0
@Override
public String toString() {
  switch (this) {
    case NONE:
      return "NONE";
    case DEFAULT:
      return DefaultCodec.class.getName();
    case BZIP:
      return BZip2Codec.class.getName();
    case GZIP:
      return GzipCodec.class.getName();
    case LZ4:
      return Lz4Codec.class.getName();
    case SNAPPY:
      return SnappyCodec.class.getName();
    case AUTOMATIC:
      return "Automatically Detected";
  }
  return null;
}
Example #8
Source Project: sylph Author: harbby File: HdfsSink2.java License: Apache License 2.0
public HdfsSink2(Hdfs2SinkConfig config)
    throws ClassNotFoundException {
  this.batchSize = config.getBatchBufferSize();
  this.writerDir = config.getWriteDir();
  switch (config.getZipType().trim().toLowerCase()) {
    case "lzo":
      codecClass = (Class<? extends CompressionCodec>) Class.forName("com.hadoop.compression.lzo.LzopCodec");
      break;
    case "lz4":
      codecClass = Lz4Codec.class;
      break;
    case "snappy":
      codecClass = SnappyCodec.class;
      break;
    case "gzip":
      codecClass = GzipCodec.class;
      break;
    case "bzip2":
      codecClass = BZip2Codec.class;
      break;
    case "default":
      codecClass = DefaultCodec.class;
      break;
    default:
      codecClass = NoneCodec.class;
  }
}
Example #9
Source Project: hadoop Author: naver File: TestSnappyCompressorDecompressor.java License: Apache License 2.0
@Test
public void testSnappyDirectBlockCompression() {
  int[] size = { 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 };
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
  try {
    for (int i = 0; i < size.length; i++) {
      compressDecompressLoop(size[i]);
    }
  } catch (IOException ex) {
    fail("testSnappyDirectBlockCompression ex !!!" + ex);
  }
}
Example #10
Source Project: big-c Author: yncxcw File: TestSnappyCompressorDecompressor.java License: Apache License 2.0
@Test
public void testSnappyDirectBlockCompression() {
  int[] size = { 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 };
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
  try {
    for (int i = 0; i < size.length; i++) {
      compressDecompressLoop(size[i]);
    }
  } catch (IOException ex) {
    fail("testSnappyDirectBlockCompression ex !!!" + ex);
  }
}
Example #11
Source Project: dkpro-c4corpus Author: dkpro File: Phase2ExactMatchDeDuplication.java License: Apache License 2.0
@Override
public int run(String[] args)
    throws Exception {
  Job job = Job.getInstance(getConf());
  // set from the command line
  job.setJarByClass(Phase2ExactMatchDeDuplication.class);
  job.setJobName(Phase2ExactMatchDeDuplication.class.getName());

  // mapper
  job.setMapperClass(ExactMatchDetectionMapper.class);

  // we will compress the mapper's output (use fast Snappy compressor)
  job.getConfiguration().setBoolean(Job.MAP_OUTPUT_COMPRESS, true);
  job.getConfiguration()
      .setClass(Job.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

  // reducer
  job.setReducerClass(UniqueWarcWriterReducer.class);
  // no combiner, as the output classes in mapper and reducer are different!

  // input-output is warc
  job.setInputFormatClass(WARCInputFormat.class);
  job.setOutputFormatClass(WARCOutputFormat.class);

  // mapper output data
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(WARCWritable.class);

  // set output compression to GZip
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  FileInputFormat.addInputPaths(job, args[0]);
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  return job.waitForCompletion(true) ? 0 : 1;
}
Example #12
Source Project: dkpro-c4corpus Author: dkpro File: Phase1FullJob.java License: Apache License 2.0
@Override
public int run(String[] args)
    throws Exception {
  Job job = Job.getInstance(getConf());
  // set from the command line
  job.setJarByClass(Phase1FullJob.class);
  job.setJobName(Phase1FullJob.class.getName());

  // mapper
  job.setMapperClass(MapperClass.class);

  // we will compress the mapper's output (use fast Snappy compressor)
  job.getConfiguration().setBoolean(Job.MAP_OUTPUT_COMPRESS, true);
  job.getConfiguration()
      .setClass(Job.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

  // reducer
  job.setReducerClass(SimpleWarcWriterReducer.class);

  // input-output is warc
  job.setInputFormatClass(WARCInputFormat.class);
  job.setOutputFormatClass(WARCOutputFormat.class);

  // mapper output data
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(WARCWritable.class);

  // set output compression to GZip
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  FileInputFormat.addInputPaths(job, args[0]);
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  return job.waitForCompletion(true) ? 0 : 1;
}
Example #13
Source Project: attic-apex-malhar Author: apache File: FilterStreamCodec.java License: Apache License 2.0
public SnappyFilterStreamContext(OutputStream outputStream) throws IOException {
  SnappyCodec codec = new SnappyCodec();
  codec.setConf(new Configuration());
  try {
    filterStream = new SnappyFilterStream(
        codec.createOutputStream(outputStream, new SnappyCompressor(bufferSize)));
  } catch (IOException e) {
    throw e;
  }
}
Example #14
Source Project: attic-apex-malhar Author: apache File: AbstractFileOutputOperatorTest.java License: Apache License 2.0
private boolean checkNativeSnappy() {
  try {
    SnappyCodec.checkNativeCodeLoaded();
  } catch (UnsatisfiedLinkError u) {
    LOG.error("WARNING: Skipping Snappy compression test since native libraries were not found.");
    return true;
  } catch (RuntimeException e) {
    LOG.error("WARNING: Skipping Snappy compression test since native libraries were not found.");
    return true;
  }
  return false;
}
Example #15
Source Project: attic-apex-malhar Author: apache File: AbstractFileOutputOperatorTest.java License: Apache License 2.0
@Test
public void testSnappyCompressionSimple() throws IOException {
  if (checkNativeSnappy()) {
    return;
  }

  File snappyFile = new File(testMeta.getDir(), "snappyTestFile.snappy");

  BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(snappyFile));
  Configuration conf = new Configuration();
  CompressionCodec codec = (CompressionCodec)ReflectionUtils.newInstance(SnappyCodec.class, conf);
  FilterStreamCodec.SnappyFilterStream filterStream = new FilterStreamCodec.SnappyFilterStream(
      codec.createOutputStream(os));

  int ONE_MB = 1024 * 1024;

  String testStr = "TestSnap-16bytes";
  for (int i = 0; i < ONE_MB; i++) { // write 16 MBs
    filterStream.write(testStr.getBytes());
  }
  filterStream.flush();
  filterStream.close();

  CompressionInputStream is = codec.createInputStream(new FileInputStream(snappyFile));

  byte[] recovered = new byte[testStr.length()];
  int bytesRead = is.read(recovered);
  is.close();

  assertEquals(testStr, new String(recovered));
}
Example #16
Source Project: hiped2 Author: alexholmes File: BloomFilterCreator.java License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  JobConf job = new JobConf(conf);
  job.setJarByClass(BloomFilterCreator.class);

  job.set(AvroJob.OUTPUT_SCHEMA, AvroBytesRecord.SCHEMA.toString());
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setInputFormat(KeyValueTextInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);

  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(BloomFilter.class);

  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(BloomFilter.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}
Example #17
Source Project: secor Author: pinterest File: JsonORCFileReaderWriterFactory.java License: Apache License 2.0
/**
 * Returns the ORC compression kind corresponding to the given Hadoop codec.
 *
 * @param codec the Hadoop compression codec
 * @return the matching ORC CompressionKind
 */
private CompressionKind resolveCompression(CompressionCodec codec) {
  if (codec instanceof Lz4Codec)
    return CompressionKind.LZ4;
  else if (codec instanceof SnappyCodec)
    return CompressionKind.SNAPPY;
  // although GZip and ZLIB are not the same thing,
  // there is no better-named codec for this case,
  // so use the hadoop Gzip codec to enable ORC ZLIB compression
  else if (codec instanceof GzipCodec)
    return CompressionKind.ZLIB;
  else
    return CompressionKind.NONE;
}
Example #18
Source Project: pentaho-hadoop-shims Author: pentaho File: SnappyShimImpl.java License: Apache License 2.0
/**
 * Tests whether hadoop-snappy (not to be confused with other java-based snappy implementations such as jsnappy or
 * snappy-java) plus the native snappy libraries are available.
 *
 * @return true if hadoop-snappy is available on the classpath
 */
public boolean isHadoopSnappyAvailable() {
  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    return SnappyCodec.isNativeCodeLoaded();
  } catch ( Throwable t ) {
    return false;
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
Example #19
Source Project: presto Author: prestosql File: RcFileTester.java License: Apache License 2.0
@Override
Optional<String> getCodecName() {
  return Optional.of(SnappyCodec.class.getName());
}
Example #20
Source Project: hadoop Author: naver File: NativeLibraryChecker.java License: Apache License 2.0
/**
 * A tool to test native library availability.
 */
public static void main(String[] args) {
  String usage = "NativeLibraryChecker [-a|-h]\n"
      + " -a use -a to check all libraries are available\n"
      + " by default just check hadoop library (and\n"
      + " winutils.exe on Windows OS) is available\n"
      + " exit with error code 1 if check failed\n"
      + " -h print this message\n";
  if (args.length > 1 ||
      (args.length == 1 &&
          !(args[0].equals("-a") || args[0].equals("-h")))) {
    System.err.println(usage);
    ExitUtil.terminate(1);
  }
  boolean checkAll = false;
  if (args.length == 1) {
    if (args[0].equals("-h")) {
      System.out.println(usage);
      return;
    }
    checkAll = true;
  }
  Configuration conf = new Configuration();
  boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded();
  boolean zlibLoaded = false;
  boolean snappyLoaded = false;
  // lz4 is linked within libhadoop
  boolean lz4Loaded = nativeHadoopLoaded;
  boolean bzip2Loaded = Bzip2Factory.isNativeBzip2Loaded(conf);
  boolean openSslLoaded = false;
  boolean winutilsExists = false;

  String openSslDetail = "";
  String hadoopLibraryName = "";
  String zlibLibraryName = "";
  String snappyLibraryName = "";
  String lz4LibraryName = "";
  String bzip2LibraryName = "";
  String winutilsPath = null;

  if (nativeHadoopLoaded) {
    hadoopLibraryName = NativeCodeLoader.getLibraryName();
    zlibLoaded = ZlibFactory.isNativeZlibLoaded(conf);
    if (zlibLoaded) {
      zlibLibraryName = ZlibFactory.getLibraryName();
    }
    snappyLoaded = NativeCodeLoader.buildSupportsSnappy() &&
        SnappyCodec.isNativeCodeLoaded();
    if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) {
      snappyLibraryName = SnappyCodec.getLibraryName();
    }
    if (OpensslCipher.getLoadingFailureReason() != null) {
      openSslDetail = OpensslCipher.getLoadingFailureReason();
      openSslLoaded = false;
    } else {
      openSslDetail = OpensslCipher.getLibraryName();
      openSslLoaded = true;
    }
    if (lz4Loaded) {
      lz4LibraryName = Lz4Codec.getLibraryName();
    }
    if (bzip2Loaded) {
      bzip2LibraryName = Bzip2Factory.getLibraryName(conf);
    }
  }

  // winutils.exe is required on Windows
  winutilsPath = Shell.getWinUtilsPath();
  if (winutilsPath != null) {
    winutilsExists = true;
  } else {
    winutilsPath = "";
  }

  System.out.println("Native library checking:");
  System.out.printf("hadoop: %b %s%n", nativeHadoopLoaded, hadoopLibraryName);
  System.out.printf("zlib: %b %s%n", zlibLoaded, zlibLibraryName);
  System.out.printf("snappy: %b %s%n", snappyLoaded, snappyLibraryName);
  System.out.printf("lz4: %b %s%n", lz4Loaded, lz4LibraryName);
  System.out.printf("bzip2: %b %s%n", bzip2Loaded, bzip2LibraryName);
  System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail);
  if (Shell.WINDOWS) {
    System.out.printf("winutils: %b %s%n", winutilsExists, winutilsPath);
  }

  if ((!nativeHadoopLoaded) || (Shell.WINDOWS && (!winutilsExists)) ||
      (checkAll && !(zlibLoaded && snappyLoaded && lz4Loaded && bzip2Loaded))) {
    // return 1 to indicate the check failed
    ExitUtil.terminate(1);
  }
}
Example #21
Source Project: hadoop Author: naver File: TestSnappyCompressorDecompressor.java License: Apache License 2.0
@Before
public void before() {
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
}
Example #22
Source Project: big-c Author: yncxcw File: NativeLibraryChecker.java License: Apache License 2.0
/**
 * A tool to test native library availability.
 */
public static void main(String[] args) {
  String usage = "NativeLibraryChecker [-a|-h]\n"
      + " -a use -a to check all libraries are available\n"
      + " by default just check hadoop library (and\n"
      + " winutils.exe on Windows OS) is available\n"
      + " exit with error code 1 if check failed\n"
      + " -h print this message\n";
  if (args.length > 1 ||
      (args.length == 1 &&
          !(args[0].equals("-a") || args[0].equals("-h")))) {
    System.err.println(usage);
    ExitUtil.terminate(1);
  }
  boolean checkAll = false;
  if (args.length == 1) {
    if (args[0].equals("-h")) {
      System.out.println(usage);
      return;
    }
    checkAll = true;
  }
  Configuration conf = new Configuration();
  boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded();
  boolean zlibLoaded = false;
  boolean snappyLoaded = false;
  // lz4 is linked within libhadoop
  boolean lz4Loaded = nativeHadoopLoaded;
  boolean bzip2Loaded = Bzip2Factory.isNativeBzip2Loaded(conf);
  boolean openSslLoaded = false;
  boolean winutilsExists = false;

  String openSslDetail = "";
  String hadoopLibraryName = "";
  String zlibLibraryName = "";
  String snappyLibraryName = "";
  String lz4LibraryName = "";
  String bzip2LibraryName = "";
  String winutilsPath = null;

  if (nativeHadoopLoaded) {
    hadoopLibraryName = NativeCodeLoader.getLibraryName();
    zlibLoaded = ZlibFactory.isNativeZlibLoaded(conf);
    if (zlibLoaded) {
      zlibLibraryName = ZlibFactory.getLibraryName();
    }
    snappyLoaded = NativeCodeLoader.buildSupportsSnappy() &&
        SnappyCodec.isNativeCodeLoaded();
    if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) {
      snappyLibraryName = SnappyCodec.getLibraryName();
    }
    if (OpensslCipher.getLoadingFailureReason() != null) {
      openSslDetail = OpensslCipher.getLoadingFailureReason();
      openSslLoaded = false;
    } else {
      openSslDetail = OpensslCipher.getLibraryName();
      openSslLoaded = true;
    }
    if (lz4Loaded) {
      lz4LibraryName = Lz4Codec.getLibraryName();
    }
    if (bzip2Loaded) {
      bzip2LibraryName = Bzip2Factory.getLibraryName(conf);
    }
  }

  // winutils.exe is required on Windows
  winutilsPath = Shell.getWinUtilsPath();
  if (winutilsPath != null) {
    winutilsExists = true;
  } else {
    winutilsPath = "";
  }

  System.out.println("Native library checking:");
  System.out.printf("hadoop: %b %s%n", nativeHadoopLoaded, hadoopLibraryName);
  System.out.printf("zlib: %b %s%n", zlibLoaded, zlibLibraryName);
  System.out.printf("snappy: %b %s%n", snappyLoaded, snappyLibraryName);
  System.out.printf("lz4: %b %s%n", lz4Loaded, lz4LibraryName);
  System.out.printf("bzip2: %b %s%n", bzip2Loaded, bzip2LibraryName);
  System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail);
  if (Shell.WINDOWS) {
    System.out.printf("winutils: %b %s%n", winutilsExists, winutilsPath);
  }

  if ((!nativeHadoopLoaded) || (Shell.WINDOWS && (!winutilsExists)) ||
      (checkAll && !(zlibLoaded && snappyLoaded && lz4Loaded && bzip2Loaded))) {
    // return 1 to indicate the check failed
    ExitUtil.terminate(1);
  }
}
Example #23
Source Project: big-c Author: yncxcw File: TestSnappyCompressorDecompressor.java License: Apache License 2.0
@Before
public void before() {
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
}
Example #24
Source Project: hiped2 Author: alexholmes File: DBImportMapReduce.java License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.OutputFileOption.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path output = new Path(cli.getArgValueAsString(CliCommonOpts.OutputFileOption.OUTPUT));

  Configuration conf = super.getConf();

  DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
      "jdbc:mysql://localhost/sqoop_test" +
          "?user=hip_sqoop_user&password=password");

  JobConf job = new JobConf(conf);
  job.setJarByClass(DBImportMapReduce.class);

  job.setInputFormat(DBInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);
  AvroJob.setOutputSchema(job, Stock.SCHEMA$);
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setMapperClass(Map.class);

  job.setNumMapTasks(4);
  job.setNumReduceTasks(0);

  job.setMapOutputKeyClass(AvroWrapper.class);
  job.setMapOutputValueClass(NullWritable.class);

  job.setOutputKeyClass(AvroWrapper.class);
  job.setOutputValueClass(NullWritable.class);

  FileOutputFormat.setOutputPath(job, output);

  DBInputFormat.setInput(
      job,
      StockDbWritable.class,
      "select * from stocks",
      "SELECT COUNT(id) FROM stocks");

  RunningJob runningJob = JobClient.runJob(job);

  return runningJob.isSuccessful() ? 0 : 1;
}
Example #25
Source Project: HBase-ToHDFS Author: tmalaska File: ExportHBaseTableToAvro.java License: Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println("ExportHBaseTableToAvro {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowKeyColumn.Optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String rowKeyColumn = "";

  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToAvro.class);
  job.setJobName("ExportHBaseTableToAvro ");

  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);

  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  AvroKeyOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();

  FileSystem fs = FileSystem.get(job.getConfiguration());
  AvroJob.setOutputKeySchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
Example #26
Source Project: HBase-ToHDFS Author: tmalaska File: ExportHBaseTableToDelimiteredSeq.java License: Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println("ExportHBaseTableToDelimiteredSeq {tableName} {ColumnFamily} {outputPath} {compressionCodec} {schemaLocationOnLocal} {delimiter} {rowKeyColumn.optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String delimiter = args[5];
  String rowKeyColumn = "";

  if (args.length > 6) {
    rowKeyColumn = args[6];
  }

  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
  job.getConfiguration().set(OUTPUT_PATH_CONF, outputPath);
  job.getConfiguration().set(DELIMITER_CONF, delimiter);

  job.setJarByClass(ExportHBaseTableToDelimiteredSeq.class);
  job.setJobName("ExportHBaseTableToDelimiteredSeq ");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);

  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));

  if (compressionCodec.equals("snappy")) {
    SequenceFileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
Example #27
Source Project: HBase-ToHDFS Author: tmalaska File: ExportHBaseTableToParquet.java License: Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println("ExportHBaseTableToParquet {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowkey.column.optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String rowKeyColumn = "";

  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToParquet.class);
  job.setJobName("ExportHBaseTableToParquet ");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);

  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  AvroParquetOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();

  FileSystem fs = FileSystem.get(job.getConfiguration());
  AvroParquetOutputFormat.setSchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
Example #28
Source Project: hiped2 Author: alexholmes File: AvroMixedMapReduce.java License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  JobConf job = new JobConf(conf);
  job.setJarByClass(AvroMixedMapReduce.class);

  job.set(AvroJob.INPUT_SCHEMA, Stock.SCHEMA$.toString());
  job.set(AvroJob.OUTPUT_SCHEMA, StockAvg.SCHEMA$.toString());
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setInputFormat(AvroInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);

  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(DoubleWritable.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}
Example #29
Source Project: hiped2 Author: alexholmes File: BloomFilterCreator.java License: Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(ReplicatedJoin.UserOptions.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path usersPath = new Path(cli.getArgValueAsString(ReplicatedJoin.UserOptions.USERS));
  Path outputPath = new Path(cli.getArgValueAsString(ReplicatedJoin.UserOptions.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);
  job.setJarByClass(BloomFilterCreator.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  AvroJob.setOutputKeySchema(job, AvroBytesRecord.SCHEMA);
  job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setOutputFormatClass(AvroKeyOutputFormat.class);

  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(BloomFilter.class);

  FileInputFormat.setInputPaths(job, usersPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.setNumReduceTasks(1);

  return job.waitForCompletion(true) ? 0 : 1;
}