org.apache.hadoop.hive.ql.io.orc.CompressionKind Java Examples
The following examples show how to use
org.apache.hadoop.hive.ql.io.orc.CompressionKind.
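Before the project examples, a quick orientation may help. CompressionKind is a plain Java enum (NONE, ZLIB, SNAPPY, and LZO in the classic hive-exec versions of this class), so the available codecs can be listed with values() and a name can be parsed with valueOf(). A minimal standalone sketch, not taken from any of the projects below:

import org.apache.hadoop.hive.ql.io.orc.CompressionKind;

public class CompressionKindBasics {
    public static void main(String[] args) {
        // List every codec this build of the enum supports.
        for (CompressionKind kind : CompressionKind.values()) {
            System.out.println(kind.name());
        }
        // Parse a codec name, e.g. one read from a configuration value.
        CompressionKind snappy = CompressionKind.valueOf("SNAPPY");
        System.out.println("Parsed: " + snappy);
    }
}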
Example #1
Source File: TestOrcStorage.java From spork with Apache License 2.0
@Test
public void testMultiStore() throws Exception {
    pigServer.setBatchOn();
    pigServer.registerQuery("A = load '" + INPUT1 + "' as (a0:int, a1:chararray);");
    pigServer.registerQuery("B = order A by a0;");
    pigServer.registerQuery("store B into '" + OUTPUT2 + "' using OrcStorage();");
    pigServer.registerQuery("store B into '" + OUTPUT3 + "' using OrcStorage('-c SNAPPY');");
    pigServer.executeBatch();

    // Default OrcStorage() output: ORC's default codec, ZLIB.
    Path outputFilePath = new Path(new Path(OUTPUT2), "part-r-00000");
    Reader reader = OrcFile.createReader(fs, outputFilePath);
    assertEquals(reader.getNumberOfRows(), 2);
    assertEquals(reader.getCompression(), CompressionKind.ZLIB);

    // '-c SNAPPY' output: same rows, SNAPPY codec.
    Path outputFilePath2 = new Path(new Path(OUTPUT3), "part-r-00000");
    reader = OrcFile.createReader(fs, outputFilePath2);
    assertEquals(reader.getNumberOfRows(), 2);
    assertEquals(reader.getCompression(), CompressionKind.SNAPPY);

    verifyData(outputFilePath, outputFilePath2, fs, 2);
}
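The ZLIB-versus-SNAPPY behavior the test asserts can be reproduced directly against the hive-exec writer API. A minimal round-trip sketch, assuming the same OrcFile and Reader classes the test uses (the output path and the Row type are hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;

public class SnappyRoundTrip {
    // Simple row type; the reflection object inspector derives the ORC schema from it.
    static class Row {
        int a0;
        String a1;
        Row(int a0, String a1) { this.a0 = a0; this.a1 = a1; }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/snappy-example.orc"); // hypothetical output path
        ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
                Row.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);

        // Write a couple of rows with SNAPPY compression selected explicitly.
        Writer writer = OrcFile.createWriter(path,
                OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.SNAPPY));
        writer.addRow(new Row(1, "one"));
        writer.addRow(new Row(2, "two"));
        writer.close();

        // Read the file back and confirm the codec, as the test above does.
        FileSystem fs = FileSystem.get(conf);
        Reader reader = OrcFile.createReader(fs, path);
        System.out.println(reader.getCompression()); // expected: SNAPPY
    }
}

Note that getCompression() comes from the file's metadata, which is why the test can assert the codec without scanning any row data.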
Example #2
Source File: PutORC.java From nifi with Apache License 2.0
@Override
public HDFSRecordWriter createHDFSRecordWriter(final ProcessContext context, final FlowFile flowFile,
        final Configuration conf, final Path path, final RecordSchema schema)
        throws IOException, SchemaNotFoundException {
    final long stripeSize = context.getProperty(STRIPE_SIZE).asDataSize(DataUnit.B).longValue();
    final int bufferSize = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
    // The processor property carries the codec name; valueOf maps it onto the enum.
    final CompressionKind compressionType = CompressionKind.valueOf(context.getProperty(COMPRESSION_TYPE).getValue());
    final boolean normalizeForHive = context.getProperty(HIVE_FIELD_NAMES).asBoolean();
    TypeInfo orcSchema = NiFiOrcUtils.getOrcSchema(schema, normalizeForHive);
    final Writer orcWriter = NiFiOrcUtils.createWriter(path, conf, orcSchema, stripeSize, compressionType, bufferSize);
    final String hiveTableName = context.getProperty(HIVE_TABLE_NAME).isSet()
            ? context.getProperty(HIVE_TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue()
            : NiFiOrcUtils.normalizeHiveTableName(schema.getIdentifier().getName().orElse("unknown"));
    final boolean hiveFieldNames = context.getProperty(HIVE_FIELD_NAMES).asBoolean();
    return new ORCHDFSRecordWriter(orcWriter, schema, hiveTableName, hiveFieldNames);
}
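One caveat with this pattern: CompressionKind.valueOf throws IllegalArgumentException for any name outside the enum, so code reading the codec from a free-form property may want a defensive wrapper. A small sketch (the ZLIB fallback is an assumption, chosen because ZLIB is ORC's default codec):

import org.apache.hadoop.hive.ql.io.orc.CompressionKind;

public class CompressionTypeParser {
    // Parse a user-supplied compression name, falling back to ZLIB
    // when the value is missing or unrecognized.
    static CompressionKind parseCompression(String value) {
        if (value == null || value.isEmpty()) {
            return CompressionKind.ZLIB;
        }
        try {
            return CompressionKind.valueOf(value.trim().toUpperCase());
        } catch (IllegalArgumentException e) {
            // valueOf throws for names outside the enum; fall back rather than fail.
            return CompressionKind.ZLIB;
        }
    }

    public static void main(String[] args) {
        System.out.println(parseCompression("snappy")); // SNAPPY
        System.out.println(parseCompression("bogus"));  // ZLIB
    }
}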
Example #3
Source File: OrcStorage.java From spork with Apache License 2.0
public OrcStorage(String options) {
    String[] optsArr = options.split(" ");
    try {
        CommandLine configuredOptions = parser.parse(validOptions, optsArr);
        if (configuredOptions.hasOption('s')) {
            stripeSize = Long.parseLong(configuredOptions.getOptionValue('s'));
        }
        if (configuredOptions.hasOption('r')) {
            rowIndexStride = Integer.parseInt(configuredOptions.getOptionValue('r'));
        }
        if (configuredOptions.hasOption('b')) {
            bufferSize = Integer.parseInt(configuredOptions.getOptionValue('b'));
        }
        blockPadding = configuredOptions.hasOption('p');
        if (configuredOptions.hasOption('c')) {
            // '-c' carries the codec name, e.g. SNAPPY; valueOf maps it onto CompressionKind.
            compress = CompressionKind.valueOf(configuredOptions.getOptionValue('c'));
        }
        if (configuredOptions.hasOption('v')) {
            version = Version.byName(configuredOptions.getOptionValue('v'));
        }
    } catch (ParseException e) {
        log.error("Exception in OrcStorage", e);
        log.error("OrcStorage called with arguments " + options);
        warn("ParseException in OrcStorage", PigWarning.UDF_WARNING_1);
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("OrcStorage(',', '[options]')", validOptions);
        throw new RuntimeException(e);
    }
}
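For reference, the options string this constructor parses is space-separated, and the '-c' value must name a CompressionKind constant exactly or valueOf will throw. A hypothetical invocation, assuming Pig's org.apache.pig.builtin.OrcStorage and made-up size values:

import org.apache.pig.builtin.OrcStorage; // assumed package for Pig's built-in OrcStorage

public class OrcStorageOptionsExample {
    public static void main(String[] args) {
        // '-c' codec name, '-s' stripe size in bytes, '-b' buffer size in bytes,
        // '-p' enables block padding. The equivalent Pig Latin would be:
        //   store B into 'out' using OrcStorage('-c SNAPPY -s 268435456 -b 262144 -p');
        OrcStorage storage = new OrcStorage("-c SNAPPY -s 268435456 -b 262144 -p");
        System.out.println("Configured: " + storage);
    }
}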