org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat Java Examples
The following examples show how to use
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat.
Each example notes the source file it was taken from, the project it belongs to, and that project's license.
Example #1
Source File: TestCachingOrcDataSource.java From presto with Apache License 2.0
private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format, CompressionKind compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    OrcConf.WRITE_FORMAT.setString(jobConf, format == ORC_12 ? "0.12" : "0.11");
    OrcConf.COMPRESS.setString(jobConf, compression.name());

    Properties tableProperties = new Properties();
    tableProperties.setProperty(IOConstants.COLUMNS, "test");
    tableProperties.setProperty(IOConstants.COLUMNS_TYPES, columnObjectInspector.getTypeName());
    tableProperties.setProperty(OrcConf.STRIPE_SIZE.getAttribute(), "120000");

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            tableProperties,
            () -> {});
}
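This helper only constructs the writer. Below is a minimal sketch, not part of the Presto test, of how the returned writer might be driven: it assumes a single string column named "test" (matching the table properties above), and the output path and row value are purely illustrative.

// Sketch only: write one row through the writer created by the helper above.
ObjectInspector columnInspector = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
FileSinkOperator.RecordWriter writer =
        createOrcRecordWriter(new File("/tmp/orc-example.orc"), ORC_12, CompressionKind.ZLIB, columnInspector);

// Rows handed to OrcSerde are inspected through a struct that wraps the single column.
StructObjectInspector rowInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
        Collections.singletonList("test"),
        Collections.singletonList(columnInspector));
OrcSerde serde = new OrcSerde();
writer.write(serde.serialize(Collections.singletonList("hello"), rowInspector));
writer.close(false); // false = not aborted; flushes stripes and writes the ORC footer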
Example #2
Source File: HdfsHelper.java From DataLink with Apache License 2.0
OrcWriterProxy(Configuration config, String fileName) throws IOException {
    // initial columns
    columns = config.getListConfiguration(Key.COLUMN);

    // initial inspector
    List<String> columnNames = getColumnNames(columns);
    List<ObjectInspector> columnTypeInspectors = getColumnTypeInspectors(columns);
    inspector = (StructObjectInspector) ObjectInspectorFactory
            .getStandardStructObjectInspector(columnNames, columnTypeInspectors);

    // initial writer
    String compress = config.getString(Key.COMPRESS, null);
    FileOutputFormat outFormat = new OrcOutputFormat();
    if (!"NONE".equalsIgnoreCase(compress) && null != compress) {
        Class<? extends CompressionCodec> codecClass = getCompressCodec(compress);
        if (null != codecClass) {
            outFormat.setOutputCompressorClass(conf, codecClass);
        }
    }
    writer = outFormat.getRecordWriter(fileSystem, conf, fileName, Reporter.NULL);

    // initial orcSerde
    orcSerde = new OrcSerde();
}
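The constructor above only wires up the writer; the write path is not shown in this snippet. A hedged sketch of how a row might flow through the proxy follows, assuming hypothetical write/close methods on OrcWriterProxy and reusing the writer, orcSerde, and inspector fields initialized above. The mapred RecordWriter obtained from OrcOutputFormat takes a key/value pair, with the key conventionally NullWritable.

// Sketch only (not DataLink code): push one row through the proxy's writer.
// "recordValues" is an illustrative list of column values matching the configured columns.
public void write(List<Object> recordValues) throws IOException {
    writer.write(NullWritable.get(), orcSerde.serialize(recordValues, inspector));
}

public void close() throws IOException {
    writer.close(Reporter.NULL);
}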
Example #3
Source File: OrcTester.java From spliceengine with GNU Affero General Public License v3.0
static RecordWriter createOrcRecordWriter(File outputFile, Format format, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> {});
}
Example #4
Source File: OrcTester.java From spliceengine with GNU Affero General Public License v3.0
private static RecordWriter createDwrfRecordWriter(File outputFile, Compression compressionCodec, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.default.compress", compressionCodec.name());
    jobConf.set("hive.exec.orc.compress", compressionCodec.name());
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
    OrcConf.setBoolVar(jobConf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compressionCodec != NONE,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> {});
}
Example #5
Source File: TestCachingOrcDataSource.java From spliceengine with GNU Affero General Public License v3.0
private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", "test");
    tableProperties.setProperty("columns.types", columnObjectInspector.getTypeName());
    tableProperties.setProperty("orc.stripe.size", "1200000");

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            tableProperties,
            () -> {});
}
Example #6
Source File: OrcTester.java From presto with Apache License 2.0
static RecordWriter createOrcRecordWriter(File outputFile, Format format, CompressionKind compression, Type type)
        throws IOException
{
    JobConf jobConf = new JobConf();
    OrcConf.WRITE_FORMAT.setString(jobConf, format == ORC_12 ? "0.12" : "0.11");
    OrcConf.COMPRESS.setString(jobConf, compression.name());

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            createTableProperties("test", getJavaObjectInspector(type).getTypeName()),
            () -> {});
}
Example #7
Source File: TestOrcReaderPositions.java From presto with Apache License 2.0
private static void flushWriter(FileSinkOperator.RecordWriter writer)
        throws IOException, ReflectiveOperationException
{
    Field field = OrcOutputFormat.class.getClassLoader()
            .loadClass(OrcOutputFormat.class.getName() + "$OrcRecordWriter")
            .getDeclaredField("writer");
    field.setAccessible(true);
    ((Writer) field.get(writer)).writeIntermediateFooter();
}
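flushWriter reaches into the private writer field of OrcOutputFormat's OrcRecordWriter and asks the underlying ORC Writer for an intermediate footer, so a partially written file can already be opened by a reader while the writer stays open. A sketch of how a test might use it, reusing the writer, serde, and rowInspector names from the sketch under Example #1 (illustrative only, not Presto's test code):

// Sketch only: write a batch of rows, then force an intermediate footer.
for (int i = 0; i < 100; i++) {
    writer.write(serde.serialize(Collections.singletonList("row-" + i), rowInspector));
}
flushWriter(writer); // appends an intermediate footer via the private "writer" field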
Example #8
Source File: OrcFileWriter.java From presto with Apache License 2.0
private static Constructor<? extends RecordWriter> getOrcWriterConstructor()
{
    try {
        String writerClassName = OrcOutputFormat.class.getName() + "$OrcRecordWriter";
        Constructor<? extends RecordWriter> constructor = OrcOutputFormat.class.getClassLoader()
                .loadClass(writerClassName)
                .asSubclass(RecordWriter.class)
                .getDeclaredConstructor(Path.class, OrcFile.WriterOptions.class);
        constructor.setAccessible(true);
        return constructor;
    }
    catch (ReflectiveOperationException e) {
        throw new RuntimeException(e);
    }
}
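The constructor located here is the private OrcOutputFormat$OrcRecordWriter(Path, OrcFile.WriterOptions) constructor. A hedged sketch of invoking it follows; the helper name, target path, and inspector are illustrative and not part of Presto's OrcFileWriter.

// Sketch only: instantiate the private OrcRecordWriter via the constructor obtained above.
static RecordWriter newOrcRecordWriter(Path target, ObjectInspector inspector)
        throws ReflectiveOperationException
{
    OrcFile.WriterOptions options = OrcFile.writerOptions(new Configuration()).inspector(inspector);
    return getOrcWriterConstructor().newInstance(target, options);
}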
Example #9
Source File: TestOrcPageSourceMemoryTracking.java From presto with Apache License 2.0
private static Constructor<? extends RecordWriter> getOrcWriterConstructor()
{
    try {
        Constructor<? extends RecordWriter> constructor = OrcOutputFormat.class.getClassLoader()
                .loadClass(ORC_RECORD_WRITER)
                .asSubclass(RecordWriter.class)
                .getDeclaredConstructor(Path.class, WriterOptions.class);
        constructor.setAccessible(true);
        return constructor;
    }
    catch (ReflectiveOperationException e) {
        throw new RuntimeException(e);
    }
}
Example #10
Source File: HiveDialectITCase.java From flink with Apache License 2.0
@Test
public void testCreateTable() throws Exception {
    String location = warehouse + "/external_location";
    tableEnv.executeSql(String.format(
            "create external table tbl1 (d decimal(10,0),ts timestamp) partitioned by (p string) location '%s' tblproperties('k1'='v1')",
            location));
    Table hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl1"));
    assertEquals(TableType.EXTERNAL_TABLE.toString(), hiveTable.getTableType());
    assertEquals(1, hiveTable.getPartitionKeysSize());
    assertEquals(location, locationPath(hiveTable.getSd().getLocation()));
    assertEquals("v1", hiveTable.getParameters().get("k1"));
    assertFalse(hiveTable.getParameters().containsKey(SqlCreateHiveTable.TABLE_LOCATION_URI));

    tableEnv.executeSql("create table tbl2 (s struct<ts:timestamp,bin:binary>) stored as orc");
    hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl2"));
    assertEquals(TableType.MANAGED_TABLE.toString(), hiveTable.getTableType());
    assertEquals(OrcSerde.class.getName(), hiveTable.getSd().getSerdeInfo().getSerializationLib());
    assertEquals(OrcInputFormat.class.getName(), hiveTable.getSd().getInputFormat());
    assertEquals(OrcOutputFormat.class.getName(), hiveTable.getSd().getOutputFormat());

    tableEnv.executeSql("create table tbl3 (m map<timestamp,binary>) partitioned by (p1 bigint,p2 tinyint) " +
            "row format serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe'");
    hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl3"));
    assertEquals(2, hiveTable.getPartitionKeysSize());
    assertEquals(LazyBinarySerDe.class.getName(), hiveTable.getSd().getSerdeInfo().getSerializationLib());

    tableEnv.executeSql("create table tbl4 (x int,y smallint) row format delimited fields terminated by '|' lines terminated by '\n'");
    hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl4"));
    assertEquals("|", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.FIELD_DELIM));
    assertEquals("|", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.SERIALIZATION_FORMAT));
    assertEquals("\n", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.LINE_DELIM));

    tableEnv.executeSql("create table tbl5 (m map<bigint,string>) row format delimited collection items terminated by ';' " +
            "map keys terminated by ':'");
    hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl5"));
    assertEquals(";", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.COLLECTION_DELIM));
    assertEquals(":", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.MAPKEY_DELIM));
}
Example #11
Source File: TestPutHive3Streaming.java From nifi with Apache License 2.0
MockHiveStreamingConnection(HiveOptions options, RecordReader reader, RecordWriter recordWriter, List<FieldSchema> schema) throws StreamingException {
    this.options = options;
    metastoreURI = options.getMetaStoreURI();
    this.writer = recordWriter;
    this.hiveConf = this.options.getHiveConf();
    connectionStats = new ConnectionStats();
    this.table = new Table(Table.getEmptyTable(options.getDatabaseName(), options.getTableName()));
    this.table.setFields(schema);
    StorageDescriptor sd = this.table.getSd();
    sd.setOutputFormat(OrcOutputFormat.class.getName());
    sd.setLocation(TARGET_HIVE);
}
Example #12
Source File: HiveMetaStoreUtilsTest.java From incubator-gobblin with Apache License 2.0
@Test
public void testGetTableOrc() {
    final String databaseName = "db";
    final String tableName = "tbl";

    HiveTable.Builder builder = new HiveTable.Builder();
    builder.withDbName(databaseName).withTableName(tableName);
    HiveTable hiveTable = builder.build();

    // SerDe props
    State serdeProps = new State();
    serdeProps.setProp("columns", "timestamp,namespace,name,metadata");
    serdeProps.setProp("columns.types", "bigint,string,string,map<string,string>");
    hiveTable.getProps().addAll(serdeProps);

    hiveTable.setInputFormat(OrcInputFormat.class.getName());
    hiveTable.setOutputFormat(OrcOutputFormat.class.getName());
    hiveTable.setSerDeType(OrcSerde.class.getName());

    Table table = HiveMetaStoreUtils.getTable(hiveTable);
    Assert.assertEquals(table.getDbName(), databaseName);
    Assert.assertEquals(table.getTableName(), tableName);

    StorageDescriptor sd = table.getSd();
    Assert.assertEquals(sd.getInputFormat(), OrcInputFormat.class.getName());
    Assert.assertEquals(sd.getOutputFormat(), OrcOutputFormat.class.getName());
    Assert.assertNotNull(sd.getSerdeInfo());
    Assert.assertEquals(sd.getSerdeInfo().getSerializationLib(), OrcSerde.class.getName());

    // verify column names and types
    List<FieldSchema> fields = sd.getCols();
    Assert.assertTrue(fields != null && fields.size() == 4);
    FieldSchema fieldA = fields.get(0);
    Assert.assertEquals(fieldA.getName(), "timestamp");
    Assert.assertEquals(fieldA.getType(), "bigint");
    FieldSchema fieldB = fields.get(1);
    Assert.assertEquals(fieldB.getName(), "namespace");
    Assert.assertEquals(fieldB.getType(), "string");
    FieldSchema fieldC = fields.get(2);
    Assert.assertEquals(fieldC.getName(), "name");
    Assert.assertEquals(fieldC.getType(), "string");
    FieldSchema fieldD = fields.get(3);
    Assert.assertEquals(fieldD.getName(), "metadata");
    Assert.assertEquals(fieldD.getType(), "map<string,string>");
}