Java Code Examples for org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat

The following examples show how to use org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat. They are extracted from open source projects; the source project, source file, and license are listed above each example where available.
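Most of the examples share the same pattern: configure ORC settings on a JobConf, describe the output columns in table Properties, obtain a FileSinkOperator.RecordWriter from OrcOutputFormat.getHiveRecordWriter, and hand it rows serialized by OrcSerde. The following is a minimal, self-contained sketch of that pattern; the column name "test", the output path, and the compression setting are illustrative and not taken from any particular example below.

import java.io.File;
import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

import static java.util.Collections.singletonList;

public class OrcOutputFormatSketch
{
    public static void main(String[] args) throws Exception
    {
        JobConf jobConf = new JobConf();
        // Illustrative ORC setting; the examples below configure ORC the same way.
        jobConf.set("hive.exec.orc.default.compress", "ZLIB");

        // Table properties describe the single string column being written.
        Properties tableProperties = new Properties();
        tableProperties.setProperty("columns", "test");
        tableProperties.setProperty("columns.types", "string");

        File outputFile = new File("/tmp/example.orc"); // illustrative path

        FileSinkOperator.RecordWriter writer = new OrcOutputFormat().getHiveRecordWriter(
                jobConf,
                new Path(outputFile.toURI()),
                Text.class,
                true,                   // write compressed output
                tableProperties,
                () -> {});              // no-op Progressable

        // Rows are passed to the writer as Writables produced by OrcSerde.
        OrcSerde serde = new OrcSerde();
        ObjectInspector inspector = ObjectInspectorFactory.getStandardStructObjectInspector(
                singletonList("test"),
                singletonList((ObjectInspector) PrimitiveObjectInspectorFactory.javaStringObjectInspector));

        writer.write(serde.serialize(singletonList("hello orc"), inspector));
        writer.close(false);
    }
}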
Example 1
Source Project: presto    Source File: TestCachingOrcDataSource.java    License: Apache License 2.0
private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format, CompressionKind compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    OrcConf.WRITE_FORMAT.setString(jobConf, format == ORC_12 ? "0.12" : "0.11");
    OrcConf.COMPRESS.setString(jobConf, compression.name());

    Properties tableProperties = new Properties();
    tableProperties.setProperty(IOConstants.COLUMNS, "test");
    tableProperties.setProperty(IOConstants.COLUMNS_TYPES, columnObjectInspector.getTypeName());
    tableProperties.setProperty(OrcConf.STRIPE_SIZE.getAttribute(), "120000");

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            tableProperties,
            () -> {});
}
 
Example 2
Source Project: DataLink    Source File: HdfsHelper.java    License: Apache License 2.0
OrcWriterProxy(Configuration config, String fileName) throws IOException {
    // 'conf', 'fileSystem', 'columns', 'inspector', 'writer', and 'orcSerde'
    // are fields defined on the enclosing class (HdfsHelper).

    // initialize columns
    columns = config.getListConfiguration(Key.COLUMN);

    // initialize the struct object inspector from the configured columns
    List<String> columnNames = getColumnNames(columns);
    List<ObjectInspector> columnTypeInspectors = getColumnTypeInspectors(columns);
    inspector = (StructObjectInspector) ObjectInspectorFactory
            .getStandardStructObjectInspector(columnNames, columnTypeInspectors);

    // initialize the writer, applying a compression codec if one is configured
    String compress = config.getString(Key.COMPRESS, null);
    FileOutputFormat outFormat = new OrcOutputFormat();
    if (null != compress && !"NONE".equalsIgnoreCase(compress)) {
        Class<? extends CompressionCodec> codecClass = getCompressCodec(compress);
        if (null != codecClass) {
            outFormat.setOutputCompressorClass(conf, codecClass);
        }
    }
    writer = outFormat.getRecordWriter(fileSystem, conf, fileName, Reporter.NULL);

    // initialize the OrcSerde used to serialize rows
    orcSerde = new OrcSerde();
}
 
Example 3
static RecordWriter createOrcRecordWriter(File outputFile, Format format, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> { }
    );
}
 
Example 4
private static RecordWriter createDwrfRecordWriter(File outputFile, Compression compressionCodec, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.default.compress", compressionCodec.name());
    jobConf.set("hive.exec.orc.compress", compressionCodec.name());
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
    OrcConf.setBoolVar(jobConf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compressionCodec != NONE,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> { }
    );
}
 
Example 5
private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", "test");
    tableProperties.setProperty("columns.types", columnObjectInspector.getTypeName());
    tableProperties.setProperty("orc.stripe.size", "1200000");

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            tableProperties,
            () -> { });
}
 
Example 6
Source Project: presto    Source File: OrcTester.java    License: Apache License 2.0
static RecordWriter createOrcRecordWriter(File outputFile, Format format, CompressionKind compression, Type type)
        throws IOException
{
    JobConf jobConf = new JobConf();
    OrcConf.WRITE_FORMAT.setString(jobConf, format == ORC_12 ? "0.12" : "0.11");
    OrcConf.COMPRESS.setString(jobConf, compression.name());

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            createTableProperties("test", getJavaObjectInspector(type).getTypeName()),
            () -> {});
}
 
Example 7
Source Project: presto    Source File: TestOrcReaderPositions.java    License: Apache License 2.0
private static void flushWriter(FileSinkOperator.RecordWriter writer)
        throws IOException, ReflectiveOperationException
{
    Field field = OrcOutputFormat.class.getClassLoader()
            .loadClass(OrcOutputFormat.class.getName() + "$OrcRecordWriter")
            .getDeclaredField("writer");
    field.setAccessible(true);
    ((Writer) field.get(writer)).writeIntermediateFooter();
}
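
OrcRecordWriter is a non-public inner class of OrcOutputFormat, so the test reaches the wrapped Writer through reflection; calling writeIntermediateFooter() forces the buffered data and an intermediate footer out to disk without closing the writer.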
 
Example 8
Source Project: presto    Source File: OrcFileWriter.java    License: Apache License 2.0
private static Constructor<? extends RecordWriter> getOrcWriterConstructor()
{
    try {
        String writerClassName = OrcOutputFormat.class.getName() + "$OrcRecordWriter";
        Constructor<? extends RecordWriter> constructor = OrcOutputFormat.class.getClassLoader()
                .loadClass(writerClassName).asSubclass(RecordWriter.class)
                .getDeclaredConstructor(Path.class, OrcFile.WriterOptions.class);
        constructor.setAccessible(true);
        return constructor;
    }
    catch (ReflectiveOperationException e) {
        throw new RuntimeException(e);
    }
}
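
The constructor obtained this way takes a Path and an OrcFile.WriterOptions. As a hedged sketch (the helper name and arguments below are hypothetical, not from OrcFileWriter.java), it could be invoked along these lines:

private static RecordWriter newOrcRecordWriter(Path target, Configuration configuration)
        throws ReflectiveOperationException
{
    // Hypothetical usage: construct the non-public OrcRecordWriter reflectively
    // with default writer options derived from the Hadoop configuration.
    return getOrcWriterConstructor().newInstance(target, OrcFile.writerOptions(configuration));
}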
 
Example 9
Source Project: presto    Source File: TestOrcPageSourceMemoryTracking.java    License: Apache License 2.0
private static Constructor<? extends RecordWriter> getOrcWriterConstructor()
{
    try {
        Constructor<? extends RecordWriter> constructor = OrcOutputFormat.class.getClassLoader()
                .loadClass(ORC_RECORD_WRITER)
                .asSubclass(RecordWriter.class)
                .getDeclaredConstructor(Path.class, WriterOptions.class);
        constructor.setAccessible(true);
        return constructor;
    }
    catch (ReflectiveOperationException e) {
        throw new RuntimeException(e);
    }
}
 
Example 10
Source Project: flink    Source File: HiveDialectITCase.java    License: Apache License 2.0
@Test
public void testCreateTable() throws Exception {
	String location = warehouse + "/external_location";
	tableEnv.executeSql(String.format(
			"create external table tbl1 (d decimal(10,0),ts timestamp) partitioned by (p string) location '%s' tblproperties('k1'='v1')", location));
	Table hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl1"));
	assertEquals(TableType.EXTERNAL_TABLE.toString(), hiveTable.getTableType());
	assertEquals(1, hiveTable.getPartitionKeysSize());
	assertEquals(location, locationPath(hiveTable.getSd().getLocation()));
	assertEquals("v1", hiveTable.getParameters().get("k1"));
	assertFalse(hiveTable.getParameters().containsKey(SqlCreateHiveTable.TABLE_LOCATION_URI));

	tableEnv.executeSql("create table tbl2 (s struct<ts:timestamp,bin:binary>) stored as orc");
	hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl2"));
	assertEquals(TableType.MANAGED_TABLE.toString(), hiveTable.getTableType());
	assertEquals(OrcSerde.class.getName(), hiveTable.getSd().getSerdeInfo().getSerializationLib());
	assertEquals(OrcInputFormat.class.getName(), hiveTable.getSd().getInputFormat());
	assertEquals(OrcOutputFormat.class.getName(), hiveTable.getSd().getOutputFormat());

	tableEnv.executeSql("create table tbl3 (m map<timestamp,binary>) partitioned by (p1 bigint,p2 tinyint) " +
			"row format serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe'");
	hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl3"));
	assertEquals(2, hiveTable.getPartitionKeysSize());
	assertEquals(LazyBinarySerDe.class.getName(), hiveTable.getSd().getSerdeInfo().getSerializationLib());

	tableEnv.executeSql("create table tbl4 (x int,y smallint) row format delimited fields terminated by '|' lines terminated by '\n'");
	hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl4"));
	assertEquals("|", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.FIELD_DELIM));
	assertEquals("|", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.SERIALIZATION_FORMAT));
	assertEquals("\n", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.LINE_DELIM));

	tableEnv.executeSql("create table tbl5 (m map<bigint,string>) row format delimited collection items terminated by ';' " +
			"map keys terminated by ':'");
	hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl5"));
	assertEquals(";", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.COLLECTION_DELIM));
	assertEquals(":", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.MAPKEY_DELIM));
}
 
Example 11
Source Project: nifi    Source File: TestPutHive3Streaming.java    License: Apache License 2.0
MockHiveStreamingConnection(HiveOptions options, RecordReader reader, RecordWriter recordWriter, List<FieldSchema> schema) throws StreamingException {
    this.options = options;
    metastoreURI = options.getMetaStoreURI();
    this.writer = recordWriter;
    this.hiveConf = this.options.getHiveConf();
    connectionStats = new ConnectionStats();
    this.table = new Table(Table.getEmptyTable(options.getDatabaseName(), options.getTableName()));
    this.table.setFields(schema);
    StorageDescriptor sd = this.table.getSd();
    sd.setOutputFormat(OrcOutputFormat.class.getName());
    sd.setLocation(TARGET_HIVE);
}
 
Example 12
Source Project: incubator-gobblin    Source File: HiveMetaStoreUtilsTest.java    License: Apache License 2.0
@Test
public void testGetTableOrc() {
  final String databaseName = "db";
  final String tableName = "tbl";
  HiveTable.Builder builder = new HiveTable.Builder();
  builder.withDbName(databaseName).withTableName(tableName);

  HiveTable hiveTable = builder.build();

  // SerDe props describing the columns and their types
  State serdeProps = new State();
  serdeProps.setProp("columns", "timestamp,namespace,name,metadata");
  serdeProps.setProp("columns.types", "bigint,string,string,map<string,string>");

  hiveTable.getProps().addAll(serdeProps);

  hiveTable.setInputFormat(OrcInputFormat.class.getName());
  hiveTable.setOutputFormat(OrcOutputFormat.class.getName());
  hiveTable.setSerDeType(OrcSerde.class.getName());

  Table table = HiveMetaStoreUtils.getTable(hiveTable);
  Assert.assertEquals(table.getDbName(), databaseName);
  Assert.assertEquals(table.getTableName(), tableName);

  StorageDescriptor sd = table.getSd();
  Assert.assertEquals(sd.getInputFormat(), OrcInputFormat.class.getName());
  Assert.assertEquals(sd.getOutputFormat(), OrcOutputFormat.class.getName());
  Assert.assertNotNull(sd.getSerdeInfo());
  Assert.assertEquals(sd.getSerdeInfo().getSerializationLib(), OrcSerde.class.getName());

  // verify column name
  List<FieldSchema> fields = sd.getCols();
  Assert.assertTrue(fields != null && fields.size() == 4);
  FieldSchema fieldA = fields.get(0);
  Assert.assertEquals(fieldA.getName(), "timestamp");
  Assert.assertEquals(fieldA.getType(), "bigint");

  FieldSchema fieldB = fields.get(1);
  Assert.assertEquals(fieldB.getName(), "namespace");
  Assert.assertEquals(fieldB.getType(), "string");

  FieldSchema fieldC = fields.get(2);
  Assert.assertEquals(fieldC.getName(), "name");
  Assert.assertEquals(fieldC.getType(), "string");
  FieldSchema fieldD = fields.get(3);
  Assert.assertEquals(fieldD.getName(), "metadata");
  Assert.assertEquals(fieldD.getType(), "map<string,string>");
}