org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat. Each example is drawn from an open-source project; the source file and license are noted above each snippet.
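Before the project-specific examples, here is a minimal self-contained sketch of the pattern most of them follow: obtain a Hive RecordWriter from OrcOutputFormat.getHiveRecordWriter, serialize rows with OrcSerde against a matching ObjectInspector, and close the writer. The class name, output path, column name, and sample row are illustrative assumptions, not taken from any of the projects below.

// Minimal sketch (all names and the output path are assumptions): write one row
// of a single string column "test" to a local ORC file.
import java.util.Collections;
import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class OrcOutputFormatSketch {
    public static void main(String[] args) throws Exception {
        JobConf jobConf = new JobConf();

        // Table properties describe the schema to the writer: one string column named "test".
        Properties tableProperties = new Properties();
        tableProperties.setProperty("columns", "test");
        tableProperties.setProperty("columns.types", "string");

        FileSinkOperator.RecordWriter writer = new OrcOutputFormat().getHiveRecordWriter(
                jobConf,
                new Path("/tmp/example.orc"),   // output location (assumption)
                Text.class,
                false,                          // isCompressed
                tableProperties,
                () -> {});                      // no-op progress callback

        // Serialize a row that matches a struct<test:string> inspector, write it, and close.
        StructObjectInspector inspector = ObjectInspectorFactory.getStandardStructObjectInspector(
                Collections.singletonList("test"),
                Collections.<ObjectInspector>singletonList(
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector));
        OrcSerde serde = new OrcSerde();
        writer.write(serde.serialize(Collections.singletonList("hello"), inspector));
        writer.close(false);
    }
}
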
Example #1
Source File: TestCachingOrcDataSource.java    From presto with Apache License 2.0
private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format, CompressionKind compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    OrcConf.WRITE_FORMAT.setString(jobConf, format == ORC_12 ? "0.12" : "0.11");
    OrcConf.COMPRESS.setString(jobConf, compression.name());

    Properties tableProperties = new Properties();
    tableProperties.setProperty(IOConstants.COLUMNS, "test");
    tableProperties.setProperty(IOConstants.COLUMNS_TYPES, columnObjectInspector.getTypeName());
    tableProperties.setProperty(OrcConf.STRIPE_SIZE.getAttribute(), "120000");

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            tableProperties,
            () -> {});
}
 
Example #2
Source File: HdfsHelper.java    From DataLink with Apache License 2.0
OrcWriterProxy(Configuration config, String fileName) throws IOException {
    // initialize columns
    columns = config.getListConfiguration(Key.COLUMN);

    // initialize the struct object inspector describing the output columns
    List<String> columnNames = getColumnNames(columns);
    List<ObjectInspector> columnTypeInspectors = getColumnTypeInspectors(columns);
    inspector = (StructObjectInspector) ObjectInspectorFactory
            .getStandardStructObjectInspector(columnNames, columnTypeInspectors);

    // initialize the record writer, applying a compression codec if one is configured
    String compress = config.getString(Key.COMPRESS, null);
    FileOutputFormat outFormat = new OrcOutputFormat();
    if (null != compress && !"NONE".equalsIgnoreCase(compress)) {
        Class<? extends CompressionCodec> codecClass = getCompressCodec(compress);
        if (null != codecClass) {
            outFormat.setOutputCompressorClass(conf, codecClass);
        }
    }
    writer = outFormat.getRecordWriter(fileSystem, conf, fileName, Reporter.NULL);

    // initialize the ORC SerDe used to serialize rows
    orcSerde = new OrcSerde();
}
 
Example #3
Source File: OrcTester.java    From spliceengine with GNU Affero General Public License v3.0
static RecordWriter createOrcRecordWriter(File outputFile, Format format, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> { }
    );
}
 
Example #4
Source File: OrcTester.java    From spliceengine with GNU Affero General Public License v3.0
private static RecordWriter createDwrfRecordWriter(File outputFile, Compression compressionCodec, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.default.compress", compressionCodec.name());
    jobConf.set("hive.exec.orc.compress", compressionCodec.name());
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
    OrcConf.setBoolVar(jobConf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compressionCodec != NONE,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> { }
    );
}
 
Example #5
Source File: TestCachingOrcDataSource.java    From spliceengine with GNU Affero General Public License v3.0
private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", "test");
    tableProperties.setProperty("columns.types", columnObjectInspector.getTypeName());
    tableProperties.setProperty("orc.stripe.size", "1200000");

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            tableProperties,
            () -> { });
}
 
Example #6
Source File: OrcTester.java    From presto with Apache License 2.0
static RecordWriter createOrcRecordWriter(File outputFile, Format format, CompressionKind compression, Type type)
        throws IOException
{
    JobConf jobConf = new JobConf();
    OrcConf.WRITE_FORMAT.setString(jobConf, format == ORC_12 ? "0.12" : "0.11");
    OrcConf.COMPRESS.setString(jobConf, compression.name());

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            createTableProperties("test", getJavaObjectInspector(type).getTypeName()),
            () -> {});
}
 
Example #7
Source File: TestOrcReaderPositions.java    From presto with Apache License 2.0
private static void flushWriter(FileSinkOperator.RecordWriter writer)
        throws IOException, ReflectiveOperationException
{
    // Reach into the non-public OrcRecordWriter nested class, grab its underlying
    // ORC Writer via reflection, and force an intermediate footer so that rows
    // buffered so far are flushed to the file without closing the writer.
    Field field = OrcOutputFormat.class.getClassLoader()
            .loadClass(OrcOutputFormat.class.getName() + "$OrcRecordWriter")
            .getDeclaredField("writer");
    field.setAccessible(true);
    ((Writer) field.get(writer)).writeIntermediateFooter();
}
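
As a hedged follow-up, a test would typically call this helper between writes so the rows written so far become readable without closing the file. In the snippet below, tempFile, the row value, and the statically imported javaStringObjectInspector are assumptions, and createOrcRecordWriter refers to the helper shown in Example #1.

// Hypothetical call site; names not defined here are assumptions for illustration.
FileSinkOperator.RecordWriter writer = createOrcRecordWriter(
        tempFile, ORC_12, CompressionKind.SNAPPY, javaStringObjectInspector);
writer.write(new OrcSerde().serialize(
        Collections.singletonList("row-1"),
        ObjectInspectorFactory.getStandardStructObjectInspector(
                Collections.singletonList("test"),
                Collections.<ObjectInspector>singletonList(javaStringObjectInspector))));
flushWriter(writer);   // forces an intermediate footer so the row is already readable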
 
Example #8
Source File: OrcFileWriter.java    From presto with Apache License 2.0
private static Constructor<? extends RecordWriter> getOrcWriterConstructor()
{
    try {
        // OrcRecordWriter is a non-public nested class of OrcOutputFormat, so its
        // (Path, OrcFile.WriterOptions) constructor is looked up reflectively and
        // made accessible for direct instantiation.
        String writerClassName = OrcOutputFormat.class.getName() + "$OrcRecordWriter";
        Constructor<? extends RecordWriter> constructor = OrcOutputFormat.class.getClassLoader()
                .loadClass(writerClassName).asSubclass(RecordWriter.class)
                .getDeclaredConstructor(Path.class, OrcFile.WriterOptions.class);
        constructor.setAccessible(true);
        return constructor;
    }
    catch (ReflectiveOperationException e) {
        throw new RuntimeException(e);
    }
}
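
A hedged usage note (the output path below is an assumption): the reflected constructor can then be invoked with a Path and OrcFile.WriterOptions to build the writer directly, bypassing getHiveRecordWriter.

// Hypothetical invocation of the constructor obtained above; exception handling
// for ReflectiveOperationException is omitted for brevity.
Configuration configuration = new Configuration();
OrcFile.WriterOptions options = OrcFile.writerOptions(configuration);
RecordWriter recordWriter = getOrcWriterConstructor()
        .newInstance(new Path("/tmp/direct.orc"), options);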
 
Example #9
Source File: TestOrcPageSourceMemoryTracking.java    From presto with Apache License 2.0
private static Constructor<? extends RecordWriter> getOrcWriterConstructor()
{
    try {
        Constructor<? extends RecordWriter> constructor = OrcOutputFormat.class.getClassLoader()
                .loadClass(ORC_RECORD_WRITER)
                .asSubclass(RecordWriter.class)
                .getDeclaredConstructor(Path.class, WriterOptions.class);
        constructor.setAccessible(true);
        return constructor;
    }
    catch (ReflectiveOperationException e) {
        throw new RuntimeException(e);
    }
}
 
Example #10
Source File: HiveDialectITCase.java    From flink with Apache License 2.0
@Test
public void testCreateTable() throws Exception {
	String location = warehouse + "/external_location";
	tableEnv.executeSql(String.format(
			"create external table tbl1 (d decimal(10,0),ts timestamp) partitioned by (p string) location '%s' tblproperties('k1'='v1')", location));
	Table hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl1"));
	assertEquals(TableType.EXTERNAL_TABLE.toString(), hiveTable.getTableType());
	assertEquals(1, hiveTable.getPartitionKeysSize());
	assertEquals(location, locationPath(hiveTable.getSd().getLocation()));
	assertEquals("v1", hiveTable.getParameters().get("k1"));
	assertFalse(hiveTable.getParameters().containsKey(SqlCreateHiveTable.TABLE_LOCATION_URI));

	tableEnv.executeSql("create table tbl2 (s struct<ts:timestamp,bin:binary>) stored as orc");
	hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl2"));
	assertEquals(TableType.MANAGED_TABLE.toString(), hiveTable.getTableType());
	assertEquals(OrcSerde.class.getName(), hiveTable.getSd().getSerdeInfo().getSerializationLib());
	assertEquals(OrcInputFormat.class.getName(), hiveTable.getSd().getInputFormat());
	assertEquals(OrcOutputFormat.class.getName(), hiveTable.getSd().getOutputFormat());

	tableEnv.executeSql("create table tbl3 (m map<timestamp,binary>) partitioned by (p1 bigint,p2 tinyint) " +
			"row format serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe'");
	hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl3"));
	assertEquals(2, hiveTable.getPartitionKeysSize());
	assertEquals(LazyBinarySerDe.class.getName(), hiveTable.getSd().getSerdeInfo().getSerializationLib());

	tableEnv.executeSql("create table tbl4 (x int,y smallint) row format delimited fields terminated by '|' lines terminated by '\n'");
	hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl4"));
	assertEquals("|", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.FIELD_DELIM));
	assertEquals("|", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.SERIALIZATION_FORMAT));
	assertEquals("\n", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.LINE_DELIM));

	tableEnv.executeSql("create table tbl5 (m map<bigint,string>) row format delimited collection items terminated by ';' " +
			"map keys terminated by ':'");
	hiveTable = hiveCatalog.getHiveTable(new ObjectPath("default", "tbl5"));
	assertEquals(";", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.COLLECTION_DELIM));
	assertEquals(":", hiveTable.getSd().getSerdeInfo().getParameters().get(serdeConstants.MAPKEY_DELIM));
}
 
Example #11
Source File: TestPutHive3Streaming.java    From nifi with Apache License 2.0
MockHiveStreamingConnection(HiveOptions options, RecordReader reader, RecordWriter recordWriter, List<FieldSchema> schema) throws StreamingException {
    this.options = options;
    metastoreURI = options.getMetaStoreURI();
    this.writer = recordWriter;
    this.hiveConf = this.options.getHiveConf();
    connectionStats = new ConnectionStats();
    this.table = new Table(Table.getEmptyTable(options.getDatabaseName(), options.getTableName()));
    this.table.setFields(schema);
    StorageDescriptor sd = this.table.getSd();
    sd.setOutputFormat(OrcOutputFormat.class.getName());
    sd.setLocation(TARGET_HIVE);
}
 
Example #12
Source File: HiveMetaStoreUtilsTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testGetTableOrc() {
  final String databaseName = "db";
  final String tableName = "tbl";
  HiveTable.Builder builder = new HiveTable.Builder();
  builder.withDbName(databaseName).withTableName(tableName);

  HiveTable hiveTable = builder.build();

  // SerDe props for the table
  State serdeProps = new State();
  serdeProps.setProp("columns", "timestamp,namespace,name,metadata");
  serdeProps.setProp("columns.types", "bigint,string,string,map<string,string>");

  hiveTable.getProps().addAll(serdeProps);

  hiveTable.setInputFormat(OrcInputFormat.class.getName());
  hiveTable.setOutputFormat(OrcOutputFormat.class.getName());
  hiveTable.setSerDeType(OrcSerde.class.getName());

  Table table = HiveMetaStoreUtils.getTable(hiveTable);
  Assert.assertEquals(table.getDbName(), databaseName);
  Assert.assertEquals(table.getTableName(), tableName);

  StorageDescriptor sd = table.getSd();
  Assert.assertEquals(sd.getInputFormat(), OrcInputFormat.class.getName());
  Assert.assertEquals(sd.getOutputFormat(), OrcOutputFormat.class.getName());
  Assert.assertNotNull(sd.getSerdeInfo());
  Assert.assertEquals(sd.getSerdeInfo().getSerializationLib(), OrcSerde.class.getName());

  // verify column name
  List<FieldSchema> fields = sd.getCols();
  Assert.assertTrue(fields != null && fields.size() == 4);
  FieldSchema fieldA = fields.get(0);
  Assert.assertEquals(fieldA.getName(), "timestamp");
  Assert.assertEquals(fieldA.getType(), "bigint");

  FieldSchema fieldB = fields.get(1);
  Assert.assertEquals(fieldB.getName(), "namespace");
  Assert.assertEquals(fieldB.getType(), "string");

  FieldSchema fieldC = fields.get(2);
  Assert.assertEquals(fieldC.getName(), "name");
  Assert.assertEquals(fieldC.getType(), "string");


  FieldSchema fieldD = fields.get(3);
  Assert.assertEquals(fieldD.getName(), "metadata");
  Assert.assertEquals(fieldD.getType(), "map<string,string>");
}