org.apache.hadoop.hive.ql.io.StorageFormatDescriptor Java Examples
The following examples show how to use
org.apache.hadoop.hive.ql.io.StorageFormatDescriptor.
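As a primer, here is a minimal, self-contained sketch of the usage pattern the examples below share. It assumes you can instantiate org.apache.hadoop.hive.ql.io.StorageFormatFactory directly and look it up by one of the format names in IOConstants, which matches how the Flink and Tajo snippets use their factory fields:

import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor;
import org.apache.hadoop.hive.ql.io.StorageFormatFactory;

public class StorageFormatLookup {
  public static void main(String[] args) {
    // The factory maps a format name (e.g. IOConstants.PARQUET, TEXTFILE,
    // SEQUENCEFILE, RCFILE) to a descriptor, or null for unknown names.
    StorageFormatFactory factory = new StorageFormatFactory();
    StorageFormatDescriptor descriptor = factory.get(IOConstants.PARQUET);
    if (descriptor == null) {
      throw new IllegalArgumentException("Unknown storage format");
    }
    // A descriptor bundles the InputFormat/OutputFormat class names and,
    // optionally, a default SerDe class name. getSerde() may return null,
    // in which case callers pick their own default (see the examples below).
    System.out.println(descriptor.getInputFormat());
    System.out.println(descriptor.getOutputFormat());
    System.out.println(descriptor.getSerde());
  }
}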
Example #1
Source File: HiveCatalog.java From flink with Apache License 2.0
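Flink's HiveCatalog uses the descriptor to populate a table's StorageDescriptor: it looks up the default storage format, copies the input and output formats, and falls back to LazySimpleSerDe when the descriptor supplies no SerDe.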
private static void setStorageFormat(StorageDescriptor sd, Map<String, String> properties) {
  // TODO: allow user to specify storage format. Simply use text format for now
  String storageFormatName = DEFAULT_HIVE_TABLE_STORAGE_FORMAT;
  StorageFormatDescriptor storageFormatDescriptor = storageFormatFactory.get(storageFormatName);
  checkArgument(storageFormatDescriptor != null, "Unknown storage format " + storageFormatName);
  sd.setInputFormat(storageFormatDescriptor.getInputFormat());
  sd.setOutputFormat(storageFormatDescriptor.getOutputFormat());
  String serdeLib = storageFormatDescriptor.getSerde();
  sd.getSerdeInfo().setSerializationLib(serdeLib != null ? serdeLib : LazySimpleSerDe.class.getName());
}
Example #2
Source File: TestHiveCatalogStore.java From tajo with Apache License 2.0
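This Tajo test creates a SequenceFile table with the binary SerDe and verifies that the input and output formats recorded in the Hive metastore match the SEQUENCEFILE descriptor.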
@Test
public void testTableUsingSequenceFileWithBinarySerde() throws Exception {
  KeyValueSet options = new KeyValueSet();
  options.set(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_BINARY_SERDE);
  TableMeta meta = new TableMeta(BuiltinStorages.SEQUENCE_FILE, options);

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("r_regionkey", TajoDataTypes.Type.INT4)
      .add("r_name", TajoDataTypes.Type.TEXT)
      .add("r_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, REGION), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, REGION)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, REGION));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.SEQUENCEFILE);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, REGION);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, REGION));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(),
        table1.getSchema().getColumn(i).getSimpleName());
  }
  assertEquals(StorageConstants.DEFAULT_BINARY_SERDE,
      table1.getMeta().getProperty(StorageConstants.SEQUENCEFILE_SERDE));

  store.dropTable(DB_NAME, REGION);
}
Example #3
Source File: TestHiveCatalogStore.java From tajo with Apache License 2.0
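The same round trip with the text SerDe, additionally checking that the field delimiter and null literal survive in the table properties.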
@Test
public void testTableUsingSequenceFileWithTextSerde() throws Exception {
  KeyValueSet options = new KeyValueSet();
  options.set(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_TEXT_SERDE);
  options.set(StorageConstants.TEXT_DELIMITER, "\u0001");
  options.set(StorageConstants.TEXT_NULL, NullDatum.DEFAULT_TEXT);
  TableMeta meta = new TableMeta(BuiltinStorages.SEQUENCE_FILE, options);

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("r_regionkey", TajoDataTypes.Type.INT4)
      .add("r_name", TajoDataTypes.Type.TEXT)
      .add("r_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, REGION), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, REGION)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, REGION));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.SEQUENCEFILE);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, REGION);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, REGION));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(),
        table1.getSchema().getColumn(i).getSimpleName());
  }
  assertEquals(StorageConstants.DEFAULT_TEXT_SERDE,
      table1.getMeta().getProperty(StorageConstants.SEQUENCEFILE_SERDE));
  assertEquals("\u0001",
      StringEscapeUtils.unescapeJava(table1.getMeta().getProperty(StorageConstants.TEXT_DELIMITER)));
  assertEquals(NullDatum.DEFAULT_TEXT, table1.getMeta().getProperty(StorageConstants.TEXT_NULL));

  store.dropTable(DB_NAME, REGION);
}
Example #4
Source File: TestHiveCatalogStore.java From tajo with Apache License 2.0
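A Parquet table, verified against the PARQUET descriptor.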
@Test
public void testTableUsingParquet() throws Exception {
  TableMeta meta = new TableMeta("PARQUET", new KeyValueSet());

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("c_custkey", TajoDataTypes.Type.INT4)
      .add("c_name", TajoDataTypes.Type.TEXT)
      .add("c_address", TajoDataTypes.Type.TEXT)
      .add("c_nationkey", TajoDataTypes.Type.INT4)
      .add("c_phone", TajoDataTypes.Type.TEXT)
      .add("c_acctbal", TajoDataTypes.Type.FLOAT8)
      .add("c_mktsegment", TajoDataTypes.Type.TEXT)
      .add("c_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, CUSTOMER), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, CUSTOMER)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, CUSTOMER));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.PARQUET);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, CUSTOMER);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, CUSTOMER));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(),
        table1.getSchema().getColumn(i).getSimpleName());
  }

  store.dropTable(DB_NAME, CUSTOMER);
}
Example #5
Source File: HiveTableUtil.java From flink with Apache License 2.0
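Flink's HiveTableUtil sets a caller-chosen format; RCFile descriptors carry no SerDe, so the default RCFile SerDe is read from HiveConf instead.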
public static void setStorageFormat(StorageDescriptor sd, String format, HiveConf hiveConf) {
  StorageFormatDescriptor storageFormatDescriptor = storageFormatFactory.get(format);
  checkArgument(storageFormatDescriptor != null, "Unknown storage format " + format);
  sd.setInputFormat(storageFormatDescriptor.getInputFormat());
  sd.setOutputFormat(storageFormatDescriptor.getOutputFormat());
  String serdeLib = storageFormatDescriptor.getSerde();
  if (serdeLib == null && storageFormatDescriptor instanceof RCFileStorageFormatDescriptor) {
    serdeLib = hiveConf.getVar(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE);
  }
  if (serdeLib != null) {
    sd.getSerdeInfo().setSerializationLib(serdeLib);
  }
}
Example #6
Source File: TestHiveCatalogStore.java From tajo with Apache License 2.0
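A text-file table: the input format is checked against the TEXTFILE descriptor, the output format against HiveIgnoreKeyTextOutputFormat, and table properties are then set and unset.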
@Test
public void testTableUsingTextFile() throws Exception {
  TableMeta meta = new TableMeta(BuiltinStorages.TEXT, new KeyValueSet());

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("c_custkey", TajoDataTypes.Type.INT4)
      .add("c_name", TajoDataTypes.Type.TEXT)
      .add("c_address", TajoDataTypes.Type.TEXT)
      .add("c_nationkey", TajoDataTypes.Type.INT4)
      .add("c_phone", TajoDataTypes.Type.TEXT)
      .add("c_acctbal", TajoDataTypes.Type.FLOAT8)
      .add("c_mktsegment", TajoDataTypes.Type.TEXT)
      .add("c_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, CUSTOMER), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, CUSTOMER)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, CUSTOMER));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.TEXTFILE);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, CUSTOMER);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  // IgnoreKeyTextOutputFormat was deprecated
  assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, CUSTOMER));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(),
        table1.getSchema().getColumn(i).getSimpleName());
  }
  assertEquals(StringEscapeUtils.escapeJava(StorageConstants.DEFAULT_FIELD_DELIMITER),
      table1.getMeta().getProperty(StorageConstants.TEXT_DELIMITER));

  Map<String, String> expected = getProperties(DB_NAME, CUSTOMER);
  Map<String, String> toSet = new ImmutableMap.Builder<String, String>()
      .put("key1", "value1")
      .put("key2", "value2")
      .build();
  expected.putAll(toSet);

  setProperty(DB_NAME, CUSTOMER, toSet);
  Map<String, String> actual = getProperties(DB_NAME, CUSTOMER);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertEquals(actual.get("key2"), expected.get("key2"));

  Set<String> toUnset = Sets.newHashSet("key2", "key3");
  for (String key : toUnset) {
    expected.remove(key);
  }
  unSetProperty(DB_NAME, CUSTOMER, toUnset);
  actual = getProperties(DB_NAME, CUSTOMER);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertNull(actual.get("key2"));

  store.dropTable(DB_NAME, CUSTOMER);
}
Example #7
Source File: TestHiveCatalogStore.java From tajo with Apache License 2.0
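RCFile with the binary SerDe, followed by the same property set/unset round trip.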
@Test
public void testTableUsingRCFileWithBinarySerde() throws Exception {
  KeyValueSet options = new KeyValueSet();
  options.set(StorageConstants.RCFILE_SERDE, StorageConstants.DEFAULT_BINARY_SERDE);
  TableMeta meta = new TableMeta(BuiltinStorages.RCFILE, options);

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("r_regionkey", TajoDataTypes.Type.INT4)
      .add("r_name", TajoDataTypes.Type.TEXT)
      .add("r_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, REGION), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, REGION)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, REGION));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.RCFILE);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, REGION);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, REGION));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(),
        table1.getSchema().getColumn(i).getSimpleName());
  }
  assertEquals(StorageConstants.DEFAULT_BINARY_SERDE,
      table1.getMeta().getProperty(StorageConstants.RCFILE_SERDE));

  Map<String, String> expected = getProperties(DB_NAME, REGION);
  Map<String, String> toSet = new ImmutableMap.Builder<String, String>()
      .put("key1", "value1")
      .put("key2", "value2")
      .build();
  expected.putAll(toSet);

  setProperty(DB_NAME, REGION, toSet);
  Map<String, String> actual = getProperties(DB_NAME, REGION);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertEquals(actual.get("key2"), expected.get("key2"));

  Set<String> toUnset = Sets.newHashSet("key2", "key3");
  for (String key : toUnset) {
    expected.remove(key);
  }
  unSetProperty(DB_NAME, REGION, toUnset);
  actual = getProperties(DB_NAME, REGION);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertNull(actual.get("key2"));

  store.dropTable(DB_NAME, REGION);
}
Example #8
Source File: TestHiveCatalogStore.java From tajo with Apache License 2.0
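RCFile with the text SerDe, with identical property checks.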
@Test
public void testTableUsingRCFileWithTextSerde() throws Exception {
  KeyValueSet options = new KeyValueSet();
  options.set(StorageConstants.RCFILE_SERDE, StorageConstants.DEFAULT_TEXT_SERDE);
  TableMeta meta = new TableMeta(BuiltinStorages.RCFILE, options);

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("r_regionkey", TajoDataTypes.Type.INT4)
      .add("r_name", TajoDataTypes.Type.TEXT)
      .add("r_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, REGION), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, REGION)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, REGION));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.RCFILE);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, REGION);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, REGION));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(),
        table1.getSchema().getColumn(i).getSimpleName());
  }
  assertEquals(StorageConstants.DEFAULT_TEXT_SERDE,
      table1.getMeta().getProperty(StorageConstants.RCFILE_SERDE));

  Map<String, String> expected = getProperties(DB_NAME, REGION);
  Map<String, String> toSet = new ImmutableMap.Builder<String, String>()
      .put("key1", "value1")
      .put("key2", "value2")
      .build();
  expected.putAll(toSet);

  setProperty(DB_NAME, REGION, toSet);
  Map<String, String> actual = getProperties(DB_NAME, REGION);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertEquals(actual.get("key2"), expected.get("key2"));

  Set<String> toUnset = Sets.newHashSet("key2", "key3");
  for (String key : toUnset) {
    expected.remove(key);
  }
  unSetProperty(DB_NAME, REGION, toUnset);
  actual = getProperties(DB_NAME, REGION);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertNull(actual.get("key2"));

  store.dropTable(DB_NAME, REGION);
}
Example #9
Source File: TestHiveCatalogStore.java From tajo with Apache License 2.0
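A text table with custom, escaped delimiter and null characters, verifying they are preserved through the catalog.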
@Test
public void testTableWithNullValue() throws Exception {
  KeyValueSet options = new KeyValueSet();
  options.set(StorageConstants.TEXT_DELIMITER, StringEscapeUtils.escapeJava("\u0002"));
  options.set(StorageConstants.TEXT_NULL, StringEscapeUtils.escapeJava("\u0003"));
  TableMeta meta = new TableMeta(BuiltinStorages.TEXT, options);

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("s_suppkey", TajoDataTypes.Type.INT4)
      .add("s_name", TajoDataTypes.Type.TEXT)
      .add("s_address", TajoDataTypes.Type.TEXT)
      .add("s_nationkey", TajoDataTypes.Type.INT4)
      .add("s_phone", TajoDataTypes.Type.TEXT)
      .add("s_acctbal", TajoDataTypes.Type.FLOAT8)
      .add("s_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, SUPPLIER), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, SUPPLIER)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, SUPPLIER));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.TEXTFILE);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, SUPPLIER);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  // IgnoreKeyTextOutputFormat was deprecated
  assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, SUPPLIER));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(),
        table1.getSchema().getColumn(i).getSimpleName());
  }
  assertEquals(table.getMeta().getProperty(StorageConstants.TEXT_DELIMITER),
      table1.getMeta().getProperty(StorageConstants.TEXT_DELIMITER));
  assertEquals(table.getMeta().getProperty(StorageConstants.TEXT_NULL),
      table1.getMeta().getProperty(StorageConstants.TEXT_NULL));
  assertEquals(table1.getMeta().getProperty(StorageConstants.TEXT_DELIMITER),
      StringEscapeUtils.escapeJava("\u0002"));
  assertEquals(table1.getMeta().getProperty(StorageConstants.TEXT_NULL),
      StringEscapeUtils.escapeJava("\u0003"));

  Map<String, String> expected = getProperties(DB_NAME, SUPPLIER);
  Map<String, String> toSet = new ImmutableMap.Builder<String, String>()
      .put("key1", "value1")
      .put("key2", "value2")
      .build();
  expected.putAll(toSet);

  setProperty(DB_NAME, SUPPLIER, toSet);
  Map<String, String> actual = getProperties(DB_NAME, SUPPLIER);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertEquals(actual.get("key2"), expected.get("key2"));

  Set<String> toUnset = Sets.newHashSet("key2", "key3");
  for (String key : toUnset) {
    expected.remove(key);
  }
  unSetProperty(DB_NAME, SUPPLIER, toUnset);
  actual = getProperties(DB_NAME, SUPPLIER);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertNull(actual.get("key2"));

  store.dropTable(DB_NAME, SUPPLIER);
}