org.apache.hadoop.hive.ql.io.StorageFormatDescriptor Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.io.StorageFormatDescriptor. They are taken from open-source projects; the original project and source file are noted above each example. A StorageFormatDescriptor names a Hive storage format and supplies the class names of its input format, output format, and (optionally) its SerDe; instances are resolved by format name through org.apache.hadoop.hive.ql.io.StorageFormatFactory.
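
A minimal lookup sketch (the class StorageFormatLookup is illustrative; the printed class names depend on the Hive version on the classpath):

import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor;
import org.apache.hadoop.hive.ql.io.StorageFormatFactory;

public class StorageFormatLookup {
  public static void main(String[] args) {
    StorageFormatFactory factory = new StorageFormatFactory();

    // get() returns null when the format name is unknown.
    StorageFormatDescriptor descriptor = factory.get(IOConstants.TEXTFILE);
    if (descriptor == null) {
      throw new IllegalArgumentException("Unknown storage format: " + IOConstants.TEXTFILE);
    }

    System.out.println("input format:  " + descriptor.getInputFormat());
    System.out.println("output format: " + descriptor.getOutputFormat());
    // getSerde() may return null; callers then fall back to a default SerDe
    // (see Examples #1 and #5 below).
    System.out.println("serde:         " + descriptor.getSerde());
  }
}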
Example #1
Source File: HiveCatalog.java    From flink with Apache License 2.0
private static void setStorageFormat(StorageDescriptor sd, Map<String, String> properties) {
	// TODO: allow user to specify storage format. Simply use text format for now
	String storageFormatName = DEFAULT_HIVE_TABLE_STORAGE_FORMAT;
	StorageFormatDescriptor storageFormatDescriptor = storageFormatFactory.get(storageFormatName);
	checkArgument(storageFormatDescriptor != null, "Unknown storage format " + storageFormatName);
	sd.setInputFormat(storageFormatDescriptor.getInputFormat());
	sd.setOutputFormat(storageFormatDescriptor.getOutputFormat());
	String serdeLib = storageFormatDescriptor.getSerde();
	sd.getSerdeInfo().setSerializationLib(serdeLib != null ? serdeLib : LazySimpleSerDe.class.getName());
}
 
Example #2
Source File: TestHiveCatalogStore.java    From tajo with Apache License 2.0
@Test
public void testTableUsingSequenceFileWithBinarySerde() throws Exception {
  KeyValueSet options = new KeyValueSet();
  options.set(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_BINARY_SERDE);
  TableMeta meta = new TableMeta(BuiltinStorages.SEQUENCE_FILE, options);

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("r_regionkey", TajoDataTypes.Type.INT4)
      .add("r_name", TajoDataTypes.Type.TEXT)
      .add("r_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, REGION), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, REGION)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, REGION));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.SEQUENCEFILE);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, REGION);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, REGION));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(), table1.getSchema().getColumn(i).getSimpleName());
  }

  assertEquals(StorageConstants.DEFAULT_BINARY_SERDE,
    table1.getMeta().getProperty(StorageConstants.SEQUENCEFILE_SERDE));
  store.dropTable(DB_NAME, REGION);
}
 
Example #3
Source File: TestHiveCatalogStore.java    From tajo with Apache License 2.0
@Test
public void testTableUsingSequenceFileWithTextSerde() throws Exception {
  KeyValueSet options = new KeyValueSet();
  options.set(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_TEXT_SERDE);
  options.set(StorageConstants.TEXT_DELIMITER, "\u0001");
  options.set(StorageConstants.TEXT_NULL, NullDatum.DEFAULT_TEXT);
  TableMeta meta = new TableMeta(BuiltinStorages.SEQUENCE_FILE, options);

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("r_regionkey", TajoDataTypes.Type.INT4)
      .add("r_name", TajoDataTypes.Type.TEXT)
      .add("r_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, REGION), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, REGION)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, REGION));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.SEQUENCEFILE);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, REGION);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, REGION));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(), table1.getSchema().getColumn(i).getSimpleName());
  }

  assertEquals(StorageConstants.DEFAULT_TEXT_SERDE, table1.getMeta().getProperty(StorageConstants.SEQUENCEFILE_SERDE));
  assertEquals("\u0001", StringEscapeUtils.unescapeJava(table1.getMeta().getProperty(StorageConstants
    .TEXT_DELIMITER)));
  assertEquals(NullDatum.DEFAULT_TEXT, table1.getMeta().getProperty(StorageConstants.TEXT_NULL));
  store.dropTable(DB_NAME, REGION);
}
 
Example #4
Source File: TestHiveCatalogStore.java    From tajo with Apache License 2.0
@Test
public void testTableUsingParquet() throws Exception {
  TableMeta meta = new TableMeta("PARQUET", new KeyValueSet());

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("c_custkey", TajoDataTypes.Type.INT4)
      .add("c_name", TajoDataTypes.Type.TEXT)
      .add("c_address", TajoDataTypes.Type.TEXT)
      .add("c_nationkey", TajoDataTypes.Type.INT4)
      .add("c_phone", TajoDataTypes.Type.TEXT)
      .add("c_acctbal", TajoDataTypes.Type.FLOAT8)
      .add("c_mktsegment", TajoDataTypes.Type.TEXT)
      .add("c_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, CUSTOMER), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, CUSTOMER)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, CUSTOMER));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.PARQUET);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, CUSTOMER);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, CUSTOMER));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(), table1.getSchema().getColumn(i).getSimpleName());
  }

  store.dropTable(DB_NAME, CUSTOMER);
}
 
Example #5
Source File: HiveTableUtil.java    From flink with Apache License 2.0
public static void setStorageFormat(StorageDescriptor sd, String format, HiveConf hiveConf) {
	StorageFormatDescriptor storageFormatDescriptor = storageFormatFactory.get(format);
	checkArgument(storageFormatDescriptor != null, "Unknown storage format " + format);
	sd.setInputFormat(storageFormatDescriptor.getInputFormat());
	sd.setOutputFormat(storageFormatDescriptor.getOutputFormat());
	String serdeLib = storageFormatDescriptor.getSerde();
	if (serdeLib == null && storageFormatDescriptor instanceof RCFileStorageFormatDescriptor) {
		serdeLib = hiveConf.getVar(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE);
	}
	if (serdeLib != null) {
		sd.getSerdeInfo().setSerializationLib(serdeLib);
	}
}
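
A hedged usage sketch for the helper above (a fragment, not from the Flink source; it assumes HiveTableUtil and the metastore thrift classes StorageDescriptor and SerDeInfo from org.apache.hadoop.hive.metastore.api are imported):

StorageDescriptor sd = new StorageDescriptor();
// The helper writes the SerDe class name into this SerDeInfo.
sd.setSerdeInfo(new SerDeInfo());
HiveTableUtil.setStorageFormat(sd, IOConstants.RCFILE, new HiveConf());
// For RCFile the descriptor's getSerde() returns null, so the SerDe class
// comes from HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE, as handled above.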
 
Example #6
Source File: TestHiveCatalogStore.java    From tajo with Apache License 2.0
@Test
public void testTableUsingTextFile() throws Exception {
  TableMeta meta = new TableMeta(BuiltinStorages.TEXT, new KeyValueSet());

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("c_custkey", TajoDataTypes.Type.INT4)
      .add("c_name", TajoDataTypes.Type.TEXT)
      .add("c_address", TajoDataTypes.Type.TEXT)
      .add("c_nationkey", TajoDataTypes.Type.INT4)
      .add("c_phone", TajoDataTypes.Type.TEXT)
      .add("c_acctbal", TajoDataTypes.Type.FLOAT8)
      .add("c_mktsegment", TajoDataTypes.Type.TEXT)
      .add("c_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, CUSTOMER), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, CUSTOMER)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, CUSTOMER));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.TEXTFILE);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, CUSTOMER);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  // IgnoreKeyTextOutputFormat is deprecated; Hive writes text tables with HiveIgnoreKeyTextOutputFormat instead.
  assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, CUSTOMER));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(), table1.getSchema().getColumn(i).getSimpleName());
  }

  assertEquals(StringEscapeUtils.escapeJava(StorageConstants.DEFAULT_FIELD_DELIMITER),
      table1.getMeta().getProperty(StorageConstants.TEXT_DELIMITER));

  Map<String, String> expected = getProperties(DB_NAME, CUSTOMER);
  Map<String, String> toSet = new ImmutableMap.Builder<String, String>()
      .put("key1", "value1")
      .put("key2", "value2")
      .build();
  expected.putAll(toSet);

  setProperty(DB_NAME, CUSTOMER, toSet);
  Map<String, String> actual = getProperties(DB_NAME, CUSTOMER);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertEquals(actual.get("key2"), expected.get("key2"));

  Set<String> toUnset = Sets.newHashSet("key2", "key3");
  for (String key : toUnset) {
    expected.remove(key);
  }
  unSetProperty(DB_NAME, CUSTOMER, toUnset);
  actual = getProperties(DB_NAME, CUSTOMER);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertNull(actual.get("key2"));

  store.dropTable(DB_NAME, CUSTOMER);
}
 
Example #7
Source File: TestHiveCatalogStore.java    From tajo with Apache License 2.0
@Test
public void testTableUsingRCFileWithBinarySerde() throws Exception {
  KeyValueSet options = new KeyValueSet();
  options.set(StorageConstants.RCFILE_SERDE, StorageConstants.DEFAULT_BINARY_SERDE);
  TableMeta meta = new TableMeta(BuiltinStorages.RCFILE, options);

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("r_regionkey", TajoDataTypes.Type.INT4)
      .add("r_name", TajoDataTypes.Type.TEXT)
      .add("r_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, REGION), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, REGION)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, REGION));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.RCFILE);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, REGION);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, REGION));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(), table1.getSchema().getColumn(i).getSimpleName());
  }

  assertEquals(StorageConstants.DEFAULT_BINARY_SERDE,
      table1.getMeta().getProperty(StorageConstants.RCFILE_SERDE));

  Map<String, String> expected = getProperties(DB_NAME, REGION);
  Map<String, String> toSet = new ImmutableMap.Builder<String, String>()
      .put("key1", "value1")
      .put("key2", "value2")
      .build();
  expected.putAll(toSet);

  setProperty(DB_NAME, REGION, toSet);
  Map<String, String> actual = getProperties(DB_NAME, REGION);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertEquals(actual.get("key2"), expected.get("key2"));

  Set<String> toUnset = Sets.newHashSet("key2", "key3");
  for (String key : toUnset) {
    expected.remove(key);
  }
  unSetProperty(DB_NAME, REGION, toUnset);
  actual = getProperties(DB_NAME, REGION);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertNull(actual.get("key2"));

  store.dropTable(DB_NAME, REGION);
}
 
Example #8
Source File: TestHiveCatalogStore.java    From tajo with Apache License 2.0
@Test
public void testTableUsingRCFileWithTextSerde() throws Exception {
  KeyValueSet options = new KeyValueSet();
  options.set(StorageConstants.RCFILE_SERDE, StorageConstants.DEFAULT_TEXT_SERDE);
  TableMeta meta = new TableMeta(BuiltinStorages.RCFILE, options);

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("r_regionkey", TajoDataTypes.Type.INT4)
      .add("r_name", TajoDataTypes.Type.TEXT)
      .add("r_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, REGION), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, REGION)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, REGION));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.RCFILE);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, REGION);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, REGION));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(), table1.getSchema().getColumn(i).getSimpleName());
  }

  assertEquals(StorageConstants.DEFAULT_TEXT_SERDE, table1.getMeta().getProperty(StorageConstants.RCFILE_SERDE));

  Map<String, String> expected = getProperties(DB_NAME, REGION);
  Map<String, String> toSet = new ImmutableMap.Builder<String, String>()
          .put("key1", "value1")
          .put("key2", "value2")
          .build();
  expected.putAll(toSet);

  setProperty(DB_NAME, REGION, toSet);
  Map<String, String> actual = getProperties(DB_NAME, REGION);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertEquals(actual.get("key2"), expected.get("key2"));

  Set<String> toUnset = Sets.newHashSet("key2", "key3");
  for (String key : toUnset) {
    expected.remove(key);
  }
  unSetProperty(DB_NAME, REGION, toUnset);
  actual = getProperties(DB_NAME, REGION);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertNull(actual.get("key2"));

  store.dropTable(DB_NAME, REGION);
}
 
Example #9
Source File: TestHiveCatalogStore.java    From tajo with Apache License 2.0
@Test
public void testTableWithNullValue() throws Exception {
  KeyValueSet options = new KeyValueSet();
  options.set(StorageConstants.TEXT_DELIMITER, StringEscapeUtils.escapeJava("\u0002"));
  options.set(StorageConstants.TEXT_NULL, StringEscapeUtils.escapeJava("\u0003"));
  TableMeta meta = new TableMeta(BuiltinStorages.TEXT, options);

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("s_suppkey", TajoDataTypes.Type.INT4)
      .add("s_name", TajoDataTypes.Type.TEXT)
      .add("s_address", TajoDataTypes.Type.TEXT)
      .add("s_nationkey", TajoDataTypes.Type.INT4)
      .add("s_phone", TajoDataTypes.Type.TEXT)
      .add("s_acctbal", TajoDataTypes.Type.FLOAT8)
      .add("s_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, SUPPLIER), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, SUPPLIER)).toUri());

  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, SUPPLIER));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.TEXTFILE);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, SUPPLIER);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  // IgnoreKeyTextOutputFormat is deprecated; Hive writes text tables with HiveIgnoreKeyTextOutputFormat instead.
  assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, SUPPLIER));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(), table1.getSchema().getColumn(i).getSimpleName());
  }

  assertEquals(table.getMeta().getProperty(StorageConstants.TEXT_DELIMITER),
      table1.getMeta().getProperty(StorageConstants.TEXT_DELIMITER));

  assertEquals(table.getMeta().getProperty(StorageConstants.TEXT_NULL),
      table1.getMeta().getProperty(StorageConstants.TEXT_NULL));

  assertEquals(table1.getMeta().getProperty(StorageConstants.TEXT_DELIMITER),
      StringEscapeUtils.escapeJava("\u0002"));

  assertEquals(table1.getMeta().getProperty(StorageConstants.TEXT_NULL),
      StringEscapeUtils.escapeJava("\u0003"));

  Map<String, String> expected = getProperties(DB_NAME, SUPPLIER);
  Map<String, String> toSet = new ImmutableMap.Builder<String, String>()
          .put("key1", "value1")
          .put("key2", "value2")
          .build();
  expected.putAll(toSet);

  setProperty(DB_NAME, SUPPLIER, toSet);
  Map<String, String> actual = getProperties(DB_NAME, SUPPLIER);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertEquals(actual.get("key2"), expected.get("key2"));

  Set<String> toUnset = Sets.newHashSet("key2", "key3");
  for (String key : toUnset) {
    expected.remove(key);
  }
  unSetProperty(DB_NAME, SUPPLIER, toUnset);
  actual = getProperties(DB_NAME, SUPPLIER);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertNull(actual.get("key2"));

  store.dropTable(DB_NAME, SUPPLIER);
}