Java Code Examples for org.apache.hadoop.hive.metastore.api.StorageDescriptor

The following examples show how to use org.apache.hadoop.hive.metastore.api.StorageDescriptor. These examples are extracted from open source projects; the originating project, source file, and license are noted above each example where available.
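As a quick orientation, here is a minimal sketch distilled from the construction patterns in the examples below; the helper name, database, table, location, and format strings are purely illustrative. It builds a StorageDescriptor, attaches SerDe info, and assigns it to a Table:

import java.util.ArrayList;
import java.util.HashMap;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;

static Table createMinimalTable(String dbName, String tableName) {
  // Columns stored in the table (empty here; add FieldSchema entries as needed).
  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(new ArrayList<FieldSchema>());
  sd.setLocation("hdfs:///warehouse/" + dbName + ".db/" + tableName);  // illustrative location
  sd.setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
  sd.setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");

  // SerDe configuration describing how rows at that location are serialized.
  SerDeInfo serdeInfo = new SerDeInfo();
  serdeInfo.setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
  serdeInfo.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(serdeInfo);

  // Attach the storage descriptor to the table definition.
  Table table = new Table();
  table.setDbName(dbName);
  table.setTableName(tableName);
  table.setSd(sd);
  return table;
}

Most of the examples below extend exactly this pattern with bucketing, skew, partition, and statistics details.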
Example 1
Source Project: kite   Source File: HiveUtils.java    License: Apache License 2.0
static Table createEmptyTable(String namespace, String name) {
  Table table = new Table();
  table.setDbName(namespace);
  table.setTableName(name);
  table.setPartitionKeys(new ArrayList<FieldSchema>());
  table.setParameters(new HashMap<String, String>());

  StorageDescriptor sd = new StorageDescriptor();
  sd.setSerdeInfo(new SerDeInfo());
  sd.setNumBuckets(-1);
  sd.setBucketCols(new ArrayList<String>());
  sd.setCols(new ArrayList<FieldSchema>());
  sd.setParameters(new HashMap<String, String>());
  sd.setSortCols(new ArrayList<Order>());
  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  SkewedInfo skewInfo = new SkewedInfo();
  skewInfo.setSkewedColNames(new ArrayList<String>());
  skewInfo.setSkewedColValues(new ArrayList<List<String>>());
  skewInfo.setSkewedColValueLocationMaps(new HashMap<List<String>, String>());
  sd.setSkewedInfo(skewInfo);
  table.setSd(sd);

  return table;
}
 
Example 2
public static com.amazonaws.services.glue.model.StorageDescriptor convertStorageDescriptor(
        StorageDescriptor hiveSd) {
  com.amazonaws.services.glue.model.StorageDescriptor catalogSd =
          new com.amazonaws.services.glue.model.StorageDescriptor();
  catalogSd.setNumberOfBuckets(hiveSd.getNumBuckets());
  catalogSd.setCompressed(hiveSd.isCompressed());
  catalogSd.setParameters(hiveSd.getParameters());
  catalogSd.setBucketColumns(hiveSd.getBucketCols());
  catalogSd.setColumns(convertFieldSchemaList(hiveSd.getCols()));
  catalogSd.setInputFormat(hiveSd.getInputFormat());
  catalogSd.setLocation(hiveSd.getLocation());
  catalogSd.setOutputFormat(hiveSd.getOutputFormat());
  catalogSd.setSerdeInfo(convertSerDeInfo(hiveSd.getSerdeInfo()));
  catalogSd.setSkewedInfo(convertSkewedInfo(hiveSd.getSkewedInfo()));
  catalogSd.setSortColumns(convertOrderList(hiveSd.getSortCols()));
  catalogSd.setStoredAsSubDirectories(hiveSd.isStoredAsSubDirectories());

  return catalogSd;
}
 
Example 3
@Test
public void testCheckTableSchemaTypeMappingInvalid() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
          "col2:dynamo_col2#,hashKey:hashKey");
  parameters.put(DynamoDBConstants.DYNAMODB_TYPE_MAPPING, "col2:NS");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "string", ""));
  cols.add(new FieldSchema("col2", "bigint", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);

  exceptionRule.expect(MetaException.class);
  exceptionRule.expectMessage("The DynamoDB type NS does not support Hive type bigint");
  storageHandler.checkTableSchemaType(description, table);
}
 
Example 4
Source Project: presto   Source File: ThriftMetastoreUtil.java    License: Apache License 2.0
public static Partition fromMetastoreApiPartition(org.apache.hadoop.hive.metastore.api.Partition partition, List<FieldSchema> schema)
{
    StorageDescriptor storageDescriptor = partition.getSd();
    if (storageDescriptor == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Partition does not contain a storage descriptor: " + partition);
    }

    Partition.Builder partitionBuilder = Partition.builder()
            .setDatabaseName(partition.getDbName())
            .setTableName(partition.getTableName())
            .setValues(partition.getValues())
            .setColumns(schema.stream()
                    .map(ThriftMetastoreUtil::fromMetastoreApiFieldSchema)
                    .collect(toImmutableList()))
            .setParameters(partition.getParameters());

    // TODO is bucketing_version set on partition level??
    fromMetastoreApiStorageDescriptor(
            partition.getParameters(),
            storageDescriptor,
            partitionBuilder.getStorageBuilder(),
            format("%s.%s", partition.getTableName(), partition.getValues()));

    return partitionBuilder.build();
}
 
Example 5
Source Project: presto   Source File: ThriftMetastoreUtil.java    License: Apache License 2.0
private static void fromMetastoreApiStorageDescriptor(
        Map<String, String> tableParameters,
        StorageDescriptor storageDescriptor,
        Storage.Builder builder,
        String tablePartitionName)
{
    SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    builder.setStorageFormat(StorageFormat.createNullable(serdeInfo.getSerializationLib(), storageDescriptor.getInputFormat(), storageDescriptor.getOutputFormat()))
            .setLocation(nullToEmpty(storageDescriptor.getLocation()))
            .setBucketProperty(HiveBucketProperty.fromStorageDescriptor(tableParameters, storageDescriptor, tablePartitionName))
            .setSkewed(storageDescriptor.isSetSkewedInfo() && storageDescriptor.getSkewedInfo().isSetSkewedColNames() && !storageDescriptor.getSkewedInfo().getSkewedColNames().isEmpty())
            .setSerdeParameters(serdeInfo.getParameters() == null ? ImmutableMap.of() : serdeInfo.getParameters());
}
 
Example 6
@Test
public void testCheckTableSchemaTypeValid() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
      "col2:dynamo_col2#,hashKey:hashKey");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "string", ""));
  cols.add(new FieldSchema("col2", "bigint", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);
  // This check is expected to pass for the given input
  storageHandler.checkTableSchemaType(description, table);
}
 
Example 7
Source Project: flink   Source File: HiveCatalog.java    License: Apache License 2.0
private Partition instantiateHivePartition(Table hiveTable, CatalogPartitionSpec partitionSpec, CatalogPartition catalogPartition)
		throws PartitionSpecInvalidException {
	List<String> partCols = getFieldNames(hiveTable.getPartitionKeys());
	List<String> partValues = getOrderedFullPartitionValues(
		partitionSpec, partCols, new ObjectPath(hiveTable.getDbName(), hiveTable.getTableName()));
	// validate partition values
	for (int i = 0; i < partCols.size(); i++) {
		if (StringUtils.isNullOrWhitespaceOnly(partValues.get(i))) {
			throw new PartitionSpecInvalidException(getName(), partCols,
				new ObjectPath(hiveTable.getDbName(), hiveTable.getTableName()), partitionSpec);
		}
	}
	// TODO: handle GenericCatalogPartition
	StorageDescriptor sd = hiveTable.getSd().deepCopy();
	sd.setLocation(catalogPartition.getProperties().remove(HiveCatalogConfig.PARTITION_LOCATION));

	Map<String, String> properties = new HashMap<>(catalogPartition.getProperties());
	properties.put(HiveCatalogConfig.COMMENT, catalogPartition.getComment());

	return HiveTableUtil.createHivePartition(
			hiveTable.getDbName(),
			hiveTable.getTableName(),
			partValues,
			sd,
			properties);
}
 
Example 8
Source Project: flink   Source File: HiveTableUtil.java    License: Apache License 2.0
/**
 * Creates the properties needed to initialize a SerDe.
 * @param storageDescriptor the Hive StorageDescriptor carrying the SerDe info and column schemas
 * @return the Properties used to initialize the SerDe
 */
public static Properties createPropertiesFromStorageDescriptor(StorageDescriptor storageDescriptor) {
	SerDeInfo serDeInfo = storageDescriptor.getSerdeInfo();
	Map<String, String> parameters = serDeInfo.getParameters();
	Properties properties = new Properties();
	properties.setProperty(
			serdeConstants.SERIALIZATION_FORMAT,
			parameters.get(serdeConstants.SERIALIZATION_FORMAT));
	List<String> colTypes = new ArrayList<>();
	List<String> colNames = new ArrayList<>();
	List<FieldSchema> cols = storageDescriptor.getCols();
	for (FieldSchema col: cols){
		colTypes.add(col.getType());
		colNames.add(col.getName());
	}
	properties.setProperty(serdeConstants.LIST_COLUMNS, StringUtils.join(colNames, String.valueOf(SerDeUtils.COMMA)));
	// Note: serdeConstants.COLUMN_NAME_DELIMITER is not defined in older Hive versions, so use the literal key to avoid a shim
	properties.setProperty("column.name.delimiter", String.valueOf(SerDeUtils.COMMA));
	properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, StringUtils.join(colTypes, DEFAULT_LIST_COLUMN_TYPES_SEPARATOR));
	properties.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
	properties.putAll(parameters);
	return properties;
}
 
Example 9
/**
 * @param entity name of the entity to be changed, e.g. a Hive table or partition
 * @param sd StorageDescriptor of the entity
 * @param hiveHelper helper providing the source and target file system information
 */
public static void updateAvroSchemaURL(String entity, StorageDescriptor sd, HiveCopyEntityHelper hiveHelper) {
  String oldAvroSchemaURL = sd.getSerdeInfo().getParameters().get(HIVE_TABLE_AVRO_SCHEMA_URL);
  if (oldAvroSchemaURL != null) {

    Path oldAvroSchemaPath = new Path(oldAvroSchemaURL);
    URI sourceFileSystemURI = hiveHelper.getDataset().getFs().getUri();

    if (PathUtils.isAbsoluteAndSchemeAuthorityNull(oldAvroSchemaPath)
        || (oldAvroSchemaPath.toUri().getScheme().equals(sourceFileSystemURI.getScheme())
        && oldAvroSchemaPath.toUri().getAuthority().equals(sourceFileSystemURI.getAuthority()))) {

      String newAvroSchemaURL = hiveHelper.getTargetPathHelper().getTargetPath(oldAvroSchemaPath, hiveHelper.getTargetFileSystem(),
          Optional.<Partition>absent(), true).toString();

      sd.getSerdeInfo().getParameters().put(HIVE_TABLE_AVRO_SCHEMA_URL, newAvroSchemaURL);
      log.info(String.format("For entity %s, change %s from %s to %s", entity,
          HIVE_TABLE_AVRO_SCHEMA_URL, oldAvroSchemaURL, newAvroSchemaURL));
    }
  }
}
 
Example 10
Source Project: flink   Source File: HiveStatsUtil.java    License: Apache License 2.0
private static ColumnStatistics createHiveColumnStatistics(
		Map<String, CatalogColumnStatisticsDataBase> colStats,
		StorageDescriptor sd,
		ColumnStatisticsDesc desc) {
	List<ColumnStatisticsObj> colStatsList = new ArrayList<>();

	for (FieldSchema field : sd.getCols()) {
		String hiveColName = field.getName();
		String hiveColType = field.getType();
		CatalogColumnStatisticsDataBase flinkColStat = colStats.get(field.getName());
		if (null != flinkColStat) {
			ColumnStatisticsData statsData =
					getColumnStatisticsData(HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(hiveColType)), flinkColStat);
			ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj(hiveColName, hiveColType, statsData);
			colStatsList.add(columnStatisticsObj);
		}
	}

	return new ColumnStatistics(desc, colStatsList);
}
 
Example 11
private String getFilePath(Table tbl) throws Exception {

    StorageDescriptor descTable = tbl.getSd();

    InputFormat<?, ?> fformat = HiveDataFragmenter.makeInputFormat(descTable.getInputFormat(), jobConf);

    FileInputFormat.setInputPaths(jobConf, new Path(descTable.getLocation()));

    InputSplit[] splits;
    try {
        splits = fformat.getSplits(jobConf, 1);
    } catch (org.apache.hadoop.mapred.InvalidInputException e) {
        LOG.debug("getSplits failed on " + e.getMessage());
        throw new RuntimeException("Unable to get file path for table.");
    }

    // Return the path of the first split found under the table location.
    for (InputSplit split : splits) {
        FileSplit fsp = (FileSplit) split;
        return fsp.getPath().toString();
    }
    throw new RuntimeException("Unable to get file path for table.");
}
 
Example 12
public Table makeMetastoreTableObject(HiveMetaStoreClient client,
    String dbName, String tabName, List<FieldSchema> cols) throws Exception {
  Table tbl = new Table();
  tbl.setDbName(dbName);
  tbl.setTableName(tabName);
  StorageDescriptor sd = new StorageDescriptor();
  tbl.setSd(sd);
  tbl.setParameters(new HashMap<String, String>());
  sd.setCols(cols);
  sd.setCompressed(false);
  sd.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(new SerDeInfo());
  sd.getSerdeInfo().setName(tbl.getTableName());
  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().getParameters()
      .put(serdeConstants.SERIALIZATION_FORMAT, "1");
  sd.setSortCols(new ArrayList<Order>());
  return tbl;
}
 
Example 13
Source Project: metacat   Source File: HiveConnectorTableService.java    License: Apache License 2.0
private HiveStorageFormat extractHiveStorageFormat(final Table table) throws MetaException {
    final StorageDescriptor descriptor = table.getSd();
    if (descriptor == null) {
        throw new MetaException("Table is missing storage descriptor");
    }
    final SerDeInfo serdeInfo = descriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new MetaException(
            "Table storage descriptor is missing SerDe info");
    }
    final String outputFormat = descriptor.getOutputFormat();
    final String serializationLib = serdeInfo.getSerializationLib();

    for (HiveStorageFormat format : HiveStorageFormat.values()) {
        if (format.getOutputFormat().equals(outputFormat) && format.getSerde().equals(serializationLib)) {
            return format;
        }
    }
    throw new MetaException(
        String.format("Output format %s with SerDe %s is not supported", outputFormat, serializationLib));
}
 
Example 14
@Test
public void testCheckTableSchemaMappingMissingColumn() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$,hashMap:hashMap");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "string", ""));
  cols.add(new FieldSchema("col2", "tinyint", ""));
  cols.add(new FieldSchema("col3", "string", ""));
  cols.add(new FieldSchema("hashMap", "map<string,string>", ""));
  sd.setCols(cols);
  table.setSd(sd);

  exceptionRule.expect(MetaException.class);
  exceptionRule.expectMessage("Could not find column mapping for column: col2");
  storageHandler.checkTableSchemaMapping(description, table);
}
 
Example 15
@Test
public void testCheckListTableSchemaTypeValid() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
          "col2:dynamo_col2#,col3:dynamo_col3#,col4:dynamo_col4#,col5:dynamo_col5#," +
          "col6:dynamo_col6#,col7:dynamo_col7#,hashKey:hashKey");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "map<string,bigint>", ""));
  cols.add(new FieldSchema("col2", "array<map<string,bigint>>", ""));
  cols.add(new FieldSchema("col3", "array<map<string,double>>", ""));
  cols.add(new FieldSchema("col4", "array<map<string,string>>", ""));
  cols.add(new FieldSchema("col5", "array<bigint>", ""));
  cols.add(new FieldSchema("col6", "array<double>", ""));
  cols.add(new FieldSchema("col7", "array<string>", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);
  // This check is expected to pass for the given input
  storageHandler.checkTableSchemaType(description, table);
}
 
Example 16
Source Project: beeju   Source File: HiveServer2CoreTest.java    License: Apache License 2.0
private Table createUnpartitionedTable(String databaseName, String tableName, HiveServer2Core server)
    throws Exception {
  Table table = new Table();
  table.setDbName(databaseName);
  table.setTableName(tableName);
  table.setSd(new StorageDescriptor());
  table.getSd().setCols(Arrays.asList(new FieldSchema("id", "int", null), new FieldSchema("name", "string", null)));
  table.getSd().setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
  table.getSd().setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
  table.getSd().setSerdeInfo(new SerDeInfo());
  table.getSd().getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
  HiveMetaStoreClient client = server.getCore().newClient();
  client.createTable(table);
  client.close();
  return table;
}
 
Example 17
Source Project: beeju   Source File: HiveServer2CoreTest.java    License: Apache License 2.0
private Table createPartitionedTable(String databaseName, String tableName, HiveServer2Core server) throws Exception {
  Table table = new Table();
  table.setDbName(DATABASE);
  table.setTableName(tableName);
  table.setPartitionKeys(Arrays.asList(new FieldSchema("partcol", "int", null)));
  table.setSd(new StorageDescriptor());
  table.getSd().setCols(Arrays.asList(new FieldSchema("id", "int", null), new FieldSchema("name", "string", null)));
  table.getSd().setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
  table.getSd().setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
  table.getSd().setSerdeInfo(new SerDeInfo());
  table.getSd().getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
  HiveMetaStoreClient client = server.getCore().newClient();
  client.createTable(table);
  client.close();
  return table;
}
 
Example 18
@Test
public void testCheckStructTableSchemaTypeInvalid() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
      "col2:dynamo_col2#,hashKey:hashKey");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "struct<bignum:bigint,smallnum:tinyint>", ""));
  cols.add(new FieldSchema("col2", "array<map<string,bigint>>", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);

  exceptionRule.expect(MetaException.class);
  exceptionRule.expectMessage("The hive type struct<bignum:bigint,smallnum:tinyint> is not " +
      "supported in DynamoDB");
  storageHandler.checkTableSchemaType(description, table);
}
 
Example 19
private void setupHiveTables() throws TException, IOException {
  List<FieldSchema> partitionKeys = Lists.newArrayList(newFieldSchema("p1"), newFieldSchema("p2"));

  File tableLocation = new File("db1", "table1");
  StorageDescriptor sd = newStorageDescriptor(tableLocation, "col0");
  table1 = newTable("table1", "db1", partitionKeys, sd);
  Partition partition1 = newPartition(table1, "value1", "value2");
  Partition partition2 = newPartition(table1, "value11", "value22");
  table1Partitions = Arrays.asList(partition1, partition2);
  table1PartitionNames = Arrays
      .asList(Warehouse.makePartName(partitionKeys, partition1.getValues()),
          Warehouse.makePartName(partitionKeys, partition2.getValues()));

  File tableLocation2 = new File("db2", "table2");
  StorageDescriptor sd2 = newStorageDescriptor(tableLocation2, "col0");
  table2 = newTable("table2", "db2", partitionKeys, sd2);
}
 
Example 20
Source Project: circus-train   Source File: ReplicaTest.java    License: Apache License 2.0
private Table newTable() {
  Table table = new Table();
  table.setDbName(DB_NAME);
  table.setTableName(TABLE_NAME);
  table.setTableType(TableType.EXTERNAL_TABLE.name());

  StorageDescriptor sd = new StorageDescriptor();
  sd.setLocation(tableLocation);
  table.setSd(sd);

  HashMap<String, String> parameters = new HashMap<>();
  parameters.put(StatsSetupConst.ROW_COUNT, "1");
  table.setParameters(parameters);

  table.setPartitionKeys(PARTITIONS);
  return table;
}
 
Example 21
Source Project: circus-train   Source File: DestructiveReplicaTest.java    License: Apache License 2.0
@Before
public void setUp() {
  SourceTable sourceTable = new SourceTable();
  sourceTable.setDatabaseName(DATABASE);
  sourceTable.setTableName(TABLE);
  tableReplication.setSourceTable(sourceTable);
  ReplicaTable replicaTable = new ReplicaTable();
  replicaTable.setDatabaseName(DATABASE);
  replicaTable.setTableName(REPLICA_TABLE);
  tableReplication.setReplicaTable(replicaTable);
  when(replicaMetaStoreClientSupplier.get()).thenReturn(client);
  replica = new DestructiveReplica(replicaMetaStoreClientSupplier, cleanupLocationManager, tableReplication);

  table = new Table();
  table.setDbName(DATABASE);
  table.setTableName(REPLICA_TABLE);
  table.setPartitionKeys(Lists.newArrayList(new FieldSchema("part1", "string", "")));
  Map<String, String> parameters = new HashMap<>();
  parameters.put(CircusTrainTableParameter.SOURCE_TABLE.parameterName(), DATABASE + "." + TABLE);
  parameters.put(REPLICATION_EVENT.parameterName(), EVENT_ID);
  table.setParameters(parameters);
  StorageDescriptor sd1 = new StorageDescriptor();
  sd1.setLocation(tableLocation.toString());
  table.setSd(sd1);
}
 
Example 22
Source Project: flink   Source File: HiveTableSink.java    License: Apache License 2.0
private Optional<BulkWriter.Factory<RowData>> createBulkWriterFactory(String[] partitionColumns,
		StorageDescriptor sd) {
	String serLib = sd.getSerdeInfo().getSerializationLib().toLowerCase();
	int formatFieldCount = tableSchema.getFieldCount() - partitionColumns.length;
	String[] formatNames = new String[formatFieldCount];
	LogicalType[] formatTypes = new LogicalType[formatFieldCount];
	for (int i = 0; i < formatFieldCount; i++) {
		formatNames[i] = tableSchema.getFieldName(i).get();
		formatTypes[i] = tableSchema.getFieldDataType(i).get().getLogicalType();
	}
	RowType formatType = RowType.of(formatTypes, formatNames);
	Configuration formatConf = new Configuration(jobConf);
	sd.getSerdeInfo().getParameters().forEach(formatConf::set);
	if (serLib.contains("parquet")) {
		return Optional.of(ParquetRowDataBuilder.createWriterFactory(
				formatType, formatConf, hiveVersion.startsWith("3.")));
	} else if (serLib.contains("orc")) {
		TypeDescription typeDescription = OrcSplitReaderUtil.logicalTypeToOrcType(formatType);
		return Optional.of(hiveShim.createOrcBulkWriterFactory(
				formatConf, typeDescription.toString(), formatTypes));
	} else {
		return Optional.empty();
	}
}
 
Example 23
@Test
public void testCheckTableSchemaTypeInvalidType() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
      "col2:dynamo_col2#,hashKey:hashKey");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "string", ""));
  cols.add(new FieldSchema("col2", "tinyint", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);

  exceptionRule.expect(MetaException.class);
  exceptionRule.expectMessage("The hive type tinyint is not supported in DynamoDB");
  storageHandler.checkTableSchemaType(description, table);
}
 
Example 24
Source Project: flink   Source File: HiveTableSource.java    License: Apache License 2.0
public static HiveTablePartition toHiveTablePartition(
		List<String> partitionKeys,
		String[] fieldNames,
		DataType[] fieldTypes,
		HiveShim shim,
		Properties tableProps,
		String defaultPartitionName,
		Partition partition) {
	StorageDescriptor sd = partition.getSd();
	Map<String, Object> partitionColValues = new HashMap<>();
	List<String> nameList = Arrays.asList(fieldNames);
	for (int i = 0; i < partitionKeys.size(); i++) {
		String partitionColName = partitionKeys.get(i);
		String partitionValue = partition.getValues().get(i);
		DataType type = fieldTypes[nameList.indexOf(partitionColName)];
		Object partitionObject;
		if (defaultPartitionName.equals(partitionValue)) {
			LogicalTypeRoot typeRoot = type.getLogicalType().getTypeRoot();
			// while this is inline with Hive, seems it should be null for string columns as well
			partitionObject = typeRoot == LogicalTypeRoot.CHAR || typeRoot == LogicalTypeRoot.VARCHAR ? defaultPartitionName : null;
		} else {
			partitionObject = restorePartitionValueFromFromType(shim, partitionValue, type);
		}
		partitionColValues.put(partitionColName, partitionObject);
	}
	return new HiveTablePartition(sd, partitionColValues, tableProps);
}
 
Example 25
Source Project: data-highway   Source File: AvroStorageDescriptorFactory.java    License: Apache License 2.0
public static StorageDescriptor create(String location) {
  StorageDescriptor storageDescriptor = new StorageDescriptor();
  storageDescriptor.setInputFormat(AVRO_INPUT_FORMAT);
  storageDescriptor.setOutputFormat(AVRO_OUTPUT_FORMAT);
  storageDescriptor.setLocation(location);
  storageDescriptor.setCols(emptyList());

  SerDeInfo serdeInfo = new SerDeInfo();
  serdeInfo.setSerializationLib(AVRO_SERDE);
  storageDescriptor.setSerdeInfo(serdeInfo);

  return storageDescriptor;
}
 
Example 26
Source Project: data-highway   Source File: AvroHiveTableStrategyTest.java    License: Apache License 2.0
@Test
public void newHiveTable() throws URISyntaxException {
  when(uriResolver.resolve(schema1, TABLE, 1))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table result = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);

  assertThat(result.getDbName(), is(DATABASE));
  assertThat(result.getTableName(), is(TABLE));
  assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
  Map<String, String> parameters = result.getParameters();
  assertThat(parameters.get("EXTERNAL"), is("TRUE"));
  assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
      is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("1"));
  List<FieldSchema> partitionKeys = result.getPartitionKeys();
  assertThat(partitionKeys.size(), is(1));
  assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
  StorageDescriptor storageDescriptor = result.getSd();
  assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(storageDescriptor.getLocation(), is(LOCATION));
  assertThat(storageDescriptor.getCols().size(), is(0));
  SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
  assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
 
Example 27
Source Project: data-highway   Source File: AvroHiveTableStrategyTest.java    License: Apache License 2.0
@Test
public void alterHiveTable() throws URISyntaxException {
  when(uriResolver.resolve(schema1, TABLE, 1))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  when(uriResolver.resolve(schema2, TABLE, 2))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc"));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table table = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);

  Table result = underTest.alterHiveTable(table, schema2, 2);

  assertThat(result.getDbName(), is(DATABASE));
  assertThat(result.getTableName(), is(TABLE));
  assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
  Map<String, String> parameters = result.getParameters();
  assertThat(parameters.get("EXTERNAL"), is("TRUE"));
  assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
      is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("2"));
  List<FieldSchema> partitionKeys = result.getPartitionKeys();
  assertThat(partitionKeys.size(), is(1));
  assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
  StorageDescriptor storageDescriptor = result.getSd();
  assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(storageDescriptor.getLocation(), is(LOCATION));
  assertThat(storageDescriptor.getCols().size(), is(0));
  SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
  assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
 
Example 28
Source Project: presto   Source File: ThriftMetastoreUtil.java    License: Apache License 2.0
private static SerDeInfo getSerdeInfo(org.apache.hadoop.hive.metastore.api.Table table)
{
    StorageDescriptor storageDescriptor = table.getSd();
    if (storageDescriptor == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table does not contain a storage descriptor: " + table);
    }
    SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    return serdeInfo;
}
 
Example 29
Source Project: incubator-gobblin   Source File: HiveSchemaEvolutionTest.java    License: Apache License 2.0
private Optional<Table> createEvolvedDestinationTable(String tableName, String dbName, String location,
    boolean withComment) {
  List<FieldSchema> cols = new ArrayList<>();
  // Existing columns that match avroToOrcSchemaEvolutionTest/source_schema_evolution_enabled.ddl
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldRecord__superNestedFieldString", "string",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldRecord.superNestedFieldString" : ""));
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldRecord__superNestedFieldInt", "int",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldRecord.superNestedFieldInt" : ""));
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldString", "string",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldString" : ""));
  // The following column is skipped (simulating un-evolved schema):
  // Column name   : parentFieldRecord__nestedFieldInt
  // Column type   : int
  // Column comment: from flatten_source parentFieldRecord.nestedFieldInt
  cols.add(new FieldSchema("parentFieldInt", "int",
      withComment ? "from flatten_source parentFieldInt" : ""));
  // Extra schema
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldString2", "string",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldString2" : ""));

  String inputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
  String outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat";
  StorageDescriptor storageDescriptor = new StorageDescriptor(cols, location, inputFormat, outputFormat, false, 0,
      new SerDeInfo(), null, Lists.<Order>newArrayList(), null);
  Table table = new Table(tableName, dbName, "ketl_dev", 0, 0, 0, storageDescriptor,
      Lists.<FieldSchema>newArrayList(), Maps.<String,String>newHashMap(), "", "", "");

  return Optional.of(table);
}
 
Example 30
Source Project: presto   Source File: ThriftMetastoreUtil.java    License: Apache License 2.0
private static StorageDescriptor makeStorageDescriptor(String tableName, List<Column> columns, Storage storage)
{
    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(tableName);
    serdeInfo.setSerializationLib(storage.getStorageFormat().getSerDeNullable());
    serdeInfo.setParameters(storage.getSerdeParameters());

    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation(emptyToNull(storage.getLocation()));
    sd.setCols(columns.stream()
            .map(ThriftMetastoreUtil::toMetastoreApiFieldSchema)
            .collect(toImmutableList()));
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat(storage.getStorageFormat().getInputFormatNullable());
    sd.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable());
    sd.setSkewedInfoIsSet(storage.isSkewed());
    sd.setParameters(ImmutableMap.of());

    Optional<HiveBucketProperty> bucketProperty = storage.getBucketProperty();
    if (bucketProperty.isPresent()) {
        sd.setNumBuckets(bucketProperty.get().getBucketCount());
        sd.setBucketCols(bucketProperty.get().getBucketedBy());
        if (!bucketProperty.get().getSortedBy().isEmpty()) {
            sd.setSortCols(bucketProperty.get().getSortedBy().stream()
                    .map(column -> new Order(column.getColumnName(), column.getOrder().getHiveOrder()))
                    .collect(toImmutableList()));
        }
    }

    return sd;
}