Java Code Examples for org.apache.hadoop.hive.metastore.api.Table#getPartitionKeys()

The following examples show how to use org.apache.hadoop.hive.metastore.api.Table#getPartitionKeys(). They are taken from open source projects; the source file and originating project are noted above each example.
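For orientation before the examples: getPartitionKeys() returns the table's partition columns as a List<FieldSchema> (name, type, optional comment). The list may be null or empty for an unpartitioned table, which several examples below guard against. A minimal sketch, assuming an already configured IMetaStoreClient and hypothetical database and table names:

// Illustrative sketch only: "my_db" and "my_table" are hypothetical names,
// and the IMetaStoreClient is assumed to be already configured.
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Table;

public class PartitionKeysSketch {
    static List<FieldSchema> partitionKeysOf(IMetaStoreClient client) throws Exception {
        Table table = client.getTable("my_db", "my_table");
        // May be null or empty for an unpartitioned table, as several examples below check.
        List<FieldSchema> keys = table.getPartitionKeys();
        return keys == null ? Collections.emptyList() : keys;
    }
}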
Example 1
Source File: MetastoreClientUtils.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0
/**
 * Taken from HiveMetaStore#create_table_core
 * https://github.com/apache/hive/blob/rel/release-2.3.0/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java#L1370-L1383
 */
public static void validateTableObject(Table table, Configuration conf) throws InvalidObjectException {
  checkNotNull(table, "table cannot be null");
  checkNotNull(table.getSd(), "Table#StorageDescriptor cannot be null");

  if (!hiveShims.validateTableName(table.getTableName(), conf)) {
    throw new InvalidObjectException(table.getTableName() + " is not a valid object name");
  }
  String validate = MetaStoreUtils.validateTblColumns(table.getSd().getCols());
  if (validate != null) {
    throw new InvalidObjectException("Invalid column " + validate);
  }

  if (table.getPartitionKeys() != null) {
    validate = MetaStoreUtils.validateTblColumns(table.getPartitionKeys());
    if (validate != null) {
      throw new InvalidObjectException("Invalid partition column " + validate);
    }
  }
}
 
Example 2
Source File: HiveClientWrapper.java    From pxf with Apache License 2.0
/**
 * Populates the given metadata object with the given table's fields and partitions.
 * The partition fields are added at the end of the table schema.
 * Throws an exception if the table contains unsupported field types.
 * Supported HCatalog types: TINYINT,
 * SMALLINT, INT, BIGINT, BOOLEAN, FLOAT, DOUBLE, STRING, BINARY, TIMESTAMP,
 * DATE, DECIMAL, VARCHAR, CHAR.
 *
 * @param tbl      Hive table
 * @param metadata schema of given table
 */
public void getSchema(Table tbl, Metadata metadata) {

    int hiveColumnsSize = tbl.getSd().getColsSize();
    int hivePartitionsSize = tbl.getPartitionKeysSize();

    LOG.debug("Hive table: {} fields. {} partitions.", hiveColumnsSize, hivePartitionsSize);

    // check hive fields
    try {
        List<FieldSchema> hiveColumns = tbl.getSd().getCols();
        for (FieldSchema hiveCol : hiveColumns) {
            metadata.addField(HiveUtilities.mapHiveType(hiveCol));
        }
        // check partition fields
        List<FieldSchema> hivePartitions = tbl.getPartitionKeys();
        for (FieldSchema hivePart : hivePartitions) {
            metadata.addField(HiveUtilities.mapHiveType(hivePart));
        }
    } catch (UnsupportedTypeException e) {
        String errorMsg = "Failed to retrieve metadata for table " + metadata.getItem() + ". " +
                e.getMessage();
        throw new UnsupportedTypeException(errorMsg);
    }
}
 
Example 3
Source File: HiveMetadataUtils.java    From dremio-oss with Apache License 2.0
public static List<PartitionValue> getPartitionValues(Table table, Partition partition, boolean enforceVarcharWidth) {
  if (partition == null) {
    return Collections.emptyList();
  }

  final List<String> partitionValues = partition.getValues();
  final List<PartitionValue> output = new ArrayList<>();
  final List<FieldSchema> partitionKeys = table.getPartitionKeys();
  for (int i = 0; i < partitionKeys.size(); i++) {
    final PartitionValue value = getPartitionValue(partitionKeys.get(i), partitionValues.get(i), enforceVarcharWidth);
    if (value != null) {
      output.add(value);
    }
  }
  return output;
}
 
Example 4
Source File: PartitionUtil.java    From metacat with Apache License 2.0
/**
 * Retrieves the partition values from the partition name. This method also validates the partition keys
 * against those of the table.
 *
 * @param tableQName  table name
 * @param table       table
 * @param partName    partition name
 * @return list of partition values
 */
public static List<String> getPartValuesFromPartName(final QualifiedName tableQName, final Table table,
    final String partName) {
    if (Strings.isNullOrEmpty(partName)) {
        throw new InvalidMetaException(tableQName, partName, null);
    }
    final LinkedHashMap<String, String> partSpec = new LinkedHashMap<>();
    Warehouse.makeSpecFromName(partSpec, new Path(partName));
    final List<String> values = new ArrayList<>();
    for (FieldSchema field : table.getPartitionKeys()) {
        final String key = field.getName();
        final String val = partSpec.get(key);
        if (val == null) {
            throw new InvalidMetaException(tableQName, partName, null);
        }
        values.add(val);
    }
    return values;
}
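A hypothetical usage sketch of the method above, assuming a table whose partition keys are (year, month), in that order:

// Hypothetical usage: tableQName and table are assumed to describe a table
// partitioned by (year, month).
static List<String> valuesForMay2020(QualifiedName tableQName, Table table) {
    // Returns ["2020", "05"]; a key missing from the name raises InvalidMetaException.
    return PartitionUtil.getPartValuesFromPartName(tableQName, table, "year=2020/month=05");
}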
 
Example 5
Source File: AvroHiveTableStrategyTest.java    From data-highway with Apache License 2.0
@Test
public void newHiveTable() throws URISyntaxException {
  when(uriResolver.resolve(schema1, TABLE, 1))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table result = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);

  assertThat(result.getDbName(), is(DATABASE));
  assertThat(result.getTableName(), is(TABLE));
  assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
  Map<String, String> parameters = result.getParameters();
  assertThat(parameters.get("EXTERNAL"), is("TRUE"));
  assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
      is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("1"));
  List<FieldSchema> partitionKeys = result.getPartitionKeys();
  assertThat(partitionKeys.size(), is(1));
  assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
  StorageDescriptor storageDescriptor = result.getSd();
  assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(storageDescriptor.getLocation(), is(LOCATION));
  assertThat(storageDescriptor.getCols().size(), is(0));
  SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
  assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
 
Example 6
Source File: AvroHiveTableStrategyTest.java    From data-highway with Apache License 2.0
@Test
public void alterHiveTable() throws URISyntaxException {
  when(uriResolver.resolve(schema1, TABLE, 1))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  when(uriResolver.resolve(schema2, TABLE, 2))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc"));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table table = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);

  Table result = underTest.alterHiveTable(table, schema2, 2);

  assertThat(result.getDbName(), is(DATABASE));
  assertThat(result.getTableName(), is(TABLE));
  assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
  Map<String, String> parameters = result.getParameters();
  assertThat(parameters.get("EXTERNAL"), is("TRUE"));
  assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
      is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("2"));
  List<FieldSchema> partitionKeys = result.getPartitionKeys();
  assertThat(partitionKeys.size(), is(1));
  assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
  StorageDescriptor storageDescriptor = result.getSd();
  assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(storageDescriptor.getLocation(), is(LOCATION));
  assertThat(storageDescriptor.getCols().size(), is(0));
  SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
  assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
 
Example 7
Source File: HiveDataFragmenter.java    From pxf with Apache License 2.0
/**
 * Verifies that all the Greenplum-defined columns are present in the Hive
 * table schema, then returns a list of indexes corresponding to the
 * matching columns, ordered by the Greenplum schema order.
 *
 * @param tbl the hive table
 * @return a list of indexes
 */
List<Integer> verifySchema(Table tbl) {

    List<Integer> indexes = new ArrayList<>();
    List<FieldSchema> hiveColumns = tbl.getSd().getCols();
    List<FieldSchema> hivePartitions = tbl.getPartitionKeys();

    Set<String> columnAndPartitionNames =
            Stream.concat(hiveColumns.stream(), hivePartitions.stream())
                    .map(FieldSchema::getName)
                    .collect(Collectors.toSet());

    Map<String, Integer> columnNameToColsIndexMap =
            IntStream.range(0, hiveColumns.size())
                    .boxed()
                    .collect(Collectors.toMap(i -> hiveColumns.get(i).getName(), i -> i));

    for (ColumnDescriptor cd : context.getTupleDescription()) {
        if (!columnAndPartitionNames.contains(cd.columnName()) &&
                !columnAndPartitionNames.contains(cd.columnName().toLowerCase())) {
            throw new IllegalArgumentException(
                    String.format("Column '%s' does not exist in the Hive schema. " +
                                    "Ensure the column exists and check the column name spelling and case",
                            cd.columnName()));
        }

        // The index of the column on the Hive schema
        Integer index =
                defaultIfNull(columnNameToColsIndexMap.get(cd.columnName()),
                        columnNameToColsIndexMap.get(cd.columnName().toLowerCase()));
        indexes.add(index);
    }
    return indexes;
}
 
Example 8
Source File: HiveEndpoint.java    From circus-train with Apache License 2.0
public PartitionsAndStatistics getPartitions(Table table, String partitionPredicate, int maxPartitions)
  throws TException {
  try (CloseableMetaStoreClient client = metaStoreClientSupplier.get()) {
    List<Partition> partitions = null;
    if (Strings.isNullOrEmpty(partitionPredicate)) {
      partitions = client.listPartitions(table.getDbName(), table.getTableName(), (short) maxPartitions);
    } else {
      partitions = client.listPartitionsByFilter(table.getDbName(), table.getTableName(), partitionPredicate,
          (short) maxPartitions);
    }

    // Generate a list of partition names
    List<String> partitionNames = getPartitionNames(table.getPartitionKeys(), partitions);
    // Fetch the partition statistics
    List<String> columnNames = getColumnNames(table);

    Map<String, List<ColumnStatisticsObj>> statisticsByPartitionName = client
        .getPartitionColumnStatistics(table.getDbName(), table.getTableName(), partitionNames, columnNames);
    if (statisticsByPartitionName != null && !statisticsByPartitionName.isEmpty()) {
      log.debug("Retrieved column stats entries for {} partitions of table {}.{}", statisticsByPartitionName.size(),
          table.getDbName(), table.getTableName());
    } else {
      log.debug("No partition column stats retrieved for table {}.{}", table.getDbName(), table.getTableName());
    }

    return new PartitionsAndStatistics(table.getPartitionKeys(), partitions, statisticsByPartitionName);
  }
}
 
Example 9
Source File: DestructiveReplica.java    From circus-train with Apache License 2.0
private void dropAndDeletePartitions(CloseableMetaStoreClient client, Predicate<String> shouldDelete)
  throws MetaException, TException, NoSuchObjectException {
  Table replicaTable = client.getTable(databaseName, tableName);
  List<FieldSchema> partitionKeys = replicaTable.getPartitionKeys();
  if (partitionKeys == null || partitionKeys.isEmpty()) {
    // unpartitioned table, nothing to delete
    return;
  }
  PartitionIterator partitionIterator = new PartitionIterator(client, replicaTable, (short) 1000);
  while (partitionIterator.hasNext()) {
    Partition replicaPartition = partitionIterator.next();
    List<String> values = replicaPartition.getValues();
    String partitionName = Warehouse.makePartName(partitionKeys, values);
    if (shouldDelete.apply(partitionName)) {
      log
          .info("Dropping partition for replica table: "
              + databaseName
              + "."
              + tableName
              + ", partition value: '"
              + partitionName
              + "'");
      client.dropPartition(databaseName, tableName, partitionName, DELETE_DATA);
      Path oldLocation = locationAsPath(replicaPartition);
      String oldEventId = replicaPartition.getParameters().get(REPLICATION_EVENT.parameterName());
      cleanupLocationManager.addCleanupLocation(oldEventId, oldLocation);
    }
  }
}
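For reference, the Warehouse.makePartName call above renders partition keys and values into the canonical key=value path form that the shouldDelete predicate receives. A minimal self-contained sketch, assuming string-typed keys year and month:

// Minimal sketch of Warehouse.makePartName, as used in the example above.
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;

public class PartNameSketch {
    public static void main(String[] args) throws MetaException {
        List<FieldSchema> keys = Arrays.asList(
                new FieldSchema("year", "string", null),
                new FieldSchema("month", "string", null));
        // Prints "year=2020/month=05", the same form matched by shouldDelete above.
        System.out.println(Warehouse.makePartName(keys, Arrays.asList("2020", "05")));
    }
}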
 
Example 10
Source File: EventUtils.java    From circus-train with Apache License 2.0
public static EventPartitions toEventPartitions(Table table, List<Partition> partitions) {
  LinkedHashMap<String, String> partitionKeyTypes = new LinkedHashMap<>();
  List<FieldSchema> partitionKeys = table.getPartitionKeys();
  for (FieldSchema partitionKey : partitionKeys) {
    partitionKeyTypes.put(partitionKey.getName(), partitionKey.getType());
  }
  EventPartitions eventPartitions = new EventPartitions(partitionKeyTypes);
  if (partitions != null) {
    for (Partition partition : partitions) {
      eventPartitions.add(new EventPartition(partition.getValues(),
          LocationUtils.hasLocation(partition) ? LocationUtils.locationAsUri(partition) : null));
    }
  }
  return eventPartitions;
}
 
Example 11
Source File: HiveConvertersImpl.java    From metacat with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public TableDto hiveToMetacatTable(final QualifiedName name, final Table table) {
    final TableDto dto = new TableDto();
    dto.setSerde(toStorageDto(table.getSd(), table.getOwner()));
    dto.setAudit(new AuditDto());
    dto.setName(name);
    if (table.isSetCreateTime()) {
        dto.getAudit().setCreatedDate(epochSecondsToDate(table.getCreateTime()));
    }
    dto.setMetadata(table.getParameters());

    final List<FieldSchema> nonPartitionColumns = table.getSd().getCols();
    final List<FieldSchema> partitionColumns = table.getPartitionKeys();
    final List<FieldDto> allFields =
        Lists.newArrayListWithCapacity(nonPartitionColumns.size() + partitionColumns.size());
    nonPartitionColumns.stream()
        .map(field -> this.hiveToMetacatField(field, false))
        .forEachOrdered(allFields::add);
    partitionColumns.stream()
        .map(field -> this.hiveToMetacatField(field, true))
        .forEachOrdered(allFields::add);
    dto.setFields(allFields);
    dto.setView(new ViewDto(table.getViewOriginalText(),
        table.getViewExpandedText()));
    return dto;
}
 
Example 12
Source File: TableTypeFilter.java    From incubator-gobblin with Apache License 2.0
@Override
public boolean apply(@Nullable Table input) {
  if (input == null) {
    return false;
  }

  switch (tableType) {
    case SNAPSHOT:
      return input.getPartitionKeys() == null || input.getPartitionKeys().size() == 0;
    case PARTITIONED:
      return input.getPartitionKeys() != null && input.getPartitionKeys().size() > 0;
    default:
      throw new UnsupportedOperationException("Invalid type: " + tableType);
  }
}
 
Example 13
Source File: HiveUtils.java    From kite with Apache License 2.0
private static List<FieldSchema> getPartCols(Table table) {
  List<FieldSchema> partKeys = table.getPartitionKeys();
  if (partKeys == null) {
    partKeys = new ArrayList<FieldSchema>();
    table.setPartitionKeys(partKeys);
  }
  return partKeys;
}
 
Example 14
Source File: HiveConnectorInfoConverter.java    From metacat with Apache License 2.0
/**
 * Converts to TableInfo.
 *
 * @param name  qualified name of the table
 * @param table connector table
 * @return Metacat table info
 */
@Override
public TableInfo toTableInfo(final QualifiedName name, final Table table) {
    // Use a mutable list for the fallback: Collections.emptyList() is immutable
    // and would throw UnsupportedOperationException on the add() below.
    final List<FieldSchema> nonPartitionColumns =
        (table.getSd() != null) ? table.getSd().getCols() : new ArrayList<>();
    // add the data fields to nonPartitionColumns; ignore all exceptions
    try {
        if (nonPartitionColumns.isEmpty()) {
            for (StructField field : HiveTableUtil.getTableStructFields(table)) {
                final FieldSchema fieldSchema = new FieldSchema(field.getFieldName(),
                    field.getFieldObjectInspector().getTypeName(),
                    field.getFieldComment());
                nonPartitionColumns.add(fieldSchema);
            }
        }
    } catch (final Exception e) {
        log.error(e.getMessage(), e);
    }

    final List<FieldSchema> partitionColumns = table.getPartitionKeys();
    final Date creationDate = table.isSetCreateTime() ? epochSecondsToDate(table.getCreateTime()) : null;
    final List<FieldInfo> allFields =
        Lists.newArrayListWithCapacity(nonPartitionColumns.size() + partitionColumns.size());
    nonPartitionColumns.stream()
        .map(field -> hiveToMetacatField(field, false))
        .forEachOrdered(allFields::add);
    partitionColumns.stream()
        .map(field -> hiveToMetacatField(field, true))
        .forEachOrdered(allFields::add);
    final AuditInfo auditInfo = AuditInfo.builder().createdDate(creationDate).build();
    if (null != table.getTableType() && table.getTableType().equals(TableType.VIRTUAL_VIEW.name())) {
        return TableInfo.builder()
            .serde(toStorageInfo(table.getSd(), table.getOwner())).fields(allFields)
            .metadata(table.getParameters()).name(name).auditInfo(auditInfo)
            .view(ViewInfo.builder()
                .viewOriginalText(table.getViewOriginalText())
                .viewExpandedText(table.getViewExpandedText())
                .build())
            .build();
    } else {
        return TableInfo.builder()
            .serde(toStorageInfo(table.getSd(), table.getOwner())).fields(allFields)
            .metadata(table.getParameters()).name(name).auditInfo(auditInfo)
            .build();
    }
}