Java Code Examples for org.apache.hadoop.hive.metastore.api.StorageDescriptor#getSerdeInfo()

The following examples show how to use org.apache.hadoop.hive.metastore.api.StorageDescriptor#getSerdeInfo(). Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
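
A pattern recurs throughout these examples: getSerdeInfo() may return null (as may the enclosing StorageDescriptor), so callers validate before dereferencing. A minimal sketch of that pattern, using hypothetical names and a generic exception, looks like this:

StorageDescriptor sd = table.getSd();
if (sd == null || sd.getSerdeInfo() == null) {
    // No storage or SerDe metadata recorded for this table; treat as invalid.
    throw new IllegalStateException("Table is missing storage descriptor or SerDe info");
}
SerDeInfo serdeInfo = sd.getSerdeInfo();
String serializationLib = serdeInfo.getSerializationLib(); // may still be null
Map<String, String> serdeParameters = serdeInfo.getParameters(); // may be null or empty
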
Example 1
Source File: ThriftMetastoreUtil.java    From presto with Apache License 2.0
private static void fromMetastoreApiStorageDescriptor(
        Map<String, String> tableParameters,
        StorageDescriptor storageDescriptor,
        Storage.Builder builder,
        String tablePartitionName)
{
    SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    builder.setStorageFormat(StorageFormat.createNullable(serdeInfo.getSerializationLib(), storageDescriptor.getInputFormat(), storageDescriptor.getOutputFormat()))
            .setLocation(nullToEmpty(storageDescriptor.getLocation()))
            .setBucketProperty(HiveBucketProperty.fromStorageDescriptor(tableParameters, storageDescriptor, tablePartitionName))
            .setSkewed(storageDescriptor.isSetSkewedInfo() && storageDescriptor.getSkewedInfo().isSetSkewedColNames() && !storageDescriptor.getSkewedInfo().getSkewedColNames().isEmpty())
            .setSerdeParameters(serdeInfo.getParameters() == null ? ImmutableMap.of() : serdeInfo.getParameters());
}
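
Note the defensive normalization at the end: a null SerDe parameter map is replaced with an empty ImmutableMap, so consumers of the resulting Storage object never have to repeat these null checks.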
 
Example 2
Source File: HiveTableUtil.java    From flink with Apache License 2.0
/**
 * Creates the properties used to initialize a SerDe.
 *
 * @param storageDescriptor the storage descriptor to read SerDe parameters and columns from
 * @return the properties for initializing a SerDe
 */
public static Properties createPropertiesFromStorageDescriptor(StorageDescriptor storageDescriptor) {
	SerDeInfo serDeInfo = storageDescriptor.getSerdeInfo();
	Map<String, String> parameters = serDeInfo.getParameters();
	Properties properties = new Properties();
	properties.setProperty(
			serdeConstants.SERIALIZATION_FORMAT,
			parameters.get(serdeConstants.SERIALIZATION_FORMAT));
	List<String> colTypes = new ArrayList<>();
	List<String> colNames = new ArrayList<>();
	List<FieldSchema> cols = storageDescriptor.getCols();
	for (FieldSchema col: cols){
		colTypes.add(col.getType());
		colNames.add(col.getName());
	}
	properties.setProperty(serdeConstants.LIST_COLUMNS, StringUtils.join(colNames, String.valueOf(SerDeUtils.COMMA)));
	// Note: serdeConstants.COLUMN_NAME_DELIMITER is not defined in older Hive versions,
	// so we use a string literal here to avoid a version shim.
	properties.setProperty("column.name.delimiter", String.valueOf(SerDeUtils.COMMA));
	properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, StringUtils.join(colTypes, DEFAULT_LIST_COLUMN_TYPES_SEPARATOR));
	properties.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
	properties.putAll(parameters);
	return properties;
}
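
Because properties.putAll(parameters) runs last, any key that also appears in the SerDe's own parameter map overrides the defaults set earlier in the method, such as the "NULL" serialization null format.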
 
Example 3
Source File: HiveConnectorTableService.java    From metacat with Apache License 2.0
private HiveStorageFormat extractHiveStorageFormat(final Table table) throws MetaException {
    final StorageDescriptor descriptor = table.getSd();
    if (descriptor == null) {
        throw new MetaException("Table is missing storage descriptor");
    }
    final SerDeInfo serdeInfo = descriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new MetaException(
            "Table storage descriptor is missing SerDe info");
    }
    final String outputFormat = descriptor.getOutputFormat();
    final String serializationLib = serdeInfo.getSerializationLib();

    for (HiveStorageFormat format : HiveStorageFormat.values()) {
        if (format.getOutputFormat().equals(outputFormat) && format.getSerde().equals(serializationLib)) {
            return format;
        }
    }
    throw new MetaException(
        String.format("Output format %s with SerDe %s is not supported", outputFormat, serializationLib));
}
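
The lookup requires both the output format and the serialization library to match a known HiveStorageFormat, so a table that pairs a recognized output format with a custom SerDe still falls through to the final MetaException.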
 
Example 4
Source File: HiveConnectorInfoConverter.java    From metacat with Apache License 2.0
private StorageInfo toStorageInfo(final StorageDescriptor sd, final String owner) {
    if (sd == null) {
        return new StorageInfo();
    }
    if (sd.getSerdeInfo() != null) {
        return StorageInfo.builder().owner(owner)
            .uri(sd.getLocation())
            .inputFormat(sd.getInputFormat())
            .outputFormat(sd.getOutputFormat())
            .parameters(sd.getParameters())
            .serializationLib(sd.getSerdeInfo().getSerializationLib())
            .serdeInfoParameters(sd.getSerdeInfo().getParameters())
            .build();
    }
    return StorageInfo.builder().owner(owner).uri(sd.getLocation()).inputFormat(sd.getInputFormat())
        .outputFormat(sd.getOutputFormat()).parameters(sd.getParameters()).build();
}
 
Example 5
Source File: HiveConvertersImpl.java    From metacat with Apache License 2.0
private StorageDto toStorageDto(@Nullable final StorageDescriptor sd, final String owner) {
    final StorageDto result = new StorageDto();
    if (sd != null) {
        result.setOwner(owner);
        result.setUri(sd.getLocation());
        result.setInputFormat(sd.getInputFormat());
        result.setOutputFormat(sd.getOutputFormat());
        result.setParameters(sd.getParameters());
        final SerDeInfo serde = sd.getSerdeInfo();
        if (serde != null) {
            result.setSerializationLib(serde.getSerializationLib());
            result.setSerdeInfoParameters(serde.getParameters());
        }
    }
    return result;
}
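
Examples 4 and 5 are two variants of the same conversion in metacat: one builds an immutable StorageInfo via a builder, the other populates a mutable StorageDto; both guard the getSerdeInfo() dereference with an explicit null check.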
 
Example 6
Source File: AvroHiveTableStrategyTest.java    From data-highway with Apache License 2.0
@Test
public void newHiveTable() throws URISyntaxException {
  when(uriResolver.resolve(schema1, TABLE, 1))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table result = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);

  assertThat(result.getDbName(), is(DATABASE));
  assertThat(result.getTableName(), is(TABLE));
  assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
  Map<String, String> parameters = result.getParameters();
  assertThat(parameters.get("EXTERNAL"), is("TRUE"));
  assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
      is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("1"));
  List<FieldSchema> partitionKeys = result.getPartitionKeys();
  assertThat(partitionKeys.size(), is(1));
  assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
  StorageDescriptor storageDescriptor = result.getSd();
  assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(storageDescriptor.getLocation(), is(LOCATION));
  assertThat(storageDescriptor.getCols().size(), is(0));
  SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
  assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
 
Example 7
Source File: AvroHiveTableStrategyTest.java    From data-highway with Apache License 2.0
@Test
public void alterHiveTable() throws URISyntaxException {
  when(uriResolver.resolve(schema1, TABLE, 1))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  when(uriResolver.resolve(schema2, TABLE, 2))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc"));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table table = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);

  Table result = underTest.alterHiveTable(table, schema2, 2);

  assertThat(result.getDbName(), is(DATABASE));
  assertThat(result.getTableName(), is(TABLE));
  assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
  Map<String, String> parameters = result.getParameters();
  assertThat(parameters.get("EXTERNAL"), is("TRUE"));
  assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
      is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("2"));
  List<FieldSchema> partitionKeys = result.getPartitionKeys();
  assertThat(partitionKeys.size(), is(1));
  assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
  StorageDescriptor storageDescriptor = result.getSd();
  assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(storageDescriptor.getLocation(), is(LOCATION));
  assertThat(storageDescriptor.getCols().size(), is(0));
  SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
  assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
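
This test builds its starting table with newHiveTable from Example 6, then checks that alterHiveTable only advances the schema URL and version parameters while the storage descriptor assertions, including the SerDe's serialization library, stay identical.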
 
Example 8
Source File: ThriftMetastoreUtil.java    From presto with Apache License 2.0
private static SerDeInfo getSerdeInfo(org.apache.hadoop.hive.metastore.api.Table table)
{
    StorageDescriptor storageDescriptor = table.getSd();
    if (storageDescriptor == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table does not contain a storage descriptor: " + table);
    }
    SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    return serdeInfo;
}
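
This helper folds the two null checks from Example 1 into a single accessor, so callers elsewhere in ThriftMetastoreUtil can assume a non-null SerDeInfo.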
 
Example 9
Source File: HiveClientWrapper.java    From pxf with Apache License 2.0
private String getSerdeParameter(StorageDescriptor sd, String parameterKey) {
    String parameterValue = null;
    if (sd != null && sd.getSerdeInfo() != null && sd.getSerdeInfo().getParameters() != null && sd.getSerdeInfo().getParameters().get(parameterKey) != null) {
        parameterValue = sd.getSerdeInfo().getParameters().get(parameterKey);
    }

    return parameterValue;
}
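
The chained null checks are correct but verbose. A hypothetical rewrite (not from the PXF source) using java.util.Optional expresses the same traversal declaratively:

private String getSerdeParameter(StorageDescriptor sd, String parameterKey) {
    // Each map() short-circuits to empty if the previous step yielded null,
    // matching the original null-check chain.
    return Optional.ofNullable(sd)
            .map(StorageDescriptor::getSerdeInfo)
            .map(SerDeInfo::getParameters)
            .map(params -> params.get(parameterKey))
            .orElse(null);
}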
 
Example 10
Source File: HiveConnectorFastPartitionService.java    From metacat with Apache License 2.0
private void copyTableSdToPartitionInfoSd(final PartitionInfo partitionInfo, final Table table) {
    StorageInfo sd = partitionInfo.getSerde();
    //
    // Partitions can be provided in the request without the storage information.
    //
    if (sd == null) {
        sd = new StorageInfo();
        partitionInfo.setSerde(sd);
    }
    final StorageDescriptor tableSd = table.getSd();

    if (StringUtils.isBlank(sd.getInputFormat())) {
        sd.setInputFormat(tableSd.getInputFormat());
    }
    if (StringUtils.isBlank(sd.getOutputFormat())) {
        sd.setOutputFormat(tableSd.getOutputFormat());
    }
    if (sd.getParameters() == null || sd.getParameters().isEmpty()) {
        sd.setParameters(tableSd.getParameters());
    }
    final SerDeInfo tableSerde = tableSd.getSerdeInfo();
    if (tableSerde != null) {
        if (StringUtils.isBlank(sd.getSerializationLib())) {
            sd.setSerializationLib(tableSerde.getSerializationLib());
        }
        if (sd.getSerdeInfoParameters() == null || sd.getSerdeInfoParameters().isEmpty()) {
            sd.setSerdeInfoParameters(tableSerde.getParameters());
        }
    }
}
 
Example 11
Source File: HiveConnectorPartitionService.java    From metacat with Apache License 2.0
private void copyTableSdToPartitionSd(final List<Partition> hivePartitions, final Table table) {
    //
    // Update the partition info based on that of the table.
    //
    for (Partition partition : hivePartitions) {
        final StorageDescriptor sd = partition.getSd();
        final StorageDescriptor tableSdCopy = table.getSd().deepCopy();
        if (tableSdCopy.getSerdeInfo() == null) {
            final SerDeInfo serDeInfo = new SerDeInfo(null, null, new HashMap<>());
            tableSdCopy.setSerdeInfo(serDeInfo);
        }

        tableSdCopy.setLocation(sd.getLocation());
        if (!Strings.isNullOrEmpty(sd.getInputFormat())) {
            tableSdCopy.setInputFormat(sd.getInputFormat());
        }
        if (!Strings.isNullOrEmpty(sd.getOutputFormat())) {
            tableSdCopy.setOutputFormat(sd.getOutputFormat());
        }
        if (sd.getParameters() != null && !sd.getParameters().isEmpty()) {
            tableSdCopy.setParameters(sd.getParameters());
        }
        if (sd.getSerdeInfo() != null) {
            if (!Strings.isNullOrEmpty(sd.getSerdeInfo().getName())) {
                tableSdCopy.getSerdeInfo().setName(sd.getSerdeInfo().getName());
            }
            if (!Strings.isNullOrEmpty(sd.getSerdeInfo().getSerializationLib())) {
                tableSdCopy.getSerdeInfo().setSerializationLib(sd.getSerdeInfo().getSerializationLib());
            }
            if (sd.getSerdeInfo().getParameters() != null && !sd.getSerdeInfo().getParameters().isEmpty()) {
                tableSdCopy.getSerdeInfo().setParameters(sd.getSerdeInfo().getParameters());
            }
        }
        partition.setSd(tableSdCopy);
    }
}
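
Examples 10 and 11 solve the same consistency problem in opposite directions: Example 10 backfills missing fields of a partition's StorageInfo from the table, while Example 11 starts from a deep copy of the table's StorageDescriptor and overlays whatever partition-level values are present.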
 
Example 12
Source File: HiveMetaStoreBridge.java    From atlas with Apache License 2.0
private AtlasEntity toStorageDescEntity(StorageDescriptor storageDesc, String tableQualifiedName, String sdQualifiedName, AtlasObjectId tableId ) throws AtlasHookException {
    AtlasEntity ret = new AtlasEntity(HiveDataTypes.HIVE_STORAGEDESC.getName());

    ret.setRelationshipAttribute(ATTRIBUTE_TABLE, AtlasTypeUtil.getAtlasRelatedObjectId(tableId, RELATIONSHIP_HIVE_TABLE_STORAGE_DESC));
    ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, sdQualifiedName);
    ret.setAttribute(ATTRIBUTE_PARAMETERS, storageDesc.getParameters());
    ret.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getPathWithNameServiceID(storageDesc.getLocation()));
    ret.setAttribute(ATTRIBUTE_INPUT_FORMAT, storageDesc.getInputFormat());
    ret.setAttribute(ATTRIBUTE_OUTPUT_FORMAT, storageDesc.getOutputFormat());
    ret.setAttribute(ATTRIBUTE_COMPRESSED, storageDesc.isCompressed());
    ret.setAttribute(ATTRIBUTE_NUM_BUCKETS, storageDesc.getNumBuckets());
    ret.setAttribute(ATTRIBUTE_STORED_AS_SUB_DIRECTORIES, storageDesc.isStoredAsSubDirectories());

    if (storageDesc.getBucketCols().size() > 0) {
        ret.setAttribute(ATTRIBUTE_BUCKET_COLS, storageDesc.getBucketCols());
    }

    if (storageDesc.getSerdeInfo() != null) {
        SerDeInfo serdeInfo = storageDesc.getSerdeInfo();

        LOG.debug("serdeInfo = {}", serdeInfo);
        // SkewedInfo skewedInfo = storageDesc.getSkewedInfo();

        AtlasStruct serdeInfoStruct = new AtlasStruct(HiveDataTypes.HIVE_SERDE.getName());

        serdeInfoStruct.setAttribute(ATTRIBUTE_NAME, serdeInfo.getName());
        serdeInfoStruct.setAttribute(ATTRIBUTE_SERIALIZATION_LIB, serdeInfo.getSerializationLib());
        serdeInfoStruct.setAttribute(ATTRIBUTE_PARAMETERS, serdeInfo.getParameters());

        ret.setAttribute(ATTRIBUTE_SERDE_INFO, serdeInfoStruct);
    }

    if (CollectionUtils.isNotEmpty(storageDesc.getSortCols())) {
        List<AtlasStruct> sortColsStruct = new ArrayList<>();

        for (Order sortcol : storageDesc.getSortCols()) {
            String hiveOrderName = HiveDataTypes.HIVE_ORDER.getName();
            AtlasStruct colStruct = new AtlasStruct(hiveOrderName);
            colStruct.setAttribute("col", sortcol.getCol());
            colStruct.setAttribute("order", sortcol.getOrder());

            sortColsStruct.add(colStruct);
        }

        ret.setAttribute(ATTRIBUTE_SORT_COLS, sortColsStruct);
    }

    return ret;
}
 
Example 13
Source File: HiveConvertersImpl.java    From metacat with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public Partition metacatToHivePartition(final PartitionDto partitionDto, @Nullable final TableDto tableDto) {
    final Partition result = new Partition();

    final QualifiedName name = partitionDto.getName();
    List<String> values = Lists.newArrayListWithCapacity(16);
    String databaseName = null;
    String tableName = null;
    if (name != null) {
        if (name.getPartitionName() != null) {
            //
            // Unescape the partition name to get the right partition values.
            // Partition names are always escaped, whereas the partition values are not.
            //
            values = getPartValsFromName(tableDto, name.getPartitionName());
        }

        if (name.getDatabaseName() != null) {
            databaseName = name.getDatabaseName();
        }

        if (name.getTableName() != null) {
            tableName = name.getTableName();
        }
    }
    result.setValues(values);
    result.setDbName(databaseName);
    result.setTableName(tableName);

    Map<String, String> metadata = partitionDto.getMetadata();
    if (metadata == null) {
        metadata = Maps.newHashMap();
    }
    result.setParameters(metadata);

    result.setSd(fromStorageDto(partitionDto.getSerde(), tableName));
    final StorageDescriptor sd = result.getSd();
    if (tableDto != null) {
        if (sd.getSerdeInfo() != null && tableDto.getSerde() != null && Strings.isNullOrEmpty(
            sd.getSerdeInfo().getSerializationLib())) {
            sd.getSerdeInfo().setSerializationLib(tableDto.getSerde().getSerializationLib());
        }

        final List<FieldDto> fields = tableDto.getFields();
        if (fields == null) {
            sd.setCols(Collections.emptyList());
        } else {
            sd.setCols(fields.stream()
                .filter(field -> !field.isPartition_key())
                .map(this::metacatToHiveField)
                .collect(Collectors.toList()));
        }
    }

    final AuditDto auditDto = partitionDto.getAudit();
    if (auditDto != null) {
        if (auditDto.getCreatedDate() != null) {
            result.setCreateTime(dateToEpochSeconds(auditDto.getCreatedDate()));
        }
        if (auditDto.getLastModifiedDate() != null) {
            result.setLastAccessTime(dateToEpochSeconds(auditDto.getLastModifiedDate()));
        }
    }

    return result;
}
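
Note the backfill on the SerDe: when the partition's storage descriptor has SerDe info but no serialization library, the library is copied from the table's SerDe, mirroring the copy logic in Examples 10 and 11.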
 
Example 14
Source File: HiveMetaStoreBridge.java    From incubator-atlas with Apache License 2.0
public Referenceable fillStorageDesc(StorageDescriptor storageDesc, String tableQualifiedName,
    String sdQualifiedName, Id tableId) throws AtlasHookException {
    LOG.debug("Filling storage descriptor information for {}", storageDesc);

    Referenceable sdReferenceable = new Referenceable(HiveDataTypes.HIVE_STORAGEDESC.getName());
    sdReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, sdQualifiedName);

    SerDeInfo serdeInfo = storageDesc.getSerdeInfo();
    LOG.debug("serdeInfo = {}", serdeInfo);
    // SkewedInfo skewedInfo = storageDesc.getSkewedInfo();

    String serdeInfoName = HiveDataTypes.HIVE_SERDE.getName();
    Struct serdeInfoStruct = new Struct(serdeInfoName);

    serdeInfoStruct.set(AtlasClient.NAME, serdeInfo.getName());
    serdeInfoStruct.set("serializationLib", serdeInfo.getSerializationLib());
    serdeInfoStruct.set(PARAMETERS, serdeInfo.getParameters());

    sdReferenceable.set("serdeInfo", serdeInfoStruct);
    sdReferenceable.set(STORAGE_NUM_BUCKETS, storageDesc.getNumBuckets());
    sdReferenceable
            .set(STORAGE_IS_STORED_AS_SUB_DIRS, storageDesc.isStoredAsSubDirectories());

    List<Struct> sortColsStruct = new ArrayList<>();
    for (Order sortcol : storageDesc.getSortCols()) {
        String hiveOrderName = HiveDataTypes.HIVE_ORDER.getName();
        Struct colStruct = new Struct(hiveOrderName);
        colStruct.set("col", sortcol.getCol());
        colStruct.set("order", sortcol.getOrder());

        sortColsStruct.add(colStruct);
    }
    if (sortColsStruct.size() > 0) {
        sdReferenceable.set("sortCols", sortColsStruct);
    }

    sdReferenceable.set(LOCATION, storageDesc.getLocation());
    sdReferenceable.set("inputFormat", storageDesc.getInputFormat());
    sdReferenceable.set("outputFormat", storageDesc.getOutputFormat());
    sdReferenceable.set("compressed", storageDesc.isCompressed());

    if (storageDesc.getBucketCols().size() > 0) {
        sdReferenceable.set("bucketCols", storageDesc.getBucketCols());
    }

    sdReferenceable.set(PARAMETERS, storageDesc.getParameters());
    sdReferenceable.set("storedAsSubDirectories", storageDesc.isStoredAsSubDirectories());
    sdReferenceable.set(TABLE, tableId);

    return sdReferenceable;
}