Java Code Examples for org.apache.hadoop.hive.metastore.api.Table#getSd()

The following examples show how to use org.apache.hadoop.hive.metastore.api.Table#getSd(). Each example is taken from a real open-source project; follow the links above each example to view the original project or source file.
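Before the project examples, here is a minimal, self-contained sketch of the basic pattern: fetch a Table from the metastore and read its StorageDescriptor. It assumes a reachable metastore, and the database/table names are placeholders rather than code from any project below. Note that getSd() can return null (for example, on some views), so defensive callers check it, as Examples 2 and 7 do.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;

public class GetSdExample {
    public static void main(String[] args) throws Exception {
        HiveMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
        try {
            Table table = client.getTable("default", "my_table"); // placeholder names
            StorageDescriptor sd = table.getSd();                 // may be null
            if (sd != null) {
                System.out.println("location:      " + sd.getLocation());
                System.out.println("input format:  " + sd.getInputFormat());
                System.out.println("output format: " + sd.getOutputFormat());
            }
        } finally {
            client.close();
        }
    }
}
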
Example 1
Source File: MultipleHiveFragmentsPerFileFragmenter.java    From pxf with Apache License 2.0
private String getFilePath(Table tbl) throws Exception {
    StorageDescriptor descTable = tbl.getSd();

    InputFormat<?, ?> fformat = HiveDataFragmenter.makeInputFormat(descTable.getInputFormat(), jobConf);

    FileInputFormat.setInputPaths(jobConf, new Path(descTable.getLocation()));

    InputSplit[] splits;
    try {
        splits = fformat.getSplits(jobConf, 1);
    } catch (org.apache.hadoop.mapred.InvalidInputException e) {
        LOG.debug("getSplits failed on " + e.getMessage());
        // chain the cause so the underlying input error is not lost
        throw new RuntimeException("Unable to get file path for table.", e);
    }

    // return the path of the first split, if any
    for (InputSplit split : splits) {
        return ((FileSplit) split).getPath().toString();
    }
    throw new RuntimeException("Unable to get file path for table.");
}
 
Example 2
Source File: HiveConnectorTableService.java    From metacat with Apache License 2.0
private HiveStorageFormat extractHiveStorageFormat(final Table table) throws MetaException {
    final StorageDescriptor descriptor = table.getSd();
    if (descriptor == null) {
        throw new MetaException("Table is missing storage descriptor");
    }
    final SerDeInfo serdeInfo = descriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new MetaException(
            "Table storage descriptor is missing SerDe info");
    }
    final String outputFormat = descriptor.getOutputFormat();
    final String serializationLib = serdeInfo.getSerializationLib();

    for (HiveStorageFormat format : HiveStorageFormat.values()) {
        if (format.getOutputFormat().equals(outputFormat) && format.getSerde().equals(serializationLib)) {
            return format;
        }
    }
    throw new MetaException(
        String.format("Output format %s with SerDe %s is not supported", outputFormat, serializationLib));
}
 
Example 3
Source File: AvroHiveTableStrategyTest.java    From data-highway with Apache License 2.0
@Test
public void newHiveTable() throws URISyntaxException {
  when(uriResolver.resolve(schema1, TABLE, 1))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table result = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);

  assertThat(result.getDbName(), is(DATABASE));
  assertThat(result.getTableName(), is(TABLE));
  assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
  Map<String, String> parameters = result.getParameters();
  assertThat(parameters.get("EXTERNAL"), is("TRUE"));
  assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
      is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("1"));
  List<FieldSchema> partitionKeys = result.getPartitionKeys();
  assertThat(partitionKeys.size(), is(1));
  assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
  StorageDescriptor storageDescriptor = result.getSd();
  assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(storageDescriptor.getLocation(), is(LOCATION));
  assertThat(storageDescriptor.getCols().size(), is(0));
  SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
  assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
 
Example 4
Source File: AvroHiveTableStrategyTest.java    From data-highway with Apache License 2.0
@Test
public void alterHiveTable() throws URISyntaxException {
  when(uriResolver.resolve(schema1, TABLE, 1))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  when(uriResolver.resolve(schema2, TABLE, 2))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc"));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table table = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);

  Table result = underTest.alterHiveTable(table, schema2, 2);

  assertThat(result.getDbName(), is(DATABASE));
  assertThat(result.getTableName(), is(TABLE));
  assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
  Map<String, String> parameters = result.getParameters();
  assertThat(parameters.get("EXTERNAL"), is("TRUE"));
  assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
      is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("2"));
  List<FieldSchema> partitionKeys = result.getPartitionKeys();
  assertThat(partitionKeys.size(), is(1));
  assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
  StorageDescriptor storageDescriptor = result.getSd();
  assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(storageDescriptor.getLocation(), is(LOCATION));
  assertThat(storageDescriptor.getCols().size(), is(0));
  SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
  assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
 
Example 5
Source File: HiveConnectorFastPartitionService.java    From metacat with Apache License 2.0
private void copyTableSdToPartitionInfoSd(final PartitionInfo partitionInfo, final Table table) {
    StorageInfo sd = partitionInfo.getSerde();
    //
    // Partitions can be provided in the request without the storage information.
    //
    if (sd == null) {
        sd = new StorageInfo();
        partitionInfo.setSerde(sd);
    }
    final StorageDescriptor tableSd = table.getSd();

    if (StringUtils.isBlank(sd.getInputFormat())) {
        sd.setInputFormat(tableSd.getInputFormat());
    }
    if (StringUtils.isBlank(sd.getOutputFormat())) {
        sd.setOutputFormat(tableSd.getOutputFormat());
    }
    if (sd.getParameters() == null || sd.getParameters().isEmpty()) {
        sd.setParameters(tableSd.getParameters());
    }
    final SerDeInfo tableSerde = tableSd.getSerdeInfo();
    if (tableSerde != null) {
        if (StringUtils.isBlank(sd.getSerializationLib())) {
            sd.setSerializationLib(tableSerde.getSerializationLib());
        }
        if (sd.getSerdeInfoParameters() == null || sd.getSerdeInfoParameters().isEmpty()) {
            sd.setSerdeInfoParameters(tableSerde.getParameters());
        }
    }
}
 
Example 6
Source File: HiveCatalog.java    From flink with Apache License 2.0
private static Table instantiateHiveTable(ObjectPath tablePath, CatalogBaseTable table) {
	// let Hive set default parameters for us, e.g. serialization.format
	Table hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(),
		tablePath.getObjectName());
	hiveTable.setCreateTime((int) (System.currentTimeMillis() / 1000));

	Map<String, String> properties = new HashMap<>(table.getProperties());
	// Table comment
	properties.put(HiveCatalogConfig.COMMENT, table.getComment());

	boolean isGeneric = Boolean.valueOf(properties.get(CatalogConfig.IS_GENERIC));

	if (isGeneric) {
		properties = maskFlinkProperties(properties);
	}
	// Table properties
	hiveTable.setParameters(properties);

	// Hive table's StorageDescriptor
	StorageDescriptor sd = hiveTable.getSd();
	setStorageFormat(sd, properties);

	List<FieldSchema> allColumns = HiveTableUtil.createHiveColumns(table.getSchema());

	// Table columns and partition keys
	if (table instanceof CatalogTableImpl) {
		CatalogTable catalogTable = (CatalogTableImpl) table;

		if (catalogTable.isPartitioned()) {
			int partitionKeySize = catalogTable.getPartitionKeys().size();
			List<FieldSchema> regularColumns = allColumns.subList(0, allColumns.size() - partitionKeySize);
			List<FieldSchema> partitionColumns = allColumns.subList(allColumns.size() - partitionKeySize, allColumns.size());

			sd.setCols(regularColumns);
			hiveTable.setPartitionKeys(partitionColumns);
		} else {
			sd.setCols(allColumns);
			hiveTable.setPartitionKeys(new ArrayList<>());
		}
	} else if (table instanceof CatalogViewImpl) {
		CatalogView view = (CatalogViewImpl) table;

		// TODO: [FLINK-12398] Support partitioned view in catalog API
		sd.setCols(allColumns);
		hiveTable.setPartitionKeys(new ArrayList<>());

		hiveTable.setViewOriginalText(view.getOriginalQuery());
		hiveTable.setViewExpandedText(view.getExpandedQuery());
		hiveTable.setTableType(TableType.VIRTUAL_VIEW.name());
	} else {
		throw new CatalogException(
			"HiveCatalog only supports CatalogTableImpl and CatalogViewImpl");
	}

	return hiveTable;
}
 
Example 7
Source File: HiveConnectorInfoConverter.java    From metacat with Apache License 2.0
/**
 * Converts a Hive metastore table to a Metacat TableInfo.
 *
 * @param name  qualified name of the table
 * @param table connector table
 * @return Metacat table info
 */
@Override
public TableInfo toTableInfo(final QualifiedName name, final Table table) {
    // fall back to a mutable list: field schemas may be added to it below
    final List<FieldSchema> nonPartitionColumns =
        (table.getSd() != null) ? table.getSd().getCols() : new ArrayList<>();
    // add the data fields to the nonPartitionColumns
    //ignore all exceptions
    try {
        if (nonPartitionColumns.isEmpty()) {
            for (StructField field : HiveTableUtil.getTableStructFields(table)) {
                final FieldSchema fieldSchema = new FieldSchema(field.getFieldName(),
                    field.getFieldObjectInspector().getTypeName(),
                    field.getFieldComment());
                nonPartitionColumns.add(fieldSchema);
            }
        }
    } catch (final Exception e) {
        log.error(e.getMessage(), e);
    }

    final List<FieldSchema> partitionColumns = table.getPartitionKeys();
    final Date creationDate = table.isSetCreateTime() ? epochSecondsToDate(table.getCreateTime()) : null;
    final List<FieldInfo> allFields =
        Lists.newArrayListWithCapacity(nonPartitionColumns.size() + partitionColumns.size());
    nonPartitionColumns.stream()
        .map(field -> hiveToMetacatField(field, false))
        .forEachOrdered(allFields::add);
    partitionColumns.stream()
        .map(field -> hiveToMetacatField(field, true))
        .forEachOrdered(allFields::add);
    final AuditInfo auditInfo = AuditInfo.builder().createdDate(creationDate).build();
    if (null != table.getTableType() && table.getTableType().equals(TableType.VIRTUAL_VIEW.name())) {
        return TableInfo.builder()
            .serde(toStorageInfo(table.getSd(), table.getOwner())).fields(allFields)
            .metadata(table.getParameters()).name(name).auditInfo(auditInfo)
            .view(ViewInfo.builder()
                .viewOriginalText(table.getViewOriginalText())
                .viewExpandedText(table.getViewExpandedText())
                .build())
            .build();
    } else {
        return TableInfo.builder()
            .serde(toStorageInfo(table.getSd(), table.getOwner())).fields(allFields)
            .metadata(table.getParameters()).name(name).auditInfo(auditInfo)
            .build();
    }
}
 
Example 8
Source File: HiveTableUtil.java    From flink with Apache License 2.0
public static Table instantiateHiveTable(ObjectPath tablePath, CatalogBaseTable table, HiveConf hiveConf) {
	if (!(table instanceof CatalogTableImpl) && !(table instanceof CatalogViewImpl)) {
		throw new CatalogException(
				"HiveCatalog only supports CatalogTableImpl and CatalogViewImpl");
	}
	// let Hive set default parameters for us, e.g. serialization.format
	Table hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(),
			tablePath.getObjectName());
	hiveTable.setCreateTime((int) (System.currentTimeMillis() / 1000));

	Map<String, String> properties = new HashMap<>(table.getProperties());
	// Table comment
	if (table.getComment() != null) {
		properties.put(HiveCatalogConfig.COMMENT, table.getComment());
	}

	boolean isGeneric = HiveCatalog.isGenericForCreate(properties);

	// Hive table's StorageDescriptor
	StorageDescriptor sd = hiveTable.getSd();
	HiveTableUtil.setDefaultStorageFormat(sd, hiveConf);

	if (isGeneric) {
		DescriptorProperties tableSchemaProps = new DescriptorProperties(true);
		tableSchemaProps.putTableSchema(Schema.SCHEMA, table.getSchema());

		if (table instanceof CatalogTable) {
			tableSchemaProps.putPartitionKeys(((CatalogTable) table).getPartitionKeys());
		}

		properties.putAll(tableSchemaProps.asMap());
		properties = maskFlinkProperties(properties);
		hiveTable.setParameters(properties);
	} else {
		HiveTableUtil.initiateTableFromProperties(hiveTable, properties, hiveConf);
		List<FieldSchema> allColumns = HiveTableUtil.createHiveColumns(table.getSchema());
		// Table columns and partition keys
		if (table instanceof CatalogTableImpl) {
			CatalogTable catalogTable = (CatalogTableImpl) table;

			if (catalogTable.isPartitioned()) {
				int partitionKeySize = catalogTable.getPartitionKeys().size();
				List<FieldSchema> regularColumns = allColumns.subList(0, allColumns.size() - partitionKeySize);
				List<FieldSchema> partitionColumns = allColumns.subList(allColumns.size() - partitionKeySize, allColumns.size());

				sd.setCols(regularColumns);
				hiveTable.setPartitionKeys(partitionColumns);
			} else {
				sd.setCols(allColumns);
				hiveTable.setPartitionKeys(new ArrayList<>());
			}
		} else {
			sd.setCols(allColumns);
		}
		// Table properties
		hiveTable.getParameters().putAll(properties);
	}

	if (table instanceof CatalogViewImpl) {
		// TODO: [FLINK-12398] Support partitioned view in catalog API
		hiveTable.setPartitionKeys(new ArrayList<>());

		CatalogView view = (CatalogView) table;
		hiveTable.setViewOriginalText(view.getOriginalQuery());
		hiveTable.setViewExpandedText(view.getExpandedQuery());
		hiveTable.setTableType(TableType.VIRTUAL_VIEW.name());
	}

	return hiveTable;
}
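
A closing note on the write side: getSd() returns a live reference into the Thrift Table object, so mutating the StorageDescriptor and writing the table back persists the change. Below is a minimal sketch of that pattern, assuming a reachable metastore; the names and location path are placeholders, not code from any project above.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Table;

public class RelocateTableExample {
    public static void main(String[] args) throws Exception {
        HiveMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
        try {
            Table table = client.getTable("default", "my_table");        // placeholder names
            // getSd() exposes the table's own StorageDescriptor, so this
            // edit is carried along when the table is written back
            table.getSd().setLocation("hdfs:///warehouse/new_location"); // placeholder path
            client.alter_table("default", "my_table", table);
        } finally {
            client.close();
        }
    }
}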