Java Code Examples for org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData

The following examples show how to use org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: presto   Source File: TestThriftMetastoreUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks the conversion of a fully populated Hive {@code DoubleColumnStatsData}
 * (low/high value, null count and distinct-value count) into {@code HiveColumnStatistics}.
 */
@Test
public void testDoubleStatsToColumnStatistics()
{
    // Hive-side statistics for a double column.
    DoubleColumnStatsData hiveDoubleStats = new DoubleColumnStatsData();
    hiveDoubleStats.setNumDVs(20);
    hiveDoubleStats.setNumNulls(1);
    hiveDoubleStats.setHighValue(100);
    hiveDoubleStats.setLowValue(0);

    ColumnStatisticsObj statsObj = new ColumnStatisticsObj(
            "my_col",
            DOUBLE_TYPE_NAME,
            doubleStats(hiveDoubleStats));
    OptionalLong rowCount = OptionalLong.of(1000);
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, rowCount);

    // Only the double range may be populated; every other typed statistic stays empty.
    DoubleStatistics expectedRange = new DoubleStatistics(OptionalDouble.of(0), OptionalDouble.of(100));
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.of(expectedRange));
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());

    // Size statistics are not expected for a double column.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());

    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    // NOTE(review): 19 rather than the 20 set above - presumably the conversion drops NULL
    // from the distinct count when nulls are present; confirm in fromMetastoreApiColumnStatistics.
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(19));
}
 
Example 2
Source Project: presto   Source File: TestThriftMetastoreUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks the conversion of a Hive {@code DoubleColumnStatsData} on which no field has been set,
 * with no row count supplied.
 */
@Test
public void testEmptyDoubleStatsToColumnStatistics()
{
    // No setter is called, so every field of the Hive struct is left unset.
    DoubleColumnStatsData unsetStats = new DoubleColumnStatsData();
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj(
            "my_col",
            DOUBLE_TYPE_NAME,
            doubleStats(unsetStats));
    OptionalLong noRowCount = OptionalLong.empty();
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, noRowCount);

    // The double statistics object is still present, but carries an empty min and max.
    DoubleStatistics emptyRange = new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty());
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.of(emptyRange));
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());

    // All scalar statistics are expected to be absent.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
 
Example 3
/**
 * Builds a Hive {@code ColumnStatisticsData} holding fixed double-column statistics:
 * low value -1111.1, high value 9999.9, 123 distinct values and 456 nulls.
 *
 * @return a new statistics wrapper with its double statistics set
 */
public static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData getHiveDoubleColumnStatsData() {
  DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
  doubleStats.setLowValue(-1111.1);
  doubleStats.setHighValue(9999.9);
  doubleStats.setNumNulls(456L);
  doubleStats.setNumDVs(123L);

  // Fully qualified, as in the original - NOTE(review): presumably avoids a clash with
  // another ColumnStatisticsData in scope; confirm against the file's imports.
  org.apache.hadoop.hive.metastore.api.ColumnStatisticsData result =
          new org.apache.hadoop.hive.metastore.api.ColumnStatisticsData();
  result.setDoubleStats(doubleStats);
  return result;
}
 
Example 4
Source Project: presto   Source File: ThriftMetastoreUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a metastore {@code ColumnStatisticsObj} carrying double statistics for a column.
 * Only the values that are present in {@code statistics} are copied onto the Hive struct.
 *
 * @param columnName name of the column the statistics belong to
 * @param columnType column type; its {@code toString()} is used as the metastore type name
 * @param statistics source statistics
 * @return the metastore statistics object wrapping a {@code DoubleColumnStatsData}
 */
private static ColumnStatisticsObj createDoubleStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
{
    DoubleColumnStatsData hiveStats = new DoubleColumnStatsData();

    // Null count and distinct-value count; the latter goes through toMetastoreDistinctValuesCount.
    statistics.getNullsCount().ifPresent(nulls -> hiveStats.setNumNulls(nulls));
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(distinct -> hiveStats.setNumDVs(distinct));

    // Min/max are copied independently, and only when double statistics exist at all.
    statistics.getDoubleStatistics().ifPresent(range -> {
        range.getMin().ifPresent(low -> hiveStats.setLowValue(low));
        range.getMax().ifPresent(high -> hiveStats.setHighValue(high));
    });

    String typeName = columnType.toString();
    return new ColumnStatisticsObj(columnName, typeName, doubleStats(hiveStats));
}
 
Example 5
Source Project: flink   Source File: HiveStatsUtil.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Converts Hive {@code ColumnStatisticsData} into the matching Flink column statistics.
 *
 * <p>The kinds are probed in this order: binary, boolean, date, double, long, string. The first
 * kind that is set decides the result.
 *
 * @param colType Flink type of the column; only used in the warning message
 * @param stats Hive column statistics to convert
 * @return the converted statistics, or {@code null} (after logging a warning) when none of the
 *         handled kinds is set
 */
private static CatalogColumnStatisticsDataBase createTableColumnStats(DataType colType, ColumnStatisticsData stats) {
	if (stats.isSetBinaryStats()) {
		BinaryColumnStatsData binary = stats.getBinaryStats();
		return new CatalogColumnStatisticsDataBinary(
				binary.getMaxColLen(),
				binary.getAvgColLen(),
				binary.getNumNulls());
	}

	if (stats.isSetBooleanStats()) {
		BooleanColumnStatsData bool = stats.getBooleanStats();
		return new CatalogColumnStatisticsDataBoolean(
				bool.getNumTrues(),
				bool.getNumFalses(),
				bool.getNumNulls());
	}

	if (stats.isSetDateStats()) {
		DateColumnStatsData date = stats.getDateStats();
		// Hive dates are re-wrapped as Flink catalog dates via their days-since-epoch value.
		return new CatalogColumnStatisticsDataDate(
				new org.apache.flink.table.catalog.stats.Date(date.getLowValue().getDaysSinceEpoch()),
				new org.apache.flink.table.catalog.stats.Date(date.getHighValue().getDaysSinceEpoch()),
				date.getNumDVs(),
				date.getNumNulls());
	}

	if (stats.isSetDoubleStats()) {
		DoubleColumnStatsData dbl = stats.getDoubleStats();
		return new CatalogColumnStatisticsDataDouble(
				dbl.getLowValue(),
				dbl.getHighValue(),
				dbl.getNumDVs(),
				dbl.getNumNulls());
	}

	if (stats.isSetLongStats()) {
		LongColumnStatsData lng = stats.getLongStats();
		return new CatalogColumnStatisticsDataLong(
				lng.getLowValue(),
				lng.getHighValue(),
				lng.getNumDVs(),
				lng.getNumNulls());
	}

	if (stats.isSetStringStats()) {
		StringColumnStatsData str = stats.getStringStats();
		return new CatalogColumnStatisticsDataString(
				str.getMaxColLen(),
				str.getAvgColLen(),
				str.getNumDVs(),
				str.getNumNulls());
	}

	// None of the handled kinds is set: warn and signal "no statistics" with null.
	LOG.warn("Flink does not support converting ColumnStatisticsData '{}' for Hive column type '{}' yet.", stats, colType);
	return null;
}
 
Example 6
Source Project: flink   Source File: HiveStatsUtil.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Convert Flink ColumnStats to Hive ColumnStatisticsData according to Hive column type.
 * Note we currently assume that, in Flink, the max and min of ColumnStats will be same type as the Flink column type.
 * For example, for SHORT and LONG columns, the max and min of their ColumnStats should be of type SHORT and LONG.
 *
 * <p>A conversion only happens when the column's logical type root and the runtime class of
 * {@code colStat} agree (for example a VARCHAR column with string statistics). Every other
 * combination ends in a {@link CatalogException}.
 *
 * @param colType Flink data type of the column
 * @param colStat Flink column statistics to convert
 * @return the equivalent Hive column statistics
 * @throws CatalogException if the type root is not handled, or {@code colStat} is not the
 *         statistics class expected for that type root
 */
private static ColumnStatisticsData getColumnStatisticsData(DataType colType, CatalogColumnStatisticsDataBase colStat) {
	LogicalTypeRoot type = colType.getLogicalType().getTypeRoot();
	if (type.equals(LogicalTypeRoot.CHAR)
			|| type.equals(LogicalTypeRoot.VARCHAR)) {
		if (colStat instanceof CatalogColumnStatisticsDataString) {
			CatalogColumnStatisticsDataString stringColStat = (CatalogColumnStatisticsDataString) colStat;
			return ColumnStatisticsData.stringStats(new StringColumnStatsData(stringColStat.getMaxLength(), stringColStat.getAvgLength(), stringColStat.getNullCount(), stringColStat.getNdv()));
		}
	} else if (type.equals(LogicalTypeRoot.BOOLEAN)) {
		if (colStat instanceof CatalogColumnStatisticsDataBoolean) {
			CatalogColumnStatisticsDataBoolean booleanColStat = (CatalogColumnStatisticsDataBoolean) colStat;
			BooleanColumnStatsData boolStats = new BooleanColumnStatsData(
					booleanColStat.getTrueCount(),
					booleanColStat.getFalseCount(),
					booleanColStat.getNullCount());
			return ColumnStatisticsData.booleanStats(boolStats);
		}
	} else if (type.equals(LogicalTypeRoot.TINYINT)
			|| type.equals(LogicalTypeRoot.SMALLINT)
			|| type.equals(LogicalTypeRoot.INTEGER)
			|| type.equals(LogicalTypeRoot.BIGINT)
			|| type.equals(LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE)
			|| type.equals(LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE)
			|| type.equals(LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE)) {
		// Integral and the listed time/timestamp roots all map onto Hive long statistics.
		if (colStat instanceof CatalogColumnStatisticsDataLong) {
			CatalogColumnStatisticsDataLong longColStat = (CatalogColumnStatisticsDataLong) colStat;
			LongColumnStatsData longColumnStatsData = new LongColumnStatsData(longColStat.getNullCount(), longColStat.getNdv());
			longColumnStatsData.setHighValue(longColStat.getMax());
			longColumnStatsData.setLowValue(longColStat.getMin());
			return ColumnStatisticsData.longStats(longColumnStatsData);
		}
	} else if (type.equals(LogicalTypeRoot.FLOAT)
			|| type.equals(LogicalTypeRoot.DOUBLE)) {
		// FLOAT columns are stored as Hive double statistics as well.
		if (colStat instanceof CatalogColumnStatisticsDataDouble) {
			CatalogColumnStatisticsDataDouble doubleColumnStatsData = (CatalogColumnStatisticsDataDouble) colStat;
			DoubleColumnStatsData floatStats = new DoubleColumnStatsData(doubleColumnStatsData.getNullCount(), doubleColumnStatsData.getNdv());
			floatStats.setHighValue(doubleColumnStatsData.getMax());
			floatStats.setLowValue(doubleColumnStatsData.getMin());
			return ColumnStatisticsData.doubleStats(floatStats);
		}
	} else if (type.equals(LogicalTypeRoot.DATE)) {
		if (colStat instanceof CatalogColumnStatisticsDataDate) {
			CatalogColumnStatisticsDataDate dateColumnStatsData = (CatalogColumnStatisticsDataDate) colStat;
			DateColumnStatsData dateStats = new DateColumnStatsData(dateColumnStatsData.getNullCount(), dateColumnStatsData.getNdv());
			// Flink catalog dates are re-wrapped as Hive dates via their days-since-epoch value.
			dateStats.setHighValue(new org.apache.hadoop.hive.metastore.api.Date(dateColumnStatsData.getMax().getDaysSinceEpoch()));
			dateStats.setLowValue(new org.apache.hadoop.hive.metastore.api.Date(dateColumnStatsData.getMin().getDaysSinceEpoch()));
			return ColumnStatisticsData.dateStats(dateStats);
		}
	} else if (type.equals(LogicalTypeRoot.VARBINARY)
			|| type.equals(LogicalTypeRoot.BINARY)) {
		if (colStat instanceof CatalogColumnStatisticsDataBinary) {
			CatalogColumnStatisticsDataBinary binaryColumnStatsData = (CatalogColumnStatisticsDataBinary) colStat;
			BinaryColumnStatsData binaryColumnStats = new BinaryColumnStatsData(binaryColumnStatsData.getMaxLength(), binaryColumnStatsData.getAvgLength(), binaryColumnStatsData.getNullCount());
			return ColumnStatisticsData.binaryStats(binaryColumnStats);
		}
	}
	// Reached for an unhandled type root, or when colStat's class does not fit the type root.
	throw new CatalogException(String.format("Flink does not support converting ColumnStats '%s' for Hive column " +
											"type '%s' yet", colStat, colType));
}
 
Example 7
Source Project: flink   Source File: HiveStatsUtil.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Create Flink ColumnStats from Hive ColumnStatisticsData.
 *
 * <p>Each individual statistic is copied only when the corresponding Hive field is set;
 * otherwise {@code null} is passed for it. Date statistics are delegated to the
 * {@code HiveShim} loaded for {@code hiveVersion}. Decimal statistics are returned as
 * {@code CatalogColumnStatisticsDataDouble}.
 *
 * @param colType Flink type of the column; only used in the warning message
 * @param stats Hive column statistics to convert
 * @param hiveVersion Hive version used to load the {@code HiveShim}
 * @return the converted statistics, or {@code null} (after logging a warning) when none of the
 *         handled kinds is set
 */
private static CatalogColumnStatisticsDataBase createTableColumnStats(DataType colType, ColumnStatisticsData stats, String hiveVersion) {
	HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
	if (stats.isSetBinaryStats()) {
		BinaryColumnStatsData binaryStats = stats.getBinaryStats();
		return new CatalogColumnStatisticsDataBinary(
				binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null,
				binaryStats.isSetAvgColLen() ? binaryStats.getAvgColLen() : null,
				binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null);
	} else if (stats.isSetBooleanStats()) {
		BooleanColumnStatsData booleanStats = stats.getBooleanStats();
		return new CatalogColumnStatisticsDataBoolean(
				booleanStats.isSetNumTrues() ? booleanStats.getNumTrues() : null,
				booleanStats.isSetNumFalses() ? booleanStats.getNumFalses() : null,
				booleanStats.isSetNumNulls() ? booleanStats.getNumNulls() : null);
	} else if (hiveShim.isDateStats(stats)) {
		// Date handling goes through the version-specific shim.
		return hiveShim.toFlinkDateColStats(stats);
	} else if (stats.isSetDoubleStats()) {
		DoubleColumnStatsData doubleStats = stats.getDoubleStats();
		return new CatalogColumnStatisticsDataDouble(
				doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null,
				doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null,
				doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null,
				doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null);
	} else if (stats.isSetLongStats()) {
		LongColumnStatsData longColStats = stats.getLongStats();
		return new CatalogColumnStatisticsDataLong(
				longColStats.isSetLowValue() ? longColStats.getLowValue() : null,
				longColStats.isSetHighValue() ? longColStats.getHighValue() : null,
				longColStats.isSetNumDVs() ? longColStats.getNumDVs() : null,
				longColStats.isSetNumNulls() ? longColStats.getNumNulls() : null);
	} else if (stats.isSetStringStats()) {
		StringColumnStatsData stringStats = stats.getStringStats();
		return new CatalogColumnStatisticsDataString(
				stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null,
				stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null,
				stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null,
				// The null count must be guarded by its own isSet flag, not by the NDV flag.
				stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null);
	} else if (stats.isSetDecimalStats()) {
		DecimalColumnStatsData decimalStats = stats.getDecimalStats();
		// for now, just return CatalogColumnStatisticsDataDouble for decimal columns
		Double max = null;
		if (decimalStats.isSetHighValue()) {
			max = toHiveDecimal(decimalStats.getHighValue()).doubleValue();
		}
		Double min = null;
		if (decimalStats.isSetLowValue()) {
			min = toHiveDecimal(decimalStats.getLowValue()).doubleValue();
		}
		Long ndv = decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null;
		Long nullCount = decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null;
		return new CatalogColumnStatisticsDataDouble(min, max, ndv, nullCount);
	} else {
		LOG.warn("Flink does not support converting ColumnStatisticsData '{}' for Hive column type '{}' yet.", stats, colType);
		return null;
	}
}