Java Code Examples for org.apache.hadoop.hive.metastore.api.ColumnStatisticsData

The following examples show how to use org.apache.hadoop.hive.metastore.api.ColumnStatisticsData. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: flink   Source File: HiveStatsUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds Hive {@link ColumnStatistics} from Flink column statistics.
 *
 * <p>Walks the columns of the storage descriptor and, for every column that has an entry in
 * {@code colStats}, converts that entry into a Hive {@link ColumnStatisticsObj}. Columns without
 * an entry are skipped.
 *
 * @param colStats Flink column statistics keyed by column name
 * @param sd storage descriptor supplying the Hive column names and type strings
 * @param desc statistics descriptor passed through to the resulting {@link ColumnStatistics}
 * @return the Hive column statistics for all columns that had Flink statistics
 */
private static ColumnStatistics createHiveColumnStatistics(
		Map<String, CatalogColumnStatisticsDataBase> colStats,
		StorageDescriptor sd,
		ColumnStatisticsDesc desc) {
	List<ColumnStatisticsObj> convertedStats = new ArrayList<>();

	for (FieldSchema column : sd.getCols()) {
		String columnName = column.getName();
		String columnType = column.getType();
		CatalogColumnStatisticsDataBase flinkStats = colStats.get(column.getName());
		if (flinkStats == null) {
			// No Flink statistics recorded for this column, so there is nothing to convert.
			continue;
		}

		ColumnStatisticsData hiveStats = getColumnStatisticsData(
				HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(columnType)),
				flinkStats);
		convertedStats.add(new ColumnStatisticsObj(columnName, columnType, hiveStats));
	}

	return new ColumnStatistics(desc, convertedStats);
}
 
Example 2
Source Project: flink   Source File: HiveShimV120.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts Hive DATE column statistics into Flink {@link CatalogColumnStatisticsDataDate}.
 *
 * <p>All Hive date-statistics accessors are invoked reflectively. Each of the four values
 * (low, high, number of distinct values, number of nulls) is only read into the result when the
 * corresponding {@code isSet...} accessor reports {@code true}; otherwise {@code null} is passed.
 *
 * @param hiveDateColStats Hive statistics holding date stats
 * @return the equivalent Flink date column statistics
 * @throws CatalogException if any reflective lookup or invocation fails
 */
@Override
public CatalogColumnStatisticsDataDate toFlinkDateColStats(ColumnStatisticsData hiveDateColStats) {
	try {
		Object dateStats = ColumnStatisticsData.class.getDeclaredMethod("getDateStats").invoke(hiveDateColStats);
		Class<?> dateStatsClz = dateStats.getClass();
		boolean isSetNumDv = (boolean) dateStatsClz.getMethod("isSetNumDVs").invoke(dateStats);
		boolean isSetNumNull = (boolean) dateStatsClz.getMethod("isSetNumNulls").invoke(dateStats);
		boolean isSetHighValue = (boolean) dateStatsClz.getMethod("isSetHighValue").invoke(dateStats);
		boolean isSetLowValue = (boolean) dateStatsClz.getMethod("isSetLowValue").invoke(dateStats);
		Long numDV = isSetNumDv ? (Long) dateStatsClz.getMethod("getNumDVs").invoke(dateStats) : null;
		Long numNull = isSetNumNull ? (Long) dateStatsClz.getMethod("getNumNulls").invoke(dateStats) : null;
		Method highValueGetter = dateStatsClz.getMethod("getHighValue");
		Object hmsHighDate = highValueGetter.invoke(dateStats);
		Object hmsLowDate = dateStatsClz.getMethod("getLowValue").invoke(dateStats);
		// Take the date class from the getter's declared return type instead of from the returned
		// instance: calling getClass() on the instance would throw a NullPointerException whenever
		// the getter hands back null, which the isSetHighValue guard below already anticipates.
		Class<?> hmsDateClz = highValueGetter.getReturnType();
		Method hmsDateDays = hmsDateClz.getMethod("getDaysSinceEpoch");
		Date highDateDays = isSetHighValue ? new Date((Long) hmsDateDays.invoke(hmsHighDate)) : null;
		Date lowDateDays = isSetLowValue ? new Date((Long) hmsDateDays.invoke(hmsLowDate)) : null;
		return new CatalogColumnStatisticsDataDate(lowDateDays, highDateDays, numDV, numNull);
	} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
		throw new CatalogException("Failed to create Flink statistics for date column", e);
	}
}
 
Example 3
Source Project: flink   Source File: HiveStatsUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds Hive {@link ColumnStatistics} from Flink column statistics for a given Hive version.
 *
 * <p>Walks the columns of the storage descriptor and, for every column that has an entry in
 * {@code colStats}, converts that entry into a Hive {@link ColumnStatisticsObj}. Columns without
 * an entry are skipped.
 *
 * @param colStats Flink column statistics keyed by column name
 * @param sd storage descriptor supplying the Hive column names and type strings
 * @param desc statistics descriptor passed through to the resulting {@link ColumnStatistics}
 * @param hiveVersion Hive version string forwarded to the per-column conversion
 * @return the Hive column statistics for all columns that had Flink statistics
 */
private static ColumnStatistics createHiveColumnStatistics(
		Map<String, CatalogColumnStatisticsDataBase> colStats,
		StorageDescriptor sd,
		ColumnStatisticsDesc desc,
		String hiveVersion) {
	List<ColumnStatisticsObj> convertedStats = new ArrayList<>();

	for (FieldSchema column : sd.getCols()) {
		String columnName = column.getName();
		String columnType = column.getType();
		CatalogColumnStatisticsDataBase flinkStats = colStats.get(column.getName());
		if (flinkStats == null) {
			// No Flink statistics recorded for this column, so there is nothing to convert.
			continue;
		}

		ColumnStatisticsData hiveStats = getColumnStatisticsData(
				HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(columnType)),
				flinkStats,
				hiveVersion);
		convertedStats.add(new ColumnStatisticsObj(columnName, columnType, hiveStats));
	}

	return new ColumnStatistics(desc, convertedStats);
}
 
Example 4
Source Project: presto   Source File: MockThriftMetastoreClient.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link ColumnStatisticsObj} for {@code TEST_COLUMN} whose statistics data carries
 * a default-constructed {@link LongColumnStatsData}.
 */
private static ColumnStatisticsObj createTestStats()
{
    ColumnStatisticsData longStatsData = new ColumnStatisticsData();
    longStatsData.setLongStats(new LongColumnStatsData());

    ColumnStatisticsObj columnStats = new ColumnStatisticsObj();
    columnStats.setStatsData(longStatsData);
    columnStats.setColName(TEST_COLUMN);
    return columnStats;
}
 
Example 5
Source Project: circus-train   Source File: ReplicaTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Test fixture setup: stubs the metastore client supplier and replica catalog, creates the
 * replica under test, builds table-level column statistics for {@code COLUMN_A} and
 * {@code COLUMN_B}, and stubs the location manager and metastore client to return the
 * prepared table location and replica table.
 */
@Before
public void prepare() throws Exception {
  when(metaStoreClientSupplier.get()).thenReturn(mockMetaStoreClient);
  when(replicaCatalog.getName()).thenReturn(NAME);

  hiveConf = new HiveConf();
  hiveConf.setVar(ConfVars.METASTOREURIS, REPLICA_META_STORE_URIS);
  replica = newReplica(tableReplication);
  tableLocation = temporaryFolder.newFolder("table_location").toURI().toString();

  sourceTable = newTable();
  existingPartition = newPartition("one", "two");

  // Table-level statistics: one long-stats entry per test column.
  ColumnStatisticsData statsDataForA = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(0, 1));
  ColumnStatisticsData statsDataForB = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1, 2));
  columnStatisticsObjs = Arrays.asList(
      new ColumnStatisticsObj(COLUMN_A, "string", statsDataForA),
      new ColumnStatisticsObj(COLUMN_B, "string", statsDataForB));
  columnStatistics = new ColumnStatistics(
      new ColumnStatisticsDesc(true, DB_NAME, TABLE_NAME),
      columnStatisticsObjs);

  tableAndStatistics = new TableAndStatistics(sourceTable, columnStatistics);

  existingReplicaTable = new Table(sourceTable);

  when(mockReplicaLocationManager.getTableLocation()).thenReturn(new Path(tableLocation));
  when(mockReplicaLocationManager.getPartitionBaseLocation()).thenReturn(new Path(tableLocation));

  when(mockMetaStoreClient.getTable(DB_NAME, TABLE_NAME)).thenReturn(existingReplicaTable);
}
 
Example 6
Source Project: circus-train   Source File: ReplicaTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates partition-level column statistics for {@code COLUMN_A} and {@code COLUMN_B}.
 *
 * @param values partition values used to derive the partition name set on the descriptor
 * @return statistics whose descriptor is not table-level and carries the partition name
 */
private ColumnStatistics newPartitionStatistics(String... values) {
  ColumnStatisticsDesc partitionDesc = new ColumnStatisticsDesc(false, DB_NAME, TABLE_NAME);
  partitionDesc.setPartName(partitionName(values));

  ColumnStatisticsData statsDataForA = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(0, 1));
  ColumnStatisticsData statsDataForB = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1, 2));
  List<ColumnStatisticsObj> perColumnStats = Arrays.asList(
      new ColumnStatisticsObj(COLUMN_A, "string", statsDataForA),
      new ColumnStatisticsObj(COLUMN_B, "string", statsDataForB));

  return new ColumnStatistics(partitionDesc, perColumnStats);
}
 
Example 7
Source Project: circus-train   Source File: TestUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates an external, unpartitioned Hive table in the metastore and attaches table-level
 * long statistics for the {@code id} column.
 *
 * @param metaStoreClient client used to create the table and update its column statistics
 * @param database database name of the new table
 * @param table name of the new table
 * @param location storage location of the table
 * @return the table object that was sent to the metastore
 * @throws TException if a metastore call fails
 */
public static Table createUnpartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    URI location)
  throws TException {
  // Storage layout: text input/output with the OpenCSV serde.
  SerDeInfo serdeInfo = new SerDeInfo();
  serdeInfo.setSerializationLib("org.apache.hadoop.hive.serde2.OpenCSVSerde");

  StorageDescriptor storage = new StorageDescriptor();
  storage.setCols(DATA_COLUMNS);
  storage.setLocation(location.toString());
  storage.setParameters(new HashMap<String, String>());
  storage.setInputFormat(TextInputFormat.class.getName());
  storage.setOutputFormat(TextOutputFormat.class.getName());
  storage.setSerdeInfo(serdeInfo);

  Table created = new Table();
  created.setDbName(database);
  created.setTableName(table);
  created.setTableType(TableType.EXTERNAL_TABLE.name());
  created.putToParameters("EXTERNAL", "TRUE");
  created.setSd(storage);

  metaStoreClient.createTable(created);

  // Register table-level statistics for the "id" column.
  ColumnStatisticsObj idStats = new ColumnStatisticsObj(
      "id",
      "bigint",
      new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L)));
  ColumnStatistics tableStats = new ColumnStatistics(
      new ColumnStatisticsDesc(true, database, table),
      Collections.singletonList(idStats));
  metaStoreClient.updateTableColumnStatistics(tableStats);

  return created;
}
 
Example 8
/**
 * Test fixture setup: table-level statistics with long stats for column {@code a} and string
 * stats for column {@code b}.
 */
@Before
public void init() {
  ColumnStatisticsObj longColumn = new ColumnStatisticsObj(
      "a",
      "int",
      new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L)));
  ColumnStatisticsObj stringColumn = new ColumnStatisticsObj(
      "b",
      "string",
      new ColumnStatisticsData(_Fields.STRING_STATS, new StringColumnStatsData(10L, 3L, 0L, 1L)));

  ColumnStatisticsDesc tableLevelDesc = new ColumnStatisticsDesc(true, "database", "table");
  stats = new ColumnStatistics(tableLevelDesc, ImmutableList.of(longColumn, stringColumn));
}
 
Example 9
Source Project: flink   Source File: HiveShimV120.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts Flink DATE column statistics into Hive {@link ColumnStatisticsData}.
 *
 * <p>The Hive date statistics classes are loaded and populated reflectively. Only the Flink
 * values that are non-null are copied onto the Hive object.
 *
 * @param flinkDateColStats Flink date column statistics
 * @return Hive statistics data wrapping the populated date stats
 * @throws CatalogException if any reflective lookup, instantiation or invocation fails
 */
@Override
public ColumnStatisticsData toHiveDateColStats(CatalogColumnStatisticsDataDate flinkDateColStats) {
	try {
		Class<?> hiveDateStatsClass = Class.forName("org.apache.hadoop.hive.metastore.api.DateColumnStatsData");
		Object hiveDateStats = hiveDateStatsClass.getDeclaredConstructor().newInstance();
		hiveDateStatsClass.getMethod("clear").invoke(hiveDateStats);

		// Counters: copy only what Flink actually recorded.
		if (flinkDateColStats.getNdv() != null) {
			Method setNumDvs = hiveDateStatsClass.getMethod("setNumDVs", long.class);
			setNumDvs.invoke(hiveDateStats, flinkDateColStats.getNdv());
		}
		if (flinkDateColStats.getNullCount() != null) {
			Method setNumNulls = hiveDateStatsClass.getMethod("setNumNulls", long.class);
			setNumNulls.invoke(hiveDateStats, flinkDateColStats.getNullCount());
		}

		// Bounds: wrap the Flink day counts in Hive metastore Date objects.
		Class<?> hiveDateClass = Class.forName("org.apache.hadoop.hive.metastore.api.Date");
		Constructor<?> newHiveDate = hiveDateClass.getConstructor(long.class);
		if (flinkDateColStats.getMax() != null) {
			Object highValue = newHiveDate.newInstance(flinkDateColStats.getMax().getDaysSinceEpoch());
			hiveDateStatsClass.getDeclaredMethod("setHighValue", hiveDateClass).invoke(hiveDateStats, highValue);
		}
		if (flinkDateColStats.getMin() != null) {
			Object lowValue = newHiveDate.newInstance(flinkDateColStats.getMin().getDaysSinceEpoch());
			hiveDateStatsClass.getDeclaredMethod("setLowValue", hiveDateClass).invoke(hiveDateStats, lowValue);
		}

		// Static factory ColumnStatisticsData.dateStats(...), hence the null receiver.
		Method dateStatsFactory = ColumnStatisticsData.class.getDeclaredMethod("dateStats", hiveDateStatsClass);
		return (ColumnStatisticsData) dateStatsFactory.invoke(null, hiveDateStats);
	} catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) {
		throw new CatalogException("Failed to create Hive statistics for date column", e);
	}
}
 
Example 10
Source Project: flink   Source File: HiveShimV120.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether the given Hive statistics data holds date stats, by reflectively calling
 * {@code isSetDateStats} on it.
 *
 * @param colStatsData Hive column statistics data to inspect
 * @return the result of {@code isSetDateStats}
 * @throws CatalogException if the reflective lookup or invocation fails
 */
@Override
public boolean isDateStats(ColumnStatisticsData colStatsData) {
	try {
		Object dateStatsSet = ColumnStatisticsData.class
				.getDeclaredMethod("isSetDateStats")
				.invoke(colStatsData);
		return (boolean) dateStatsSet;
	} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
		throw new CatalogException("Failed to decide whether ColumnStatisticsData is for DATE column", e);
	}
}
 
Example 11
Source Project: flink   Source File: HiveStatsUtil.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Translates Hive {@link ColumnStatisticsData} into the matching Flink column statistics.
 *
 * <p>The kind of statistics is chosen by probing the Hive object in this order: binary, boolean,
 * date, double, long, string. If none of them is set, a warning is logged and {@code null} is
 * returned.
 *
 * @param colType Flink column type, used only in the warning message
 * @param stats Hive column statistics data to convert
 * @return the Flink statistics, or {@code null} when the statistics kind is not handled
 */
private static CatalogColumnStatisticsDataBase createTableColumnStats(DataType colType, ColumnStatisticsData stats) {
	if (stats.isSetBinaryStats()) {
		BinaryColumnStatsData binary = stats.getBinaryStats();
		return new CatalogColumnStatisticsDataBinary(
				binary.getMaxColLen(),
				binary.getAvgColLen(),
				binary.getNumNulls());
	}

	if (stats.isSetBooleanStats()) {
		BooleanColumnStatsData bool = stats.getBooleanStats();
		return new CatalogColumnStatisticsDataBoolean(
				bool.getNumTrues(),
				bool.getNumFalses(),
				bool.getNumNulls());
	}

	if (stats.isSetDateStats()) {
		DateColumnStatsData date = stats.getDateStats();
		return new CatalogColumnStatisticsDataDate(
				new org.apache.flink.table.catalog.stats.Date(date.getLowValue().getDaysSinceEpoch()),
				new org.apache.flink.table.catalog.stats.Date(date.getHighValue().getDaysSinceEpoch()),
				date.getNumDVs(),
				date.getNumNulls());
	}

	if (stats.isSetDoubleStats()) {
		DoubleColumnStatsData dbl = stats.getDoubleStats();
		return new CatalogColumnStatisticsDataDouble(
				dbl.getLowValue(),
				dbl.getHighValue(),
				dbl.getNumDVs(),
				dbl.getNumNulls());
	}

	if (stats.isSetLongStats()) {
		LongColumnStatsData lng = stats.getLongStats();
		return new CatalogColumnStatisticsDataLong(
				lng.getLowValue(),
				lng.getHighValue(),
				lng.getNumDVs(),
				lng.getNumNulls());
	}

	if (stats.isSetStringStats()) {
		StringColumnStatsData str = stats.getStringStats();
		return new CatalogColumnStatisticsDataString(
				str.getMaxColLen(),
				str.getAvgColLen(),
				str.getNumDVs(),
				str.getNumNulls());
	}

	// Any other statistics kind has no Flink counterpart here.
	LOG.warn("Flink does not support converting ColumnStatisticsData '{}' for Hive column type '{}' yet.", stats, colType);
	return null;
}
 
Example 12
Source Project: flink   Source File: HiveStatsUtil.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Convert Flink ColumnStats to Hive ColumnStatisticsData according to Hive column type.
 * Note we currently assume that, in Flink, the max and min of ColumnStats will be same type as the Flink column type.
 * For example, for SHORT and LONG columns, the max and min of their ColumnStats should be of type SHORT and LONG.
 *
 * <p>The statistics are converted only when the logical type root of {@code colType} and the
 * runtime class of {@code colStat} match one of the supported pairs (string, boolean, long,
 * double, date, binary). Every other combination ends in a {@link CatalogException}.
 *
 * @param colType Flink data type of the column
 * @param colStat Flink statistics of the column
 * @return the equivalent Hive statistics data
 * @throws CatalogException if the type/statistics combination is not supported
 */
private static ColumnStatisticsData getColumnStatisticsData(DataType colType, CatalogColumnStatisticsDataBase colStat) {
	LogicalTypeRoot type = colType.getLogicalType().getTypeRoot();
	if (type.equals(LogicalTypeRoot.CHAR)
			|| type.equals(LogicalTypeRoot.VARCHAR)) {
		if (colStat instanceof CatalogColumnStatisticsDataString) {
			CatalogColumnStatisticsDataString stringColStat = (CatalogColumnStatisticsDataString) colStat;
			return ColumnStatisticsData.stringStats(new StringColumnStatsData(stringColStat.getMaxLength(), stringColStat.getAvgLength(), stringColStat.getNullCount(), stringColStat.getNdv()));
		}
	} else if (type.equals(LogicalTypeRoot.BOOLEAN)) {
		if (colStat instanceof CatalogColumnStatisticsDataBoolean) {
			CatalogColumnStatisticsDataBoolean booleanColStat = (CatalogColumnStatisticsDataBoolean) colStat;
			BooleanColumnStatsData boolStats = new BooleanColumnStatsData(
					booleanColStat.getTrueCount(),
					booleanColStat.getFalseCount(),
					booleanColStat.getNullCount());
			return ColumnStatisticsData.booleanStats(boolStats);
		}
	} else if (type.equals(LogicalTypeRoot.TINYINT)
			|| type.equals(LogicalTypeRoot.SMALLINT)
			|| type.equals(LogicalTypeRoot.INTEGER)
			|| type.equals(LogicalTypeRoot.BIGINT)
			|| type.equals(LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE)
			|| type.equals(LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE)
			|| type.equals(LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE)) {
		if (colStat instanceof CatalogColumnStatisticsDataLong) {
			CatalogColumnStatisticsDataLong longColStat = (CatalogColumnStatisticsDataLong) colStat;
			LongColumnStatsData longColumnStatsData = new LongColumnStatsData(longColStat.getNullCount(), longColStat.getNdv());
			longColumnStatsData.setHighValue(longColStat.getMax());
			longColumnStatsData.setLowValue(longColStat.getMin());
			return ColumnStatisticsData.longStats(longColumnStatsData);
		}
	} else if (type.equals(LogicalTypeRoot.FLOAT)
			|| type.equals(LogicalTypeRoot.DOUBLE)) {
		if (colStat instanceof CatalogColumnStatisticsDataDouble) {
			CatalogColumnStatisticsDataDouble doubleColumnStatsData = (CatalogColumnStatisticsDataDouble) colStat;
			DoubleColumnStatsData floatStats = new DoubleColumnStatsData(doubleColumnStatsData.getNullCount(), doubleColumnStatsData.getNdv());
			floatStats.setHighValue(doubleColumnStatsData.getMax());
			floatStats.setLowValue(doubleColumnStatsData.getMin());
			return ColumnStatisticsData.doubleStats(floatStats);
		}
	} else if (type.equals(LogicalTypeRoot.DATE)) {
		if (colStat instanceof CatalogColumnStatisticsDataDate) {
			CatalogColumnStatisticsDataDate dateColumnStatsData = (CatalogColumnStatisticsDataDate) colStat;
			DateColumnStatsData dateStats = new DateColumnStatsData(dateColumnStatsData.getNullCount(), dateColumnStatsData.getNdv());
			dateStats.setHighValue(new org.apache.hadoop.hive.metastore.api.Date(dateColumnStatsData.getMax().getDaysSinceEpoch()));
			dateStats.setLowValue(new org.apache.hadoop.hive.metastore.api.Date(dateColumnStatsData.getMin().getDaysSinceEpoch()));
			return ColumnStatisticsData.dateStats(dateStats);
		}
	} else if (type.equals(LogicalTypeRoot.VARBINARY)
			|| type.equals(LogicalTypeRoot.BINARY)) {
		if (colStat instanceof CatalogColumnStatisticsDataBinary) {
			CatalogColumnStatisticsDataBinary binaryColumnStatsData = (CatalogColumnStatisticsDataBinary) colStat;
			BinaryColumnStatsData binaryColumnStats = new BinaryColumnStatsData(binaryColumnStatsData.getMaxLength(), binaryColumnStatsData.getAvgLength(), binaryColumnStatsData.getNullCount());
			return ColumnStatisticsData.binaryStats(binaryColumnStats);
		}
	}
	// Reached when the type is unsupported, or when the statistics class does not match the type.
	throw new CatalogException(String.format("Flink does not support converting ColumnStats '%s' for Hive column " +
											"type '%s' yet", colStat, colType));
}
 
Example 13
Source Project: circus-train   Source File: TestUtils.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates an external, partitioned Hive table in the metastore and attaches table-level long
 * statistics for the {@code id} column.
 *
 * @param metaStoreClient client used to create the table and update its column statistics
 * @param database database name of the new table
 * @param table name of the new table
 * @param location storage location of the table
 * @param columns data columns of the table
 * @param partitionKeys partition key columns of the table
 * @param serializationLib serialization library class name set on the serde info
 * @param inputFormatClassName input format class name set on the storage descriptor
 * @param outputFormatClassName output format class name set on the storage descriptor
 * @return the table object that was sent to the metastore
 * @throws Exception if a metastore call fails
 */
public static Table createPartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    URI location,
    List<FieldSchema> columns,
    List<FieldSchema> partitionKeys,
    String serializationLib,
    String inputFormatClassName,
    String outputFormatClassName)
    throws Exception {

  // Storage layout as requested by the caller.
  SerDeInfo serdeInfo = new SerDeInfo();
  serdeInfo.setSerializationLib(serializationLib);

  StorageDescriptor storage = new StorageDescriptor();
  storage.setCols(columns);
  storage.setLocation(location.toString());
  storage.setParameters(new HashMap<String, String>());
  storage.setInputFormat(inputFormatClassName);
  storage.setOutputFormat(outputFormatClassName);
  storage.setSerdeInfo(serdeInfo);

  Table created = new Table();
  created.setDbName(database);
  created.setTableName(table);
  created.setTableType(TableType.EXTERNAL_TABLE.name());
  created.putToParameters("EXTERNAL", "TRUE");
  created.setPartitionKeys(partitionKeys);
  created.setSd(storage);

  metaStoreClient.createTable(created);

  // Register table-level statistics for the "id" column.
  ColumnStatisticsObj idStats = new ColumnStatisticsObj(
      "id",
      "bigint",
      new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L)));
  ColumnStatistics tableStats = new ColumnStatistics(
      new ColumnStatisticsDesc(true, database, table),
      Collections.singletonList(idStats));
  metaStoreClient.updateTableColumnStatistics(tableStats);

  return created;
}
 
Example 14
Source Project: flink   Source File: HiveShimV100.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Not supported by this shim: always throws.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public ColumnStatisticsData toHiveDateColStats(CatalogColumnStatisticsDataDate flinkDateColStats) {
	final String reason = "DATE column stats are not supported until Hive 1.2.0";
	throw new UnsupportedOperationException(reason);
}
 
Example 15
Source Project: flink   Source File: HiveShimV100.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Always reports {@code false}, regardless of the statistics data passed in.
 *
 * @param colStatsData Hive column statistics data (not inspected)
 * @return {@code false}
 */
@Override
public boolean isDateStats(ColumnStatisticsData colStatsData) {
	return false;
}
 
Example 16
Source Project: flink   Source File: HiveShimV100.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Not supported by this shim: always throws.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public CatalogColumnStatisticsDataDate toFlinkDateColStats(ColumnStatisticsData hiveDateColStats) {
	final String reason = "DATE column stats are not supported until Hive 1.2.0";
	throw new UnsupportedOperationException(reason);
}
 
Example 17
Source Project: flink   Source File: HiveStatsUtil.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Create Flink ColumnStats from Hive ColumnStatisticsData.
 *
 * <p>The kind of statistics is chosen by probing the Hive object in this order: binary, boolean,
 * date (via the Hive shim for {@code hiveVersion}), double, long, string, decimal. Each value is
 * copied only when its {@code isSet...} accessor reports {@code true}; otherwise {@code null} is
 * passed. Decimal statistics are returned as {@link CatalogColumnStatisticsDataDouble}.
 *
 * @param colType Flink column type, used only in the warning message
 * @param stats Hive column statistics data to convert
 * @param hiveVersion Hive version used to load the shim that handles date statistics
 * @return the Flink statistics, or {@code null} when the statistics kind is not handled
 */
private static CatalogColumnStatisticsDataBase createTableColumnStats(DataType colType, ColumnStatisticsData stats, String hiveVersion) {
	HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
	if (stats.isSetBinaryStats()) {
		BinaryColumnStatsData binaryStats = stats.getBinaryStats();
		return new CatalogColumnStatisticsDataBinary(
				binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null,
				binaryStats.isSetAvgColLen() ? binaryStats.getAvgColLen() : null,
				binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null);
	} else if (stats.isSetBooleanStats()) {
		BooleanColumnStatsData booleanStats = stats.getBooleanStats();
		return new CatalogColumnStatisticsDataBoolean(
				booleanStats.isSetNumTrues() ? booleanStats.getNumTrues() : null,
				booleanStats.isSetNumFalses() ? booleanStats.getNumFalses() : null,
				booleanStats.isSetNumNulls() ? booleanStats.getNumNulls() : null);
	} else if (hiveShim.isDateStats(stats)) {
		return hiveShim.toFlinkDateColStats(stats);
	} else if (stats.isSetDoubleStats()) {
		DoubleColumnStatsData doubleStats = stats.getDoubleStats();
		return new CatalogColumnStatisticsDataDouble(
				doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null,
				doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null,
				doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null,
				doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null);
	} else if (stats.isSetLongStats()) {
		LongColumnStatsData longColStats = stats.getLongStats();
		return new CatalogColumnStatisticsDataLong(
				longColStats.isSetLowValue() ? longColStats.getLowValue() : null,
				longColStats.isSetHighValue() ? longColStats.getHighValue() : null,
				longColStats.isSetNumDVs() ? longColStats.getNumDVs() : null,
				longColStats.isSetNumNulls() ? longColStats.getNumNulls() : null);
	} else if (stats.isSetStringStats()) {
		StringColumnStatsData stringStats = stats.getStringStats();
		return new CatalogColumnStatisticsDataString(
				stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null,
				stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null,
				stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null,
				// The null count must be guarded by its own flag, not by the distinct-value flag.
				stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null);
	} else if (stats.isSetDecimalStats()) {
		DecimalColumnStatsData decimalStats = stats.getDecimalStats();
		// for now, just return CatalogColumnStatisticsDataDouble for decimal columns
		Double max = null;
		if (decimalStats.isSetHighValue()) {
			max = toHiveDecimal(decimalStats.getHighValue()).doubleValue();
		}
		Double min = null;
		if (decimalStats.isSetLowValue()) {
			min = toHiveDecimal(decimalStats.getLowValue()).doubleValue();
		}
		Long ndv = decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null;
		Long nullCount = decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null;
		return new CatalogColumnStatisticsDataDouble(min, max, ndv, nullCount);
	} else {
		LOG.warn("Flink does not support converting ColumnStatisticsData '{}' for Hive column type '{}' yet.", stats, colType);
		return null;
	}
}
 
Example 18
Source Project: flink   Source File: HiveShim.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Generate Hive ColumnStatisticsData from Flink CatalogColumnStatisticsDataDate for DATE columns.
 *
 * @param flinkDateColStats Flink statistics of a DATE column
 * @return the Hive statistics data generated from {@code flinkDateColStats}
 */
ColumnStatisticsData toHiveDateColStats(CatalogColumnStatisticsDataDate flinkDateColStats);
 
Example 19
Source Project: flink   Source File: HiveShim.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Whether a Hive ColumnStatisticsData is for DATE columns.
 *
 * @param colStatsData Hive column statistics data to inspect
 * @return {@code true} if {@code colStatsData} is for a DATE column, {@code false} otherwise
 */
boolean isDateStats(ColumnStatisticsData colStatsData);
 
Example 20
Source Project: flink   Source File: HiveShim.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Generate Flink CatalogColumnStatisticsDataDate from Hive ColumnStatisticsData for DATE columns.
 *
 * @param hiveDateColStats Hive statistics data of a DATE column
 * @return the Flink date column statistics generated from {@code hiveDateColStats}
 */
CatalogColumnStatisticsDataDate toFlinkDateColStats(ColumnStatisticsData hiveDateColStats);