org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj Java Examples

The following examples show how to use org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a {@link DecimalColumnStatsData} on which no field has been set and checks that the
 * result carries only a {@link DecimalStatistics} whose two values are both empty.
 */
@Test
public void testEmptyDecimalStatsToColumnStatistics()
{
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(new DecimalColumnStatsData()));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    // Type-specific statistics: only the decimal slot is populated.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.of(new DecimalStatistics(Optional.empty(), Optional.empty())));
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    // Generic statistics: nothing was set on the input, so nothing is expected here.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
 
Example #2
Source File: HiveCatalog.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the column statistics of the table at {@code tablePath} through the metastore client.
 *
 * @param tablePath path of the table to look up
 * @return the converted statistics, or {@link CatalogColumnStatistics#UNKNOWN} for a partitioned table
 * @throws TableNotExistException declared by the signature; presumably raised by {@code getHiveTable} — confirm
 * @throws CatalogException when the metastore call fails with a {@code TException}
 */
@Override
public CatalogColumnStatistics getTableColumnStatistics(ObjectPath tablePath) throws TableNotExistException, CatalogException {
	Table hiveTable = getHiveTable(tablePath);
	try {
		if (isTablePartitioned(hiveTable)) {
			// TableColumnStats of partitioned table is unknown, the behavior is same as HIVE
			return CatalogColumnStatistics.UNKNOWN;
		}

		// Non-partitioned table: request statistics for every column of the storage descriptor.
		List<ColumnStatisticsObj> hiveColumnStats = client.getTableColumnStatistics(
				hiveTable.getDbName(), hiveTable.getTableName(), getFieldNames(hiveTable.getSd().getCols()));
		return new CatalogColumnStatistics(HiveStatsUtil.createCatalogColumnStats(hiveColumnStats));
	} catch (TException e) {
		// Keep the original exception as the cause so the metastore failure stays visible.
		String message = String.format("Failed to get table column stats of table %s",
				tablePath.getFullName());
		throw new CatalogException(message, e);
	}
}
 
Example #3
Source File: HiveStatsUtil.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a Hive {@link ColumnStatistics} from the given per-column statistics.
 *
 * <p>The columns of {@code sd} are visited in order; a column contributes one
 * {@link ColumnStatisticsObj} only when {@code colStats} has a non-null entry under its name.
 *
 * @param colStats statistics keyed by column name
 * @param sd storage descriptor supplying the column names and Hive type strings
 * @param desc descriptor passed through unchanged to the result
 * @return statistics holding {@code desc} and the collected per-column objects
 */
private static ColumnStatistics createHiveColumnStatistics(
		Map<String, CatalogColumnStatisticsDataBase> colStats,
		StorageDescriptor sd,
		ColumnStatisticsDesc desc) {
	List<ColumnStatisticsObj> hiveStatsObjs = new ArrayList<>();

	for (FieldSchema column : sd.getCols()) {
		String columnName = column.getName();
		String columnType = column.getType();
		CatalogColumnStatisticsDataBase sourceStats = colStats.get(column.getName());
		if (sourceStats == null) {
			// No statistics supplied for this column; leave it out of the result.
			continue;
		}

		// The Hive type string is parsed and mapped to a Flink type to drive the conversion.
		ColumnStatisticsData hiveStatsData = getColumnStatisticsData(
				HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(columnType)), sourceStats);
		hiveStatsObjs.add(new ColumnStatisticsObj(columnName, columnType, hiveStatsData));
	}

	return new ColumnStatistics(desc, hiveStatsObjs);
}
 
Example #4
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a {@link BinaryColumnStatsData} on which no field has been set and checks that every
 * statistic of the result is empty.
 */
@Test
public void testEmptyBinaryStatsToColumnStatistics()
{
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", BINARY_TYPE_NAME, binaryStats(new BinaryColumnStatsData()));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    // No type-specific statistics are expected for this input.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    // Nothing was set on the input, so the generic statistics are empty as well.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
 
Example #5
Source File: PartitionedTableReplicationTest.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a replication for which the source reports no partitions and checks that the copier is
 * never touched, while the replica table is validated and then has its metadata updated.
 */
@Test
public void noMatchingPartitions() throws Exception {
  when(replica.getLocationManager(TableType.PARTITIONED, targetTableLocation, EVENT_ID, sourceLocationManager))
      .thenReturn(replicaLocationManager);

  // The source answers with the table's partition keys but neither partitions nor statistics.
  List<Partition> noPartitions = Collections.<Partition>emptyList();
  PartitionsAndStatistics nothingToReplicate = new PartitionsAndStatistics(sourceTable.getPartitionKeys(),
      noPartitions, Collections.<String, List<ColumnStatisticsObj>>emptyMap());
  when(source.getPartitions(sourceTable, PARTITION_PREDICATE, MAX_PARTITIONS)).thenReturn(nothingToReplicate);
  when(source.getLocationManager(sourceTable, noPartitions, EVENT_ID, copierOptions))
      .thenReturn(sourceLocationManager);

  PartitionedTableReplication tableReplication = new PartitionedTableReplication(DATABASE, TABLE,
      partitionPredicate, source, replica, copierFactoryManager, eventIdFactory, targetTableLocation, DATABASE,
      TABLE, copierOptions, listener, dataManipulatorFactoryManager);
  tableReplication.replicate();

  verifyZeroInteractions(copier);
  // Validation of the replica table must precede the metadata update.
  InOrder callOrder = inOrder(sourceLocationManager, replica, replicaLocationManager, listener);
  callOrder.verify(replica).validateReplicaTable(DATABASE, TABLE);
  callOrder
      .verify(replica)
      .updateMetadata(EVENT_ID, sourceTableAndStatistics, DATABASE, TABLE, replicaLocationManager);
}
 
Example #6
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts binary column statistics (max length 100, average length 22.2, 2 nulls) with a row
 * count of 4 and checks the resulting size and null statistics.
 */
@Test
public void testBinaryStatsToColumnStatistics()
{
    BinaryColumnStatsData binaryStatsInput = new BinaryColumnStatsData();
    binaryStatsInput.setMaxColLen(100);
    binaryStatsInput.setAvgColLen(22.2);
    binaryStatsInput.setNumNulls(2);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", BINARY_TYPE_NAME, binaryStats(binaryStatsInput));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.of(4));

    // No type-specific statistics are expected for a binary column.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    // Expected total size is 44 for average length 22.2, row count 4 and 2 nulls.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.of(100));
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.of(44));
    assertEquals(converted.getNullsCount(), OptionalLong.of(2));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
 
Example #7
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a {@link StringColumnStatsData} on which no field has been set and checks that every
 * statistic of the result is empty.
 */
@Test
public void testEmptyStringColumnStatsData()
{
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", STRING_TYPE_NAME, stringStats(new StringColumnStatsData()));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    // No type-specific statistics are expected for this input.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    // Nothing was set on the input, so the generic statistics are empty as well.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
 
Example #8
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts string column statistics (max length 100, average length 23.333, 1 null, 20 distinct
 * values) with a row count of 2 and checks the resulting size, null and distinct-value statistics.
 */
@Test
public void testStringStatsToColumnStatistics()
{
    StringColumnStatsData stringStatsInput = new StringColumnStatsData();
    stringStatsInput.setMaxColLen(100);
    stringStatsInput.setAvgColLen(23.333);
    stringStatsInput.setNumNulls(1);
    stringStatsInput.setNumDVs(20);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", STRING_TYPE_NAME, stringStats(stringStatsInput));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.of(2));

    // No type-specific statistics are expected for a string column.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    // With 2 rows and 1 null the expected total size is 23 and the expected distinct count is 1,
    // even though the input claims 20 distinct values.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.of(100));
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.of(23));
    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(1));
}
 
Example #9
Source File: ThriftHiveMetastore.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the statistics of every column that is present in {@code columns} and, when at least
 * one conversion result exists, passes them on to the overload that takes the converted list.
 *
 * @param identity forwarded unchanged to the overload
 * @param databaseName forwarded unchanged to the overload
 * @param tableName forwarded unchanged to the overload
 * @param partitionName forwarded unchanged to the overload
 * @param columns known columns and their Hive types; statistics for other column names are dropped
 * @param columnStatistics statistics keyed by column name
 * @param rowCount row count handed to the per-column conversion
 */
private void setPartitionColumnStatistics(
        HiveIdentity identity,
        String databaseName,
        String tableName,
        String partitionName,
        Map<String, HiveType> columns,
        Map<String, HiveColumnStatistics> columnStatistics,
        OptionalLong rowCount)
{
    List<ColumnStatisticsObj> convertedStatistics = columnStatistics.entrySet().stream()
            .filter(statsEntry -> columns.containsKey(statsEntry.getKey()))
            .map(statsEntry -> createMetastoreColumnStatistics(
                    statsEntry.getKey(),
                    columns.get(statsEntry.getKey()),
                    statsEntry.getValue(),
                    rowCount))
            .collect(toImmutableList());

    if (convertedStatistics.isEmpty()) {
        // Nothing survived the filter, so the overload is not called at all.
        return;
    }
    setPartitionColumnStatistics(identity, databaseName, tableName, partitionName, convertedStatistics);
}
 
Example #10
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a {@link DateColumnStatsData} on which no field has been set and checks that the
 * result carries only a {@link DateStatistics} whose two values are both empty.
 */
@Test
public void testEmptyDateStatsToColumnStatistics()
{
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", DATE_TYPE_NAME, dateStats(new DateColumnStatsData()));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    // Type-specific statistics: only the date slot is populated.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.of(new DateStatistics(Optional.empty(), Optional.empty())));
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    // Nothing was set on the input, so the generic statistics are empty.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
 
Example #11
Source File: ReplicaTest.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Updates the metadata of a replica that already exists as a partitioned view and checks that the
 * table is altered through {@code alterTableService} and that no clean-up location is registered.
 *
 * <p>The {@code throws} clause names only {@code Exception}: the {@code IOException} that was also
 * listed is already covered by it.
 */
@Test
public void alteringExistingPartitionedReplicaViewSucceeds() throws Exception {
  convertSourceTableToView();
  convertExistingReplicaTableToView();
  // The metastore reports one existing partition for the two requested partition names.
  when(mockMetaStoreClient
      .getPartitionsByNames(DB_NAME, TABLE_NAME, Lists.newArrayList("c=one/d=two", "c=three/d=four")))
          .thenReturn(Arrays.asList(existingPartition));
  // Mark the existing replica as the product of an earlier replication event.
  existingReplicaTable.getParameters().put(REPLICATION_EVENT.parameterName(), "previousEventId");
  // The update itself carries no partitions and no partition statistics.
  replica
      .updateMetadata(EVENT_ID, tableAndStatistics,
          new PartitionsAndStatistics(sourceTable.getPartitionKeys(), Collections.<Partition>emptyList(),
              Collections.<String, List<ColumnStatisticsObj>>emptyMap()),
          DB_NAME, TABLE_NAME, mockReplicaLocationManager);
  verify(alterTableService).alterTable(eq(mockMetaStoreClient), eq(existingReplicaTable), any(Table.class));
  verify(mockReplicaLocationManager, never()).addCleanUpLocation(anyString(), any(Path.class));
}
 
Example #12
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts date column statistics (low 1000, high 2000, 1 null, 20 distinct values) with a row
 * count of 1000 and checks the resulting date range, null and distinct-value statistics.
 */
@Test
public void testDateStatsToColumnStatistics()
{
    DateColumnStatsData dateStatsInput = new DateColumnStatsData();
    // NOTE(review): the expected values below use LocalDate.ofEpochDay with the same numbers, so
    // this Date presumably counts days since the epoch — confirm against the imported Date type.
    dateStatsInput.setLowValue(new Date(1000));
    dateStatsInput.setHighValue(new Date(2000));
    dateStatsInput.setNumNulls(1);
    dateStatsInput.setNumDVs(20);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", DATE_TYPE_NAME, dateStats(dateStatsInput));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.of(1000));

    // Type-specific statistics: only the date slot is populated.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.of(new DateStatistics(Optional.of(LocalDate.ofEpochDay(1000)), Optional.of(LocalDate.ofEpochDay(2000)))));
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    // The expected distinct count is 19 for an input of 20 distinct values and 1 null.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(19));
}
 
Example #13
Source File: MockThriftMetastoreClient.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Mock lookup of table column statistics.
 *
 * <p>Every call increments {@code accessCount} first, so calls that end in an exception are
 * counted as well.
 *
 * @return a single-element list holding {@code createTestStats()} when the arguments equal the
 *         test database, test table and a one-element list of the test column
 * @throws RuntimeException when {@code throwException} is set
 * @throws NoSuchObjectException when any argument differs from the expected test values
 */
@Override
public List<ColumnStatisticsObj> getTableColumnStatistics(String databaseName, String tableName, List<String> columnNames)
        throws TException
{
    accessCount.incrementAndGet();
    if (throwException) {
        throw new RuntimeException();
    }

    boolean matchesTestFixture = databaseName.equals(TEST_DATABASE)
            && tableName.equals(TEST_TABLE)
            && columnNames.equals(ImmutableList.of(TEST_COLUMN));
    if (!matchesTestFixture) {
        throw new NoSuchObjectException();
    }

    return ImmutableList.of(createTestStats());
}
 
Example #14
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts decimal column statistics (low 0, high 100, 1 null, 20 distinct values) with a row
 * count of 1000 and checks the resulting decimal range, null and distinct-value statistics.
 */
@Test
public void testDecimalStatsToColumnStatistics()
{
    BigDecimal lowerBound = new BigDecimal("0");
    BigDecimal upperBound = new BigDecimal("100");

    DecimalColumnStatsData decimalStatsInput = new DecimalColumnStatsData();
    decimalStatsInput.setLowValue(toMetastoreDecimal(lowerBound));
    decimalStatsInput.setHighValue(toMetastoreDecimal(upperBound));
    decimalStatsInput.setNumNulls(1);
    decimalStatsInput.setNumDVs(20);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(decimalStatsInput));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.of(1000));

    // Type-specific statistics: only the decimal slot is populated, with the bounds set above.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.of(new DecimalStatistics(Optional.of(lowerBound), Optional.of(upperBound))));
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    // The expected distinct count is 19 for an input of 20 distinct values and 1 null.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(19));
}
 
Example #15
Source File: PartitionedTableMetadataMirrorReplicationTest.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a metadata-mirror replication for which the source reports no partitions and checks that
 * the replica table is validated and its metadata updated.
 */
@Test
public void noMatchingPartitions() throws Exception {
  // The source answers with the table's partition keys but neither partitions nor statistics.
  List<Partition> noPartitions = Collections.<Partition>emptyList();
  PartitionsAndStatistics nothingToReplicate = new PartitionsAndStatistics(sourceTable.getPartitionKeys(),
      noPartitions, Collections.<String, List<ColumnStatisticsObj>>emptyMap());
  when(source.getPartitions(sourceTable, PARTITION_PREDICATE, MAX_PARTITIONS)).thenReturn(nothingToReplicate);
  when(source.getLocationManager(sourceTable, noPartitions, EVENT_ID, copierOptions))
      .thenReturn(sourceLocationManager);

  PartitionedTableMetadataMirrorReplication mirrorReplication = new PartitionedTableMetadataMirrorReplication(
      DATABASE, TABLE, partitionPredicate, source, replica, eventIdFactory, DATABASE, TABLE);
  mirrorReplication.replicate();

  verify(replica).validateReplicaTable(DATABASE, TABLE);
  // Any ReplicaLocationManager instance is accepted for the last argument.
  verify(replica)
      .updateMetadata(eq(EVENT_ID), eq(sourceTableAndStatistics), eq(DATABASE), eq(TABLE),
          any(ReplicaLocationManager.class));
}
 
Example #16
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a {@link DoubleColumnStatsData} on which no field has been set and checks that the
 * result carries only a {@link DoubleStatistics} whose two values are both empty.
 */
@Test
public void testEmptyDoubleStatsToColumnStatistics()
{
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(new DoubleColumnStatsData()));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    // Type-specific statistics: only the double slot is populated.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.of(new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty())));
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    // Nothing was set on the input, so the generic statistics are empty.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
 
Example #17
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts double column statistics (low 0, high 100, 1 null, 20 distinct values) with a row
 * count of 1000 and checks the resulting range, null and distinct-value statistics.
 */
@Test
public void testDoubleStatsToColumnStatistics()
{
    DoubleColumnStatsData doubleStatsInput = new DoubleColumnStatsData();
    doubleStatsInput.setLowValue(0);
    doubleStatsInput.setHighValue(100);
    doubleStatsInput.setNumNulls(1);
    doubleStatsInput.setNumDVs(20);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(doubleStatsInput));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.of(1000));

    // Type-specific statistics: only the double slot is populated, with the bounds set above.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.of(new DoubleStatistics(OptionalDouble.of(0), OptionalDouble.of(100))));
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    // The expected distinct count is 19 for an input of 20 distinct values and 1 null.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(19));
}
 
Example #18
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a {@link LongColumnStatsData} on which no field has been set and checks that the
 * result carries only an {@link IntegerStatistics} whose two values are both empty.
 */
@Test
public void testEmptyLongStatsToColumnStatistics()
{
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", BIGINT_TYPE_NAME, longStats(new LongColumnStatsData()));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    // Type-specific statistics: only the integer slot is populated.
    assertEquals(converted.getIntegerStatistics(), Optional.of(new IntegerStatistics(OptionalLong.empty(), OptionalLong.empty())));
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    // Nothing was set on the input, so the generic statistics are empty.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
 
Example #19
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts long column statistics (low 0, high 100, 1 null, 20 distinct values) with a row count
 * of 1000 and checks the resulting integer range, null and distinct-value statistics.
 */
@Test
public void testLongStatsToColumnStatistics()
{
    LongColumnStatsData longStatsInput = new LongColumnStatsData();
    longStatsInput.setLowValue(0);
    longStatsInput.setHighValue(100);
    longStatsInput.setNumNulls(1);
    longStatsInput.setNumDVs(20);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", BIGINT_TYPE_NAME, longStats(longStatsInput));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.of(1000));

    // Type-specific statistics: only the integer slot is populated, with the bounds set above.
    assertEquals(converted.getIntegerStatistics(), Optional.of(new IntegerStatistics(OptionalLong.of(0), OptionalLong.of(100))));
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    // The expected distinct count is 19 for an input of 20 distinct values and 1 null.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(19));
}
 
Example #20
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts boolean column statistics (100 trues, 10 falses, 0 nulls) without a row count and
 * checks the resulting boolean and null statistics.
 */
@Test
public void testBooleanStatsToColumnStatistics()
{
    BooleanColumnStatsData booleanStatsInput = new BooleanColumnStatsData();
    booleanStatsInput.setNumTrues(100);
    booleanStatsInput.setNumFalses(10);
    booleanStatsInput.setNumNulls(0);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(booleanStatsInput));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    // Type-specific statistics: only the boolean slot is populated, with the counts set above.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.of(new BooleanStatistics(OptionalLong.of(100), OptionalLong.of(10))));
    // A null count of zero is reported as present, not as empty.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(0));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
 
Example #21
Source File: MockThriftMetastoreClient.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Mock lookup of partition column statistics.
 *
 * <p>Every call increments {@code accessCount} first, so calls that end in an exception are
 * counted as well.
 *
 * @return a map from {@code TEST_PARTITION1} to a single-element list holding
 *         {@code createTestStats()} when the arguments equal the test database, test table, a
 *         one-element list of the test partition and a one-element list of the test column
 * @throws RuntimeException when {@code throwException} is set
 * @throws NoSuchObjectException when any argument differs from the expected test values
 */
@Override
public Map<String, List<ColumnStatisticsObj>> getPartitionColumnStatistics(String databaseName, String tableName, List<String> partitionNames, List<String> columnNames)
        throws TException
{
    accessCount.incrementAndGet();
    if (throwException) {
        throw new RuntimeException();
    }

    boolean matchesTestFixture = databaseName.equals(TEST_DATABASE)
            && tableName.equals(TEST_TABLE)
            && partitionNames.equals(ImmutableList.of(TEST_PARTITION1))
            && columnNames.equals(ImmutableList.of(TEST_COLUMN));
    if (!matchesTestFixture) {
        throw new NoSuchObjectException();
    }

    return ImmutableMap.of(TEST_PARTITION1, ImmutableList.of(createTestStats()));
}
 
Example #22
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts boolean statistics built with the constructor arguments {@code (1L, -1L, 2L)} and
 * checks that the null count is 2 while both values of the boolean statistics come back empty.
 */
@Test
public void testImpalaGeneratedBooleanStatistics()
{
    // NOTE(review): going by the test name, these values presumably mimic what Impala writes,
    // with -1 standing for an unknown count — confirm the constructor's argument order.
    BooleanColumnStatsData impalaStyleInput = new BooleanColumnStatsData(1L, -1L, 2L);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(impalaStyleInput));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(2));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
    // Boolean statistics are present, but neither of their two counts is.
    assertEquals(converted.getBooleanStatistics(), Optional.of(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())));
}
 
Example #23
Source File: TestThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a {@link BooleanColumnStatsData} on which no field has been set and checks that the
 * result carries only a {@link BooleanStatistics} whose two values are both empty.
 */
@Test
public void testEmptyBooleanStatsToColumnStatistics()
{
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(new BooleanColumnStatsData()));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    // Type-specific statistics: only the boolean slot is populated.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.of(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())));
    // Nothing was set on the input, so the generic statistics are empty.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
 
Example #24
Source File: HiveEndpoint.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Lists the partitions of {@code table} and fetches their column statistics through a single
 * metastore client, which is closed when the method returns.
 *
 * @param table table whose partitions are listed
 * @param partitionPredicate filter handed to {@code listPartitionsByFilter}; when null or empty the
 *          partitions are listed with {@code listPartitions} instead
 * @param maxPartitions limit handed to the metastore client after a narrowing cast to {@code short}
 * @return the table's partition keys, the listed partitions and the statistics map exactly as the
 *         client returned it (it is null-checked for logging only)
 * @throws TException declared for the metastore calls made here
 */
public PartitionsAndStatistics getPartitions(Table table, String partitionPredicate, int maxPartitions)
  throws TException {
  try (CloseableMetaStoreClient client = metaStoreClientSupplier.get()) {
    // NOTE(review): the int -> short cast wraps for values above Short.MAX_VALUE — confirm that
    // callers always stay within range.
    List<Partition> partitions = Strings.isNullOrEmpty(partitionPredicate)
        ? client.listPartitions(table.getDbName(), table.getTableName(), (short) maxPartitions)
        : client.listPartitionsByFilter(table.getDbName(), table.getTableName(), partitionPredicate,
            (short) maxPartitions);

    // Statistics are requested per partition name, for the columns reported by getColumnNames.
    List<String> partitionNames = getPartitionNames(table.getPartitionKeys(), partitions);
    List<String> columnNames = getColumnNames(table);

    Map<String, List<ColumnStatisticsObj>> statisticsByPartitionName = client
        .getPartitionColumnStatistics(table.getDbName(), table.getTableName(), partitionNames, columnNames);
    if (statisticsByPartitionName == null || statisticsByPartitionName.isEmpty()) {
      log.debug("No partition column stats retrieved for table {}.{}", table.getDbName(), table.getTableName());
    } else {
      log.debug("Retrieved column stats entries for {} partitions of table {}.{}", statisticsByPartitionName.size(),
          table.getDbName(), table.getTableName());
    }

    return new PartitionsAndStatistics(table.getPartitionKeys(), partitions, statisticsByPartitionName);
  }
}
 
Example #25
Source File: PartitionedTableMetadataUpdateReplicationTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a metadata-update replication for which the source reports no partitions and checks that
 * the replica table is validated and its metadata updated.
 */
@Test
public void noMatchingPartitions() throws Exception {
  // The source answers with the table's partition keys but neither partitions nor statistics.
  PartitionsAndStatistics nothingToReplicate = new PartitionsAndStatistics(sourceTable.getPartitionKeys(),
      Collections.<Partition>emptyList(), Collections.<String, List<ColumnStatisticsObj>>emptyMap());
  when(source.getPartitions(sourceTable, PARTITION_PREDICATE, MAX_PARTITIONS)).thenReturn(nothingToReplicate);

  PartitionedTableMetadataUpdateReplication updateReplication = new PartitionedTableMetadataUpdateReplication(
      DATABASE, TABLE, partitionPredicate, source, replica, eventIdFactory, replicaLocation, DATABASE, TABLE);
  updateReplication.replicate();

  verify(replica).validateReplicaTable(DATABASE, TABLE);
  // Any MetadataUpdateReplicaLocationManager instance is accepted for the last argument.
  verify(replica)
      .updateMetadata(eq(EVENT_ID), eq(sourceTableAndStatistics), eq(DATABASE), eq(TABLE),
          any(MetadataUpdateReplicaLocationManager.class));
}
 
Example #26
Source File: ReplicaTableFactoryTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Builds replica partition statistics from source statistics that hold two column statistics
 * objects, and checks that the descriptor carries the mapped database name, the mapped table name
 * and the partition name {@code one=A/two=B}, with both objects kept in their original order.
 */
@Test
public void newReplicaPartitionStatistics() throws MetaException {
  sourceTable.setPartitionKeys(
      Arrays.asList(new FieldSchema("one", "string", null), new FieldSchema("two", "string", null)));

  // The replica partition is a copy of the source partition with mapped names and values A, B.
  Partition mappedPartition = new Partition(sourcePartition);
  mappedPartition.setDbName(MAPPED_DB_NAME);
  mappedPartition.setTableName(MAPPED_TABLE_NAME);
  mappedPartition.setValues(Arrays.asList("A", "B"));

  ColumnStatisticsObj firstStatsObj = new ColumnStatisticsObj();
  ColumnStatisticsObj secondStatsObj = new ColumnStatisticsObj();

  // The source descriptor still uses the unmapped database and table names.
  ColumnStatisticsDesc sourceStatsDesc = new ColumnStatisticsDesc(false, DB_NAME, TABLE_NAME);
  sourceStatsDesc
      .setPartName(Warehouse.makePartName(sourceTable.getPartitionKeys(), mappedPartition.getValues()));

  ColumnStatistics sourceStatistics = new ColumnStatistics(sourceStatsDesc,
      Arrays.asList(firstStatsObj, secondStatsObj));

  ColumnStatistics replicaStatistics = factory.newReplicaPartitionStatistics(sourceTable, mappedPartition,
      sourceStatistics);

  assertThat(replicaStatistics.getStatsDesc().getDbName(), is(MAPPED_DB_NAME));
  assertThat(replicaStatistics.getStatsDesc().getTableName(), is(MAPPED_TABLE_NAME));
  assertThat(replicaStatistics.getStatsDesc().getPartName(), is("one=A/two=B"));
  assertThat(replicaStatistics.getStatsObj().size(), is(2));
  assertThat(replicaStatistics.getStatsObj().get(0), is(firstStatsObj));
  assertThat(replicaStatistics.getStatsObj().get(1), is(secondStatsObj));
}
 
Example #27
Source File: ReplicaTableFactoryTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Same scenario as the untransformed case, but using a factory built with
 * {@code COLUMN_STATISTICS_TRANSFORMATION}: the resulting descriptor is expected to carry
 * {@code new_db}, {@code new_table} and {@code part=newPart}, while both statistics objects are
 * kept in their original order.
 */
@Test
public void newReplicaPartitionStatisticsWithTransformation() throws MetaException {
  sourceTable.setPartitionKeys(
      Arrays.asList(new FieldSchema("one", "string", null), new FieldSchema("two", "string", null)));

  // The replica partition is a copy of the source partition with mapped names and values A, B.
  Partition mappedPartition = new Partition(sourcePartition);
  mappedPartition.setDbName(MAPPED_DB_NAME);
  mappedPartition.setTableName(MAPPED_TABLE_NAME);
  mappedPartition.setValues(Arrays.asList("A", "B"));

  ColumnStatisticsObj firstStatsObj = new ColumnStatisticsObj();
  ColumnStatisticsObj secondStatsObj = new ColumnStatisticsObj();

  ColumnStatisticsDesc sourceStatsDesc = new ColumnStatisticsDesc(false, DB_NAME, TABLE_NAME);
  sourceStatsDesc
      .setPartName(Warehouse.makePartName(sourceTable.getPartitionKeys(), mappedPartition.getValues()));

  ColumnStatistics sourceStatistics = new ColumnStatistics(sourceStatsDesc,
      Arrays.asList(firstStatsObj, secondStatsObj));

  // Table and partition transformations are identities; only the statistics one is custom.
  ReplicaTableFactory transformingFactory = new ReplicaTableFactory(SOURCE_META_STORE_URIS,
      TableTransformation.IDENTITY, PartitionTransformation.IDENTITY, COLUMN_STATISTICS_TRANSFORMATION);

  ColumnStatistics replicaStatistics = transformingFactory.newReplicaPartitionStatistics(sourceTable,
      mappedPartition, sourceStatistics);

  assertThat(replicaStatistics.getStatsDesc().getDbName(), is("new_db"));
  assertThat(replicaStatistics.getStatsDesc().getTableName(), is("new_table"));
  assertThat(replicaStatistics.getStatsDesc().getPartName(), is("part=newPart"));
  assertThat(replicaStatistics.getStatsObj().size(), is(2));
  assertThat(replicaStatistics.getStatsObj().get(0), is(firstStatsObj));
  assertThat(replicaStatistics.getStatsObj().get(1), is(secondStatsObj));
}
 
Example #28
Source File: SourceTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that when the client returns an empty list of column statistics for the table,
 * {@code source.getTableAndStatistics} still returns the table and reports {@code null} statistics.
 */
@Test
public void getTableNoStats() throws Exception {
  // The client is stubbed to report no column statistics for the table.
  when(metaStoreClient.getTableColumnStatistics(DATABASE, TABLE, COLUMN_NAMES))
      .thenReturn(Collections.<ColumnStatisticsObj> emptyList());
  when(metaStoreClient.getTable(DATABASE, TABLE)).thenReturn(table);

  TableAndStatistics tableAndStatistics = source.getTableAndStatistics(DATABASE, TABLE);

  assertThat(tableAndStatistics.getTable(), is(table));
  assertThat(tableAndStatistics.getStatistics(), is(nullValue()));
}
 
Example #29
Source File: SourceTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that when the client returns an empty map of partition column statistics,
 * {@code source.getPartitions} still returns the listed partitions and reports {@code null} statistics for
 * {@code partition}.
 */
@Test
public void getPartitionsNoStats() throws Exception {
  // The client is stubbed to report no column statistics for any partition.
  when(metaStoreClient.getPartitionColumnStatistics(DATABASE, TABLE, PARTITION_NAMES, COLUMN_NAMES))
      .thenReturn(Collections.<String, List<ColumnStatisticsObj>> emptyMap());

  // The stubbed listing call is given the partition limit narrowed to a short.
  short partitionLimit = (short) MAX_PARTITIONS;
  when(metaStoreClient.listPartitionsByFilter(DATABASE, TABLE, PARTITION_PREDICATE, partitionLimit))
      .thenReturn(partitions);

  PartitionsAndStatistics result = source.getPartitions(table, PARTITION_PREDICATE, MAX_PARTITIONS);

  assertThat(result.getPartitions(), is(partitions));
  assertThat(result.getStatisticsForPartition(partition), is(nullValue()));
}
 
Example #30
Source File: TestUtils.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an external, unpartitioned text table through the given metastore client and then registers a
 * single long-typed column statistics entry for its {@code id} column.
 *
 * @param metaStoreClient client used to create the table and to update its column statistics
 * @param database name of the database the table is created in
 * @param table name of the table to create
 * @param location storage location; its string form is set on the storage descriptor
 * @return the {@link Table} instance that was passed to {@code createTable}
 * @throws TException declared for the metastore client calls
 */
public static Table createUnpartitionedTable(HiveMetaStoreClient metaStoreClient, String database, String table,
    URI location) throws TException {
  // Storage layout: text input/output formats read through the OpenCSV SerDe.
  SerDeInfo serDeInfo = new SerDeInfo();
  serDeInfo.setSerializationLib("org.apache.hadoop.hive.serde2.OpenCSVSerde");

  StorageDescriptor storage = new StorageDescriptor();
  storage.setCols(DATA_COLUMNS);
  storage.setLocation(location.toString());
  storage.setParameters(new HashMap<String, String>());
  storage.setInputFormat(TextInputFormat.class.getName());
  storage.setOutputFormat(TextOutputFormat.class.getName());
  storage.setSerdeInfo(serDeInfo);

  // The table is flagged as external both by its type and by the "EXTERNAL" parameter.
  Table created = new Table();
  created.setDbName(database);
  created.setTableName(table);
  created.setTableType(TableType.EXTERNAL_TABLE.name());
  created.putToParameters("EXTERNAL", "TRUE");
  created.setSd(storage);

  metaStoreClient.createTable(created);

  // Statistics are registered only after the table has been created.
  // NOTE(review): the 'true' flag presumably marks these as table-level statistics, and (1L, 2L) are
  // presumably the null count and distinct-value count; confirm against the Hive metastore API.
  LongColumnStatsData longStats = new LongColumnStatsData(1L, 2L);
  ColumnStatisticsData idStatsData = new ColumnStatisticsData(_Fields.LONG_STATS, longStats);
  ColumnStatisticsObj idStats = new ColumnStatisticsObj("id", "bigint", idStatsData);
  ColumnStatisticsDesc descriptor = new ColumnStatisticsDesc(true, database, table);
  ColumnStatistics tableStatistics = new ColumnStatistics(descriptor, Collections.singletonList(idStats));
  metaStoreClient.updateTableColumnStatistics(tableStatistics);

  return created;
}