org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc Java Examples

The following examples show how to use org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PartitionsAndStatisticsTest.java    From circus-train with Apache License 2.0 6 votes vote down vote up
@Test
public void typical() throws Exception {
  List<FieldSchema> partitionKeys = Lists.newArrayList(newFieldSchema("a"), newFieldSchema("c"));
  Table table = newTable("t1", "db1", partitionKeys, newStorageDescriptor(new File("bla"), "col1"));
  List<Partition> partitions = Lists.newArrayList(newPartition(table, "b", "d"));
  statisticsPerPartitionName.put("a=b/c=d", columnStats);

  PartitionsAndStatistics partitionsAndStatistics = new PartitionsAndStatistics(partitionKeys, partitions,
      statisticsPerPartitionName);
  List<String> expectedName = Lists.newArrayList("a=b/c=d");

  assertThat(partitionsAndStatistics.getPartitionNames(), is(expectedName));
  assertThat(partitionsAndStatistics.getPartitions(), is(partitions));
  assertThat(partitionsAndStatistics.getPartitionKeys(), is(partitionKeys));
  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(false, "db1", "t1");
  statsDesc.setPartName("a=b/c=d");
  ColumnStatistics expectedStats = new ColumnStatistics(statsDesc, columnStats);
  assertThat(partitionsAndStatistics.getStatisticsForPartition(partitions.get(0)), is(expectedStats));
}
 
Example #2
Source File: HiveStatsUtil.java    From flink with Apache License 2.0 6 votes vote down vote up
private static ColumnStatistics createHiveColumnStatistics(
		Map<String, CatalogColumnStatisticsDataBase> colStats,
		StorageDescriptor sd,
		ColumnStatisticsDesc desc,
		String hiveVersion) {
	List<ColumnStatisticsObj> colStatsList = new ArrayList<>();

	for (FieldSchema field : sd.getCols()) {
		String hiveColName = field.getName();
		String hiveColType = field.getType();
		CatalogColumnStatisticsDataBase flinkColStat = colStats.get(field.getName());
		if (null != flinkColStat) {
			ColumnStatisticsData statsData = getColumnStatisticsData(
					HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(hiveColType)),
					flinkColStat,
					hiveVersion);
			ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj(hiveColName, hiveColType, statsData);
			colStatsList.add(columnStatisticsObj);
		}
	}

	return new ColumnStatistics(desc, colStatsList);
}
 
Example #3
Source File: DatabaseMappingImplTest.java    From waggle-dance with Apache License 2.0 6 votes vote down vote up
@Test
public void transformInboundSetPartitionStatsRequest() throws Exception {
  SetPartitionsStatsRequest setPartitionsStatsRequest = new SetPartitionsStatsRequest();
  ColumnStatistics columnStatistics = new ColumnStatistics();
  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
  statsDesc.setDbName(DB_NAME);
  columnStatistics.setStatsDesc(statsDesc);
  setPartitionsStatsRequest.setColStats(Lists.newArrayList(columnStatistics));
  SetPartitionsStatsRequest result = databaseMapping
      .transformInboundSetPartitionStatsRequest(setPartitionsStatsRequest);
  assertThat(result, is(sameInstance(setPartitionsStatsRequest)));
  ColumnStatistics resultColStats = result.getColStats().get(0);
  assertThat(resultColStats, is(sameInstance(columnStatistics)));
  assertThat(resultColStats.getStatsDesc(), is(sameInstance(statsDesc)));
  assertThat(resultColStats.getStatsDesc().getDbName(), is(IN_DB_NAME));
}
 
Example #4
Source File: HiveStatsUtil.java    From flink with Apache License 2.0 6 votes vote down vote up
private static ColumnStatistics createHiveColumnStatistics(
		Map<String, CatalogColumnStatisticsDataBase> colStats,
		StorageDescriptor sd,
		ColumnStatisticsDesc desc) {
	List<ColumnStatisticsObj> colStatsList = new ArrayList<>();

	for (FieldSchema field : sd.getCols()) {
		String hiveColName = field.getName();
		String hiveColType = field.getType();
		CatalogColumnStatisticsDataBase flinkColStat = colStats.get(field.getName());
		if (null != flinkColStat) {
			ColumnStatisticsData statsData =
					getColumnStatisticsData(HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(hiveColType)), flinkColStat);
			ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj(hiveColName, hiveColType, statsData);
			colStatsList.add(columnStatisticsObj);
		}
	}

	return new ColumnStatistics(desc, colStatsList);
}
 
Example #5
Source File: ReplicaTableFactory.java    From circus-train with Apache License 2.0 5 votes vote down vote up
ColumnStatistics newReplicaPartitionStatistics(
    Table replicaTable,
    Partition replicaPartition,
    ColumnStatistics sourcePartitionStatistics) {
  ColumnStatisticsDesc statisticsDesc = new ColumnStatisticsDesc(false, replicaPartition.getDbName(),
      replicaPartition.getTableName());
  try {
    statisticsDesc.setPartName(Warehouse.makePartName(replicaTable.getPartitionKeys(), replicaPartition.getValues()));
  } catch (MetaException e) {
    throw new RuntimeException(e);
  }

  return columnStatisticsTransformation
      .transform(new ColumnStatistics(statisticsDesc, sourcePartitionStatistics.getStatsObj()));
}
 
Example #6
Source File: ThriftHiveMetastoreClient.java    From presto with Apache License 2.0 5 votes vote down vote up
@Override
public void setPartitionColumnStatistics(String databaseName, String tableName, String partitionName, List<ColumnStatisticsObj> statistics)
        throws TException
{
    ColumnStatisticsDesc statisticsDescription = new ColumnStatisticsDesc(false, databaseName, tableName);
    statisticsDescription.setPartName(partitionName);
    ColumnStatistics request = new ColumnStatistics(statisticsDescription, statistics);
    client.update_partition_column_statistics(request);
}
 
Example #7
Source File: HiveStatsUtil.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Create columnStatistics from the given Hive column stats of a hive partition.
 */
public static ColumnStatistics createPartitionColumnStats(
		Partition hivePartition,
		String partName,
		Map<String, CatalogColumnStatisticsDataBase> colStats,
		String hiveVersion) {
	ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, hivePartition.getDbName(), hivePartition.getTableName());
	desc.setPartName(partName);
	return createHiveColumnStatistics(colStats, hivePartition.getSd(), desc, hiveVersion);
}
 
Example #8
Source File: HiveStatsUtil.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Create columnStatistics from the given Hive column stats of a hive table.
 */
public static ColumnStatistics createTableColumnStats(
		Table hiveTable,
		Map<String, CatalogColumnStatisticsDataBase> colStats,
		String hiveVersion) {
	ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, hiveTable.getDbName(), hiveTable.getTableName());
	return createHiveColumnStatistics(colStats, hiveTable.getSd(), desc, hiveVersion);
}
 
Example #9
Source File: FederatedHMSHandlerTest.java    From waggle-dance with Apache License 2.0 5 votes vote down vote up
@Test
public void set_aggr_stats_for() throws TException {
  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, DB_P, "table");
  ColumnStatistics colStatistics = new ColumnStatistics(statsDesc, Collections.emptyList());
  List<ColumnStatistics> colStats = Collections.singletonList(colStatistics);
  SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats);
  SetPartitionsStatsRequest inboundRequest = new SetPartitionsStatsRequest();
  when(primaryMapping.transformInboundSetPartitionStatsRequest(request)).thenReturn(inboundRequest);
  when(primaryClient.set_aggr_stats_for(inboundRequest)).thenReturn(true);
  boolean result = handler.set_aggr_stats_for(request);
  assertThat(result, is(true));
}
 
Example #10
Source File: FederatedHMSHandlerTest.java    From waggle-dance with Apache License 2.0 5 votes vote down vote up
@Test
public void update_partition_column_statistics() throws TException {
  ColumnStatisticsDesc columnStatisticsDesc = new ColumnStatisticsDesc(true, DB_P, "table");
  ColumnStatistics columnStatistics = new ColumnStatistics(columnStatisticsDesc, Collections.emptyList());
  ColumnStatistics inboundColumnStatistics = new ColumnStatistics();
  when(primaryMapping.transformInboundColumnStatistics(columnStatistics)).thenReturn(inboundColumnStatistics);
  when(primaryClient.update_partition_column_statistics(inboundColumnStatistics)).thenReturn(true);
  boolean result = handler.update_partition_column_statistics(columnStatistics);
  verify(primaryMapping).checkWritePermissions(DB_P);
  assertThat(result, is(true));
}
 
Example #11
Source File: FederatedHMSHandlerTest.java    From waggle-dance with Apache License 2.0 5 votes vote down vote up
@Test
public void update_table_column_statistics() throws TException {
  ColumnStatisticsDesc columnStatisticsDesc = new ColumnStatisticsDesc(true, DB_P, "table");
  ColumnStatistics columnStatistics = new ColumnStatistics(columnStatisticsDesc, Collections.emptyList());
  ColumnStatistics inboundColumnStatistics = new ColumnStatistics();
  when(primaryMapping.transformInboundColumnStatistics(columnStatistics)).thenReturn(inboundColumnStatistics);
  when(primaryClient.update_table_column_statistics(inboundColumnStatistics)).thenReturn(true);
  boolean result = handler.update_table_column_statistics(columnStatistics);
  verify(primaryMapping).checkWritePermissions(DB_P);
  assertThat(result, is(true));
}
 
Example #12
Source File: HiveStatsUtil.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Create columnStatistics from the given Hive column stats of a hive table.
 */
public static ColumnStatistics createTableColumnStats(
		Table hiveTable,
		Map<String, CatalogColumnStatisticsDataBase> colStats) {
	ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, hiveTable.getDbName(), hiveTable.getTableName());
	return createHiveColumnStatistics(colStats, hiveTable.getSd(), desc);
}
 
Example #13
Source File: DatabaseMappingImplTest.java    From waggle-dance with Apache License 2.0 5 votes vote down vote up
@Test
public void transformOutboundColumnStatistics() throws Exception {
  ColumnStatistics columnStatistics = new ColumnStatistics();
  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
  statsDesc.setDbName(DB_NAME);
  columnStatistics.setStatsDesc(statsDesc);
  ColumnStatistics result = databaseMapping.transformOutboundColumnStatistics(columnStatistics);
  assertThat(result, is(sameInstance(columnStatistics)));
  assertThat(result.getStatsDesc(), is(sameInstance(columnStatistics.getStatsDesc())));
  assertThat(result.getStatsDesc().getDbName(), is(OUT_DB_NAME));
}
 
Example #14
Source File: DatabaseMappingImplTest.java    From waggle-dance with Apache License 2.0 5 votes vote down vote up
@Test
public void transformInboundColumnStatistics() throws Exception {
  ColumnStatistics columnStatistics = new ColumnStatistics();
  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
  statsDesc.setDbName(DB_NAME);
  columnStatistics.setStatsDesc(statsDesc);
  ColumnStatistics result = databaseMapping.transformInboundColumnStatistics(columnStatistics);
  assertThat(result, is(sameInstance(columnStatistics)));
  assertThat(result.getStatsDesc(), is(sameInstance(columnStatistics.getStatsDesc())));
  assertThat(result.getStatsDesc().getDbName(), is(IN_DB_NAME));
}
 
Example #15
Source File: ColumnStatisticsTransformationTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
@Before
public void init() {
  stats = new ColumnStatistics(new ColumnStatisticsDesc(true, "database", "table"),
      ImmutableList.of(
          new ColumnStatisticsObj("a", "int",
              new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L))),
          new ColumnStatisticsObj("b", "string",
              new ColumnStatisticsData(_Fields.STRING_STATS, new StringColumnStatsData(10L, 3L, 0L, 1L)))));
}
 
Example #16
Source File: TestUtils.java    From circus-train with Apache License 2.0 5 votes vote down vote up
public static Table createUnpartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    URI location)
  throws TException {
  Table hiveTable = new Table();
  hiveTable.setDbName(database);
  hiveTable.setTableName(table);
  hiveTable.setTableType(TableType.EXTERNAL_TABLE.name());
  hiveTable.putToParameters("EXTERNAL", "TRUE");

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(DATA_COLUMNS);
  sd.setLocation(location.toString());
  sd.setParameters(new HashMap<String, String>());
  sd.setInputFormat(TextInputFormat.class.getName());
  sd.setOutputFormat(TextOutputFormat.class.getName());
  sd.setSerdeInfo(new SerDeInfo());
  sd.getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.OpenCSVSerde");

  hiveTable.setSd(sd);

  metaStoreClient.createTable(hiveTable);

  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, database, table);
  ColumnStatisticsData statsData = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L));
  ColumnStatisticsObj cso1 = new ColumnStatisticsObj("id", "bigint", statsData);
  List<ColumnStatisticsObj> statsObj = Collections.singletonList(cso1);
  metaStoreClient.updateTableColumnStatistics(new ColumnStatistics(statsDesc, statsObj));

  return hiveTable;
}
 
Example #17
Source File: ThriftHiveMetastoreClient.java    From presto with Apache License 2.0 5 votes vote down vote up
@Override
public void setTableColumnStatistics(String databaseName, String tableName, List<ColumnStatisticsObj> statistics)
        throws TException
{
    ColumnStatisticsDesc statisticsDescription = new ColumnStatisticsDesc(true, databaseName, tableName);
    ColumnStatistics request = new ColumnStatistics(statisticsDescription, statistics);
    client.update_table_column_statistics(request);
}
 
Example #18
Source File: ReplicaTableFactoryTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
@Test
public void newReplicaPartitionStatisticsWithTransformation() throws MetaException {
  sourceTable.setPartitionKeys(
      Arrays.asList(new FieldSchema("one", "string", null), new FieldSchema("two", "string", null)));

  Partition replicaPartition = new Partition(sourcePartition);
  replicaPartition.setDbName(MAPPED_DB_NAME);
  replicaPartition.setTableName(MAPPED_TABLE_NAME);
  replicaPartition.setValues(Arrays.asList("A", "B"));

  ColumnStatisticsObj columnStatisticsObj1 = new ColumnStatisticsObj();
  ColumnStatisticsObj columnStatisticsObj2 = new ColumnStatisticsObj();
  List<ColumnStatisticsObj> columnStatisticsObjs = Arrays.asList(columnStatisticsObj1, columnStatisticsObj2);

  ColumnStatisticsDesc columnStatisticsDesc = new ColumnStatisticsDesc(false, DB_NAME, TABLE_NAME);
  columnStatisticsDesc
      .setPartName(Warehouse.makePartName(sourceTable.getPartitionKeys(), replicaPartition.getValues()));

  ColumnStatistics sourcePartitionStatistics = new ColumnStatistics(columnStatisticsDesc, columnStatisticsObjs);

  ReplicaTableFactory factory = new ReplicaTableFactory(SOURCE_META_STORE_URIS, TableTransformation.IDENTITY,
      PartitionTransformation.IDENTITY, COLUMN_STATISTICS_TRANSFORMATION);

  ColumnStatistics replicaPartitionStatistics = factory.newReplicaPartitionStatistics(sourceTable, replicaPartition,
      sourcePartitionStatistics);

  assertThat(replicaPartitionStatistics.getStatsDesc().getDbName(), is("new_db"));
  assertThat(replicaPartitionStatistics.getStatsDesc().getTableName(), is("new_table"));
  assertThat(replicaPartitionStatistics.getStatsDesc().getPartName(), is("part=newPart"));
  assertThat(replicaPartitionStatistics.getStatsObj().size(), is(2));
  assertThat(replicaPartitionStatistics.getStatsObj().get(0), is(columnStatisticsObj1));
  assertThat(replicaPartitionStatistics.getStatsObj().get(1), is(columnStatisticsObj2));
}
 
Example #19
Source File: ReplicaTableFactoryTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
@Test
public void newReplicaPartitionStatistics() throws MetaException {
  sourceTable.setPartitionKeys(
      Arrays.asList(new FieldSchema("one", "string", null), new FieldSchema("two", "string", null)));

  Partition replicaPartition = new Partition(sourcePartition);
  replicaPartition.setDbName(MAPPED_DB_NAME);
  replicaPartition.setTableName(MAPPED_TABLE_NAME);
  replicaPartition.setValues(Arrays.asList("A", "B"));

  ColumnStatisticsObj columnStatisticsObj1 = new ColumnStatisticsObj();
  ColumnStatisticsObj columnStatisticsObj2 = new ColumnStatisticsObj();
  List<ColumnStatisticsObj> columnStatisticsObjs = Arrays.asList(columnStatisticsObj1, columnStatisticsObj2);

  ColumnStatisticsDesc columnStatisticsDesc = new ColumnStatisticsDesc(false, DB_NAME, TABLE_NAME);
  columnStatisticsDesc
      .setPartName(Warehouse.makePartName(sourceTable.getPartitionKeys(), replicaPartition.getValues()));

  ColumnStatistics sourcePartitionStatistics = new ColumnStatistics(columnStatisticsDesc, columnStatisticsObjs);

  ColumnStatistics replicaPartitionStatistics = factory.newReplicaPartitionStatistics(sourceTable, replicaPartition,
      sourcePartitionStatistics);

  assertThat(replicaPartitionStatistics.getStatsDesc().getDbName(), is(MAPPED_DB_NAME));
  assertThat(replicaPartitionStatistics.getStatsDesc().getTableName(), is(MAPPED_TABLE_NAME));
  assertThat(replicaPartitionStatistics.getStatsDesc().getPartName(), is("one=A/two=B"));
  assertThat(replicaPartitionStatistics.getStatsObj().size(), is(2));
  assertThat(replicaPartitionStatistics.getStatsObj().get(0), is(columnStatisticsObj1));
  assertThat(replicaPartitionStatistics.getStatsObj().get(1), is(columnStatisticsObj2));
}
 
Example #20
Source File: ReplicaTableFactoryTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
@Test
public void newTableWithNameMappingsAndStats() {
  ColumnStatisticsObj columnStatisticsObj1 = new ColumnStatisticsObj();
  ColumnStatisticsObj columnStatisticsObj2 = new ColumnStatisticsObj();
  List<ColumnStatisticsObj> columnStatisticsObjs = Arrays.asList(columnStatisticsObj1, columnStatisticsObj2);

  TableAndStatistics source = new TableAndStatistics(sourceTable,
      new ColumnStatistics(new ColumnStatisticsDesc(true, DB_NAME, TABLE_NAME), columnStatisticsObjs));

  TableAndStatistics replicaAndStats = factory.newReplicaTable(EVENT_ID, source, MAPPED_DB_NAME, MAPPED_TABLE_NAME,
      REPLICA_DATA_DESTINATION, FULL);
  Table replica = replicaAndStats.getTable();
  ColumnStatistics replicaStatistics = replicaAndStats.getStatistics();

  assertThat(replica.getDbName(), is(MAPPED_DB_NAME));
  assertThat(replica.getTableName(), is(MAPPED_TABLE_NAME));
  assertThat(replica.getSd().getInputFormat(), is(INPUT_FORMAT));
  assertThat(replica.getSd().getOutputFormat(), is(OUTPUT_FORMAT));
  assertThat(replica.getSd().getLocation(), is(REPLICA_DATA_DESTINATION.toUri().toString()));
  assertThat(replica.getParameters().get("com.hotels.bdp.circustrain.source.table"), is(DB_NAME + "." + TABLE_NAME));
  assertThat(replica.getParameters().get("com.hotels.bdp.circustrain.source.metastore.uris"),
      is(SOURCE_META_STORE_URIS));
  assertThat(replica.getParameters().get("com.hotels.bdp.circustrain.source.location"), is(TABLE_LOCATION));
  assertThat(replica.getParameters().get("com.hotels.bdp.circustrain.replication.event"), is(EVENT_ID));
  assertThat(replica.getParameters().get("com.hotels.bdp.circustrain.last.replicated"), is(not(nullValue())));
  assertThat(replica.getParameters().get("DO_NOT_UPDATE_STATS"), is("true"));
  assertThat(replica.getParameters().get("STATS_GENERATED_VIA_STATS_TASK"), is("true"));
  assertThat(replica.getParameters().get("STATS_GENERATED"), is("true"));

  assertThat(replicaStatistics.getStatsDesc().getDbName(), is(MAPPED_DB_NAME));
  assertThat(replicaStatistics.getStatsDesc().getTableName(), is(MAPPED_TABLE_NAME));
  assertThat(replicaStatistics.getStatsObj().size(), is(2));
  assertThat(replicaStatistics.getStatsObj().get(0), is(columnStatisticsObj1));
  assertThat(replicaStatistics.getStatsObj().get(1), is(columnStatisticsObj2));
}
 
Example #21
Source File: ReplicaTableFactoryTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
@Override
public ColumnStatistics transform(ColumnStatistics columnStatistics) {
  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(false, "new_db", "new_table");
  statsDesc.setPartName("part=newPart");
  columnStatistics.setStatsDesc(statsDesc);
  return columnStatistics;
}
 
Example #22
Source File: ReplicaTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
private ColumnStatistics newPartitionStatistics(String... values) {
  ColumnStatisticsObj columnStatisticsObj1 = new ColumnStatisticsObj(COLUMN_A, "string",
      new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(0, 1)));
  ColumnStatisticsObj columnStatisticsObj2 = new ColumnStatisticsObj(COLUMN_B, "string",
      new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1, 2)));
  List<ColumnStatisticsObj> columnStatisticsObjs = Arrays.asList(columnStatisticsObj1, columnStatisticsObj2);
  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(false, DB_NAME, TABLE_NAME);
  statsDesc.setPartName(partitionName(values));
  return new ColumnStatistics(statsDesc, columnStatisticsObjs);
}
 
Example #23
Source File: ReplicaTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
@Before
public void prepare() throws Exception {
  when(metaStoreClientSupplier.get()).thenReturn(mockMetaStoreClient);
  when(replicaCatalog.getName()).thenReturn(NAME);

  hiveConf = new HiveConf();
  hiveConf.setVar(ConfVars.METASTOREURIS, REPLICA_META_STORE_URIS);
  replica = newReplica(tableReplication);
  tableLocation = temporaryFolder.newFolder("table_location").toURI().toString();

  sourceTable = newTable();
  existingPartition = newPartition("one", "two");

  ColumnStatisticsObj columnStatisticsObj1 = new ColumnStatisticsObj(COLUMN_A, "string",
      new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(0, 1)));
  ColumnStatisticsObj columnStatisticsObj2 = new ColumnStatisticsObj(COLUMN_B, "string",
      new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1, 2)));
  columnStatisticsObjs = Arrays.asList(columnStatisticsObj1, columnStatisticsObj2);
  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, DB_NAME, TABLE_NAME);
  columnStatistics = new ColumnStatistics(statsDesc, columnStatisticsObjs);

  tableAndStatistics = new TableAndStatistics(sourceTable, columnStatistics);

  existingReplicaTable = new Table(sourceTable);

  when(mockReplicaLocationManager.getTableLocation()).thenReturn(new Path(tableLocation));
  when(mockReplicaLocationManager.getPartitionBaseLocation()).thenReturn(new Path(tableLocation));

  when(mockMetaStoreClient.getTable(DB_NAME, TABLE_NAME)).thenReturn(existingReplicaTable);
}
 
Example #24
Source File: HiveStatsUtil.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Create columnStatistics from the given Hive column stats of a hive partition.
 */
public static ColumnStatistics createPartitionColumnStats(
		Partition hivePartition,
		String partName,
		Map<String, CatalogColumnStatisticsDataBase> colStats) {
	ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, hivePartition.getDbName(), hivePartition.getTableName());
	desc.setPartName(partName);
	return createHiveColumnStatistics(colStats, hivePartition.getSd(), desc);
}
 
Example #25
Source File: TestUtils.java    From circus-train with Apache License 2.0 4 votes vote down vote up
public static Table createPartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    URI location,
    List<FieldSchema> columns,
    List<FieldSchema> partitionKeys,
    String serializationLib,
    String inputFormatClassName,
    String outputFormatClassName)
    throws Exception {

  Table hiveTable = new Table();
  hiveTable.setDbName(database);
  hiveTable.setTableName(table);
  hiveTable.setTableType(TableType.EXTERNAL_TABLE.name());
  hiveTable.putToParameters("EXTERNAL", "TRUE");

  hiveTable.setPartitionKeys(partitionKeys);

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(columns);
  sd.setLocation(location.toString());
  sd.setParameters(new HashMap<String, String>());
  sd.setInputFormat(inputFormatClassName);
  sd.setOutputFormat(outputFormatClassName);
  sd.setSerdeInfo(new SerDeInfo());
  sd.getSerdeInfo().setSerializationLib(serializationLib);

  hiveTable.setSd(sd);

  metaStoreClient.createTable(hiveTable);

  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, database, table);
  ColumnStatisticsData statsData = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L));
  ColumnStatisticsObj cso1 = new ColumnStatisticsObj("id", "bigint", statsData);
  List<ColumnStatisticsObj> statsObj = Collections.singletonList(cso1);
  metaStoreClient.updateTableColumnStatistics(new ColumnStatistics(statsDesc, statsObj));

  return hiveTable;
}