org.apache.hadoop.hive.common.StatsSetupConst Java Examples

The following examples show how to use org.apache.hadoop.hive.common.StatsSetupConst. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HiveCatalogHiveMetadataTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Recreates the table at {@code path1} with every Hive statistics property set to
 * {@code inputStat} and asserts that the catalog reports {@code expectStat} for each statistic.
 *
 * @param inputStat value written into the row count, file count, total size and raw data size properties.
 * @param expectStat value expected back from {@code catalog.getTableStatistics}.
 */
private void checkStatistics(int inputStat, int expectStat) throws Exception {
	catalog.dropTable(path1, true);

	// All four statistics properties carry the same textual value.
	final String statValue = String.valueOf(inputStat);
	Map<String, String> tableProps = new HashMap<>();
	tableProps.put(CatalogConfig.IS_GENERIC, "false");
	for (String statKey : new String[] {
			StatsSetupConst.ROW_COUNT,
			StatsSetupConst.NUM_FILES,
			StatsSetupConst.TOTAL_SIZE,
			StatsSetupConst.RAW_DATA_SIZE}) {
		tableProps.put(statKey, statValue);
	}

	CatalogTable table = new CatalogTableImpl(
			TableSchema.builder().field("f0", DataTypes.INT()).build(),
			tableProps,
			"");
	catalog.createTable(path1, table, false);

	// Read the statistics back through the catalog and compare each one.
	CatalogTableStatistics actual = catalog.getTableStatistics(path1);
	assertEquals(expectStat, actual.getRowCount());
	assertEquals(expectStat, actual.getFileCount());
	assertEquals(expectStat, actual.getRawDataSize());
	assertEquals(expectStat, actual.getTotalSize());
}
 
Example #2
Source File: HiveCatalog.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Compares the given statistics with the ones stored in the Hive table parameters and, if any
 * of them differ, overwrites the stored values with the given ones.
 *
 * <p>A statistic missing from {@code parameters} is read as {@code HiveStatsUtil.DEFAULT_STATS_ZERO_CONST}.
 *
 * @param statistics catalog table statistics to compare against and, if needed, write.
 * @param parameters Hive table parameters holding the stored statistics; modified in place on a mismatch.
 * @return {@code true} if at least one statistic differed and {@code parameters} was updated.
 */
private static boolean compareAndUpdateStatisticsProperties(CatalogTableStatistics statistics, Map<String, String> parameters) {
	final String storedRowCount = parameters.getOrDefault(StatsSetupConst.ROW_COUNT, HiveStatsUtil.DEFAULT_STATS_ZERO_CONST);
	final String storedTotalSize = parameters.getOrDefault(StatsSetupConst.TOTAL_SIZE, HiveStatsUtil.DEFAULT_STATS_ZERO_CONST);
	final String storedNumFiles = parameters.getOrDefault(StatsSetupConst.NUM_FILES, HiveStatsUtil.DEFAULT_STATS_ZERO_CONST);
	final String storedRawDataSize = parameters.getOrDefault(StatsSetupConst.RAW_DATA_SIZE, HiveStatsUtil.DEFAULT_STATS_ZERO_CONST);

	// Stored values are parsed lazily, left to right, stopping at the first mismatch.
	final boolean unchanged = statistics.getRowCount() == Long.parseLong(storedRowCount)
		&& statistics.getTotalSize() == Long.parseLong(storedTotalSize)
		&& statistics.getFileCount() == Integer.parseInt(storedNumFiles)
		&& statistics.getRawDataSize() == Long.parseLong(storedRawDataSize);
	if (unchanged) {
		return false;
	}

	parameters.put(StatsSetupConst.ROW_COUNT, String.valueOf(statistics.getRowCount()));
	parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(statistics.getTotalSize()));
	parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(statistics.getFileCount()));
	parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(statistics.getRawDataSize()));
	return true;
}
 
Example #3
Source File: ReplicaTest.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the table fixture: an {@code EXTERNAL_TABLE} named {@code DB_NAME.TABLE_NAME}, stored at
 * {@code tableLocation}, with {@code PARTITIONS} as partition keys and a row count parameter of "1".
 */
private Table newTable() {
  StorageDescriptor storage = new StorageDescriptor();
  storage.setLocation(tableLocation);

  HashMap<String, String> tableParameters = new HashMap<>();
  tableParameters.put(StatsSetupConst.ROW_COUNT, "1");

  Table result = new Table();
  result.setDbName(DB_NAME);
  result.setTableName(TABLE_NAME);
  result.setTableType(TableType.EXTERNAL_TABLE.name());
  result.setSd(storage);
  result.setParameters(tableParameters);
  result.setPartitionKeys(PARTITIONS);
  return result;
}
 
Example #4
Source File: HiveMetadataUtils.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Get the stats from table properties. A stat that is missing or cannot be parsed is reported as -1.
 * CAUTION: stats may not be up-to-date with the underlying data. It is always good to run the ANALYZE command on
 * Hive table to have up-to-date stats.
 *
 * @param properties table properties to read {@link StatsSetupConst#ROW_COUNT} and
 *                   {@link StatsSetupConst#TOTAL_SIZE} from
 * @return stats built from the parsed row count and total size, each -1 when unavailable
 */
public static HiveDatasetStats getStatsFromProps(final Properties properties) {
  long numRows = -1;
  long sizeInBytes = -1;

  // Each stat is parsed in its own try block so that one malformed value does not
  // prevent the other, valid, value from being picked up.
  final String numRowsProp = properties.getProperty(StatsSetupConst.ROW_COUNT);
  if (numRowsProp != null) {
    try {
      numRows = Long.parseLong(numRowsProp);
    } catch (final NumberFormatException e) {
      logger.error("Failed to parse Hive stats in metastore.", e);
      // continue with the default.
    }
  }

  final String sizeInBytesProp = properties.getProperty(StatsSetupConst.TOTAL_SIZE);
  if (sizeInBytesProp != null) {
    try {
      sizeInBytes = Long.parseLong(sizeInBytesProp);
    } catch (final NumberFormatException e) {
      logger.error("Failed to parse Hive stats in metastore.", e);
      // continue with the default.
    }
  }

  return new HiveDatasetStats(numRows, sizeInBytes);
}
 
Example #5
Source File: ReplicaTableFactoryTest.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the identity, storage descriptor and parameters of a replica partition created by the
 * default factory in {@code FULL} replication mode.
 */
@Test
public void newPartition() {
  Path partitionPath = new Path(REPLICA_DATA_DESTINATION, REPLICA_PARTITION_SUBPATH);
  Partition replicaPartition = factory.newReplicaPartition(EVENT_ID, sourceTable, sourcePartition, DB_NAME,
      TABLE_NAME, partitionPath, FULL);

  // Identity and storage descriptor.
  assertThat(replicaPartition.getDbName(), is(sourceTable.getDbName()));
  assertThat(replicaPartition.getTableName(), is(sourceTable.getTableName()));
  assertThat(replicaPartition.getSd().getInputFormat(), is(INPUT_FORMAT));
  assertThat(replicaPartition.getSd().getOutputFormat(), is(OUTPUT_FORMAT));
  String expectedLocation = partitionPath.toUri().toString();
  assertThat(replicaPartition.getSd().getLocation(), is(expectedLocation));

  // Replication bookkeeping parameters.
  java.util.Map<String, String> parameters = replicaPartition.getParameters();
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.table"), is(DB_NAME + "." + TABLE_NAME));
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.metastore.uris"), is(SOURCE_META_STORE_URIS));
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.location"), is(PARTITION_LOCATION));
  assertThat(parameters.get("com.hotels.bdp.circustrain.replication.event"), is(EVENT_ID));
  assertThat(parameters.get("com.hotels.bdp.circustrain.last.replicated"), is(not(nullValue())));
  assertThat(parameters.get("com.hotels.bdp.circustrain.replication.mode"), is(FULL.name()));

  // Statistics-related parameters.
  assertThat(parameters.get("DO_NOT_UPDATE_STATS"), is("true"));
  assertThat(parameters.get("STATS_GENERATED_VIA_STATS_TASK"), is("true"));
  assertThat(parameters.get("STATS_GENERATED"), is("true"));
  assertThat(parameters.get(StatsSetupConst.ROW_COUNT), is("1"));
}
 
Example #6
Source File: ReplicaTableFactoryTest.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Checks a replica partition created by a factory configured with {@code PARTITION_TRANSFORMATION}:
 * the input format is expected to be "newInputFormat" while the other checked attributes keep
 * their usual values.
 */
@Test
public void newPartitionWithTransformation() {
  ReplicaTableFactory transformingFactory = new ReplicaTableFactory(SOURCE_META_STORE_URIS,
      TableTransformation.IDENTITY, PARTITION_TRANSFORMATION, ColumnStatisticsTransformation.IDENTITY);

  Path partitionPath = new Path(REPLICA_DATA_DESTINATION, REPLICA_PARTITION_SUBPATH);
  Partition replicaPartition = transformingFactory.newReplicaPartition(EVENT_ID, sourceTable, sourcePartition,
      DB_NAME, TABLE_NAME, partitionPath, FULL);

  // Identity and storage descriptor.
  assertThat(replicaPartition.getDbName(), is(sourceTable.getDbName()));
  assertThat(replicaPartition.getTableName(), is(sourceTable.getTableName()));
  assertThat(replicaPartition.getSd().getInputFormat(), is("newInputFormat"));
  assertThat(replicaPartition.getSd().getOutputFormat(), is(OUTPUT_FORMAT));
  String expectedLocation = partitionPath.toUri().toString();
  assertThat(replicaPartition.getSd().getLocation(), is(expectedLocation));

  // Replication bookkeeping parameters.
  java.util.Map<String, String> parameters = replicaPartition.getParameters();
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.table"), is(DB_NAME + "." + TABLE_NAME));
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.metastore.uris"), is(SOURCE_META_STORE_URIS));
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.location"), is(PARTITION_LOCATION));
  assertThat(parameters.get("com.hotels.bdp.circustrain.replication.event"), is(EVENT_ID));
  assertThat(parameters.get("com.hotels.bdp.circustrain.last.replicated"), is(not(nullValue())));

  // Statistics-related parameters.
  assertThat(parameters.get("DO_NOT_UPDATE_STATS"), is("true"));
  assertThat(parameters.get("STATS_GENERATED_VIA_STATS_TASK"), is("true"));
  assertThat(parameters.get("STATS_GENERATED"), is("true"));
  assertThat(parameters.get(StatsSetupConst.ROW_COUNT), is("1"));
}
 
Example #7
Source File: HiveMetadataUtils.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Get the stats from table properties. A stat that is missing or cannot be parsed is reported as -1.
 * CAUTION: stats may not be up-to-date with the underlying data. It is always good to run the ANALYZE command on
 * Hive table to have up-to-date stats.
 *
 * @param properties table properties to read {@link StatsSetupConst#ROW_COUNT} and
 *                   {@link StatsSetupConst#TOTAL_SIZE} from
 * @return stats built from the parsed row count and total size, each -1 when unavailable
 */
public static HiveDatasetStats getStatsFromProps(final Properties properties) {
  long numRows = -1;
  long sizeInBytes = -1;

  // Each stat is parsed in its own try block so that one malformed value does not
  // prevent the other, valid, value from being picked up.
  final String numRowsProp = properties.getProperty(StatsSetupConst.ROW_COUNT);
  if (numRowsProp != null) {
    try {
      numRows = Long.parseLong(numRowsProp);
    } catch (final NumberFormatException e) {
      logger.error("Failed to parse Hive stats in metastore.", e);
      // continue with the default.
    }
  }

  final String sizeInBytesProp = properties.getProperty(StatsSetupConst.TOTAL_SIZE);
  if (sizeInBytesProp != null) {
    try {
      sizeInBytes = Long.parseLong(sizeInBytesProp);
    } catch (final NumberFormatException e) {
      logger.error("Failed to parse Hive stats in metastore.", e);
      // continue with the default.
    }
  }

  return new HiveDatasetStats(numRows, sizeInBytes);
}
 
Example #8
Source File: HiveShimV1.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Alters the table through the metastore client after adding
 * {@link StatsSetupConst#DO_NOT_UPDATE_STATS} = "true" to the table parameters.
 */
@Override
public void alterTable(
		IMetaStoreClient client,
		String databaseName,
		String tableName,
		Table table) throws InvalidOperationException, MetaException, TException {
	// Hive-1.2.1: HMS must be told not to update stats, otherwise the stats stored in the table
	// parameters can be overridden. HMS removes this extra config once it has been used.
	table.getParameters().put(StatsSetupConst.DO_NOT_UPDATE_STATS, "true");
	client.alter_table(databaseName, tableName, table);
}
 
Example #9
Source File: HiveCatalog.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds catalog table statistics from the statistics entries of the given Hive table parameters.
 *
 * @param parameters Hive table parameters to read the row count, file count, total size and raw data size from.
 * @return statistics assembled from the parsed values.
 */
private static CatalogTableStatistics createCatalogTableStatistics(Map<String, String> parameters) {
	final long rowCount = parsePositiveLongStat(parameters, StatsSetupConst.ROW_COUNT);
	final int fileCount = parsePositiveIntStat(parameters, StatsSetupConst.NUM_FILES);
	final long totalSize = parsePositiveLongStat(parameters, StatsSetupConst.TOTAL_SIZE);
	final long rawDataSize = parsePositiveLongStat(parameters, StatsSetupConst.RAW_DATA_SIZE);
	return new CatalogTableStatistics(rowCount, fileCount, totalSize, rawDataSize);
}
 
Example #10
Source File: HiveCatalog.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the given statistics into the Hive table parameters, replacing any stored values.
 *
 * @param newTableStats   statistics to store.
 * @param parameters      Hive table parameters to update in place.
 */
private void updateStats(CatalogTableStatistics newTableStats, Map<String, String> parameters) {
	final String rowCount = String.valueOf(newTableStats.getRowCount());
	final String totalSize = String.valueOf(newTableStats.getTotalSize());
	final String fileCount = String.valueOf(newTableStats.getFileCount());
	final String rawDataSize = String.valueOf(newTableStats.getRawDataSize());

	parameters.put(StatsSetupConst.ROW_COUNT, rowCount);
	parameters.put(StatsSetupConst.TOTAL_SIZE, totalSize);
	parameters.put(StatsSetupConst.NUM_FILES, fileCount);
	parameters.put(StatsSetupConst.RAW_DATA_SIZE, rawDataSize);
}
 
Example #11
Source File: HiveCatalog.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Determine if statistics need to be updated or not.
 *
 * <p>Each of row count, total size, file count and raw data size is compared against the value
 * stored under its own {@link StatsSetupConst} key.
 *
 * @param newTableStats   new catalog table statistics.
 * @param parameters      original hive table statistics parameters.
 * @return                whether need to update stats.
 */
private boolean statsChanged(CatalogTableStatistics newTableStats, Map<String, String> parameters) {
	return newTableStats.getRowCount() != parsePositiveLongStat(parameters, StatsSetupConst.ROW_COUNT)
			|| newTableStats.getTotalSize() != parsePositiveLongStat(parameters, StatsSetupConst.TOTAL_SIZE)
			|| newTableStats.getFileCount() != parsePositiveIntStat(parameters, StatsSetupConst.NUM_FILES)
			// Raw data size must be checked against RAW_DATA_SIZE, not NUM_FILES.
			|| newTableStats.getRawDataSize() != parsePositiveLongStat(parameters, StatsSetupConst.RAW_DATA_SIZE);
}
 
Example #12
Source File: HiveCatalogStore.java    From tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a partition described by {@code partitionDescProto} to the given table through the Hive client.
 *
 * <p>The partition reuses the storage descriptor fetched for the table, with its location set to
 * the partition path, and stores the partition's byte count under {@link StatsSetupConst#TOTAL_SIZE}.
 * Any failure is rethrown as a {@code TajoInternalError}; the pooled client is always released.
 */
private void addPartition(String databaseName, String tableName, CatalogProtos.PartitionDescProto
  partitionDescProto) {
  HiveCatalogStoreClientPool.HiveCatalogStoreClient pooledClient = null;
  try {
    pooledClient = clientPool.getClient();

    Map<String, String> partitionParams = new HashMap<>();
    partitionParams.put(StatsSetupConst.TOTAL_SIZE, Long.toString(partitionDescProto.getNumBytes()));

    // Collect the partition values in partition-key order.
    List<String> partitionValues = Lists.newArrayList();
    for (CatalogProtos.PartitionKeyProto key : partitionDescProto.getPartitionKeysList()) {
      partitionValues.add(key.getPartitionValue());
    }

    // Start from the table's storage descriptor and point it at the partition path.
    StorageDescriptor descriptor = pooledClient.getHiveClient().getTable(databaseName, tableName).getSd();
    descriptor.setLocation(partitionDescProto.getPath());

    Partition hivePartition = new Partition();
    hivePartition.setDbName(databaseName);
    hivePartition.setTableName(tableName);
    hivePartition.setParameters(partitionParams);
    hivePartition.setValues(partitionValues);
    hivePartition.setSd(descriptor);

    pooledClient.getHiveClient().add_partition(hivePartition);
  } catch (Exception e) {
    throw new TajoInternalError(e);
  } finally {
    if (pooledClient != null) {
      pooledClient.release();
    }
  }
}
 
Example #13
Source File: ReplicaTableFactoryTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Checks a replica created from a source table turned into a {@code VIRTUAL_VIEW} with no input
 * format, output format or location: the replica must remain a view with those storage fields
 * unset and must carry no statistics.
 */
@Test
public void newView() {
  // Turn the source table into a view without storage information.
  sourceTableAndStats.getTable().setTableType(TableType.VIRTUAL_VIEW.name());
  sourceTableAndStats.getTable().getSd().setInputFormat(null);
  sourceTableAndStats.getTable().getSd().setOutputFormat(null);
  sourceTableAndStats.getTable().getSd().setLocation(null);

  TableAndStatistics replicaResult = factory.newReplicaTable(EVENT_ID, sourceTableAndStats, DB_NAME,
      TABLE_NAME, null, FULL);
  Table replicaTable = replicaResult.getTable();

  // Identity and storage descriptor.
  assertThat(replicaTable.getDbName(), is(sourceTable.getDbName()));
  assertThat(replicaTable.getTableName(), is(sourceTable.getTableName()));
  assertThat(replicaTable.getSd().getInputFormat(), is(nullValue()));
  assertThat(replicaTable.getSd().getOutputFormat(), is(nullValue()));
  assertThat(replicaTable.getSd().getLocation(), is(nullValue()));

  // Replication bookkeeping parameters.
  java.util.Map<String, String> parameters = replicaTable.getParameters();
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.table"), is(DB_NAME + "." + TABLE_NAME));
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.metastore.uris"), is(SOURCE_META_STORE_URIS));
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.location"), is(""));
  assertThat(parameters.get("com.hotels.bdp.circustrain.replication.event"), is(EVENT_ID));
  assertThat(parameters.get("com.hotels.bdp.circustrain.last.replicated"), is(not(nullValue())));
  assertThat(parameters.get("com.hotels.bdp.circustrain.replication.mode"), is(FULL.name()));

  // Statistics-related parameters.
  assertThat(parameters.get("DO_NOT_UPDATE_STATS"), is("true"));
  assertThat(parameters.get("STATS_GENERATED_VIA_STATS_TASK"), is("true"));
  assertThat(parameters.get("STATS_GENERATED"), is("true"));
  assertThat(parameters.get(StatsSetupConst.ROW_COUNT), is("1"));

  // The replica is still a view.
  assertThat(replicaTable.getTableType(), is(TableType.VIRTUAL_VIEW.name()));
  assertTrue(MetaStoreUtils.isView(replicaTable));

  assertThat(replicaResult.getStatistics(), is(nullValue()));
}
 
Example #14
Source File: ReplicaTableFactoryTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Checks a replica table created by a factory configured with {@code TABLE_TRANSFORMATION}:
 * the output format is expected to be "newOutputFormat" while the other checked attributes keep
 * their usual values, and no statistics are attached.
 */
@Test
public void newTableWithTransformation() {
  ReplicaTableFactory transformingFactory = new ReplicaTableFactory(SOURCE_META_STORE_URIS, TABLE_TRANSFORMATION,
      PartitionTransformation.IDENTITY, ColumnStatisticsTransformation.IDENTITY);

  TableAndStatistics replicaResult = transformingFactory.newReplicaTable(EVENT_ID, sourceTableAndStats, DB_NAME,
      TABLE_NAME, REPLICA_DATA_DESTINATION, FULL);
  Table replicaTable = replicaResult.getTable();

  // Identity and storage descriptor.
  assertThat(replicaTable.getDbName(), is(sourceTable.getDbName()));
  assertThat(replicaTable.getTableName(), is(sourceTable.getTableName()));
  assertThat(replicaTable.getSd().getInputFormat(), is(INPUT_FORMAT));
  assertThat(replicaTable.getSd().getOutputFormat(), is("newOutputFormat"));
  String expectedLocation = REPLICA_DATA_DESTINATION.toUri().toString();
  assertThat(replicaTable.getSd().getLocation(), is(expectedLocation));

  // Replication bookkeeping parameters.
  java.util.Map<String, String> parameters = replicaTable.getParameters();
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.table"), is(DB_NAME + "." + TABLE_NAME));
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.metastore.uris"), is(SOURCE_META_STORE_URIS));
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.location"), is(TABLE_LOCATION));
  assertThat(parameters.get("com.hotels.bdp.circustrain.replication.event"), is(EVENT_ID));
  assertThat(parameters.get("com.hotels.bdp.circustrain.last.replicated"), is(not(nullValue())));

  // Statistics-related parameters.
  assertThat(parameters.get("DO_NOT_UPDATE_STATS"), is("true"));
  assertThat(parameters.get("STATS_GENERATED_VIA_STATS_TASK"), is("true"));
  assertThat(parameters.get("STATS_GENERATED"), is("true"));
  assertThat(parameters.get(StatsSetupConst.ROW_COUNT), is("1"));

  assertThat(replicaResult.getStatistics(), is(nullValue()));
}
 
Example #15
Source File: ReplicaTableFactoryTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the identity, storage descriptor, parameters and table type of a replica table created
 * by the default factory in {@code FULL} replication mode; the replica must be an external table
 * and must carry no statistics.
 */
@Test
public void newTable() {
  TableAndStatistics replicaResult = factory.newReplicaTable(EVENT_ID, sourceTableAndStats, DB_NAME,
      TABLE_NAME, REPLICA_DATA_DESTINATION, FULL);
  Table replicaTable = replicaResult.getTable();

  // Identity and storage descriptor.
  assertThat(replicaTable.getDbName(), is(sourceTable.getDbName()));
  assertThat(replicaTable.getTableName(), is(sourceTable.getTableName()));
  assertThat(replicaTable.getSd().getInputFormat(), is(INPUT_FORMAT));
  assertThat(replicaTable.getSd().getOutputFormat(), is(OUTPUT_FORMAT));
  String expectedLocation = REPLICA_DATA_DESTINATION.toUri().toString();
  assertThat(replicaTable.getSd().getLocation(), is(expectedLocation));

  // Replication bookkeeping parameters.
  java.util.Map<String, String> parameters = replicaTable.getParameters();
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.table"), is(DB_NAME + "." + TABLE_NAME));
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.metastore.uris"), is(SOURCE_META_STORE_URIS));
  assertThat(parameters.get("com.hotels.bdp.circustrain.source.location"), is(TABLE_LOCATION));
  assertThat(parameters.get("com.hotels.bdp.circustrain.replication.event"), is(EVENT_ID));
  assertThat(parameters.get("com.hotels.bdp.circustrain.last.replicated"), is(not(nullValue())));
  assertThat(parameters.get("com.hotels.bdp.circustrain.replication.mode"), is(FULL.name()));

  // Statistics-related parameters.
  assertThat(parameters.get("DO_NOT_UPDATE_STATS"), is("true"));
  assertThat(parameters.get("STATS_GENERATED_VIA_STATS_TASK"), is("true"));
  assertThat(parameters.get("STATS_GENERATED"), is("true"));
  assertThat(parameters.get(StatsSetupConst.ROW_COUNT), is("1"));

  // The replica is an external table.
  assertThat(replicaTable.getTableType(), is(TableType.EXTERNAL_TABLE.name()));
  assertThat(parameters.get("EXTERNAL"), is("TRUE"));
  assertTrue(MetaStoreUtils.isExternalTable(replicaTable));

  assertThat(replicaResult.getStatistics(), is(nullValue()));
}
 
Example #16
Source File: ReplicaTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a partition fixture of {@code DB_NAME.TABLE_NAME} for the given partition values.
 *
 * <p>The partition is located under {@code tableLocation} at the path produced by
 * {@code partitionName(values)}, uses {@code FIELDS} as columns and has a row count parameter of "1".
 *
 * @param values partition values, in the order passed on to the partition.
 * @return the populated partition.
 */
private Partition newPartition(String... values) {
  String location = new Path(tableLocation, partitionName(values)).toUri().toString();
  StorageDescriptor storage = new StorageDescriptor();
  storage.setLocation(location);
  storage.setCols(FIELDS);

  HashMap<String, String> partitionParameters = new HashMap<>();
  partitionParameters.put(StatsSetupConst.ROW_COUNT, "1");

  Partition result = new Partition();
  result.setDbName(DB_NAME);
  result.setTableName(TABLE_NAME);
  result.setSd(storage);
  result.setParameters(partitionParameters);
  result.setValues(Arrays.asList(values));
  return result;
}
 
Example #17
Source File: HiveCatalog.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds catalog table statistics from the statistics entries of the given Hive table parameters.
 *
 * <p>An entry missing from {@code parameters} is read as {@code HiveStatsUtil.DEFAULT_STATS_ZERO_CONST}.
 *
 * @param parameters Hive table parameters to read the statistics from.
 * @return statistics assembled from the parsed values.
 */
private static CatalogTableStatistics createCatalogTableStatistics(Map<String, String> parameters) {
	final String fallback = HiveStatsUtil.DEFAULT_STATS_ZERO_CONST;
	final long rowCount = Long.parseLong(parameters.getOrDefault(StatsSetupConst.ROW_COUNT, fallback));
	final long totalSize = Long.parseLong(parameters.getOrDefault(StatsSetupConst.TOTAL_SIZE, fallback));
	final int fileCount = Integer.parseInt(parameters.getOrDefault(StatsSetupConst.NUM_FILES, fallback));
	final long rawDataSize = Long.parseLong(parameters.getOrDefault(StatsSetupConst.RAW_DATA_SIZE, fallback));
	return new CatalogTableStatistics(rowCount, fileCount, totalSize, rawDataSize);
}
 
Example #18
Source File: CLIHiveClient.java    From kylin with Apache License 2.0 4 votes vote down vote up
/**
 * Looks up the table in the metastore and returns its {@link StatsSetupConst#ROW_COUNT} basic stat,
 * as computed by {@code getBasicStatForTable}.
 */
@Override
public long getHiveTableRows(String database, String tableName) throws Exception {
    // getBasicStatForTable works on the ql.metadata wrapper, not on the metastore API table.
    final org.apache.hadoop.hive.ql.metadata.Table qlTable =
            new org.apache.hadoop.hive.ql.metadata.Table(getMetaStoreClient().getTable(database, tableName));
    return getBasicStatForTable(qlTable, StatsSetupConst.ROW_COUNT);
}
 
Example #19
Source File: HiveCatalogStore.java    From tajo with Apache License 2.0 4 votes vote down vote up
/**
 * Lists the partitions of a table that match the given filter, as reported by the Hive client.
 *
 * For example, take a table partitioned by three columns (col1, col2, col3) and the condition
 * WHERE (col1 ='1' or col1 = '100') and col3 > 20 .
 *
 * The corresponding filter string is:
 *   (col1 =\"1\" or col1 = \"100\") and col3 > 20
 *
 * Each returned descriptor carries the partition location, a partition name of the form
 * column=value joined with {@code File.separator}, and, when the partition parameters contain
 * {@link StatsSetupConst#TOTAL_SIZE}, that value as the number of bytes.
 *
 * @param databaseName database of the table.
 * @param tableName name of the partitioned table.
 * @param filter filter string passed to {@code listPartitionsByFilter}.
 * @return descriptors of the matching partitions.
 */
private List<PartitionDescProto> getPartitionsFromHiveMetaStore(String databaseName, String tableName,
                                                                       String filter) {
  HiveCatalogStoreClientPool.HiveCatalogStoreClient pooledClient = null;
  List<PartitionDescProto> result = new ArrayList<>();

  try {
    pooledClient = clientPool.getClient();

    List<Partition> matchedPartitions = pooledClient.getHiveClient().listPartitionsByFilter(databaseName,
      tableName, filter, (short) -1);

    List<ColumnProto> partitionColumns =
      getTable(databaseName, tableName).getPartition().getExpressionSchema().getFieldsList();

    for (Partition matched : matchedPartitions) {
      CatalogProtos.PartitionDescProto.Builder descBuilder = CatalogProtos.PartitionDescProto.newBuilder();
      descBuilder.setPath(matched.getSd().getLocation());

      // Assemble "col1=v1<sep>col2=v2..." from the partition columns and this partition's values.
      StringBuilder name = new StringBuilder();
      for (int index = 0; index < partitionColumns.size(); index++) {
        if (index > 0) {
          name.append(File.separator);
        }
        name.append(IdentifierUtil.extractSimpleName(partitionColumns.get(index).getName()))
          .append("=")
          .append(matched.getValues().get(index));
      }
      descBuilder.setPartitionName(name.toString());

      // The byte count is only set when the partition records a total size.
      Map<String, String> partitionParams = matched.getParameters();
      String totalSize = (partitionParams == null) ? null : partitionParams.get(StatsSetupConst.TOTAL_SIZE);
      if (totalSize != null) {
        descBuilder.setNumBytes(Long.parseLong(totalSize));
      }

      result.add(descBuilder.build());
    }
  } catch (Exception e) {
    throw new TajoInternalError(e);
  } finally {
    if (pooledClient != null) {
      pooledClient.release();
    }
  }

  return result;
}
 
Example #20
Source File: CLIHiveClient.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Looks up the table in the metastore and returns its {@link StatsSetupConst#ROW_COUNT} basic stat,
 * as computed by {@code getBasicStatForTable}.
 */
@Override
public long getHiveTableRows(String database, String tableName) throws Exception {
    // getBasicStatForTable works on the ql.metadata wrapper, not on the metastore API table.
    final org.apache.hadoop.hive.ql.metadata.Table qlTable =
            new org.apache.hadoop.hive.ql.metadata.Table(getMetaStoreClient().getTable(database, tableName));
    return getBasicStatForTable(qlTable, StatsSetupConst.ROW_COUNT);
}
 
Example #21
Source File: GlueMetastoreClientDelegate.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 4 votes vote down vote up
/**
 * Tells whether the environment context requests a cascading operation.
 *
 * @param environmentContext context to inspect; may be {@code null}.
 * @return {@code true} only if the context is non-null, has properties set and maps
 *         {@link StatsSetupConst#CASCADE} to {@link StatsSetupConst#TRUE}.
 */
private boolean isCascade(EnvironmentContext environmentContext) {
  if (environmentContext == null || !environmentContext.isSetProperties()) {
    return false;
  }
  return StatsSetupConst.TRUE.equals(environmentContext.getProperties().get(StatsSetupConst.CASCADE));
}
 
Example #22
Source File: HiveClient.java    From Kylin with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the {@link StatsSetupConst#TOTAL_SIZE} basic stat of the given table, as computed by
 * {@code getBasicStatForTable}.
 *
 * @param table metastore table to inspect.
 */
public long getFileSizeForTable(Table table) {
    final org.apache.hadoop.hive.ql.metadata.Table qlTable = new org.apache.hadoop.hive.ql.metadata.Table(table);
    return getBasicStatForTable(qlTable, StatsSetupConst.TOTAL_SIZE);
}
 
Example #23
Source File: HiveClient.java    From Kylin with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the {@link StatsSetupConst#NUM_FILES} basic stat of the given table, as computed by
 * {@code getBasicStatForTable}.
 *
 * @param table metastore table to inspect.
 */
public long getFileNumberForTable(Table table) {
    final org.apache.hadoop.hive.ql.metadata.Table qlTable = new org.apache.hadoop.hive.ql.metadata.Table(table);
    return getBasicStatForTable(qlTable, StatsSetupConst.NUM_FILES);
}