Java Code Examples for org.apache.hadoop.hive.metastore.api.StorageDescriptor#setParameters()

The following examples show how to use org.apache.hadoop.hive.metastore.api.StorageDescriptor#setParameters() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HiveUtils.java    From kite with Apache License 2.0 6 votes vote down vote up
static Table createEmptyTable(String namespace, String name) {
  Table table = new Table();
  table.setDbName(namespace);
  table.setTableName(name);
  table.setPartitionKeys(new ArrayList<FieldSchema>());
  table.setParameters(new HashMap<String, String>());

  StorageDescriptor sd = new StorageDescriptor();
  sd.setSerdeInfo(new SerDeInfo());
  sd.setNumBuckets(-1);
  sd.setBucketCols(new ArrayList<String>());
  sd.setCols(new ArrayList<FieldSchema>());
  sd.setParameters(new HashMap<String, String>());
  sd.setSortCols(new ArrayList<Order>());
  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  SkewedInfo skewInfo = new SkewedInfo();
  skewInfo.setSkewedColNames(new ArrayList<String>());
  skewInfo.setSkewedColValues(new ArrayList<List<String>>());
  skewInfo.setSkewedColValueLocationMaps(new HashMap<List<String>, String>());
  sd.setSkewedInfo(skewInfo);
  table.setSd(sd);

  return table;
}
 
Example 2
Source File: AbstractMetastoreTestWithStaticConfiguration.java    From incubator-sentry with Apache License 2.0 6 votes vote down vote up
public Table makeMetastoreTableObject(HiveMetaStoreClient client,
    String dbName, String tabName, List<FieldSchema> cols) throws Exception {
  Table tbl = new Table();
  tbl.setDbName(dbName);
  tbl.setTableName(tabName);
  StorageDescriptor sd = new StorageDescriptor();
  tbl.setSd(sd);
  tbl.setParameters(new HashMap<String, String>());
  sd.setCols(cols);
  sd.setCompressed(false);
  sd.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(new SerDeInfo());
  sd.getSerdeInfo().setName(tbl.getTableName());
  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().getParameters()
      .put(serdeConstants.SERIALIZATION_FORMAT, "1");
  sd.setSortCols(new ArrayList<Order>());
  return tbl;
}
 
Example 3
Source File: TestUtils.java    From waggle-dance with Apache License 2.0 6 votes vote down vote up
static Table createPartitionedTable(HiveMetaStoreClient metaStoreClient, String database, String table, File location)
  throws Exception {

  Table hiveTable = new Table();
  hiveTable.setDbName(database);
  hiveTable.setTableName(table);
  hiveTable.setTableType(TableType.EXTERNAL_TABLE.name());
  hiveTable.putToParameters("EXTERNAL", "TRUE");

  hiveTable.setPartitionKeys(PARTITION_COLUMNS);

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(DATA_COLUMNS);
  sd.setLocation(location.toURI().toString());
  sd.setParameters(new HashMap<>());
  sd.setSerdeInfo(new SerDeInfo());

  hiveTable.setSd(sd);

  metaStoreClient.createTable(hiveTable);

  return hiveTable;
}
 
Example 4
Source File: TestUtils.java    From waggle-dance with Apache License 2.0 6 votes vote down vote up
static Table createUnpartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    File location)
  throws TException {
  Table hiveTable = new Table();
  hiveTable.setDbName(database);
  hiveTable.setTableName(table);
  hiveTable.setTableType(TableType.EXTERNAL_TABLE.name());
  hiveTable.putToParameters("EXTERNAL", "TRUE");

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(DATA_COLUMNS);
  sd.setLocation(location.toURI().toString());
  sd.setParameters(new HashMap<>());
  sd.setSerdeInfo(new SerDeInfo());

  hiveTable.setSd(sd);

  metaStoreClient.createTable(hiveTable);

  return hiveTable;
}
 
Example 5
Source File: TestUtils.java    From circus-train with Apache License 2.0 6 votes vote down vote up
private static Table createView(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String view,
    String table,
    List<FieldSchema> partitionCols)
  throws TException {
  Table hiveView = new Table();
  hiveView.setDbName(database);
  hiveView.setTableName(view);
  hiveView.setTableType(TableType.VIRTUAL_VIEW.name());
  hiveView.setViewOriginalText(hql(database, table));
  hiveView.setViewExpandedText(expandHql(database, table, DATA_COLUMNS, partitionCols));
  hiveView.setPartitionKeys(partitionCols);

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(DATA_COLUMNS);
  sd.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(new SerDeInfo());
  hiveView.setSd(sd);

  metaStoreClient.createTable(hiveView);

  return hiveView;
}
 
Example 6
Source File: CatalogToHiveConverter.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 6 votes vote down vote up
public static StorageDescriptor convertStorageDescriptor(com.amazonaws.services.glue.model.StorageDescriptor catalogSd) {
  StorageDescriptor hiveSd = new StorageDescriptor();
  hiveSd.setCols(convertFieldSchemaList(catalogSd.getColumns()));
  hiveSd.setLocation(catalogSd.getLocation());
  hiveSd.setInputFormat(catalogSd.getInputFormat());
  hiveSd.setOutputFormat(catalogSd.getOutputFormat());
  hiveSd.setCompressed(catalogSd.getCompressed());
  hiveSd.setNumBuckets(catalogSd.getNumberOfBuckets());
  hiveSd.setSerdeInfo(convertSerDeInfo(catalogSd.getSerdeInfo()));
  hiveSd.setBucketCols(firstNonNull(catalogSd.getBucketColumns(), Lists.<String>newArrayList()));
  hiveSd.setSortCols(convertOrderList(catalogSd.getSortColumns()));
  hiveSd.setParameters(firstNonNull(catalogSd.getParameters(), Maps.<String, String>newHashMap()));
  hiveSd.setSkewedInfo(convertSkewedInfo(catalogSd.getSkewedInfo()));
  hiveSd.setStoredAsSubDirectories(catalogSd.getStoredAsSubDirectories());

  return hiveSd;
}
 
Example 7
Source File: TestUtils.java    From circus-train with Apache License 2.0 5 votes vote down vote up
public static Partition newPartition(String database, String tableName, String partitionValue) {
  Partition partition = new Partition();
  partition.setDbName(database);
  partition.setTableName(tableName);
  partition.setCreateTime(CREATE_TIME);
  partition.setValues(ImmutableList.of(partitionValue));

  Map<String, List<PrivilegeGrantInfo>> userPrivileges = new HashMap<>();
  userPrivileges.put("read", ImmutableList.of(new PrivilegeGrantInfo()));
  PrincipalPrivilegeSet privileges = new PrincipalPrivilegeSet();
  privileges.setUserPrivileges(userPrivileges);
  partition.setPrivileges(privileges);

  StorageDescriptor storageDescriptor = new StorageDescriptor();
  storageDescriptor.setCols(COLS);
  storageDescriptor.setInputFormat(INPUT_FORMAT);
  storageDescriptor.setOutputFormat(OUTPUT_FORMAT);
  storageDescriptor.setSerdeInfo(new SerDeInfo(SERDE_INFO_NAME, SERIALIZATION_LIB, new HashMap<String, String>()));
  storageDescriptor.setSkewedInfo(new SkewedInfo());
  storageDescriptor.setParameters(new HashMap<String, String>());
  storageDescriptor.setLocation(DATABASE + "/" + tableName + "/" + partitionValue + "/");
  partition.setSd(storageDescriptor);

  Map<String, String> parameters = new HashMap<>();
  parameters.put("com.company.parameter", "abc");
  partition.setParameters(parameters);

  return partition;
}
 
Example 8
Source File: HiveMetaStoreUtils.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
private static StorageDescriptor getStorageDescriptor(HiveRegistrationUnit unit) {
  State props = unit.getStorageProps();
  StorageDescriptor sd = new StorageDescriptor();
  sd.setParameters(getParameters(props));
  //Treat AVRO and other formats differently. Details can be found in GOBBLIN-877
  if (unit.isRegisterSchema() ||
      (unit.getInputFormat().isPresent() && !unit.getInputFormat().get().equals(AvroContainerInputFormat.class.getName()))) {
    sd.setCols(getFieldSchemas(unit));
  }
  if (unit.getLocation().isPresent()) {
    sd.setLocation(unit.getLocation().get());
  }
  if (unit.getInputFormat().isPresent()) {
    sd.setInputFormat(unit.getInputFormat().get());
  }
  if (unit.getOutputFormat().isPresent()) {
    sd.setOutputFormat(unit.getOutputFormat().get());
  }
  if (unit.getIsCompressed().isPresent()) {
    sd.setCompressed(unit.getIsCompressed().get());
  }
  if (unit.getNumBuckets().isPresent()) {
    sd.setNumBuckets(unit.getNumBuckets().get());
  }
  if (unit.getBucketColumns().isPresent()) {
    sd.setBucketCols(unit.getBucketColumns().get());
  }
  if (unit.getIsStoredAsSubDirs().isPresent()) {
    sd.setStoredAsSubDirectories(unit.getIsStoredAsSubDirs().get());
  }
  sd.setSerdeInfo(getSerDeInfo(unit));
  return sd;
}
 
Example 9
Source File: HiveConnectorPartitionService.java    From metacat with Apache License 2.0 5 votes vote down vote up
private void copyTableSdToPartitionSd(final List<Partition> hivePartitions, final Table table) {
    //
    // Update the partition info based on that of the table.
    //
    for (Partition partition : hivePartitions) {
        final StorageDescriptor sd = partition.getSd();
        final StorageDescriptor tableSdCopy = table.getSd().deepCopy();
        if (tableSdCopy.getSerdeInfo() == null) {
            final SerDeInfo serDeInfo = new SerDeInfo(null, null, new HashMap<>());
            tableSdCopy.setSerdeInfo(serDeInfo);
        }

        tableSdCopy.setLocation(sd.getLocation());
        if (!Strings.isNullOrEmpty(sd.getInputFormat())) {
            tableSdCopy.setInputFormat(sd.getInputFormat());
        }
        if (!Strings.isNullOrEmpty(sd.getOutputFormat())) {
            tableSdCopy.setOutputFormat(sd.getOutputFormat());
        }
        if (sd.getParameters() != null && !sd.getParameters().isEmpty()) {
            tableSdCopy.setParameters(sd.getParameters());
        }
        if (sd.getSerdeInfo() != null) {
            if (!Strings.isNullOrEmpty(sd.getSerdeInfo().getName())) {
                tableSdCopy.getSerdeInfo().setName(sd.getSerdeInfo().getName());
            }
            if (!Strings.isNullOrEmpty(sd.getSerdeInfo().getSerializationLib())) {
                tableSdCopy.getSerdeInfo().setSerializationLib(sd.getSerdeInfo().getSerializationLib());
            }
            if (sd.getSerdeInfo().getParameters() != null && !sd.getSerdeInfo().getParameters().isEmpty()) {
                tableSdCopy.getSerdeInfo().setParameters(sd.getSerdeInfo().getParameters());
            }
        }
        partition.setSd(tableSdCopy);
    }
}
 
Example 10
Source File: PartitionTransformationTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
@Before
public void init() {
  partition = new Partition();
  partition.setDbName("database");
  partition.setTableName("table");
  partition.setValues(ImmutableList.of("part"));

  Map<String, List<PrivilegeGrantInfo>> userPrivileges = new HashMap<>();
  userPrivileges.put("read", ImmutableList.of(new PrivilegeGrantInfo()));
  PrincipalPrivilegeSet privileges = new PrincipalPrivilegeSet();
  privileges.setUserPrivileges(userPrivileges);
  partition.setPrivileges(privileges);

  StorageDescriptor storageDescriptor = new StorageDescriptor();
  storageDescriptor.setCols(Arrays.asList(new FieldSchema("a", "int", null)));
  storageDescriptor.setInputFormat("input_format");
  storageDescriptor.setOutputFormat("output_format");
  storageDescriptor.setSerdeInfo(new SerDeInfo("serde", "lib", new HashMap<String, String>()));
  storageDescriptor.setSkewedInfo(new SkewedInfo());
  storageDescriptor.setParameters(new HashMap<String, String>());
  storageDescriptor.setLocation("database/table/part/");
  partition.setSd(storageDescriptor);

  Map<String, String> parameters = new HashMap<>();
  parameters.put("com.company.parameter", "abc");
  partition.setParameters(parameters);
}
 
Example 11
Source File: TableTransformationTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
@Before
public void init() {
  table = new Table();
  table.setDbName("database");
  table.setTableName("table");
  table.setTableType("type");

  Map<String, List<PrivilegeGrantInfo>> userPrivileges = new HashMap<>();
  userPrivileges.put("read", ImmutableList.of(new PrivilegeGrantInfo()));
  PrincipalPrivilegeSet privileges = new PrincipalPrivilegeSet();
  privileges.setUserPrivileges(userPrivileges);
  table.setPrivileges(privileges);

  StorageDescriptor storageDescriptor = new StorageDescriptor();
  storageDescriptor.setCols(Arrays.asList(new FieldSchema("a", "int", null)));
  storageDescriptor.setInputFormat("input_format");
  storageDescriptor.setOutputFormat("output_format");
  storageDescriptor.setSerdeInfo(new SerDeInfo("serde", "lib", new HashMap<String, String>()));
  storageDescriptor.setSkewedInfo(new SkewedInfo());
  storageDescriptor.setParameters(new HashMap<String, String>());
  storageDescriptor.setLocation("database/table/");
  table.setSd(storageDescriptor);

  Map<String, String> parameters = new HashMap<>();
  parameters.put("com.company.parameter", "abc");
  table.setParameters(parameters);
}
 
Example 12
Source File: TestUtils.java    From circus-train with Apache License 2.0 5 votes vote down vote up
public static Table newTable(String database, String tableName) {
  Table table = new Table();
  table.setDbName(database);
  table.setTableName(tableName);
  table.setTableType(TABLE_TYPE);
  table.setOwner(OWNER);
  table.setCreateTime(CREATE_TIME);
  table.setRetention(RETENTION);

  Map<String, List<PrivilegeGrantInfo>> userPrivileges = new HashMap<>();
  userPrivileges.put("read", ImmutableList.of(new PrivilegeGrantInfo()));
  PrincipalPrivilegeSet privileges = new PrincipalPrivilegeSet();
  privileges.setUserPrivileges(userPrivileges);
  table.setPrivileges(privileges);

  StorageDescriptor storageDescriptor = new StorageDescriptor();
  storageDescriptor.setCols(COLS);
  storageDescriptor.setInputFormat(INPUT_FORMAT);
  storageDescriptor.setOutputFormat(OUTPUT_FORMAT);
  storageDescriptor.setSerdeInfo(new SerDeInfo(SERDE_INFO_NAME, SERIALIZATION_LIB, new HashMap<String, String>()));
  storageDescriptor.setSkewedInfo(new SkewedInfo());
  storageDescriptor.setParameters(new HashMap<String, String>());
  storageDescriptor.setLocation(DATABASE + "/" + tableName + "/");
  table.setSd(storageDescriptor);

  Map<String, String> parameters = new HashMap<>();
  parameters.put("com.company.parameter", "abc");
  table.setParameters(parameters);

  return table;
}
 
Example 13
Source File: TestUtils.java    From circus-train with Apache License 2.0 5 votes vote down vote up
public static Table createUnpartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    URI location)
  throws TException {
  Table hiveTable = new Table();
  hiveTable.setDbName(database);
  hiveTable.setTableName(table);
  hiveTable.setTableType(TableType.EXTERNAL_TABLE.name());
  hiveTable.putToParameters("EXTERNAL", "TRUE");

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(DATA_COLUMNS);
  sd.setLocation(location.toString());
  sd.setParameters(new HashMap<String, String>());
  sd.setInputFormat(TextInputFormat.class.getName());
  sd.setOutputFormat(TextOutputFormat.class.getName());
  sd.setSerdeInfo(new SerDeInfo());
  sd.getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.OpenCSVSerde");

  hiveTable.setSd(sd);

  metaStoreClient.createTable(hiveTable);

  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, database, table);
  ColumnStatisticsData statsData = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L));
  ColumnStatisticsObj cso1 = new ColumnStatisticsObj("id", "bigint", statsData);
  List<ColumnStatisticsObj> statsObj = Collections.singletonList(cso1);
  metaStoreClient.updateTableColumnStatistics(new ColumnStatistics(statsDesc, statsObj));

  return hiveTable;
}
 
Example 14
Source File: FilterToolIntegrationTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
private void createTable(File sourceTableUri) throws Exception {
  File partitionEurope = new File(sourceTableUri, "local_date=2000-01-01");
  File partitionUk = new File(partitionEurope, "local_hour=0");
  File dataFileUk = new File(partitionUk, PART_00000);
  FileUtils.writeStringToFile(dataFileUk, "1\tadam\tlondon\n2\tsusan\tglasgow\n");

  File partitionAsia = new File(sourceTableUri, "local_date=2000-01-02");
  File partitionChina = new File(partitionAsia, "local_hour=0");
  File dataFileChina = new File(partitionChina, PART_00000);
  String data = "1\tchun\tbeijing\n2\tshanghai\tmilan\n";
  FileUtils.writeStringToFile(dataFileChina, data);

  HiveMetaStoreClient sourceClient = sourceCatalog.client();

  Table source = new Table();
  source.setDbName(DATABASE);
  source.setTableName(TABLE);
  source.setTableType(TableType.EXTERNAL_TABLE.name());
  source.setParameters(new HashMap<String, String>());

  List<FieldSchema> partitionColumns = Arrays.asList(new FieldSchema("local_date", "string", ""),
      new FieldSchema("local_hour", "string", ""));
  source.setPartitionKeys(partitionColumns);

  List<FieldSchema> dataColumns = Arrays.asList(new FieldSchema("id", "bigint", ""),
      new FieldSchema("name", "string", ""), new FieldSchema("city", "tinyint", ""));

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(dataColumns);
  sd.setLocation(sourceTableUri.toURI().toString());
  sd.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(new SerDeInfo());

  source.setSd(sd);

  sourceClient.createTable(source);
  LOG.info(">>>> Partitions added: {}",
      +sourceClient.add_partitions(Arrays.asList(newPartition(sd, Arrays.asList("2000-01-01", "0"), partitionUk),
          newPartition(sd, Arrays.asList("2000-01-02", "0"), partitionChina))));
}
 
Example 15
Source File: ThriftMetastoreUtil.java    From presto with Apache License 2.0 5 votes vote down vote up
private static StorageDescriptor makeStorageDescriptor(String tableName, List<Column> columns, Storage storage)
{
    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(tableName);
    serdeInfo.setSerializationLib(storage.getStorageFormat().getSerDeNullable());
    serdeInfo.setParameters(storage.getSerdeParameters());

    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation(emptyToNull(storage.getLocation()));
    sd.setCols(columns.stream()
            .map(ThriftMetastoreUtil::toMetastoreApiFieldSchema)
            .collect(toImmutableList()));
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat(storage.getStorageFormat().getInputFormatNullable());
    sd.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable());
    sd.setSkewedInfoIsSet(storage.isSkewed());
    sd.setParameters(ImmutableMap.of());

    Optional<HiveBucketProperty> bucketProperty = storage.getBucketProperty();
    if (bucketProperty.isPresent()) {
        sd.setNumBuckets(bucketProperty.get().getBucketCount());
        sd.setBucketCols(bucketProperty.get().getBucketedBy());
        if (!bucketProperty.get().getSortedBy().isEmpty()) {
            sd.setSortCols(bucketProperty.get().getSortedBy().stream()
                    .map(column -> new Order(column.getColumnName(), column.getOrder().getHiveOrder()))
                    .collect(toImmutableList()));
        }
    }

    return sd;
}
 
Example 16
Source File: HiveDifferencesIntegrationTest.java    From circus-train with Apache License 2.0 4 votes vote down vote up
private void createTable(
    String databaseName,
    String tableName,
    File tableLocation,
    String sourceTable,
    String sourceLocation,
    boolean addChecksum)
  throws Exception {
  File partition0 = createPartitionData("part=0", tableLocation, Arrays.asList("1\tadam", "2\tsusan"));
  File partition1 = createPartitionData("part=1", tableLocation, Arrays.asList("3\tchun", "4\tkim"));

  Table table = new Table();
  table.setDbName(databaseName);
  table.setTableName(tableName);
  table.setTableType(TableType.EXTERNAL_TABLE.name());
  table.setParameters(new HashMap<String, String>());
  if (sourceTable != null) {
    table.getParameters().put(CircusTrainTableParameter.SOURCE_TABLE.parameterName(), sourceTable);
  }
  if (sourceLocation != null) {
    table.getParameters().put(CircusTrainTableParameter.SOURCE_LOCATION.parameterName(), sourceLocation);
  }

  List<FieldSchema> partitionColumns = Arrays.asList(PARTITION_COL);
  table.setPartitionKeys(partitionColumns);

  List<FieldSchema> dataColumns = Arrays.asList(FOO_COL, BAR_COL);

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(dataColumns);
  sd.setLocation(tableLocation.toURI().toString());
  sd.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(new SerDeInfo());

  table.setSd(sd);

  HiveMetaStoreClient client = catalog.client();
  client.createTable(table);
  LOG
      .info(">>>> Partitions added: {}",
          +client
              .add_partitions(Arrays
                  .asList(
                      newPartition(databaseName, tableName, sd, Arrays.asList("0"), partition0, sourceTable,
                          sourceLocation + "part=0", addChecksum),
                      newPartition(databaseName, tableName, sd, Arrays.asList("1"), partition1, sourceTable,
                          sourceLocation + "part=1", addChecksum))));
}
 
Example 17
Source File: TestUtils.java    From circus-train with Apache License 2.0 4 votes vote down vote up
public static Table createPartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    URI location,
    List<FieldSchema> columns,
    List<FieldSchema> partitionKeys,
    String serializationLib,
    String inputFormatClassName,
    String outputFormatClassName)
    throws Exception {

  Table hiveTable = new Table();
  hiveTable.setDbName(database);
  hiveTable.setTableName(table);
  hiveTable.setTableType(TableType.EXTERNAL_TABLE.name());
  hiveTable.putToParameters("EXTERNAL", "TRUE");

  hiveTable.setPartitionKeys(partitionKeys);

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(columns);
  sd.setLocation(location.toString());
  sd.setParameters(new HashMap<String, String>());
  sd.setInputFormat(inputFormatClassName);
  sd.setOutputFormat(outputFormatClassName);
  sd.setSerdeInfo(new SerDeInfo());
  sd.getSerdeInfo().setSerializationLib(serializationLib);

  hiveTable.setSd(sd);

  metaStoreClient.createTable(hiveTable);

  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, database, table);
  ColumnStatisticsData statsData = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L));
  ColumnStatisticsObj cso1 = new ColumnStatisticsObj("id", "bigint", statsData);
  List<ColumnStatisticsObj> statsObj = Collections.singletonList(cso1);
  metaStoreClient.updateTableColumnStatistics(new ColumnStatistics(statsDesc, statsObj));

  return hiveTable;
}
 
Example 18
Source File: HiveConvertersImpl.java    From metacat with Apache License 2.0 4 votes vote down vote up
private StorageDescriptor fromStorageDto(@Nullable final StorageDto storageDto, @Nullable final String serdeName) {
    //
    // Set all required fields to null. This is to simulate Hive behavior.
    // Setting it to empty string failed certain hive operations.
    //
    final StorageDescriptor result = new StorageDescriptor();
    String inputFormat = null;
    String location = null;
    String outputFormat = null;
    String serializationLib = null;
    Map<String, String> sdParams = Maps.newHashMap();
    Map<String, String> serdeParams = Maps.newHashMap();

    if (storageDto != null) {
        if (storageDto.getInputFormat() != null) {
            inputFormat = storageDto.getInputFormat();
        }
        if (storageDto.getUri() != null) {
            location = storageDto.getUri();
        }
        if (storageDto.getOutputFormat() != null) {
            outputFormat = storageDto.getOutputFormat();
        }
        if (storageDto.getSerializationLib() != null) {
            serializationLib = storageDto.getSerializationLib();
        }
        if (storageDto.getParameters() != null) {
            sdParams = storageDto.getParameters();
        }
        if (storageDto.getSerdeInfoParameters() != null) {
            serdeParams = storageDto.getSerdeInfoParameters();
        }
    }
    result.setSerdeInfo(new SerDeInfo(serdeName, serializationLib, serdeParams));
    result.setBucketCols(Collections.emptyList());
    result.setSortCols(Collections.emptyList());
    result.setInputFormat(inputFormat);
    result.setLocation(location);
    result.setOutputFormat(outputFormat);
    result.setCols(Collections.emptyList());
    // Setting an empty skewed info.
    result.setSkewedInfo(new SkewedInfo(Collections.emptyList(), Collections.emptyList(), Collections.emptyMap()));
    result.setParameters(sdParams);
    return result;
}
 
Example 19
Source File: ComparisonToolIntegrationTest.java    From circus-train with Apache License 2.0 4 votes vote down vote up
private void createReplicaTable() throws Exception {
  File partitionEurope = new File(replicaTableUri, "local_date=2000-01-01");
  File partitionUk = new File(partitionEurope, "local_hour=0");
  File dataFileUk = new File(partitionUk, PART_00000);
  FileUtils.writeStringToFile(dataFileUk, "1\tadam\tlondon\tuk\n2\tsusan\tglasgow\tuk\n");

  File partitionAsia = new File(replicaTableUri, "local_date=2000-01-02");
  File partitionChina = new File(partitionAsia, "local_hour=0");
  File dataFileChina = new File(partitionChina, PART_00000);
  String data = "1\tchun\tbeijing\tchina\n2\tshanghai\tmilan\titaly\n";
  FileUtils.writeStringToFile(dataFileChina, data);

  HiveMetaStoreClient replicaClient = catalog.client();

  Table replica = new Table();
  replica.setDbName(DATABASE);
  replica.setTableName(REPLICA_TABLE);
  replica.setTableType(TableType.EXTERNAL_TABLE.name());
  Map<String, String> parameters = new HashMap<>();
  parameters.put("comment", "comment replica");
  replica.setParameters(parameters);
  List<FieldSchema> partitionColumns = Arrays.asList(new FieldSchema("local_date", "string", ""),
      new FieldSchema("local_hour", "string", ""));
  replica.setPartitionKeys(partitionColumns);

  List<FieldSchema> dataColumns = Arrays.asList(new FieldSchema("id", "bigint", ""),
      new FieldSchema("name", "string", ""), new FieldSchema("city", "string", ""),
      new FieldSchema("country", "string", ""));

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(dataColumns);
  sd.setLocation(replicaTableUri.toURI().toString());
  sd.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(new SerDeInfo());

  replica.setSd(sd);

  replicaClient.createTable(replica);
  LOG.info(">>>> Partitions added: {}",
      +replicaClient.add_partitions(
          Arrays.asList(newPartition(REPLICA_TABLE, sd, Arrays.asList("2000-01-01", "0"), partitionUk),
              newPartition(REPLICA_TABLE, sd, Arrays.asList("2000-01-02", "0"), partitionChina))));
}
 
Example 20
Source File: ComparisonToolIntegrationTest.java    From circus-train with Apache License 2.0 4 votes vote down vote up
private void createSourceTable() throws Exception {
  File partitionEurope = new File(sourceTableUri, "local_date=2000-01-01");
  File partitionUk = new File(partitionEurope, "local_hour=0");
  File dataFileUk = new File(partitionUk, PART_00000);
  FileUtils.writeStringToFile(dataFileUk, "1\tadam\tlondon\n2\tsusan\tglasgow\n");

  File partitionAsia = new File(sourceTableUri, "local_date=2000-01-02");
  File partitionChina = new File(partitionAsia, "local_hour=0");
  File dataFileChina = new File(partitionChina, PART_00000);
  String data = "1\tchun\tbeijing\n2\tshanghai\tmilan\n";
  FileUtils.writeStringToFile(dataFileChina, data);

  HiveMetaStoreClient sourceClient = catalog.client();

  Table source = new Table();
  source.setDbName(DATABASE);
  source.setTableName(SOURCE_TABLE);
  source.setTableType(TableType.EXTERNAL_TABLE.name());
  Map<String, String> parameters = new HashMap<>();
  parameters.put("comment", "comment source");
  source.setParameters(parameters);

  List<FieldSchema> partitionColumns = Arrays.asList(new FieldSchema("local_date", "string", ""),
      new FieldSchema("local_hour", "string", ""));
  source.setPartitionKeys(partitionColumns);

  List<FieldSchema> dataColumns = Arrays.asList(new FieldSchema("id", "bigint", ""),
      new FieldSchema("name", "string", ""), new FieldSchema("city", "string", ""));

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(dataColumns);
  sd.setLocation(sourceTableUri.toURI().toString());
  sd.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(new SerDeInfo());

  source.setSd(sd);

  sourceClient.createTable(source);
  LOG.info(">>>> Partitions added: {}",
      +sourceClient
          .add_partitions(Arrays.asList(newPartition(SOURCE_TABLE, sd, Arrays.asList("2000-01-01", "0"), partitionUk),
              newPartition(SOURCE_TABLE, sd, Arrays.asList("2000-01-02", "0"), partitionChina))));
}