Java Code Examples for org.apache.hadoop.hive.metastore.api.Table#putToParameters()

The following examples show how to use org.apache.hadoop.hive.metastore.api.Table#putToParameters(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroHiveTableStrategy.java    From data-highway with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the metastore definition of an external table partitioned by one string column.
 *
 * <p>The table is marked external through both its table type and the {@code EXTERNAL}
 * parameter, and the resolved schema URI and the schema version are stored as table parameters.
 *
 * @param databaseName database name set on the table
 * @param tableName table name set on the table
 * @param partitionColumnName name of the single {@code string} partition key
 * @param location location passed to {@code AvroStorageDescriptorFactory.create}
 * @param schema schema handed to the URI resolver
 * @param version schema version, handed to the URI resolver and stored as a parameter
 * @return the populated table object
 */
@Override
public Table newHiveTable(
    String databaseName,
    String tableName,
    String partitionColumnName,
    String location,
    Schema schema,
    int version) {

  Table hiveTable = new Table();
  hiveTable.setDbName(databaseName);
  hiveTable.setTableName(tableName);

  // Mark the table as external via the table type and the EXTERNAL parameter.
  hiveTable.setTableType(TableType.EXTERNAL_TABLE.toString());
  hiveTable.putToParameters("EXTERNAL", "TRUE");
  addRoadAnnotations(hiveTable);

  String schemaUrl = uriResolver.resolve(schema, hiveTable.getTableName(), version).toString();
  hiveTable.putToParameters(AVRO_SCHEMA_URL, schemaUrl);
  hiveTable.putToParameters(AVRO_SCHEMA_VERSION, Integer.toString(version));

  FieldSchema partitionKey = new FieldSchema(partitionColumnName, "string", null);
  hiveTable.setPartitionKeys(Arrays.asList(partitionKey));

  hiveTable.setSd(AvroStorageDescriptorFactory.create(location));
  return hiveTable;
}
 
Example 2
Source File: TestUtils.java    From waggle-dance with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an external, unpartitioned table through the given metastore client.
 *
 * <p>The storage descriptor uses {@code DATA_COLUMNS}, the URI form of {@code location}, an empty
 * parameter map and a default {@code SerDeInfo}.
 *
 * @param metaStoreClient client used to create the table
 * @param database database name set on the table
 * @param table table name set on the table
 * @param location directory whose URI becomes the table location
 * @return the table object that was submitted to the metastore
 * @throws TException if the metastore call fails
 */
static Table createUnpartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    File location)
  throws TException {
  StorageDescriptor storage = new StorageDescriptor();
  storage.setCols(DATA_COLUMNS);
  storage.setLocation(location.toURI().toString());
  storage.setParameters(new HashMap<>());
  storage.setSerdeInfo(new SerDeInfo());

  Table definition = new Table();
  definition.setDbName(database);
  definition.setTableName(table);
  definition.setTableType(TableType.EXTERNAL_TABLE.name());
  definition.putToParameters("EXTERNAL", "TRUE");
  definition.setSd(storage);

  metaStoreClient.createTable(definition);
  return definition;
}
 
Example 3
Source File: IntegrationTestHelper.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the source partitioned table, switches it to a managed table and adds two partitions
 * (Europe/UK and Asia/China), each backed by one tab-separated data file.
 *
 * @param sourceTableUri base location of the table; partition folders are created below it
 */
void createManagedPartitionedTable(URI sourceTableUri) throws Exception {
  TestUtils.createPartitionedTable(metaStoreClient, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, sourceTableUri);

  // Re-read the table and flip it from external to managed.
  Table managedTable = metaStoreClient.getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  managedTable.setTableType(TableType.MANAGED_TABLE.name());
  managedTable.putToParameters("EXTERNAL", "FALSE");
  metaStoreClient.alter_table(managedTable.getDbName(), managedTable.getTableName(), managedTable);

  // continent=Europe/country=UK
  URI europeUri = URI.create(sourceTableUri + "/continent=Europe");
  URI ukUri = URI.create(europeUri + "/country=UK");
  FileUtils.writeStringToFile(new File(ukUri.getPath(), PART_00000), "1\tadam\tlondon\n2\tsusan\tglasgow\n");

  // continent=Asia/country=China
  URI asiaUri = URI.create(sourceTableUri + "/continent=Asia");
  URI chinaUri = URI.create(asiaUri + "/country=China");
  FileUtils.writeStringToFile(new File(chinaUri.getPath(), PART_00000), "1\tchun\tbeijing\n2\tshanghai\tmilan\n");

  int partitionsAdded = metaStoreClient.add_partitions(Arrays.asList(
      newTablePartition(managedTable, Arrays.asList("Europe", "UK"), ukUri),
      newTablePartition(managedTable, Arrays.asList("Asia", "China"), chinaUri)));
  LOG.info(">>>> Partitions added: {}", partitionsAdded);
}
 
Example 4
Source File: AvroSerDeTransformation.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Copies the table's Avro schema below {@code avroSchemaDestination/eventId} and repoints the
 * table's schema URL parameter at the copy.
 *
 * @param table table whose schema URL parameter is read and rewritten in place
 * @param avroSchemaDestination base destination; when {@code null} the table is returned untouched
 * @param eventId appended to the destination after a trailing slash has been ensured
 * @return the same {@code table} instance
 */
Table apply(Table table, String avroSchemaDestination, String eventId) throws Exception {
  if (avroSchemaDestination != null) {
    String eventDestination = addTrailingSlash(avroSchemaDestination) + eventId;
    String sourceUrl = table.getParameters().get(AVRO_SCHEMA_URL_PARAMETER);
    copy(sourceUrl, eventDestination);

    String replicaUrl = eventDestination + "/" + getAvroSchemaFileName(sourceUrl);
    table.putToParameters(AVRO_SCHEMA_URL_PARAMETER, replicaUrl);
    LOG.info("Avro SerDe transformation has been applied to table '{}'", table.getTableName());
  }
  return table;
}
 
Example 5
Source File: AvroHiveTableStrategy.java    From data-highway with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a copy of {@code table} (made with the {@code Table} copy constructor) whose schema URL
 * and schema version parameters reflect the given schema and version.
 *
 * @param table table to copy; its name is used when resolving the schema URI
 * @param schema schema handed to the URI resolver
 * @param version schema version, handed to the URI resolver and stored as a parameter
 * @return the updated copy
 */
@Override
public Table alterHiveTable(Table table, Schema schema, int version) {
  Table copy = new Table(table);
  addRoadAnnotations(copy);

  String schemaUrl = uriResolver.resolve(schema, table.getTableName(), version).toString();
  copy.putToParameters(AVRO_SCHEMA_URL, schemaUrl);
  copy.putToParameters(AVRO_SCHEMA_VERSION, Integer.toString(version));
  return copy;
}
 
Example 6
Source File: CircusTrainHdfsHdfsIntegrationTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Replicates a managed, unpartitioned table whose {@code avro.schema.url} parameter points at a
 * schema file in the source warehouse, and checks that the replica's URL starts with
 * {@code <replica warehouse URI>ct_database-override/}.
 */
@Test
public void unpartitionedTableReplicateAvroSchemaOverride() throws Exception {
  helper.createManagedUnpartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));

  java.nio.file.Path sourceAvroSchemaPath = Paths.get(sourceWarehouseUri.toString() + "/avro-schema-file.test");
  // Encode explicitly so the fixture bytes do not depend on the platform default charset.
  Files.write(sourceAvroSchemaPath, AVRO_SCHEMA_CONTENT.getBytes(java.nio.charset.StandardCharsets.UTF_8));
  String avroSchemaBaseUrl = sourceAvroSchemaPath.toString();

  // Point the source table at the schema file that was just written.
  Table sourceTable = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE);
  sourceTable.putToParameters("avro.schema.url", avroSchemaBaseUrl);
  sourceCatalog.client().alter_table(sourceTable.getDbName(), sourceTable.getTableName(), sourceTable);

  exit.expectSystemExitWithStatus(0);
  File config = dataFolder.getFile("unpartitioned-single-table-avro-schema-override.yml");
  CircusTrainRunner runner = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(sourceCatalog.getThriftConnectionUri(), sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();

  // Registered before the run; the exit rule evaluates it afterwards.
  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table replicaHiveTable = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE);
      String expectedReplicaSchemaUrl = replicaWarehouseUri.toURI().toString() + "ct_database-override/";
      String transformedAvroUrl = replicaHiveTable.getParameters().get("avro.schema.url");
      assertThat(transformedAvroUrl, startsWith(expectedReplicaSchemaUrl));
    }
  });

  runner.run(config.getAbsolutePath());
}
 
Example 7
Source File: IntegrationTestTransformation.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the {@code table.transformed} parameter to {@code "true"} on tables that carry a
 * non-null {@code circus.train.test.transformation} parameter.
 *
 * @param table table to inspect and possibly modify in place
 * @return the same {@code table} instance
 */
@Override
public Table transform(Table table) {
  boolean markerPresent = table.getParameters().get("circus.train.test.transformation") != null;
  if (markerPresent) {
    table.putToParameters("table.transformed", "true");
  }
  return table;
}
 
Example 8
Source File: CircusTrainHdfsHdfsIntegrationTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Replicates a managed, unpartitioned table carrying both an Avro schema URL and the test
 * transformation marker, then checks the overridden table properties, the
 * {@code table.transformed} flag and the prefix of the replica's schema URL.
 */
@Test
public void multipleTransformationOverridesApplied() throws Exception {
  helper.createManagedUnpartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));

  java.nio.file.Path sourceAvroSchemaPath = Paths.get(sourceWarehouseUri.toString() + "/avro-schema-file.test");
  // Encode explicitly so the fixture bytes do not depend on the platform default charset.
  Files.write(sourceAvroSchemaPath, AVRO_SCHEMA_CONTENT.getBytes(java.nio.charset.StandardCharsets.UTF_8));
  String avroSchemaUrl = sourceAvroSchemaPath.toString();

  // Give the source table the schema URL and the transformation marker.
  Table sourceTable = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE);
  sourceTable.putToParameters("avro.schema.url", avroSchemaUrl);
  sourceTable.putToParameters("circus.train.test.transformation", "enabled");
  sourceCatalog.client().alter_table(sourceTable.getDbName(), sourceTable.getTableName(), sourceTable);

  exit.expectSystemExitWithStatus(0);
  File config = dataFolder.getFile("unpartitioned-single-table-multiple-transformation-overrides.yml");
  CircusTrainRunner runner = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(sourceCatalog.getThriftConnectionUri(), sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();

  // Registered before the run; the exit rule evaluates it afterwards.
  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table replicaHiveTable = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE);
      String expectedReplicaSchemaUrl = replicaWarehouseUri.toURI().toString() + "ct_database/override";
      Map<String, String> parameters = replicaHiveTable.getParameters();
      String transformedAvroUrl = parameters.get("avro.schema.url");
      assertThat(parameters.get("table.property.first"), is("first-override"));
      assertThat(parameters.get("table.property.second"), is("second-override"));
      assertThat(parameters.get("table.transformed"), is("true"));
      assertThat(transformedAvroUrl, startsWith(expectedReplicaSchemaUrl));
    }
  });

  runner.run(config.getAbsolutePath());
}
 
Example 9
Source File: IntegrationTestHelper.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one tab-separated data file under {@code sourceTableUri}, creates the source
 * unpartitioned table there and then switches it to a managed table.
 *
 * @param sourceTableUri location of the table and of its data file
 */
void createManagedUnpartitionedTable(URI sourceTableUri) throws Exception {
  FileUtils.writeStringToFile(new File(sourceTableUri.getPath(), PART_00000), "1\tadam\tlondon\n2\tsusan\tmilan\n");

  TestUtils.createUnpartitionedTable(metaStoreClient, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE, sourceTableUri);

  // Re-read the table and flip it from external to managed.
  Table managedTable = metaStoreClient.getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE);
  managedTable.setTableType(TableType.MANAGED_TABLE.name());
  managedTable.putToParameters("EXTERNAL", "FALSE");
  metaStoreClient.alter_table(managedTable.getDbName(), managedTable.getTableName(), managedTable);
}
 
Example 10
Source File: CircusTrainTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the external source table ({@code "source_" + TABLE}) with a single string column
 * {@code col1} before each test.
 */
@Before
public void before() throws TException, IOException {
  StorageDescriptor storage = new StorageDescriptor();
  storage.setCols(Arrays.asList(new FieldSchema("col1", "string", null)));
  storage.setSerdeInfo(new SerDeInfo());

  Table sourceTable = new Table();
  sourceTable.setDbName(DATABASE);
  sourceTable.setTableName("source_" + TABLE);
  sourceTable.setTableType(TableType.EXTERNAL_TABLE.name());
  sourceTable.putToParameters("EXTERNAL", "TRUE");
  sourceTable.setSd(storage);

  hive.client().createTable(sourceTable);
}
 
Example 11
Source File: CircusTrainHdfsHdfsIntegrationTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Replicates a managed, unpartitioned table carrying both an Avro schema URL and the test
 * transformation marker, then checks the configured table properties, the
 * {@code table.transformed} flag and the prefix of the replica's schema URL.
 */
@Test
public void multipleTransformationsApplied() throws Exception {
  helper.createManagedUnpartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));

  java.nio.file.Path sourceAvroSchemaPath = Paths.get(sourceWarehouseUri.toString() + "/avro-schema-file.test");
  // Encode explicitly so the fixture bytes do not depend on the platform default charset.
  Files.write(sourceAvroSchemaPath, AVRO_SCHEMA_CONTENT.getBytes(java.nio.charset.StandardCharsets.UTF_8));
  String avroSchemaUrl = sourceAvroSchemaPath.toString();

  // Give the source table the schema URL and the transformation marker.
  Table sourceTable = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE);
  sourceTable.putToParameters("avro.schema.url", avroSchemaUrl);
  sourceTable.putToParameters("circus.train.test.transformation", "enabled");
  sourceCatalog.client().alter_table(sourceTable.getDbName(), sourceTable.getTableName(), sourceTable);

  exit.expectSystemExitWithStatus(0);
  File config = dataFolder.getFile("unpartitioned-single-table-multiple-transformations.yml");
  CircusTrainRunner runner = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(sourceCatalog.getThriftConnectionUri(), sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();

  // Registered before the run; the exit rule evaluates it afterwards.
  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table replicaHiveTable = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE);
      String expectedReplicaSchemaUrl = replicaWarehouseUri.toURI().toString() + "ct_database/";
      Map<String, String> parameters = replicaHiveTable.getParameters();
      String transformedAvroUrl = parameters.get("avro.schema.url");
      assertThat(parameters.get("table.property.first"), is("first"));
      assertThat(parameters.get("table.property.second"), is("second"));
      assertThat(parameters.get("table.transformed"), is("true"));
      assertThat(transformedAvroUrl, startsWith(expectedReplicaSchemaUrl));
    }
  });

  runner.run(config.getAbsolutePath());
}
 
Example 12
Source File: AvroHiveTableStrategyTest.java    From data-highway with Apache License 2.0 5 votes vote down vote up
/** Checks that a schema version stored as the string {@code "1"} is read back as {@code 1}. */
@Test
public void getSchemaVersion() {
  String storedVersion = Integer.toString(1);
  Table table = new Table();
  table.putToParameters(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION, storedVersion);

  assertThat(underTest.getSchemaVersion(table), is(1));
}
 
Example 13
Source File: HoodieHiveClient.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the timestamp of the active timeline's last instant in the table's
 * {@code HOODIE_LAST_COMMIT_TIME_SYNC} parameter and saves the table through the metastore client.
 *
 * <p>When the timeline has no last instant there is nothing to record, so the method returns
 * without contacting the metastore.
 *
 * @param tableName name of the table in {@code syncConfig.databaseName} to update
 * @throws HoodieHiveSyncException if reading or altering the table fails
 */
void updateLastCommitTimeSynced(String tableName) {
  // Guard the get() below: on an empty timeline it would throw before the try block,
  // bypassing the HoodieHiveSyncException wrapping.
  if (!activeTimeline.lastInstant().isPresent()) {
    return;
  }
  // Set the last commit time in the table properties.
  String lastCommitSynced = activeTimeline.lastInstant().get().getTimestamp();
  try {
    Table table = client.getTable(syncConfig.databaseName, tableName);
    table.putToParameters(HOODIE_LAST_COMMIT_TIME_SYNC, lastCommitSynced);
    client.alter_table(syncConfig.databaseName, tableName, table);
  } catch (Exception e) {
    throw new HoodieHiveSyncException("Failed to update last commit time synced to " + lastCommitSynced, e);
  }
}
 
Example 14
Source File: CircusTrainHdfsS3IntegrationTest.java    From circus-train with Apache License 2.0 4 votes vote down vote up
/**
 * Replicates an unpartitioned table with an external Avro schema file to the S3 replica and
 * checks the replica location, that exactly two objects were copied (the schema first, then the
 * data file) and that the replica's {@code avro.schema.url} points below {@code .schema}.
 */
@Test
public void unpartitionedTableWithExternalAvroSchema() throws Exception {
  final URI sourceLocation = toUri(sourceWarehouseUri, DATABASE, UNPARTITIONED_TABLE);
  helper.createUnpartitionedTable(sourceLocation);

  java.nio.file.Path schemaFile = Paths.get(sourceWarehouseUri.toString() + "/avro-schema-file.test");
  Files.write(schemaFile, AVRO_SCHEMA_CONTENT.getBytes());

  // Point the source table at the schema file that was just written.
  Table source = sourceCatalog.client().getTable(DATABASE, UNPARTITIONED_TABLE);
  source.putToParameters("avro.schema.url", schemaFile.toString());
  sourceCatalog.client().alter_table(source.getDbName(), source.getTableName(), source);

  exit.expectSystemExitWithStatus(0);
  File configFile = dataFolder.getFile("unpartitioned-single-table-avro-schema.yml");
  CircusTrainRunner circusTrain = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(
          sourceCatalog.getThriftConnectionUri(),
          sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .copierOption(S3MapReduceCpOptionsParser.S3_ENDPOINT_URI, s3Proxy.getUri().toString())
      .replicaConfigurationProperty(ENDPOINT, s3Proxy.getUri().toString())
      .replicaConfigurationProperty(ACCESS_KEY, s3Proxy.getAccessKey())
      .replicaConfigurationProperty(SECRET_KEY, s3Proxy.getSecretKey())
      .build();

  // Registered before the run; the exit rule evaluates it afterwards.
  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      // Replica location ends with the replication event id.
      Table replicaTable = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_TABLE);
      String replicationEventId = replicaTable.getParameters().get(REPLICATION_EVENT.parameterName());
      URI expectedLocation = toUri("s3a://replica/", DATABASE, TARGET_UNPARTITIONED_TABLE + "/" + replicationEventId);
      assertThat(replicaTable.getSd().getLocation(), is(expectedLocation.toString()));

      // Two objects are expected in the bucket.
      List<S3ObjectSummary> copiedObjects = TestUtils.listObjects(s3Client, "replica");
      assertThat(copiedObjects.size(), is(2));

      // First object: the Avro schema.
      S3ObjectSummary schemaObject = copiedObjects.get(0);
      String schemaContent = IOUtils
          .toString(s3Client
              .getObject(schemaObject.getBucketName(), schemaObject.getKey())
              .getObjectContent()
              .getDelegateStream());
      assertThat(schemaContent, is(AVRO_SCHEMA_CONTENT));
      String replicaSchemaUrl = replicaTable.getParameters().get("avro.schema.url");
      assertThat(replicaSchemaUrl, is(expectedLocation + "/.schema/avro-schema-file.test"));

      // Second object: the data file.
      File sourceDataFile = new File(sourceLocation.getPath(), PART_00000);
      String dataKeyPattern = String
          .format("%s/%s/ctt-\\d{8}t\\d{6}.\\d{3}z-\\w{8}/%s", DATABASE, TARGET_UNPARTITIONED_TABLE, PART_00000);
      assertThat(copiedObjects.get(1).getSize(), is(sourceDataFile.length()));
      assertThat(copiedObjects.get(1).getKey().matches(dataKeyPattern), is(true));
    }
  });
  circusTrain.run(configFile.getAbsolutePath());
}
 
Example 15
Source File: CircusTrainReplicationModeIntegrationTest.java    From circus-train with Apache License 2.0 4 votes vote down vote up
/**
 * Tags the replica table with a placeholder replication event id and a {@code paramToUpdate}
 * parameter set to {@code "update-me"}.
 *
 * @param replicaTable table whose parameters are modified in place
 */
private void setupReplicaParameters(Table replicaTable) {
  String eventParameter = REPLICATION_EVENT.parameterName();
  replicaTable.putToParameters(eventParameter, "dummyEventID");
  replicaTable.putToParameters("paramToUpdate", "update-me");
}
 
Example 16
Source File: TestUtils.java    From circus-train with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an external, partitioned table through the given metastore client and then registers
 * long column statistics for column {@code id}.
 *
 * @param metaStoreClient client used to create the table and update its statistics
 * @param database database name set on the table
 * @param table table name set on the table
 * @param location table location, stored as its string form
 * @param columns data columns of the storage descriptor
 * @param partitionKeys partition keys of the table
 * @param serializationLib serialization library set on the SerDe info
 * @param inputFormatClassName input format set on the storage descriptor
 * @param outputFormatClassName output format set on the storage descriptor
 * @return the table object that was submitted to the metastore
 */
public static Table createPartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    URI location,
    List<FieldSchema> columns,
    List<FieldSchema> partitionKeys,
    String serializationLib,
    String inputFormatClassName,
    String outputFormatClassName)
    throws Exception {

  SerDeInfo serde = new SerDeInfo();
  serde.setSerializationLib(serializationLib);

  StorageDescriptor storage = new StorageDescriptor();
  storage.setCols(columns);
  storage.setLocation(location.toString());
  storage.setParameters(new HashMap<String, String>());
  storage.setInputFormat(inputFormatClassName);
  storage.setOutputFormat(outputFormatClassName);
  storage.setSerdeInfo(serde);

  Table definition = new Table();
  definition.setDbName(database);
  definition.setTableName(table);
  definition.setTableType(TableType.EXTERNAL_TABLE.name());
  definition.putToParameters("EXTERNAL", "TRUE");
  definition.setPartitionKeys(partitionKeys);
  definition.setSd(storage);

  metaStoreClient.createTable(definition);

  // Attach statistics for the "id" column once the table exists.
  ColumnStatisticsData idStats = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L));
  List<ColumnStatisticsObj> statsObjects =
      Collections.singletonList(new ColumnStatisticsObj("id", "bigint", idStats));
  ColumnStatisticsDesc statsDescription = new ColumnStatisticsDesc(true, database, table);
  metaStoreClient.updateTableColumnStatistics(new ColumnStatistics(statsDescription, statsObjects));

  return definition;
}
 
Example 17
Source File: CircusTrainHdfsHdfsIntegrationTest.java    From circus-train with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the metadata-update configuration against an existing unpartitioned replica and checks
 * that the replica received a new {@code ctt-} event id and the changed {@code paramToUpdate}
 * value, while its columns and location are as expected.
 */
@Test
public void unpartitionedTableMetadataUpdate() throws Exception {
  helper.createManagedUnpartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));

  // Pre-create the replica table and tag it with a placeholder event id.
  final URI replicaTableLocation = toUri(replicaWarehouseUri, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE);
  TestUtils.createUnpartitionedTable(
      replicaCatalog.client(), DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE, replicaTableLocation);
  Table existingReplica = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE);
  existingReplica.putToParameters(REPLICATION_EVENT.parameterName(), "dummyEventID");
  replicaCatalog
      .client()
      .alter_table(existingReplica.getDbName(), existingReplica.getTableName(), existingReplica);

  // Change the source table so there is a metadata difference to replicate.
  Table source = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE);
  source.putToParameters("paramToUpdate", "updated");
  sourceCatalog.client().alter_table(source.getDbName(), source.getTableName(), source);

  exit.expectSystemExitWithStatus(0);
  File configFile = dataFolder.getFile("unpartitioned-single-table-metadata-update.yml");
  CircusTrainRunner circusTrain = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(
          sourceCatalog.getThriftConnectionUri(),
          sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();

  // Registered before the run; the exit rule evaluates it afterwards.
  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table replicated = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE);
      assertThat(replicated.getDbName(), is(DATABASE));
      assertThat(replicated.getTableName(), is(TARGET_UNPARTITIONED_MANAGED_TABLE));
      // The placeholder event id must have been replaced.
      assertThat(replicated.getParameters().get(REPLICATION_EVENT.parameterName()), startsWith("ctt-"));
      assertThat(replicated.getParameters().get("paramToUpdate"), is("updated"));
      assertThat(isExternalTable(replicated), is(true));
      assertThat(replicated.getSd().getCols(), is(DATA_COLUMNS));
      assertThat(replicated.getSd().getLocation(), is(replicaTableLocation.toString()));
    }
  });
  circusTrain.run(configFile.getAbsolutePath());
}
 
Example 18
Source File: CircusTrainHdfsHdfsIntegrationTest.java    From circus-train with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the metadata-update configuration against an existing partitioned replica that holds one
 * partition (Asia/China), and checks that the table and that partition received the changed
 * parameters while no further partitions were created.
 */
@Test
public void partitionedTableMetadataUpdate() throws Exception {
  helper.createManagedPartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE));

  // Pre-create the replica table, tag it with a placeholder event id and add one partition.
  final URI replicaTableLocation = toUri(replicaWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  TestUtils.createPartitionedTable(
      replicaCatalog.client(), DATABASE, TARGET_PARTITIONED_MANAGED_TABLE, replicaTableLocation);
  Table existingReplica = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE);
  existingReplica.putToParameters(REPLICATION_EVENT.parameterName(), "dummyEventID");
  URI asiaLocation = URI.create(replicaTableLocation + "/dummyEventID/continent=Asia");
  final URI chinaLocation = URI.create(asiaLocation + "/country=China");
  replicaCatalog
      .client()
      .add_partitions(
          Arrays.asList(newTablePartition(existingReplica, Arrays.asList("Asia", "China"), chinaLocation)));
  replicaCatalog
      .client()
      .alter_table(existingReplica.getDbName(), existingReplica.getTableName(), existingReplica);

  // Change the source table and its Asia/China partition so there is metadata to replicate.
  Table source = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  source.putToParameters("paramToUpdate", "updated");
  sourceCatalog.client().alter_table(source.getDbName(), source.getTableName(), source);
  Partition sourcePartition = sourceCatalog
      .client()
      .getPartition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, "continent=Asia/country=China");
  sourcePartition.putToParameters("partition_paramToUpdate", "updated");
  sourceCatalog.client().alter_partition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, sourcePartition);

  exit.expectSystemExitWithStatus(0);
  File configFile = dataFolder.getFile("partitioned-single-table-metadata-update.yml");
  CircusTrainRunner circusTrain = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(
          sourceCatalog.getThriftConnectionUri(),
          sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();

  // Registered before the run; the exit rule evaluates it afterwards.
  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table replicated = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE);
      assertThat(replicated.getDbName(), is(DATABASE));
      assertThat(replicated.getTableName(), is(TARGET_PARTITIONED_MANAGED_TABLE));
      // The placeholder event id must have been replaced.
      assertThat(replicated.getParameters().get(REPLICATION_EVENT.parameterName()), startsWith("ctp-"));
      assertThat(replicated.getParameters().get("paramToUpdate"), is("updated"));
      assertThat(isExternalTable(replicated), is(true));
      assertThat(replicated.getSd().getCols(), is(DATA_COLUMNS));

      assertThat(replicated.getSd().getLocation(), is(replicaTableLocation.toString()));
      List<Partition> replicaPartitions = replicaCatalog
          .client()
          .listPartitions(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE, (short) 50);
      assertThat(replicaPartitions.size(), is(1));
      // Only the partition that already existed is updated; no new partitions appear.
      assertThat(replicaPartitions.get(0).getSd().getLocation(), is(chinaLocation.toString()));
      assertThat(replicaPartitions.get(0).getParameters().get("partition_paramToUpdate"), is("updated"));
    }
  });
  circusTrain.run(configFile.getAbsolutePath());
}
 
Example 19
Source File: CircusTrainHdfsHdfsIntegrationTest.java    From circus-train with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the Avro-schema metadata-update configuration against an existing unpartitioned replica
 * and checks that the replica's {@code avro.schema.url} starts with
 * {@code <replica warehouse URI>ct_database/}.
 */
@Test
public void unpartitionedTableMetadataUpdateAvroSchema() throws Exception {
  helper.createManagedUnpartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));

  // final so the assertion callback below can reuse the same parameter key.
  final String avroParameter = "avro.schema.url";
  java.nio.file.Path sourceAvroSchemaPath = Paths.get(sourceWarehouseUri.toString() + "/avro-schema-file.test");
  // Encode explicitly so the fixture bytes do not depend on the platform default charset.
  Files.write(sourceAvroSchemaPath, AVRO_SCHEMA_CONTENT.getBytes(java.nio.charset.StandardCharsets.UTF_8));
  String avroSchemaUrl = sourceAvroSchemaPath.toString();

  // Pre-create the replica table and tag it with a placeholder event id.
  URI replicaLocation = toUri(replicaWarehouseUri, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE);
  TestUtils
      .createUnpartitionedTable(replicaCatalog.client(), DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE,
          replicaLocation);
  Table table = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE);
  table.putToParameters(REPLICATION_EVENT.parameterName(), "dummyEventID");
  replicaCatalog.client().alter_table(table.getDbName(), table.getTableName(), table);

  // Point the source table at the schema file that was just written.
  Table sourceTable = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE);
  sourceTable.putToParameters(avroParameter, avroSchemaUrl);
  sourceCatalog.client().alter_table(sourceTable.getDbName(), sourceTable.getTableName(), sourceTable);

  exit.expectSystemExitWithStatus(0);
  File config = dataFolder.getFile("unpartitioned-single-table-avro-schema-metadata-update.yml");
  CircusTrainRunner runner = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(sourceCatalog.getThriftConnectionUri(), sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();
  // Registered before the run; the exit rule evaluates it afterwards.
  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table replicaHiveTable = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE);
      String expectedReplicaSchemaUrl = replicaWarehouseUri.toURI().toString() + "ct_database/";
      // Read back the same key that was written on the source table.
      String transformedAvroUrl = replicaHiveTable.getParameters().get(avroParameter);
      assertThat(transformedAvroUrl, startsWith(expectedReplicaSchemaUrl));
    }
  });
  runner.run(config.getAbsolutePath());
}
 
Example 20
Source File: CircusTrainHdfsHdfsIntegrationTest.java    From circus-train with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the Avro-schema metadata-update configuration against an existing partitioned replica
 * holding one partition (Asia/China), and checks that the {@code avro.schema.url} of both the
 * replica table and its first listed partition starts with
 * {@code <replica warehouse URI>ct_database/}.
 */
@Test
public void partitionedTableMetadataUpdateAvroSchema() throws Exception {
  helper.createManagedPartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE));

  java.nio.file.Path sourceAvroSchemaPath = Paths.get(sourceWarehouseUri.toString() + "/avro-schema-file.test");
  // Encode explicitly so the fixture bytes do not depend on the platform default charset.
  Files.write(sourceAvroSchemaPath, AVRO_SCHEMA_CONTENT.getBytes(java.nio.charset.StandardCharsets.UTF_8));
  String avroSchemaUrl = sourceAvroSchemaPath.toString();

  // Pre-create the replica table, tag it with a placeholder event id and add one partition.
  URI replicaLocation = toUri(replicaWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  TestUtils
      .createPartitionedTable(replicaCatalog.client(), DATABASE, TARGET_PARTITIONED_MANAGED_TABLE, replicaLocation);
  Table table = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE);
  table.putToParameters(REPLICATION_EVENT.parameterName(), "dummyEventID");

  URI partitionAsia = URI.create(replicaLocation + "/dummyEventID/continent=Asia");
  URI partitionChina = URI.create(partitionAsia + "/country=China");
  replicaCatalog
      .client()
      .add_partitions(Arrays.asList(newTablePartition(table, Arrays.asList("Asia", "China"), partitionChina)));
  replicaCatalog.client().alter_table(table.getDbName(), table.getTableName(), table);

  // Point the source table and its Asia/China partition at the schema file.
  Table sourceTable = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  sourceTable.putToParameters("avro.schema.url", avroSchemaUrl);

  sourceCatalog.client().alter_table(sourceTable.getDbName(), sourceTable.getTableName(), sourceTable);
  Partition partition = sourceCatalog
      .client()
      .getPartition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, "continent=Asia/country=China");
  partition.putToParameters("avro.schema.url", avroSchemaUrl);

  sourceCatalog.client().alter_partition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, partition);

  exit.expectSystemExitWithStatus(0);
  File config = dataFolder.getFile("partitioned-single-table-avro-schema-metadata-update.yml");
  CircusTrainRunner runner = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(sourceCatalog.getThriftConnectionUri(), sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();
  // Registered before the run; the exit rule evaluates it afterwards.
  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table replicaHiveTable = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE);
      String expectedReplicaSchemaUrl = replicaWarehouseUri.toURI().toString() + "ct_database/";
      String transformedAvroUrl = replicaHiveTable.getParameters().get("avro.schema.url");
      assertThat(transformedAvroUrl, startsWith(expectedReplicaSchemaUrl));

      // The partition-level URL must share the table-level prefix.
      List<Partition> listPartitions = replicaCatalog
          .client()
          .listPartitions(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE, (short) 50);
      transformedAvroUrl = listPartitions.get(0).getParameters().get("avro.schema.url");
      assertThat(transformedAvroUrl, startsWith(expectedReplicaSchemaUrl));
    }
  });
  runner.run(config.getAbsolutePath());
}