Java Code Examples for org.apache.hadoop.hive.metastore.api.Table#putToParameters()
The following examples show how to use
org.apache.hadoop.hive.metastore.api.Table#putToParameters().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroHiveTableStrategy.java From data-highway with Apache License 2.0 | 6 votes |
/**
 * Builds a new external Hive table definition and records the Avro schema URL and schema
 * version as table parameters.
 *
 * @param databaseName database name set on the table
 * @param tableName table name set on the table
 * @param partitionColumnName name of the single {@code string} partition key
 * @param location handed to {@code AvroStorageDescriptorFactory.create} to build the storage descriptor
 * @param schema schema passed to the URI resolver
 * @param version schema version, passed to the URI resolver and stored under {@code AVRO_SCHEMA_VERSION}
 * @return the populated table object
 */
@Override
public Table newHiveTable(
    String databaseName,
    String tableName,
    String partitionColumnName,
    String location,
    Schema schema,
    int version) {
  Table hiveTable = new Table();
  hiveTable.setDbName(databaseName);
  hiveTable.setTableName(tableName);
  hiveTable.setTableType(TableType.EXTERNAL_TABLE.toString());
  hiveTable.putToParameters("EXTERNAL", "TRUE");
  addRoadAnnotations(hiveTable);

  // Schema location and version travel with the table as plain string parameters.
  String schemaLocation = uriResolver.resolve(schema, hiveTable.getTableName(), version).toString();
  hiveTable.putToParameters(AVRO_SCHEMA_URL, schemaLocation);
  hiveTable.putToParameters(AVRO_SCHEMA_VERSION, Integer.toString(version));

  FieldSchema partitionKey = new FieldSchema(partitionColumnName, "string", null);
  hiveTable.setPartitionKeys(Arrays.asList(partitionKey));
  hiveTable.setSd(AvroStorageDescriptorFactory.create(location));
  return hiveTable;
}
Example 2
Source File: TestUtils.java From waggle-dance with Apache License 2.0 | 6 votes |
/**
 * Registers an external table without partition keys in the given metastore.
 *
 * @param metaStoreClient client used to create the table
 * @param database database name set on the table
 * @param table table name set on the table
 * @param location directory whose URI string becomes the storage location
 * @return the table object that was sent to the metastore
 * @throws TException declared because the metastore call may fail
 */
static Table createUnpartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    File location)
  throws TException {
  StorageDescriptor storage = new StorageDescriptor();
  storage.setCols(DATA_COLUMNS);
  storage.setLocation(location.toURI().toString());
  storage.setParameters(new HashMap<>());
  storage.setSerdeInfo(new SerDeInfo());

  Table created = new Table();
  created.setDbName(database);
  created.setTableName(table);
  created.setTableType(TableType.EXTERNAL_TABLE.name());
  created.putToParameters("EXTERNAL", "TRUE");
  created.setSd(storage);

  metaStoreClient.createTable(created);
  return created;
}
Example 3
Source File: IntegrationTestHelper.java From circus-train with Apache License 2.0 | 6 votes |
/**
 * Creates the source partitioned table, switches it to a managed table, writes one data file
 * for each of the Europe/UK and Asia/China partition directories and registers both partitions.
 *
 * @param sourceTableUri base location of the table; partition paths are appended to it
 * @throws Exception if the metastore calls or the file writes fail
 */
void createManagedPartitionedTable(URI sourceTableUri) throws Exception {
  TestUtils.createPartitionedTable(metaStoreClient, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, sourceTableUri);

  // Re-read the table and flip it from external to managed.
  Table managedTable = metaStoreClient.getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  managedTable.setTableType(TableType.MANAGED_TABLE.name());
  managedTable.putToParameters("EXTERNAL", "FALSE");
  metaStoreClient.alter_table(managedTable.getDbName(), managedTable.getTableName(), managedTable);

  URI europeUri = URI.create(sourceTableUri + "/continent=Europe");
  URI ukUri = URI.create(europeUri + "/country=UK");
  File ukData = new File(ukUri.getPath(), PART_00000);
  FileUtils.writeStringToFile(ukData, "1\tadam\tlondon\n2\tsusan\tglasgow\n");

  URI asiaUri = URI.create(sourceTableUri + "/continent=Asia");
  URI chinaUri = URI.create(asiaUri + "/country=China");
  File chinaData = new File(chinaUri.getPath(), PART_00000);
  FileUtils.writeStringToFile(chinaData, "1\tchun\tbeijing\n2\tshanghai\tmilan\n");

  LOG.info(
      ">>>> Partitions added: {}",
      metaStoreClient.add_partitions(
          Arrays.asList(
              newTablePartition(managedTable, Arrays.asList("Europe", "UK"), ukUri),
              newTablePartition(managedTable, Arrays.asList("Asia", "China"), chinaUri))));
}
Example 4
Source File: AvroSerDeTransformation.java From circus-train with Apache License 2.0 | 6 votes |
/**
 * Copies the table's Avro schema to an event-specific destination and repoints the table's
 * schema URL parameter at the copy.
 *
 * @param table table whose {@code AVRO_SCHEMA_URL_PARAMETER} is read and rewritten
 * @param avroSchemaDestination base destination; when {@code null} the table is returned unchanged
 * @param eventId appended to the destination (after a trailing slash is ensured)
 * @return the same table instance
 * @throws Exception propagated from the copy step
 */
Table apply(Table table, String avroSchemaDestination, String eventId) throws Exception {
  if (avroSchemaDestination == null) {
    return table;
  }

  // <destination>/<eventId> is the folder the schema file is copied into.
  String eventDestination = addTrailingSlash(avroSchemaDestination) + eventId;
  String schemaSource = table.getParameters().get(AVRO_SCHEMA_URL_PARAMETER);
  copy(schemaSource, eventDestination);

  String relocatedSchemaUrl = eventDestination + "/" + getAvroSchemaFileName(schemaSource);
  table.putToParameters(AVRO_SCHEMA_URL_PARAMETER, relocatedSchemaUrl);
  LOG.info("Avro SerDe transformation has been applied to table '{}'", table.getTableName());
  return table;
}
Example 5
Source File: AvroHiveTableStrategy.java From data-highway with Apache License 2.0 | 5 votes |
/**
 * Produces a copy of the given table with refreshed annotations and with the Avro schema URL
 * and schema version parameters set for the supplied schema version.
 *
 * @param table table to copy; it is not modified by this method
 * @param schema schema passed to the URI resolver
 * @param version schema version, passed to the URI resolver and stored under {@code AVRO_SCHEMA_VERSION}
 * @return the altered copy
 */
@Override
public Table alterHiveTable(Table table, Schema schema, int version) {
  Table copy = new Table(table);
  addRoadAnnotations(copy);

  String schemaLocation = uriResolver.resolve(schema, table.getTableName(), version).toString();
  copy.putToParameters(AVRO_SCHEMA_URL, schemaLocation);
  copy.putToParameters(AVRO_SCHEMA_VERSION, Integer.toString(version));
  return copy;
}
Example 6
Source File: CircusTrainHdfsHdfsIntegrationTest.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Runs a replication of a managed unpartitioned table whose {@code avro.schema.url} points at a
 * schema file written by the test, and checks that the replica's {@code avro.schema.url} starts
 * with the {@code ct_database-override/} location under the replica warehouse.
 */
@Test
public void unpartitionedTableReplicateAvroSchemaOverride() throws Exception {
  helper.createManagedUnpartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));

  // Write the schema file next to the source warehouse and register it on the source table.
  java.nio.file.Path schemaFile = Paths.get(sourceWarehouseUri.toString() + "/avro-schema-file.test");
  Files.write(schemaFile, AVRO_SCHEMA_CONTENT.getBytes());
  String schemaFileLocation = schemaFile.toString();

  Table source = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE);
  source.putToParameters("avro.schema.url", schemaFileLocation);
  sourceCatalog.client().alter_table(source.getDbName(), source.getTableName(), source);

  exit.expectSystemExitWithStatus(0);
  File configFile = dataFolder.getFile("unpartitioned-single-table-avro-schema-override.yml");
  CircusTrainRunner circusTrain = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(
          sourceCatalog.getThriftConnectionUri(),
          sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();

  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table replica = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE);
      String expectedPrefix = replicaWarehouseUri.toURI().toString() + "ct_database-override/";
      String replicaSchemaUrl = replica.getParameters().get("avro.schema.url");
      assertThat(replicaSchemaUrl, startsWith(expectedPrefix));
    }
  });

  circusTrain.run(configFile.getAbsolutePath());
}
Example 7
Source File: IntegrationTestTransformation.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Marks the table as transformed when it carries the
 * {@code circus.train.test.transformation} parameter.
 *
 * <p>A table without a parameter map is returned untouched rather than causing a
 * {@link NullPointerException}.
 *
 * @param table table to inspect and, if flagged, annotate with {@code table.transformed=true}
 * @return the same table instance
 */
@Override
public Table transform(Table table) {
  // The Thrift-generated getter hands back the raw field, which is null for a Table whose
  // parameters were never populated; treat that the same as "flag not present".
  if (table.getParameters() != null
      && table.getParameters().get("circus.train.test.transformation") != null) {
    table.putToParameters("table.transformed", "true");
  }
  return table;
}
Example 8
Source File: CircusTrainHdfsHdfsIntegrationTest.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Runs a replication with the multiple-transformation-overrides configuration and checks that
 * the replica carries the overridden table properties, the {@code table.transformed} marker and
 * an {@code avro.schema.url} starting with the {@code ct_database/override} location.
 */
@Test
public void multipleTransformationOverridesApplied() throws Exception {
  helper.createManagedUnpartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));

  java.nio.file.Path schemaFile = Paths.get(sourceWarehouseUri.toString() + "/avro-schema-file.test");
  Files.write(schemaFile, AVRO_SCHEMA_CONTENT.getBytes());
  String schemaFileLocation = schemaFile.toString();

  // The source table carries both the schema URL and the flag read by the test transformation.
  Table source = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE);
  source.putToParameters("avro.schema.url", schemaFileLocation);
  source.putToParameters("circus.train.test.transformation", "enabled");
  sourceCatalog.client().alter_table(source.getDbName(), source.getTableName(), source);

  exit.expectSystemExitWithStatus(0);
  File configFile = dataFolder.getFile("unpartitioned-single-table-multiple-transformation-overrides.yml");
  CircusTrainRunner circusTrain = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(
          sourceCatalog.getThriftConnectionUri(),
          sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();

  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table replica = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE);
      String expectedPrefix = replicaWarehouseUri.toURI().toString() + "ct_database/override";
      Map<String, String> replicaParameters = replica.getParameters();
      String replicaSchemaUrl = replicaParameters.get("avro.schema.url");
      assertThat(replicaParameters.get("table.property.first"), is("first-override"));
      assertThat(replicaParameters.get("table.property.second"), is("second-override"));
      assertThat(replicaParameters.get("table.transformed"), is("true"));
      assertThat(replicaSchemaUrl, startsWith(expectedPrefix));
    }
  });

  circusTrain.run(configFile.getAbsolutePath());
}
Example 9
Source File: IntegrationTestHelper.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Writes a two-row data file under the given location, creates the source unpartitioned table
 * there and then switches it to a managed table.
 *
 * @param sourceTableUri location of the table; its path is used as the data file's directory
 * @throws Exception if the file write or any metastore call fails
 */
void createManagedUnpartitionedTable(URI sourceTableUri) throws Exception {
  File partFile = new File(sourceTableUri.getPath(), PART_00000);
  FileUtils.writeStringToFile(partFile, "1\tadam\tlondon\n2\tsusan\tmilan\n");

  TestUtils.createUnpartitionedTable(metaStoreClient, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE, sourceTableUri);

  // Re-read the table and flip it from external to managed.
  Table managedTable = metaStoreClient.getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE);
  managedTable.setTableType(TableType.MANAGED_TABLE.name());
  managedTable.putToParameters("EXTERNAL", "FALSE");
  metaStoreClient.alter_table(managedTable.getDbName(), managedTable.getTableName(), managedTable);
}
Example 10
Source File: CircusTrainTest.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Creates the external {@code source_<TABLE>} table, with a single string column {@code col1},
 * before each test.
 */
@Before
public void before() throws TException, IOException {
  StorageDescriptor storage = new StorageDescriptor();
  storage.setCols(Arrays.asList(new FieldSchema("col1", "string", null)));
  storage.setSerdeInfo(new SerDeInfo());

  Table sourceTable = new Table();
  sourceTable.setDbName(DATABASE);
  sourceTable.setTableName("source_" + TABLE);
  sourceTable.setTableType(TableType.EXTERNAL_TABLE.name());
  sourceTable.putToParameters("EXTERNAL", "TRUE");
  sourceTable.setSd(storage);

  hive.client().createTable(sourceTable);
}
Example 11
Source File: CircusTrainHdfsHdfsIntegrationTest.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Runs a replication with the multiple-transformations configuration and checks that the
 * replica carries the expected table properties, the {@code table.transformed} marker and an
 * {@code avro.schema.url} starting with the {@code ct_database/} location.
 */
@Test
public void multipleTransformationsApplied() throws Exception {
  helper.createManagedUnpartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));

  java.nio.file.Path schemaFile = Paths.get(sourceWarehouseUri.toString() + "/avro-schema-file.test");
  Files.write(schemaFile, AVRO_SCHEMA_CONTENT.getBytes());
  String schemaFileLocation = schemaFile.toString();

  // The source table carries both the schema URL and the flag read by the test transformation.
  Table source = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE);
  source.putToParameters("avro.schema.url", schemaFileLocation);
  source.putToParameters("circus.train.test.transformation", "enabled");
  sourceCatalog.client().alter_table(source.getDbName(), source.getTableName(), source);

  exit.expectSystemExitWithStatus(0);
  File configFile = dataFolder.getFile("unpartitioned-single-table-multiple-transformations.yml");
  CircusTrainRunner circusTrain = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(
          sourceCatalog.getThriftConnectionUri(),
          sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();

  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table replica = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE);
      String expectedPrefix = replicaWarehouseUri.toURI().toString() + "ct_database/";
      Map<String, String> replicaParameters = replica.getParameters();
      String replicaSchemaUrl = replicaParameters.get("avro.schema.url");
      assertThat(replicaParameters.get("table.property.first"), is("first"));
      assertThat(replicaParameters.get("table.property.second"), is("second"));
      assertThat(replicaParameters.get("table.transformed"), is("true"));
      assertThat(replicaSchemaUrl, startsWith(expectedPrefix));
    }
  });

  circusTrain.run(configFile.getAbsolutePath());
}
Example 12
Source File: AvroHiveTableStrategyTest.java From data-highway with Apache License 2.0 | 5 votes |
/**
 * Checks that the schema version stored as a table parameter is read back as the integer 1.
 */
@Test
public void getSchemaVersion() {
  Table versionedTable = new Table();
  versionedTable.putToParameters(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION, Integer.toString(1));

  int actualVersion = underTest.getSchemaVersion(versionedTable);

  assertThat(actualVersion, is(1));
}
Example 13
Source File: HoodieHiveClient.java From hudi with Apache License 2.0 | 5 votes |
void updateLastCommitTimeSynced(String tableName) { // Set the last commit time from the TBLproperties String lastCommitSynced = activeTimeline.lastInstant().get().getTimestamp(); try { Table table = client.getTable(syncConfig.databaseName, tableName); table.putToParameters(HOODIE_LAST_COMMIT_TIME_SYNC, lastCommitSynced); client.alter_table(syncConfig.databaseName, tableName, table); } catch (Exception e) { throw new HoodieHiveSyncException("Failed to get update last commit time synced to " + lastCommitSynced, e); } }
Example 14
Source File: CircusTrainHdfsS3IntegrationTest.java From circus-train with Apache License 2.0 | 4 votes |
@Test public void unpartitionedTableWithExternalAvroSchema() throws Exception { final URI sourceTableUri = toUri(sourceWarehouseUri, DATABASE, UNPARTITIONED_TABLE); helper.createUnpartitionedTable(sourceTableUri); java.nio.file.Path sourceAvroSchemaPath = Paths.get(sourceWarehouseUri.toString() + "/avro-schema-file.test"); Files.write(sourceAvroSchemaPath, AVRO_SCHEMA_CONTENT.getBytes()); String avroSchemaUrl = sourceAvroSchemaPath.toString(); Table sourceTable = sourceCatalog.client().getTable(DATABASE, UNPARTITIONED_TABLE); sourceTable.putToParameters("avro.schema.url", avroSchemaUrl); sourceCatalog.client().alter_table(sourceTable.getDbName(), sourceTable.getTableName(), sourceTable); exit.expectSystemExitWithStatus(0); File config = dataFolder.getFile("unpartitioned-single-table-avro-schema.yml"); CircusTrainRunner runner = CircusTrainRunner .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation) .sourceMetaStore(sourceCatalog.getThriftConnectionUri(), sourceCatalog.connectionURL(), sourceCatalog.driverClassName()) .replicaMetaStore(replicaCatalog.getThriftConnectionUri()) .copierOption(S3MapReduceCpOptionsParser.S3_ENDPOINT_URI, s3Proxy.getUri().toString()) .replicaConfigurationProperty(ENDPOINT, s3Proxy.getUri().toString()) .replicaConfigurationProperty(ACCESS_KEY, s3Proxy.getAccessKey()) .replicaConfigurationProperty(SECRET_KEY, s3Proxy.getSecretKey()) .build(); exit.checkAssertionAfterwards(new Assertion() { @Override public void checkAssertion() throws Exception { // Assert location Table hiveTable = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_TABLE); String eventId = hiveTable.getParameters().get(REPLICATION_EVENT.parameterName()); URI replicaLocation = toUri("s3a://replica/", DATABASE, TARGET_UNPARTITIONED_TABLE + "/" + eventId); assertThat(hiveTable.getSd().getLocation(), is(replicaLocation.toString())); // Assert copied files File dataFile = new File(sourceTableUri.getPath(), PART_00000); String 
fileKeyRegex = String .format("%s/%s/ctt-\\d{8}t\\d{6}.\\d{3}z-\\w{8}/%s", DATABASE, TARGET_UNPARTITIONED_TABLE, PART_00000); List<S3ObjectSummary> replicaFiles = TestUtils.listObjects(s3Client, "replica"); assertThat(replicaFiles.size(), is(2)); // assert Avro schema copied S3ObjectSummary s3ObjectSummary = replicaFiles.get(0); String content = IOUtils .toString(s3Client .getObject(s3ObjectSummary.getBucketName(), s3ObjectSummary.getKey()) .getObjectContent() .getDelegateStream()); assertThat(content, is(AVRO_SCHEMA_CONTENT)); String transformedAvroUrl = hiveTable.getParameters().get("avro.schema.url"); assertThat(transformedAvroUrl, is(replicaLocation + "/.schema/avro-schema-file.test")); // data file assertThat(replicaFiles.get(1).getSize(), is(dataFile.length())); assertThat(replicaFiles.get(1).getKey().matches(fileKeyRegex), is(true)); } }); runner.run(config.getAbsolutePath()); }
Example 15
Source File: CircusTrainReplicationModeIntegrationTest.java From circus-train with Apache License 2.0 | 4 votes |
/**
 * Seeds the replica table with a placeholder replication event id and a
 * {@code paramToUpdate} parameter set to {@code update-me}.
 *
 * @param replicaTable table whose parameters are modified in place
 */
private void setupReplicaParameters(Table replicaTable) {
  String eventParameter = REPLICATION_EVENT.parameterName();
  replicaTable.putToParameters(eventParameter, "dummyEventID");
  replicaTable.putToParameters("paramToUpdate", "update-me");
}
Example 16
Source File: TestUtils.java From circus-train with Apache License 2.0 | 4 votes |
/**
 * Creates an external partitioned table in the metastore with the supplied columns, partition
 * keys, SerDe library and input/output formats, then records long column statistics for the
 * {@code id} column.
 *
 * @param metaStoreClient client used to create the table and update its statistics
 * @param database database name set on the table
 * @param table table name set on the table
 * @param location storage location of the table
 * @param columns data columns of the table
 * @param partitionKeys partition key columns
 * @param serializationLib serialization library set on the SerDe info
 * @param inputFormatClassName input format set on the storage descriptor
 * @param outputFormatClassName output format set on the storage descriptor
 * @return the table object that was sent to the metastore
 * @throws Exception if any metastore call fails
 */
public static Table createPartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    URI location,
    List<FieldSchema> columns,
    List<FieldSchema> partitionKeys,
    String serializationLib,
    String inputFormatClassName,
    String outputFormatClassName)
  throws Exception {
  SerDeInfo serde = new SerDeInfo();
  serde.setSerializationLib(serializationLib);

  StorageDescriptor storage = new StorageDescriptor();
  storage.setCols(columns);
  storage.setLocation(location.toString());
  storage.setParameters(new HashMap<String, String>());
  storage.setInputFormat(inputFormatClassName);
  storage.setOutputFormat(outputFormatClassName);
  storage.setSerdeInfo(serde);

  Table created = new Table();
  created.setDbName(database);
  created.setTableName(table);
  created.setTableType(TableType.EXTERNAL_TABLE.name());
  created.putToParameters("EXTERNAL", "TRUE");
  created.setPartitionKeys(partitionKeys);
  created.setSd(storage);
  metaStoreClient.createTable(created);

  // Attach statistics for the "id" column to the freshly created table.
  ColumnStatisticsData idStatsData =
      new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L));
  List<ColumnStatisticsObj> columnStats =
      Collections.singletonList(new ColumnStatisticsObj("id", "bigint", idStatsData));
  ColumnStatisticsDesc statsDescriptor = new ColumnStatisticsDesc(true, database, table);
  metaStoreClient.updateTableColumnStatistics(new ColumnStatistics(statsDescriptor, columnStats));

  return created;
}
Example 17
Source File: CircusTrainHdfsHdfsIntegrationTest.java From circus-train with Apache License 2.0 | 4 votes |
@Test public void unpartitionedTableMetadataUpdate() throws Exception { helper.createManagedUnpartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE)); LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE)); // creating replicaTable final URI replicaLocation = toUri(replicaWarehouseUri, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE); TestUtils .createUnpartitionedTable(replicaCatalog.client(), DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE, replicaLocation); Table table = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE); table.putToParameters(REPLICATION_EVENT.parameterName(), "dummyEventID"); replicaCatalog.client().alter_table(table.getDbName(), table.getTableName(), table); // adjusting the sourceTable, mimicking the change we want to update Table sourceTable = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE); sourceTable.putToParameters("paramToUpdate", "updated"); sourceCatalog.client().alter_table(sourceTable.getDbName(), sourceTable.getTableName(), sourceTable); exit.expectSystemExitWithStatus(0); File config = dataFolder.getFile("unpartitioned-single-table-metadata-update.yml"); CircusTrainRunner runner = CircusTrainRunner .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation) .sourceMetaStore(sourceCatalog.getThriftConnectionUri(), sourceCatalog.connectionURL(), sourceCatalog.driverClassName()) .replicaMetaStore(replicaCatalog.getThriftConnectionUri()) .build(); exit.checkAssertionAfterwards(new Assertion() { @Override public void checkAssertion() throws Exception { Table hiveTable = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE); assertThat(hiveTable.getDbName(), is(DATABASE)); assertThat(hiveTable.getTableName(), is(TARGET_UNPARTITIONED_MANAGED_TABLE)); // dummyEventID should be overridden 
assertThat(hiveTable.getParameters().get(REPLICATION_EVENT.parameterName()), startsWith("ctt-")); assertThat(hiveTable.getParameters().get("paramToUpdate"), is("updated")); assertThat(isExternalTable(hiveTable), is(true)); assertThat(hiveTable.getSd().getCols(), is(DATA_COLUMNS)); assertThat(hiveTable.getSd().getLocation(), is(replicaLocation.toString())); } }); runner.run(config.getAbsolutePath()); }
Example 18
Source File: CircusTrainHdfsHdfsIntegrationTest.java From circus-train with Apache License 2.0 | 4 votes |
@Test public void partitionedTableMetadataUpdate() throws Exception { helper.createManagedPartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE)); LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE)); // creating replicaTable final URI replicaLocation = toUri(replicaWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE); TestUtils .createPartitionedTable(replicaCatalog.client(), DATABASE, TARGET_PARTITIONED_MANAGED_TABLE, replicaLocation); Table table = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE); table.putToParameters(REPLICATION_EVENT.parameterName(), "dummyEventID"); URI partitionAsia = URI.create(replicaLocation + "/dummyEventID/continent=Asia"); final URI partitionChina = URI.create(partitionAsia + "/country=China"); replicaCatalog .client() .add_partitions(Arrays.asList(newTablePartition(table, Arrays.asList("Asia", "China"), partitionChina))); replicaCatalog.client().alter_table(table.getDbName(), table.getTableName(), table); // adjusting the sourceTable, mimicking the change we want to update Table sourceTable = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE); sourceTable.putToParameters("paramToUpdate", "updated"); sourceCatalog.client().alter_table(sourceTable.getDbName(), sourceTable.getTableName(), sourceTable); Partition partition = sourceCatalog .client() .getPartition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, "continent=Asia/country=China"); partition.putToParameters("partition_paramToUpdate", "updated"); sourceCatalog.client().alter_partition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, partition); exit.expectSystemExitWithStatus(0); File config = dataFolder.getFile("partitioned-single-table-metadata-update.yml"); CircusTrainRunner runner = CircusTrainRunner .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation) .sourceMetaStore(sourceCatalog.getThriftConnectionUri(), 
sourceCatalog.connectionURL(), sourceCatalog.driverClassName()) .replicaMetaStore(replicaCatalog.getThriftConnectionUri()) .build(); exit.checkAssertionAfterwards(new Assertion() { @Override public void checkAssertion() throws Exception { Table hiveTable = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE); assertThat(hiveTable.getDbName(), is(DATABASE)); assertThat(hiveTable.getTableName(), is(TARGET_PARTITIONED_MANAGED_TABLE)); // dummyEventID should be overridden assertThat(hiveTable.getParameters().get(REPLICATION_EVENT.parameterName()), startsWith("ctp-")); assertThat(hiveTable.getParameters().get("paramToUpdate"), is("updated")); assertThat(isExternalTable(hiveTable), is(true)); assertThat(hiveTable.getSd().getCols(), is(DATA_COLUMNS)); assertThat(hiveTable.getSd().getLocation(), is(replicaLocation.toString())); List<Partition> listPartitions = replicaCatalog .client() .listPartitions(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE, (short) 50); assertThat(listPartitions.size(), is(1)); // Only previously replicated partitions are updated, no NEW partitions are created assertThat(listPartitions.get(0).getSd().getLocation(), is(partitionChina.toString())); assertThat(listPartitions.get(0).getParameters().get("partition_paramToUpdate"), is("updated")); } }); runner.run(config.getAbsolutePath()); }
Example 19
Source File: CircusTrainHdfsHdfsIntegrationTest.java From circus-train with Apache License 2.0 | 4 votes |
/**
 * Runs a replication with the Avro-schema metadata-update configuration against an existing
 * unpartitioned replica and checks that the replica's {@code avro.schema.url} starts with the
 * {@code ct_database/} location under the replica warehouse.
 */
@Test
public void unpartitionedTableMetadataUpdateAvroSchema() throws Exception {
  helper.createManagedUnpartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE));

  String schemaUrlKey = "avro.schema.url";
  java.nio.file.Path schemaFile = Paths.get(sourceWarehouseUri.toString() + "/avro-schema-file.test");
  Files.write(schemaFile, AVRO_SCHEMA_CONTENT.getBytes());
  String schemaFileLocation = schemaFile.toString();

  // Pre-existing replica carrying a placeholder replication event id.
  URI replicaLocation = toUri(replicaWarehouseUri, DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE);
  TestUtils.createUnpartitionedTable(
      replicaCatalog.client(),
      DATABASE,
      TARGET_UNPARTITIONED_MANAGED_TABLE,
      replicaLocation);
  Table existingReplica = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE);
  existingReplica.putToParameters(REPLICATION_EVENT.parameterName(), "dummyEventID");
  replicaCatalog.client().alter_table(existingReplica.getDbName(), existingReplica.getTableName(), existingReplica);

  Table source = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_UNPARTITIONED_TABLE);
  source.putToParameters(schemaUrlKey, schemaFileLocation);
  sourceCatalog.client().alter_table(source.getDbName(), source.getTableName(), source);

  exit.expectSystemExitWithStatus(0);
  File configFile = dataFolder.getFile("unpartitioned-single-table-avro-schema-metadata-update.yml");
  CircusTrainRunner circusTrain = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(
          sourceCatalog.getThriftConnectionUri(),
          sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();

  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table replica = replicaCatalog.client().getTable(DATABASE, TARGET_UNPARTITIONED_MANAGED_TABLE);
      String expectedPrefix = replicaWarehouseUri.toURI().toString() + "ct_database/";
      String replicaSchemaUrl = replica.getParameters().get("avro.schema.url");
      assertThat(replicaSchemaUrl, startsWith(expectedPrefix));
    }
  });

  circusTrain.run(configFile.getAbsolutePath());
}
Example 20
Source File: CircusTrainHdfsHdfsIntegrationTest.java From circus-train with Apache License 2.0 | 4 votes |
/**
 * Runs a replication with the Avro-schema metadata-update configuration against an existing
 * partitioned replica and checks that {@code avro.schema.url} on both the replica table and its
 * first listed partition starts with the {@code ct_database/} location under the replica warehouse.
 */
@Test
public void partitionedTableMetadataUpdateAvroSchema() throws Exception {
  helper.createManagedPartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE));

  java.nio.file.Path schemaFile = Paths.get(sourceWarehouseUri.toString() + "/avro-schema-file.test");
  Files.write(schemaFile, AVRO_SCHEMA_CONTENT.getBytes());
  String schemaFileLocation = schemaFile.toString();

  // Pre-existing replica with a placeholder event id and the Asia/China partition.
  URI replicaLocation = toUri(replicaWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  TestUtils.createPartitionedTable(
      replicaCatalog.client(),
      DATABASE,
      TARGET_PARTITIONED_MANAGED_TABLE,
      replicaLocation);
  Table existingReplica = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE);
  existingReplica.putToParameters(REPLICATION_EVENT.parameterName(), "dummyEventID");

  URI partitionAsia = URI.create(replicaLocation + "/dummyEventID/continent=Asia");
  URI partitionChina = URI.create(partitionAsia + "/country=China");
  replicaCatalog
      .client()
      .add_partitions(
          Arrays.asList(newTablePartition(existingReplica, Arrays.asList("Asia", "China"), partitionChina)));
  replicaCatalog.client().alter_table(existingReplica.getDbName(), existingReplica.getTableName(), existingReplica);

  // Point both the source table and its Asia/China partition at the schema file.
  Table source = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  source.putToParameters("avro.schema.url", schemaFileLocation);
  sourceCatalog.client().alter_table(source.getDbName(), source.getTableName(), source);

  Partition sourcePartition = sourceCatalog
      .client()
      .getPartition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, "continent=Asia/country=China");
  sourcePartition.putToParameters("avro.schema.url", schemaFileLocation);
  sourceCatalog.client().alter_partition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, sourcePartition);

  exit.expectSystemExitWithStatus(0);
  File configFile = dataFolder.getFile("partitioned-single-table-avro-schema-metadata-update.yml");
  CircusTrainRunner circusTrain = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(
          sourceCatalog.getThriftConnectionUri(),
          sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();

  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table replica = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE);
      String expectedPrefix = replicaWarehouseUri.toURI().toString() + "ct_database/";
      String replicaSchemaUrl = replica.getParameters().get("avro.schema.url");
      assertThat(replicaSchemaUrl, startsWith(expectedPrefix));

      List<Partition> replicaPartitions = replicaCatalog
          .client()
          .listPartitions(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE, (short) 50);
      replicaSchemaUrl = replicaPartitions.get(0).getParameters().get("avro.schema.url");
      assertThat(replicaSchemaUrl, startsWith(expectedPrefix));
    }
  });

  circusTrain.run(configFile.getAbsolutePath());
}