Java Code Examples for org.apache.hadoop.hive.metastore.api.Partition#putToParameters()

The following examples show how to use org.apache.hadoop.hive.metastore.api.Partition#putToParameters(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: AvroSerDeTransformation.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Re-points the partition's {@code AVRO_SCHEMA_URL_PARAMETER} at an event-specific location underneath
 * {@code avroSchemaDestination}.
 *
 * <p>The target folder is {@code addTrailingSlash(avroSchemaDestination) + eventId}. The partition's current
 * schema URL and that folder are handed to {@code copy(...)}, and the parameter is then overwritten with
 * {@code <folder>/<schema file name>}.
 *
 * @param partition partition whose parameters are read and updated in place
 * @param avroSchemaDestination base destination; when {@code null} the partition is returned untouched
 * @param eventId appended to the base destination to form the target folder
 * @return the same {@code partition} instance that was passed in
 * @throws Exception propagated from the helpers invoked here
 */
Partition apply(Partition partition, String avroSchemaDestination, String eventId) throws Exception {
  if (avroSchemaDestination == null) {
    // No destination configured: nothing to transform.
    return partition;
  }

  final String eventDestination = addTrailingSlash(avroSchemaDestination) + eventId;
  final String sourceSchemaUrl = partition.getParameters().get(AVRO_SCHEMA_URL_PARAMETER);
  copy(sourceSchemaUrl, eventDestination);

  final String relocatedSchemaUrl = eventDestination + "/" + getAvroSchemaFileName(sourceSchemaUrl);
  partition.putToParameters(AVRO_SCHEMA_URL_PARAMETER, relocatedSchemaUrl);
  LOG.info("Avro SerDe transformation has been applied to partition '{}'", partition.toString());
  return partition;
}
 
Example 2
Source File: HivePartitionManager.java    From data-highway with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a new metastore {@link Partition} for {@code tableName} in this manager's {@code databaseName}.
 *
 * <p>All entries of {@code parameters} are copied onto the partition first; the {@code DATA_HIGHWAY_VERSION}
 * and {@code DATA_HIGHWAY_LAST_REVISION} entries are written afterwards, so they replace any caller-supplied
 * value stored under the same keys.
 *
 * @param tableName table the partition belongs to
 * @param partitionValues values set on the partition
 * @param location location passed to {@code AvroStorageDescriptorFactory.create}
 * @param parameters entries copied onto the partition's parameters
 * @return the populated partition
 */
private Partition newHivePartition(
    String tableName,
    List<String> partitionValues,
    String location,
    Map<String, String> parameters) {
  final Partition hivePartition = new Partition();
  hivePartition.setDbName(databaseName);
  hivePartition.setTableName(tableName);
  hivePartition.setValues(partitionValues);

  for (Map.Entry<String, String> parameter : parameters.entrySet()) {
    hivePartition.putToParameters(parameter.getKey(), parameter.getValue());
  }
  hivePartition.putToParameters(DATA_HIGHWAY_VERSION, DataHighwayVersion.VERSION);

  // Revision timestamp: the clock's current instant, formatted as an ISO offset date-time in UTC.
  final String lastRevision = ISO_OFFSET_DATE_TIME.withZone(UTC).format(clock.instant());
  hivePartition.putToParameters(DATA_HIGHWAY_LAST_REVISION, lastRevision);

  hivePartition.setSd(AvroStorageDescriptorFactory.create(location));
  return hivePartition;
}
 
Example 3
Source File: AddCheckSumReplicaTableFactory.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Delegates to the parent factory and then stores a checksum under the {@code PARTITION_CHECKSUM} parameter
 * of the resulting replica partition. The checksum is produced by applying {@code checksumFunction} to
 * {@code locationAsPath(sourcePartition)}.
 *
 * @return the replica partition created by the superclass, with the checksum parameter added
 */
@Override
Partition newReplicaPartition(
    String eventId,
    Table sourceTable,
    Partition sourcePartition,
    String replicaDatabaseName,
    String replicaTableName,
    Path replicaPartitionLocation,
    ReplicationMode replicationMode) {
  final Partition replicaPartition = super.newReplicaPartition(
      eventId,
      sourceTable,
      sourcePartition,
      replicaDatabaseName,
      replicaTableName,
      replicaPartitionLocation,
      replicationMode);

  // The checksum is computed from the SOURCE partition, not from the replica.
  final String sourceChecksum = checksumFunction.apply(locationAsPath(sourcePartition));
  replicaPartition.putToParameters(PARTITION_CHECKSUM.parameterName(), sourceChecksum);
  return replicaPartition;
}
 
Example 4
Source File: ReplicaTableFactory.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the replica counterpart of {@code sourcePartition}.
 *
 * <p>A copy of the source partition is passed through {@code partitionTransformation}, then renamed to the
 * replica database/table and, when it has a storage descriptor, relocated to
 * {@code replicaPartitionLocation}. Finally the statistics flags and the replication bookkeeping parameters
 * are written onto it.
 *
 * @param eventId stored under the {@code REPLICATION_EVENT} parameter
 * @param sourceTable its qualified name is stored under the {@code SOURCE_TABLE} parameter
 * @param sourcePartition partition to copy; its location (or {@code ""} when it has no storage descriptor)
 *        is stored under the {@code SOURCE_LOCATION} parameter
 * @param replicaDatabaseName database name set on the replica
 * @param replicaTableName table name set on the replica
 * @param replicaPartitionLocation location set on the replica's storage descriptor, if it has one
 * @param replicationMode its {@code name()} is stored under the {@code REPLICATION_MODE} parameter
 * @return the transformed and annotated replica partition
 */
Partition newReplicaPartition(
    String eventId,
    Table sourceTable,
    Partition sourcePartition,
    String replicaDatabaseName,
    String replicaTableName,
    Path replicaPartitionLocation,
    ReplicationMode replicationMode) {
  final Partition replicaPartition = partitionTransformation.transform(new Partition(sourcePartition));
  replicaPartition.setDbName(replicaDatabaseName);
  replicaPartition.setTableName(replicaTableName);
  if (replicaPartition.getSd() != null) {
    replicaPartition.getSd().setLocation(toStringOrNull(replicaPartitionLocation));
  }

  final String sourceLocation;
  if (sourcePartition.getSd() == null) {
    sourceLocation = "";
  } else {
    sourceLocation = toStringOrEmpty(sourcePartition.getSd().getLocation());
  }

  // Statistic specific parameters
  final String enabled = Boolean.TRUE.toString();
  replicaPartition.putToParameters(STATS_GENERATED_VIA_STATS_TASK, enabled);
  replicaPartition.putToParameters(STATS_GENERATED, enabled);
  replicaPartition.putToParameters(DO_NOT_UPDATE_STATS, enabled);

  // Replication specific parameters
  replicaPartition.putToParameters(LAST_REPLICATED.parameterName(), DateTime.now(DateTimeZone.UTC).toString());
  replicaPartition.putToParameters(REPLICATION_EVENT.parameterName(), eventId);
  replicaPartition.putToParameters(SOURCE_LOCATION.parameterName(), sourceLocation);
  replicaPartition.putToParameters(SOURCE_TABLE.parameterName(), Warehouse.getQualifiedName(sourceTable));
  replicaPartition.putToParameters(SOURCE_METASTORE.parameterName(), sourceMetaStoreUris);
  replicaPartition.putToParameters(REPLICATION_MODE.parameterName(), replicationMode.name());
  return replicaPartition;
}
 
Example 5
Source File: HiveDifferencesIntegrationTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the three statistics-related parameters on replica partition {@code part=1}, persists that partition,
 * and then runs {@link HiveDifferences} expecting that none of the listener callbacks (changed table, new
 * partition, changed partition, data changed) is invoked.
 */
@Test
public void replicaPartitionHasChangedButIgnorableParamter() throws Exception {
  // Modify the replica partition and persist the change in the metastore.
  Partition replicaPartition1 = catalog.client().getPartition(DATABASE, REPLICA_TABLE, "part=1");
  replicaPartition1.putToParameters("DO_NOT_UPDATE_STATS", "true");
  replicaPartition1.putToParameters("STATS_GENERATED_VIA_STATS_TASK", "true");
  replicaPartition1.putToParameters("STATS_GENERATED", "true");
  catalog.client().alter_partition(DATABASE, REPLICA_TABLE, replicaPartition1);

  Table sourceTable = catalog.client().getTable(DATABASE, SOURCE_TABLE);
  Table replicaTable = catalog.client().getTable(DATABASE, REPLICA_TABLE);
  // replicaTable is submitted exactly as it was fetched. Setting the same three parameters on
  // replicaPartition1 again at this point would have no effect: the partition was already persisted above
  // and the local object is not used afterwards.
  // NOTE(review): confirm whether this alter_table call is still required for the scenario.
  catalog.client().alter_table(DATABASE, REPLICA_TABLE, replicaTable);

  HiveDifferences
      .builder(diffListener)
      .comparatorRegistry(comparatorRegistry)
      .source(configuration, sourceTable, new PartitionIterator(catalog.client(), sourceTable, PARTITION_BATCH_SIZE))
      .replica(Optional.of(replicaTable),
          Optional.of(new BufferedPartitionFetcher(catalog.client(), replicaTable, PARTITION_BATCH_SIZE)))
      .checksumFunction(checksumFunction)
      .build()
      .run();

  // None of the difference callbacks may fire.
  verify(diffListener, never()).onChangedTable(anyList());
  verify(diffListener, never()).onNewPartition(anyString(), any(Partition.class));
  verify(diffListener, never()).onChangedPartition(anyString(), any(Partition.class), anyList());
  verify(diffListener, never()).onDataChanged(anyString(), any(Partition.class));
}
 
Example 6
Source File: CircusTrainHdfsHdfsIntegrationTest.java    From circus-train with Apache License 2.0 4 votes vote down vote up
/**
 * Runs Circus Train with {@code partitioned-single-table-mirror.yml} after changing one table parameter and
 * one partition parameter on the source, then checks (once the runner has exited with status 0) that the
 * replica table and its partitions carry the updated parameters and point at the source locations.
 */
@Test
public void partitionedTableMetadataMirror() throws Exception {
  helper.createManagedPartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE));

  // Table-level change on the source that the run is expected to carry over.
  Table alteredSourceTable = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  alteredSourceTable.putToParameters("paramToUpdate", "updated");
  sourceCatalog
      .client()
      .alter_table(alteredSourceTable.getDbName(), alteredSourceTable.getTableName(), alteredSourceTable);

  // Partition-level change on the source.
  Partition alteredSourcePartition = sourceCatalog.client().getPartition(DATABASE,
      SOURCE_MANAGED_PARTITIONED_TABLE, "continent=Asia/country=China");
  alteredSourcePartition.putToParameters("partition_paramToUpdate", "updated");
  sourceCatalog.client().alter_partition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, alteredSourcePartition);

  exit.expectSystemExitWithStatus(0);
  File runConfig = dataFolder.getFile("partitioned-single-table-mirror.yml");
  CircusTrainRunner circusTrain = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(sourceCatalog.getThriftConnectionUri(), sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();

  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table mirroredTable = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE);
      assertThat(mirroredTable.getDbName(), is(DATABASE));
      assertThat(mirroredTable.getTableName(), is(TARGET_PARTITIONED_MANAGED_TABLE));
      // MIRRORED table should be set to EXTERNAL
      assertThat(isExternalTable(mirroredTable), is(true));
      assertThat(mirroredTable.getSd().getCols(), is(DATA_COLUMNS));
      assertThat(mirroredTable.getParameters().get("paramToUpdate"), is("updated"));

      // The replica table is expected to point at the source table's location.
      File sourceTableDir = new File(sourceWarehouseUri, DATABASE + "/" + SOURCE_MANAGED_PARTITIONED_TABLE);
      String sourceTableUri = sourceTableDir.toURI().toString();
      assertThat(mirroredTable.getSd().getLocation() + "/", is(sourceTableUri));

      List<Partition> mirroredPartitions = replicaCatalog.client().listPartitions(DATABASE,
          TARGET_PARTITIONED_MANAGED_TABLE, (short) 50);
      assertThat(mirroredPartitions.size(), is(2));

      Partition firstPartition = mirroredPartitions.get(0);
      assertThat(firstPartition.getSd().getLocation(), is(sourceTableUri + "continent=Asia/country=China"));
      assertThat(firstPartition.getParameters().get("partition_paramToUpdate"), is("updated"));

      Partition secondPartition = mirroredPartitions.get(1);
      assertThat(secondPartition.getSd().getLocation(), is(sourceTableUri + "continent=Europe/country=UK"));
    }
  });
  circusTrain.run(runConfig.getAbsolutePath());
}
 
Example 7
Source File: CircusTrainHdfsHdfsIntegrationTest.java    From circus-train with Apache License 2.0 4 votes vote down vote up
/**
 * Runs Circus Train with {@code partitioned-single-table-metadata-update.yml} against a replica table that
 * already exists with a single partition and a dummy replication event id, after changing one table parameter
 * and one partition parameter on the source. Once the runner has exited with status 0, checks that the
 * replica's metadata was updated while its locations and its partition count stayed the same.
 */
@Test
public void partitionedTableMetadataUpdate() throws Exception {
  helper.createManagedPartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE));

  // Pre-create the replica table, flagged with a dummy replication event and holding one partition.
  final URI replicaTableLocation = toUri(replicaWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  TestUtils.createPartitionedTable(replicaCatalog.client(), DATABASE, TARGET_PARTITIONED_MANAGED_TABLE,
      replicaTableLocation);
  Table existingReplica = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE);
  existingReplica.putToParameters(REPLICATION_EVENT.parameterName(), "dummyEventID");
  URI asiaLocation = URI.create(replicaTableLocation + "/dummyEventID/continent=Asia");
  final URI chinaLocation = URI.create(asiaLocation + "/country=China");
  replicaCatalog.client().add_partitions(
      Arrays.asList(newTablePartition(existingReplica, Arrays.asList("Asia", "China"), chinaLocation)));
  replicaCatalog
      .client()
      .alter_table(existingReplica.getDbName(), existingReplica.getTableName(), existingReplica);

  // Table-level change on the source that the run is expected to carry over.
  Table alteredSourceTable = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  alteredSourceTable.putToParameters("paramToUpdate", "updated");
  sourceCatalog
      .client()
      .alter_table(alteredSourceTable.getDbName(), alteredSourceTable.getTableName(), alteredSourceTable);

  // Partition-level change on the source.
  Partition alteredSourcePartition = sourceCatalog.client().getPartition(DATABASE,
      SOURCE_MANAGED_PARTITIONED_TABLE, "continent=Asia/country=China");
  alteredSourcePartition.putToParameters("partition_paramToUpdate", "updated");
  sourceCatalog.client().alter_partition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, alteredSourcePartition);

  exit.expectSystemExitWithStatus(0);
  File runConfig = dataFolder.getFile("partitioned-single-table-metadata-update.yml");
  CircusTrainRunner circusTrain = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(sourceCatalog.getThriftConnectionUri(), sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();

  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table updatedReplica = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE);
      assertThat(updatedReplica.getDbName(), is(DATABASE));
      assertThat(updatedReplica.getTableName(), is(TARGET_PARTITIONED_MANAGED_TABLE));
      // dummyEventID should be overridden
      assertThat(updatedReplica.getParameters().get(REPLICATION_EVENT.parameterName()), startsWith("ctp-"));
      assertThat(updatedReplica.getParameters().get("paramToUpdate"), is("updated"));
      assertThat(isExternalTable(updatedReplica), is(true));
      assertThat(updatedReplica.getSd().getCols(), is(DATA_COLUMNS));
      assertThat(updatedReplica.getSd().getLocation(), is(replicaTableLocation.toString()));

      List<Partition> replicaPartitions = replicaCatalog.client().listPartitions(DATABASE,
          TARGET_PARTITIONED_MANAGED_TABLE, (short) 50);
      assertThat(replicaPartitions.size(), is(1));

      // Only previously replicated partitions are updated, no NEW partitions are created
      Partition onlyPartition = replicaPartitions.get(0);
      assertThat(onlyPartition.getSd().getLocation(), is(chinaLocation.toString()));
      assertThat(onlyPartition.getParameters().get("partition_paramToUpdate"), is("updated"));
    }
  });
  circusTrain.run(runConfig.getAbsolutePath());
}
 
Example 8
Source File: CircusTrainHdfsHdfsIntegrationTest.java    From circus-train with Apache License 2.0 4 votes vote down vote up
@Test
public void partitionedTableMetadataUpdateAvroSchema() throws Exception {
  helper.createManagedPartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE));

  java.nio.file.Path sourceAvroSchemaPath = Paths.get(sourceWarehouseUri.toString() + "/avro-schema-file.test");
  Files.write(sourceAvroSchemaPath, AVRO_SCHEMA_CONTENT.getBytes());
  String avroSchemaUrl = sourceAvroSchemaPath.toString();

  URI replicaLocation = toUri(replicaWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  TestUtils
      .createPartitionedTable(replicaCatalog.client(), DATABASE, TARGET_PARTITIONED_MANAGED_TABLE, replicaLocation);
  Table table = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE);
  table.putToParameters(REPLICATION_EVENT.parameterName(), "dummyEventID");

  URI partitionAsia = URI.create(replicaLocation + "/dummyEventID/continent=Asia");
  URI partitionChina = URI.create(partitionAsia + "/country=China");
  replicaCatalog
      .client()
      .add_partitions(Arrays.asList(newTablePartition(table, Arrays.asList("Asia", "China"), partitionChina)));
  replicaCatalog.client().alter_table(table.getDbName(), table.getTableName(), table);

  Table sourceTable = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  sourceTable.putToParameters("avro.schema.url", avroSchemaUrl);

  sourceCatalog.client().alter_table(sourceTable.getDbName(), sourceTable.getTableName(), sourceTable);
  Partition partition = sourceCatalog
      .client()
      .getPartition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, "continent=Asia/country=China");
  partition.putToParameters("avro.schema.url", avroSchemaUrl);

  sourceCatalog.client().alter_partition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, partition);

  exit.expectSystemExitWithStatus(0);
  File config = dataFolder.getFile("partitioned-single-table-avro-schema-metadata-update.yml");
  CircusTrainRunner runner = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(sourceCatalog.getThriftConnectionUri(), sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();
  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table replicaHiveTable = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE);
      String expectedReplicaSchemaUrl = replicaWarehouseUri.toURI().toString() + "ct_database/";
      String transformedAvroUrl = replicaHiveTable.getParameters().get("avro.schema.url");
      assertThat(transformedAvroUrl, startsWith(expectedReplicaSchemaUrl));

      List<Partition> listPartitions = replicaCatalog
          .client()
          .listPartitions(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE, (short) 50);
      transformedAvroUrl = listPartitions.get(0).getParameters().get("avro.schema.url");
      assertThat(transformedAvroUrl, startsWith(expectedReplicaSchemaUrl));
    }
  });
  runner.run(config.getAbsolutePath());
}