org.apache.hadoop.hive.metastore.Warehouse Java Examples

The following examples show how to use org.apache.hadoop.hive.metastore.Warehouse. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ViewTransformation.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Translates the HQL of a view; anything that is not a view is returned untouched.
 *
 * <p>Both the original and the expanded view text are passed through {@code hqlTranslator}
 * and stored on a new {@link Table} built from the input. Unless
 * {@code SKIP_TABLE_EXIST_CHECKS} is set to {@code true} in {@code replicaHiveConf}, the
 * translated view is then handed to {@code validateReferencedTables}.
 *
 * @param table table or view to transform
 * @return {@code table} itself when it is not a view, otherwise the translated view
 */
@Override
public Table transform(Table table) {
  boolean isView = MetaStoreUtils.isView(table);
  if (!isView) {
    return table;
  }

  LOG.info("Translating HQL of view {}.{}", table.getDbName(), table.getTableName());
  String qualifiedViewName = Warehouse.getQualifiedName(table);
  String translatedOriginal = hqlTranslator.translate(qualifiedViewName, table.getViewOriginalText());
  String translatedExpanded = hqlTranslator.translate(qualifiedViewName, table.getViewExpandedText());

  Table result = new Table(table);
  result.setViewOriginalText(translatedOriginal);
  result.setViewExpandedText(translatedExpanded);

  boolean skipExistChecks = replicaHiveConf.getBoolean(SKIP_TABLE_EXIST_CHECKS, false);
  if (skipExistChecks) {
    return result;
  }

  LOG
      .info("Validating that tables used by the view {}.{} exist in the replica catalog", table.getDbName(),
          table.getTableName());
  validateReferencedTables(result);
  return result;
}
 
Example #2
Source File: AWSCatalogMetastoreClient.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 6 votes vote down vote up
/**
 * Fetches a partition and, when the owning table has the parameter
 * {@code PARTITION_LEVEL_PRIVILEGE} set to "TRUE" (case-insensitive), attaches the
 * privilege set resolved for the given user and groups.
 */
@Override
public org.apache.hadoop.hive.metastore.api.Partition getPartitionWithAuthInfo(
      String databaseName, String tableName, List<String> values,
      String userName, List<String> groupNames)
      throws MetaException, UnknownTableException, NoSuchObjectException, TException {

    // TODO move this into the service
    org.apache.hadoop.hive.metastore.api.Partition partition = getPartition(databaseName, tableName, values);
    org.apache.hadoop.hive.metastore.api.Table table = getTable(databaseName, tableName);

    String privilegeFlag = table.getParameters().get("PARTITION_LEVEL_PRIVILEGE");
    if (!"TRUE".equalsIgnoreCase(privilegeFlag)) {
        return partition;
    }

    // NOTE(review): partName is never read below; the call is kept because it may throw
    // MetaException - confirm whether it is still needed.
    String partName = Warehouse.makePartName(table.getPartitionKeys(), values);

    HiveObjectRef partitionRef = new HiveObjectRef();
    partitionRef.setObjectType(HiveObjectType.PARTITION);
    partitionRef.setDbName(databaseName);
    partitionRef.setObjectName(tableName);
    partitionRef.setPartValues(values);

    org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet privileges =
          this.get_privilege_set(partitionRef, userName, groupNames);
    partition.setPrivileges(privileges);
    return partition;
}
 
Example #3
Source File: HiveConnectorFastServiceConfig.java    From metacat with Apache License 2.0 6 votes vote down vote up
/**
 * Bean factory for the fast Hive connector partition service.
 *
 * @param metacatHiveClient      hive client
 * @param warehouse              hive warehouse
 * @param hiveMetacatConverter   metacat converter
 * @param connectorContext       connector config
 * @param directSqlGetPartition  service to get partitions
 * @param directSqlSavePartition service to save partitions
 * @param icebergTableHandler    iceberg table handler
 * @return a new {@code HiveConnectorFastPartitionService} wired with the given collaborators
 */
@Bean
public HiveConnectorPartitionService partitionService(
    final IMetacatHiveClient metacatHiveClient,
    final Warehouse warehouse,
    final HiveConnectorInfoConverter hiveMetacatConverter,
    final ConnectorContext connectorContext,
    final DirectSqlGetPartition directSqlGetPartition,
    final DirectSqlSavePartition directSqlSavePartition,
    final IcebergTableHandler icebergTableHandler
) {
    // Note: the service constructor takes the context first, unlike this factory method.
    final HiveConnectorPartitionService fastPartitionService = new HiveConnectorFastPartitionService(
        connectorContext, metacatHiveClient, warehouse, hiveMetacatConverter,
        directSqlGetPartition, directSqlSavePartition, icebergTableHandler);
    return fastPartitionService;
}
 
Example #4
Source File: PartitionUtil.java    From metacat with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts the partition values encoded in a partition name, ordered by the table's
 * partition keys. Every partition key of the table must be present in the name.
 *
 * @param tableQName  table name, used when reporting an invalid partition name
 * @param table       table whose partition keys drive the order of the result
 * @param partName    partition name
 * @return list of partition values, one per partition key of the table
 * @throws InvalidMetaException if {@code partName} is null or empty, or lacks a value for
 *                              one of the table's partition keys
 */
public static List<String> getPartValuesFromPartName(final QualifiedName tableQName, final Table table,
    final String partName) {
    if (Strings.isNullOrEmpty(partName)) {
        throw new InvalidMetaException(tableQName, partName, null);
    }

    final LinkedHashMap<String, String> specByKey = new LinkedHashMap<>();
    Warehouse.makeSpecFromName(specByKey, new Path(partName));

    final List<String> orderedValues = new ArrayList<>();
    for (FieldSchema partitionKey : table.getPartitionKeys()) {
        final String value = specByKey.get(partitionKey.getName());
        if (value != null) {
            orderedValues.add(value);
        } else {
            throw new InvalidMetaException(tableQName, partName, null);
        }
    }
    return orderedValues;
}
 
Example #5
Source File: AWSCatalogMetastoreClient.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a client from the given Hive configuration: creates the Glue client, the
 * {@link Warehouse}, and the metastore delegate, snapshots the active configuration,
 * resolves the catalog id, and creates the default database when it does not exist yet.
 *
 * @param conf Hive configuration used for every collaborator created here
 * @param hook currently not used by this constructor (see TODO below)
 * @throws MetaException declared by the constructor; raised by one of the setup steps
 */
public AWSCatalogMetastoreClient(HiveConf conf, HiveMetaHookLoader hook) throws MetaException {
  this.conf = conf;
  this.glueClient = new AWSGlueClientFactory(this.conf).newClient();

  // TODO preserve existing functionality for HiveMetaHook
  this.wh = new Warehouse(this.conf);

  AWSGlueMetastore metastore = new AWSGlueMetastoreFactory().newMetastore(conf);
  this.glueMetastoreClientDelegate = new GlueMetastoreClientDelegate(this.conf, metastore, this.wh);

  snapshotActiveConf();
  this.catalogId = MetastoreClientUtils.getCatalogId(conf);

  boolean defaultDbPresent = doesDefaultDBExist();
  if (!defaultDbPresent) {
    createDefaultDatabase();
  }
}
 
Example #6
Source File: GlueMetastoreClientDelegate.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 6 votes vote down vote up
/**
 * Taken from HiveMetaStore#append_partition_common.
 *
 * Creates a partition for {@code table} with the given values. The storage descriptor is a
 * deep copy of the table's, relocated to {@code <table location>/<partition name>}; the
 * creation time and the DDL_TIME parameter are both set to the current time in seconds.
 */
private org.apache.hadoop.hive.metastore.api.Partition buildPartitionFromValues(
  org.apache.hadoop.hive.metastore.api.Table table, List<String> values) throws MetaException {
  org.apache.hadoop.hive.metastore.api.Partition result = new org.apache.hadoop.hive.metastore.api.Partition();
  result.setDbName(table.getDbName());
  result.setTableName(table.getTableName());
  result.setValues(values);
  result.setSd(table.getSd().deepCopy());

  String tableLocation = table.getSd().getLocation();
  String partName = Warehouse.makePartName(table.getPartitionKeys(), values);
  Path location = new Path(tableLocation, partName);
  result.getSd().setLocation(location.toString());

  long nowSeconds = System.currentTimeMillis() / MILLISECOND_TO_SECOND_FACTOR;
  result.setCreateTime((int) nowSeconds);
  result.putToParameters(hive_metastoreConstants.DDL_TIME, String.valueOf(nowSeconds));
  return result;
}
 
Example #7
Source File: GlueMetastoreClientDelegate.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the names of the partitions of a table, optionally restricted by a partial
 * specification of partition values.
 *
 * @param databaseName database of the table
 * @param tableName    table whose partitions are listed
 * @param values       partial partition specification; {@code null} means no filter expression
 * @param max          limit passed through to {@code getPartitions}
 * @return partition names built from the table's partition keys and each partition's values
 */
public List<String> listPartitionNames(
    String databaseName,
    String tableName,
    List<String> values,
    short max
) throws TException {
  org.apache.hadoop.hive.metastore.api.Table table = getTable(databaseName, tableName);
  String expression = (values == null)
      ? null
      : ExpressionHelper.buildExpressionFromPartialSpecification(table, values);

  List<org.apache.hadoop.hive.metastore.api.Partition> matches =
      getPartitions(databaseName, tableName, expression, max);

  List<String> partitionNames = Lists.newArrayList();
  for (org.apache.hadoop.hive.metastore.api.Partition match : matches) {
    String partitionName = Warehouse.makePartName(table.getPartitionKeys(), match.getValues());
    partitionNames.add(partitionName);
  }
  return partitionNames;
}
 
Example #8
Source File: GlueMetastoreClientDelegateTest.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 6 votes vote down vote up
/**
 * Per-test fixture: builds two {@code GlueMetastoreClientDelegate} instances that share the
 * same mocked Glue client and mocked {@link Warehouse} - one on a plain {@code HiveConf},
 * one on a {@code HiveConf} carrying a catalog id - then loads the test database/table and
 * prepares the mocked warehouse for the test table's location.
 */
@Before
public void setup() throws Exception {
  conf = new HiveConf();
  glueClient = mock(AWSGlue.class);
  wh = mock(Warehouse.class);
  metastoreClientDelegate = new GlueMetastoreClientDelegate(conf, new DefaultAWSGlueMetastore(conf, glueClient), wh);
  
  // Create a client delegate with CatalogId
  hiveConfCatalogId = new HiveConf();
  hiveConfCatalogId.set(GlueMetastoreClientDelegate.CATALOG_ID_CONF, CATALOG_ID);
  metastoreClientDelegateCatalogId = new GlueMetastoreClientDelegate(hiveConfCatalogId, new DefaultAWSGlueMetastore(hiveConfCatalogId, glueClient), wh);

  testDb = getTestDatabase();
  testTbl= getTestTable(testDb.getName());
  // NOTE(review): the meaning of the two boolean flags is defined by setupMockWarehouseForPath,
  // which is not visible here - confirm against that helper.
  setupMockWarehouseForPath(new Path(testTbl.getStorageDescriptor().getLocation().toString()), false, true);
}
 
Example #9
Source File: HiveConnectorFastPartitionService.java    From metacat with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the fast partition service. The context, hive client and converter are forwarded
 * to the superclass; the remaining collaborators are stored on this instance, together with
 * the registry obtained from the context.
 *
 * @param context                connector context
 * @param metacatHiveClient      hive client
 * @param warehouse              hive warehouse
 * @param hiveMetacatConverters  hive converter
 * @param directSqlGetPartition  service to get partitions
 * @param directSqlSavePartition service to save partitions
 * @param icebergTableHandler    iceberg table handler
 */
public HiveConnectorFastPartitionService(
    final ConnectorContext context,
    final IMetacatHiveClient metacatHiveClient,
    final Warehouse warehouse,
    final HiveConnectorInfoConverter hiveMetacatConverters,
    final DirectSqlGetPartition directSqlGetPartition,
    final DirectSqlSavePartition directSqlSavePartition,
    final IcebergTableHandler icebergTableHandler
) {
    super(context, metacatHiveClient, hiveMetacatConverters);
    this.registry = context.getRegistry();
    this.warehouse = warehouse;
    this.icebergTableHandler = icebergTableHandler;
    this.directSqlGetPartition = directSqlGetPartition;
    this.directSqlSavePartition = directSqlSavePartition;
}
 
Example #10
Source File: MetastoreClientPartitionIntegrationTest.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 6 votes vote down vote up
/**
 * One-time fixture for the class: builds an {@code AWSCatalogMetastoreClient} around a Glue
 * client obtained from {@code GlueTestClientFactory}, using a mocked {@code HiveConf} and a
 * mocked {@link Warehouse}, then creates the test database through the Glue client and loads
 * the test table definition.
 */
@BeforeClass
public static void setUpForClass() throws MetaException {
  HiveConf conf = mock(HiveConf.class);
  Warehouse wh = mock(Warehouse.class);
  // The mocked conf reports an empty admin-role user list.
  when(conf.get(HiveConf.ConfVars.USERS_IN_ADMIN_ROLE.varname,"")).thenReturn("");

  // The client factory is a mock that always hands back the test Glue client created here.
  glueClient = new GlueTestClientFactory().newClient();
  GlueClientFactory clientFactory = mock(GlueClientFactory.class);
  when(clientFactory.newClient()).thenReturn(glueClient);

  metastoreClient = new AWSCatalogMetastoreClient.Builder().withHiveConf(conf).withWarehouse(wh)
      .withClientFactory(clientFactory).build();
  catalogDatabase = getTestDatabase();
  glueClient.createDatabase(new CreateDatabaseRequest().withDatabaseInput(
      GlueInputConverter.convertToDatabaseInput(catalogDatabase)));
  catalogTable = getTestTable();
}
 
Example #11
Source File: HiveConnectorClientConfig.java    From metacat with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the {@link Warehouse} bean. The default Hive configuration for the connector is
 * overlaid with every entry of the connector's own configuration before the warehouse is built.
 *
 * @param connectorContext connector config context
 * @return WareHouse
 * @throws IllegalArgumentException wrapping any exception raised while building the warehouse
 */
@Bean
public Warehouse warehouse(final ConnectorContext connectorContext) {
    try {
        final HiveConf hiveConf = this.getDefaultConf(connectorContext);
        connectorContext.getConfiguration().forEach(hiveConf::set);
        return new Warehouse(hiveConf);
    } catch (Exception e) {
        final String message = String.format(
            "Failed creating the hive warehouse for catalog: %s",
            connectorContext.getCatalogName());
        throw new IllegalArgumentException(message, e);
    }
}
 
Example #12
Source File: MetastoreClientDatabaseIntegrationTest.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 6 votes vote down vote up
/**
 * Per-test fixture: stubs a mocked {@link Warehouse} so that every path lookup resolves to
 * {@code /db} and every directory check succeeds, builds the metastore client around a Glue
 * client obtained from {@code GlueTestClientFactory}, and prepares the test database in both
 * catalog and Hive form.
 */
@Before
public void setup() throws MetaException {
  conf = mock(HiveConf.class);
  wh = mock(Warehouse.class);
  tmpPath = new Path("/db");
  // Every warehouse path query returns tmpPath; isDir is always true.
  when(wh.getDefaultDatabasePath(anyString())).thenReturn(tmpPath);
  when(wh.getDnsPath(any(Path.class))).thenReturn(tmpPath);
  when(wh.isDir(any(Path.class))).thenReturn(true);
  when(conf.get(HiveConf.ConfVars.USERS_IN_ADMIN_ROLE.varname,"")).thenReturn("");

  // The client factory is a mock that always hands back the test Glue client created here.
  glueClient = new GlueTestClientFactory().newClient();
  GlueClientFactory clientFactory = mock(GlueClientFactory.class);
  when(clientFactory.newClient()).thenReturn(glueClient);

  metastoreClient = new AWSCatalogMetastoreClient.Builder().withHiveConf(conf).withWarehouse(wh)
      .withClientFactory(clientFactory).build();
  catalogDB = getTestDatabase();
  hiveDB = CatalogToHiveConverter.convertDatabase(catalogDB);

  // Starts empty; presumably filled by tests with databases to drop afterwards - confirm
  // against the teardown method.
  additionalDbForCleanup = Lists.newArrayList();
}
 
Example #13
Source File: AWSCatalogMetastoreClientTest.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 6 votes vote down vote up
/**
 * Appending a partition by name ({@code <first partition key>=foo}) must yield a partition
 * whose values are {@code ["foo"]}.
 */
@Test
public void testAppendPartitionByName() throws Exception {
  List<String> expectedValues = Arrays.asList("foo");
  when(glueClient.getTable(any(GetTableRequest.class)))
      .thenReturn(new GetTableResult().withTable(HiveToCatalogConverter.convertTable(testTable)));

  String tableLocation = testTable.getSd().getLocation();
  String partName = Warehouse.makePartName(testTable.getPartitionKeys(), expectedValues);
  Path partLocation = new Path(tableLocation, partName);
  setupMockWarehouseForPath(partLocation, false, true);
  mockBatchCreatePartitionsSucceed();

  org.apache.hadoop.hive.metastore.api.Partition appended = metastoreClient.appendPartition(
      testDB.getName(),
      testTable.getTableName(),
      testTable.getPartitionKeys().get(0).getName() + "=foo");

  assertThat(appended.getValues(), is(expectedValues));
  assertDaemonThreadPools();
}
 
Example #14
Source File: HiveTableOutputFormat.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Moves the files under {@code srcDir} into the directory of the partition described by
 * {@code partSpec}. When the metastore returns no partition for the spec's values, the
 * destination is derived from the table location and the partition is registered afterwards;
 * otherwise the location of the first returned partition is used.
 */
private void loadPartition(Path srcDir, Table table, Map<String, String> partSpec, HiveMetastoreClientWrapper client)
		throws TException, IOException {
	Path tblLocation = new Path(table.getSd().getLocation());
	String dbName = tablePath.getDatabaseName();
	String tableName = tablePath.getObjectName();
	List<Partition> existingPart = client.listPartitions(dbName, tableName, new ArrayList<>(partSpec.values()), (short) 1);
	boolean partitionExists = !existingPart.isEmpty();

	Path destDir;
	if (partitionExists) {
		destDir = new Path(existingPart.get(0).getSd().getLocation());
	} else {
		destDir = new Path(tblLocation, Warehouse.makePartPath(partSpec));
	}
	moveFiles(srcDir, destDir);

	if (partitionExists) {
		return;
	}
	// register new partition if it doesn't exist
	StorageDescriptor sd = new StorageDescriptor(hiveTablePartition.getStorageDescriptor());
	sd.setLocation(destDir.toString());
	Partition newPartition = HiveTableUtil.createHivePartition(dbName, tableName,
			new ArrayList<>(partSpec.values()), sd, new HashMap<>());
	newPartition.setValues(new ArrayList<>(partSpec.values()));
	client.add_partition(newPartition);
}
 
Example #15
Source File: MockThriftMetastoreClient.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Mock lookup: counts the access, optionally fails, and otherwise answers only for the test
 * database/table and the two known test partitions. Each requested name is mapped to a
 * {@code Partition} whose values are parsed from the name.
 */
@Override
public List<Partition> getPartitionsByNames(String dbName, String tableName, List<String> names)
        throws TException
{
    accessCount.incrementAndGet();
    if (throwException) {
        throw new RuntimeException();
    }

    boolean knownRequest = dbName.equals(TEST_DATABASE)
            && tableName.equals(TEST_TABLE)
            && ImmutableSet.of(TEST_PARTITION1, TEST_PARTITION2).containsAll(names);
    if (!knownRequest) {
        throw new NoSuchObjectException();
    }

    return Lists.transform(names, partitionName -> {
        try {
            List<String> partitionValues = ImmutableList.copyOf(Warehouse.getPartValuesFromPartName(partitionName));
            return new Partition(partitionValues, TEST_DATABASE, TEST_TABLE, 0, 0, DEFAULT_STORAGE_DESCRIPTOR, ImmutableMap.of());
        }
        catch (MetaException e) {
            throw new RuntimeException(e);
        }
    });
}
 
Example #16
Source File: BufferedPartitionFetcher.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the next batch of partitions into {@code buffer}, keyed by partition name.
 *
 * <p>The batch covers the partition names from index {@code firstPartition} (inclusive) up to
 * {@code firstPartition + bufferSize}, capped at the number of known partition names.
 *
 * @param firstPartition index into {@code partitionNames} of the first partition to load
 * @throws RuntimeException wrapping the {@code TException} if the metastore call or the
 *                          partition-name construction fails
 */
@VisibleForTesting
void bufferPartitions(int firstPartition) {
  // Exclusive end index of the batch - not the batch size.
  int endIndex = Math.min(partitionNames.size(), firstPartition + bufferSize);
  List<String> partitionsToLoad = partitionNames.subList(firstPartition, endIndex);

  try {
    // Log the real batch size; the end index would overstate it for every batch after the first.
    LOG.debug("Fetching {} partitions.", partitionsToLoad.size());
    List<Partition> partitions = metastore.getPartitionsByNames(table.getDbName(), table.getTableName(),
        partitionsToLoad);
    LOG.debug("Fetched {} partitions for table {}.", partitions.size(), Warehouse.getQualifiedName(table));

    buffer = new HashMap<>(partitions.size());
    for (Partition partition : partitions) {
      buffer.put(Warehouse.makePartName(table.getPartitionKeys(), partition.getValues()), partition);
    }
  } catch (TException e) {
    throw new RuntimeException("Unable to fetch partitions of table " + Warehouse.getQualifiedName(table), e);
  }
}
 
Example #17
Source File: PartitionedTableMetadataUpdateReplication.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Keeps only those source partitions that the replica metastore can return for the replica
 * table, together with their statistics. A partition for which the replica lookup raises
 * {@code NoSuchObjectException} is skipped (logged at debug level); any other
 * {@code TException} propagates.
 */
private PartitionsAndStatistics filterOnReplicatedPartitions(
    CloseableMetaStoreClient replicaClient,
    PartitionsAndStatistics sourcePartitionsAndStatistics,
    List<FieldSchema> partitionKeys)
  throws TException {
  Map<Partition, ColumnStatistics> replicatedStatistics = new LinkedHashMap<>();
  for (Partition sourcePartition : sourcePartitionsAndStatistics.getPartitions()) {
    try {
      // Existence probe: the returned replica partition itself is not needed.
      replicaClient.getPartition(replicaDatabaseName, replicaTableName, sourcePartition.getValues());
      ColumnStatistics statistics = sourcePartitionsAndStatistics.getStatisticsForPartition(sourcePartition);
      replicatedStatistics.put(sourcePartition, statistics);
    } catch (NoSuchObjectException e) {
      LOG.debug("Partition {} doesn't exist, skipping it...", Warehouse.getQualifiedName(sourcePartition));
    }
  }
  return new PartitionsAndStatistics(partitionKeys, replicatedStatistics);
}
 
Example #18
Source File: AWSCatalogMetastoreClient.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 6 votes vote down vote up
/**
 * Fetches a partition and, when the owning table has the parameter
 * {@code PARTITION_LEVEL_PRIVILEGE} set to "TRUE" (case-insensitive), attaches the
 * privilege set resolved for the given user and groups.
 */
@Override
public org.apache.hadoop.hive.metastore.api.Partition getPartitionWithAuthInfo(
      String databaseName, String tableName, List<String> values,
      String userName, List<String> groupNames)
      throws MetaException, UnknownTableException, NoSuchObjectException, TException {

  // TODO move this into the service
  org.apache.hadoop.hive.metastore.api.Partition partition = getPartition(databaseName, tableName, values);
  org.apache.hadoop.hive.metastore.api.Table table = getTable(databaseName, tableName);

  String privilegeFlag = table.getParameters().get("PARTITION_LEVEL_PRIVILEGE");
  if (!"TRUE".equalsIgnoreCase(privilegeFlag)) {
    return partition;
  }

  // NOTE(review): partName is never read below; the call is kept because it may throw
  // MetaException - confirm whether it is still needed.
  String partName = Warehouse.makePartName(table.getPartitionKeys(), values);

  HiveObjectRef partitionRef = new HiveObjectRef();
  partitionRef.setObjectType(HiveObjectType.PARTITION);
  partitionRef.setDbName(databaseName);
  partitionRef.setObjectName(tableName);
  partitionRef.setPartValues(values);

  org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet privileges =
        this.get_privilege_set(partitionRef, userName, groupNames);
  partition.setPrivileges(privileges);
  return partition;
}
 
Example #19
Source File: AWSCatalogMetastoreClient.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a client from the given Hive configuration: creates the Glue client, the
 * {@link Warehouse}, and the metastore delegate, snapshots the active configuration,
 * resolves the catalog id, and creates the default database when it does not exist yet.
 *
 * @param conf Hive configuration used for every collaborator created here
 * @param hook currently not used by this constructor (see TODO below)
 * @throws MetaException declared by the constructor; raised by one of the setup steps
 */
public AWSCatalogMetastoreClient(HiveConf conf, HiveMetaHookLoader hook) throws MetaException {
  this.conf = conf;
  this.glueClient = new AWSGlueClientFactory(this.conf).newClient();

  // TODO preserve existing functionality for HiveMetaHook
  this.wh = new Warehouse(this.conf);

  AWSGlueMetastore metastore = new AWSGlueMetastoreFactory().newMetastore(conf);
  this.glueMetastoreClientDelegate = new GlueMetastoreClientDelegate(this.conf, metastore, this.wh);

  snapshotActiveConf();
  this.catalogId = MetastoreClientUtils.getCatalogId(conf);

  boolean defaultDbPresent = doesDefaultDBExist();
  if (!defaultDbPresent) {
    createDefaultDatabase();
  }
}
 
Example #20
Source File: DiffGeneratedPartitionPredicateTest.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the two test tables. {@code table1} (db1.table1) gets two partitions over the keys
 * {@code p1}/{@code p2}, and their partition names are recorded; {@code table2} (db2.table2)
 * shares the same partition keys but gets no partitions here.
 */
private void setupHiveTables() throws TException, IOException {
  List<FieldSchema> partitionKeys = Lists.newArrayList(newFieldSchema("p1"), newFieldSchema("p2"));

  File firstLocation = new File("db1", "table1");
  StorageDescriptor firstSd = newStorageDescriptor(firstLocation, "col0");
  table1 = newTable("table1", "db1", partitionKeys, firstSd);
  Partition first = newPartition(table1, "value1", "value2");
  Partition second = newPartition(table1, "value11", "value22");
  table1Partitions = Arrays.asList(first, second);
  String firstName = Warehouse.makePartName(partitionKeys, first.getValues());
  String secondName = Warehouse.makePartName(partitionKeys, second.getValues());
  table1PartitionNames = Arrays.asList(firstName, secondName);

  File secondLocation = new File("db2", "table2");
  StorageDescriptor secondSd = newStorageDescriptor(secondLocation, "col0");
  table2 = newTable("table2", "db2", partitionKeys, secondSd);
}
 
Example #21
Source File: AWSCatalogMetastoreClientTest.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 6 votes vote down vote up
/**
 * Per-test fixture: converts the catalog test objects (database, table, index, partition,
 * function) to their Hive form, prepares a mocked {@link Warehouse} for the default
 * warehouse path and the test partition path, and builds the metastore client from mocked
 * factories that hand out a spied Glue client.
 */
@Before
public void setUp() throws Exception {
  testDB = CatalogToHiveConverter.convertDatabase(getTestDatabase());
  testTable = CatalogToHiveConverter.convertTable(getTestTable(), testDB.getName());
  testIndex = getTestHiveIndex(testDB.getName());
  testPartition = CatalogToHiveConverter.convertPartition(
    getTestPartition(testDB.getName(), testTable.getTableName(), Lists.newArrayList("val1")));
  testFunction = CatalogToHiveConverter.convertFunction(testDB.getName(), getCatalogTestFunction());
  defaultWhPath = new Path("/tmp");
  partitionPath = new Path(testPartition.getSd().getLocation());

  wh = mock(Warehouse.class);
  // NOTE(review): the meaning of the two boolean flags is defined by setupMockWarehouseForPath,
  // which is not visible here - confirm against that helper.
  setupMockWarehouseForPath(defaultWhPath, true, true);
  setupMockWarehouseForPath(partitionPath, false, false);

  // conf is a spy over a real HiveConf, so the setInt below takes effect on the real object.
  conf = spy(new HiveConf());
  conf.setInt(GlueMetastoreClientDelegate.NUM_PARTITION_SEGMENTS_CONF, 1);
  glueClient = spy(AWSGlue.class);
  clientFactory = mock(GlueClientFactory.class);
  metastoreFactory = mock(AWSGlueMetastoreFactory.class);
  // Both factories are stubbed to return objects backed by the spied Glue client.
  when(clientFactory.newClient()).thenReturn(glueClient);
  when(metastoreFactory.newMetastore(conf)).thenReturn(new DefaultAWSGlueMetastore(conf, glueClient));
  metastoreClient = new AWSCatalogMetastoreClient.Builder().withClientFactory(clientFactory)
      .withMetastoreFactory(metastoreFactory).withWarehouse(wh).createDefaults(false).withHiveConf(conf).build();
}
 
Example #22
Source File: MetastoreClientTableIntegrationTest.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 6 votes vote down vote up
/**
 * One-time fixture for the class: stubs a mocked {@link Warehouse} so that every path lookup
 * resolves to {@code /db} and every directory check succeeds, builds the metastore client
 * around a Glue client obtained from {@code GlueTestClientFactory}, and creates the test
 * database through that Glue client.
 */
@BeforeClass
public static void setup() throws MetaException {
  conf = mock(HiveConf.class);
  wh = mock(Warehouse.class);
  tmpPath = new Path("/db");
  // Every warehouse path query returns tmpPath; isDir is always true.
  when(wh.getDefaultDatabasePath(anyString())).thenReturn(tmpPath);
  when(wh.getDnsPath(any(Path.class))).thenReturn(tmpPath);
  when(wh.isDir(any(Path.class))).thenReturn(true);
  when(conf.get(HiveConf.ConfVars.USERS_IN_ADMIN_ROLE.varname,"")).thenReturn("");

  // The client factory is a mock that always hands back the test Glue client created here.
  glueClient = new GlueTestClientFactory().newClient();
  GlueClientFactory clientFactory = mock(GlueClientFactory.class);
  when(clientFactory.newClient()).thenReturn(glueClient);

  metastoreClient = new AWSCatalogMetastoreClient.Builder().withHiveConf(conf).withWarehouse(wh)
          .withClientFactory(clientFactory).build();
  catalogDB = getTestDatabase();
  hiveDB = CatalogToHiveConverter.convertDatabase(catalogDB);
  glueClient.createDatabase(new CreateDatabaseRequest()
    .withDatabaseInput(GlueInputConverter.convertToDatabaseInput(catalogDB)));
}
 
Example #23
Source File: HiveConvertersImpl.java    From metacat with Apache License 2.0 5 votes vote down vote up
/**
 * {@inheritDoc}
 *
 * <p>When {@code tableDto} supplies partition keys, the values are returned in key order and
 * every key must be present in the name; otherwise the values are returned as parsed, without
 * validation.
 */
@Override
public List<String> getPartValsFromName(@Nullable final TableDto tableDto, final String partName) {
    // Parse the partition name into an ordered key -> value spec.
    final LinkedHashMap<String, String> spec;
    try {
        spec = Warehouse.makeSpecFromName(partName);
    } catch (MetaException e) {
        throw new IllegalArgumentException("Invalid partition name", e);
    }

    final List<String> keys = (tableDto == null) ? null : tableDto.getPartition_keys();
    if (keys == null) {
        // No table (or no keys) to validate against: hand back the parsed values as-is.
        return Lists.newArrayList(spec.values());
    }

    final List<String> orderedVals = Lists.newArrayListWithCapacity(keys.size());
    for (String key : keys) {
        final String val = spec.get(key);
        if (val == null) {
            throw new IllegalArgumentException("Invalid partition name - missing " + key);
        }
        orderedVals.add(val);
    }
    return orderedVals;
}
 
Example #24
Source File: PartitionsAndStatistics.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the name of a partition from the partition keys and the partition's values,
 * converting the checked {@code MetaException} into a {@code RuntimeException}.
 */
private static String getPartitionName(List<FieldSchema> partitionKeys, Partition partition) {
  final List<String> partitionValues = partition.getValues();
  try {
    return Warehouse.makePartName(partitionKeys, partitionValues);
  } catch (MetaException e) {
    throw new RuntimeException(e);
  }
}
 
Example #25
Source File: HiveEndpoint.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the partition name of every given partition, in input order.
 *
 * @param partitionKeys partition keys used to build each name
 * @param partitions    partitions to name
 * @return one name per partition
 * @throws MetaException if a name cannot be built
 */
private List<String> getPartitionNames(List<FieldSchema> partitionKeys, List<Partition> partitions)
  throws MetaException {
  List<String> names = new ArrayList<>(partitions.size());
  for (Partition current : partitions) {
    String name = Warehouse.makePartName(partitionKeys, current.getValues());
    names.add(name);
  }
  return names;
}
 
Example #26
Source File: OrcMapreduceRecordReader.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Opens the file behind the given split and prepares {@code orcRecordReader} for it.
 *
 * <p>Read tuning (merge distance, read size, stream buffer size), the row struct, the
 * predicate, and the partition/column ids are all read from the task configuration. The
 * partition values are parsed from the split's path.
 *
 * <p>The input stream is opened before the rest of the setup runs. If any later step fails
 * with an {@code IOException} or a runtime exception, the stream is closed here before the
 * failure is rethrown, so a failed initialization does not leak the open file handle.
 *
 * @param inputSplit         split to read; must be an {@code OrcNewSplit}
 * @param taskAttemptContext supplies the configuration
 * @throws IOException if the file cannot be opened, the partition values cannot be parsed
 *                     from the path, or the reader cannot be created
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    OrcNewSplit orcNewSplit = (OrcNewSplit) inputSplit;
    Configuration configuration = taskAttemptContext.getConfiguration();
    double maxMergeDistance = configuration.getDouble(MAX_MERGE_DISTANCE,MAX_MERGE_DISTANCE_DEFAULT);
    double maxReadSize = configuration.getDouble(MAX_READ_SIZE,MAX_READ_SIZE_DEFAULT);
    double streamBufferSize = configuration.getDouble(STREAM_BUFFER_SIZE,STREAM_BUFFER_SIZE_DEFAULT);
    Path path = orcNewSplit.getPath();
    FileSystem fileSystem = FileSystem.get(path.toUri(),configuration);
    long size = fileSystem.getFileStatus(path).getLen();
    FSDataInputStream inputStream = fileSystem.open(path);
    try {
        rowStruct = getRowStruct(configuration);
        predicate = getSplicePredicate(configuration);
        List<Integer> partitions = getPartitionIds(configuration);
        List<Integer> columnIds = getColumnIds(configuration);

        List<String> values;
        try {
            values = Warehouse.getPartValuesFromPartName(path.toString());
        } catch (MetaException me) {
            throw new IOException(me);
        }
        OrcDataSource orcDataSource = new HdfsOrcDataSource(path.toString(), size, new DataSize(maxMergeDistance, DataSize.Unit.MEGABYTE),
                new DataSize(maxReadSize, DataSize.Unit.MEGABYTE),
                new DataSize(streamBufferSize, DataSize.Unit.MEGABYTE), inputStream);
        OrcReader orcReader = new OrcReader(orcDataSource, new OrcMetadataReader(), new DataSize(maxMergeDistance, DataSize.Unit.MEGABYTE),
                new DataSize(maxReadSize, DataSize.Unit.MEGABYTE));
        orcRecordReader =
            orcReader.createRecordReader(getColumnsAndTypes(columnIds, rowStruct),
                                         predicate, orcNewSplit.getStart(), orcNewSplit.getLength(),
                                         HIVE_STORAGE_TIME_ZONE, new AggregatedMemoryContext(),
                                         partitions, values);
    } catch (IOException | RuntimeException e) {
        // Setup failed, so no reader exists to close the stream later; release it now and
        // keep any close failure attached to the original error.
        try {
            inputStream.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
 
Example #27
Source File: BufferedPartitionFetcher.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a fetcher for the partitions of {@code table}. All partition names of the table are
 * fetched up front; the partition buffer itself starts out empty.
 *
 * @param metastore  client used to list partition names
 * @param table      table whose partitions are fetched
 * @param bufferSize stored as the fetcher's buffer size
 * @throws RuntimeException wrapping the {@code TException} if the partition names cannot be fetched
 */
public BufferedPartitionFetcher(IMetaStoreClient metastore, Table table, short bufferSize) {
  this.table = table;
  this.metastore = metastore;
  this.bufferSize = bufferSize;
  this.buffer = Collections.emptyMap();

  try {
    LOG.debug("Fetching all partition names.");
    partitionNames = metastore.listPartitionNames(table.getDbName(), table.getTableName(), NO_LIMIT);
    LOG.debug("Fetched {} partition names for table {}.", partitionNames.size(), Warehouse.getQualifiedName(table));
  } catch (TException e) {
    throw new RuntimeException("Unable to fetch partition names of table " + Warehouse.getQualifiedName(table), e);
  }
}
 
Example #28
Source File: FilterGeneratorImpl.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Prints the source catalog details and the partition expression, evaluates the partition
 * predicate, fetches the matching partitions from the source (up to {@code partitionLimit}),
 * and prints the evaluated filter, the limit, and the values of the fetched partitions in
 * {@code PARTITION_COMPARATOR} order.
 *
 * @throws CircusTrainException if the partitions cannot be fetched
 */
@Override
public void run() throws CircusTrainException {
  out.println(String.format("Source catalog:        %s", source.getName()));
  out.println(String.format("Source MetaStore URIs: %s", source.getMetaStoreUris()));
  out.println(String.format("Source table:          %s", Warehouse.getQualifiedName(sourceTable)));
  out.println(String.format("Partition expression:  %s", partitionFilter));

  String parsedPartitionFilter = partitionPredicate.getPartitionPredicate();
  boolean filterChanged = !Objects.equals(partitionFilter, parsedPartitionFilter);
  if (filterChanged) {
    LOG.info("Evaluated expression to: {}", parsedPartitionFilter);
  }

  try {
    LOG.info("Executing filter with limit {} on: {}:{} ({})", partitionLimit, source.getName(),
        Warehouse.getQualifiedName(sourceTable), source.getMetaStoreUris());
    PartitionsAndStatistics fetched = source.getPartitions(sourceTable, parsedPartitionFilter, partitionLimit);
    LOG.info("Retrieved {} partition(s):", fetched.getPartitions().size());

    SortedSet<Partition> ordered = new TreeSet<>(PARTITION_COMPARATOR);
    ordered.addAll(fetched.getPartitions());

    List<List<String>> valuesPerPartition = new ArrayList<>();
    for (Partition each : ordered) {
      List<String> partitionValues = each.getValues();
      valuesPerPartition.add(partitionValues);
      LOG.info("{}", partitionValues);
    }

    out.println(String.format("Partition filter:      %s", parsedPartitionFilter));
    out.println(String.format("Partition limit:       %s", partitionLimit));
    out.println(String.format("Partition(s) fetched:  %s", valuesPerPartition));
  } catch (TException e) {
    throw new CircusTrainException("Could not fetch partitions for filter: '" + parsedPartitionFilter + "'.", e);
  }
}
 
Example #29
Source File: CatalogThriftHiveMetastore.java    From metacat with Apache License 2.0 5 votes vote down vote up
/**
 * {@inheritDoc}
 *
 * <p>A null or empty partition name yields an empty map; any other name is parsed with
 * {@link Warehouse#makeSpecFromName(String)}. The work runs inside {@code requestWrapper}
 * under the request name {@code partition_name_to_spec}.
 */
@Override
// NOTE(review): the lambda no longer performs an unchecked cast; this suppression is kept
// only in case requestWrapper itself needs it - confirm and remove if not.
@SuppressWarnings("unchecked")
public Map<String, String> partition_name_to_spec(final String partName) throws TException {
    return requestWrapper("partition_name_to_spec", new Object[]{partName}, () -> {
        if (Strings.isNullOrEmpty(partName)) {
            // Typed factory instead of the raw EMPTY_MAP constant: same shared instance, no cast.
            return Collections.<String, String>emptyMap();
        }

        return Warehouse.makeSpecFromName(partName);
    });
}
 
Example #30
Source File: CatalogThriftHiveMetastore.java    From metacat with Apache License 2.0 5 votes vote down vote up
/**
 * {@inheritDoc}
 *
 * <p>A null or empty partition name yields an empty list; any other name is parsed with
 * {@link Warehouse#makeSpecFromName(String)} and the values of the resulting spec are
 * returned in the spec's iteration order. The work runs inside {@code requestWrapper} under
 * the request name {@code partition_name_to_vals}.
 */
@Override
// NOTE(review): the lambda no longer performs an unchecked cast; this suppression is kept
// only in case requestWrapper itself needs it - confirm and remove if not.
@SuppressWarnings("unchecked")
public List<String> partition_name_to_vals(final String partName) throws TException {
    return requestWrapper("partition_name_to_vals", new Object[]{partName}, () -> {
        if (Strings.isNullOrEmpty(partName)) {
            // Typed factory instead of the raw EMPTY_LIST constant: same shared instance, no cast.
            return Collections.<String>emptyList();
        }

        final Map<String, String> spec = Warehouse.makeSpecFromName(partName);
        final List<String> vals = Lists.newArrayListWithCapacity(spec.size());
        vals.addAll(spec.values());
        return vals;
    });
}