org.apache.flink.table.catalog.hive.client.HiveMetastoreClientFactory Java Examples
The following examples show how to use
org.apache.flink.table.catalog.hive.client.HiveMetastoreClientFactory.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HiveCatalog.java From flink with Apache License 2.0 | 5 votes |
/**
 * Opens this catalog.
 *
 * <p>Creates the Hive metastore client if none has been created yet, then verifies that the
 * configured default database exists.
 *
 * @throws CatalogException if the configured default database does not exist in this catalog
 */
@Override
public void open() throws CatalogException {
    // Connect only once; an already-created client is left untouched.
    if (client == null) {
        client = HiveMetastoreClientFactory.create(hiveConf, hiveVersion);
        LOG.info("Connected to Hive metastore");
    }

    if (databaseExists(getDefaultDatabase())) {
        return;
    }

    throw new CatalogException(
        String.format(
            "Configured default database %s doesn't exist in catalog %s.",
            getDefaultDatabase(),
            getName()));
}
Example #2
Source File: HiveTableOutputFormat.java From flink with Apache License 2.0 | 5 votes |
/**
 * Finalizes the job output: commits the staged files and moves/loads them into the Hive table.
 *
 * <p>Three cases are handled:
 * <ul>
 *   <li>non-partitioned table: staged files are moved to the table location;</li>
 *   <li>dynamic partitioning: every generated partition directory is committed and loaded;</li>
 *   <li>static partitioning: the single staged partition is loaded.</li>
 * </ul>
 * The staging directory is always deleted recursively afterwards, even on failure.
 *
 * @param parallelism not used by this implementation
 * @throws IOException declared by the overridden method; file-system calls made here may raise it
 * @throws CatalogException if a Hive metastore call fails with a {@code TException}
 */
@Override
public void finalizeGlobal(int parallelism) throws IOException {
    StorageDescriptor jobSD = hiveTablePartition.getStorageDescriptor();
    Path stagingDir = new Path(jobSD.getLocation());
    FileSystem fs = stagingDir.getFileSystem(jobConf);

    try (HiveMetastoreClientWrapper client =
            HiveMetastoreClientFactory.create(new HiveConf(jobConf, HiveConf.class), hiveVersion)) {
        Table table = client.getTable(tablePath.getDatabaseName(), tablePath.getObjectName());

        // Without dynamic partitioning the whole staging dir is one unit of commit.
        if (!isDynamicPartition) {
            commitJob(stagingDir.toString());
        }

        if (!isPartitioned) {
            Path tableLocation = new Path(table.getSd().getLocation());
            moveFiles(stagingDir, tableLocation);
        } else if (isDynamicPartition) {
            // Number of directory levels below the staging dir that were generated dynamically.
            int dynamicLevels =
                partitionColumns.size() - hiveTablePartition.getPartitionSpec().size();
            FileStatus[] generatedParts =
                HiveStatsUtils.getFileStatusRecurse(stagingDir, dynamicLevels, fs);
            for (FileStatus generated : generatedParts) {
                Path partDir = generated.getPath();
                commitJob(partDir.toString());
                LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<>();
                Warehouse.makeSpecFromName(fullPartSpec, partDir);
                loadPartition(partDir, table, fullPartSpec, client);
            }
        } else {
            // Static partition: stringify the configured partition values.
            LinkedHashMap<String, String> staticSpec = new LinkedHashMap<>();
            for (String column : hiveTablePartition.getPartitionSpec().keySet()) {
                staticSpec.put(column, hiveTablePartition.getPartitionSpec().get(column).toString());
            }
            loadPartition(stagingDir, table, staticSpec, client);
        }
    } catch (TException e) {
        throw new CatalogException("Failed to query Hive metaStore", e);
    } finally {
        fs.delete(stagingDir, true);
    }
}
Example #3
Source File: TableEnvHiveConnectorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * One-time fixture setup: creates and opens a Hive catalog from the Hive shell's configuration
 * and creates a metastore client for the Hive version reported by the shim loader.
 */
@BeforeClass
public static void setup() {
    final HiveConf shellConf = hiveShell.getHiveConf();

    hiveCatalog = HiveTestUtils.createHiveCatalog(shellConf);
    hiveCatalog.open();

    final String version = HiveShimLoader.getHiveVersion();
    hmsClient = HiveMetastoreClientFactory.create(shellConf, version);
}
Example #4
Source File: HiveCatalog.java From flink with Apache License 2.0 | 5 votes |
/**
 * Opens the catalog by making sure a Hive metastore client exists and that the configured
 * default database is present.
 *
 * @throws CatalogException if the default database cannot be found in this catalog
 */
@Override
public void open() throws CatalogException {
    final boolean needsClient = client == null;
    if (needsClient) {
        client = HiveMetastoreClientFactory.create(hiveConf, hiveVersion);
        LOG.info("Connected to Hive metastore");
    }

    final boolean defaultDbPresent = databaseExists(getDefaultDatabase());
    if (!defaultDbPresent) {
        final String message =
            String.format(
                "Configured default database %s doesn't exist in catalog %s.",
                getDefaultDatabase(),
                getName());
        throw new CatalogException(message);
    }
}
Example #5
Source File: TableEnvHiveConnectorITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Prepares the shared fixtures for this test class: an opened Hive catalog built from the Hive
 * shell's configuration, and a metastore client created from that same configuration.
 */
@BeforeClass
public static void setup() {
    final HiveConf conf = hiveShell.getHiveConf();

    // The catalog has to be opened before it is used.
    hiveCatalog = HiveTestUtils.createHiveCatalog(conf);
    hiveCatalog.open();

    final String hiveVersionInUse = HiveShimLoader.getHiveVersion();
    hmsClient = HiveMetastoreClientFactory.create(conf, hiveVersionInUse);
}
Example #6
Source File: HiveTableSource.java From flink with Apache License 2.0 | 4 votes |
private void initAllPartitions() { allHivePartitions = new ArrayList<>(); // Please note that the following directly accesses Hive metastore, which is only a temporary workaround. // Ideally, we need to go thru Catalog API to get all info we need here, which requires some major // refactoring. We will postpone this until we merge Blink to Flink. try (HiveMetastoreClientWrapper client = HiveMetastoreClientFactory.create(new HiveConf(jobConf, HiveConf.class), hiveVersion)) { String dbName = tablePath.getDatabaseName(); String tableName = tablePath.getObjectName(); List<String> partitionColNames = catalogTable.getPartitionKeys(); if (partitionColNames != null && partitionColNames.size() > 0) { final String defaultPartitionName = jobConf.get(HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname, HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal); List<Partition> partitions = client.listPartitions(dbName, tableName, (short) -1); for (Partition partition : partitions) { StorageDescriptor sd = partition.getSd(); Map<String, Object> partitionColValues = new HashMap<>(); Map<String, String> partitionSpec = new HashMap<>(); for (int i = 0; i < partitionColNames.size(); i++) { String partitionColName = partitionColNames.get(i); String partitionValue = partition.getValues().get(i); partitionSpec.put(partitionColName, partitionValue); DataType type = catalogTable.getSchema().getFieldDataType(partitionColName).get(); Object partitionObject; if (defaultPartitionName.equals(partitionValue)) { LogicalTypeRoot typeRoot = type.getLogicalType().getTypeRoot(); // while this is inline with Hive, seems it should be null for string columns as well partitionObject = typeRoot == LogicalTypeRoot.CHAR || typeRoot == LogicalTypeRoot.VARCHAR ? 
defaultPartitionName : null; } else { partitionObject = restorePartitionValueFromFromType(partitionValue, type); } partitionColValues.put(partitionColName, partitionObject); } HiveTablePartition hiveTablePartition = new HiveTablePartition(sd, partitionColValues); allHivePartitions.add(hiveTablePartition); partitionList.add(partitionSpec); partitionSpec2HiveTablePartition.put(partitionSpec, hiveTablePartition); } } else { allHivePartitions.add(new HiveTablePartition(client.getTable(dbName, tableName).getSd(), null)); } } catch (TException e) { throw new FlinkHiveException("Failed to collect all partitions from hive metaStore", e); } initAllPartitions = true; }
Example #7
Source File: HiveBatchSource.java From Alink with Apache License 2.0 | 4 votes |
private List<HiveTablePartition> initAllPartitions() { List<HiveTablePartition> allHivePartitions = new ArrayList<>(); // Please note that the following directly accesses Hive metastore, which is only a temporary workaround. // Ideally, we need to go thru Catalog API to get all info we need here, which requires some major // refactoring. We will postpone this until we merge Blink to Flink. try (HiveMetastoreClientWrapper client = HiveMetastoreClientFactory.create(new HiveConf(jobConf, HiveConf.class), hiveVersion)) { String dbName = tablePath.getDatabaseName(); String tableName = tablePath.getObjectName(); List<String> partitionColNames = catalogTable.getPartitionKeys(); if (partitionColNames != null && partitionColNames.size() > 0) { final String defaultPartitionName = jobConf.get(HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname, HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal); List<Partition> partitions = new ArrayList<>(); if (remainingPartitions != null) { for (Map<String, String> spec : remainingPartitions) { partitions.add(client.getPartition(dbName, tableName, partitionSpecToValues(spec, partitionColNames))); } } else { partitions.addAll(client.listPartitions(dbName, tableName, (short) -1)); } for (Partition partition : partitions) { StorageDescriptor sd = partition.getSd(); Map<String, Object> partitionColValues = new HashMap<>(); for (int i = 0; i < partitionColNames.size(); i++) { String partitionColName = partitionColNames.get(i); String partitionValue = partition.getValues().get(i); DataType type = catalogTable.getSchema().getFieldDataType(partitionColName).get(); Object partitionObject; if (defaultPartitionName.equals(partitionValue)) { LogicalTypeRoot typeRoot = type.getLogicalType().getTypeRoot(); // while this is inline with Hive, seems it should be null for string columns as well partitionObject = typeRoot == LogicalTypeRoot.CHAR || typeRoot == LogicalTypeRoot.VARCHAR ? 
defaultPartitionName : null; } else { partitionObject = restorePartitionValueFromFromType(partitionValue, type); } partitionColValues.put(partitionColName, partitionObject); } HiveTablePartition hiveTablePartition = new HiveTablePartition(sd, partitionColValues); allHivePartitions.add(hiveTablePartition); } } else { allHivePartitions.add(new HiveTablePartition(client.getTable(dbName, tableName).getSd())); } } catch (TException e) { throw new FlinkHiveException("Failed to collect all partitions from hive metaStore", e); } return allHivePartitions; }
Example #8
Source File: HiveTableMetaStoreFactory.java From flink with Apache License 2.0 | 4 votes |
/**
 * Connects to the Hive metastore and stores the storage descriptor of the target table.
 *
 * <p>If looking up the table fails, the freshly created client is closed before the failure is
 * propagated. A throwing constructor never hands out an instance, so no caller would otherwise
 * be able to release the metastore connection.
 *
 * @throws TException if the table cannot be fetched from the Hive metastore
 */
private HiveTableMetaStore() throws TException {
    client = HiveMetastoreClientFactory.create(
        new HiveConf(conf.conf(), HiveConf.class), hiveVersion);
    try {
        sd = client.getTable(database, tableName).getSd();
    } catch (TException | RuntimeException e) {
        // Release the connection ourselves; keep the original failure as the primary error.
        try {
            client.close();
        } catch (Exception closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
Example #9
Source File: HiveTableSource.java From flink with Apache License 2.0 | 4 votes |
private List<HiveTablePartition> initAllPartitions() { List<HiveTablePartition> allHivePartitions = new ArrayList<>(); // Please note that the following directly accesses Hive metastore, which is only a temporary workaround. // Ideally, we need to go thru Catalog API to get all info we need here, which requires some major // refactoring. We will postpone this until we merge Blink to Flink. try (HiveMetastoreClientWrapper client = HiveMetastoreClientFactory.create(new HiveConf(jobConf, HiveConf.class), hiveVersion)) { String dbName = tablePath.getDatabaseName(); String tableName = tablePath.getObjectName(); List<String> partitionColNames = catalogTable.getPartitionKeys(); Table hiveTable = client.getTable(dbName, tableName); Properties tableProps = HiveReflectionUtils.getTableMetadata(hiveShim, hiveTable); String ttlStr = tableProps.getProperty(FileSystemOptions.LOOKUP_JOIN_CACHE_TTL.key()); hiveTableCacheTTL = ttlStr != null ? TimeUtils.parseDuration(ttlStr) : FileSystemOptions.LOOKUP_JOIN_CACHE_TTL.defaultValue(); if (partitionColNames != null && partitionColNames.size() > 0) { final String defaultPartitionName = jobConf.get(HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname, HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal); List<Partition> partitions = new ArrayList<>(); if (remainingPartitions != null) { for (Map<String, String> spec : remainingPartitions) { partitions.add(client.getPartition(dbName, tableName, partitionSpecToValues(spec, partitionColNames))); } } else { partitions.addAll(client.listPartitions(dbName, tableName, (short) -1)); } for (Partition partition : partitions) { HiveTablePartition hiveTablePartition = toHiveTablePartition( catalogTable.getPartitionKeys(), catalogTable.getSchema().getFieldNames(), catalogTable.getSchema().getFieldDataTypes(), hiveShim, tableProps, defaultPartitionName, partition); allHivePartitions.add(hiveTablePartition); } } else { allHivePartitions.add(new HiveTablePartition(hiveTable.getSd(), tableProps)); } } catch 
(TException e) { throw new FlinkHiveException("Failed to collect all partitions from hive metaStore", e); } return allHivePartitions; }