org.apache.flink.table.catalog.hive.client.HiveMetastoreClientFactory Java Examples

The following examples show how to use org.apache.flink.table.catalog.hive.client.HiveMetastoreClientFactory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HiveCatalog.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens this catalog.
 *
 * <p>A Hive metastore client is created only when none has been set yet; afterwards the
 * configured default database is looked up and its absence is reported as an error.
 *
 * @throws CatalogException if the configured default database does not exist in this catalog
 */
@Override
public void open() throws CatalogException {
	// Connect lazily: an already-present client is reused untouched.
	if (client == null) {
		client = HiveMetastoreClientFactory.create(hiveConf, hiveVersion);
		LOG.info("Connected to Hive metastore");
	}

	if (databaseExists(getDefaultDatabase())) {
		return;
	}

	final String message = String.format(
		"Configured default database %s doesn't exist in catalog %s.",
		getDefaultDatabase(),
		getName());
	throw new CatalogException(message);
}
 
Example #2
Source File: HiveTableOutputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Moves the staged output into its final place and, for partitioned tables, loads the
 * resulting partition(s) through the Hive metastore client.
 *
 * <p>The staging directory (taken from the job's storage descriptor) is deleted recursively
 * in all cases, whether or not the commit succeeded.
 *
 * @param parallelism not used by this implementation
 * @throws IOException declared by the method signature for the file-system calls made here
 * @throws CatalogException if a metastore call fails with a {@code TException}
 */
@Override
public void finalizeGlobal(int parallelism) throws IOException {
	final StorageDescriptor jobSD = hiveTablePartition.getStorageDescriptor();
	final Path stagingDir = new Path(jobSD.getLocation());
	final FileSystem fs = stagingDir.getFileSystem(jobConf);
	try (HiveMetastoreClientWrapper client = HiveMetastoreClientFactory.create(new HiveConf(jobConf, HiveConf.class), hiveVersion)) {
		final Table table = client.getTable(tablePath.getDatabaseName(), tablePath.getObjectName());

		// With dynamic partitioning each generated partition directory is committed on its own below.
		if (!isDynamicPartition) {
			commitJob(stagingDir.toString());
		}

		if (!isPartitioned) {
			moveFiles(stagingDir, new Path(table.getSd().getLocation()));
		} else if (isDynamicPartition) {
			// Depth to descend = partition columns that are not fixed by the static partition spec.
			final int dynamicLevels = partitionColumns.size() - hiveTablePartition.getPartitionSpec().size();
			final FileStatus[] generatedParts = HiveStatsUtils.getFileStatusRecurse(stagingDir, dynamicLevels, fs);
			for (FileStatus generated : generatedParts) {
				final Path partPath = generated.getPath();
				commitJob(partPath.toString());
				final LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<>();
				Warehouse.makeSpecFromName(fullPartSpec, partPath);
				loadPartition(partPath, table, fullPartSpec, client);
			}
		} else {
			// Static partition: copy the spec, rendering every value as a string.
			final LinkedHashMap<String, String> partSpec = new LinkedHashMap<>();
			hiveTablePartition.getPartitionSpec()
				.forEach((column, value) -> partSpec.put(column, value.toString()));
			loadPartition(stagingDir, table, partSpec, client);
		}
	} catch (TException e) {
		throw new CatalogException("Failed to query Hive metaStore", e);
	} finally {
		fs.delete(stagingDir, true);
	}
}
 
Example #3
Source File: TableEnvHiveConnectorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Class-level fixture: creates and opens a Hive catalog from the Hive shell's configuration,
 * then creates a metastore client from that same configuration.
 */
@BeforeClass
public static void setup() {
	final HiveConf shellConf = hiveShell.getHiveConf();

	hiveCatalog = HiveTestUtils.createHiveCatalog(shellConf);
	hiveCatalog.open();

	hmsClient = HiveMetastoreClientFactory.create(
		shellConf,
		HiveShimLoader.getHiveVersion());
}
 
Example #4
Source File: HiveCatalog.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the catalog: creates the Hive metastore client if there is none yet, then checks that
 * the configured default database exists.
 *
 * @throws CatalogException if the configured default database is missing from this catalog
 */
@Override
public void open() throws CatalogException {
	final boolean needsClient = client == null;
	if (needsClient) {
		client = HiveMetastoreClientFactory.create(hiveConf, hiveVersion);
		LOG.info("Connected to Hive metastore");
	}

	final boolean defaultDatabasePresent = databaseExists(getDefaultDatabase());
	if (defaultDatabasePresent) {
		return;
	}

	throw new CatalogException(
		String.format(
			"Configured default database %s doesn't exist in catalog %s.",
			getDefaultDatabase(),
			getName()));
}
 
Example #5
Source File: TableEnvHiveConnectorITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the shared test fixtures once per class: a Hive catalog (created and opened) and a
 * metastore client, both built from the Hive shell's configuration.
 */
@BeforeClass
public static void setup() {
	final HiveConf confFromShell = hiveShell.getHiveConf();

	// The catalog is opened before the stand-alone metastore client is created.
	hiveCatalog = HiveTestUtils.createHiveCatalog(confFromShell);
	hiveCatalog.open();

	hmsClient = HiveMetastoreClientFactory.create(
		confFromShell,
		HiveShimLoader.getHiveVersion());
}
 
Example #6
Source File: HiveTableSource.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Collects every partition of the table from the Hive metastore into {@code allHivePartitions}.
 *
 * <p>For a table with partition keys, each metastore partition becomes one
 * {@code HiveTablePartition}; its column-name-to-raw-value spec is also appended to
 * {@code partitionList} and used as the key in {@code partitionSpec2HiveTablePartition}.
 * For a table without partition keys, a single entry built from the table's own storage
 * descriptor is added. The {@code initAllPartitions} flag is set once collection succeeds.
 *
 * @throws FlinkHiveException if a metastore call fails with a {@code TException}
 */
private void initAllPartitions() {
	allHivePartitions = new ArrayList<>();
	// Note: this talks to the Hive metastore directly, which is only a temporary workaround.
	// Ideally all required information would come through the Catalog API, but that needs a major
	// refactoring which is postponed until Blink is merged into Flink.
	try (HiveMetastoreClientWrapper client = HiveMetastoreClientFactory.create(new HiveConf(jobConf, HiveConf.class), hiveVersion)) {
		final String dbName = tablePath.getDatabaseName();
		final String tableName = tablePath.getObjectName();
		final List<String> partitionKeys = catalogTable.getPartitionKeys();
		if (partitionKeys == null || partitionKeys.isEmpty()) {
			// No partition keys: the table itself is the only "partition".
			allHivePartitions.add(new HiveTablePartition(client.getTable(dbName, tableName).getSd(), null));
		} else {
			final String defaultPartitionName = jobConf.get(HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
					HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);
			final List<Partition> metastorePartitions =
					client.listPartitions(dbName, tableName, (short) -1);
			for (Partition metastorePartition : metastorePartitions) {
				final StorageDescriptor sd = metastorePartition.getSd();
				final Map<String, Object> typedValues = new HashMap<>();
				final Map<String, String> rawSpec = new HashMap<>();
				for (int col = 0; col < partitionKeys.size(); col++) {
					final String colName = partitionKeys.get(col);
					final String rawValue = metastorePartition.getValues().get(col);
					rawSpec.put(colName, rawValue);
					final DataType colType = catalogTable.getSchema().getFieldDataType(colName).get();
					final Object typedValue;
					if (!defaultPartitionName.equals(rawValue)) {
						typedValue = restorePartitionValueFromFromType(rawValue, colType);
					} else {
						final LogicalTypeRoot root = colType.getLogicalType().getTypeRoot();
						// This is in line with Hive, although null would arguably be right for string columns too.
						if (root == LogicalTypeRoot.CHAR || root == LogicalTypeRoot.VARCHAR) {
							typedValue = defaultPartitionName;
						} else {
							typedValue = null;
						}
					}
					typedValues.put(colName, typedValue);
				}
				final HiveTablePartition converted = new HiveTablePartition(sd, typedValues);
				allHivePartitions.add(converted);
				partitionList.add(rawSpec);
				partitionSpec2HiveTablePartition.put(rawSpec, converted);
			}
		}
	} catch (TException e) {
		throw new FlinkHiveException("Failed to collect all partitions from hive metaStore", e);
	}
	initAllPartitions = true;
}
 
Example #7
Source File: HiveBatchSource.java    From Alink with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the table's partitions from the Hive metastore and returns them as
 * {@code HiveTablePartition}s.
 *
 * <p>For a table with partition keys, the partitions are either looked up one by one from
 * {@code remainingPartitions} (when that is non-null) or listed in full. For a table without
 * partition keys, the result holds a single entry built from the table's storage descriptor.
 *
 * @return the collected partitions
 * @throws FlinkHiveException if a metastore call fails with a {@code TException}
 */
private List<HiveTablePartition> initAllPartitions() {
    final List<HiveTablePartition> collected = new ArrayList<>();
    // Note: this talks to the Hive metastore directly, which is only a temporary workaround.
    // Ideally all required information would come through the Catalog API, but that needs a major
    // refactoring which is postponed until Blink is merged into Flink.
    try (HiveMetastoreClientWrapper client = HiveMetastoreClientFactory.create(new HiveConf(jobConf, HiveConf.class), hiveVersion)) {
        final String dbName = tablePath.getDatabaseName();
        final String tableName = tablePath.getObjectName();
        final List<String> partitionKeys = catalogTable.getPartitionKeys();
        if (partitionKeys == null || partitionKeys.isEmpty()) {
            // No partition keys: the table itself is the only "partition".
            collected.add(new HiveTablePartition(client.getTable(dbName, tableName).getSd()));
        } else {
            final String defaultPartitionName = jobConf.get(HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
                HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);

            final List<Partition> selected = new ArrayList<>();
            if (remainingPartitions == null) {
                selected.addAll(client.listPartitions(dbName, tableName, (short) -1));
            } else {
                for (Map<String, String> spec : remainingPartitions) {
                    selected.add(client.getPartition(dbName, tableName, partitionSpecToValues(spec, partitionKeys)));
                }
            }

            for (Partition metastorePartition : selected) {
                final StorageDescriptor sd = metastorePartition.getSd();
                final Map<String, Object> typedValues = new HashMap<>();
                for (int col = 0; col < partitionKeys.size(); col++) {
                    final String colName = partitionKeys.get(col);
                    final String rawValue = metastorePartition.getValues().get(col);
                    final DataType colType = catalogTable.getSchema().getFieldDataType(colName).get();
                    final Object typedValue;
                    if (!defaultPartitionName.equals(rawValue)) {
                        typedValue = restorePartitionValueFromFromType(rawValue, colType);
                    } else {
                        final LogicalTypeRoot root = colType.getLogicalType().getTypeRoot();
                        // This is in line with Hive, although null would arguably be right for string columns too.
                        if (root == LogicalTypeRoot.CHAR || root == LogicalTypeRoot.VARCHAR) {
                            typedValue = defaultPartitionName;
                        } else {
                            typedValue = null;
                        }
                    }
                    typedValues.put(colName, typedValue);
                }
                collected.add(new HiveTablePartition(sd, typedValues));
            }
        }
    } catch (TException e) {
        throw new FlinkHiveException("Failed to collect all partitions from hive metaStore", e);
    }
    return collected;
}
 
Example #8
Source File: HiveTableMetaStoreFactory.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a metastore client from the enclosing configuration and caches the storage
 * descriptor of the target table.
 *
 * <p>If the table lookup fails, the freshly created client is closed before the failure is
 * propagated: a partially constructed instance is never handed to a caller, so nothing else
 * would get the chance to release the client.
 *
 * @throws TException propagated unchanged from the metastore table lookup
 */
private HiveTableMetaStore() throws TException {
	client = HiveMetastoreClientFactory.create(
			new HiveConf(conf.conf(), HiveConf.class), hiveVersion);
	try {
		sd = client.getTable(database, tableName).getSd();
	} catch (TException | RuntimeException e) {
		// Release the client without masking the original failure; a close error is attached
		// as a suppressed exception instead.
		try {
			client.close();
		} catch (Exception closeFailure) {
			e.addSuppressed(closeFailure);
		}
		throw e;
	}
}
 
Example #9
Source File: HiveTableSource.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the table's partitions from the Hive metastore and returns them as
 * {@code HiveTablePartition}s.
 *
 * <p>As a side effect, {@code hiveTableCacheTTL} is set from the table property named by
 * {@code FileSystemOptions.LOOKUP_JOIN_CACHE_TTL}, falling back to that option's default when
 * the property is absent. For a table with partition keys, the partitions are either looked up
 * one by one from {@code remainingPartitions} (when that is non-null) or listed in full. For a
 * table without partition keys, the result holds a single entry built from the table itself.
 *
 * @return the collected partitions
 * @throws FlinkHiveException if a metastore call fails with a {@code TException}
 */
private List<HiveTablePartition> initAllPartitions() {
	final List<HiveTablePartition> collected = new ArrayList<>();
	// Note: this talks to the Hive metastore directly, which is only a temporary workaround.
	// Ideally all required information would come through the Catalog API, but that needs a major
	// refactoring which is postponed until Blink is merged into Flink.
	try (HiveMetastoreClientWrapper client = HiveMetastoreClientFactory.create(new HiveConf(jobConf, HiveConf.class), hiveVersion)) {
		final String dbName = tablePath.getDatabaseName();
		final String tableName = tablePath.getObjectName();
		final List<String> partitionKeys = catalogTable.getPartitionKeys();
		final Table hiveTable = client.getTable(dbName, tableName);
		final Properties tableProps = HiveReflectionUtils.getTableMetadata(hiveShim, hiveTable);

		final String ttlStr = tableProps.getProperty(FileSystemOptions.LOOKUP_JOIN_CACHE_TTL.key());
		if (ttlStr == null) {
			hiveTableCacheTTL = FileSystemOptions.LOOKUP_JOIN_CACHE_TTL.defaultValue();
		} else {
			hiveTableCacheTTL = TimeUtils.parseDuration(ttlStr);
		}

		if (partitionKeys == null || partitionKeys.isEmpty()) {
			// No partition keys: the table itself is the only "partition".
			collected.add(new HiveTablePartition(hiveTable.getSd(), tableProps));
		} else {
			final String defaultPartitionName = jobConf.get(HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
					HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);

			final List<Partition> selected = new ArrayList<>();
			if (remainingPartitions == null) {
				selected.addAll(client.listPartitions(dbName, tableName, (short) -1));
			} else {
				for (Map<String, String> spec : remainingPartitions) {
					selected.add(client.getPartition(dbName, tableName, partitionSpecToValues(spec, partitionKeys)));
				}
			}

			for (Partition metastorePartition : selected) {
				collected.add(toHiveTablePartition(
						catalogTable.getPartitionKeys(),
						catalogTable.getSchema().getFieldNames(),
						catalogTable.getSchema().getFieldDataTypes(),
						hiveShim,
						tableProps,
						defaultPartitionName,
						metastorePartition));
			}
		}
	} catch (TException e) {
		throw new FlinkHiveException("Failed to collect all partitions from hive metaStore", e);
	}
	return collected;
}