org.apache.flink.table.catalog.hive.client.HiveMetastoreClientFactory Java Examples

Source File: HiveCatalog.java From flink with Apache License 2.0

5 votes

/**
 * Opens this catalog: creates the Hive metastore client if none exists yet and
 * then checks that the configured default database is present.
 *
 * @throws CatalogException if the default database does not exist in this catalog
 */
@Override
public void open() throws CatalogException {
	// Only create a client when none has been created yet.
	if (client == null) {
		client = HiveMetastoreClientFactory.create(hiveConf, hiveVersion);
		LOG.info("Connected to Hive metastore");
	}

	if (databaseExists(getDefaultDatabase())) {
		return;
	}

	final String message = String.format(
		"Configured default database %s doesn't exist in catalog %s.",
		getDefaultDatabase(),
		getName());
	throw new CatalogException(message);
}

Source File: HiveTableOutputFormat.java From flink with Apache License 2.0

5 votes

/**
 * Commits the staged output and publishes it to the target Hive table.
 *
 * <p>For a non-partitioned table the staged files are moved to the table location.
 * For a partitioned table each partition is loaded through the metastore client:
 * one static partition, or every partition directory found under the staging
 * directory when dynamic partitioning is used. The staging directory is always
 * deleted afterwards.
 *
 * @param parallelism not used by this implementation
 * @throws IOException declared by the signature; the only exception caught here is
 *     {@code TException}, which is rethrown as a {@code CatalogException}
 */
@Override
public void finalizeGlobal(int parallelism) throws IOException {
	final Path stagingDir = new Path(hiveTablePartition.getStorageDescriptor().getLocation());
	final FileSystem fs = stagingDir.getFileSystem(jobConf);
	try (HiveMetastoreClientWrapper client =
			HiveMetastoreClientFactory.create(new HiveConf(jobConf, HiveConf.class), hiveVersion)) {
		final Table table = client.getTable(tablePath.getDatabaseName(), tablePath.getObjectName());

		// With dynamic partitions the commit happens per generated partition directory below.
		if (!isDynamicPartition) {
			commitJob(stagingDir.toString());
		}

		if (!isPartitioned) {
			moveFiles(stagingDir, new Path(table.getSd().getLocation()));
		} else if (isDynamicPartition) {
			// Number of partition columns that are not fixed by the static partition spec.
			final int dynamicLevels = partitionColumns.size() - hiveTablePartition.getPartitionSpec().size();
			for (FileStatus generated : HiveStatsUtils.getFileStatusRecurse(stagingDir, dynamicLevels, fs)) {
				commitJob(generated.getPath().toString());
				final LinkedHashMap<String, String> fullSpec = new LinkedHashMap<>();
				Warehouse.makeSpecFromName(fullSpec, generated.getPath());
				loadPartition(generated.getPath(), table, fullSpec, client);
			}
		} else {
			final LinkedHashMap<String, String> staticSpec = new LinkedHashMap<>();
			hiveTablePartition.getPartitionSpec()
				.forEach((column, value) -> staticSpec.put(column, value.toString()));
			loadPartition(stagingDir, table, staticSpec, client);
		}
	} catch (TException e) {
		throw new CatalogException("Failed to query Hive metaStore", e);
	} finally {
		fs.delete(stagingDir, true);
	}
}

Source File: TableEnvHiveConnectorTest.java From flink with Apache License 2.0

5 votes

/**
 * Class-level fixture: builds and opens a Hive catalog from the Hive shell's
 * configuration and creates a metastore client from the same configuration.
 */
@BeforeClass
public static void setup() {
	final HiveConf shellConf = hiveShell.getHiveConf();

	hiveCatalog = HiveTestUtils.createHiveCatalog(shellConf);
	hiveCatalog.open();

	hmsClient = HiveMetastoreClientFactory.create(shellConf, HiveShimLoader.getHiveVersion());
}

Source File: HiveCatalog.java From flink with Apache License 2.0

5 votes

/**
 * Prepares the catalog for use. A metastore client is created if none exists,
 * after which the configured default database must be found in the catalog.
 *
 * @throws CatalogException if the configured default database is missing
 */
@Override
public void open() throws CatalogException {
	final boolean needsClient = client == null;
	if (needsClient) {
		client = HiveMetastoreClientFactory.create(hiveConf, hiveVersion);
		LOG.info("Connected to Hive metastore");
	}

	final boolean defaultDatabasePresent = databaseExists(getDefaultDatabase());
	if (!defaultDatabasePresent) {
		throw new CatalogException(
			String.format(
				"Configured default database %s doesn't exist in catalog %s.",
				getDefaultDatabase(),
				getName()));
	}
}

Source File: TableEnvHiveConnectorITCase.java From flink with Apache License 2.0

5 votes

/**
 * One-time test setup: creates and opens the Hive catalog using the Hive shell's
 * configuration, then creates a metastore client from that same configuration.
 */
@BeforeClass
public static void setup() {
	final HiveConf conf = hiveShell.getHiveConf();

	hiveCatalog = HiveTestUtils.createHiveCatalog(conf);
	hiveCatalog.open();

	hmsClient = HiveMetastoreClientFactory.create(conf, HiveShimLoader.getHiveVersion());
}

Source File: HiveTableSource.java From flink with Apache License 2.0

4 votes

/**
 * Reads the table's partitions from the Hive metastore and fills
 * {@code allHivePartitions}, {@code partitionList} and
 * {@code partitionSpec2HiveTablePartition}. A table without partition keys
 * contributes a single entry built from the table's own storage descriptor.
 * Sets the {@code initAllPartitions} flag once done.
 *
 * @throws FlinkHiveException if a metastore call fails with a {@code TException}
 */
private void initAllPartitions() {
	allHivePartitions = new ArrayList<>();
	// NOTE: talking to the Hive metastore directly is only a temporary workaround.
	// Ideally all of this information would come through the Catalog API, but that
	// needs a major refactoring which is postponed until Blink is merged into Flink.
	try (HiveMetastoreClientWrapper client =
			HiveMetastoreClientFactory.create(new HiveConf(jobConf, HiveConf.class), hiveVersion)) {
		final String database = tablePath.getDatabaseName();
		final String table = tablePath.getObjectName();
		final List<String> partitionKeys = catalogTable.getPartitionKeys();

		if (partitionKeys == null || partitionKeys.isEmpty()) {
			// Non-partitioned table: one entry backed by the table's storage descriptor.
			allHivePartitions.add(new HiveTablePartition(client.getTable(database, table).getSd(), null));
		} else {
			final String defaultPartitionName = jobConf.get(
					HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
					HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);
			for (Partition partition : client.listPartitions(database, table, (short) -1)) {
				final StorageDescriptor descriptor = partition.getSd();
				final Map<String, Object> typedValues = new HashMap<>();
				final Map<String, String> spec = new HashMap<>();
				for (int col = 0; col < partitionKeys.size(); col++) {
					final String key = partitionKeys.get(col);
					final String rawValue = partition.getValues().get(col);
					spec.put(key, rawValue);
					final DataType type = catalogTable.getSchema().getFieldDataType(key).get();
					final Object typedValue;
					if (!defaultPartitionName.equals(rawValue)) {
						typedValue = restorePartitionValueFromFromType(rawValue, type);
					} else {
						final LogicalTypeRoot root = type.getLogicalType().getTypeRoot();
						// while this is inline with Hive, seems it should be null for string columns as well
						if (root == LogicalTypeRoot.CHAR || root == LogicalTypeRoot.VARCHAR) {
							typedValue = defaultPartitionName;
						} else {
							typedValue = null;
						}
					}
					typedValues.put(key, typedValue);
				}
				final HiveTablePartition converted = new HiveTablePartition(descriptor, typedValues);
				allHivePartitions.add(converted);
				partitionList.add(spec);
				partitionSpec2HiveTablePartition.put(spec, converted);
			}
		}
	} catch (TException e) {
		throw new FlinkHiveException("Failed to collect all partitions from hive metaStore", e);
	}
	initAllPartitions = true;
}

Source File: HiveBatchSource.java From Alink with Apache License 2.0

4 votes

/**
 * Collects the Hive partitions to read, querying the Hive metastore directly.
 *
 * <p>When {@code remainingPartitions} is set, only those partitions are fetched;
 * otherwise all partitions of the table are listed. A table without partition
 * keys yields a single entry built from the table's own storage descriptor.
 *
 * @return the collected partitions
 * @throws FlinkHiveException if a metastore call fails with a {@code TException}
 */
private List<HiveTablePartition> initAllPartitions() {
    final List<HiveTablePartition> result = new ArrayList<>();
    // NOTE: talking to the Hive metastore directly is only a temporary workaround.
    // Ideally all of this information would come through the Catalog API, but that
    // needs a major refactoring which is postponed until Blink is merged into Flink.
    try (HiveMetastoreClientWrapper client =
             HiveMetastoreClientFactory.create(new HiveConf(jobConf, HiveConf.class), hiveVersion)) {
        final String database = tablePath.getDatabaseName();
        final String table = tablePath.getObjectName();
        final List<String> partitionKeys = catalogTable.getPartitionKeys();

        if (partitionKeys == null || partitionKeys.isEmpty()) {
            // Non-partitioned table: one entry backed by the table's storage descriptor.
            result.add(new HiveTablePartition(client.getTable(database, table).getSd()));
        } else {
            final String defaultPartitionName = jobConf.get(
                HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
                HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);

            final List<Partition> selected = new ArrayList<>();
            if (remainingPartitions == null) {
                selected.addAll(client.listPartitions(database, table, (short) -1));
            } else {
                for (Map<String, String> spec : remainingPartitions) {
                    selected.add(client.getPartition(database, table, partitionSpecToValues(spec, partitionKeys)));
                }
            }

            for (Partition partition : selected) {
                final StorageDescriptor descriptor = partition.getSd();
                final Map<String, Object> typedValues = new HashMap<>();
                for (int col = 0; col < partitionKeys.size(); col++) {
                    final String key = partitionKeys.get(col);
                    final String rawValue = partition.getValues().get(col);
                    final DataType type = catalogTable.getSchema().getFieldDataType(key).get();
                    final Object typedValue;
                    if (!defaultPartitionName.equals(rawValue)) {
                        typedValue = restorePartitionValueFromFromType(rawValue, type);
                    } else {
                        final LogicalTypeRoot root = type.getLogicalType().getTypeRoot();
                        // while this is inline with Hive, seems it should be null for string columns as well
                        if (root == LogicalTypeRoot.CHAR || root == LogicalTypeRoot.VARCHAR) {
                            typedValue = defaultPartitionName;
                        } else {
                            typedValue = null;
                        }
                    }
                    typedValues.put(key, typedValue);
                }
                result.add(new HiveTablePartition(descriptor, typedValues));
            }
        }
    } catch (TException e) {
        throw new FlinkHiveException("Failed to collect all partitions from hive metaStore", e);
    }
    return result;
}

Source File: HiveTableMetaStoreFactory.java From flink with Apache License 2.0

4 votes

/**
 * Creates a Hive metastore client and caches the storage descriptor of the
 * target table.
 *
 * <p>If the table lookup fails, the newly created client is closed before the
 * failure propagates. A constructor that throws hands no instance to the caller,
 * so nobody else could close the client.
 *
 * @throws TException if the table cannot be fetched from the metastore
 */
private HiveTableMetaStore() throws TException {
	client = HiveMetastoreClientFactory.create(
			new HiveConf(conf.conf(), HiveConf.class), hiveVersion);
	try {
		sd = client.getTable(database, tableName).getSd();
	} catch (TException | RuntimeException e) {
		// Release the client; keep the original failure as the primary exception.
		try {
			client.close();
		} catch (RuntimeException closeFailure) {
			e.addSuppressed(closeFailure);
		}
		throw e;
	}
}

Source File: HiveTableSource.java From flink with Apache License 2.0

4 votes

/**
 * Collects the Hive partitions to read, querying the Hive metastore directly.
 * As a side effect, sets {@code hiveTableCacheTTL} from the table property
 * named by {@code FileSystemOptions.LOOKUP_JOIN_CACHE_TTL}, using the option's
 * default when the property is absent.
 *
 * <p>When {@code remainingPartitions} is set, only those partitions are fetched;
 * otherwise all partitions of the table are listed. A table without partition
 * keys yields a single entry built from the table's own storage descriptor.
 *
 * @return the collected partitions
 * @throws FlinkHiveException if a metastore call fails with a {@code TException}
 */
private List<HiveTablePartition> initAllPartitions() {
	final List<HiveTablePartition> result = new ArrayList<>();
	// NOTE: talking to the Hive metastore directly is only a temporary workaround.
	// Ideally all of this information would come through the Catalog API, but that
	// needs a major refactoring which is postponed until Blink is merged into Flink.
	try (HiveMetastoreClientWrapper client =
			HiveMetastoreClientFactory.create(new HiveConf(jobConf, HiveConf.class), hiveVersion)) {
		final String database = tablePath.getDatabaseName();
		final String table = tablePath.getObjectName();
		final List<String> partitionKeys = catalogTable.getPartitionKeys();
		final Table hiveTable = client.getTable(database, table);
		final Properties tableProps = HiveReflectionUtils.getTableMetadata(hiveShim, hiveTable);

		final String configuredTtl = tableProps.getProperty(FileSystemOptions.LOOKUP_JOIN_CACHE_TTL.key());
		if (configuredTtl == null) {
			hiveTableCacheTTL = FileSystemOptions.LOOKUP_JOIN_CACHE_TTL.defaultValue();
		} else {
			hiveTableCacheTTL = TimeUtils.parseDuration(configuredTtl);
		}

		if (partitionKeys == null || partitionKeys.isEmpty()) {
			// Non-partitioned table: one entry backed by the table's storage descriptor.
			result.add(new HiveTablePartition(hiveTable.getSd(), tableProps));
		} else {
			final String defaultPartitionName = jobConf.get(
					HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
					HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);

			final List<Partition> selected = new ArrayList<>();
			if (remainingPartitions == null) {
				selected.addAll(client.listPartitions(database, table, (short) -1));
			} else {
				for (Map<String, String> spec : remainingPartitions) {
					selected.add(client.getPartition(database, table, partitionSpecToValues(spec, partitionKeys)));
				}
			}

			for (Partition partition : selected) {
				result.add(
						toHiveTablePartition(
								catalogTable.getPartitionKeys(),
								catalogTable.getSchema().getFieldNames(),
								catalogTable.getSchema().getFieldDataTypes(),
								hiveShim,
								tableProps,
								defaultPartitionName,
								partition));
			}
		}
	} catch (TException e) {
		throw new FlinkHiveException("Failed to collect all partitions from hive metaStore", e);
	}
	return result;
}

org.apache.flink.table.catalog.hive.client.HiveMetastoreClientFactory Java Examples