org.apache.flink.table.catalog.hive.HiveCatalog Java Examples

The following examples show how to use org.apache.flink.table.catalog.hive.HiveCatalog. Each example notes its source file, the project it was taken from, and that project's license.
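
Before the individual examples, here is a minimal sketch of the pattern most of them share: construct a HiveCatalog with a catalog name, a default database, the directory containing hive-site.xml, and a Hive version string, then register it with a TableEnvironment. The catalog name, database, path, and version below are placeholders, not values from any particular project.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.catalog.hive.HiveCatalog;

TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.newInstance().build());

// name, default database, directory containing hive-site.xml, Hive version (all placeholders)
HiveCatalog hive = new HiveCatalog("myhive", "default", "/opt/hive/conf", "2.3.4");

tableEnv.registerCatalog("myhive", hive);
tableEnv.useCatalog("myhive");
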
Example #1
Source File: SqlScriptExecutor.java    From flink-tutorials with Apache License 2.0
public static void main(String[] args) throws Exception {

		HiveCatalog hiveCatalog = new HiveCatalog(HIVE_CATALOG, HIVE_DATABASE, HIVE_CONF_DIR, HIVE_VERSION);
		StreamTableEnvironment env = createTableEnv();
		env.registerCatalog(HIVE_CATALOG, hiveCatalog);

		File script = new File(args[0]);
		// naive split: assumes statements contain no semicolons inside literals or comments
		String[] commands = FileUtils.readFileUtf8(script).split(";");

		for (String command : commands) {
			if (command.trim().isEmpty()) {
				continue;
			}

			LOG.info("Executing SQL statement: {}", command.trim());
			env.sqlUpdate(command.trim());
		}

		env.execute("SQL Script: " + script.getName());
	}
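
Note that sqlUpdate() plus a trailing env.execute() is the pre-1.11 Table API used by this tutorial. From Flink 1.11 onwards the equivalent loop would use executeSql(), which submits each statement on its own, so the final execute() call is no longer needed; a sketch under that assumption, reusing the names from the example above:

for (String command : commands) {
	if (!command.trim().isEmpty()) {
		LOG.info("Executing SQL statement: {}", command.trim());
		env.executeSql(command.trim()); // replaces sqlUpdate() + execute() in Flink 1.11+
	}
}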
 
Example #2
Source File: ExecutionContextTest.java    From flink with Apache License 2.0
@Test
public void testDatabases() throws Exception {
	final String hiveCatalog = "hivecatalog";

	final ExecutionContext<?> context = createCatalogExecutionContext();
	final TableEnvironment tableEnv = context.getTableEnvironment();

	assertEquals(1, tableEnv.listDatabases().length);
	assertEquals("mydatabase", tableEnv.listDatabases()[0]);

	tableEnv.useCatalog(hiveCatalog);

	assertEquals(2, tableEnv.listDatabases().length);
	assertEquals(
		new HashSet<>(
			Arrays.asList(
				HiveCatalog.DEFAULT_DB,
				DependencyTest.TestHiveCatalogFactory.ADDITIONAL_TEST_DATABASE)
		),
		new HashSet<>(Arrays.asList(tableEnv.listDatabases()))
	);

	tableEnv.useCatalog(hiveCatalog);

	assertEquals(HiveCatalog.DEFAULT_DB, tableEnv.getCurrentDatabase());

	tableEnv.useDatabase(DependencyTest.TestHiveCatalogFactory.ADDITIONAL_TEST_DATABASE);

	assertEquals(DependencyTest.TestHiveCatalogFactory.ADDITIONAL_TEST_DATABASE, tableEnv.getCurrentDatabase());

	context.close();
}
 
Example #3
Source File: HiveCatalogFactoryTest.java    From flink with Apache License 2.0
@Test
public void testLoadHDFSConfigFromEnv() throws IOException {
	final String k1 = "what is connector?";
	final String v1 = "Hive";
	final String catalogName = "HiveCatalog";

	// set HADOOP_CONF_DIR env
	final File hadoopConfDir = tempFolder.newFolder();
	final File hdfsSiteFile = new File(hadoopConfDir, "hdfs-site.xml");
	writeProperty(hdfsSiteFile, k1, v1);
	final Map<String, String> originalEnv = System.getenv();
	final Map<String, String> newEnv = new HashMap<>(originalEnv);
	newEnv.put("HADOOP_CONF_DIR", hadoopConfDir.getAbsolutePath());
	CommonTestUtils.setEnv(newEnv);

	// create the HiveCatalog using the Hadoop configuration from HADOOP_CONF_DIR
	final CatalogDescriptor catalogDescriptor = new HiveCatalogDescriptor();
	final Map<String, String> properties = catalogDescriptor.toProperties();
	final HiveConf hiveConf;
	try {
		final HiveCatalog hiveCatalog = (HiveCatalog) TableFactoryService.find(CatalogFactory.class, properties)
			.createCatalog(catalogName, properties);
		hiveConf = hiveCatalog.getHiveConf();
	} finally {
		// restore the original environment
		CommonTestUtils.setEnv(originalEnv);
	}
	// validate the result
	assertEquals(v1, hiveConf.get(k1, null));
}
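
The writeProperty helper used above is not part of the snippet. A plausible sketch, assuming it serializes a single key/value pair as a Hadoop configuration XML file (the actual helper in the Flink test may differ):

import org.apache.hadoop.conf.Configuration;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

private static void writeProperty(File file, String key, String value) throws IOException {
	Configuration conf = new Configuration(false); // empty configuration, no default resources
	conf.set(key, value);
	try (FileOutputStream out = new FileOutputStream(file)) {
		conf.writeXml(out); // writes the standard <configuration><property>...</property></configuration> layout
	}
}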
 
Example #4
Source File: LocalExecutorITCase.java    From flink with Apache License 2.0
@Test
public void testUseCatalogAndUseDatabase() throws Exception {
	final String csvOutputPath = new File(tempFolder.newFolder().getAbsolutePath(), "test-out.csv").toURI().toString();
	final URL url = getClass().getClassLoader().getResource("test-data.csv");
	Objects.requireNonNull(url);
	final Map<String, String> replaceVars = new HashMap<>();
	replaceVars.put("$VAR_PLANNER", planner);
	replaceVars.put("$VAR_SOURCE_PATH1", url.getPath());
	replaceVars.put("$VAR_EXECUTION_TYPE", "streaming");
	replaceVars.put("$VAR_SOURCE_SINK_PATH", csvOutputPath);
	replaceVars.put("$VAR_UPDATE_MODE", "update-mode: append");
	replaceVars.put("$VAR_MAX_ROWS", "100");

	final Executor executor = createModifiedExecutor(CATALOGS_ENVIRONMENT_FILE, clusterClient, replaceVars);
	final SessionContext session = new SessionContext("test-session", new Environment());

	try {
		assertEquals(Arrays.asList("mydatabase"), executor.listDatabases(session));

		executor.useCatalog(session, "hivecatalog");

		assertEquals(
			Arrays.asList(DependencyTest.TestHiveCatalogFactory.ADDITIONAL_TEST_DATABASE, HiveCatalog.DEFAULT_DB),
			executor.listDatabases(session));

		assertEquals(Collections.emptyList(), executor.listTables(session));

		executor.useDatabase(session, DependencyTest.TestHiveCatalogFactory.ADDITIONAL_TEST_DATABASE);

		assertEquals(Arrays.asList(DependencyTest.TestHiveCatalogFactory.TEST_TABLE), executor.listTables(session));
	} finally {
		executor.stop(session);
	}
}
 
Example #5
Source File: LocalExecutorITCase.java    From flink with Apache License 2.0
@Test
public void testUseCatalogAndUseDatabase() throws Exception {
	final String csvOutputPath = new File(tempFolder.newFolder().getAbsolutePath(), "test-out.csv").toURI().toString();
	final URL url1 = getClass().getClassLoader().getResource("test-data.csv");
	final URL url2 = getClass().getClassLoader().getResource("test-data-1.csv");
	Objects.requireNonNull(url1);
	Objects.requireNonNull(url2);
	final Map<String, String> replaceVars = new HashMap<>();
	replaceVars.put("$VAR_PLANNER", planner);
	replaceVars.put("$VAR_SOURCE_PATH1", url1.getPath());
	replaceVars.put("$VAR_SOURCE_PATH2", url2.getPath());
	replaceVars.put("$VAR_EXECUTION_TYPE", "streaming");
	replaceVars.put("$VAR_SOURCE_SINK_PATH", csvOutputPath);
	replaceVars.put("$VAR_UPDATE_MODE", "update-mode: append");
	replaceVars.put("$VAR_MAX_ROWS", "100");

	final Executor executor = createModifiedExecutor(CATALOGS_ENVIRONMENT_FILE, clusterClient, replaceVars);
	final SessionContext session = new SessionContext("test-session", new Environment());
	String sessionId = executor.openSession(session);
	assertEquals("test-session", sessionId);

	try {
		assertEquals(Collections.singletonList("mydatabase"), executor.listDatabases(sessionId));

		executor.useCatalog(sessionId, "hivecatalog");

		assertEquals(
			Arrays.asList(DependencyTest.TestHiveCatalogFactory.ADDITIONAL_TEST_DATABASE, HiveCatalog.DEFAULT_DB),
			executor.listDatabases(sessionId));

		assertEquals(Collections.singletonList(DependencyTest.TestHiveCatalogFactory.TABLE_WITH_PARAMETERIZED_TYPES),
			executor.listTables(sessionId));

		executor.useDatabase(sessionId, DependencyTest.TestHiveCatalogFactory.ADDITIONAL_TEST_DATABASE);

		assertEquals(Collections.singletonList(DependencyTest.TestHiveCatalogFactory.TEST_TABLE), executor.listTables(sessionId));
	} finally {
		executor.closeSession(sessionId);
	}
}
 
Example #6
Source File: ExecutionContextTest.java    From flink with Apache License 2.0
@Test
public void testDatabases() throws Exception {
	final String hiveCatalog = "hivecatalog";

	final ExecutionContext<?> context = createCatalogExecutionContext();
	final TableEnvironment tableEnv = context.createEnvironmentInstance().getTableEnvironment();

	assertEquals(1, tableEnv.listDatabases().length);
	assertEquals("mydatabase", tableEnv.listDatabases()[0]);

	tableEnv.useCatalog(hiveCatalog);

	assertEquals(2, tableEnv.listDatabases().length);
	assertEquals(
		new HashSet<>(
			Arrays.asList(
				HiveCatalog.DEFAULT_DB,
				DependencyTest.TestHiveCatalogFactory.ADDITIONAL_TEST_DATABASE)
		),
		new HashSet<>(Arrays.asList(tableEnv.listDatabases()))
	);

	tableEnv.useCatalog(hiveCatalog);

	assertEquals(HiveCatalog.DEFAULT_DB, tableEnv.getCurrentDatabase());

	tableEnv.useDatabase(DependencyTest.TestHiveCatalogFactory.ADDITIONAL_TEST_DATABASE);

	assertEquals(DependencyTest.TestHiveCatalogFactory.ADDITIONAL_TEST_DATABASE, tableEnv.getCurrentDatabase());
}
 
Example #7
Source File: HiveCatalogFactory.java    From flink with Apache License 2.0
@Override
public Catalog createCatalog(String name, Map<String, String> properties) {
	final DescriptorProperties descriptorProperties = getValidatedProperties(properties);

	final String defaultDatabase =
		descriptorProperties.getOptionalString(CATALOG_DEFAULT_DATABASE)
			.orElse(HiveCatalog.DEFAULT_DB);

	final Optional<String> hiveConfDir = descriptorProperties.getOptionalString(CATALOG_HIVE_CONF_DIR);

	final String version = descriptorProperties.getOptionalString(CATALOG_HIVE_VERSION).orElse(HiveShimLoader.getHiveVersion());

	return new HiveCatalog(name, defaultDatabase, hiveConfDir.orElse(null), version);
}
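
The properties handed to createCatalog are usually produced by a catalog descriptor or a SQL Client YAML definition. A hedged sketch of invoking the factory directly with a hand-built map; the literal keys ("type", "property-version", "default-database", "hive-conf-dir", "hive-version") are assumptions inferred from the descriptor constants referenced above, not verified against this exact Flink version:

Map<String, String> properties = new HashMap<>();
properties.put("type", "hive");                    // selects the Hive catalog factory
properties.put("property-version", "1");           // assumed required by the factory's matching context
properties.put("default-database", "mydatabase");
properties.put("hive-conf-dir", "/opt/hive/conf"); // optional; null when absent
properties.put("hive-version", "2.3.4");           // optional; defaults to HiveShimLoader.getHiveVersion()

Catalog catalog = TableFactoryService.find(CatalogFactory.class, properties)
	.createCatalog("myhive", properties);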
 
Example #8
Source File: HiveTableSourceITCase.java    From flink with Apache License 2.0
private void testSourceConfig(boolean fallbackMR, boolean inferParallelism) throws Exception {
	HiveTableFactory tableFactorySpy = spy((HiveTableFactory) hiveCatalog.getTableFactory().get());

	doAnswer(invocation -> {
		TableSourceFactory.Context context = invocation.getArgument(0);
		return new TestConfigSource(
				new JobConf(hiveCatalog.getHiveConf()),
				context.getConfiguration(),
				context.getObjectIdentifier().toObjectPath(),
				context.getTable(),
				fallbackMR,
				inferParallelism);
	}).when(tableFactorySpy).createTableSource(any(TableSourceFactory.Context.class));

	HiveCatalog catalogSpy = spy(hiveCatalog);
	doReturn(Optional.of(tableFactorySpy)).when(catalogSpy).getTableFactory();

	TableEnvironment tableEnv = HiveTestUtils.createTableEnvWithBlinkPlannerBatchMode();
	tableEnv.getConfig().getConfiguration().setBoolean(
			HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER, fallbackMR);
	tableEnv.getConfig().getConfiguration().setBoolean(
			HiveOptions.TABLE_EXEC_HIVE_INFER_SOURCE_PARALLELISM, inferParallelism);
	tableEnv.getConfig().getConfiguration().setInteger(
			ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 2);
	tableEnv.registerCatalog(catalogSpy.getName(), catalogSpy);
	tableEnv.useCatalog(catalogSpy.getName());

	List<Row> results = Lists.newArrayList(
			tableEnv.sqlQuery("select * from db1.src order by x").execute().collect());
	assertEquals("[1,a, 2,b]", results.toString());
}
 
Example #9
Source File: HiveTableUtil.java    From flink with Apache License 2.0
public static Table instantiateHiveTable(ObjectPath tablePath, CatalogBaseTable table, HiveConf hiveConf) {
	if (!(table instanceof CatalogTableImpl) && !(table instanceof CatalogViewImpl)) {
		throw new CatalogException(
				"HiveCatalog only supports CatalogTableImpl and CatalogViewImpl");
	}
	// let Hive set default parameters for us, e.g. serialization.format
	Table hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(),
			tablePath.getObjectName());
	hiveTable.setCreateTime((int) (System.currentTimeMillis() / 1000));

	Map<String, String> properties = new HashMap<>(table.getProperties());
	// Table comment
	if (table.getComment() != null) {
		properties.put(HiveCatalogConfig.COMMENT, table.getComment());
	}

	boolean isGeneric = HiveCatalog.isGenericForCreate(properties);

	// Hive table's StorageDescriptor
	StorageDescriptor sd = hiveTable.getSd();
	HiveTableUtil.setDefaultStorageFormat(sd, hiveConf);

	if (isGeneric) {
		DescriptorProperties tableSchemaProps = new DescriptorProperties(true);
		tableSchemaProps.putTableSchema(Schema.SCHEMA, table.getSchema());

		if (table instanceof CatalogTable) {
			tableSchemaProps.putPartitionKeys(((CatalogTable) table).getPartitionKeys());
		}

		properties.putAll(tableSchemaProps.asMap());
		properties = maskFlinkProperties(properties);
		hiveTable.setParameters(properties);
	} else {
		HiveTableUtil.initiateTableFromProperties(hiveTable, properties, hiveConf);
		List<FieldSchema> allColumns = HiveTableUtil.createHiveColumns(table.getSchema());
		// Table columns and partition keys
		if (table instanceof CatalogTableImpl) {
			CatalogTable catalogTable = (CatalogTableImpl) table;

			if (catalogTable.isPartitioned()) {
				int partitionKeySize = catalogTable.getPartitionKeys().size();
				List<FieldSchema> regularColumns = allColumns.subList(0, allColumns.size() - partitionKeySize);
				List<FieldSchema> partitionColumns = allColumns.subList(allColumns.size() - partitionKeySize, allColumns.size());

				sd.setCols(regularColumns);
				hiveTable.setPartitionKeys(partitionColumns);
			} else {
				sd.setCols(allColumns);
				hiveTable.setPartitionKeys(new ArrayList<>());
			}
		} else {
			sd.setCols(allColumns);
		}
		// Table properties
		hiveTable.getParameters().putAll(properties);
	}

	if (table instanceof CatalogViewImpl) {
		// TODO: [FLINK-12398] Support partitioned view in catalog API
		hiveTable.setPartitionKeys(new ArrayList<>());

		CatalogView view = (CatalogView) table;
		hiveTable.setViewOriginalText(view.getOriginalQuery());
		hiveTable.setViewExpandedText(view.getExpandedQuery());
		hiveTable.setTableType(TableType.VIRTUAL_VIEW.name());
	}

	return hiveTable;
}
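
A short sketch of what a caller might pass in for the generic branch; the schema, the options map, and the "is_generic" flag are illustrative assumptions (HiveCatalog.isGenericForCreate decides which branch is taken based on the table properties), and hiveConf is reused from the surrounding context:

TableSchema schema = TableSchema.builder()
	.field("x", DataTypes.INT())
	.field("y", DataTypes.STRING())
	.build();

Map<String, String> options = new HashMap<>();
options.put("is_generic", "true"); // assumed flag consumed by isGenericForCreate

CatalogTable catalogTable = new CatalogTableImpl(schema, options, "a generic Flink table");
Table hiveTable = HiveTableUtil.instantiateHiveTable(
	new ObjectPath("mydb", "mytable"), catalogTable, hiveConf);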
 
Example #10
Source File: ExecutionContextTest.java    From flink with Apache License 2.0
@Test
public void testCatalogs() throws Exception {
	final String inmemoryCatalog = "inmemorycatalog";
	final String hiveCatalog = "hivecatalog";
	final String hiveDefaultVersionCatalog = "hivedefaultversion";

	final ExecutionContext<?> context = createCatalogExecutionContext();
	final TableEnvironment tableEnv = context.getTableEnvironment();

	assertEquals(inmemoryCatalog, tableEnv.getCurrentCatalog());
	assertEquals("mydatabase", tableEnv.getCurrentDatabase());

	Catalog catalog = tableEnv.getCatalog(hiveCatalog).orElse(null);
	assertNotNull(catalog);
	assertTrue(catalog instanceof HiveCatalog);
	assertEquals("2.3.4", ((HiveCatalog) catalog).getHiveVersion());

	catalog = tableEnv.getCatalog(hiveDefaultVersionCatalog).orElse(null);
	assertNotNull(catalog);
	assertTrue(catalog instanceof HiveCatalog);
	// make sure we have assigned a default hive version
	assertFalse(StringUtils.isNullOrWhitespaceOnly(((HiveCatalog) catalog).getHiveVersion()));

	tableEnv.useCatalog(hiveCatalog);

	assertEquals(hiveCatalog, tableEnv.getCurrentCatalog());

	Set<String> allCatalogs = new HashSet<>(Arrays.asList(tableEnv.listCatalogs()));
	assertEquals(6, allCatalogs.size());
	assertEquals(
		new HashSet<>(
			Arrays.asList(
				"default_catalog",
				inmemoryCatalog,
				hiveCatalog,
				hiveDefaultVersionCatalog,
				"catalog1",
				"catalog2")
		),
		allCatalogs
	);

	context.close();
}
 
Example #11
Source File: HiveCatalogFactoryTest.java    From flink with Apache License 2.0
private static void checkEquals(HiveCatalog c1, HiveCatalog c2) {
	// Only assert a few selected properties for now
	assertEquals(c1.getName(), c2.getName());
	assertEquals(c1.getDefaultDatabase(), c2.getDefaultDatabase());
}
 
Example #12
Source File: HiveCatalogFactoryTest.java    From flink with Apache License 2.0
@Test
public void testLoadHadoopConfigFromEnv() throws IOException {
	Map<String, String> customProps = new HashMap<>();
	String k1 = "what is connector?";
	String v1 = "Hive";
	final String catalogName = "HiveCatalog";

	// set HADOOP_CONF_DIR env
	final File hadoopConfDir = tempFolder.newFolder();
	final File hdfsSiteFile = new File(hadoopConfDir, "hdfs-site.xml");
	writeProperty(hdfsSiteFile, k1, v1);
	customProps.put(k1, v1);

	// add mapred-site file
	final File mapredSiteFile = new File(hadoopConfDir, "mapred-site.xml");
	k1 = "mapred.site.config.key";
	v1 = "mapred.site.config.val";
	writeProperty(mapredSiteFile, k1, v1);
	customProps.put(k1, v1);

	final Map<String, String> originalEnv = System.getenv();
	final Map<String, String> newEnv = new HashMap<>(originalEnv);
	newEnv.put("HADOOP_CONF_DIR", hadoopConfDir.getAbsolutePath());
	newEnv.remove("HADOOP_HOME");
	CommonTestUtils.setEnv(newEnv);

	// create the HiveCatalog using the Hadoop configuration from HADOOP_CONF_DIR
	final HiveCatalogDescriptor catalogDescriptor = new HiveCatalogDescriptor();
	catalogDescriptor.hiveSitePath(CONF_DIR.getPath());
	final Map<String, String> properties = catalogDescriptor.toProperties();
	final HiveConf hiveConf;
	try {
		final HiveCatalog hiveCatalog = (HiveCatalog) TableFactoryService.find(CatalogFactory.class, properties)
				.createCatalog(catalogName, properties);
		hiveConf = hiveCatalog.getHiveConf();
	} finally {
		// restore the original environment
		CommonTestUtils.setEnv(originalEnv);
	}
	// validate the result
	for (String key : customProps.keySet()) {
		assertEquals(customProps.get(key), hiveConf.get(key, null));
	}
}
 
Example #13
Source File: HiveCatalogFactoryTest.java    From flink with Apache License 2.0
@Test
public void test() {
	final String catalogName = "mycatalog";

	final HiveCatalog expectedCatalog = HiveTestUtils.createHiveCatalog(catalogName, null);

	final HiveCatalogDescriptor catalogDescriptor = new HiveCatalogDescriptor();
	catalogDescriptor.hiveSitePath(CONF_DIR.getPath());

	final Map<String, String> properties = catalogDescriptor.toProperties();

	final Catalog actualCatalog = TableFactoryService.find(CatalogFactory.class, properties)
		.createCatalog(catalogName, properties);

	checkEquals(expectedCatalog, (HiveCatalog) actualCatalog);
}
 
Example #14
Source File: ExecutionContextTest.java    From flink with Apache License 2.0
@Test
public void testCatalogs() throws Exception {
	final String inmemoryCatalog = "inmemorycatalog";
	final String hiveCatalog = "hivecatalog";
	final String hiveDefaultVersionCatalog = "hivedefaultversion";

	final ExecutionContext<?> context = createCatalogExecutionContext();
	final TableEnvironment tableEnv = context.createEnvironmentInstance().getTableEnvironment();

	assertEquals(inmemoryCatalog, tableEnv.getCurrentCatalog());
	assertEquals("mydatabase", tableEnv.getCurrentDatabase());

	Catalog catalog = tableEnv.getCatalog(hiveCatalog).orElse(null);
	assertNotNull(catalog);
	assertTrue(catalog instanceof HiveCatalog);
	assertEquals("2.3.4", ((HiveCatalog) catalog).getHiveVersion());

	catalog = tableEnv.getCatalog(hiveDefaultVersionCatalog).orElse(null);
	assertNotNull(catalog);
	assertTrue(catalog instanceof HiveCatalog);
	// make sure we have assigned a default hive version
	assertFalse(StringUtils.isNullOrWhitespaceOnly(((HiveCatalog) catalog).getHiveVersion()));

	tableEnv.useCatalog(hiveCatalog);

	assertEquals(hiveCatalog, tableEnv.getCurrentCatalog());

	Set<String> allCatalogs = new HashSet<>(Arrays.asList(tableEnv.listCatalogs()));
	assertEquals(6, allCatalogs.size());
	assertEquals(
		new HashSet<>(
			Arrays.asList(
				"default_catalog",
				inmemoryCatalog,
				hiveCatalog,
				hiveDefaultVersionCatalog,
				"catalog1",
				"catalog2")
		),
		allCatalogs
	);
}
 
Example #15
Source File: HiveCatalogFactoryTest.java    From flink with Apache License 2.0
@Test
public void test() {
	final String catalogName = "mycatalog";

	final HiveCatalog expectedCatalog = HiveTestUtils.createHiveCatalog(catalogName, null);

	final CatalogDescriptor catalogDescriptor = new HiveCatalogDescriptor();

	final Map<String, String> properties = catalogDescriptor.toProperties();

	final Catalog actualCatalog = TableFactoryService.find(CatalogFactory.class, properties)
		.createCatalog(catalogName, properties);

	checkEquals(expectedCatalog, (HiveCatalog) actualCatalog);
}