/* * Copyright 2013 Cloudera Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kitesdk.data.spi.hive; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.api.Table; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.kitesdk.data.Dataset; import org.kitesdk.data.DatasetDescriptor; import org.kitesdk.data.Datasets; import org.kitesdk.data.spi.DatasetRepositories; import org.kitesdk.data.spi.DatasetRepository; public class TestHiveDatasetURIsCompatibility { private static final DatasetDescriptor DESCRIPTOR = new DatasetDescriptor .Builder() .schemaLiteral("\"string\"") .build(); private static final MetaStoreUtil metastore = MetaStoreUtil.get(new Configuration()); @Before @After public void cleanHive() { // ensures all tables are removed for (String database : metastore.getAllDatabases()) { for (String table : metastore.getAllTables(database)) { metastore.dropTable(database, table); } if (!"default".equals(database)) { metastore.dropDatabase(database, true); } } } @Test public void testRelativePathURI() { // recognized as a deprecated form because there are 3 path components String uri = "dataset:hive:data/ns/ds"; Datasets.create(uri, DESCRIPTOR); Table table = metastore.getTable("ns", "ds"); Assert.assertNotNull("Table should be found under ns.ds", table); Assert.assertTrue("Should create an external table", HiveAbstractMetadataProvider.isExternal(table)); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testCreateChangedRelativePathURI() { // this used to be a relative external URI, but is now a managed URI String uri = "dataset:hive:ns/ds"; Datasets.create(uri, DESCRIPTOR); Table table = metastore.getTable("ns", "ds"); Assert.assertNotNull("Table should be found under ns.ds", table); Assert.assertTrue("Should create a managed table: " + table.getSd().getLocation(), HiveAbstractMetadataProvider.isManaged(table)); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testCreateChangedRelativePathURIWithDescriptorLocation() { String uri = "dataset:hive:ns/ds"; Datasets.create(uri, new DatasetDescriptor.Builder(DESCRIPTOR) .location("file:/tmp/data/ns/ds") .build()); Table table = metastore.getTable("ns", "ds"); Assert.assertNotNull("Table should be found under ns.ds", table); Assert.assertTrue("Should create an external table", HiveAbstractMetadataProvider.isExternal(table)); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testCreateChangedRelativePathURIWithURILocation() { String uri = "dataset:hive:ns/ds?location=file:/tmp/data/ns/ds"; Datasets.create(uri, DESCRIPTOR); Table table = metastore.getTable("ns", "ds"); Assert.assertNotNull("Table should be found under ns.ds", table); Assert.assertTrue("Should create an external table", HiveAbstractMetadataProvider.isExternal(table)); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testLoadChangedRelativePathURI() { // this used to be a relative external URI, but is now a managed URI String uri = "dataset:hive:ns/ds"; DatasetRepository repo = DatasetRepositories .repositoryFor("repo:hive:/tmp/data"); Dataset<GenericRecord> expected = repo.create( "ns", "ds", DESCRIPTOR, GenericRecord.class); Dataset<GenericRecord> actual = Datasets.load(uri); Assert.assertEquals("Should load existing dataset ns.ds", expected, actual); Assert.assertEquals("URI should use apparent namespace", "dataset:hive:ns/ds", actual.getUri().toString()); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testLoadChangedRelativePathURICompatibility() { // this used to be a relative external URI, but is now a managed URI String uri = "dataset:hive:data/ds"; DatasetRepository repo = DatasetRepositories .repositoryFor("repo:hive:/tmp/data"); DatasetDescriptor withLocation = new DatasetDescriptor.Builder(DESCRIPTOR) .location("file:/tmp/data/ds") // old location .build(); Dataset<GenericRecord> expected = repo.create( "default", "ds", withLocation, GenericRecord.class); Dataset<GenericRecord> actual = Datasets.load(uri); Assert.assertEquals("Should load existing dataset default.ds", expected, actual); Assert.assertEquals("URI should use apparent namespace", "dataset:hive:data/ds", actual.getUri().toString()); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testCreateChangedRelativePathURIMissingNamespace() { // this used to be a relative external URI, but is now a managed URI String uri = "dataset:hive:ds"; Datasets.create(uri, DESCRIPTOR); Table table = metastore.getTable("default", "ds"); Assert.assertNotNull("Table should be found under default.ds", table); Assert.assertTrue("Should create a managed table: " + table.getSd().getLocation(), HiveAbstractMetadataProvider.isManaged(table)); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testCreateChangedRelativePathURIMissingNamespaceWithDescriptorLocation() { String uri = "dataset:hive:ds"; Datasets.create(uri, new DatasetDescriptor.Builder(DESCRIPTOR) .location("file:/tmp/data/ns/ds") .build()); Table table = metastore.getTable("default", "ds"); Assert.assertNotNull("Table should be found under default.ds", table); Assert.assertTrue("Should create an external table", HiveAbstractMetadataProvider.isExternal(table)); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testCreateChangedRelativePathURIMissingNamespaceWithURILocation() { String uri = "dataset:hive:ds?location=file:/tmp/data/ns/ds"; Datasets.create(uri, DESCRIPTOR); Table table = metastore.getTable("default", "ds"); Assert.assertNotNull("Table should be found under default.ds", table); Assert.assertTrue("Should create an external table", HiveAbstractMetadataProvider.isExternal(table)); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testLoadChangedRelativePathURIMissingNamespace() { // this used to be a relative external URI, but is now a managed URI String uri = "dataset:hive:ds"; DatasetRepository repo = DatasetRepositories .repositoryFor("repo:hive:/tmp/data"); DatasetDescriptor withLocation = new DatasetDescriptor.Builder(DESCRIPTOR) .location("file:/tmp/data/ds") // old location .build(); Dataset<GenericRecord> expected = repo.create( "default", "ds", withLocation, GenericRecord.class); Dataset<GenericRecord> actual = Datasets.load(uri); Assert.assertEquals("Should load existing dataset default.ds", expected, actual); Assert.assertEquals("URI should use actual namespace", "dataset:hive:default/ds", actual.getUri().toString()); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testAbsolutePathURI() { // recognized as a absolute because there are 3 path components String uri = "dataset:hive:/tmp/data/ns/ds"; Datasets.create(uri, DESCRIPTOR); Table table = metastore.getTable("ns", "ds"); Assert.assertNotNull("Table should be found under ns.ds", table); Assert.assertTrue("Should create an external table", HiveAbstractMetadataProvider.isExternal(table)); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testCreateChangedAbsolutePathURI() { // this used to be a relative external URI, but is now a managed URI String uri = "dataset:hive:/ns/ds"; Datasets.create(uri, DESCRIPTOR); Table table = metastore.getTable("ns", "ds"); Assert.assertNotNull("Table should be found under ns.ds", table); Assert.assertTrue("Should create a managed table: " + table.getSd().getLocation(), HiveAbstractMetadataProvider.isManaged(table)); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testCreateChangedAbsolutePathURIWithDescriptorLocation() { String uri = "dataset:hive:/ns/ds"; Datasets.create(uri, new DatasetDescriptor.Builder(DESCRIPTOR) .location("file:/tmp/data/ns/ds") .build()); Table table = metastore.getTable("ns", "ds"); Assert.assertNotNull("Table should be found under ns.ds", table); Assert.assertTrue("Should create an external table", HiveAbstractMetadataProvider.isExternal(table)); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testCreateChangedAbsolutePathURIWithURILocation() { String uri = "dataset:hive:/ns/ds?location=file:/tmp/data/ns/ds"; Datasets.create(uri, DESCRIPTOR); Table table = metastore.getTable("ns", "ds"); Assert.assertNotNull("Table should be found under ns.ds", table); Assert.assertTrue("Should create an external table", HiveAbstractMetadataProvider.isExternal(table)); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testLoadChangedAbsolutePathURI() { // this used to be a relative external URI, but is now a managed URI String uri = "dataset:hive:/ns/ds"; DatasetRepository repo = DatasetRepositories .repositoryFor("repo:hive:/tmp/data"); Dataset<GenericRecord> expected = repo.create( "ns", "ds", DESCRIPTOR, GenericRecord.class); Dataset<GenericRecord> actual = Datasets.load(uri); Assert.assertEquals("Should load existing dataset ns.ds", expected, actual); Assert.assertEquals("URI should use apparent namespace", "dataset:hive:ns/ds", actual.getUri().toString()); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testLoadChangedAbsolutePathURICompatibility() { // this used to be a relative external URI, but is now a managed URI String uri = "dataset:hive:/data/ds"; DatasetRepository repo = DatasetRepositories .repositoryFor("repo:hive:/tmp/data"); DatasetDescriptor withLocation = new DatasetDescriptor.Builder(DESCRIPTOR) .location("file:/tmp/data/ds") // old location .build(); Dataset<GenericRecord> expected = repo.create( "default", "ds", withLocation, GenericRecord.class); Dataset<GenericRecord> actual = Datasets.load(uri); Assert.assertEquals("Should load existing dataset default.ds", expected, actual); Assert.assertEquals("URI should use apparent namespace", "dataset:hive:data/ds", actual.getUri().toString()); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testCreateChangedAbsolutePathURIMissingNamespace() { // this used to be a relative external URI, but is now a managed URI String uri = "dataset:hive:/ds"; Datasets.create(uri, DESCRIPTOR); Table table = metastore.getTable("default", "ds"); Assert.assertNotNull("Table should be found under default.ds", table); Assert.assertTrue("Should create a managed table: " + table.getSd().getLocation(), HiveAbstractMetadataProvider.isManaged(table)); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testCreateChangedAbsolutePathURIMissingNamespaceWithDescriptorLocation() { String uri = "dataset:hive:/ds"; Datasets.create(uri, new DatasetDescriptor.Builder(DESCRIPTOR) .location("file:/tmp/data/ns/ds") .build()); Table table = metastore.getTable("default", "ds"); Assert.assertNotNull("Table should be found under default.ds", table); Assert.assertTrue("Should create an external table", HiveAbstractMetadataProvider.isExternal(table)); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testCreateChangedAbsolutePathURIMissingNamespaceWithURILocation() { String uri = "dataset:hive:/ds?location=file:/tmp/data/ns/ds"; Datasets.create(uri, DESCRIPTOR); Table table = metastore.getTable("default", "ds"); Assert.assertNotNull("Table should be found under default.ds", table); Assert.assertTrue("Should create an external table", HiveAbstractMetadataProvider.isExternal(table)); Assert.assertTrue(Datasets.delete(uri)); } @Test public void testLoadChangedAbsolutePathURIMissingNamespace() { // this used to be a relative external URI, but is now a managed URI String uri = "dataset:hive:/ds"; DatasetRepository repo = DatasetRepositories .repositoryFor("repo:hive:/tmp/data"); DatasetDescriptor withLocation = new DatasetDescriptor.Builder(DESCRIPTOR) .location("file:/tmp/data/ds") // old location .build(); Dataset<GenericRecord> expected = repo.create( "default", "ds", withLocation, GenericRecord.class); Dataset<GenericRecord> actual = Datasets.load(uri); Assert.assertEquals("Should load existing dataset default.ds", expected, actual); Assert.assertEquals("URI should use actual namespace", "dataset:hive:default/ds", actual.getUri().toString()); Assert.assertTrue(Datasets.delete(uri)); } }