org.apache.hadoop.hive.metastore.IMetaStoreClient Java Examples

The following examples show how to use org.apache.hadoop.hive.metastore.IMetaStoreClient, drawn from several open-source projects. The source file, project, and license are noted above each example.
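Before the project examples, here is a minimal sketch of obtaining and using a client directly. It assumes a hive-site.xml on the classpath and a throwaway wrapper method; production code usually prefers a retrying proxy or a client pool, as several examples below demonstrate.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;

public static void listDatabases() throws Exception {
  // Connect using whatever hive-site.xml is on the classpath.
  IMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
  try {
    for (String db : client.getAllDatabases()) { // list every database in the metastore
      System.out.println(db);
    }
  } finally {
    client.close(); // IMetaStoreClient is not AutoCloseable in the Hive versions shown here
  }
}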
Example #1
Source File: HiveMetaStoreClientFactory.java    From incubator-gobblin with Apache License 2.0
private IMetaStoreClient createMetaStoreClient() throws MetaException {
  HiveMetaHookLoader hookLoader = new HiveMetaHookLoader() {
    @Override
    public HiveMetaHook getHook(Table tbl) throws MetaException {
      if (tbl == null) {
        return null;
      }

      try {
        HiveStorageHandler storageHandler =
            HiveUtils.getStorageHandler(hiveConf, tbl.getParameters().get(META_TABLE_STORAGE));
        return storageHandler == null ? null : storageHandler.getMetaHook();
      } catch (HiveException e) {
        LOG.error(e.toString());
        throw new MetaException("Failed to get storage handler: " + e);
      }
    }
  };

  return RetryingMetaStoreClient.getProxy(hiveConf, hookLoader, HiveMetaStoreClient.class.getName());
}
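RetryingMetaStoreClient.getProxy wraps the underlying client so transient Thrift failures are retried transparently, while the hook loader lets storage handlers participate in DDL operations. A hedged usage sketch of the factory method above, with placeholder database and table names:

// Hypothetical caller of createMetaStoreClient(); "my_db" and "my_table" are placeholders.
IMetaStoreClient client = createMetaStoreClient();
try {
  Table table = client.getTable("my_db", "my_table");
  System.out.println(table.getSd().getLocation());
} finally {
  client.close();
}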
 
Example #2
Source File: HCatalogUtils.java    From beam with Apache License 2.0
private static long getFileSizeForPartition(Read readRequest, Partition partitionToRead)
    throws Exception {
  IMetaStoreClient client = null;
  try {
    HiveConf hiveConf = HCatalogUtils.createHiveConf(readRequest);
    client = HCatalogUtils.createMetaStoreClient(hiveConf);
    List<org.apache.hadoop.hive.ql.metadata.Partition> p = new ArrayList<>();
    Table table = HCatUtil.getTable(client, readRequest.getDatabase(), readRequest.getTable());
    final org.apache.hadoop.hive.ql.metadata.Partition partition =
        new org.apache.hadoop.hive.ql.metadata.Partition(table, partitionToRead);
    p.add(partition);
    final List<Long> fileSizeForPartitions = StatsUtils.getFileSizeForPartitions(hiveConf, p);
    return fileSizeForPartitions.get(0);
  } finally {
    // IMetaStoreClient is not AutoCloseable, closing it manually
    if (client != null) {
      client.close();
    }
  }
}
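Since IMetaStoreClient does not implement AutoCloseable in the Hive versions targeted here, this try/finally pattern recurs throughout the page. One way to get try-with-resources back is a thin adapter; the wrapper class below is a hypothetical sketch, not part of Beam or Hive:

import org.apache.hadoop.hive.metastore.IMetaStoreClient;

// Hypothetical adapter so callers can use try-with-resources.
final class ClosableMetaStoreClient implements AutoCloseable {
  private final IMetaStoreClient delegate;
  ClosableMetaStoreClient(IMetaStoreClient delegate) { this.delegate = delegate; }
  IMetaStoreClient get() { return delegate; }
  @Override public void close() { delegate.close(); }
}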
 
Example #3
Source File: HiveShimV1.java    From flink with Apache License 2.0
@Override
public Function getFunction(IMetaStoreClient client, String dbName, String functionName) throws NoSuchObjectException, TException {
	try {
		// hive-1.x doesn't throw NoSuchObjectException if function doesn't exist, instead it throws a MetaException
		return client.getFunction(dbName, functionName);
	} catch (MetaException e) {
		// need to check the cause and message of this MetaException to decide whether it should actually be a NoSuchObjectException
		if (e.getCause() instanceof NoSuchObjectException) {
			throw (NoSuchObjectException) e.getCause();
		}
		if (e.getMessage().startsWith(NoSuchObjectException.class.getSimpleName())) {
			throw new NoSuchObjectException(e.getMessage());
		}
		throw e;
	}
}
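A hedged usage sketch of the shim method above; the shim and client variables and the function name are assumptions:

import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;

try {
  Function udf = shim.getFunction(client, "default", "my_udf"); // placeholder names
  System.out.println("Found UDF class: " + udf.getClassName());
} catch (NoSuchObjectException e) {
  // Thanks to the shim, "function not found" surfaces uniformly across Hive versions.
}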
 
Example #4
Source File: HiveMetadataForCompactionExtractor.java    From incubator-gobblin with Apache License 2.0
public HiveMetadataForCompactionExtractor(WorkUnitState state, FileSystem fs) throws IOException, TException {
  super(state);

  if (state.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)) {
    log.info("Ignoring Watermark workunit for {}", state.getProp(ConfigurationKeys.DATASET_URN_KEY));
    return;
  }

  try (AutoReturnableObject<IMetaStoreClient> client = this.pool.getClient()) {
    Table table = client.get().getTable(this.dbName, this.tableName);

    String primaryKeyString = table.getParameters().get(state.getProp(COMPACTION_PRIMARY_KEY));
    List<String> primaryKeyList = Splitter.on(',').omitEmptyStrings().trimResults().splitToList(primaryKeyString);

    String deltaString = table.getParameters().get(state.getProp(COMPACTION_DELTA));
    List<String> deltaList = Splitter.on(',').omitEmptyStrings().trimResults().splitToList(deltaString);

    Path dataFilesPath = new Path(table.getSd().getLocation());

    compactionEntity = new MRCompactionEntity(primaryKeyList, deltaList, dataFilesPath, state.getProperties());
  }
}
 
Example #5
Source File: HiveShimV1.java    From flink with Apache License 2.0
@Override
// 1.x client doesn't support filtering tables by type, so we have to fetch all tables and filter them ourselves
public List<String> getViews(IMetaStoreClient client, String databaseName) throws UnknownDBException, TException {
	// We don't have to use reflection here because client.getAllTables(String) is supposed to be there for
	// all versions.
	List<String> tableNames = client.getAllTables(databaseName);
	List<String> views = new ArrayList<>();
	for (String name : tableNames) {
		Table table = client.getTable(databaseName, name);
		String viewDef = table.getViewOriginalText();
		if (viewDef != null && !viewDef.isEmpty()) {
			views.add(table.getTableName());
		}
	}
	return views;
}
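Contrast this with Examples #14 and #20 below: newer clients expose getTables(database, pattern, TableType), so views can be filtered by the metastore itself instead of fetching every table definition, which is considerably cheaper on databases with many tables.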
 
Example #6
Source File: HiveMetaStoreClientFactoryTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testCreate() throws TException {
  HiveConf hiveConf = new HiveConf();
  HiveMetaStoreClientFactory factory = new HiveMetaStoreClientFactory(hiveConf);

  // Since a hive-site.xml is present on the classpath, null out the metastore URIs here so the test can proceed.
  // If no local hive-site were present, the value would already be an empty string.
  hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "");
  hiveConf.set(HIVE_METASTORE_TOKEN_SIGNATURE, "");
  IMetaStoreClient msc = factory.create();

  String dbName = "test_db";
  String description = "test database";
  String location = "file:/tmp/" + dbName;
  Database db = new Database(dbName, description, location, null);

  msc.dropDatabase(dbName, true, true);
  msc.createDatabase(db);
  db = msc.getDatabase(dbName);
  Assert.assertEquals(db.getName(), dbName);
  Assert.assertEquals(db.getDescription(), description);
  Assert.assertEquals(db.getLocationUri(), location);
}
 
Example #7
Source File: HivePartitionVersionFinder.java    From incubator-gobblin with Apache License 2.0
private static List<Partition> getPartitions(String completeTableName) {
  List<String> tableList = At_SPLITTER.splitToList(completeTableName);
  if (tableList.size() != 2) {
    log.warn("Invalid table name " + completeTableName);
    return Collections.emptyList();
  }
  try (AutoReturnableObject<IMetaStoreClient> client = ComplianceRetentionJob.pool.getClient()) {
    Table table = client.get().getTable(tableList.get(0), tableList.get(1));
    HiveDataset dataset = new HiveDataset(FileSystem.newInstance(new Configuration()), ComplianceRetentionJob.pool,
        new org.apache.hadoop.hive.ql.metadata.Table(table), new Properties());
    return dataset.getPartitionsFromDataset();
  } catch (IOException | TException e) {
    log.warn("Unable to get Partitions for table " + completeTableName + " " + e.getMessage());
  }
  return Collections.emptyList();
}
 
Example #8
Source File: HiveDatasetVersionCleaner.java    From incubator-gobblin with Apache License 2.0
@Override
public void clean() throws IOException {

  // Possible empty directories to clean for this partition (version)
  Set<Path> possiblyEmptyDirectories = new HashSet<>();

  try (AutoReturnableObject<IMetaStoreClient> client = cleanableHiveDataset.getClientPool().getClient()) {
    Partition partition = hiveDatasetVersion.getPartition();
    try {
      if (!cleanableHiveDataset.isSimulate()) {
        client.get().dropPartition(partition.getTable().getDbName(), partition.getTable().getTableName(), partition.getValues(), false);
        log.info("Successfully dropped partition " + partition.getCompleteName());
      } else {
        log.info("Simulating drop partition " + partition.getCompleteName());
      }
      if (cleanableHiveDataset.isShouldDeleteData()) {
        cleanableHiveDataset.getFsCleanableHelper().clean(hiveDatasetVersion, possiblyEmptyDirectories);
      }
    } catch (TException | IOException e) {
      log.warn(String.format("Failed to completely delete partition %s.", partition.getCompleteName()), e);
      throw new IOException(e);
    }
  }
  cleanableHiveDataset.getFsCleanableHelper().cleanEmptyDirectories(possiblyEmptyDirectories, cleanableHiveDataset);
}
 
Example #9
Source File: HiveConvertPublisher.java    From incubator-gobblin with Apache License 2.0
@VisibleForTesting
public Optional<Partition> getPartitionObject(String completePartitionName) {
  try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
    List<String> partitionList = At_SPLITTER.splitToList(completePartitionName);
    if (partitionList.size() != 3) {
      log.warn("Invalid partition name " + completePartitionName);
      return Optional.<Partition>absent();
    }
    Partition sourcePartition =
        client.get().getPartition(partitionList.get(0), partitionList.get(1), partitionList.get(2));
    return Optional.fromNullable(sourcePartition);
  } catch (IOException | TException e) {
    log.warn("Unable to get partition object from metastore for partition " + completePartitionName);
  }
  return Optional.<Partition>absent();
}
 
Example #10
Source File: TokenUtils.java    From incubator-gobblin with Apache License 2.0
/**
 * Get Hadoop tokens (tokens for the job history server, job tracker, Hive, and HDFS) using a Kerberos keytab
 * on behalf of a proxy user, embed the tokens into a {@link UserGroupInformation} as the returned result, and
 * persist the in-memory credentials if tokenFile is specified.
 *
 * Note that when a super-user is fetching tokens for other users,
 * {@link #fetchHcatToken(String, HiveConf, String, IMetaStoreClient)} explicitly takes a string parameter
 * indicating the proxy user, while other Hadoop services require impersonation first.
 *
 * @param state A {@link State} object that should contain properties.
 * @param tokenFile If present, the file will store materialized credentials.
 * @param ugi The {@link UserGroupInformation} used to impersonate the proxy user via a "doAs" block.
 * @param targetUser The user to be impersonated as, for fetching hadoop tokens.
 * @return A {@link UserGroupInformation} containing negotiated credentials.
 */
public static UserGroupInformation getHadoopAndHiveTokensForProxyUser(final State state, Optional<File> tokenFile,
    UserGroupInformation ugi, IMetaStoreClient client, String targetUser) throws IOException, InterruptedException {
  final Credentials cred = new Credentials();
  ugi.doAs(new PrivilegedExceptionAction<Void>() {
    @Override
    public Void run() throws Exception {
      getHadoopTokens(state, Optional.absent(), cred);
      return null;
    }
  });

  ugi.getCredentials().addAll(cred);
  // Will add hive tokens into ugi in this method.
  getHiveToken(state, client, cred, targetUser, ugi);

  if (tokenFile.isPresent()) {
    persistTokens(cred, tokenFile.get());
  }
  // at this point, tokens in ugi can be more than that in Credential object,
  // since hive token is not put in Credential object.
  return ugi;
}
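The fetchHcatToken helper referenced in the javadoc is not shown on this page. As a rough, hedged sketch, fetching a metastore delegation token for a proxy user typically looks like the following; the token alias and the identifier class location are assumptions that vary by Hive version:

import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier; // package varies by Hive version
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.token.Token;

// Sketch: ask the metastore for a delegation token on behalf of targetUser.
String tokenStr = client.getDelegationToken(targetUser, ugi.getShortUserName());
Token<DelegationTokenIdentifier> hcatToken = new Token<>();
hcatToken.decodeFromUrlString(tokenStr);
cred.addToken(new Text("hive.metastore.delegation.token"), hcatToken); // alias is an assumption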
 
Example #11
Source File: HiveShimV310.java    From flink with Apache License 2.0
@Override
public void createTableWithConstraints(
		IMetaStoreClient client,
		Table table,
		Configuration conf,
		UniqueConstraint pk,
		List<Byte> pkTraits,
		List<String> notNullCols,
		List<Byte> nnTraits) {
	try {
		List<Object> hivePKs = createHivePKs(table, pk, pkTraits);
		List<Object> hiveNNs = createHiveNNs(table, conf, notNullCols, nnTraits);
		// createTableWithConstraints takes PK, FK, UNIQUE, NN, DEFAULT, CHECK lists
		HiveReflectionUtils.invokeMethod(
				client.getClass(),
				client,
				"createTableWithConstraints",
				new Class[]{Table.class, List.class, List.class, List.class, List.class, List.class, List.class},
				new Object[]{table, hivePKs, Collections.emptyList(), Collections.emptyList(), hiveNNs,
						Collections.emptyList(), Collections.emptyList()});
	} catch (Exception e) {
		throw new CatalogException("Failed to create Hive table with constraints", e);
	}
}
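The seven List arguments passed reflectively line up with the Hive 3.x form of createTableWithConstraints, which takes primary-key, foreign-key, unique, NOT NULL, default, and check constraint lists after the table; only the primary-key and NOT NULL lists are populated here. Reflection keeps the shim compiling against older metastore clients where the method has fewer parameters (compare Example #23).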
 
Example #12
Source File: HiveShimV310.java    From flink with Apache License 2.0
@Override
public Set<String> getNotNullColumns(IMetaStoreClient client, Configuration conf, String dbName, String tableName) {
	try {
		String hiveDefaultCatalog = getHMSDefaultCatalog(conf);
		Class requestClz = Class.forName("org.apache.hadoop.hive.metastore.api.NotNullConstraintsRequest");
		Object request = requestClz.getDeclaredConstructor(String.class, String.class, String.class)
				.newInstance(hiveDefaultCatalog, dbName, tableName);
		List<?> constraints = (List<?>) HiveReflectionUtils.invokeMethod(client.getClass(), client,
				"getNotNullConstraints", new Class[]{requestClz}, new Object[]{request});
		Class constraintClz = Class.forName("org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint");
		Method colNameMethod = constraintClz.getDeclaredMethod("getColumn_name");
		Method isRelyMethod = constraintClz.getDeclaredMethod("isRely_cstr");
		Set<String> res = new HashSet<>();
		for (Object constraint : constraints) {
			if ((boolean) isRelyMethod.invoke(constraint)) {
				res.add((String) colNameMethod.invoke(constraint));
			}
		}
		return res;
	} catch (Exception e) {
		throw new CatalogException("Failed to get NOT NULL constraints", e);
	}
}
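Both api classes used here (NotNullConstraintsRequest and SQLNotNullConstraint) only exist from Hive 3.x onward, which is why the shim loads them by name rather than importing them. Filtering on isRely_cstr keeps only constraints marked RELY, i.e. the ones a query planner is allowed to trust.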
 
Example #13
Source File: HiveMaterializerSource.java    From incubator-gobblin with Apache License 2.0
private HiveDataset getHiveDataset(String tableString, FileSystem fs, State state) throws IOException {
  try {
    HiveMetastoreClientPool pool = HiveMetastoreClientPool.get(state.getProperties(),
        Optional.fromNullable(state.getProp(HIVE_METASTORE_URI_KEY)));

    List<String> tokens = Splitter.on(".").splitToList(tableString);
    DbAndTable sourceDbAndTable = new DbAndTable(tokens.get(0), tokens.get(1));

    try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
      Table sourceTable = new Table(client.get().getTable(sourceDbAndTable.getDb(), sourceDbAndTable.getTable()));
      return new HiveDataset(fs, pool, sourceTable, ConfigUtils.propertiesToConfig(state.getProperties()));
    }
  } catch (TException exc) {
    throw new RuntimeException(exc);
  }
}
 
Example #14
Source File: HiveShimV2.java    From flink with Apache License 2.0
@Override
public List<String> getViews(IMetaStoreClient client, String databaseName) throws UnknownDBException, TException {
	try {
		Method method = client.getClass().getMethod("getTables", String.class, String.class, TableType.class);
		return (List<String>) method.invoke(client, databaseName, null, TableType.VIRTUAL_VIEW);
	} catch (InvocationTargetException ite) {
		Throwable targetEx = ite.getTargetException();
		if (targetEx instanceof TException) {
			throw (TException) targetEx;
		} else {
			throw new CatalogException(String.format("Failed to get views for %s", databaseName), targetEx);
		}
	} catch (NoSuchMethodException | IllegalAccessException e) {
		throw new CatalogException(String.format("Failed to get views for %s", databaseName), e);
	}
}
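When compiled directly against a Hive version that has the overload, the reflective dance reduces to a single call: client.getTables(databaseName, null, TableType.VIRTUAL_VIEW). The reflection exists only because the shim must also compile against older clients that lack it.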
 
Example #15
Source File: HCatalogIO.java    From beam with Apache License 2.0
/**
 * Returns the size of the table in bytes; it does not take into account any filter/partition
 * details that may have been passed.
 */
@Override
public long getEstimatedSizeBytes(PipelineOptions pipelineOptions) throws Exception {
  IMetaStoreClient client = null;
  try {
    HiveConf hiveConf = HCatalogUtils.createHiveConf(spec);
    client = HCatalogUtils.createMetaStoreClient(hiveConf);
    Table table = HCatUtil.getTable(client, spec.getDatabase(), spec.getTable());
    return StatsUtils.getFileSizeForTable(hiveConf, table);
  } finally {
    // IMetaStoreClient is not AutoCloseable, closing it manually
    if (client != null) {
      client.close();
    }
  }
}
 
Example #16
Source File: HiveMetaStoreBasedRegister.java    From incubator-gobblin with Apache License 2.0
/**
 * @return true if the db is successfully created.
 *         false if the db already exists.
 * @throws IOException
 */
private boolean createDbIfNotExists(IMetaStoreClient client, String dbName) throws IOException {
  boolean retVal;
  if (this.optimizedChecks) {
    try {
      retVal = this.tableAndDbExistenceCache.get(dbName, new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          return ensureHiveDbExistence(dbName, client);
        }
      });
    } catch (ExecutionException ee) {
      throw new IOException("Database existence checking throwing execution exception.");
    }
    return retVal;
  } else {
    return this.ensureHiveDbExistence(dbName, client);
  }
}
 
Example #17
Source File: HiveMetaStoreProxy.java    From griffin with Apache License 2.0
@Bean
public IMetaStoreClient initHiveMetastoreClient() {
    HiveConf hiveConf = new HiveConf();
    hiveConf.set("hive.metastore.local", "false");
    hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES,
        3);
    hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, uris);
    hiveConf.setIntVar(HiveConf.ConfVars.HMSHANDLERATTEMPTS, attempts);
    hiveConf.setVar(HiveConf.ConfVars.HMSHANDLERINTERVAL, interval);
    try {
        client = HiveMetaStoreClient.newSynchronizedClient(new HiveMetaStoreClient(hiveConf));
    } catch (Exception e) {
        LOGGER.error("Failed to connect to Hive metastore.", e);
    }
    return client;
}
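Wrapping the raw client with HiveMetaStoreClient.newSynchronizedClient matters here because the Spring @Bean is a shared singleton: the synchronized proxy serializes access to the single underlying Thrift connection, which is not safe for concurrent use.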
 
Example #18
Source File: HiveShimV100.java    From flink with Apache License 2.0
@Override
// 1.x client doesn't support filtering tables by type, so we have to fetch all tables and filter them ourselves
public List<String> getViews(IMetaStoreClient client, String databaseName) throws UnknownDBException, TException {
	// We don't have to use reflection here because client.getAllTables(String) is supposed to be there for
	// all versions.
	List<String> tableNames = client.getAllTables(databaseName);
	List<String> views = new ArrayList<>();
	for (String name : tableNames) {
		Table table = client.getTable(databaseName, name);
		String viewDef = table.getViewOriginalText();
		if (viewDef != null && !viewDef.isEmpty()) {
			views.add(table.getTableName());
		}
	}
	return views;
}
 
Example #19
Source File: HiveMetaStoreBasedRegister.java    From incubator-gobblin with Apache License 2.0
@Override
protected void registerPath(HiveSpec spec) throws IOException {
  try (Timer.Context context = this.metricContext.timer(PATH_REGISTER_TIMER).time();
      AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
    Table table = HiveMetaStoreUtils.getTable(spec.getTable());

    createDbIfNotExists(client.get(), table.getDbName());
    createOrAlterTable(client.get(), table, spec);

    Optional<HivePartition> partition = spec.getPartition();
    if (partition.isPresent()) {
      addOrAlterPartition(client.get(), table, partition.get());
    }
    HiveMetaStoreEventHelper.submitSuccessfulPathRegistration(eventSubmitter, spec);
  } catch (TException e) {
    HiveMetaStoreEventHelper.submitFailedPathRegistration(eventSubmitter, spec, e);
    throw new IOException(e);
  }
}
 
Example #20
Source File: HiveShimV230.java    From flink with Apache License 2.0
@Override
public List<String> getViews(IMetaStoreClient client, String databaseName) throws UnknownDBException, TException {
	try {
		Method method = client.getClass().getMethod("getTables", String.class, String.class, TableType.class);
		return (List<String>) method.invoke(client, databaseName, null, TableType.VIRTUAL_VIEW);
	} catch (InvocationTargetException ite) {
		Throwable targetEx = ite.getTargetException();
		if (targetEx instanceof TException) {
			throw (TException) targetEx;
		} else {
			throw new CatalogException(String.format("Failed to get views for %s", databaseName), targetEx);
		}
	} catch (NoSuchMethodException | IllegalAccessException e) {
		throw new CatalogException(String.format("Failed to get views for %s", databaseName), e);
	}
}
 
Example #21
Source File: HiveUtils.java    From incubator-gobblin with Apache License 2.0
/**
 * Get a list of {@link Partition}s for the <code>table</code> that matches an optional <code>filter</code>
 *
 * @param client an {@link IMetaStoreClient} for the correct metastore.
 * @param table the {@link Table} for which we should get partitions.
 * @param filter an optional filter for partitions as would be used in Hive. Can only filter on String columns
 *               (e.g. "part = \"part1\"" or "date > \"2015\"").
 * @param hivePartitionExtendedFilterOptional an optional additional filter applied client-side to each partition.
 * @return a list of {@link Partition}s
 */
public static List<Partition> getPartitions(IMetaStoreClient client, Table table,
    Optional<String> filter, Optional<? extends HivePartitionExtendedFilter> hivePartitionExtendedFilterOptional)
    throws IOException {
  try {
    List<Partition> partitions = Lists.newArrayList();
    List<org.apache.hadoop.hive.metastore.api.Partition> partitionsList = filter.isPresent()
        ? client.listPartitionsByFilter(table.getDbName(), table.getTableName(), filter.get(), (short) -1)
        : client.listPartitions(table.getDbName(), table.getTableName(), (short) -1);
    for (org.apache.hadoop.hive.metastore.api.Partition p : partitionsList) {
      if (!hivePartitionExtendedFilterOptional.isPresent() ||
          hivePartitionExtendedFilterOptional.get().accept(p)) {
        Partition partition = new Partition(table, p);
        partitions.add(partition);
      }
    }
    return partitions;
  } catch (TException | HiveException te) {
    throw new IOException("Hive Error", te);
  }
}
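A hedged usage sketch of the helper above; the client and table handles and the filter value are placeholders:

import com.google.common.base.Optional;

// Fetch only partitions whose string-typed "ds" value sorts after 2015-01-01.
List<Partition> recent = HiveUtils.getPartitions(
    client,
    table,
    Optional.of("ds > \"2015-01-01\""), // Hive partition-filter syntax, String columns only
    Optional.<HivePartitionExtendedFilter>absent());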
 
Example #22
Source File: HiveClientWrapper.java    From pxf with Apache License 2.0
/**
 * Initializes the IMetaStoreClient.
 * Uses classpath configuration files to locate the MetaStore.
 *
 * @return initialized client
 */
public IMetaStoreClient initHiveClient(RequestContext context, Configuration configuration) {
    HiveConf hiveConf = getHiveConf(configuration);
    try {
        if (Utilities.isSecurityEnabled(configuration)) {
            UserGroupInformation loginUser = SecureLogin.getInstance().getLoginUser(context, configuration);
            LOG.debug("initialize HiveMetaStoreClient as login user '{}'", loginUser.getUserName());
            // wrap in doAs for Kerberos to propagate kerberos tokens from login Subject
            return loginUser.
                    doAs((PrivilegedExceptionAction<IMetaStoreClient>) () -> hiveClientFactory.initHiveClient(hiveConf));
        } else {
            return hiveClientFactory.initHiveClient(hiveConf);
        }
    } catch (MetaException | InterruptedException | IOException e) {
        throw new RuntimeException("Failed connecting to Hive MetaStore service: " + e.getMessage(), e);
    }
}
 
Example #23
Source File: HiveShimV210.java    From flink with Apache License 2.0
@Override
public void createTableWithConstraints(
		IMetaStoreClient client,
		Table table,
		Configuration conf,
		UniqueConstraint pk,
		List<Byte> pkTraits,
		List<String> notNullCols,
		List<Byte> nnTraits) {
	if (!notNullCols.isEmpty()) {
		throw new UnsupportedOperationException("NOT NULL constraints not supported until 3.0.0");
	}
	try {
		List<Object> hivePKs = createHivePKs(table, pk, pkTraits);
		// createTableWithConstraints takes PK and FK lists
		HiveReflectionUtils.invokeMethod(
				client.getClass(),
				client,
				"createTableWithConstraints",
				new Class[]{Table.class, List.class, List.class},
				new Object[]{table, hivePKs, Collections.emptyList()});
	} catch (Exception e) {
		throw new CatalogException("Failed to create Hive table with constraints", e);
	}
}
 
Example #24
Source File: HiveShimV120.java    From flink with Apache License 2.0
@Override
public IMetaStoreClient getHiveMetastoreClient(HiveConf hiveConf) {
	try {
		Method method = RetryingMetaStoreClient.class.getMethod("getProxy", HiveConf.class);
		// getProxy is a static method
		return (IMetaStoreClient) method.invoke(null, hiveConf);
	} catch (Exception ex) {
		throw new CatalogException("Failed to create Hive Metastore client", ex);
	}
}
 
Example #25
Source File: AbstractHiveDatasetVersionFinder.java    From incubator-gobblin with Apache License 2.0
/**
 * Create {@link HiveDatasetVersion}s for all {@link Partition}s of a {@link HiveDataset}.
 * Calls {@link #getDatasetVersion(Partition)} for every {@link Partition} found.
 * <p>
 * Note: If an exception occurs while processing a partition, that partition will be ignored in the returned collection
 * </p>
 *
 * @throws IllegalArgumentException if <code>dataset</code> is not a {@link HiveDataset}. Or if {@link HiveDataset#getTable()}
 * is not partitioned.
 */
@Override
public Collection<HiveDatasetVersion> findDatasetVersions(Dataset dataset) throws IOException {
  if (!(dataset instanceof HiveDataset)) {
    throw new IllegalArgumentException("HiveDatasetVersionFinder is only compatible with HiveDataset");
  }
  final HiveDataset hiveDataset = (HiveDataset) dataset;

  if (!hiveDataset.getTable().isPartitioned()) {
    throw new IllegalArgumentException("HiveDatasetVersionFinder is only compatible with partitioned hive tables");
  }

  try (AutoReturnableObject<IMetaStoreClient> client = hiveDataset.getClientPool().getClient()) {

    List<Partition> partitions = HiveUtils.getPartitions(client.get(), hiveDataset.getTable(), Optional.<String> absent());
    return Lists.newArrayList(Iterables.filter(Iterables.transform(partitions, new Function<Partition, HiveDatasetVersion>() {

      @Override
      public HiveDatasetVersion apply(Partition partition) {
        try {
          return getDatasetVersion(partition);
        } catch (Throwable e) {
          log.warn(String.format("Failed to get DatasetVersion %s. Skipping.", partition.getCompleteName()), e);
          return null;
        }
      }
    }), Predicates.notNull()));
  }
}
 
Example #26
Source File: HiveMetaStoreBasedRegister.java    From incubator-gobblin with Apache License 2.0
private void addOrAlterPartitionWithPullMode(IMetaStoreClient client, Table table, HivePartition partition)
    throws TException, IOException {
  Partition nativePartition = HiveMetaStoreUtils.getPartition(partition);

  Preconditions.checkArgument(table.getPartitionKeysSize() == nativePartition.getValues().size(),
      String.format("Partition key size is %s but partition value size is %s", table.getPartitionKeys().size(),
          nativePartition.getValues().size()));

  try (AutoCloseableHiveLock lock =
      this.locks.getPartitionLock(table.getDbName(), table.getTableName(), nativePartition.getValues())) {

    Partition existedPartition;
    try {
      try (Timer.Context context = this.metricContext.timer(GET_HIVE_PARTITION).time()) {
        existedPartition =  client.getPartition(table.getDbName(), table.getTableName(), nativePartition.getValues());
        if (this.skipDiffComputation) {
          onPartitionExistWithoutComputingDiff(table, nativePartition, null);
        } else {
          onPartitionExist(client, table, partition, nativePartition, existedPartition);
        }
      }
    } catch (TException e) {
      try (Timer.Context context = this.metricContext.timer(ADD_PARTITION_TIMER).time()) {
        client.add_partition(getPartitionWithCreateTimeNow(nativePartition));
      }
      catch (Throwable e2) {
        log.error(String.format(
            "Unable to add or alter partition %s in table %s with location %s: " + e2.getMessage(),
            stringifyPartitionVerbose(nativePartition), table.getTableName(), nativePartition.getSd().getLocation()), e2);
        throw e2;
      }
      log.info(String.format("Added partition %s to table %s with location %s", stringifyPartition(nativePartition),
          table.getTableName(), nativePartition.getSd().getLocation()));
    }
  }
}
 
Example #27
Source File: HiveShimV120.java    From flink with Apache License 2.0
@Override
public void alterTable(IMetaStoreClient client, String databaseName, String tableName, Table table) throws InvalidOperationException, MetaException, TException {
	// For Hive-1.2.x, we need to tell HMS not to update stats. Otherwise, the stats we put in the table
	// parameters can be overridden. The extra config we add here will be removed by HMS after it's used.
	// Don't use StatsSetupConst.DO_NOT_UPDATE_STATS because it wasn't defined in Hive 1.1.x.
	table.getParameters().put("DO_NOT_UPDATE_STATS", "true");
	client.alter_table(databaseName, tableName, table);
}
 
Example #28
Source File: HiveConvertPublisher.java    From incubator-gobblin with Apache License 2.0
@VisibleForTesting
public boolean addPartition(Partition destPartition, String completePartitionName) {
  try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
    client.get().add_partition(destPartition);
    return true;
  } catch (IOException | TException e) {
    log.warn("Unable to add Partition " + completePartitionName);
  }
  return false;
}
 
Example #29
Source File: PartitionLevelWatermarker.java    From incubator-gobblin with Apache License 2.0
/**
 * Adds watermark workunits to <code>workunits</code>. A watermark workunit is a dummy workunit that is skipped by extractor/converter/writer.
 * It stores a map of watermarks. The map has one entry per partition with partition watermark as value.
 * <ul>
 * <li>Add one NoOp watermark workunit for each {@link Table}
 * <li>The workunit has an identifier property {@link #IS_WATERMARK_WORKUNIT_KEY} set to true.
 * <li>Watermarks for all {@link Partition}s that belong to this {@link Table} are added as {@link Map}
 * <li>A maximum of {@link #maxPartitionsPerDataset} are persisted. Watermarks are ordered by most recently modified {@link Partition}s
 *
 * </ul>
 * {@inheritDoc}
 * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#onGetWorkunitsEnd(java.util.List)
 */
@Override
public void onGetWorkunitsEnd(List<WorkUnit> workunits) {
  try (AutoReturnableObject<IMetaStoreClient> client = this.pool.getClient()) {
    for (Map.Entry<String, Map<String, Long>> tableWatermark : this.expectedHighWatermarks.entrySet()) {

      String tableKey = tableWatermark.getKey();
      Map<String, Long> partitionWatermarks = tableWatermark.getValue();

      // Watermark workunits are required only for Partitioned tables
      // tableKey is the complete table name in the format "db@table"
      if (!(new org.apache.hadoop.hive.ql.metadata.Table(client.get().getTable(
          tableKey.split("@")[0], tableKey.split("@")[1])).isPartitioned())) {
        continue;
      }
      // We only keep watermarks for partitions that were updated after leastWatermarkToPersistInState
      Map<String, Long> expectedPartitionWatermarks =
          ImmutableMap.copyOf(Maps.filterEntries(partitionWatermarks, new Predicate<Map.Entry<String, Long>>() {

            @Override
            public boolean apply(@Nonnull Map.Entry<String, Long> input) {
              return Long.compare(input.getValue(), PartitionLevelWatermarker.this.leastWatermarkToPersistInState) >= 0;
            }
          }));

      // Create dummy workunit to track all the partition watermarks for this table
      WorkUnit watermarkWorkunit = WorkUnit.createEmpty();
      watermarkWorkunit.setProp(IS_WATERMARK_WORKUNIT_KEY, true);
      watermarkWorkunit.setProp(ConfigurationKeys.DATASET_URN_KEY, tableKey);

      watermarkWorkunit.setWatermarkInterval(new WatermarkInterval(new MultiKeyValueLongWatermark(
          this.previousWatermarks.get(tableKey)), new MultiKeyValueLongWatermark(expectedPartitionWatermarks)));

      workunits.add(watermarkWorkunit);
    }
  } catch (IOException | TException e) {
    Throwables.propagate(e);
  }
}
 
Example #30
Source File: HiveMetaStoreBasedRegister.java    From incubator-gobblin with Apache License 2.0
/**
 * @deprecated Please use {@link #createOrAlterTable(IMetaStoreClient, Table, HiveSpec)} instead.
 */
@Deprecated
@Override
public boolean createTableIfNotExists(HiveTable table) throws IOException {
  try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient();
      AutoCloseableHiveLock lock = this.locks.getTableLock(table.getDbName(), table.getTableName())) {
    return createTableIfNotExists(client.get(), HiveMetaStoreUtils.getTable(table), table);
  }
}