org.apache.hive.hcatalog.common.HCatUtil Java Examples

The following examples show how to use org.apache.hive.hcatalog.common.HCatUtil. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
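Most of the examples below revolve around two HCatUtil calls: serialize, which turns an HCatSchema (or another serializable HCatalog object) into a String that can be stored in a Hadoop Configuration, and deserialize, which reads it back. The following is a minimal, hedged round-trip sketch; the class name and the single field are illustrative and are not taken from any of the projects referenced below.

import java.io.IOException;
import java.util.Collections;

import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class HCatUtilRoundTrip {

	public static void main(String[] args) throws IOException {
		// build a one-column schema (field name and comment are made up for this sketch)
		HCatSchema schema = new HCatSchema(Collections.singletonList(
			new HCatFieldSchema("id", HCatFieldSchema.Type.INT, "row id")));

		// serialize the schema into a String, e.g. to stash it in a Configuration value
		String serialized = HCatUtil.serialize(schema);

		// deserialize returns Object, so a cast is required, exactly as in the examples below
		HCatSchema restored = (HCatSchema) HCatUtil.deserialize(serialized);
		System.out.println(restored.getFieldNames());
	}
}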
Example #1
Source File: HCatInputFormatBase.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
	this.fieldNames = new String[in.readInt()];
	for (int i = 0; i < this.fieldNames.length; i++) {
		this.fieldNames[i] = in.readUTF();
	}

	Configuration configuration = new Configuration();
	configuration.readFields(in);

	if (this.configuration == null) {
		this.configuration = configuration;
	}

	this.hCatInputFormat = new org.apache.hive.hcatalog.mapreduce.HCatInputFormat();
	this.outputSchema = (HCatSchema) HCatUtil.deserialize(this.configuration.get("mapreduce.lib.hcat.output.schema"));
}
 
Example #2
Source File: HCatInputFormatBase.java    From flink with Apache License 2.0
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
	this.fieldNames = new String[in.readInt()];
	for (int i = 0; i < this.fieldNames.length; i++) {
		this.fieldNames[i] = in.readUTF();
	}

	Configuration configuration = new Configuration();
	configuration.readFields(in);

	if (this.configuration == null) {
		this.configuration = configuration;
	}

	this.hCatInputFormat = new org.apache.hive.hcatalog.mapreduce.HCatInputFormat();
	this.outputSchema = (HCatSchema) HCatUtil.deserialize(this.configuration.get("mapreduce.lib.hcat.output.schema"));
}
 
Example #3
Source File: HiveMetaStore.java    From streamx with Apache License 2.0
public HiveMetaStore(Configuration conf, HdfsSinkConnectorConfig connectorConfig) throws HiveMetaStoreException {
  HiveConf hiveConf = new HiveConf(conf, HiveConf.class);
  String hiveConfDir = connectorConfig.getString(HdfsSinkConnectorConfig.HIVE_CONF_DIR_CONFIG);
  String hiveMetaStoreURIs = connectorConfig.getString(HdfsSinkConnectorConfig.HIVE_METASTORE_URIS_CONFIG);
  if (hiveMetaStoreURIs.isEmpty()) {
    log.warn("hive.metastore.uris empty, an embedded Hive metastore will be "
             + "created in the directory the connector is started. "
             + "You need to start Hive in that specific directory to query the data.");
  }
  if (!hiveConfDir.equals("")) {
    String hiveSitePath = hiveConfDir + "/hive-site.xml";
    File hiveSite = new File(hiveSitePath);
    if (!hiveSite.exists()) {
      log.warn("hive-site.xml does not exist in provided Hive configuration directory {}.", hiveConf);
    }
    hiveConf.addResource(new Path(hiveSitePath));
  }
  hiveConf.set("hive.metastore.uris", hiveMetaStoreURIs);
  try {
    client = HCatUtil.getHiveMetastoreClient(hiveConf);
  } catch (IOException | MetaException e) {
    throw new HiveMetaStoreException(e);
  }
}
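For readers who want to try HCatUtil.getHiveMetastoreClient outside of a connector, the following is a hedged standalone variant of the setup above: it loads hive-site.xml from a configuration directory and obtains a metastore client. The configuration path, metastore query, and class name are placeholders of ours; only the HCatUtil and IMetaStoreClient calls mirror the example.

import java.io.File;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hive.hcatalog.common.HCatUtil;

public class HiveSiteClientSketch {

  public static void main(String[] args) throws Exception {
    // placeholder path; point this at a directory containing hive-site.xml
    String hiveConfDir = "/etc/hive/conf";
    HiveConf hiveConf = new HiveConf();

    File hiveSite = new File(hiveConfDir, "hive-site.xml");
    if (hiveSite.exists()) {
      hiveConf.addResource(new Path(hiveSite.getAbsolutePath()));
    }

    IMetaStoreClient client = HCatUtil.getHiveMetastoreClient(hiveConf);
    try {
      System.out.println(client.getAllDatabases());
    } finally {
      // IMetaStoreClient is not AutoCloseable, so close it explicitly
      client.close();
    }
  }
}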
 
Example #4
Source File: HCatalogUtils.java    From beam with Apache License 2.0
private static long getFileSizeForPartition(Read readRequest, Partition partitionToRead)
    throws Exception {
  IMetaStoreClient client = null;
  try {
    HiveConf hiveConf = HCatalogUtils.createHiveConf(readRequest);
    client = HCatalogUtils.createMetaStoreClient(hiveConf);
    List<org.apache.hadoop.hive.ql.metadata.Partition> p = new ArrayList<>();
    Table table = HCatUtil.getTable(client, readRequest.getDatabase(), readRequest.getTable());
    final org.apache.hadoop.hive.ql.metadata.Partition partition =
        new org.apache.hadoop.hive.ql.metadata.Partition(table, partitionToRead);
    p.add(partition);
    final List<Long> fileSizeForPartitions = StatsUtils.getFileSizeForPartitions(hiveConf, p);
    return fileSizeForPartitions.get(0);
  } finally {
    // IMetaStoreClient is not AutoCloseable, closing it manually
    if (client != null) {
      client.close();
    }
  }
}
 
Example #5
Source File: HCatalogIO.java    From beam with Apache License 2.0
/**
 * Returns the size of the table in bytes; it does not take into account any
 * filter or partition details that may have been passed.
 */
@Override
public long getEstimatedSizeBytes(PipelineOptions pipelineOptions) throws Exception {
  IMetaStoreClient client = null;
  try {
    HiveConf hiveConf = HCatalogUtils.createHiveConf(spec);
    client = HCatalogUtils.createMetaStoreClient(hiveConf);
    Table table = HCatUtil.getTable(client, spec.getDatabase(), spec.getTable());
    return StatsUtils.getFileSizeForTable(hiveConf, table);
  } finally {
    // IMetaStoreClient is not AutoCloseable, closing it manually
    if (client != null) {
      client.close();
    }
  }
}
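A hedged standalone version of the size estimation above: resolve the table through HCatUtil.getTable and hand it to StatsUtils. The metastore URI and the database/table names are placeholders; the HCatUtil and StatsUtils calls are the ones used by the Beam code.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.apache.hive.hcatalog.common.HCatUtil;

public class TableSizeSketch {

  public static void main(String[] args) throws Exception {
    HiveConf hiveConf = new HiveConf();
    hiveConf.set("hive.metastore.uris", "thrift://metastore-host:9083"); // placeholder endpoint

    IMetaStoreClient client = HCatUtil.getHiveMetastoreClient(hiveConf);
    try {
      // HCatUtil.getTable returns the ql-level Table wrapper that StatsUtils expects
      Table table = HCatUtil.getTable(client, "mydb", "mytable");
      System.out.println("estimated bytes: " + StatsUtils.getFileSizeForTable(hiveConf, table));
    } finally {
      // IMetaStoreClient is not AutoCloseable, closing it manually
      client.close();
    }
  }
}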
 
Example #6
Source File: HCatInputFormatBase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates an HCatInputFormat for the given database, table, and
 * {@link org.apache.hadoop.conf.Configuration}.
 * By default, the InputFormat returns {@link org.apache.hive.hcatalog.data.HCatRecord}.
 * The return type of the InputFormat can be changed to Flink-native tuples by calling
 * {@link HCatInputFormatBase#asFlinkTuples()}.
 *
 * @param database The name of the database to read from.
 * @param table The name of the table to read.
 * @param config The Configuration for the InputFormat.
 * @throws java.io.IOException
 */
public HCatInputFormatBase(String database, String table, Configuration config) throws IOException {
	super();
	this.configuration = config;
	HadoopUtils.mergeHadoopConf(this.configuration);

	this.hCatInputFormat = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.setInput(this.configuration, database, table);
	this.outputSchema = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.getTableSchema(this.configuration);

	// configure output schema of HCatFormat
	configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
	// set type information
	this.resultType = new WritableTypeInfo(DefaultHCatRecord.class);
}
 
Example #7
Source File: HCatInputFormatBase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Specifies the fields which are returned by the InputFormat and their order.
 *
 * @param fields The fields and their order which are returned by the InputFormat.
 * @return This InputFormat with specified return fields.
 * @throws java.io.IOException
 */
public HCatInputFormatBase<T> getFields(String... fields) throws IOException {

	// build output schema
	ArrayList<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(fields.length);
	for (String field : fields) {
		fieldSchemas.add(this.outputSchema.get(field));
	}
	this.outputSchema = new HCatSchema(fieldSchemas);

	// update output schema configuration
	configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));

	return this;
}
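The constructor and getFields() shown above are typically combined when defining a Flink batch source. The following is a hedged wiring sketch using the DataSet API; the database, table, and column names are placeholders, and it assumes the flink-hcatalog HCatInputFormat subclass of the HCatInputFormatBase quoted here.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.hcatalog.java.HCatInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.data.HCatRecord;

public class HCatReadJob {

	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// placeholder database/table; the Configuration is merged with the Hadoop config
		HCatInputFormat<HCatRecord> input =
			new HCatInputFormat<>("mydb", "mytable", new Configuration());

		// project two columns; this re-serializes the output schema as shown above
		input.getFields("id", "name");

		DataSet<HCatRecord> records = env.createInput(input);
		records.first(10).print();
	}
}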
 
Example #8
Source File: HCatInputFormatBase.java    From flink with Apache License 2.0
/**
 * Creates an HCatInputFormat for the given database, table, and
 * {@link org.apache.hadoop.conf.Configuration}.
 * By default, the InputFormat returns {@link org.apache.hive.hcatalog.data.HCatRecord}.
 * The return type of the InputFormat can be changed to Flink-native tuples by calling
 * {@link HCatInputFormatBase#asFlinkTuples()}.
 *
 * @param database The name of the database to read from.
 * @param table The name of the table to read.
 * @param config The Configuration for the InputFormat.
 * @throws java.io.IOException
 */
public HCatInputFormatBase(String database, String table, Configuration config) throws IOException {
	super();
	this.configuration = config;
	HadoopUtils.mergeHadoopConf(this.configuration);

	this.hCatInputFormat = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.setInput(this.configuration, database, table);
	this.outputSchema = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.getTableSchema(this.configuration);

	// configure output schema of HCatFormat
	configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
	// set type information
	this.resultType = new WritableTypeInfo(DefaultHCatRecord.class);
}
 
Example #9
Source File: HCatInputFormatBase.java    From flink with Apache License 2.0
/**
 * Specifies the fields which are returned by the InputFormat and their order.
 *
 * @param fields The fields and their order which are returned by the InputFormat.
 * @return This InputFormat with specified return fields.
 * @throws java.io.IOException
 */
public HCatInputFormatBase<T> getFields(String... fields) throws IOException {

	// build output schema
	ArrayList<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(fields.length);
	for (String field : fields) {
		fieldSchemas.add(this.outputSchema.get(field));
	}
	this.outputSchema = new HCatSchema(fieldSchemas);

	// update output schema configuration
	configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));

	return this;
}
 
Example #10
Source File: SqoopHCatExportHelper.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
public SqoopHCatExportHelper(Configuration conf, boolean isOdps)
  throws IOException, InterruptedException {
  this.isOdps = isOdps;

  if (!isOdps) {
    colTypesJava =
        DefaultStringifier.load(conf, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_JAVA,
            MapWritable.class);
    colTypesSql =
        DefaultStringifier.load(conf, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_SQL,
            MapWritable.class);
  }
  // Instantiate a copy of the user's class to hold and parse the record.

  String recordClassName = conf.get(
    ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY);
  if (null == recordClassName) {
    throw new IOException("Export table class name ("
      + ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY
      + ") is not set!");
  }

  bigDecimalFormatString = conf.getBoolean(
    ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
    ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);

  debugHCatExportMapper = conf.getBoolean(
    SqoopHCatUtilities.DEBUG_HCAT_EXPORT_MAPPER_PROP, false);
  try {
    Class<?> cls = Class.forName(recordClassName, true,
      Thread.currentThread().getContextClassLoader());
    sqoopRecord = (SqoopRecord) ReflectionUtils.newInstance(cls, conf);
  } catch (ClassNotFoundException cnfe) {
    throw new IOException(cnfe);
  }

  if (null == sqoopRecord) {
    throw new IOException("Could not instantiate object of type "
      + recordClassName);
  }

  String inputJobInfoStr = conf.get(HCatConstants.HCAT_KEY_JOB_INFO);
  jobInfo =
    (InputJobInfo) HCatUtil.deserialize(inputJobInfoStr);
  HCatSchema tableSchema = jobInfo.getTableInfo().getDataColumns();
  HCatSchema partitionSchema =
    jobInfo.getTableInfo().getPartitionColumns();
  hCatFullTableSchema = new HCatSchema(tableSchema.getFields());
  for (HCatFieldSchema hfs : partitionSchema.getFields()) {
    hCatFullTableSchema.append(hfs);
  }
}
 
Example #11
Source File: SqoopHCatImportHelper.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
public SqoopHCatImportHelper(Configuration conf) throws IOException,
  InterruptedException {

  String inputJobInfoStr = conf.get(HCatConstants.HCAT_KEY_JOB_INFO);
  jobInfo = (InputJobInfo) HCatUtil.deserialize(inputJobInfoStr);
  dataColsSchema = jobInfo.getTableInfo().getDataColumns();
  partitionSchema = jobInfo.getTableInfo().getPartitionColumns();
  StringBuilder storerInfoStr = new StringBuilder(1024);
  StorerInfo storerInfo = jobInfo.getTableInfo().getStorerInfo();
  storerInfoStr.append("HCatalog Storer Info : ").append("\n\tHandler = ")
    .append(storerInfo.getStorageHandlerClass())
    .append("\n\tInput format class = ").append(storerInfo.getIfClass())
    .append("\n\tOutput format class = ").append(storerInfo.getOfClass())
    .append("\n\tSerde class = ").append(storerInfo.getSerdeClass());
  Properties storerProperties = storerInfo.getProperties();
  if (!storerProperties.isEmpty()) {
    storerInfoStr.append("\nStorer properties ");
    for (Map.Entry<Object, Object> entry : storerProperties.entrySet()) {
      String key = (String) entry.getKey();
      Object val = entry.getValue();
      storerInfoStr.append("\n\t").append(key).append('=').append(val);
    }
  }
  storerInfoStr.append("\n");
  LOG.info(storerInfoStr);

  hCatFullTableSchema = new HCatSchema(dataColsSchema.getFields());
  for (HCatFieldSchema hfs : partitionSchema.getFields()) {
    hCatFullTableSchema.append(hfs);
  }
  fieldCount = hCatFullTableSchema.size();
  lobLoader = new LargeObjectLoader(conf, new Path(jobInfo.getTableInfo()
    .getTableLocation()));
  bigDecimalFormatString = conf.getBoolean(
    ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
    ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);
  debugHCatImportMapper = conf.getBoolean(
    SqoopHCatUtilities.DEBUG_HCAT_IMPORT_MAPPER_PROP, false);
  IntWritable[] delimChars = DefaultStringifier.loadArray(conf,
    SqoopHCatUtilities.HIVE_DELIMITERS_TO_REPLACE_PROP, IntWritable.class);
  hiveDelimiters = new DelimiterSet((char) delimChars[0].get(),
    (char) delimChars[1].get(), (char) delimChars[2].get(),
    (char) delimChars[3].get(), delimChars[4].get() == 1);
  hiveDelimsReplacement = conf
    .get(SqoopHCatUtilities.HIVE_DELIMITERS_REPLACEMENT_PROP);
  if (hiveDelimsReplacement == null) {
    hiveDelimsReplacement = "";
  }
  doHiveDelimsReplacement = Boolean.valueOf(conf
    .get(SqoopHCatUtilities.HIVE_DELIMITERS_REPLACEMENT_ENABLED_PROP));

  IntWritable[] fPos = DefaultStringifier.loadArray(conf,
    SqoopHCatUtilities.HCAT_FIELD_POSITIONS_PROP, IntWritable.class);
  hCatFieldPositions = new int[fPos.length];
  for (int i = 0; i < fPos.length; ++i) {
    hCatFieldPositions[i] = fPos[i].get();
  }

  LOG.debug("Hive delims replacement enabled : " + doHiveDelimsReplacement);
  LOG.debug("Hive Delimiters : " + hiveDelimiters.toString());
  LOG.debug("Hive delimiters replacement : " + hiveDelimsReplacement);
  staticPartitionKeys = conf
    .getStrings(SqoopHCatUtilities.HCAT_STATIC_PARTITION_KEY_PROP);
  String partKeysString = staticPartitionKeys == null ? ""
    : Arrays.toString(staticPartitionKeys);
  LOG.debug("Static partition key used : "  + partKeysString);
}
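Both Sqoop helpers above end with the same pattern: the full HCatalog table schema is rebuilt by appending the partition columns to the data columns deserialized from the job info. The following is a minimal, hedged sketch of just that step; the class and method names are ours, not Sqoop's.

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

final class FullTableSchemaSketch {

  static HCatSchema fullTableSchema(HCatSchema dataColumns, HCatSchema partitionColumns)
      throws HCatException {
    // start from the data columns ...
    HCatSchema full = new HCatSchema(dataColumns.getFields());
    // ... then append each partition column, so they come last in the combined schema
    for (HCatFieldSchema hfs : partitionColumns.getFields()) {
      full.append(hfs);
    }
    return full;
  }
}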
 
Example #12
Source File: HCatalogUtils.java    From beam with Apache License 2.0
static IMetaStoreClient createMetaStoreClient(Configuration conf)
    throws IOException, MetaException {
  final HiveConf hiveConf = HCatUtil.getHiveConf(conf);
  return HCatUtil.getHiveMetastoreClient(hiveConf);
}
 
Example #13
Source File: HCatalogUtils.java    From beam with Apache License 2.0
static HiveConf createHiveConf(Read readRequest) throws IOException {
  Configuration conf = createConfiguration(readRequest.getConfigProperties());
  return HCatUtil.getHiveConf(conf);
}
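Taken together, the two Beam helpers above show the usual path from a plain Hadoop Configuration to a metastore client: HCatUtil.getHiveConf wraps the Configuration in a HiveConf, and HCatUtil.getHiveMetastoreClient opens the client. The following is a hedged standalone sketch; the metastore URI and the "default" database query are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hive.hcatalog.common.HCatUtil;

public class ConfigurationToClientSketch {

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("hive.metastore.uris", "thrift://metastore-host:9083"); // placeholder endpoint

    HiveConf hiveConf = HCatUtil.getHiveConf(conf);          // Configuration -> HiveConf
    IMetaStoreClient client = HCatUtil.getHiveMetastoreClient(hiveConf);
    try {
      System.out.println(client.getAllTables("default"));
    } finally {
      client.close(); // IMetaStoreClient is not AutoCloseable
    }
  }
}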