org.apache.hadoop.hive.serde.Constants Java Examples

The following examples show how to use org.apache.hadoop.hive.serde.Constants. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestHiveColumnarStorage.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Deserializes a columnar row buffer into a {@link ColumnarStruct} using a
 * {@link ColumnarSerDe} configured from the given schema string.
 *
 * @param buff   the row data handed to the serde for deserialization
 * @param schema schema string from which column names and types are parsed
 * @return the deserialized struct
 * @throws SerDeException if the serde fails to initialize or deserialize
 */
private ColumnarStruct readColumnarStruct(BytesRefArrayWritable buff, String schema) throws SerDeException {
    final Pattern columnNamePattern = Pattern.compile("[a-zA-Z_0-9]*[ ]");
    final List<String> columnTypes = HiveRCSchemaUtil.parseSchemaTypes(schema);
    final List<String> columnNames = HiveRCSchemaUtil.parseSchema(columnNamePattern, schema);

    // The field schema list is not consumed below; it is still built so that every
    // column type is passed through findPigDataType, exactly as before.
    final int columnCount = columnNames.size();
    final List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>(columnCount);
    int index = 0;
    while (index < columnNames.size()) {
        final String columnName = columnNames.get(index);
        final String hiveType = columnTypes.get(index);
        fieldSchemas.add(new FieldSchema(columnName, HiveRCSchemaUtil.findPigDataType(hiveType)));
        index++;
    }

    // Table properties telling the serde the column names and their types.
    final Properties tableProperties = new Properties();
    tableProperties.setProperty(Constants.LIST_COLUMNS, HiveRCSchemaUtil.listToString(columnNames));
    tableProperties.setProperty(Constants.LIST_COLUMN_TYPES, HiveRCSchemaUtil.listToString(columnTypes));

    final Configuration serdeConf = new HiveConf(conf, SessionState.class);
    final ColumnarSerDe columnarSerDe = new ColumnarSerDe();
    columnarSerDe.initialize(serdeConf, tableProperties);

    final Object row = columnarSerDe.deserialize(buff);
    return (ColumnarStruct) row;
}
 
Example #2
Source File: AbstractColumnSerDe.java    From Hive-Cassandra with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the cassandra column family name from the table properties.
 *
 * <p>The explicit column family property wins when present. Otherwise the
 * metastore table name is used, with everything up to and including the first
 * {@code '.'} removed.
 *
 * @param tbl table properties
 * @return cassandra column family name
 * @throws SerDeException if neither property is set
 */
protected String getCassandraColumnFamily(Properties tbl) throws SerDeException {
  final String explicitName = tbl.getProperty(CASSANDRA_CF_NAME);
  if (explicitName != null) {
    return explicitName;
  }

  final String tableName =
      tbl.getProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME);
  if (tableName == null) {
    throw new SerDeException("CassandraColumnFamily not defined" + tbl.toString());
  }

  // Keep only the part after the first dot, if there is one.
  final int firstDot = tableName.indexOf(".");
  return firstDot == -1 ? tableName : tableName.substring(firstDot + 1);
}
 
Example #3
Source File: AbstractColumnSerDe.java    From Hive-Cassandra with Apache License 2.0 6 votes vote down vote up
/**
 * Obtains the column mapping from the table properties. When the explicit
 * column mapping property is set it is parsed; otherwise a mapping string is
 * generated from the table's column list and split on commas.
 *
 * @param tbl table properties
 * @return the column mapping entries
 * @throws SerDeException if neither a mapping nor the table column list is defined
 */
protected List<String> parseOrCreateColumnMapping(Properties tbl) throws SerDeException {
  final String explicitMapping = tbl.getProperty(CASSANDRA_COL_MAPPING);
  if (explicitMapping != null) {
    return parseColumnMapping(explicitMapping);
  }

  final String tableColumns = tbl.getProperty(Constants.LIST_COLUMNS);
  if (tableColumns == null) {
    throw new SerDeException("Can't find table column definitions");
  }

  // No explicit mapping: derive one from the table columns.
  final String generatedMapping = createColumnMappingString(tableColumns);

  if (LOG.isDebugEnabled()) {
    LOG.debug("table column string: " + tableColumns);
    LOG.debug("Auto-created mapping string: " + generatedMapping);
  }

  final String[] mappingEntries = generatedMapping.split(",");
  return Arrays.asList(mappingEntries);
}
 
Example #4
Source File: HiveColumnarLoader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the hadoop/hive configuration, parses the table schema into column
 * names and types, builds the pig schema and prepares the table properties
 * used by the ColumnarSerDe.
 *
 * @param table_schema
 *            schema string; must not be null
 */
private void setup(String table_schema) {
    if (table_schema == null) {
        throw new RuntimeException(
            "The table schema must be defined as colname type, colname type.  All types are hive types");
    }

    // basic configuration for hdfs and hive
    conf = new Configuration();
    hiveConf = new HiveConf(conf, SessionState.class);

    // split the table_schema string into column types and column names
    final List<String> columnTypes = HiveRCSchemaUtil.parseSchemaTypes(table_schema);
    final List<String> columnNames = HiveRCSchemaUtil.parseSchema(pcols, table_schema);

    final List<FieldSchema> pigFields = new ArrayList<FieldSchema>(columnNames.size());
    int index = 0;
    while (index < columnNames.size()) {
        final String columnName = columnNames.get(index);
        final String hiveType = columnTypes.get(index);
        pigFields.add(new FieldSchema(columnName, HiveRCSchemaUtil.findPigDataType(hiveType)));
        index++;
    }

    pigSchema = new ResourceSchema(new Schema(pigFields));

    // Table schema properties for the ColumnarSerDe. These are never changed
    // by the columns-to-read filter, because the columnar serde needs to know
    // the complete format of each record.
    props = new Properties();
    final String columnNamesValue = HiveRCSchemaUtil.listToString(columnNames);
    props.setProperty(Constants.LIST_COLUMNS, columnNamesValue);
    final String columnTypesValue = HiveRCSchemaUtil.listToString(columnTypes);
    props.setProperty(Constants.LIST_COLUMN_TYPES, columnTypesValue);
}
 
Example #5
Source File: CassandraColumnSerDe.java    From Hive-Cassandra with Apache License 2.0 4 votes vote down vote up
/**
 * Initialize the cassandra serialization and deserialization parameters from table properties and
 * configuration.
 *
 * <p>Resolves the column family and column mapping, caches the mapped column names as bytes,
 * locates the key column, initializes the serde parameters and validator type, and then checks
 * that the mapping is consistent with the Hive columns:
 * <ul>
 *   <li>the mapping must have as many entries as the table has columns;</li>
 *   <li>every mapping entry ending in {@code ":"} must be mapped to a Hive
 *       {@code map<string,?>} column.</li>
 * </ul>
 *
 * @param job       configuration passed on to the serde parameter initialization
 * @param tbl       table properties
 * @param serdeName serde name, used as a prefix in error messages
 * @throws SerDeException if the mapping size does not match the column count, or a
 *         {@code ":"}-terminated mapping entry is not mapped to {@code map<string,?>}
 */
@Override
protected void initCassandraSerDeParameters(Configuration job, Properties tbl, String serdeName)
    throws SerDeException {
  cassandraColumnFamily = getCassandraColumnFamily(tbl);
  cassandraColumnNames = parseOrCreateColumnMapping(tbl);

  cassandraColumnNamesBytes = new ArrayList<BytesWritable>();
  for (String columnName : cassandraColumnNames) {
    // NOTE(review): getBytes() uses the platform default charset — confirm this is intended.
    cassandraColumnNamesBytes.add(new BytesWritable(columnName.getBytes()));
  }

  iKey = cassandraColumnNames.indexOf(AbstractColumnSerDe.CASSANDRA_KEY_COLUMN);

  serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName);

  validatorType = parseOrCreateValidatorType(tbl);

  setTableMapping();

  if (cassandraColumnNames.size() != serdeParams.getColumnNames().size()) {
    throw new SerDeException(serdeName + ": columns has " +
        serdeParams.getColumnNames().size() +
        " elements while cassandra.columns.mapping has " +
        cassandraColumnNames.size() + " elements" +
        " (counting the key if implicit)");
  }

  // we just can make sure that "StandardColumn:" is mapped to MAP<String,?>
  for (int i = 0; i < cassandraColumnNames.size(); i++) {
    String cassandraColName = cassandraColumnNames.get(i);
    if (cassandraColName.endsWith(":")) {
      TypeInfo typeInfo = serdeParams.getColumnTypes().get(i);

      // Compare the key type name by value: reference comparison (!=) only works when both
      // strings happen to be the same interned instance and would otherwise reject a valid
      // map<string,?> mapping. The cast is only evaluated once the category is known to be MAP.
      boolean mappedToStringKeyedMap = typeInfo.getCategory() == Category.MAP
          && Constants.STRING_TYPE_NAME.equals(
              ((MapTypeInfo) typeInfo).getMapKeyTypeInfo().getTypeName());

      if (!mappedToStringKeyedMap) {
        throw new SerDeException(
            serdeName + ": Cassandra column family '"
                + cassandraColName
                + "' should be mapped to map<string,?> but is mapped to "
                + typeInfo.getTypeName());
      }
    }
  }
}
 
Example #6
Source File: SMSerDe.java    From spliceengine with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
  * An initialization function used to gather information about the table.
  * Typically, a SerDe implementation will be interested in the list of
  * column names and their types. That information will be used to help
  * perform actual serialization and deserialization of data.
  *
  * <p>Steps performed here:
  * <ul>
  *   <li>reads the splice table name from the table properties;</li>
  *   <li>resolves the hbase root directory from {@code conf}, falling back to the
  *       system property of the same name;</li>
  *   <li>when {@code conf} is non-null, copies the table name, JDBC string and hbase
  *       directory into it and registers the post-exec / on-failure hooks if none are set;</li>
  *   <li>reads the column names (upper-cased) and column types from the table properties;</li>
  *   <li>when a table name is set, verifies the table exists and, when {@code conf} is
  *       non-null, stores the encoded scan info in it;</li>
  *   <li>builds the row type info and its object inspector.</li>
  * </ul>
  *
  * @param conf job configuration; may be null, in which case nothing is written to it
  * @param tbl  table properties
  * @throws SerDeException if the hbase root directory is not set, the column names or
  *         column types are missing from {@code tbl}, the table does not exist, or the
  *         table lookup / scan setup fails
  */
 //@Override
 public void initialize(Configuration conf, Properties tbl) throws SerDeException {
     if (Log.isDebugEnabled()) {
         SpliceLogUtils.debug(Log, "initialize with conf=%s, tbl=%s",conf,tbl);
     }
     // Get a list of the table's column names.
     tableName = tbl.getProperty(MRConstants.SPLICE_TABLE_NAME);
     String hbaseDir = null;
     if (conf != null) {
         hbaseDir = conf.get(HConstants.HBASE_DIR);
     }
     if (hbaseDir == null) {
         hbaseDir = System.getProperty(HConstants.HBASE_DIR);
     }
     if (hbaseDir == null) {
         throw new SerDeException("hbase root directory not set, please include hbase.rootdir in config or via -D system property ...");
     }
     if (conf != null) {
         // NOTE(review): tableName and the JDBC string come straight from tbl and may be null
         // here (tableName is null-checked further down) — confirm conf.set tolerates that.
         conf.set(MRConstants.SPLICE_INPUT_TABLE_NAME, tableName);
         conf.set(MRConstants.SPLICE_JDBC_STR, tbl.getProperty(MRConstants.SPLICE_JDBC_STR));
         conf.set(HConstants.HBASE_DIR, hbaseDir);
         // Only install the hooks when the user has not configured their own.
         if (conf.get(HiveConf.ConfVars.POSTEXECHOOKS.varname) == null) {
             conf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "com.splicemachine.mrio.api.hive.PostExecHook");
         }
         if (conf.get(HiveConf.ConfVars.ONFAILUREHOOKS.varname) == null) {
             conf.set(HiveConf.ConfVars.ONFAILUREHOOKS.varname, "com.splicemachine.mrio.api.hive.FailureExecHook");
         }
     }

     if (sqlUtil == null) {
         sqlUtil = SMSQLUtil.getInstance(tbl.getProperty(MRConstants.SPLICE_JDBC_STR));
     }

     // Fail with a descriptive SerDeException rather than a bare NullPointerException
     // when the table properties do not carry the column list.
     String colNamesStr = tbl.getProperty(Constants.LIST_COLUMNS);
     if (colNamesStr == null) {
         throw new SerDeException("table property " + Constants.LIST_COLUMNS + " is not set");
     }
     colNames.clear();
     for (String split: colNamesStr.split(",")) {
         colNames.add(split.toUpperCase());
     }

     String colTypesStr = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
     if (colTypesStr == null) {
         throw new SerDeException("table property " + Constants.LIST_COLUMN_TYPES + " is not set");
     }
     colTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);
     objectCache = new ArrayList<Object>(colTypes.size());

     if (tableName != null) {
         tableName = tableName.trim().toUpperCase();
         try {
             if (!sqlUtil.checkTableExists(tableName)) {
                 throw new SerDeException(String.format("table %s does not exist...",tableName));
             }
             if (conf != null) {
                 ScanSetBuilder tableScannerBuilder = sqlUtil.getTableScannerBuilder(tableName, colNames);
                 conf.set(MRConstants.SPLICE_SCAN_INFO, tableScannerBuilder.base64Encode());

               //  TableContext tableContext = sqlUtil.createTableContext(tableName, tableScannerBuilder);
               //  conf.set(MRConstants.SPLICE_TBLE_CONTEXT, tableContext.getTableContextBase64String());
             }
         } catch (SerDeException e) {
             // Already the right type (e.g. "table does not exist"): rethrow as-is instead of
             // burying it inside a second SerDeException.
             throw e;
         } catch (Exception e) {
             throw new SerDeException(e);
         }
     }

     if (Log.isDebugEnabled()) {
         SpliceLogUtils.debug(Log, "generating hive info colNames=%s, colTypes=%s",colNames,colTypes);
     }

     rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
     rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
     //serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, getClass().getName());
     Log.info("--------Finished initialize");
 }