Java Code Examples for org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory#getStructTypeInfo()

The following examples show how to use org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory#getStructTypeInfo() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: JSONSerDe.java    From searchanalytics-bigdata with MIT License 6 votes vote down vote up
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help
 * perform actual serialization and deserialization of data.
 */
@Override
public void initialize(final Configuration conf, final Properties tbl)
		throws SerDeException {
	// Get a list of the table's column names.
	final String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
	// Jai...change column names to lower case.
	colNames = Arrays.asList(colNamesStr.toLowerCase().split(","));
	// Get a list of TypeInfos for the columns. This list lines up with
	// the list of column names.
	final String colTypesStr = tbl
			.getProperty(serdeConstants.LIST_COLUMN_TYPES);
	final List<TypeInfo> colTypes = TypeInfoUtils
			.getTypeInfosFromTypeString(colTypesStr);
	rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
			colNames, colTypes);
	rowOI = TypeInfoUtils
			.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
 
Example 2
Source File: BlurObjectInspectorGenerator.java    From incubator-retired-blur with Apache License 2.0 6 votes vote down vote up
private TypeInfo getTypeInfo(String fieldType) {
  if (fieldType.equals(TEXT) || fieldType.equals(STRING) || fieldType.equals(STORED)) {
    return TypeInfoFactory.stringTypeInfo;
  } else if (fieldType.equals(LONG)) {
    return TypeInfoFactory.longTypeInfo;
  } else if (fieldType.equals(INT)) {
    return TypeInfoFactory.intTypeInfo;
  } else if (fieldType.equals(FLOAT)) {
    return TypeInfoFactory.floatTypeInfo;
  } else if (fieldType.equals(DOUBLE)) {
    return TypeInfoFactory.doubleTypeInfo;
  } else if (fieldType.equals(DATE)) {
    return TypeInfoFactory.dateTypeInfo;
  } else if (fieldType.equals(GEO_POINTVECTOR) || fieldType.equals(GEO_RECURSIVEPREFIX)
      || fieldType.equals(GEO_TERMPREFIX)) {
    List<TypeInfo> typeInfos = Arrays.asList((TypeInfo) TypeInfoFactory.floatTypeInfo,
        (TypeInfo) TypeInfoFactory.floatTypeInfo);
    return TypeInfoFactory.getStructTypeInfo(Arrays.asList(LATITUDE, LONGITUDE), typeInfos);
  }
  // Return string for anything that is not a built in type.
  return TypeInfoFactory.stringTypeInfo;
}
 
Example 3
Source File: JSONCDHSerDe.java    From bigdata-tutorial with Apache License 2.0 6 votes vote down vote up
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help perform
 * actual serialization and deserialization of data.
 */
@Override
public void initialize(Configuration conf, Properties tbl)
		throws SerDeException {
	// Get a list of the table's column names.
	String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
	colNames = Arrays.asList(colNamesStr.split(","));

	// Get a list of TypeInfos for the columns. This list lines up with
	// the list of column names.
	String colTypesStr = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
	List<TypeInfo> colTypes =
			TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);

	rowTypeInfo =
			(StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
	rowOI =
			TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
 
Example 4
Source File: TestNiFiOrcUtils.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
public static TypeInfo buildPrimitiveOrcSchema() {
    return TypeInfoFactory.getStructTypeInfo(Arrays.asList("int", "long", "boolean", "float", "double", "bytes", "string"),
            Arrays.asList(
                    TypeInfoCreator.createInt(),
                    TypeInfoCreator.createLong(),
                    TypeInfoCreator.createBoolean(),
                    TypeInfoCreator.createFloat(),
                    TypeInfoCreator.createDouble(),
                    TypeInfoCreator.createBinary(),
                    TypeInfoCreator.createString()));
}
 
Example 5
Source File: TestNiFiOrcUtils.java    From nifi with Apache License 2.0 5 votes vote down vote up
public static TypeInfo buildPrimitiveOrcSchema() {
    return TypeInfoFactory.getStructTypeInfo(Arrays.asList("int", "long", "boolean", "float", "double", "bytes", "string"),
            Arrays.asList(
                    TypeInfoCreator.createInt(),
                    TypeInfoCreator.createLong(),
                    TypeInfoCreator.createBoolean(),
                    TypeInfoCreator.createFloat(),
                    TypeInfoCreator.createDouble(),
                    TypeInfoCreator.createBinary(),
                    TypeInfoCreator.createString()));
}
 
Example 6
Source File: TestNiFiOrcUtils.java    From nifi with Apache License 2.0 5 votes vote down vote up
public static TypeInfo buildPrimitiveOrcSchema() {
    return TypeInfoFactory.getStructTypeInfo(Arrays.asList("int", "long", "boolean", "float", "double", "bytes", "string"),
            Arrays.asList(
                    TypeInfoCreator.createInt(),
                    TypeInfoCreator.createLong(),
                    TypeInfoCreator.createBoolean(),
                    TypeInfoCreator.createFloat(),
                    TypeInfoCreator.createDouble(),
                    TypeInfoCreator.createBinary(),
                    TypeInfoCreator.createString()));
}
 
Example 7
Source File: NiFiOrcUtils.java    From nifi with Apache License 2.0 5 votes vote down vote up
public static TypeInfo getOrcSchema(RecordSchema recordSchema, boolean hiveFieldNames) throws IllegalArgumentException {
    List<RecordField> recordFields = recordSchema.getFields();
    if (recordFields != null) {
        List<String> orcFieldNames = new ArrayList<>(recordFields.size());
        List<TypeInfo> orcFields = new ArrayList<>(recordFields.size());
        recordFields.forEach(recordField -> {
            String fieldName = hiveFieldNames ? recordField.getFieldName().toLowerCase() : recordField.getFieldName();
            orcFieldNames.add(fieldName);
            orcFields.add(getOrcField(recordField.getDataType(), hiveFieldNames));
        });
        return TypeInfoFactory.getStructTypeInfo(orcFieldNames, orcFields);
    }
    return null;
}
 
Example 8
Source File: ParquetHiveSerDe.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {

  final TypeInfo rowTypeInfo;
  final List<String> columnNames;
  final List<TypeInfo> columnTypes;
  // Get column names and sort order
  final String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
  final String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(","));
  }
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  if (columnNames.size() != columnTypes.size()) {
    throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " +
      "name and column type differs. columnNames = " + columnNames + ", columnTypes = " +
      columnTypes);
  }
  // Create row related objects
  rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
  this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

  // Stats part
  stats = new SerDeStats();
  serializedSize = 0;
  deserializedSize = 0;
  status = LAST_OPERATION.UNKNOWN;
}
 
Example 9
Source File: HiveTypeUtil.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public TypeInfo visit(RowType rowType) {
	List<String> names = rowType.getFieldNames();
	List<TypeInfo> typeInfos = new ArrayList<>(names.size());
	for (String name : names) {
		TypeInfo typeInfo =
				rowType.getTypeAt(rowType.getFieldIndex(name)).accept(this);
		if (null != typeInfo) {
			typeInfos.add(typeInfo);
		} else {
			return defaultMethod(rowType);
		}
	}
	return TypeInfoFactory.getStructTypeInfo(names, typeInfos);
}
 
Example 10
Source File: LWSerDe.java    From hive-solr with Apache License 2.0 5 votes vote down vote up
@Override
public void initialize(Configuration conf, Properties tblProperties) throws SerDeException {
  colNames = Arrays.asList(tblProperties.getProperty(serdeConstants.LIST_COLUMNS).split(","));
  colTypes = TypeInfoUtils.getTypeInfosFromTypeString(tblProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES));
  typeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
  inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
  row = new ArrayList<>();
  enableFieldMapping = Boolean.valueOf(tblProperties.getProperty(ENABLE_FIELD_MAPPING, "false"));
}
 
Example 11
Source File: IndexRSerde.java    From indexr with Apache License 2.0 5 votes vote down vote up
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
    String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

    if (Strings.isEmpty(columnNameProperty)) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (Strings.isEmpty(columnTypeProperty)) {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(StringUtils.repeat("string", ":", columnNames.size()));
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("IndexRHiveSerde initialization failed. Number of column " +
                "name and column type differs. columnNames = " + columnNames + ", columnTypes = " +
                columnTypes);
    }

    TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

    stats = new SerDeStats();
    serdeSize = 0;
}
 
Example 12
Source File: JsonSerdeUtils.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
@Nonnull
private static Object parseObject(@Nonnull final JsonParser p,
        @CheckForNull final List<String> columnNames,
        @CheckForNull final List<TypeInfo> columnTypes)
        throws JsonParseException, IOException, SerDeException {
    Preconditions.checkNotNull(columnNames, "columnNames MUST NOT be null in parseObject",
        SerDeException.class);
    Preconditions.checkNotNull(columnTypes, "columnTypes MUST NOT be null in parseObject",
        SerDeException.class);
    if (columnNames.size() != columnTypes.size()) {
        throw new SerDeException(
            "Size of columnNames and columnTypes does not match. #columnNames="
                    + columnNames.size() + ", #columnTypes=" + columnTypes.size());
    }

    TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    final HCatSchema schema;
    try {
        schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema();
    } catch (HCatException e) {
        throw new SerDeException(e);
    }

    final List<Object> r = new ArrayList<Object>(Collections.nCopies(columnNames.size(), null));
    JsonToken token;
    while (((token = p.nextToken()) != JsonToken.END_OBJECT) && (token != null)) {
        // iterate through each token, and create appropriate object here.
        populateRecord(r, token, p, schema);
    }

    if (columnTypes.size() == 1) {
        return r.get(0);
    }
    return r;
}
 
Example 13
Source File: HiveSchemaConverter.java    From streamx with Apache License 2.0 5 votes vote down vote up
public static TypeInfo convertStruct(Schema schema) {
  final List<Field> fields = schema.fields();
  final List<String> names = new ArrayList<>(fields.size());
  final List<TypeInfo> types = new ArrayList<>(fields.size());
  for (Field field : fields) {
    names.add(field.name());
    types.add(convert(field.schema()));
  }
  return TypeInfoFactory.getStructTypeInfo(names, types);
}
 
Example 14
Source File: HiveTypeUtil.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public TypeInfo visit(RowType rowType) {
	List<String> names = rowType.getFieldNames();
	List<TypeInfo> typeInfos = new ArrayList<>(names.size());
	for (String name : names) {
		TypeInfo typeInfo =
				rowType.getTypeAt(rowType.getFieldIndex(name)).accept(new TypeInfoLogicalTypeVisitor(dataType));
		if (null != typeInfo) {
			typeInfos.add(typeInfo);
		} else {
			return defaultMethod(rowType);
		}
	}
	return TypeInfoFactory.getStructTypeInfo(names, typeInfos);
}
 
Example 15
Source File: JsonSerdeUtilsTest.java    From incubator-hivemall with Apache License 2.0 4 votes vote down vote up
@Test
public void testRWNull() throws Exception {
    List<Object> nlist = new ArrayList<Object>(13);
    {
        nlist.add(null); // tinyint
        nlist.add(null); // smallint
        nlist.add(null); // int
        nlist.add(null); // bigint
        nlist.add(null); // double
        nlist.add(null); // float
        nlist.add(null); // string
        nlist.add(null); // string
        nlist.add(null); // struct
        nlist.add(null); // array
        nlist.add(null); // map
        nlist.add(null); // bool
        nlist.add(null); // complex
        nlist.add(null); //decimal(5,2)
        nlist.add(null); //char(10)
        nlist.add(null); //varchar(20)
        nlist.add(null); //date
        nlist.add(null); //timestamp
        nlist.add(null); //binary
    }

    DefaultHCatRecord r = new DefaultHCatRecord(nlist);

    List<String> columnNames =
            Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
        "tinyint,smallint,int,bigint,double,float,string,string,"
                + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
                + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>,"
                + "decimal(5,2),char(10),varchar(20),date,timestamp,binary");

    StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    HCatRecordObjectInspector objInspector =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);

    Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames);
    List<Object> deserialized =
            JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes);

    assertRecordEquals(nlist, deserialized);
}
 
Example 16
Source File: JsonSerdeUtilsTest.java    From incubator-hivemall with Apache License 2.0 4 votes vote down vote up
@Test
public void testRW() throws Exception {
    List<Object> rlist = new ArrayList<Object>(13);
    {
        rlist.add(new Byte("123"));
        rlist.add(new Short("456"));
        rlist.add(new Integer(789));
        rlist.add(new Long(1000L));
        rlist.add(new Double(5.3D));
        rlist.add(new Float(2.39F));
        rlist.add(new String("hcat\nand\nhadoop"));
        rlist.add(null);

        List<Object> innerStruct = new ArrayList<Object>(2);
        innerStruct.add(new String("abc"));
        innerStruct.add(new String("def"));
        rlist.add(innerStruct);

        List<Integer> innerList = new ArrayList<Integer>();
        innerList.add(314);
        innerList.add(007);
        rlist.add(innerList);

        Map<Short, String> map = new HashMap<Short, String>(3);
        map.put(new Short("2"), "hcat is cool");
        map.put(new Short("3"), "is it?");
        map.put(new Short("4"), "or is it not?");
        rlist.add(map);

        rlist.add(new Boolean(true));

        List<Object> c1 = new ArrayList<Object>();
        List<Object> c1_1 = new ArrayList<Object>();
        c1_1.add(new Integer(12));
        List<Object> i2 = new ArrayList<Object>();
        List<Integer> ii1 = new ArrayList<Integer>();
        ii1.add(new Integer(13));
        ii1.add(new Integer(14));
        i2.add(ii1);
        Map<String, List<?>> ii2 = new HashMap<String, List<?>>();
        List<Integer> iii1 = new ArrayList<Integer>();
        iii1.add(new Integer(15));
        ii2.put("phew", iii1);
        i2.add(ii2);
        c1_1.add(i2);
        c1.add(c1_1);
        rlist.add(c1);
        rlist.add(HiveDecimal.create(new BigDecimal("123.45")));//prec 5, scale 2
        rlist.add(new HiveChar("hive\nchar", 10));
        rlist.add(new HiveVarchar("hive\nvarchar", 20));
        rlist.add(Date.valueOf("2014-01-07"));
        rlist.add(new Timestamp(System.currentTimeMillis()));
        rlist.add("hive\nbinary".getBytes("UTF-8"));
    }

    DefaultHCatRecord r = new DefaultHCatRecord(rlist);

    List<String> columnNames =
            Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
        "tinyint,smallint,int,bigint,double,float,string,string,"
                + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
                + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>,"
                + "decimal(5,2),char(10),varchar(20),date,timestamp,binary");

    StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    HCatRecordObjectInspector objInspector =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);

    Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames);
    List<Object> deserialized =
            JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes);

    assertRecordEquals(rlist, deserialized);
}
 
Example 17
Source File: SMSerDe.java    From spliceengine with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
  * An initialization function used to gather information about the table.
  * Typically, a SerDe implementation will be interested in the list of
  * column names and their types. That information will be used to help
  * perform actual serialization and deserialization of data.
  */
 //@Override
 public void initialize(Configuration conf, Properties tbl) throws SerDeException {
 	if (Log.isDebugEnabled())
 		SpliceLogUtils.debug(Log, "initialize with conf=%s, tbl=%s",conf,tbl);
     // Get a list of the table's column names.
     tableName = tbl.getProperty(MRConstants.SPLICE_TABLE_NAME);
     String hbaseDir = null;
     if (conf != null) {
         hbaseDir = conf.get(HConstants.HBASE_DIR);
     }
     if (hbaseDir == null)
     	hbaseDir = System.getProperty(HConstants.HBASE_DIR);
     if (hbaseDir == null)
     	throw new SerDeException("hbase root directory not set, please include hbase.rootdir in config or via -D system property ...");
     if (conf != null) {
         conf.set(MRConstants.SPLICE_INPUT_TABLE_NAME, tableName);
         conf.set(MRConstants.SPLICE_JDBC_STR, tbl.getProperty(MRConstants.SPLICE_JDBC_STR));
         conf.set(HConstants.HBASE_DIR, hbaseDir);
         if (conf.get(HiveConf.ConfVars.POSTEXECHOOKS.varname) == null) {
             conf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "com.splicemachine.mrio.api.hive.PostExecHook");
         }
         if (conf.get(HiveConf.ConfVars.ONFAILUREHOOKS.varname) == null) {
             conf.set(HiveConf.ConfVars.ONFAILUREHOOKS.varname, "com.splicemachine.mrio.api.hive.FailureExecHook");
         }
     }

     if (sqlUtil == null)
         sqlUtil = SMSQLUtil.getInstance(tbl.getProperty(MRConstants.SPLICE_JDBC_STR));
     String colNamesStr = tbl.getProperty(Constants.LIST_COLUMNS);
     colNames.clear();
     for (String split: colNamesStr.split(","))
     	colNames.add(split.toUpperCase());
     String colTypesStr = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
     colTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);
     objectCache = new ArrayList<Object>(colTypes.size());
     if (tableName != null) {
         tableName = tableName.trim().toUpperCase();
         try {
             if (!sqlUtil.checkTableExists(tableName))
             	throw new SerDeException(String.format("table %s does not exist...",tableName));
             if (conf != null) {
                 ScanSetBuilder tableScannerBuilder = sqlUtil.getTableScannerBuilder(tableName, colNames);
                 conf.set(MRConstants.SPLICE_SCAN_INFO, tableScannerBuilder.base64Encode());

               //  TableContext tableContext = sqlUtil.createTableContext(tableName, tableScannerBuilder);
               //  conf.set(MRConstants.SPLICE_TBLE_CONTEXT, tableContext.getTableContextBase64String());
             }
} catch (Exception e) {
	throw new SerDeException(e);
}
     } 
      
 	if (Log.isDebugEnabled())
 		SpliceLogUtils.debug(Log, "generating hive info colNames=%s, colTypes=%s",colNames,colTypes);

     
     rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
     rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
     //serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, getClass().getName());
     Log.info("--------Finished initialize");
 }