Java Code Examples for org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory#getStructTypeInfo()

The following examples show how to use org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory#getStructTypeInfo(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source Project: bigdata-tutorial   File: JSONCDHSerDe.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Captures the table layout this SerDe will work with.
 *
 * <p>Reads the column-name and column-type table properties, stores the
 * names, and derives the row's struct type and its standard Java object
 * inspector from them.
 *
 * @param conf the job configuration (not consulted here)
 * @param tbl the table properties carrying the column names and types
 * @throws SerDeException declared by the SerDe contract
 */
@Override
public void initialize(Configuration conf, Properties tbl)
		throws SerDeException {
	// Column names arrive as a single comma-separated property value.
	String[] nameTokens = tbl.getProperty(serdeConstants.LIST_COLUMNS).split(",");
	colNames = Arrays.asList(nameTokens);

	// Column types arrive as one type string; the parsed list is meant to
	// line up position-by-position with the column names.
	List<TypeInfo> parsedTypes = TypeInfoUtils.getTypeInfosFromTypeString(
			tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES));

	TypeInfo structType = TypeInfoFactory.getStructTypeInfo(colNames, parsedTypes);
	rowTypeInfo = (StructTypeInfo) structType;
	rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
 
Example 2
/**
 * Maps a field-type name to the Hive type used to represent it.
 *
 * <p>The three geo field types map to a struct of two floats named by
 * {@code LATITUDE} and {@code LONGITUDE}; any name that matches none of the
 * known types falls back to the string type.
 *
 * @param fieldType the field-type name to translate
 * @return the matching Hive type, or the string type when nothing matches
 */
private TypeInfo getTypeInfo(String fieldType) {
  if (fieldType.equals(TEXT) || fieldType.equals(STRING) || fieldType.equals(STORED)) {
    return TypeInfoFactory.stringTypeInfo;
  }
  if (fieldType.equals(LONG)) {
    return TypeInfoFactory.longTypeInfo;
  }
  if (fieldType.equals(INT)) {
    return TypeInfoFactory.intTypeInfo;
  }
  if (fieldType.equals(FLOAT)) {
    return TypeInfoFactory.floatTypeInfo;
  }
  if (fieldType.equals(DOUBLE)) {
    return TypeInfoFactory.doubleTypeInfo;
  }
  if (fieldType.equals(DATE)) {
    return TypeInfoFactory.dateTypeInfo;
  }

  boolean isGeoType = fieldType.equals(GEO_POINTVECTOR)
      || fieldType.equals(GEO_RECURSIVEPREFIX)
      || fieldType.equals(GEO_TERMPREFIX);
  if (isGeoType) {
    // A geo value is modelled as a (latitude, longitude) pair of floats.
    List<TypeInfo> coordinateTypes = Arrays.<TypeInfo>asList(
        TypeInfoFactory.floatTypeInfo, TypeInfoFactory.floatTypeInfo);
    List<String> coordinateNames = Arrays.asList(LATITUDE, LONGITUDE);
    return TypeInfoFactory.getStructTypeInfo(coordinateNames, coordinateTypes);
  }

  // Anything that is not a built-in type is treated as a string.
  return TypeInfoFactory.stringTypeInfo;
}
 
Example 3
Source Project: searchanalytics-bigdata   File: JSONSerDe.java    License: MIT License 6 votes vote down vote up
/**
 * Captures the table layout this SerDe will work with.
 *
 * <p>Reads the column-name and column-type table properties, lower-cases
 * the column names, and derives the row's struct type and its standard Java
 * object inspector from them.
 *
 * @param conf the job configuration (not consulted here)
 * @param tbl the table properties carrying the column names and types
 * @throws SerDeException declared by the SerDe contract
 */
@Override
public void initialize(final Configuration conf, final Properties tbl)
		throws SerDeException {
	// Column names arrive as a single comma-separated property value.
	final String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
	// Lower-case with a fixed locale: the default-locale overload would turn
	// "I" into a dotless "ı" under e.g. a Turkish JVM locale and silently
	// change the column names.
	colNames = Arrays.asList(
			colNamesStr.toLowerCase(java.util.Locale.ROOT).split(","));
	// Column types arrive as one type string; the parsed list is meant to
	// line up position-by-position with the column names.
	final String colTypesStr = tbl
			.getProperty(serdeConstants.LIST_COLUMN_TYPES);
	final List<TypeInfo> colTypes = TypeInfoUtils
			.getTypeInfosFromTypeString(colTypesStr);
	rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
			colNames, colTypes);
	rowOI = TypeInfoUtils
			.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
 
Example 4
Source Project: flink   File: HiveTypeUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a row type into a Hive struct type by converting each field with
 * a fresh {@code TypeInfoLogicalTypeVisitor}.
 *
 * <p>If any field converts to {@code null}, the whole row is handed to
 * {@code defaultMethod} instead.
 */
@Override
public TypeInfo visit(RowType rowType) {
	List<String> fieldNames = rowType.getFieldNames();
	List<TypeInfo> fieldTypeInfos = new ArrayList<>(fieldNames.size());
	for (String fieldName : fieldNames) {
		int position = rowType.getFieldIndex(fieldName);
		TypeInfo converted = rowType.getTypeAt(position)
				.accept(new TypeInfoLogicalTypeVisitor(dataType));
		if (converted == null) {
			// One unconvertible field makes the whole row unconvertible.
			return defaultMethod(rowType);
		}
		fieldTypeInfos.add(converted);
	}
	return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos);
}
 
Example 5
/**
 * Builds a struct type with seven fields, each named after the primitive
 * kind created for it by {@code TypeInfoCreator}.
 *
 * @return the struct type pairing the field names with their types
 */
public static TypeInfo buildPrimitiveOrcSchema() {
    List<String> fieldNames = Arrays.asList(
            "int", "long", "boolean", "float", "double", "bytes", "string");
    // Same order as the names above; "bytes" is backed by the binary type.
    List<TypeInfo> fieldTypes = Arrays.asList(
            TypeInfoCreator.createInt(),
            TypeInfoCreator.createLong(),
            TypeInfoCreator.createBoolean(),
            TypeInfoCreator.createFloat(),
            TypeInfoCreator.createDouble(),
            TypeInfoCreator.createBinary(),
            TypeInfoCreator.createString());
    return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypes);
}
 
Example 6
Source Project: streamx   File: HiveSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a struct schema into a Hive struct type, converting each field's
 * schema with {@code convert} and keeping the fields in schema order.
 *
 * @param schema the struct schema whose fields are converted
 * @return the struct type built from the field names and converted types
 */
public static TypeInfo convertStruct(Schema schema) {
  final List<Field> structFields = schema.fields();
  final int fieldCount = structFields.size();
  final List<String> fieldNames = new ArrayList<>(fieldCount);
  final List<TypeInfo> fieldTypes = new ArrayList<>(fieldCount);
  for (Field structField : structFields) {
    final String fieldName = structField.name();
    fieldNames.add(fieldName);
    final TypeInfo fieldType = convert(structField.schema());
    fieldTypes.add(fieldType);
  }
  return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypes);
}
 
Example 7
/**
 * Reads one JSON object from the parser into a row laid out by the given
 * columns.
 *
 * <p>Tokens are consumed until {@code END_OBJECT} or the end of input, and
 * each one is passed to {@code populateRecord}. A single-column row is
 * returned as that column's value rather than as a list.
 *
 * @param p the parser to read tokens from
 * @param columnNames the column names; must not be null
 * @param columnTypes the column types; must not be null and must have the
 *        same size as {@code columnNames}
 * @return the row as a list, or the sole value when there is one column
 * @throws SerDeException if the sizes differ or the schema cannot be built
 */
@Nonnull
private static Object parseObject(@Nonnull final JsonParser p,
        @CheckForNull final List<String> columnNames,
        @CheckForNull final List<TypeInfo> columnTypes)
        throws JsonParseException, IOException, SerDeException {
    Preconditions.checkNotNull(columnNames, "columnNames MUST NOT be null in parseObject",
        SerDeException.class);
    Preconditions.checkNotNull(columnTypes, "columnTypes MUST NOT be null in parseObject",
        SerDeException.class);

    final int nameCount = columnNames.size();
    final int typeCount = columnTypes.size();
    if (nameCount != typeCount) {
        throw new SerDeException(
            "Size of columnNames and columnTypes does not match. #columnNames="
                    + nameCount + ", #columnTypes=" + typeCount);
    }

    final TypeInfo structType = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    final HCatSchema rowSchema;
    try {
        rowSchema = HCatSchemaUtils.getHCatSchema(structType).get(0).getStructSubSchema();
    } catch (HCatException e) {
        throw new SerDeException(e);
    }

    // Start with one null slot per column; populateRecord fills them in.
    final List<Object> row = new ArrayList<Object>(Collections.nCopies(nameCount, null));
    for (JsonToken current = p.nextToken();
            current != JsonToken.END_OBJECT && current != null;
            current = p.nextToken()) {
        populateRecord(row, current, p, rowSchema);
    }

    return (typeCount == 1) ? row.get(0) : row;
}
 
Example 8
Source Project: indexr   File: IndexRSerde.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sets up the column names, column types, object inspector and stats for
 * this SerDe from the table properties.
 *
 * <p>An empty name property yields no columns. An empty type property makes
 * every column a {@code string}.
 *
 * @param conf the job configuration (not consulted here)
 * @param tbl the table properties carrying the column names and types
 * @throws SerDeException declared by the SerDe contract
 * @throws IllegalArgumentException if the name and type counts differ
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
    String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

    columnNames = Strings.isEmpty(columnNameProperty)
            ? new ArrayList<String>()
            : Arrays.asList(columnNameProperty.split(","));

    // Without declared types, synthesize "string:string:..." with one entry per column.
    String effectiveTypeString = Strings.isEmpty(columnTypeProperty)
            ? StringUtils.repeat("string", ":", columnNames.size())
            : columnTypeProperty;
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(effectiveTypeString);

    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("IndexRHiveSerde initialization failed. Number of column " +
                "name and column type differs. columnNames = " + columnNames + ", columnTypes = " +
                columnTypes);
    }

    StructTypeInfo rowStructType =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector(rowStructType);

    stats = new SerDeStats();
    serdeSize = 0;
}
 
Example 9
Source Project: hive-solr   File: LWSerDe.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sets up column names, column types, the row's struct type, its object
 * inspector, an empty row buffer and the field-mapping flag from the table
 * properties.
 *
 * @param conf the job configuration (not consulted here)
 * @param tblProperties the table properties to read from
 * @throws SerDeException declared by the SerDe contract
 */
@Override
public void initialize(Configuration conf, Properties tblProperties) throws SerDeException {
  String columnList = tblProperties.getProperty(serdeConstants.LIST_COLUMNS);
  colNames = Arrays.asList(columnList.split(","));

  String typeString = tblProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  colTypes = TypeInfoUtils.getTypeInfosFromTypeString(typeString);

  TypeInfo structType = TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
  typeInfo = (StructTypeInfo) structType;
  inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
  row = new ArrayList<>();

  // Field mapping is off unless the table property says otherwise.
  String fieldMappingFlag = tblProperties.getProperty(ENABLE_FIELD_MAPPING, "false");
  enableFieldMapping = Boolean.valueOf(fieldMappingFlag);
}
 
Example 10
Source Project: flink   File: HiveTypeUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a row type into a Hive struct type by converting each field with
 * this same visitor.
 *
 * <p>If any field converts to {@code null}, the whole row is handed to
 * {@code defaultMethod} instead.
 */
@Override
public TypeInfo visit(RowType rowType) {
	List<String> fieldNames = rowType.getFieldNames();
	List<TypeInfo> fieldTypeInfos = new ArrayList<>(fieldNames.size());
	for (String fieldName : fieldNames) {
		int position = rowType.getFieldIndex(fieldName);
		TypeInfo converted = rowType.getTypeAt(position).accept(this);
		if (converted == null) {
			// One unconvertible field makes the whole row unconvertible.
			return defaultMethod(rowType);
		}
		fieldTypeInfos.add(converted);
	}
	return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos);
}
 
Example 11
Source Project: parquet-mr   File: ParquetHiveSerDe.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sets up the object inspector and resets the statistics fields for this
 * SerDe from the table properties.
 *
 * <p>An empty name or type property yields an empty list for that side.
 *
 * @param conf the job configuration (not consulted here)
 * @param tbl the table properties carrying the column names and types
 * @throws SerDeException declared by the SerDe contract
 * @throws IllegalArgumentException if the name and type counts differ
 */
@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {

  // Raw column names and column types as stored in the table properties.
  final String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
  final String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

  final List<String> columnNames = columnNameProperty.isEmpty()
      ? new ArrayList<String>()
      : Arrays.asList(columnNameProperty.split(","));
  final List<TypeInfo> columnTypes = columnTypeProperty.isEmpty()
      ? new ArrayList<TypeInfo>()
      : TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);

  if (columnNames.size() != columnTypes.size()) {
    throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " +
      "name and column type differs. columnNames = " + columnNames + ", columnTypes = " +
      columnTypes);
  }

  // The row is a struct over all columns; the inspector is built from it.
  final TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
  this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

  // Start with fresh statistics and an unknown last operation.
  stats = new SerDeStats();
  serializedSize = 0;
  deserializedSize = 0;
  status = LAST_OPERATION.UNKNOWN;
}
 
Example 12
Source Project: nifi   File: NiFiOrcUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a struct type from the fields of a record schema.
 *
 * @param recordSchema the schema whose fields become the struct's fields
 * @param hiveFieldNames when true, field names are lower-cased; the flag is
 *        also passed through to {@code getOrcField} for each field's type
 * @return the struct type, or {@code null} when the schema reports no field list
 * @throws IllegalArgumentException declared by the original signature
 */
public static TypeInfo getOrcSchema(RecordSchema recordSchema, boolean hiveFieldNames) throws IllegalArgumentException {
    List<RecordField> recordFields = recordSchema.getFields();
    if (recordFields == null) {
        return null;
    }

    List<String> orcFieldNames = new ArrayList<>(recordFields.size());
    List<TypeInfo> orcFields = new ArrayList<>(recordFields.size());
    for (RecordField recordField : recordFields) {
        // Lower-case with a fixed locale: the default-locale overload would
        // turn "I" into a dotless "ı" under e.g. a Turkish JVM locale.
        String fieldName = hiveFieldNames
                ? recordField.getFieldName().toLowerCase(java.util.Locale.ROOT)
                : recordField.getFieldName();
        orcFieldNames.add(fieldName);
        orcFields.add(getOrcField(recordField.getDataType(), hiveFieldNames));
    }
    return TypeInfoFactory.getStructTypeInfo(orcFieldNames, orcFields);
}
 
Example 13
Source Project: nifi   File: TestNiFiOrcUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a struct type with seven fields, each named after the primitive
 * kind created for it by {@code TypeInfoCreator}.
 *
 * @return the struct type pairing the field names with their types
 */
public static TypeInfo buildPrimitiveOrcSchema() {
    List<String> names = Arrays.asList(
            "int", "long", "boolean", "float", "double", "bytes", "string");
    // Listed in the same order as the names; "bytes" uses the binary type.
    List<TypeInfo> types = Arrays.asList(
            TypeInfoCreator.createInt(),
            TypeInfoCreator.createLong(),
            TypeInfoCreator.createBoolean(),
            TypeInfoCreator.createFloat(),
            TypeInfoCreator.createDouble(),
            TypeInfoCreator.createBinary(),
            TypeInfoCreator.createString());
    return TypeInfoFactory.getStructTypeInfo(names, types);
}
 
Example 14
Source Project: nifi   File: TestNiFiOrcUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a struct type with seven fields, each named after the primitive
 * kind created for it by {@code TypeInfoCreator}.
 *
 * @return the struct type pairing the field names with their types
 */
public static TypeInfo buildPrimitiveOrcSchema() {
    List<String> primitiveNames = Arrays.asList(
            "int", "long", "boolean", "float", "double", "bytes", "string");
    // One type per name, in matching order; "bytes" uses the binary type.
    List<TypeInfo> primitiveTypes = Arrays.asList(
            TypeInfoCreator.createInt(),
            TypeInfoCreator.createLong(),
            TypeInfoCreator.createBoolean(),
            TypeInfoCreator.createFloat(),
            TypeInfoCreator.createDouble(),
            TypeInfoCreator.createBinary(),
            TypeInfoCreator.createString());
    return TypeInfoFactory.getStructTypeInfo(primitiveNames, primitiveTypes);
}
 
Example 15
/**
 * Serializes a record holding one value for each of the 19 declared columns
 * and checks that deserializing the result gives back an equal record.
 */
@Test
public void testRW() throws Exception {
    // One value per column, in the order of the column list further down.
    List<Object> rlist = new ArrayList<Object>(19);
    {
        // valueOf factories replace the deprecated boxed-type constructors.
        rlist.add(Byte.valueOf("123"));
        rlist.add(Short.valueOf("456"));
        rlist.add(Integer.valueOf(789));
        rlist.add(Long.valueOf(1000L));
        rlist.add(Double.valueOf(5.3D));
        rlist.add(Float.valueOf(2.39F));
        rlist.add("hcat\nand\nhadoop");
        rlist.add(null);

        // struct<a:string,b:string>
        List<Object> innerStruct = new ArrayList<Object>(2);
        innerStruct.add("abc");
        innerStruct.add("def");
        rlist.add(innerStruct);

        // array<int>
        List<Integer> innerList = new ArrayList<Integer>();
        innerList.add(314);
        innerList.add(7); // was written as the octal literal 007; same value
        rlist.add(innerList);

        // map<smallint,string>
        Map<Short, String> map = new HashMap<Short, String>(3);
        map.put(Short.valueOf("2"), "hcat is cool");
        map.put(Short.valueOf("3"), "is it?");
        map.put(Short.valueOf("4"), "or is it not?");
        rlist.add(map);

        rlist.add(Boolean.TRUE);

        // array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>
        List<Object> c1 = new ArrayList<Object>();
        List<Object> c1_1 = new ArrayList<Object>();
        c1_1.add(Integer.valueOf(12));
        List<Object> i2 = new ArrayList<Object>();
        List<Integer> ii1 = new ArrayList<Integer>();
        ii1.add(Integer.valueOf(13));
        ii1.add(Integer.valueOf(14));
        i2.add(ii1);
        Map<String, List<?>> ii2 = new HashMap<String, List<?>>();
        List<Integer> iii1 = new ArrayList<Integer>();
        iii1.add(Integer.valueOf(15));
        ii2.put("phew", iii1);
        i2.add(ii2);
        c1_1.add(i2);
        c1.add(c1_1);
        rlist.add(c1);
        rlist.add(HiveDecimal.create(new BigDecimal("123.45")));//prec 5, scale 2
        rlist.add(new HiveChar("hive\nchar", 10));
        rlist.add(new HiveVarchar("hive\nvarchar", 20));
        rlist.add(Date.valueOf("2014-01-07"));
        rlist.add(new Timestamp(System.currentTimeMillis()));
        rlist.add("hive\nbinary".getBytes("UTF-8"));
    }

    DefaultHCatRecord r = new DefaultHCatRecord(rlist);

    List<String> columnNames =
            Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
        "tinyint,smallint,int,bigint,double,float,string,string,"
                + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
                + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>,"
                + "decimal(5,2),char(10),varchar(20),date,timestamp,binary");

    StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    HCatRecordObjectInspector objInspector =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);

    Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames);
    List<Object> deserialized =
            JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes);

    assertRecordEquals(rlist, deserialized);
}
 
Example 16
/**
 * Serializes a record whose 19 columns are all null and checks that
 * deserializing the result gives back an equal all-null record.
 */
@Test
public void testRWNull() throws Exception {
    List<String> columnNames =
            Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
        "tinyint,smallint,int,bigint,double,float,string,string,"
                + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
                + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>,"
                + "decimal(5,2),char(10),varchar(20),date,timestamp,binary");

    // One null per column: tinyint, smallint, int, bigint, double, float,
    // string, string, struct, array, map, bool, complex, decimal(5,2),
    // char(10), varchar(20), date, timestamp, binary.
    List<Object> nullRow = new ArrayList<Object>(13);
    for (int column = 0; column < columnNames.size(); column++) {
        nullRow.add(null);
    }
    DefaultHCatRecord record = new DefaultHCatRecord(nullRow);

    StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    HCatRecordObjectInspector inspector =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);

    Text json = JsonSerdeUtils.serialize(record, inspector, columnNames);
    List<Object> roundTripped = JsonSerdeUtils.deserialize(json, columnNames, columnTypes);

    assertRecordEquals(nullRow, roundTripped);
}
 
Example 17
/**
  * An initialization function used to gather information about the table.
  * Typically, a SerDe implementation will be interested in the list of
  * column names and their types. That information will be used to help
  * perform actual serialization and deserialization of data.
  *
  * <p>In order, this method: reads the Splice table name from the table
  * properties; resolves the HBase root directory (configuration first, then
  * system property); copies connection settings and default hook class names
  * into {@code conf} when it is non-null; rebuilds the upper-cased column
  * name list and the column type list; checks that the table exists and
  * stores encoded scan information in {@code conf}; and finally derives the
  * row struct type and its object inspector.
  *
  * @param conf the job configuration; may be null, in which case nothing is
  *        written to it
  * @param tbl the table properties to read from
  * @throws SerDeException if no HBase root directory can be resolved, if the
  *         table does not exist, or if the table lookup fails for any reason
  */
 //@Override
 public void initialize(Configuration conf, Properties tbl) throws SerDeException {
 	if (Log.isDebugEnabled())
 		SpliceLogUtils.debug(Log, "initialize with conf=%s, tbl=%s",conf,tbl);
     // Read the Splice table name from the table properties.
     tableName = tbl.getProperty(MRConstants.SPLICE_TABLE_NAME);
     // Resolve the HBase root directory: configuration first, then the
     // system property of the same name; fail if neither provides it.
     String hbaseDir = null;
     if (conf != null) {
         hbaseDir = conf.get(HConstants.HBASE_DIR);
     }
     if (hbaseDir == null)
     	hbaseDir = System.getProperty(HConstants.HBASE_DIR);
     if (hbaseDir == null)
     	throw new SerDeException("hbase root directory not set, please include hbase.rootdir in config or via -D system property ...");
     if (conf != null) {
         // NOTE(review): tableName is only null-checked further down; confirm
         // that conf.set tolerates a null value here.
         conf.set(MRConstants.SPLICE_INPUT_TABLE_NAME, tableName);
         conf.set(MRConstants.SPLICE_JDBC_STR, tbl.getProperty(MRConstants.SPLICE_JDBC_STR));
         conf.set(HConstants.HBASE_DIR, hbaseDir);
         // Install the default hook class names only when none is configured.
         if (conf.get(HiveConf.ConfVars.POSTEXECHOOKS.varname) == null) {
             conf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "com.splicemachine.mrio.api.hive.PostExecHook");
         }
         if (conf.get(HiveConf.ConfVars.ONFAILUREHOOKS.varname) == null) {
             conf.set(HiveConf.ConfVars.ONFAILUREHOOKS.varname, "com.splicemachine.mrio.api.hive.FailureExecHook");
         }
     }

     // Create the SQL helper once; later calls reuse the existing instance.
     if (sqlUtil == null)
         sqlUtil = SMSQLUtil.getInstance(tbl.getProperty(MRConstants.SPLICE_JDBC_STR));
     // Rebuild the column name list from the comma-separated property,
     // upper-casing every name.
     String colNamesStr = tbl.getProperty(Constants.LIST_COLUMNS);
     colNames.clear();
     for (String split: colNamesStr.split(","))
     	colNames.add(split.toUpperCase());
     // Parse the column types from the type-string property.
     String colTypesStr = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
     colTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);
     objectCache = new ArrayList<Object>(colTypes.size());
     if (tableName != null) {
         // The table name is trimmed and upper-cased before the lookup.
         tableName = tableName.trim().toUpperCase();
         try {
             if (!sqlUtil.checkTableExists(tableName))
             	throw new SerDeException(String.format("table %s does not exist...",tableName));
             if (conf != null) {
                 // Store the encoded scan information for this table and
                 // these columns in the configuration.
                 ScanSetBuilder tableScannerBuilder = sqlUtil.getTableScannerBuilder(tableName, colNames);
                 conf.set(MRConstants.SPLICE_SCAN_INFO, tableScannerBuilder.base64Encode());

               //  TableContext tableContext = sqlUtil.createTableContext(tableName, tableScannerBuilder);
               //  conf.set(MRConstants.SPLICE_TBLE_CONTEXT, tableContext.getTableContextBase64String());
             }
// NOTE(review): this catch also intercepts the "does not exist"
// SerDeException thrown above and wraps it in a second SerDeException.
} catch (Exception e) {
	throw new SerDeException(e);
}
     } 
      
 	if (Log.isDebugEnabled())
 		SpliceLogUtils.debug(Log, "generating hive info colNames=%s, colTypes=%s",colNames,colTypes);

     
     // The row is a struct over all columns; the inspector is built from it.
     rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
     rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
     //serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, getClass().getName());
     Log.info("--------Finished initialize");
 }