Java Code Examples for org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo

The following examples show how to use org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo. These examples are extracted from open source projects; the source project and file are noted above each example.
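As background for the examples below, here is a minimal, self-contained sketch of building a StructTypeInfo and reading it back. The class name and the id/name columns are illustrative only:

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class StructTypeInfoSketch {
    public static void main(String[] args) {
        // Build a struct<id:int,name:string> from parallel name and type lists.
        StructTypeInfo struct = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
                Arrays.asList("id", "name"),
                Arrays.<TypeInfo>asList(TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo));

        System.out.println(struct.getTypeName());            // struct<id:int,name:string>
        System.out.println(struct.getAllStructFieldNames()); // [id, name]
        for (TypeInfo fieldType : struct.getAllStructFieldTypeInfos()) {
            System.out.println(fieldType.getTypeName());     // int, then string
        }
    }
}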
Example 1
Source Project: presto   Source File: HiveType.java    License: Apache License 2.0
public List<String> getHiveDereferenceNames(List<Integer> dereferences)
{
    ImmutableList.Builder<String> dereferenceNames = ImmutableList.builder();
    TypeInfo typeInfo = getTypeInfo();
    for (int fieldIndex : dereferences) {
        checkArgument(typeInfo instanceof StructTypeInfo, "typeInfo should be struct type: %s", typeInfo);
        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;

        checkArgument(fieldIndex >= 0, "fieldIndex cannot be negative");
        checkArgument(fieldIndex < structTypeInfo.getAllStructFieldNames().size(),
                "fieldIndex should be less than the number of fields in the struct");
        String fieldName = structTypeInfo.getAllStructFieldNames().get(fieldIndex);
        dereferenceNames.add(fieldName);
        typeInfo = structTypeInfo.getAllStructFieldTypeInfos().get(fieldIndex);
    }

    return dereferenceNames.build();
}
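The method above walks a chain of field indices through nested structs. A standalone sketch of the same walk using only Hive's TypeInfo API (the nested type string and the [0, 0] path are illustrative):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class DereferenceWalkSketch {
    public static void main(String[] args) {
        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("struct<a:struct<b:int>>");
        List<String> names = new ArrayList<>();
        for (int fieldIndex : Arrays.asList(0, 0)) { // field 0, then field 0 of the nested struct
            StructTypeInfo struct = (StructTypeInfo) typeInfo;
            names.add(struct.getAllStructFieldNames().get(fieldIndex));
            typeInfo = struct.getAllStructFieldTypeInfos().get(fieldIndex);
        }
        System.out.println(names); // [a, b]
    }
}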
 
Example 2
Source Project: presto   Source File: HiveWriteUtils.java    License: Apache License 2.0
private static boolean isWritableType(TypeInfo typeInfo)
{
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
            return isWritablePrimitiveType(primitiveCategory);
        case MAP:
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            return isWritableType(mapTypeInfo.getMapKeyTypeInfo()) && isWritableType(mapTypeInfo.getMapValueTypeInfo());
        case LIST:
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            return isWritableType(listTypeInfo.getListElementTypeInfo());
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            return structTypeInfo.getAllStructFieldTypeInfos().stream().allMatch(HiveWriteUtils::isWritableType);
    }
    return false;
}
 
Example 3
Source Project: presto   Source File: HiveCoercionPolicy.java    License: Apache License 2.0
private boolean canCoerceForStruct(HiveType fromHiveType, HiveType toHiveType)
{
    if (fromHiveType.getCategory() != Category.STRUCT || toHiveType.getCategory() != Category.STRUCT) {
        return false;
    }
    List<String> fromFieldNames = ((StructTypeInfo) fromHiveType.getTypeInfo()).getAllStructFieldNames();
    List<String> toFieldNames = ((StructTypeInfo) toHiveType.getTypeInfo()).getAllStructFieldNames();
    List<HiveType> fromFieldTypes = extractStructFieldTypes(fromHiveType);
    List<HiveType> toFieldTypes = extractStructFieldTypes(toHiveType);
    // Rule:
    // * Fields may be added or dropped from the end.
    // * For all other field indices, the corresponding fields must have
    //   the same name, and the type must be coercible.
    for (int i = 0; i < min(fromFieldTypes.size(), toFieldTypes.size()); i++) {
        if (!fromFieldNames.get(i).equals(toFieldNames.get(i))) {
            return false;
        }
        if (!fromFieldTypes.get(i).equals(toFieldTypes.get(i)) && !canCoerce(fromFieldTypes.get(i), toFieldTypes.get(i))) {
            return false;
        }
    }
    return true;
}
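To make the rule concrete, here is a sketch of the name check over the shared prefix of two structs, where the target type only appends a trailing field. The type strings are illustrative and only plain Hive APIs are used:

import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class StructPrefixSketch {
    public static void main(String[] args) {
        StructTypeInfo from = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("struct<a:int,b:string>");
        StructTypeInfo to = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("struct<a:int,b:string,c:double>");

        List<String> fromNames = from.getAllStructFieldNames();
        List<String> toNames = to.getAllStructFieldNames();
        boolean prefixNamesMatch = true;
        for (int i = 0; i < Math.min(fromNames.size(), toNames.size()); i++) {
            prefixNamesMatch &= fromNames.get(i).equals(toNames.get(i));
        }
        System.out.println(prefixNamesMatch); // true: "c" was only added at the end
    }
}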
 
Example 4
Source Project: multiple-dimension-spread   Source File: MDSSerde.java    License: Apache License 2.0
private StructTypeInfo getAllReadTypeInfo( final String columnNameProperty , final String columnTypeProperty ){
  ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString( columnTypeProperty );
  ArrayList<String> columnNames = new ArrayList<String>();
  if ( columnNameProperty != null && 0 < columnNameProperty.length() ) {
    String[] columnNameArray = columnNameProperty.split(",");
    for( int i = 0 ; i < columnNameArray.length ; i++ ){
      columnNames.add( columnNameArray[i] );
      filedIndexMap.put( columnNameArray[i] , i );
    }
  }
  StructTypeInfo rootType = new StructTypeInfo();

  rootType.setAllStructFieldNames( columnNames );
  rootType.setAllStructFieldTypeInfos( fieldTypes );

  return rootType;
}
 
Example 5
Source Project: multiple-dimension-spread   Source File: MDSSerde.java    License: Apache License 2.0
private StructTypeInfo getColumnProjectionTypeInfo( final String columnNameProperty , final String columnTypeProperty , final String projectionColumnNames ){
  Set<String> columnNameSet = new HashSet<String>();
  for( String columnName : projectionColumnNames.split(",") ){
    columnNameSet.add( columnName );
  }

  ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString( columnTypeProperty );
  String[] splitNames = columnNameProperty.split(",");

  ArrayList<String> projectionColumnNameList = new ArrayList<String>();
  ArrayList<TypeInfo> projectionFieldTypeList = new ArrayList<TypeInfo>();
  for( int i = 0 ; i < fieldTypes.size() ; i++ ){
    if( columnNameSet.contains( splitNames[i] ) ){
      projectionColumnNameList.add( splitNames[i] );
      projectionFieldTypeList.add( fieldTypes.get(i) );
    }
    filedIndexMap.put( splitNames[i] , i );
  }
  StructTypeInfo rootType = new StructTypeInfo();

  rootType.setAllStructFieldNames( projectionColumnNameList );
  rootType.setAllStructFieldTypeInfos( projectionFieldTypeList );

  return rootType;
}
 
Example 6
Source Project: multiple-dimension-spread   Source File: MDSSerde.java    License: Apache License 2.0
@Override
public void initialize( final Configuration conf, final Properties table , final Properties part ) throws SerDeException{
  LOG.info( table.toString() );
  if( part != null ){
    LOG.info( part.toString() );
  }
  String columnNameProperty = table.getProperty(serdeConstants.LIST_COLUMNS);
  String columnTypeProperty = table.getProperty(serdeConstants.LIST_COLUMN_TYPES);

  String projectionColumnNames = conf.get( ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR , "" );

  StructTypeInfo rootType;
  if( projectionColumnNames.isEmpty() ){
    rootType = getAllReadTypeInfo( columnNameProperty , columnTypeProperty );
  }
  else{
    rootType = getColumnProjectionTypeInfo( columnNameProperty , columnTypeProperty , projectionColumnNames );
  }

  // Note: "craete" is the method name as actually spelled in the MDS library.
  inspector = MDSObjectInspectorFactory.craeteObjectInspectorFromTypeInfo( rootType );
}
 
Example 7
Source Project: metacat   Source File: HiveTypeConverter.java    License: Apache License 2.0
@Override
public Type toMetacatType(final String type) {
    // Hack: Presto's "varchar" type can arrive without a length, which Hive requires.
    final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(
        "varchar".equals(type.toLowerCase()) ? serdeConstants.STRING_TYPE_NAME : type);
    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    // The standard struct object inspector forces field names to lower case. In Metacat we need to preserve
    // the original case of the struct fields, so we wrap the inspector to force the field names to keep
    // their original case.
    if (typeInfo.getCategory().equals(ObjectInspector.Category.STRUCT)) {
        final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        final StandardStructObjectInspector objectInspector = (StandardStructObjectInspector) oi;
        oi = new HiveTypeConverter.SameCaseStandardStructObjectInspector(
            structTypeInfo.getAllStructFieldNames(), objectInspector);
    }
    return getCanonicalType(oi);
}
 
Example 8
Source Project: emodb   Source File: EmoSerDe.java    License: Apache License 2.0
private Object deserializeStruct(StructTypeInfo type, Object data)
        throws SerDeException {
    if (!(data instanceof Map)) {
        throw new SerDeException("Value not of type map");
    }
    //noinspection unchecked
    Map<String, Object> map = (Map<String, Object>) data;

    List<String> fieldNames = type.getAllStructFieldNames();
    List<TypeInfo> fieldTypes = type.getAllStructFieldTypeInfos();

    // When deserializing a struct the returned value is a list of values in the same order as the field names.

    List<Object> values = Lists.newArrayListWithCapacity(fieldNames.size());
    for (int i=0; i < fieldNames.size(); i++) {
        Object rawValue = getRawValueOrNullIfAbsent(fieldNames.get(i), map);
        Object value = deserialize(fieldTypes.get(i), rawValue);
        values.add(value);
    }

    return values;
}
 
Example 9
Source Project: indexr   Source File: ArrayWritableObjectInspector.java    License: Apache License 2.0
public ArrayWritableObjectInspector(final StructTypeInfo rowTypeInfo) {
    typeInfo = rowTypeInfo;
    fieldNames = rowTypeInfo.getAllStructFieldNames();
    fieldInfos = rowTypeInfo.getAllStructFieldTypeInfos();
    fields = new ArrayList<StructField>(fieldNames.size());
    fieldsByName = new HashMap<String, StructFieldImpl>();

    for (int i = 0; i < fieldNames.size(); ++i) {
        final String name = fieldNames.get(i);
        final TypeInfo fieldInfo = fieldInfos.get(i);

        final StructFieldImpl field = new StructFieldImpl(name, getObjectInspector(fieldInfo), i);
        fields.add(field);
        fieldsByName.put(name, field);
    }
}
 
Example 10
Source Project: Cobol-to-Hive   Source File: CobolDeserializer.java    License: Apache License 2.0
private Object worker(String columnName, TypeInfo columnType) {
    switch (columnType.getCategory()) {
        case STRUCT:
            return deserializeStruct(columnName, (StructTypeInfo) columnType);
        case UNION:
            return deserializeUnion(columnName, (UnionTypeInfo) columnType);
        case LIST:
            return deserializeList(columnName, (ListTypeInfo) columnType);
        case MAP:
            throw new RuntimeException("map type is not possible for cobol layout: " + columnType.getCategory());
        case PRIMITIVE:
            return deserializePrimitive(columnName, (PrimitiveTypeInfo) columnType);
        default:
            throw new RuntimeException("Unknown TypeInfo: " + columnType.getCategory());
    }
}
 
Example 11
Source Project: bigdata-tutorial   Source File: JSONCDHSerDe.java    License: Apache License 2.0
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help perform
 * actual serialization and deserialization of data.
 */
@Override
public void initialize(Configuration conf, Properties tbl)
		throws SerDeException {
	// Get a list of the table's column names.
	String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
	colNames = Arrays.asList(colNamesStr.split(","));

	// Get a list of TypeInfos for the columns. This list lines up with
	// the list of column names.
	String colTypesStr = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
	List<TypeInfo> colTypes =
			TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);

	rowTypeInfo =
			(StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
	rowOI =
			TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
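For context, here is a sketch of the two table properties the SerDe reads and the row type they produce. The property values are hypothetical; note that Hive's metastore separates the type list with colons:

import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class RowTypeFromPropsSketch {
    public static void main(String[] args) {
        Properties tbl = new Properties();
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name,tags");                 // hypothetical columns
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string:array<string>"); // colon-separated types

        List<String> colNames = Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(","));
        List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES));
        StructTypeInfo rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
        System.out.println(rowTypeInfo.getTypeName()); // struct<id:int,name:string,tags:array<string>>
    }
}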
 
Example 12
Source Project: bigdata-tutorial   Source File: JSONCDHSerDe.java    License: Apache License 2.0
/**
 * Parses a JSON object according to the Hive column's type.
 *
 * @param field         - The JSON object to parse
 * @param fieldTypeInfo - Metadata about the Hive column
 * @return - The parsed value of the field
 */
private Object parseField(Object field, TypeInfo fieldTypeInfo) {
	switch (fieldTypeInfo.getCategory()) {
		case PRIMITIVE:
			// Jackson will return the right thing in this case, so just return
			// the object
			if (field instanceof String) {
				field = field.toString().replaceAll("\n", "\\\\n");
			}
			return field;
		case LIST:
			return parseList(field, (ListTypeInfo) fieldTypeInfo);
		case MAP:
			return parseMap(field, (MapTypeInfo) fieldTypeInfo);
		case STRUCT:
			return parseStruct(field, (StructTypeInfo) fieldTypeInfo);
		case UNION:
			// Unsupported by JSON
		default:
			return null;
	}
}
 
Example 13
Source Project: incubator-gobblin   Source File: HiveOrcSerDeManager.java    License: Apache License 2.0
/**
 * Extensible in case there is another source of truth for fetching the schema besides interacting with HDFS.
 *
 * To initialize an {@link org.apache.hadoop.hive.ql.io.orc.OrcSerde} object, this requires:
 * org.apache.hadoop.hive.serde.serdeConstants#LIST_COLUMNS and
 * org.apache.hadoop.hive.serde.serdeConstants#LIST_COLUMN_TYPES
 */
protected void addSchemaPropertiesHelper(Path path, HiveRegistrationUnit hiveUnit) throws IOException {
  TypeInfo schema = getSchemaFromLatestFile(path, this.fs);
  if (schema instanceof StructTypeInfo) {
    StructTypeInfo structTypeInfo = (StructTypeInfo) schema;
    hiveUnit.setSerDeProp(serdeConstants.LIST_COLUMNS,
        Joiner.on(",").join(structTypeInfo.getAllStructFieldNames()));
    hiveUnit.setSerDeProp(serdeConstants.LIST_COLUMN_TYPES,
        Joiner.on(",").join(
            structTypeInfo.getAllStructFieldTypeInfos().stream().map(x -> x.getTypeName())
                .collect(Collectors.toList())));
  } else {
    // Hive always uses a struct with a field for each of the top-level columns as the root object type,
    // so here we assume that the to-be-registered ORC files follow this pattern.
    throw new IllegalStateException("A valid ORC schema should be an instance of struct");
  }
}
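Illustrative only: for a schema like struct<id:int,name:string>, the two joins above would yield the following property values:

import java.util.stream.Collectors;
import com.google.common.base.Joiner;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class OrcSerdePropsSketch {
    public static void main(String[] args) {
        StructTypeInfo schema = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("struct<id:int,name:string>");
        String listColumns = Joiner.on(",").join(schema.getAllStructFieldNames());
        String listColumnTypes = Joiner.on(",").join(
                schema.getAllStructFieldTypeInfos().stream().map(TypeInfo::getTypeName).collect(Collectors.toList()));
        System.out.println(listColumns);     // id,name
        System.out.println(listColumnTypes); // int,string
    }
}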
 
Example 14
Source Project: parquet-mr   Source File: ArrayWritableObjectInspector.java    License: Apache License 2.0
public ArrayWritableObjectInspector(final StructTypeInfo rowTypeInfo) {
    typeInfo = rowTypeInfo;
    fieldNames = rowTypeInfo.getAllStructFieldNames();
    fieldInfos = rowTypeInfo.getAllStructFieldTypeInfos();
    fields = new ArrayList<StructField>(fieldNames.size());
    fieldsByName = new HashMap<String, StructFieldImpl>();

    for (int i = 0; i < fieldNames.size(); ++i) {
        final String name = fieldNames.get(i);
        final TypeInfo fieldInfo = fieldInfos.get(i);

        final StructFieldImpl field = new StructFieldImpl(name, getObjectInspector(fieldInfo), i);
        fields.add(field);
        fieldsByName.put(name, field);
    }
}
 
Example 15
private ObjectInspector createObjectInspectorWorker(TypeInfo ti) throws SerDeException {
  switch (ti.getCategory()) {
  case PRIMITIVE:
    PrimitiveTypeInfo pti = (PrimitiveTypeInfo) ti;
    return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti);
  case STRUCT:
    StructTypeInfo sti = (StructTypeInfo) ti;
    List<ObjectInspector> ois = new ArrayList<ObjectInspector>(sti.getAllStructFieldTypeInfos().size());
    for (TypeInfo typeInfo : sti.getAllStructFieldTypeInfos()) {
      ois.add(createObjectInspectorWorker(typeInfo));
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(sti.getAllStructFieldNames(), ois);
  case LIST:
    ListTypeInfo lti = (ListTypeInfo) ti;
    TypeInfo listElementTypeInfo = lti.getListElementTypeInfo();
    return ObjectInspectorFactory.getStandardListObjectInspector(createObjectInspectorWorker(listElementTypeInfo));
  default:
    throw new SerDeException("No Hive categories matched for [" + ti + "]");
  }
}
 
Example 16
Source Project: searchanalytics-bigdata   Source File: JSONSerDe.java    License: MIT License
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help
 * perform actual serialization and deserialization of data.
 */
@Override
public void initialize(final Configuration conf, final Properties tbl)
		throws SerDeException {
	// Get a list of the table's column names.
	final String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
	// Change column names to lower case.
	colNames = Arrays.asList(colNamesStr.toLowerCase().split(","));
	// Get a list of TypeInfos for the columns. This list lines up with
	// the list of column names.
	final String colTypesStr = tbl
			.getProperty(serdeConstants.LIST_COLUMN_TYPES);
	final List<TypeInfo> colTypes = TypeInfoUtils
			.getTypeInfosFromTypeString(colTypesStr);
	rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
			colNames, colTypes);
	rowOI = TypeInfoUtils
			.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
 
Example 17
Source Project: searchanalytics-bigdata   Source File: JSONSerDe.java    License: MIT License
/**
 * Parses a JSON object and its fields. The Hive metadata is used to
 * determine how to parse the object fields.
 *
 * @param field
 *            - The JSON object to parse
 * @param fieldTypeInfo
 *            - Metadata about the Hive column
 * @return - A map representing the object and its fields
 */
@SuppressWarnings("unchecked")
private Object parseStruct(final Object field,
		final StructTypeInfo fieldTypeInfo) {
	final Map<Object, Object> map = (Map<Object, Object>) field;
	final ArrayList<TypeInfo> structTypes = fieldTypeInfo
			.getAllStructFieldTypeInfos();
	final ArrayList<String> structNames = fieldTypeInfo
			.getAllStructFieldNames();
	final List<Object> structRow = new ArrayList<Object>(structTypes.size());
	for (int i = 0; i < structNames.size(); i++) {
		structRow.add(parseField(map.get(structNames.get(i)),
				structTypes.get(i)));
	}
	return structRow;
}
 
Example 18
Source Project: spork   Source File: OrcStorage.java    License: Apache License 2.0
@Override
public RequiredFieldResponse pushProjection(
        RequiredFieldList requiredFieldList) throws FrontendException {
    if (requiredFieldList == null)
        return null;
    if (requiredFieldList.getFields() != null)
    {
        int schemaSize = ((StructTypeInfo)typeInfo).getAllStructFieldTypeInfos().size();
        mRequiredColumns = new boolean[schemaSize];
        for (RequiredField rf: requiredFieldList.getFields())
        {
            if (rf.getIndex()!=-1)
                mRequiredColumns[rf.getIndex()] = true;
        }
        Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
        try {
            p.setProperty(signature + RequiredColumnsSuffix, ObjectSerializer.serialize(mRequiredColumns));
        } catch (Exception e) {
            throw new RuntimeException("Cannot serialize mRequiredColumns");
        }
    }
    return new RequiredFieldResponse(true);
}
 
Example 19
Source Project: nifi   Source File: NiFiRecordSerDe.java    License: Apache License 2.0
private List<Object> deserialize(Record record, StructTypeInfo schema) throws SerDeException {
    List<Object> result = new ArrayList<>(Collections.nCopies(schema.getAllStructFieldNames().size(), null));

    try {
        RecordSchema recordSchema = record.getSchema();
        for (RecordField field : recordSchema.getFields()) {
            populateRecord(result, record.getValue(field), field, schema);
        }
    } catch(SerDeException se) {
        log.error("Error [{}] parsing Record [{}].", new Object[]{se.toString(), record}, se);
        throw se;
    } catch (Exception e) {
        log.error("Error [{}] parsing Record [{}].", new Object[]{e.toString(), record}, e);
        throw new SerDeException(e);
    }

    return result;
}
 
Example 20
Source Project: kite   Source File: HiveSchemaConverter.java    License: Apache License 2.0
public static Schema convertTable(String table, Collection<FieldSchema> columns,
                                  @Nullable PartitionStrategy strategy) {
  ArrayList<String> fieldNames = Lists.newArrayList();
  ArrayList<TypeInfo> fieldTypes = Lists.newArrayList();
  LinkedList<String> start = Lists.newLinkedList();
  Collection<String[]> requiredFields = requiredFields(strategy);

  List<Schema.Field> fields = Lists.newArrayList();
  for (FieldSchema column : columns) {
    // pass null for the initial path to exclude the table name
    TypeInfo type = parseTypeInfo(column.getType());
    fieldNames.add(column.getName());
    fieldTypes.add(type);
    fields.add(convertField(start, column.getName(), type, requiredFields));
  }

  StructTypeInfo struct = new StructTypeInfo();
  struct.setAllStructFieldNames(fieldNames);
  struct.setAllStructFieldTypeInfos(fieldTypes);

  Schema recordSchema = Schema.createRecord(table, doc(struct), null, false);
  recordSchema.setFields(fields);

  return recordSchema;
}
 
Example 21
Source Project: kite   Source File: HiveSchemaConverter.java    License: Apache License 2.0
private static Schema convert(LinkedList<String> path, String name,
                              StructTypeInfo type,
                              Collection<String[]> required) {
  List<String> names = type.getAllStructFieldNames();
  List<TypeInfo> types = type.getAllStructFieldTypeInfos();
  Preconditions.checkArgument(names.size() == types.size(),
      "Cannot convert struct: %s names != %s types",
      names.size(), types.size());

  List<Schema.Field> fields = Lists.newArrayList();
  for (int i = 0; i < names.size(); i += 1) {
    path.addLast(name);
    fields.add(convertField(path, names.get(i), types.get(i), required));
    path.removeLast();
  }

  Schema recordSchema = Schema.createRecord(name, doc(type), null, false);
  recordSchema.setFields(fields);

  return recordSchema;
}
 
Example 22
Source Project: presto   Source File: HiveType.java    License: Apache License 2.0
public static boolean isSupportedType(TypeInfo typeInfo, StorageFormat storageFormat)
{
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            return getPrimitiveType((PrimitiveTypeInfo) typeInfo) != null;
        case MAP:
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            return isSupportedType(mapTypeInfo.getMapKeyTypeInfo(), storageFormat) && isSupportedType(mapTypeInfo.getMapValueTypeInfo(), storageFormat);
        case LIST:
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            return isSupportedType(listTypeInfo.getListElementTypeInfo(), storageFormat);
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            return structTypeInfo.getAllStructFieldTypeInfos().stream()
                    .allMatch(fieldTypeInfo -> isSupportedType(fieldTypeInfo, storageFormat));
        case UNION:
            // This feature (reading uniontypes as structs) has only been verified against Avro and ORC tables. Here's a discussion:
            //   1. Avro tables are supported and verified.
            //   2. ORC tables are supported and verified.
            //   3. The Parquet format doesn't support uniontypes itself so there's no need to add support for it in Presto.
            //   4. TODO: RCFile tables are not supported yet.
            //   5. TODO: The support for Avro is done in SerDeUtils so it's possible that formats other than Avro are also supported. But verification is needed.
            if (storageFormat.getSerDe().equalsIgnoreCase(AVRO.getSerDe()) || storageFormat.getSerDe().equalsIgnoreCase(ORC.getSerDe())) {
                UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
                return unionTypeInfo.getAllUnionObjectTypeInfos().stream()
                        .allMatch(fieldTypeInfo -> isSupportedType(fieldTypeInfo, storageFormat));
            }
    }
    return false;
}
 
Example 23
Source Project: presto   Source File: HiveType.java    License: Apache License 2.0
public Optional<HiveType> getHiveTypeForDereferences(List<Integer> dereferences)
{
    TypeInfo typeInfo = getTypeInfo();
    for (int fieldIndex : dereferences) {
        checkArgument(typeInfo instanceof StructTypeInfo, "typeInfo should be struct type: %s", typeInfo);
        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        try {
            typeInfo = structTypeInfo.getAllStructFieldTypeInfos().get(fieldIndex);
        }
        catch (RuntimeException e) {
            return Optional.empty();
        }
    }
    return Optional.of(toHiveType(typeInfo));
}
 
Example 24
Source Project: flink   Source File: HiveTypeUtil.java    License: Apache License 2.0
/**
 * Convert Hive data type to a Flink data type.
 *
 * @param hiveType a Hive data type
 * @return the corresponding Flink data type
 */
public static DataType toFlinkType(TypeInfo hiveType) {
	checkNotNull(hiveType, "hiveType cannot be null");

	switch (hiveType.getCategory()) {
		case PRIMITIVE:
			return toFlinkPrimitiveType((PrimitiveTypeInfo) hiveType);
		case LIST:
			ListTypeInfo listTypeInfo = (ListTypeInfo) hiveType;
			return DataTypes.ARRAY(toFlinkType(listTypeInfo.getListElementTypeInfo()));
		case MAP:
			MapTypeInfo mapTypeInfo = (MapTypeInfo) hiveType;
			return DataTypes.MAP(toFlinkType(mapTypeInfo.getMapKeyTypeInfo()), toFlinkType(mapTypeInfo.getMapValueTypeInfo()));
		case STRUCT:
			StructTypeInfo structTypeInfo = (StructTypeInfo) hiveType;

			List<String> names = structTypeInfo.getAllStructFieldNames();
			List<TypeInfo> typeInfos = structTypeInfo.getAllStructFieldTypeInfos();

			DataTypes.Field[] fields = new DataTypes.Field[names.size()];

			for (int i = 0; i < fields.length; i++) {
				fields[i] = DataTypes.FIELD(names.get(i), toFlinkType(typeInfos.get(i)));
			}

			return DataTypes.ROW(fields);
		default:
			throw new UnsupportedOperationException(
				String.format("Flink doesn't support Hive data type %s yet.", hiveType));
	}
}
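A hedged usage sketch for the converter above. It assumes Flink's HiveTypeUtil (from flink-connector-hive) is on the classpath; the struct literal is illustrative:

import org.apache.flink.table.catalog.hive.util.HiveTypeUtil; // package location may vary by Flink version
import org.apache.flink.table.types.DataType;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ToFlinkTypeSketch {
    public static void main(String[] args) {
        TypeInfo hiveType = TypeInfoUtils.getTypeInfoFromTypeString("struct<id:int,name:string>");
        DataType flinkType = HiveTypeUtil.toFlinkType(hiveType);
        System.out.println(flinkType); // expected: a ROW type with INT and STRING fields
    }
}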
 
Example 25
Source Project: flink   Source File: HiveInspectors.java    License: Apache License 2.0
private static ObjectInspector getObjectInspector(TypeInfo type) {
	switch (type.getCategory()) {

		case PRIMITIVE:
			PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) type;
			return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(primitiveType);
		case LIST:
			ListTypeInfo listType = (ListTypeInfo) type;
			return ObjectInspectorFactory.getStandardListObjectInspector(
					getObjectInspector(listType.getListElementTypeInfo()));
		case MAP:
			MapTypeInfo mapType = (MapTypeInfo) type;
			return ObjectInspectorFactory.getStandardMapObjectInspector(
					getObjectInspector(mapType.getMapKeyTypeInfo()), getObjectInspector(mapType.getMapValueTypeInfo()));
		case STRUCT:
			StructTypeInfo structType = (StructTypeInfo) type;
			List<TypeInfo> fieldTypes = structType.getAllStructFieldTypeInfos();

			List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
			for (TypeInfo fieldType : fieldTypes) {
				fieldInspectors.add(getObjectInspector(fieldType));
			}

			return ObjectInspectorFactory.getStandardStructObjectInspector(
					structType.getAllStructFieldNames(), fieldInspectors);
		default:
			throw new CatalogException("Unsupported Hive type category " + type.getCategory());
	}
}
 
Example 26
Source Project: dremio-oss   Source File: HiveTypeCoercion.java    License: Apache License 2.0
@Override
public TypeCoercion getChildTypeCoercion(String fieldName, BatchSchema childSchema) {
  Map<String, TypeInfo> childrenTypeInfoMap = CaseInsensitiveMap.newHashMap();

  TypeInfo typeInfo = typeInfoMap.get(fieldName);
  if (typeInfo instanceof StructTypeInfo) {
    for (Field field: childSchema.getFields()) {
      childrenTypeInfoMap.put(field.getName(), ((StructTypeInfo) typeInfo).getStructFieldTypeInfo(field.getName()));
    }
  } else if (typeInfo instanceof ListTypeInfo) {
    childrenTypeInfoMap.put(childSchema.getFields().get(0).getName(), ((ListTypeInfo) typeInfo).getListElementTypeInfo());
  }
  return new HiveTypeCoercion(childrenTypeInfoMap, varcharTruncationEnabled);
}
 
Example 27
Source Project: streamx   Source File: HiveSchemaConverterTest.java    License: Apache License 2.0
@Test
public void testConvertSimpleStruct() {
  TypeInfo type = HiveSchemaConverter.convert(SIMPLE_STRUCT);
  assertTrue(type instanceof StructTypeInfo);

  List<String> expectedFieldNames = new ArrayList<>();
  expectedFieldNames.add("id");
  expectedFieldNames.add("name");

  assertEquals(expectedFieldNames, ((StructTypeInfo) type).getAllStructFieldNames());
}
 
Example 28
Source Project: emodb   Source File: EmoSerDe.java    License: Apache License 2.0
/**
 * Deserializes a raw value to the provided type.
 */
private Object deserialize(TypeInfo type, Object rawValue)
        throws SerDeException {
    Object value = null;

    if (rawValue != null) {
        switch (type.getCategory()) {
            case PRIMITIVE:
                value = deserializePrimitive((PrimitiveTypeInfo) type, rawValue);
                break;
            case STRUCT:
                value = deserializeStruct((StructTypeInfo) type, rawValue);
                break;
            case MAP:
                value = deserializeMap((MapTypeInfo) type, rawValue);
                break;
            case LIST:
                value = deserializeList((ListTypeInfo) type, rawValue);
                break;
            case UNION:
                value = deserializeUnion((UnionTypeInfo) type, rawValue);
                break;
        }
    }

    return value;
}
 
Example 29
@Override
public boolean supportsHiveType(TypeInfo typeInfo) {
  try {
    switch (typeInfo.getCategory()) {
      case MAP:
        MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
        if (!mapTypeInfo.getMapKeyTypeInfo().equals(TypeInfoFactory.stringTypeInfo)) {
          return false;
        }

        TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
        HiveDynamoDBTypeFactory.getTypeObjectFromHiveType(valueTypeInfo);
        return true;

      case STRUCT:
        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        for (TypeInfo fieldTypeInfo : structTypeInfo.getAllStructFieldTypeInfos()) {
          HiveDynamoDBTypeFactory.getTypeObjectFromHiveType(fieldTypeInfo);
        }
        return true;

      default:
        return false;
    }
  } catch (IllegalArgumentException e) {
    return false;
  }
}
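A small sketch of the map-key constraint checked above: only string-keyed maps pass. The map type string is illustrative:

import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class MapKeySketch {
    public static void main(String[] args) {
        MapTypeInfo map = (MapTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("map<string,bigint>");
        boolean keyIsString = map.getMapKeyTypeInfo().equals(TypeInfoFactory.stringTypeInfo);
        System.out.println(keyIsString); // true; a map<int,...> would fail this check
    }
}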
 
Example 30
Source Project: indexr   Source File: IndexRSerde.java    License: Apache License 2.0
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
    String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

    if (Strings.isEmpty(columnNameProperty)) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (Strings.isEmpty(columnTypeProperty)) {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(StringUtils.repeat("string", ":", columnNames.size()));
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("IndexRHiveSerde initialization failed. Number of column " +
                "name and column type differs. columnNames = " + columnNames + ", columnTypes = " +
                columnTypes);
    }

    TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

    stats = new SerDeStats();
    serdeSize = 0;
}