Java Code Examples for org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo

The following examples show how to use org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: presto   Source File: OrcFileWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a standard Java ObjectInspector for the given Hive type.
 * Handles PRIMITIVE, LIST, and MAP categories, recursing into container
 * element types; any other category is reported as an internal error.
 */
private static ObjectInspector getJavaObjectInspector(TypeInfo typeInfo)
{
    Category category = typeInfo.getCategory();
    switch (category) {
        case PRIMITIVE:
            return getPrimitiveJavaObjectInspector(getPrimitiveTypeInfo(typeInfo.getTypeName()));
        case LIST:
            ListTypeInfo listInfo = (ListTypeInfo) typeInfo;
            return getStandardListObjectInspector(getJavaObjectInspector(listInfo.getListElementTypeInfo()));
        case MAP:
            MapTypeInfo mapInfo = (MapTypeInfo) typeInfo;
            return getStandardMapObjectInspector(
                    getJavaObjectInspector(mapInfo.getMapKeyTypeInfo()),
                    getJavaObjectInspector(mapInfo.getMapValueTypeInfo()));
        default:
            throw new PrestoException(GENERIC_INTERNAL_ERROR, "Unhandled storage type: " + category);
    }
}
 
Example 2
Source Project: presto   Source File: HiveCoercionRecordCursor.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Coerces map entries between two Hive map types while reading records.
 * Key and value coercers are created only when the corresponding source and
 * target types differ; a null slot in {@code coercers} means that side
 * passes through unchanged.
 */
public MapCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType, BridgingRecordCursor bridgingRecordCursor)
{
    // Fixed message typo: was "typeManage is null"
    requireNonNull(typeManager, "typeManager is null");
    requireNonNull(fromHiveType, "fromHiveType is null");
    requireNonNull(toHiveType, "toHiveType is null");
    this.bridgingRecordCursor = requireNonNull(bridgingRecordCursor, "bridgingRecordCursor is null");
    // Cast each map TypeInfo once instead of repeating the cast for every key/value lookup
    MapTypeInfo fromMapTypeInfo = (MapTypeInfo) fromHiveType.getTypeInfo();
    MapTypeInfo toMapTypeInfo = (MapTypeInfo) toHiveType.getTypeInfo();
    HiveType fromKeyHiveType = HiveType.valueOf(fromMapTypeInfo.getMapKeyTypeInfo().getTypeName());
    HiveType fromValueHiveType = HiveType.valueOf(fromMapTypeInfo.getMapValueTypeInfo().getTypeName());
    HiveType toKeyHiveType = HiveType.valueOf(toMapTypeInfo.getMapKeyTypeInfo().getTypeName());
    HiveType toValueHiveType = HiveType.valueOf(toMapTypeInfo.getMapValueTypeInfo().getTypeName());
    this.fromKeyValueTypes = fromHiveType.getType(typeManager).getTypeParameters();
    this.toType = toHiveType.getType(typeManager);
    this.toKeyValueTypes = toType.getTypeParameters();
    this.coercers = new Coercer[2];
    coercers[0] = fromKeyHiveType.equals(toKeyHiveType) ? null : createCoercer(typeManager, fromKeyHiveType, toKeyHiveType, bridgingRecordCursor);
    coercers[1] = fromValueHiveType.equals(toValueHiveType) ? null : createCoercer(typeManager, fromValueHiveType, toValueHiveType, bridgingRecordCursor);
    // No page builder needed when neither side requires coercion
    this.pageBuilder = coercers[0] == null && coercers[1] == null ? null : new PageBuilder(ImmutableList.of(toType));
}
 
Example 3
Source Project: presto   Source File: HiveBucketing.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns whether the given Hive type (recursively) contains a TIMESTAMP
 * in a position that is hashed with the bucketing-v2 algorithm.
 */
private static boolean containsTimestampBucketedV2(TypeInfo type)
{
    switch (type.getCategory()) {
        case PRIMITIVE: {
            return TIMESTAMP == ((PrimitiveTypeInfo) type).getPrimitiveCategory();
        }
        case LIST: {
            TypeInfo elementType = ((ListTypeInfo) type).getListElementTypeInfo();
            return containsTimestampBucketedV2(elementType);
        }
        case MAP: {
            // Only keys matter here: HiveBucketingV2#hashOfMap hashes map values with v1
            TypeInfo keyType = ((MapTypeInfo) type).getMapKeyTypeInfo();
            return containsTimestampBucketedV2(keyType);
        }
        default:
            // TODO: support more types, e.g. ROW
            throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive category: " + type.getCategory());
    }
}
 
Example 4
Source Project: presto   Source File: HiveWriteUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Determines whether values of the given Hive type can be written.
 * Container types are writable only when all of their component types are;
 * unknown categories are rejected.
 */
private static boolean isWritableType(TypeInfo typeInfo)
{
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            return isWritablePrimitiveType(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
        case MAP:
            MapTypeInfo mapInfo = (MapTypeInfo) typeInfo;
            return isWritableType(mapInfo.getMapKeyTypeInfo())
                    && isWritableType(mapInfo.getMapValueTypeInfo());
        case LIST:
            return isWritableType(((ListTypeInfo) typeInfo).getListElementTypeInfo());
        case STRUCT:
            return ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos().stream()
                    .allMatch(HiveWriteUtils::isWritableType);
        default:
            return false;
    }
}
 
Example 5
public MDSMapObjectInspector( final MapTypeInfo typeInfo ){
  TypeInfo keyTypeInfo = typeInfo.getMapKeyTypeInfo();
  if( keyTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE && ( (PrimitiveTypeInfo)keyTypeInfo ).getPrimitiveCategory() == PrimitiveCategory.STRING ){
    keyObjectInspector = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
  }
  else{
    throw new RuntimeException( "Map key type is string only." );
  }

  valueObjectInspector = MDSObjectInspectorFactory.craeteObjectInspectorFromTypeInfo( typeInfo.getMapValueTypeInfo() ); 

  if( valueObjectInspector.getCategory() == ObjectInspector.Category.PRIMITIVE ){
    getField = new PrimitiveGetField( (PrimitiveObjectInspector)valueObjectInspector );
  }
  else if( valueObjectInspector.getCategory() == ObjectInspector.Category.UNION ){
    getField = new UnionGetField( (UnionTypeInfo)( typeInfo.getMapValueTypeInfo() ) );
  }
  else{
    getField = new NestedGetField();
  }
}
 
Example 6
Source Project: emodb   Source File: EmoSerDe.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Deserializes raw map data into a map whose keys and values have been
 * converted to the Hive key/value types.
 *
 * @throws SerDeException if the raw data is not a {@link Map}
 */
private Object deserializeMap(MapTypeInfo type, Object data)
        throws SerDeException {
    if (!(data instanceof Map)) {
        throw new SerDeException("Value not of type map");
    }
    //noinspection unchecked
    Map<String, Object> rawMap = (Map<String, Object>) data;

    TypeInfo keyType = type.getMapKeyTypeInfo();
    TypeInfo valueType = type.getMapValueTypeInfo();
    Map<Object, Object> result = Maps.newHashMap();
    for (Map.Entry<String, Object> entry : rawMap.entrySet()) {
        result.put(deserialize(keyType, entry.getKey()), deserialize(valueType, entry.getValue()));
    }

    return result;
}
 
Example 7
Source Project: bigdata-tutorial   Source File: JSONCDHSerDe.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Parses a JSON object according to the Hive column's type.
 *
 * @param field         - The JSON object to parse
 * @param fieldTypeInfo - Metadata about the Hive column
 * @return - The parsed value of the field
 */
private Object parseField(Object field, TypeInfo fieldTypeInfo) {
	switch (fieldTypeInfo.getCategory()) {
		case PRIMITIVE:
			// Jackson will return the right thing in this case, so just return
			// the object
			if (field instanceof String) {
				field = field.toString().replaceAll("\n", "\\\\n");
			}
			return field;
		case LIST:
			return parseList(field, (ListTypeInfo) fieldTypeInfo);
		case MAP:
			return parseMap(field, (MapTypeInfo) fieldTypeInfo);
		case STRUCT:
			return parseStruct(field, (StructTypeInfo) fieldTypeInfo);
		case UNION:
			// Unsupported by JSON
		default:
			return null;
	}
}
 
Example 8
Source Project: presto   Source File: HiveType.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Determines whether a Hive type can be read for the given storage format.
 * Container types are supported only when every component type is supported;
 * uniontypes are supported only for Avro and ORC.
 */
public static boolean isSupportedType(TypeInfo typeInfo, StorageFormat storageFormat)
{
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            return null != getPrimitiveType((PrimitiveTypeInfo) typeInfo);
        case MAP: {
            MapTypeInfo mapInfo = (MapTypeInfo) typeInfo;
            return isSupportedType(mapInfo.getMapKeyTypeInfo(), storageFormat)
                    && isSupportedType(mapInfo.getMapValueTypeInfo(), storageFormat);
        }
        case LIST:
            return isSupportedType(((ListTypeInfo) typeInfo).getListElementTypeInfo(), storageFormat);
        case STRUCT:
            return ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos().stream()
                    .allMatch(fieldType -> isSupportedType(fieldType, storageFormat));
        case UNION: {
            // Reading uniontypes as structs has only been verified for some formats:
            //   1. Avro tables are supported and verified.
            //   2. ORC tables are supported and verified.
            //   3. Parquet has no uniontype of its own, so there is nothing to support.
            //   4. TODO: RCFile tables are not supported yet.
            //   5. TODO: Avro support lives in SerDeUtils, so other formats may work too, but need verification.
            String serde = storageFormat.getSerDe();
            if (serde.equalsIgnoreCase(AVRO.getSerDe()) || serde.equalsIgnoreCase(ORC.getSerDe())) {
                return ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos().stream()
                        .allMatch(fieldType -> isSupportedType(fieldType, storageFormat));
            }
            return false;
        }
        default:
            return false;
    }
}
 
Example 9
Source Project: presto   Source File: HivePageSource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Coerces map pages between two Hive map types. A null key or value coercer
 * means that side already matches the target type and passes through.
 */
public MapCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType)
{
    // Fixed message typo: was "typeManage is null"
    requireNonNull(typeManager, "typeManager is null");
    requireNonNull(fromHiveType, "fromHiveType is null");
    this.toType = requireNonNull(toHiveType, "toHiveType is null").getType(typeManager);
    // Cast each map TypeInfo once instead of repeating the cast for every key/value lookup
    MapTypeInfo fromMapTypeInfo = (MapTypeInfo) fromHiveType.getTypeInfo();
    MapTypeInfo toMapTypeInfo = (MapTypeInfo) toHiveType.getTypeInfo();
    HiveType fromKeyHiveType = HiveType.valueOf(fromMapTypeInfo.getMapKeyTypeInfo().getTypeName());
    HiveType fromValueHiveType = HiveType.valueOf(fromMapTypeInfo.getMapValueTypeInfo().getTypeName());
    HiveType toKeyHiveType = HiveType.valueOf(toMapTypeInfo.getMapKeyTypeInfo().getTypeName());
    HiveType toValueHiveType = HiveType.valueOf(toMapTypeInfo.getMapValueTypeInfo().getTypeName());
    this.keyCoercer = fromKeyHiveType.equals(toKeyHiveType) ? null : createCoercer(typeManager, fromKeyHiveType, toKeyHiveType);
    this.valueCoercer = fromValueHiveType.equals(toValueHiveType) ? null : createCoercer(typeManager, fromValueHiveType, toValueHiveType);
}
 
Example 10
Source Project: presto   Source File: HiveBucketingV2.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Hashes a single map block for bucketing v2. Entries are laid out as
 * alternating key/value positions within the block.
 */
private static int hashOfMap(MapTypeInfo type, Block singleMapBlock)
{
    TypeInfo keyType = type.getMapKeyTypeInfo();
    TypeInfo valueType = type.getMapValueTypeInfo();
    int positionCount = singleMapBlock.getPositionCount();
    int hashCode = 0;
    for (int position = 0; position < positionCount; position += 2) {
        // Sic! keys are hashed with v2 but values with v1, mirroring
        // https://github.com/apache/hive/blob/7dc47faddba9f079bbe2698aaa4d8712e7654f87/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java#L903-L904
        hashCode += hash(keyType, singleMapBlock, position) ^ HiveBucketingV1.hash(valueType, singleMapBlock, position + 1);
    }
    return hashCode;
}
 
Example 11
Source Project: presto   Source File: HiveBucketingV1.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Hashes a single map block for bucketing v1 by XOR-ing each key hash with
 * its value hash and summing over all entries (alternating key/value positions).
 */
private static int hashOfMap(MapTypeInfo type, Block singleMapBlock)
{
    TypeInfo keyType = type.getMapKeyTypeInfo();
    TypeInfo valueType = type.getMapValueTypeInfo();
    int positionCount = singleMapBlock.getPositionCount();
    int hashCode = 0;
    for (int position = 0; position < positionCount; position += 2) {
        hashCode += hash(keyType, singleMapBlock, position) ^ hash(valueType, singleMapBlock, position + 1);
    }
    return hashCode;
}
 
Example 12
Source Project: presto   Source File: HiveCoercionPolicy.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * A map-to-map coercion is allowed only when both sides are MAP and each of
 * the key and value types is either identical or itself coercible.
 */
private boolean canCoerceForMap(HiveType fromHiveType, HiveType toHiveType)
{
    if (fromHiveType.getCategory() != Category.MAP || toHiveType.getCategory() != Category.MAP) {
        return false;
    }
    MapTypeInfo fromMapInfo = (MapTypeInfo) fromHiveType.getTypeInfo();
    MapTypeInfo toMapInfo = (MapTypeInfo) toHiveType.getTypeInfo();
    HiveType fromKeyType = HiveType.valueOf(fromMapInfo.getMapKeyTypeInfo().getTypeName());
    HiveType fromValueType = HiveType.valueOf(fromMapInfo.getMapValueTypeInfo().getTypeName());
    HiveType toKeyType = HiveType.valueOf(toMapInfo.getMapKeyTypeInfo().getTypeName());
    HiveType toValueType = HiveType.valueOf(toMapInfo.getMapValueTypeInfo().getTypeName());
    return (fromKeyType.equals(toKeyType) || canCoerce(fromKeyType, toKeyType)) &&
            (fromValueType.equals(toValueType) || canCoerce(fromValueType, toValueType));
}
 
Example 13
/**
 * Converts a Hive map type into a Parquet map group with the given repetition.
 * Map keys are always REQUIRED in the Parquet representation.
 */
private static GroupType convertMapType(String name, MapTypeInfo typeInfo, Repetition repetition)
{
    Type parquetKeyType = convertType(
            ParquetHiveSerDe.MAP_KEY.toString(), typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);
    Type parquetValueType = convertType(
            ParquetHiveSerDe.MAP_VALUE.toString(), typeInfo.getMapValueTypeInfo());
    return mapType(repetition, name, "map", parquetKeyType, parquetValueType);
}
 
Example 14
Source Project: presto   Source File: SingleLevelArraySchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Hive map type into a Parquet map group via ConversionPatterns.
 * Map keys are always REQUIRED in the Parquet representation.
 */
private static GroupType convertMapType(String name, MapTypeInfo typeInfo, Repetition repetition)
{
    Type parquetKeyType = convertType(
            ParquetHiveSerDe.MAP_KEY.toString(), typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);
    Type parquetValueType = convertType(
            ParquetHiveSerDe.MAP_VALUE.toString(), typeInfo.getMapValueTypeInfo());
    return ConversionPatterns.mapType(repetition, name, parquetKeyType, parquetValueType);
}
 
Example 15
Source Project: presto   Source File: MapKeyValuesSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Hive map type into an OPTIONAL Parquet map group.
 * Map keys are always REQUIRED in the Parquet representation.
 */
private static GroupType convertMapType(String name, MapTypeInfo typeInfo)
{
    Type parquetKeyType = convertType(
            ParquetHiveSerDe.MAP_KEY.toString(), typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);
    Type parquetValueType = convertType(
            ParquetHiveSerDe.MAP_VALUE.toString(), typeInfo.getMapValueTypeInfo());
    return mapType(Repetition.OPTIONAL, name, "map", parquetKeyType, parquetValueType);
}
 
Example 16
Source Project: flink   Source File: HiveTypeUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Convert Hive data type to a Flink data type.
 *
 * @param hiveType a Hive data type
 * @return the corresponding Flink data type
 */
public static DataType toFlinkType(TypeInfo hiveType) {
	checkNotNull(hiveType, "hiveType cannot be null");

	switch (hiveType.getCategory()) {
		case PRIMITIVE:
			return toFlinkPrimitiveType((PrimitiveTypeInfo) hiveType);
		case LIST:
			ListTypeInfo listTypeInfo = (ListTypeInfo) hiveType;
			return DataTypes.ARRAY(toFlinkType(listTypeInfo.getListElementTypeInfo()));
		case MAP:
			MapTypeInfo mapTypeInfo = (MapTypeInfo) hiveType;
			return DataTypes.MAP(toFlinkType(mapTypeInfo.getMapKeyTypeInfo()), toFlinkType(mapTypeInfo.getMapValueTypeInfo()));
		case STRUCT:
			StructTypeInfo structTypeInfo = (StructTypeInfo) hiveType;

			List<String> names = structTypeInfo.getAllStructFieldNames();
			List<TypeInfo> typeInfos = structTypeInfo.getAllStructFieldTypeInfos();

			DataTypes.Field[] fields = new DataTypes.Field[names.size()];

			for (int i = 0; i < fields.length; i++) {
				fields[i] = DataTypes.FIELD(names.get(i), toFlinkType(typeInfos.get(i)));
			}

			return DataTypes.ROW(fields);
		default:
			throw new UnsupportedOperationException(
				String.format("Flink doesn't support Hive data type %s yet.", hiveType));
	}
}
 
Example 17
Source Project: flink   Source File: HiveInspectors.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a standard Hive ObjectInspector for the given type info, recursing
 * into list element, map key/value, and struct field types.
 */
private static ObjectInspector getObjectInspector(TypeInfo type) {
	switch (type.getCategory()) {

		case PRIMITIVE:
			return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((PrimitiveTypeInfo) type);
		case LIST:
			ObjectInspector elementInspector =
					getObjectInspector(((ListTypeInfo) type).getListElementTypeInfo());
			return ObjectInspectorFactory.getStandardListObjectInspector(elementInspector);
		case MAP:
			MapTypeInfo mapType = (MapTypeInfo) type;
			ObjectInspector keyInspector = getObjectInspector(mapType.getMapKeyTypeInfo());
			ObjectInspector valueInspector = getObjectInspector(mapType.getMapValueTypeInfo());
			return ObjectInspectorFactory.getStandardMapObjectInspector(keyInspector, valueInspector);
		case STRUCT:
			StructTypeInfo structType = (StructTypeInfo) type;
			List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
			for (TypeInfo fieldType : structType.getAllStructFieldTypeInfos()) {
				fieldInspectors.add(getObjectInspector(fieldType));
			}
			return ObjectInspectorFactory.getStandardStructObjectInspector(
					structType.getAllStructFieldNames(), fieldInspectors);
		default:
			throw new CatalogException("Unsupported Hive type category " + type.getCategory());
	}
}
 
Example 18
Source Project: emodb   Source File: EmoSerDe.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes a raw value to the provided type.
 */
private Object deserialize(TypeInfo type, Object rawValue)
        throws SerDeException {
    Object value = null;

    if (rawValue != null) {
        switch (type.getCategory()) {
            case PRIMITIVE:
                value = deserializePrimitive((PrimitiveTypeInfo) type, rawValue);
                break;
            case STRUCT:
                value = deserializeStruct((StructTypeInfo) type, rawValue);
                break;
            case MAP:
                value = deserializeMap((MapTypeInfo) type, rawValue);
                break;
            case LIST:
                value = deserializeList((ListTypeInfo) type, rawValue);
                break;
            case UNION:
                value = deserializeUnion((UnionTypeInfo) type, rawValue);
                break;
        }
    }

    return value;
}
 
Example 19
@Override
public boolean supportsHiveType(TypeInfo typeInfo) {
  try {
    switch (typeInfo.getCategory()) {
      case MAP:
        MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
        if (!mapTypeInfo.getMapKeyTypeInfo().equals(TypeInfoFactory.stringTypeInfo)) {
          return false;
        }

        TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
        HiveDynamoDBTypeFactory.getTypeObjectFromHiveType(valueTypeInfo);
        return true;

      case STRUCT:
        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        for (TypeInfo fieldTypeInfo : structTypeInfo.getAllStructFieldTypeInfos()) {
          HiveDynamoDBTypeFactory.getTypeObjectFromHiveType(fieldTypeInfo);
        }
        return true;

      default:
        return false;
    }
  } catch (IllegalArgumentException e) {
    return false;
  }
}
 
Example 20
Source Project: bigdata-tutorial   Source File: JSONCDHSerDe.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parse a JSON object as a map. This uses the Hive metadata for the map
 * values to determine how to parse the values. The map is assumed to have
 * a string for a key.
 *
 * @param field         - The JSON list to parse
 * @param fieldTypeInfo - Metadata about the Hive column
 * @return
 */
/**
 * Parses a JSON object as a map, converting each value according to the
 * Hive map's value type. Keys are assumed to be strings. The map is updated
 * in place and returned (null input yields null).
 *
 * @param field         - The JSON map to parse
 * @param fieldTypeInfo - Metadata about the Hive column
 * @return the same map with parsed values, or null
 */
private Object parseMap(Object field, MapTypeInfo fieldTypeInfo) {
	Map<Object, Object> map = (Map<Object, Object>) field;
	if (map == null) {
		return null;
	}
	TypeInfo valueTypeInfo = fieldTypeInfo.getMapValueTypeInfo();
	// entry.setValue updates in place without calling put() mid-iteration
	for (Map.Entry<Object, Object> entry : map.entrySet()) {
		entry.setValue(parseField(entry.getValue(), valueTypeInfo));
	}
	return map;
}
 
Example 21
Source Project: flink   Source File: HiveTypeUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Convert Hive data type to a Flink data type.
 *
 * @param hiveType a Hive data type
 * @return the corresponding Flink data type
 */
/**
 * Convert Hive data type to a Flink data type.
 *
 * @param hiveType a Hive data type
 * @return the corresponding Flink data type
 * @throws UnsupportedOperationException if the Hive type has no Flink counterpart
 */
public static DataType toFlinkType(TypeInfo hiveType) {
	checkNotNull(hiveType, "hiveType cannot be null");

	switch (hiveType.getCategory()) {
		case PRIMITIVE:
			return toFlinkPrimitiveType((PrimitiveTypeInfo) hiveType);
		case LIST:
			TypeInfo elementTypeInfo = ((ListTypeInfo) hiveType).getListElementTypeInfo();
			return DataTypes.ARRAY(toFlinkType(elementTypeInfo));
		case MAP:
			MapTypeInfo mapTypeInfo = (MapTypeInfo) hiveType;
			DataType keyType = toFlinkType(mapTypeInfo.getMapKeyTypeInfo());
			DataType valueType = toFlinkType(mapTypeInfo.getMapValueTypeInfo());
			return DataTypes.MAP(keyType, valueType);
		case STRUCT:
			StructTypeInfo structTypeInfo = (StructTypeInfo) hiveType;
			List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
			List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();

			// Convert each struct field into a named ROW field, preserving order.
			DataTypes.Field[] rowFields = new DataTypes.Field[fieldNames.size()];
			for (int i = 0; i < rowFields.length; i++) {
				rowFields[i] = DataTypes.FIELD(fieldNames.get(i), toFlinkType(fieldTypeInfos.get(i)));
			}
			return DataTypes.ROW(rowFields);
		default:
			throw new UnsupportedOperationException(
				String.format("Flink doesn't support Hive data type %s yet.", hiveType));
	}
}
 
Example 22
Source Project: flink   Source File: HiveInspectors.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a standard Hive ObjectInspector for the given type info, recursing
 * into list element, map key/value, and struct field types.
 */
private static ObjectInspector getObjectInspector(TypeInfo type) {
	switch (type.getCategory()) {

		case PRIMITIVE:
			return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((PrimitiveTypeInfo) type);
		case LIST:
			ObjectInspector elementInspector =
					getObjectInspector(((ListTypeInfo) type).getListElementTypeInfo());
			return ObjectInspectorFactory.getStandardListObjectInspector(elementInspector);
		case MAP:
			MapTypeInfo mapType = (MapTypeInfo) type;
			ObjectInspector keyInspector = getObjectInspector(mapType.getMapKeyTypeInfo());
			ObjectInspector valueInspector = getObjectInspector(mapType.getMapValueTypeInfo());
			return ObjectInspectorFactory.getStandardMapObjectInspector(keyInspector, valueInspector);
		case STRUCT:
			StructTypeInfo structType = (StructTypeInfo) type;
			List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
			for (TypeInfo fieldType : structType.getAllStructFieldTypeInfos()) {
				fieldInspectors.add(getObjectInspector(fieldType));
			}
			return ObjectInspectorFactory.getStandardStructObjectInspector(
					structType.getAllStructFieldNames(), fieldInspectors);
		default:
			throw new CatalogException("Unsupported Hive type category " + type.getCategory());
	}
}
 
Example 23
Source Project: parquet-mr   Source File: HiveSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Hive type into the corresponding Parquet schema type with the
 * given repetition. Primitive Hive types map to Parquet primitives; list,
 * struct, and map types are delegated to their dedicated converters.
 *
 * @throws UnsupportedOperationException for Hive types Parquet cannot yet represent
 *                                       (binary, timestamp, void, unknown, union)
 * @throws IllegalArgumentException      for unrecognized type infos
 */
private static Type convertType(final String name, final TypeInfo typeInfo, final Repetition repetition) {
  if (typeInfo.getCategory().equals(Category.PRIMITIVE)) {
    if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
      // Hive strings are stored as Parquet BINARY
      return new PrimitiveType(repetition, PrimitiveTypeName.BINARY, name);
    } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo) ||
        typeInfo.equals(TypeInfoFactory.shortTypeInfo) ||
        typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
      // int, short, and byte all widen to INT32
      return new PrimitiveType(repetition, PrimitiveTypeName.INT32, name);
    } else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.INT64, name);
    } else if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.DOUBLE, name);
    } else if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.FLOAT, name);
    } else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.BOOLEAN, name);
    } else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
      // TODO : binaryTypeInfo is a byte array. Need to map it
      throw new UnsupportedOperationException("Binary type not implemented");
    } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
      throw new UnsupportedOperationException("Timestamp type not implemented");
    } else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
      throw new UnsupportedOperationException("Void type not implemented");
    } else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
      throw new UnsupportedOperationException("Unknown type not implemented");
    } else {
      throw new IllegalArgumentException("Unknown type: " + typeInfo);
    }
  } else if (typeInfo.getCategory().equals(Category.LIST)) {
    return convertArrayType(name, (ListTypeInfo) typeInfo);
  } else if (typeInfo.getCategory().equals(Category.STRUCT)) {
    return convertStructType(name, (StructTypeInfo) typeInfo);
  } else if (typeInfo.getCategory().equals(Category.MAP)) {
    return convertMapType(name, (MapTypeInfo) typeInfo);
  } else if (typeInfo.getCategory().equals(Category.UNION)) {
    throw new UnsupportedOperationException("Union type not implemented");
  } else {
    throw new IllegalArgumentException("Unknown type: " + typeInfo);
  }
}
 
Example 24
Source Project: parquet-mr   Source File: HiveSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Hive map type into an OPTIONAL Parquet map group.
 * Map keys are always REQUIRED in the Parquet representation.
 */
private static GroupType convertMapType(final String name, final MapTypeInfo typeInfo) {
  final Type parquetKeyType = convertType(
      ParquetHiveSerDe.MAP_KEY.toString(), typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);
  final Type parquetValueType = convertType(
      ParquetHiveSerDe.MAP_VALUE.toString(), typeInfo.getMapValueTypeInfo());
  return ConversionPatterns.mapType(Repetition.OPTIONAL, name, parquetKeyType, parquetValueType);
}
 
Example 25
Source Project: parquet-mr   Source File: ArrayWritableObjectInspector.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Selects the ObjectInspector for a Hive type when reading Parquet data.
 * Primitive types get Writable/Parquet-specific primitive inspectors;
 * struct, list, and map types recurse into their component types.
 *
 * @throws UnsupportedOperationException for timestamp (not implemented yet)
 * @throws IllegalArgumentException      for unrecognized type infos
 */
private ObjectInspector getObjectInspector(final TypeInfo typeInfo) {
  if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
  } else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
  } else if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
  } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
  } else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
  } else if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
    // Strings use the Parquet-specific inspector rather than the writable one
    return ParquetPrimitiveInspectorFactory.parquetStringInspector;
  } else if (typeInfo.getCategory().equals(Category.STRUCT)) {
    return new ArrayWritableObjectInspector((StructTypeInfo) typeInfo);
  } else if (typeInfo.getCategory().equals(Category.LIST)) {
    final TypeInfo subTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo();
    return new ParquetHiveArrayInspector(getObjectInspector(subTypeInfo));
  } else if (typeInfo.getCategory().equals(Category.MAP)) {
    final TypeInfo keyTypeInfo = ((MapTypeInfo) typeInfo).getMapKeyTypeInfo();
    final TypeInfo valueTypeInfo = ((MapTypeInfo) typeInfo).getMapValueTypeInfo();
    // String/byte/short keys need the "deep" inspector; other key types use the standard one
    if (keyTypeInfo.equals(TypeInfoFactory.stringTypeInfo) || keyTypeInfo.equals(TypeInfoFactory.byteTypeInfo)
            || keyTypeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
      return new DeepParquetHiveMapInspector(getObjectInspector(keyTypeInfo), getObjectInspector(valueTypeInfo));
    } else {
      return new StandardParquetHiveMapInspector(getObjectInspector(keyTypeInfo), getObjectInspector(valueTypeInfo));
    }
  } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
    throw new UnsupportedOperationException("timestamp not implemented yet");
  } else if (typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
    return ParquetPrimitiveInspectorFactory.parquetByteInspector;
  } else if (typeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
    return ParquetPrimitiveInspectorFactory.parquetShortInspector;
  } else {
    throw new IllegalArgumentException("Unknown field info: " + typeInfo);
  }

}
 
Example 26
Source Project: searchanalytics-bigdata   Source File: JSONSerDe.java    License: MIT License 5 votes vote down vote up
/**
 * Parses a JSON object according to the Hive column's type.
 *
 * @param field
 *            - The JSON object to parse
 * @param fieldTypeInfo
 *            - Metadata about the Hive column
 * @return - The parsed value of the field
 */
/**
 * Parses a JSON object according to the Hive column's type.
 *
 * @param field
 *            - The JSON object to parse
 * @param fieldTypeInfo
 *            - Metadata about the Hive column
 * @return - The parsed value of the field, or null for unsupported categories
 */
private Object parseField(Object field, final TypeInfo fieldTypeInfo) {
	switch (fieldTypeInfo.getCategory()) {
	case PRIMITIVE:
		// Jackson will return the right thing in this case, so just return
		// the object
		// Get type-safe JSON values
		if (field instanceof String) {
			field = field.toString().replaceAll("\n", "\\\\n");
		}
		if (fieldTypeInfo.getTypeName().equalsIgnoreCase(
				serdeConstants.BIGINT_TYPE_NAME)) {
			// Long.valueOf replaces the deprecated Long(String) constructor
			// and may reuse cached instances; equality semantics are unchanged.
			field = Long.valueOf(String.valueOf(field));
		}
		return field;
	case LIST:
		return parseList(field, (ListTypeInfo) fieldTypeInfo);
	case MAP:
		return parseMap(field, (MapTypeInfo) fieldTypeInfo);
	case STRUCT:
		return parseStruct(field, (StructTypeInfo) fieldTypeInfo);
	case UNION:
		// Unsupported by JSON
	default:
		return null;
	}
}
 
Example 27
Source Project: Hive-XML-SerDe   Source File: XmlObjectInspectorFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the standard java object inspector
 * 
 * @param typeInfo
 *            the type info
 * @param xmlProcessor
 *            the XML processor
 * @return the standard java object inspector
 */
public static ObjectInspector getStandardJavaObjectInspectorFromTypeInfo(TypeInfo typeInfo, XmlProcessor xmlProcessor) {
    switch (typeInfo.getCategory()) {
        case PRIMITIVE: {
            return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
        }
        case LIST: {
            ObjectInspector listElementObjectInspector = getStandardJavaObjectInspectorFromTypeInfo(((ListTypeInfo) typeInfo).getListElementTypeInfo(),
                xmlProcessor);
            return new XmlListObjectInspector(listElementObjectInspector, xmlProcessor);
        }
        case MAP: {
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            ObjectInspector mapKeyObjectInspector = getStandardJavaObjectInspectorFromTypeInfo(mapTypeInfo.getMapKeyTypeInfo(),
                xmlProcessor);
            ObjectInspector mapValueObjectInspector = getStandardJavaObjectInspectorFromTypeInfo(mapTypeInfo.getMapValueTypeInfo(),
                xmlProcessor);
            return new XmlMapObjectInspector(mapKeyObjectInspector, mapValueObjectInspector, xmlProcessor);
        }
        case STRUCT: {
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            List<String> structFieldNames = structTypeInfo.getAllStructFieldNames();
            List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
            List<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
            for (int fieldIndex = 0; fieldIndex < fieldTypeInfos.size(); ++fieldIndex) {
                structFieldObjectInspectors.add(getStandardJavaObjectInspectorFromTypeInfo(fieldTypeInfos.get(fieldIndex), xmlProcessor));
            }
            return getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors, xmlProcessor);
        }
        default: {
            throw new IllegalStateException();
        }
    }
}
 
Example 28
Source Project: presto   Source File: HiveBucketingV2.java    License: Apache License 2.0 4 votes vote down vote up
private static int hash(TypeInfo type, Block block, int position)
{
    // Reimplements function hashCodeMurmur introduced by HIVE-18910 (and following),
    // commit 7dc47faddb, serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
    // https://github.com/apache/hive/blob/7dc47faddba9f079bbe2698aaa4d8712e7654f87/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java

    if (block.isNull(position)) {
        // Hive buckets SQL NULL as zero
        return 0;
    }

    switch (type.getCategory()) {
        case PRIMITIVE:
            PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) type;
            PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
            Type valueType = requireNonNull(HiveType.getPrimitiveType(primitiveTypeInfo));
            switch (primitiveCategory) {
                case BOOLEAN:
                    return valueType.getBoolean(block, position) ? 1 : 0;
                case BYTE:
                    // single byte: Hive uses the value directly, no murmur round
                    return SignedBytes.checkedCast(valueType.getLong(block, position));
                case SHORT:
                    return Murmur3.hash32(bytes(Shorts.checkedCast(valueType.getLong(block, position))));
                case INT:
                case DATE:
                    // DATE is stored as a day offset from 1970-01-01 and hashes like INT
                    return Murmur3.hash32(bytes(toIntExact(valueType.getLong(block, position))));
                case LONG:
                    return Murmur3.hash32(bytes(valueType.getLong(block, position)));
                case FLOAT: {
                    // floatToIntBits canonicalizes NaN; its int result is then implicitly
                    // widened back to float before floatToRawIntBits — Sic! this reproduces
                    // Hive's (implicit) floatToIntBits -> cast to float -> floatToRawIntBits:
                    // https://github.com/apache/hive/blob/7dc47faddba9f079bbe2698aaa4d8712e7654f87/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java#L830
                    float floatValue = intBitsToFloat(toIntExact(valueType.getLong(block, position)));
                    return Murmur3.hash32(bytes(floatToRawIntBits(floatToIntBits(floatValue))));
                }
                case DOUBLE: {
                    // doubleToLongBits canonicalizes NaN; its long result is then implicitly
                    // widened back to double before doubleToRawLongBits — Sic! just as Hive does:
                    // https://github.com/apache/hive/blob/7dc47faddba9f079bbe2698aaa4d8712e7654f87/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java#L836
                    double doubleValue = valueType.getDouble(block, position);
                    return Murmur3.hash32(bytes(doubleToRawLongBits(doubleToLongBits(doubleValue))));
                }
                case STRING:
                case VARCHAR:
                    // both string flavors hash the raw UTF-8 bytes identically in v2
                    return Murmur3.hash32(valueType.getSlice(block, position).getBytes());
                // case TIMESTAMP: // TODO (https://github.com/prestosql/presto/issues/1706): support bucketing v2 for timestamp
                default:
                    throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive primitive category: " + primitiveCategory);
            }
        case LIST:
            return hashOfList((ListTypeInfo) type, block.getObject(position, Block.class));
        case MAP:
            return hashOfMap((MapTypeInfo) type, block.getObject(position, Block.class));
        default:
            // TODO: support more types, e.g. ROW
            throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive category: " + type.getCategory());
    }
}
 
Example 29
Source Project: presto   Source File: HiveBucketingV2.java    License: Apache License 2.0
private static int hash(TypeInfo type, Object value)
{
    // Murmur-based bucketing hash (Hive bucketing v2) for an already-decoded value;
    // mirrors function hashCodeMurmur in Hive's ObjectInspectorUtils (HIVE-18910, commit 7dc47faddb).

    if (value == null) {
        // Hive buckets SQL NULL as zero
        return 0;
    }

    switch (type.getCategory()) {
        case PRIMITIVE:
            PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) type).getPrimitiveCategory();
            switch (primitiveCategory) {
                case BOOLEAN:
                    return ((boolean) value) ? 1 : 0;
                case BYTE:
                    // single byte: Hive uses the value directly, no murmur round
                    return SignedBytes.checkedCast((long) value);
                case SHORT:
                    return Murmur3.hash32(bytes(Shorts.checkedCast((long) value)));
                case INT:
                case DATE:
                    // DATE is stored as a day offset from 1970-01-01 and hashes like INT
                    return Murmur3.hash32(bytes(toIntExact((long) value)));
                case LONG:
                    return Murmur3.hash32(bytes((long) value));
                case FLOAT: {
                    // floatToIntBits canonicalizes NaN; its int result is then implicitly
                    // widened back to float before floatToRawIntBits — Sic! this reproduces
                    // Hive's (implicit) floatToIntBits -> cast to float -> floatToRawIntBits:
                    // https://github.com/apache/hive/blob/7dc47faddba9f079bbe2698aaa4d8712e7654f87/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java#L830
                    float floatValue = intBitsToFloat(toIntExact((long) value));
                    return Murmur3.hash32(bytes(floatToRawIntBits(floatToIntBits(floatValue))));
                }
                case DOUBLE: {
                    // doubleToLongBits canonicalizes NaN; its long result is then implicitly
                    // widened back to double before doubleToRawLongBits — Sic! just as Hive does:
                    // https://github.com/apache/hive/blob/7dc47faddba9f079bbe2698aaa4d8712e7654f87/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java#L836
                    return Murmur3.hash32(bytes(doubleToRawLongBits(doubleToLongBits((double) value))));
                }
                case STRING:
                case VARCHAR:
                    // both string flavors hash the raw UTF-8 bytes identically in v2
                    return Murmur3.hash32(((Slice) value).getBytes());
                // case TIMESTAMP: // TODO (https://github.com/prestosql/presto/issues/1706): support bucketing v2 for timestamp
                default:
                    throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive primitive category: " + primitiveCategory);
            }
        case LIST:
            return hashOfList((ListTypeInfo) type, (Block) value);
        case MAP:
            return hashOfMap((MapTypeInfo) type, (Block) value);
        default:
            // TODO: support more types, e.g. ROW
            throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive category: " + type.getCategory());
    }
}
 
Example 30
Source Project: presto   Source File: HiveBucketingV1.java    License: Apache License 2.0
static int hash(TypeInfo type, Block block, int position)
{
    // Reimplements function hashCode as of HIVE-12025 (commit ba83fd7bff),
    // serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
    // https://github.com/apache/hive/blob/ba83fd7bff/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java

    if (block.isNull(position)) {
        // Hive buckets SQL NULL as zero
        return 0;
    }

    switch (type.getCategory()) {
        case PRIMITIVE:
            PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) type;
            PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
            Type valueType = requireNonNull(HiveType.getPrimitiveType(primitiveTypeInfo));
            switch (primitiveCategory) {
                case BOOLEAN:
                    return valueType.getBoolean(block, position) ? 1 : 0;
                case BYTE:
                    return SignedBytes.checkedCast(valueType.getLong(block, position));
                case SHORT:
                    return Shorts.checkedCast(valueType.getLong(block, position));
                case INT:
                case DATE:
                    // DATE is stored as a day offset from 1970-01-01 and hashes like INT
                    return toIntExact(valueType.getLong(block, position));
                case LONG: {
                    // fold the two 32-bit halves together, like Long.hashCode
                    long longValue = valueType.getLong(block, position);
                    return (int) ((longValue >>> 32) ^ longValue);
                }
                case FLOAT:
                    // round trip through float converts any NaN to the canonical NaN bit pattern
                    return floatToIntBits(intBitsToFloat(toIntExact(valueType.getLong(block, position))));
                case DOUBLE: {
                    long doubleBits = doubleToLongBits(valueType.getDouble(block, position));
                    return (int) ((doubleBits >>> 32) ^ doubleBits);
                }
                case STRING:
                    return hashBytes(0, valueType.getSlice(block, position));
                case VARCHAR:
                    // unlike STRING, VARCHAR seeds the byte hash with 1
                    return hashBytes(1, valueType.getSlice(block, position));
                case TIMESTAMP:
                    return hashTimestamp(valueType.getLong(block, position));
                default:
                    throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive primitive category: " + primitiveCategory);
            }
        case LIST:
            return hashOfList((ListTypeInfo) type, block.getObject(position, Block.class));
        case MAP:
            return hashOfMap((MapTypeInfo) type, block.getObject(position, Block.class));
        default:
            // TODO: support more types, e.g. ROW
            throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive category: " + type.getCategory());
    }
}