Java Code Examples for org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo#getPrimitiveCategory()

The following examples show how to use org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo#getPrimitiveCategory(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: EmoSerDe.java    From emodb with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a raw value into the boxed Java number matching the requested primitive category.
 *
 * <p>Only {@link Number} and {@link Boolean} inputs are accepted; a boolean is treated as the byte
 * 1 (true) or 0 (false).  String representations of numbers are deliberately not parsed.
 *
 * @param value the raw value to convert
 * @param type  the target primitive type; BYTE, SHORT, INT, LONG, FLOAT and DOUBLE are handled
 * @return the value narrowed or widened to the boxed type for the category
 * @throws SerDeException if the value is neither a number nor a boolean, or the category is not
 *                        one of the six handled here
 */
private Object deserializeNumber(Object value, PrimitiveTypeInfo type)
        throws SerDeException {
    final boolean isBoolean = value instanceof Boolean;
    if (!isBoolean && !(value instanceof Number)) {
        throw new SerDeException("Value is not a " + type + ": " + value);
    }

    final Number numeric;
    if (isBoolean) {
        numeric = Byte.valueOf(((Boolean) value) ? (byte) 1 : (byte) 0);
    } else {
        numeric = (Number) value;
    }

    switch (type.getPrimitiveCategory()) {
        case DOUBLE:
            return numeric.doubleValue();
        case FLOAT:
            return numeric.floatValue();
        case LONG:
            return numeric.longValue();
        case INT:
            return numeric.intValue();
        case SHORT:
            return numeric.shortValue();
        case BYTE:
            return numeric.byteValue();
    }

    // Reached only when the category is none of the six above; callers are presumably expected
    // to rule that out beforehand.
    throw new SerDeException("Primitive number did not match any expected categories");
}
 
Example 2
Source File: EmoSerDe.java    From emodb with Apache License 2.0 6 votes vote down vote up
/**
 * Reports whether this deserializer can handle the given primitive type.
 *
 * <p>The accepted categories are VOID, STRING, BOOLEAN, the integral types (BYTE, SHORT, INT, LONG),
 * the floating point types (FLOAT, DOUBLE), DATE and TIMESTAMP.  Every other category is rejected.
 *
 * @param type the primitive type to test
 * @return {@code true} if the category is in the accepted list above, {@code false} otherwise
 */
private boolean isSupportedPrimitive(PrimitiveTypeInfo type) {
    boolean supported = false;
    switch (type.getPrimitiveCategory()) {
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
        case STRING:
        case DATE:
        case TIMESTAMP:
        case VOID:
            supported = true;
            break;
        default:
            break;
    }
    return supported;
}
 
Example 3
Source File: ColumnVectorAssignorFactory.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Picks the column vector assignor for a Hive type.
 *
 * <p>Only primitive types are supported: STRING/BINARY use a bytes assignor, BYTE/SHORT/INT/BOOLEAN/LONG
 * use a long assignor with a category-specific setter, and FLOAT/DOUBLE use a double assignor.
 *
 * @param typeInfo the Hive type of the column
 * @return a new assignor for that column type
 * @throws UnsupportedOperationException for non-primitive types and for any other primitive category
 */
public static IColumnVectorAssignor create( final TypeInfo typeInfo ){
  // Anything that is not a primitive is rejected up front.
  switch ( typeInfo.getCategory() ){
    case PRIMITIVE:
      break;
    default:
      throw new UnsupportedOperationException( "Unsupport vectorize column " + typeInfo.getCategory() );
  }

  final PrimitiveTypeInfo primitive = (PrimitiveTypeInfo)typeInfo;
  switch( primitive.getPrimitiveCategory() ){
    case BINARY:
    case STRING:
      return new BytesColumnVectorAssignor();
    case LONG:
    case BOOLEAN:
      return new LongColumnVectorAssignor( LongPrimitiveSetter.getInstance() );
    case INT:
      return new LongColumnVectorAssignor( IntegerPrimitiveSetter.getInstance() );
    case SHORT:
      return new LongColumnVectorAssignor( ShortPrimitiveSetter.getInstance() );
    case BYTE:
      return new LongColumnVectorAssignor( BytePrimitiveSetter.getInstance() );
    case DOUBLE:
      return new DoubleColumnVectorAssignor( DoublePrimitiveSetter.getInstance() );
    case FLOAT:
      return new DoubleColumnVectorAssignor( FloatPrimitiveSetter.getInstance() );
    default:
      // DATE, DECIMAL, TIMESTAMP, VOID and anything else end up here.
      throw new UnsupportedOperationException( "Unsupport vectorize column " + primitive.getPrimitiveCategory() );
  }
}
 
Example 4
Source File: HiveMetadataUtils.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether a Hive field is declared as VARCHAR or CHAR.
 *
 * @param hiveField the field whose type string is parsed
 * @return {@code true} only for a primitive type whose category is VARCHAR or CHAR
 */
private static boolean isFieldTypeVarchar(FieldSchema hiveField) {
  final TypeInfo parsedType = TypeInfoUtils.getTypeInfoFromTypeString(hiveField.getType());
  if (parsedType.getCategory() != Category.PRIMITIVE) {
    return false;
  }
  final PrimitiveTypeInfo primitive = (PrimitiveTypeInfo) parsedType;
  final boolean isVarchar =
      primitive.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.VARCHAR;
  return isVarchar
      || primitive.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.CHAR;
}
 
Example 5
Source File: HiveMetadataUtils.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether the declared type of a Hive field is a character type with a length
 * (VARCHAR or CHAR).
 *
 * @param hiveField the field to inspect; its type string is parsed into a {@code TypeInfo}
 * @return {@code true} if the type is primitive and its category is VARCHAR or CHAR
 */
private static boolean isFieldTypeVarchar(FieldSchema hiveField) {
  final TypeInfo fieldType = TypeInfoUtils.getTypeInfoFromTypeString(hiveField.getType());
  return fieldType.getCategory() == Category.PRIMITIVE
      && (((PrimitiveTypeInfo) fieldType).getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.VARCHAR
          || ((PrimitiveTypeInfo) fieldType).getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.CHAR);
}
 
Example 6
Source File: HiveTypeUtil.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a Hive primitive type onto the corresponding Flink {@code DataType}.
 *
 * <p>CHAR/VARCHAR keep their declared length, DECIMAL keeps precision and scale, and TIMESTAMP is
 * mapped with precision 9.
 *
 * @param hiveType the Hive primitive type; must not be null
 * @return the matching Flink data type
 * @throws UnsupportedOperationException if the primitive category has no mapping here
 */
private static DataType toFlinkPrimitiveType(PrimitiveTypeInfo hiveType) {
	checkNotNull(hiveType, "hiveType cannot be null");

	switch (hiveType.getPrimitiveCategory()) {
		// Character and binary data
		case STRING:
			return DataTypes.STRING();
		case CHAR: {
			final CharTypeInfo charInfo = (CharTypeInfo) hiveType;
			return DataTypes.CHAR(charInfo.getLength());
		}
		case VARCHAR: {
			final VarcharTypeInfo varcharInfo = (VarcharTypeInfo) hiveType;
			return DataTypes.VARCHAR(varcharInfo.getLength());
		}
		case BINARY:
			return DataTypes.BYTES();

		// Boolean and numeric data
		case BOOLEAN:
			return DataTypes.BOOLEAN();
		case BYTE:
			return DataTypes.TINYINT();
		case SHORT:
			return DataTypes.SMALLINT();
		case INT:
			return DataTypes.INT();
		case LONG:
			return DataTypes.BIGINT();
		case FLOAT:
			return DataTypes.FLOAT();
		case DOUBLE:
			return DataTypes.DOUBLE();
		case DECIMAL: {
			final DecimalTypeInfo decimalInfo = (DecimalTypeInfo) hiveType;
			return DataTypes.DECIMAL(decimalInfo.getPrecision(), decimalInfo.getScale());
		}

		// Temporal data
		case DATE:
			return DataTypes.DATE();
		case TIMESTAMP:
			return DataTypes.TIMESTAMP(9);

		default:
			throw new UnsupportedOperationException(
				String.format("Flink doesn't support Hive primitive type %s yet", hiveType));
	}
}
 
Example 7
Source File: HiveTypeUtil.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a Hive primitive type into its Flink {@code DataType} counterpart.
 *
 * <p>Length (CHAR/VARCHAR) and precision/scale (DECIMAL) are carried over from the Hive type;
 * TIMESTAMP uses the no-argument {@code DataTypes.TIMESTAMP()} factory.
 *
 * @param hiveType the Hive primitive type; must not be null
 * @return the matching Flink data type
 * @throws UnsupportedOperationException if the primitive category has no mapping here
 */
private static DataType toFlinkPrimitiveType(PrimitiveTypeInfo hiveType) {
	checkNotNull(hiveType, "hiveType cannot be null");

	switch (hiveType.getPrimitiveCategory()) {
		// Parameterised types first: they need a downcast to read their parameters.
		case DECIMAL: {
			final DecimalTypeInfo decimal = (DecimalTypeInfo) hiveType;
			return DataTypes.DECIMAL(decimal.getPrecision(), decimal.getScale());
		}
		case VARCHAR: {
			final VarcharTypeInfo varchar = (VarcharTypeInfo) hiveType;
			return DataTypes.VARCHAR(varchar.getLength());
		}
		case CHAR: {
			final CharTypeInfo fixedChar = (CharTypeInfo) hiveType;
			return DataTypes.CHAR(fixedChar.getLength());
		}

		// Parameterless types.
		case BOOLEAN:
			return DataTypes.BOOLEAN();
		case BYTE:
			return DataTypes.TINYINT();
		case SHORT:
			return DataTypes.SMALLINT();
		case INT:
			return DataTypes.INT();
		case LONG:
			return DataTypes.BIGINT();
		case FLOAT:
			return DataTypes.FLOAT();
		case DOUBLE:
			return DataTypes.DOUBLE();
		case STRING:
			return DataTypes.STRING();
		case BINARY:
			return DataTypes.BYTES();
		case DATE:
			return DataTypes.DATE();
		case TIMESTAMP:
			return DataTypes.TIMESTAMP();

		default:
			throw new UnsupportedOperationException(
				String.format("Flink doesn't support Hive primitive type %s yet", hiveType));
	}
}
 
Example 8
Source File: MDSColumnTypeUtil.java    From multiple-dimension-spread with Apache License 2.0 4 votes vote down vote up
/**
 * Maps a Hive type onto the column type used by this project.
 *
 * <p>STRUCT and MAP both become SPREAD, LIST becomes ARRAY and UNION stays UNION.  Primitive types map
 * one-to-one for STRING, BINARY (as BYTES), BOOLEAN, BYTE, SHORT, INT (as INTEGER), LONG, FLOAT and DOUBLE.
 *
 * @param typeInfo the Hive type to translate
 * @return the matching column type, or {@code ColumnType.UNKNOWN} when there is no mapping
 */
public static ColumnType typeInfoToColumnType( final TypeInfo typeInfo ){
  switch ( typeInfo.getCategory() ){
    case MAP:
    case STRUCT:
      return ColumnType.SPREAD;
    case LIST:
      return ColumnType.ARRAY;
    case UNION:
      return ColumnType.UNION;
    case PRIMITIVE:
      break;
    default:
      return ColumnType.UNKNOWN;
  }

  final PrimitiveTypeInfo primitive = (PrimitiveTypeInfo)typeInfo;
  switch( primitive.getPrimitiveCategory() ){
    case BOOLEAN:
      return ColumnType.BOOLEAN;
    case BYTE:
      return ColumnType.BYTE;
    case SHORT:
      return ColumnType.SHORT;
    case INT:
      return ColumnType.INTEGER;
    case LONG:
      return ColumnType.LONG;
    case FLOAT:
      return ColumnType.FLOAT;
    case DOUBLE:
      return ColumnType.DOUBLE;
    case STRING:
      return ColumnType.STRING;
    case BINARY:
      return ColumnType.BYTES;
    default:
      // DATE, DECIMAL, TIMESTAMP, VOID and any other category have no mapping.
      return ColumnType.UNKNOWN;
  }
}
 
Example 9
Source File: UnionField.java    From multiple-dimension-spread with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the accessor used to read one member of a union column.
 *
 * <p>STRUCT and MAP members are read as nested SPREAD columns and LIST members as nested ARRAY columns.
 * Supported primitive members (STRING, BINARY, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE) get a
 * primitive accessor backed by an object inspector.  Everything else (nested UNION, DATE, DECIMAL,
 * TIMESTAMP, VOID, unknown categories) gets a {@code NullGetUnionObject}.
 *
 * <p>The object inspector is only created once the primitive category is known to be supported.
 * Creating it eagerly would make the factory throw {@code UnsupportedOperationException} for the
 * unsupported categories, so the null-accessor fallback for them could never be reached.
 *
 * @param tag      the union tag identifying the member
 * @param typeInfo the Hive type of the member
 * @return the accessor for that member; never {@code null}
 */
private IGetUnionObject craeteGetUnionObject( final byte tag , final TypeInfo typeInfo ){
  switch ( typeInfo.getCategory() ){
    case STRUCT:
    case MAP:
      return new NestedGetUnionObject( tag , ColumnType.SPREAD );
    case LIST:
      return new NestedGetUnionObject( tag , ColumnType.ARRAY );
    case PRIMITIVE:
      break;
    default:
      // UNION and any unrecognised category.
      return new NullGetUnionObject( tag );
  }

  final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo)typeInfo;
  final ColumnType columnType;
  switch( primitiveTypeInfo.getPrimitiveCategory() ){
    case STRING:
      columnType = ColumnType.STRING;
      break;
    case BINARY:
      columnType = ColumnType.BYTES;
      break;
    case BOOLEAN:
      columnType = ColumnType.BOOLEAN;
      break;
    case BYTE:
      columnType = ColumnType.BYTE;
      break;
    case DOUBLE:
      columnType = ColumnType.DOUBLE;
      break;
    case FLOAT:
      columnType = ColumnType.FLOAT;
      break;
    case INT:
      columnType = ColumnType.INTEGER;
      break;
    case LONG:
      columnType = ColumnType.LONG;
      break;
    case SHORT:
      columnType = ColumnType.SHORT;
      break;
    default:
      // DATE, DECIMAL, TIMESTAMP, VOID, ...: return before asking the factory for an inspector.
      return new NullGetUnionObject( tag );
  }

  final PrimitiveObjectInspector primitiveObjectInspector = (PrimitiveObjectInspector)( MDSObjectInspectorFactory.craeteObjectInspectorFromTypeInfo( typeInfo ) );
  return new PrimitiveGetUnionObject( tag , primitiveObjectInspector , columnType );
}
 
Example 10
Source File: MDSObjectInspectorFactory.java    From multiple-dimension-spread with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the object inspector for a Hive type.
 *
 * <p>STRUCT, MAP and LIST get the project's own inspectors.  UNION gets a standard union inspector
 * built recursively from its member types.  Primitive types get the writable inspector for STRING,
 * BINARY, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT or DOUBLE.
 *
 * @param typeInfo the Hive type to inspect
 * @return the object inspector for that type
 * @throws UnsupportedOperationException for an unknown category or any other primitive category
 *                                       (DATE, DECIMAL, TIMESTAMP, VOID, ...)
 */
public static ObjectInspector craeteObjectInspectorFromTypeInfo( final TypeInfo typeInfo ){
  switch ( typeInfo.getCategory() ){
    case LIST:
      return new MDSListObjectInspector( (ListTypeInfo)typeInfo );
    case MAP:
      return new MDSMapObjectInspector( (MapTypeInfo)typeInfo );
    case STRUCT:
      return new MDSStructObjectInspector( (StructTypeInfo)typeInfo );
    case UNION: {
      final UnionTypeInfo union = (UnionTypeInfo)typeInfo;
      final List<ObjectInspector> memberInspectors = new ArrayList<ObjectInspector>();
      for( final TypeInfo memberType : union.getAllUnionObjectTypeInfos() ){
        memberInspectors.add( craeteObjectInspectorFromTypeInfo( memberType ) );
      }
      return ObjectInspectorFactory.getStandardUnionObjectInspector( memberInspectors );
    }
    case PRIMITIVE:
      break;
    default:
      throw new UnsupportedOperationException( "Unknown category " + typeInfo.getCategory() );
  }

  final PrimitiveTypeInfo primitive = (PrimitiveTypeInfo)typeInfo;
  switch( primitive.getPrimitiveCategory() ){
    case BOOLEAN:
      return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
    case BYTE:
      return PrimitiveObjectInspectorFactory.writableByteObjectInspector;
    case SHORT:
      return PrimitiveObjectInspectorFactory.writableShortObjectInspector;
    case INT:
      return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
    case LONG:
      return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
    case FLOAT:
      return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
    case DOUBLE:
      return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
    case STRING:
      return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    case BINARY:
      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
    default:
      throw new UnsupportedOperationException( "Unknown primitive category " + primitive.getPrimitiveCategory() );
  }
}
 
Example 11
Source File: HiveJsonStructReader.java    From incubator-hivemall with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a textual value into the Java object expected for the inspector's primitive type.
 *
 * <p>When {@code writeablePrimitives} is set, the conversion is delegated to a Hive converter from
 * the Java string inspector to {@code oi}.  Otherwise the string is parsed according to the
 * primitive category.
 *
 * @param s  the textual value to convert
 * @param oi the inspector describing the target primitive type
 * @return the converted object; {@code null} if a BINARY value cannot be decoded
 * @throws IOException if the primitive category has no string conversion here
 */
private Object getObjectOfCorrespondingPrimitiveType(String s, PrimitiveObjectInspector oi)
        throws IOException {
    PrimitiveTypeInfo typeInfo = oi.getTypeInfo();
    if (writeablePrimitives) {
        Converter c = ObjectInspectorConverters.getConverter(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi);
        return c.convert(s);
    }

    switch (typeInfo.getPrimitiveCategory()) {
        case INT:
            return Integer.valueOf(s);
        case BYTE:
            return Byte.valueOf(s);
        case SHORT:
            return Short.valueOf(s);
        case LONG:
            return Long.valueOf(s);
        case BOOLEAN:
            return (s.equalsIgnoreCase("true"));
        case FLOAT:
            return Float.valueOf(s);
        case DOUBLE:
            return Double.valueOf(s);
        case STRING:
            return s;
        case BINARY:
            try {
                // Text.decode expects UTF-8 input, so encode explicitly as UTF-8 instead of relying
                // on the platform default charset (which made the result machine-dependent).
                byte[] utf8 = s.getBytes(java.nio.charset.StandardCharsets.UTF_8);
                String t = Text.decode(utf8, 0, utf8.length);
                return t.getBytes(java.nio.charset.StandardCharsets.UTF_8);
            } catch (CharacterCodingException e) {
                // Best effort: an undecodable value becomes null rather than failing the row.
                LOG.warn("Error generating json binary type from object.", e);
                return null;
            }
        case DATE:
            return Date.valueOf(s);
        case TIMESTAMP:
            return Timestamp.valueOf(s);
        case DECIMAL:
            return HiveDecimal.create(s);
        case VARCHAR:
            return new HiveVarchar(s, ((BaseCharTypeInfo) typeInfo).getLength());
        case CHAR:
            return new HiveChar(s, ((BaseCharTypeInfo) typeInfo).getLength());
        default:
            throw new IOException(
                "Could not convert from string to " + typeInfo.getPrimitiveCategory());
    }
}
 
Example 12
Source File: HiveSchemaConverter.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a nullable Arrow field, without children, for a Hive primitive type.
 *
 * <p>BYTE, SHORT and INT all map to a signed 32-bit int; LONG maps to a signed 64-bit int.
 * STRING, VARCHAR and CHAR map to UTF-8.  DATE and TIMESTAMP use millisecond units.
 *
 * @param name     the field name
 * @param typeInfo the Hive type of the field
 * @return the Arrow field, or {@code null} if the type is not primitive or the primitive
 *         category has no mapping here (UNKNOWN, VOID, ...)
 */
public static Field getArrowFieldFromHivePrimitiveType(String name, TypeInfo typeInfo) {
  switch (typeInfo.getCategory()) {
  case PRIMITIVE:
    break;
  default:
    return null;
  }

  final PrimitiveTypeInfo primitive = (PrimitiveTypeInfo) typeInfo;
  switch (primitive.getPrimitiveCategory()) {
  case BOOLEAN:
    return new Field(name, true, new Bool(), null);
  case BYTE:
  case SHORT:
  case INT:
    return new Field(name, true, new Int(32, true), null);
  case LONG:
    return new Field(name, true, new Int(64, true), null);
  case FLOAT:
    return new Field(name, true, new FloatingPoint(FloatingPointPrecision.SINGLE), null);
  case DOUBLE:
    return new Field(name, true, new FloatingPoint(FloatingPointPrecision.DOUBLE), null);
  case DECIMAL: {
    final DecimalTypeInfo decimal = (DecimalTypeInfo) primitive;
    return new Field(name, true, new Decimal(decimal.getPrecision(), decimal.getScale()), null);
  }
  case DATE:
    return new Field(name, true, new Date(DateUnit.MILLISECOND), null);
  case TIMESTAMP:
    return new Field(name, true, new Timestamp(TimeUnit.MILLISECOND, null), null);
  case BINARY:
    return new Field(name, true, new Binary(), null);
  case STRING:
  case VARCHAR:
  case CHAR:
    return new Field(name, true, new Utf8(), null);
  default:
    return null;
  }
}
 
Example 13
Source File: HiveUtilities.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a Hive primitive type to the corresponding Dremio minor type.
 *
 * @param primitiveTypeInfo the Hive primitive type to convert
 * @param options           option manager, consulted to check whether DECIMAL support is enabled
 * @return the matching minor type; {@code null} only if the unsupported-type helper returns
 *         instead of throwing (presumably it always throws)
 */
private static final MinorType getMinorTypeFromHivePrimitiveTypeInfo(PrimitiveTypeInfo primitiveTypeInfo,
    OptionManager options) {
  switch(primitiveTypeInfo.getPrimitiveCategory()) {
    case BOOLEAN:
      return MinorType.BIT;
    // TODO (DRILL-2470)
    // Byte and short (tinyint and smallint in SQL types) are currently read as integers
    // as these smaller integer types are not fully supported in Dremio today.
    case BYTE:
    case SHORT:
    case INT:
      return MinorType.INT;
    case LONG:
      return MinorType.BIGINT;
    case FLOAT:
      return MinorType.FLOAT4;
    case DOUBLE:
      return MinorType.FLOAT8;
    case DECIMAL:
      // DECIMAL is only usable when the planner option enables it.
      if (!options.getOption(PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY).getBoolVal()) {
        throw UserException.unsupportedError()
            .message(ExecErrorConstants.DECIMAL_DISABLE_ERR_MSG)
            .build(logger);
      }
      return MinorType.DECIMAL;
    case CHAR:
    case VARCHAR:
    case STRING:
      return MinorType.VARCHAR;
    case BINARY:
      return MinorType.VARBINARY;
    case DATE:
      return MinorType.DATEMILLI;
    case TIMESTAMP:
      return MinorType.TIMESTAMPMILLI;
  }
  // No case matched: report the category as unsupported.
  throwUnsupportedHiveDataTypeError(primitiveTypeInfo.getPrimitiveCategory().toString());
  return null;
}
 
Example 14
Source File: HiveBucketingV1.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Computes the bucketing hash of a single value of the given Hive type.
 *
 * A null value hashes to 0.  LIST and MAP values are delegated to hashOfList / hashOfMap; primitive
 * values are hashed according to their primitive category.  Any other category, and any primitive
 * category not listed below, is rejected with UnsupportedOperationException.
 */
private static int hash(TypeInfo type, Object value)
{
    if (value == null) {
        return 0;
    }

    switch (type.getCategory()) {
        case LIST:
            return hashOfList((ListTypeInfo) type, (Block) value);
        case MAP:
            return hashOfMap((MapTypeInfo) type, (Block) value);
        case PRIMITIVE:
            break;
        default:
            // TODO: support more types, e.g. ROW
            throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive category: " + type.getCategory());
    }

    PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) type;
    PrimitiveCategory category = primitiveType.getPrimitiveCategory();
    switch (category) {
        case BOOLEAN: {
            boolean flag = (boolean) value;
            return flag ? 1 : 0;
        }
        case BYTE:
            return SignedBytes.checkedCast((long) value);
        case SHORT:
            return Shorts.checkedCast((long) value);
        case INT:
        case DATE:
            // for DATE the value is the day offset from 1970-01-01
            return toIntExact((long) value);
        case LONG: {
            long bits = (long) value;
            return (int) ((bits >>> 32) ^ bits);
        }
        case FLOAT: {
            // convert to canonical NaN if necessary
            int rawBits = toIntExact((long) value);
            return floatToIntBits(intBitsToFloat(rawBits));
        }
        case DOUBLE: {
            long bits = doubleToLongBits((double) value);
            return (int) ((bits >>> 32) ^ bits);
        }
        case STRING:
            return hashBytes(0, (Slice) value);
        case VARCHAR:
            return hashBytes(1, (Slice) value);
        case TIMESTAMP:
            return hashTimestamp((long) value);
        default:
            throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive primitive category: " + category);
    }
}
 
Example 15
Source File: HiveUtilities.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Maps a Hive primitive type onto a Dremio minor type.
 *
 * @param primitiveTypeInfo the Hive primitive type being mapped
 * @param options           source of the planner option that gates DECIMAL support
 * @return the Dremio minor type for the category; {@code null} is only returned if the
 *         unsupported-type helper returns normally (presumably it throws)
 */
private static final MinorType getMinorTypeFromHivePrimitiveTypeInfo(PrimitiveTypeInfo primitiveTypeInfo,
    OptionManager options) {
  switch(primitiveTypeInfo.getPrimitiveCategory()) {
    case DECIMAL: {
      final boolean decimalEnabled =
          options.getOption(PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY).getBoolVal();
      if (decimalEnabled) {
        return MinorType.DECIMAL;
      }
      throw UserException.unsupportedError()
          .message(ExecErrorConstants.DECIMAL_DISABLE_ERR_MSG)
          .build(logger);
    }
    case TIMESTAMP:
      return MinorType.TIMESTAMPMILLI;
    case DATE:
      return MinorType.DATEMILLI;
    case STRING:
    case VARCHAR:
    case CHAR:
      return MinorType.VARCHAR;
    case BINARY:
      return MinorType.VARBINARY;
    case BOOLEAN:
      return MinorType.BIT;
    case DOUBLE:
      return MinorType.FLOAT8;
    case FLOAT:
      return MinorType.FLOAT4;
    case LONG:
      return MinorType.BIGINT;
    // TODO (DRILL-2470)
    // Byte and short (tinyint and smallint in SQL types) are currently read as integers
    // as these smaller integer types are not fully supported in Dremio today.
    case INT:
    case SHORT:
    case BYTE:
      return MinorType.INT;
  }
  // Falling out of the switch means the category is not handled.
  throwUnsupportedHiveDataTypeError(primitiveTypeInfo.getPrimitiveCategory().toString());
  return null;
}
 
Example 16
Source File: HiveBucketingV1.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Computes the bucketing hash of the value stored at a block position.
 *
 * A null position hashes to 0.  LIST and MAP positions are delegated to hashOfList / hashOfMap on the
 * nested block; primitive positions are read through the Presto type for the Hive primitive type and
 * hashed according to the primitive category.  Anything else is rejected with
 * UnsupportedOperationException.
 */
static int hash(TypeInfo type, Block block, int position)
{
    // This function mirrors the behavior of function hashCode in
    // HIVE-12025 ba83fd7bff serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
    // https://github.com/apache/hive/blob/ba83fd7bff/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java

    if (block.isNull(position)) {
        return 0;
    }

    switch (type.getCategory()) {
        case LIST:
            return hashOfList((ListTypeInfo) type, block.getObject(position, Block.class));
        case MAP:
            return hashOfMap((MapTypeInfo) type, block.getObject(position, Block.class));
        case PRIMITIVE:
            break;
        default:
            // TODO: support more types, e.g. ROW
            throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive category: " + type.getCategory());
    }

    PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) type;
    PrimitiveCategory category = primitiveType.getPrimitiveCategory();
    Type reader = requireNonNull(HiveType.getPrimitiveType(primitiveType));
    switch (category) {
        case BOOLEAN: {
            boolean flag = reader.getBoolean(block, position);
            return flag ? 1 : 0;
        }
        case BYTE:
            return SignedBytes.checkedCast(reader.getLong(block, position));
        case SHORT:
            return Shorts.checkedCast(reader.getLong(block, position));
        case INT:
        case DATE:
            // for DATE the value is the day offset from 1970-01-01
            return toIntExact(reader.getLong(block, position));
        case LONG: {
            long bits = reader.getLong(block, position);
            return (int) ((bits >>> 32) ^ bits);
        }
        case FLOAT: {
            // convert to canonical NaN if necessary
            int rawBits = toIntExact(reader.getLong(block, position));
            return floatToIntBits(intBitsToFloat(rawBits));
        }
        case DOUBLE: {
            long bits = doubleToLongBits(reader.getDouble(block, position));
            return (int) ((bits >>> 32) ^ bits);
        }
        case STRING:
            return hashBytes(0, reader.getSlice(block, position));
        case VARCHAR:
            return hashBytes(1, reader.getSlice(block, position));
        case TIMESTAMP:
            return hashTimestamp(reader.getLong(block, position));
        default:
            throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive primitive category: " + category);
    }
}
 
Example 17
Source File: SolrSerde.java    From hive-solr with MIT License 4 votes vote down vote up
/**
 * Turns a map of column name to value into a row with one entry per configured column.
 *
 * <p>The shared {@code row} list is cleared and refilled on every call, so the returned object is
 * reused between calls.  STRING columns are copied as strings; LONG and INT columns are parsed as
 * {@code Long}.  Every other case (missing or null value, unparsable number, non-primitive or
 * unhandled primitive type) yields {@code null} for that column.
 *
 * <p>An entry is always added for every column, including columns whose value is absent, so that
 * entry {@code i} of the row always belongs to column {@code i}.  Skipping absent columns would
 * shift all later values into the wrong position.
 *
 * @param writable the record to deserialize; must be a {@code MapWritable}
 * @return the shared row list, holding exactly {@code columnNames.size()} entries
 * @throws SerDeException declared by the overridden method; not thrown by this implementation
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
    final MapWritable input = (MapWritable) writable;
    final Text key = new Text();
    row.clear();

    for (int i = 0; i < columnNames.size(); i++) {
        key.set(columnNames.get(i));
        final Writable value = input.get(key);

        Object obj = null;
        if (value != null && !NullWritable.get().equals(value)) {
            final TypeInfo typeInfo = columnTypes.get(i);
            if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) {
                final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
                switch (primitiveTypeInfo.getPrimitiveCategory()) {
                    case STRING:
                        obj = value.toString();
                        break;
                    case LONG:
                    case INT:
                        // NOTE(review): INT columns also receive a Long here - confirm that the
                        // object inspector for INT columns accepts that.
                        try {
                            obj = Long.parseLong(value.toString());
                        } catch (Exception e) {
                            // Best effort: an unparsable number is reported and left as null.
                            e.printStackTrace();
                        }
                        break;
                    default:
                        // Other primitive categories are not converted and stay null.
                        break;
                }
            }
        }
        // Always add, even when null, to keep row positions aligned with columnNames.
        row.add(obj);
    }

    return row;
}
 
Example 18
Source File: ExcelSerde.java    From hadoopoffice with Apache License 2.0 4 votes vote down vote up
/**
 * Deserializes an object of type @see #getSerializedClass() into a row of Java/Hive objects.
 * Values are first converted according to the schema by the read converter; DATE, DECIMAL, CHAR and
 * VARCHAR values are then wrapped in the types Hive expects (java.sql.Date, HiveDecimal, HiveChar,
 * HiveVarchar), while all other supported types are passed through unchanged.
 *
 * @param arg0 object of type @see #getSerializedClass()
 * @return the shared null row if the input is null or a NullWritable, otherwise an array containing
 *         objects of type primitive Java (e.g. string, byte, integer)/Hive (e.g HiveDecimal, HiveVarChar),
 *         padded with nulls up to the number of configured columns
 * @throws SerDeException if a column has a primitive type that is not supported
 */
@Override
public Object deserialize(Writable arg0) throws SerDeException {
	if (arg0 == null || arg0 instanceof NullWritable) {
		return this.nullRow;
	}
	Object[] row = this.readConverter
			.getDataAccordingToSchema((SpreadSheetCellDAO[]) ((ArrayWritable) arg0).get());
	// check if supported type and convert to hive type, if necessary
	for (int col = 0; col < row.length; col++) {
		final PrimitiveTypeInfo columnType = (PrimitiveTypeInfo) this.columnTypes.get(col);
		final Object cell = row[col];
		switch (columnType.getPrimitiveCategory()) {
		case STRING:
		case BYTE:
		case SHORT:
		case INT:
		case LONG:
		case FLOAT:
		case DOUBLE:
		case BOOLEAN:
		case TIMESTAMP:
			// already in the representation Hive expects
			break;
		case DATE:
			if (cell != null) {
				row[col] = new java.sql.Date(((Date) cell).getTime());
			}
			break;
		case DECIMAL:
			if (cell != null) {
				row[col] = HiveDecimal.create((BigDecimal) cell);
			}
			break;
		case CHAR:
			if (cell != null) {
				row[col] = new HiveChar((String) cell, ((CharTypeInfo) columnType).getLength());
			}
			break;
		case VARCHAR:
			if (cell != null) {
				row[col] = new HiveVarchar((String) cell, ((VarcharTypeInfo) columnType).getLength());
			}
			break;
		default:
			throw new SerDeException("Unsupported type " + columnType);
		}
	}
	// can happen in rare cases where a row does not contain all columns
	final int columnCount = this.columnNames.size();
	if (columnCount > row.length) {
		final Object[] padded = new Object[columnCount];
		System.arraycopy(row, 0, padded, 0, row.length);
		row = padded;
	}
	return row;
}
 
Example 19
Source File: HiveBucketingV2.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Computes the bucketing hash of a single value of the given Hive type.
 *
 * A null value hashes to 0.  LIST and MAP values are delegated to hashOfList / hashOfMap.  For
 * primitive values, BOOLEAN and BYTE are returned directly as an int, while SHORT, INT, LONG, FLOAT,
 * DOUBLE, STRING, VARCHAR and DATE go through Murmur3.hash32.  Every other category (including
 * TIMESTAMP, which is commented out below) throws UnsupportedOperationException.
 *
 * NOTE(review): the nested floatToRawIntBits(floatToIntBits(...)) and
 * doubleToRawLongBits(doubleToLongBits(...)) calls are intentional (see the "Sic!" comments);
 * do not simplify them.
 */
private static int hash(TypeInfo type, Object value)
{
    // null hashes to 0 regardless of type
    if (value == null) {
        return 0;
    }

    switch (type.getCategory()) {
        case PRIMITIVE:
            PrimitiveTypeInfo typeInfo = (PrimitiveTypeInfo) type;
            PrimitiveCategory primitiveCategory = typeInfo.getPrimitiveCategory();
            switch (primitiveCategory) {
                case BOOLEAN:
                    return (boolean) value ? 1 : 0;
                case BYTE:
                    // the byte value itself is the hash (no Murmur3 here)
                    return SignedBytes.checkedCast((long) value);
                case SHORT:
                    return Murmur3.hash32(bytes(Shorts.checkedCast((long) value)));
                case INT:
                    return Murmur3.hash32(bytes(toIntExact((long) value)));
                case LONG:
                    return Murmur3.hash32(bytes((long) value));
                case FLOAT:
                    // convert to canonical NaN if necessary
                    // Sic! we're `floatToIntBits -> cast to float -> floatToRawIntBits` just as it is (implicitly) done in
                    // https://github.com/apache/hive/blob/7dc47faddba9f079bbe2698aaa4d8712e7654f87/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java#L830
                    return Murmur3.hash32(bytes(floatToRawIntBits(floatToIntBits(intBitsToFloat(toIntExact((long) value))))));
                case DOUBLE:
                    // convert to canonical NaN if necessary
                    // Sic! we're `doubleToLongBits -> cast to double -> doubleToRawLongBits` just as it is (implicitly) done in
                    // https://github.com/apache/hive/blob/7dc47faddba9f079bbe2698aaa4d8712e7654f87/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java#L836
                    return Murmur3.hash32(bytes(doubleToRawLongBits(doubleToLongBits((double) value))));
                case STRING:
                    return Murmur3.hash32(((Slice) value).getBytes());
                case VARCHAR:
                    // same hashing as STRING
                    return Murmur3.hash32(((Slice) value).getBytes());
                case DATE:
                    // day offset from 1970-01-01
                    return Murmur3.hash32(bytes(toIntExact((long) value)));
                // case TIMESTAMP: // TODO (https://github.com/prestosql/presto/issues/1706): support bucketing v2 for timestamp
                default:
                    throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive primitive category: " + primitiveCategory);
            }
        case LIST:
            return hashOfList((ListTypeInfo) type, (Block) value);
        case MAP:
            return hashOfMap((MapTypeInfo) type, (Block) value);
        default:
            // TODO: support more types, e.g. ROW
            throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive category: " + type.getCategory());
    }
}
 
Example 20
Source File: HiveBucketingV2.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Computes the Hive bucketing hash code of the value stored at {@code position} in {@code block}.
 *
 * <p>This mirrors the behavior of {@code hashCodeMurmur} in Hive's {@code ObjectInspectorUtils}
 * (HIVE-18910 and following, commit 7dc47faddb):
 * https://github.com/apache/hive/blob/7dc47faddba9f079bbe2698aaa4d8712e7654f87/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
 *
 * @param type Hive type describing the value
 * @param block block that holds the value
 * @param position index of the value within {@code block}
 * @return the hash code, or {@code 0} when the position is null
 * @throws UnsupportedOperationException if the Hive category (or primitive category) is not handled here
 */
private static int hash(TypeInfo type, Block block, int position)
{
    // A null value always hashes to zero, regardless of its type.
    if (block.isNull(position)) {
        return 0;
    }

    switch (type.getCategory()) {
        case PRIMITIVE:
            return hashPrimitive((PrimitiveTypeInfo) type, block, position);
        case LIST: {
            ListTypeInfo listType = (ListTypeInfo) type;
            Block elements = block.getObject(position, Block.class);
            return hashOfList(listType, elements);
        }
        case MAP: {
            MapTypeInfo mapType = (MapTypeInfo) type;
            Block entries = block.getObject(position, Block.class);
            return hashOfMap(mapType, entries);
        }
        default:
            // TODO: support more types, e.g. ROW
            throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive category: " + type.getCategory());
    }
}

/**
 * Hashes a non-null primitive value read from {@code block} at {@code position}.
 *
 * @throws NullPointerException if {@code HiveType.getPrimitiveType} returns null for {@code primitiveType}
 * @throws UnsupportedOperationException if the primitive category is not handled here
 */
private static int hashPrimitive(PrimitiveTypeInfo primitiveType, Block block, int position)
{
    PrimitiveCategory category = primitiveType.getPrimitiveCategory();
    Type valueType = requireNonNull(HiveType.getPrimitiveType(primitiveType));

    switch (category) {
        case BOOLEAN:
            if (valueType.getBoolean(block, position)) {
                return 1;
            }
            return 0;
        case BYTE:
            // The byte value itself is the hash; no Murmur3 is applied.
            return SignedBytes.checkedCast(valueType.getLong(block, position));
        case SHORT: {
            short shortValue = Shorts.checkedCast(valueType.getLong(block, position));
            return Murmur3.hash32(bytes(shortValue));
        }
        case INT:
        case DATE: {
            // For DATE the stored value is the day offset from 1970-01-01; it is hashed exactly like an INT.
            int intValue = toIntExact(valueType.getLong(block, position));
            return Murmur3.hash32(bytes(intValue));
        }
        case LONG: {
            long longValue = valueType.getLong(block, position);
            return Murmur3.hash32(bytes(longValue));
        }
        case FLOAT: {
            // convert to canonical NaN if necessary
            // Sic! we're `floatToIntBits -> cast to float -> floatToRawIntBits` just as it is (implicitly) done in
            // https://github.com/apache/hive/blob/7dc47faddba9f079bbe2698aaa4d8712e7654f87/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java#L830
            float floatValue = intBitsToFloat(toIntExact(valueType.getLong(block, position)));
            int canonicalBits = floatToIntBits(floatValue);
            // The int is intentionally passed where a float is expected (implicit widening).
            return Murmur3.hash32(bytes(floatToRawIntBits(canonicalBits)));
        }
        case DOUBLE: {
            // Sic! we're `doubleToLongBits -> cast to double -> doubleToRawLongBits` just as it is (implicitly) done in
            // https://github.com/apache/hive/blob/7dc47faddba9f079bbe2698aaa4d8712e7654f87/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java#L836
            long canonicalBits = doubleToLongBits(valueType.getDouble(block, position));
            // The long is intentionally passed where a double is expected (implicit widening).
            return Murmur3.hash32(bytes(doubleToRawLongBits(canonicalBits)));
        }
        case STRING:
        case VARCHAR: {
            byte[] rawBytes = valueType.getSlice(block, position).getBytes();
            return Murmur3.hash32(rawBytes);
        }
        // case TIMESTAMP: // TODO (https://github.com/prestosql/presto/issues/1706): support bucketing v2 for timestamp
        default:
            throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive primitive category: " + category);
    }
}