Java Code Examples for org.apache.parquet.schema.Type#getOriginalType()

The following examples show how to use org.apache.parquet.schema.Type#getOriginalType(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Metadata.java    From Bats with Apache License 2.0 6 votes vote down vote up
/**
 * Descends from {@code type} along {@code path}, starting at index {@code depth}, until a
 * primitive type is reached, and describes that primitive column.
 *
 * @param schema message schema used to look up the max repetition/definition levels of {@code path}
 * @param type   type to start the descent from
 * @param path   column path; {@code path[depth]} names the child to follow from {@code type}
 * @param depth  index into {@code path} of the next segment to follow
 * @return the original type, decimal precision/scale (0 when there is no decimal metadata) and
 *         the max repetition and definition levels of the column
 */
private ColTypeInfo getColTypeInfo(MessageType schema, Type type, String[] path, int depth) {
  // Walk down the group types, one path segment per level, until the leaf is found.
  Type current = type;
  int level = depth;
  while (!current.isPrimitive()) {
    current = ((GroupType) current).getType(path[level]);
    level++;
  }

  PrimitiveType leaf = (PrimitiveType) current;
  boolean hasDecimalMetadata = leaf.getDecimalMetadata() != null;
  int precision = hasDecimalMetadata ? leaf.getDecimalMetadata().getPrecision() : 0;
  int scale = hasDecimalMetadata ? leaf.getDecimalMetadata().getScale() : 0;

  int maxRepetition = schema.getMaxRepetitionLevel(path);
  int maxDefinition = schema.getMaxDefinitionLevel(path);

  return new ColTypeInfo(current.getOriginalType(), precision, scale, maxRepetition, maxDefinition);
}
 
Example 2
Source File: ParquetRecordWriter.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Changes the list inner '$data$' vector name to 'element' in the schema.
 *
 * <p>A new type named {@code "element"} is built that copies the repetition, original type and
 * field id of {@code childType}. For a primitive child the primitive type name, type length and
 * decimal metadata are copied as well; for a group child its fields are copied.
 *
 * @param childType the list child type to rename
 * @return a copy of {@code childType} whose name is {@code "element"}
 */
private Type renameChildTypeToElement(Type childType) {
  if (childType.isPrimitive()) {
    PrimitiveType childPrimitiveType = childType.asPrimitiveType();
    return new PrimitiveType(childType.getRepetition(),
      childPrimitiveType.getPrimitiveTypeName(),
      childPrimitiveType.getTypeLength(),
      "element",
      childPrimitiveType.getOriginalType(),
      childPrimitiveType.getDecimalMetadata(),
      childPrimitiveType.getId());
  } else {
    GroupType childGroupType = childType.asGroupType();
    Type.ID id = childGroupType.getId();
    GroupType groupType = new GroupType(childType.getRepetition(),
      "element",
      childType.getOriginalType(),
      childGroupType.getFields());
    if (id != null) {
      // Read the numeric id through its accessor rather than relying on how
      // Type.ID happens to implement hashCode().
      groupType = groupType.withId(id.intValue());
    }
    return groupType;
  }
}
 
Example 3
Source File: TestDataWritableWriter.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a single field value to the Parquet RecordConsumer, dispatching on the shape of the
 * Parquet type: primitive, LIST group, MAP / MAP_KEY_VALUE group, or plain struct group.
 * The inspector category is checked against the expected one before each write.
 *
 * @param value The writable object that contains the value.
 * @param inspector The object inspector used to get the correct value type.
 * @param type Type that contains information about the type schema.
 */
private void writeValue(Object value, ObjectInspector inspector, Type type)
{
    if (type.isPrimitive()) {
        checkInspectorCategory(inspector, ObjectInspector.Category.PRIMITIVE);
        writePrimitive(value, (PrimitiveObjectInspector) inspector);
        return;
    }

    GroupType group = type.asGroupType();
    OriginalType annotation = type.getOriginalType();
    // Reference comparisons are null-safe, so an un-annotated group falls through to the struct case.
    boolean isList = annotation == OriginalType.LIST;
    boolean isMap = annotation == OriginalType.MAP || annotation == OriginalType.MAP_KEY_VALUE;

    if (isList) {
        checkInspectorCategory(inspector, ObjectInspector.Category.LIST);
        ListObjectInspector listInspector = (ListObjectInspector) inspector;
        if (singleLevelArray) {
            writeSingleLevelArray(value, listInspector, group);
        }
        else {
            writeArray(value, listInspector, group);
        }
        return;
    }

    if (isMap) {
        checkInspectorCategory(inspector, ObjectInspector.Category.MAP);
        writeMap(value, (MapObjectInspector) inspector, group);
        return;
    }

    checkInspectorCategory(inspector, ObjectInspector.Category.STRUCT);
    writeGroup(value, (StructObjectInspector) inspector, group);
}
 
Example 4
Source File: RowConverter.java    From flink with Apache License 2.0 5 votes vote down vote up
RowPrimitiveConverter(Type dataType, ParentDataHolder parentDataHolder, int pos) {
	this.parentDataHolder = parentDataHolder;
	this.pos = pos;
	if (dataType.isPrimitive()) {
		this.originalType = dataType.getOriginalType();
		this.primitiveTypeName = dataType.asPrimitiveType().getPrimitiveTypeName();
	} else {
		// Backward-compatibility  It can be a group type middle layer
		Type primitiveType = dataType.asGroupType().getType(0);
		this.originalType = primitiveType.getOriginalType();
		this.primitiveTypeName = primitiveType.asPrimitiveType().getPrimitiveTypeName();
	}
}
 
Example 5
Source File: ParquetRowiseReader.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that every decimal column of the output has the same precision and scale in the
 * Parquet schema as in the output (table) schema.
 *
 * <p>For each output vector whose Arrow type is a decimal, the corresponding Parquet type is
 * looked up by the column name returned from {@code columnResolver}. Fields for which no Parquet
 * type is found are skipped.
 *
 * @param output         mutator whose vectors are checked
 * @param columnResolver resolves an output field name to its Parquet column name
 * @throws UserException (schema change error) when the Parquet type is not a DECIMAL with the
 *                       same precision and scale as the output field
 */
private void verifyDecimalTypesAreSame(OutputMutator output, ParquetColumnResolver columnResolver) {
  for (ValueVector vector : output.getVectors()) {
    Field fieldInSchema = vector.getField();
    if (fieldInSchema.getType().getTypeID() == ArrowType.ArrowTypeID.Decimal) {
      ArrowType.Decimal typeInTable = (ArrowType.Decimal) fieldInSchema.getType();
      Type typeInParquet = null;
      // the field in arrow schema may not be present in hive schema
      try {
        typeInParquet  = schema.getType(columnResolver.getParquetColumnName(fieldInSchema.getName()));
      } catch (InvalidRecordException ignored) {
        // Deliberately ignored: a missing column leaves typeInParquet null and is skipped below.
      }
      if (typeInParquet == null) {
        continue;
      }
      boolean schemaMisMatch = true;
      OriginalType originalType = typeInParquet.getOriginalType();
      // getOriginalType() is null for types without an annotation; compare by reference so such
      // a column is reported as a schema mismatch instead of failing with a NullPointerException.
      if (originalType == OriginalType.DECIMAL) {
        int precision = typeInParquet
          .asPrimitiveType().getDecimalMetadata().getPrecision();
        int scale = typeInParquet.asPrimitiveType().getDecimalMetadata().getScale();
        ArrowType decimalType = new ArrowType.Decimal(precision, scale);
        if (decimalType.equals(typeInTable)) {
          schemaMisMatch = false;
        }
      }
      if (schemaMisMatch) {
        throw UserException.schemaChangeError().message("Mixed types "+ fieldInSchema.getType()
          + " , " + typeInParquet + " is not supported.")
          .build(logger);
      }
    }
  }
}
 
Example 6
Source File: ParquetRowiseReader.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a type containing only the branch of {@code schema} named by {@code pathSegments},
 * starting at segment {@code depth}.
 *
 * <p>The type at the last segment is returned as-is; every type above it is re-created as a
 * group (same repetition, name and original type) whose only field is the next level down.
 *
 * @param pathSegments full column path
 * @param depth        index of the segment this call resolves
 * @param schema       schema the path is resolved against
 * @return the type for segment {@code depth}, narrowed to the given path
 */
private static Type getType(String[] pathSegments, int depth, MessageType schema) {
  final String[] prefix = Arrays.copyOfRange(pathSegments, 0, depth + 1);
  final Type current = schema.getType(prefix);

  final boolean hasMoreSegments = depth + 1 != pathSegments.length;
  if (!hasMoreSegments) {
    return current;
  }

  // There are deeper segments, so this level has to be a group.
  Preconditions.checkState(!current.isPrimitive());
  return new GroupType(
    current.getRepetition(),
    current.getName(),
    current.getOriginalType(),
    getType(pathSegments, depth + 1, schema));
}
 
Example 7
Source File: Metadata.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Follows {@code path} downwards from {@code type}, beginning at index {@code depth}, until a
 * primitive type is reached, and returns that primitive's original type.
 *
 * @param type  type to start from
 * @param path  column path; each segment names the child to follow at that level
 * @param depth index into {@code path} of the next segment to follow
 * @return the original type of the primitive found, as reported by {@code getOriginalType()}
 */
private OriginalType getOriginalType(Type type, String[] path, int depth) {
  Type current = type;
  for (int level = depth; !current.isPrimitive(); level++) {
    current = ((GroupType) current).getType(path[level]);
  }
  return current.getOriginalType();
}
 
Example 8
Source File: ParquetReaderUtility.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the path of a {@link ColumnDescriptor} into a list of path segments and collapses any
 * parquet LOGICAL LIST into something the execution engine can understand (removes the extra
 * 'list' and 'element' fields from the name).
 *
 * <p>The schema is walked from the root along the descriptor's path. Whenever the visited type is
 * annotated as {@link OriginalType#LIST} and accepted by
 * {@link LogicalListL1Converter#isSupportedSchema(GroupType)}, the two path entries that follow it
 * are removed; directly nested lists are collapsed in the same pass.
 *
 * @param schema           message schema the descriptor's path is resolved against
 * @param columnDescriptor descriptor whose path is converted
 * @return a new list holding the remaining path segments (the descriptor's own path is not modified,
 *         only the copy made here)
 */
public static List<String> convertColumnDescriptor(final MessageType schema, final ColumnDescriptor columnDescriptor) {
  // Work on a mutable copy of the path; entries are removed from it in place below.
  List<String> path = Lists.newArrayList(columnDescriptor.getPath());

  // go through the path and find all logical lists
  int index = 0;
  Type type = schema;
  while (!type.isPrimitive()) { // don't bother checking the last element in the path as it is a primitive type
    type = type.asGroupType().getType(path.get(index));
    if (type.getOriginalType() == OriginalType.LIST && LogicalListL1Converter.isSupportedSchema(type.asGroupType())) {
      // remove 'list'
      // (index+1 is an int, so this is List.remove(int): removal by position, not by value)
      type = type.asGroupType().getType(path.get(index+1));
      path.remove(index+1);

      // remove 'element'
      // after the removal above, index+1 now refers to the 'element' entry; step into it first,
      // its path entry is removed further down
      type = type.asGroupType().getType(path.get(index+1));

      //handle nested list case
      while (type.getOriginalType() == OriginalType.LIST && LogicalListL1Converter.isSupportedSchema(type.asGroupType())) {
        // current 'list'.'element' entry
        path.remove(index+1);

        // nested 'list' entry
        type = type.asGroupType().getType(path.get(index+1));
        path.remove(index+1);

        // step into the nested 'element'; it is removed by the next iteration or after the loop
        type = type.asGroupType().getType(path.get(index+1));
      }

      // final 'list'.'element' entry
      path.remove(index+1);

    }
    // the entry at 'index' (the list's own name, or a plain group/primitive name) is kept
    index++;
  }
  return path;
}
 
Example 9
Source File: RowConverter.java    From flink with Apache License 2.0 5 votes vote down vote up
RowPrimitiveConverter(Type dataType, ParentDataHolder parentDataHolder, int pos) {
	this.parentDataHolder = parentDataHolder;
	this.pos = pos;
	if (dataType.isPrimitive()) {
		this.originalType = dataType.getOriginalType();
		this.primitiveTypeName = dataType.asPrimitiveType().getPrimitiveTypeName();
	} else {
		// Backward-compatibility  It can be a group type middle layer
		Type primitiveType = dataType.asGroupType().getType(0);
		this.originalType = primitiveType.getOriginalType();
		this.primitiveTypeName = primitiveType.asPrimitiveType().getPrimitiveTypeName();
	}
}
 
Example 10
Source File: ParquetResolver.java    From pxf with Apache License 2.0 4 votes vote down vote up
/**
 * Adds the value of {@code field} to {@code group} at position {@code index}, converting it
 * according to the Parquet primitive type (and, for BINARY and INT32, the original type) of
 * {@code type}. A {@code null} field value adds nothing.
 *
 * @param index position in the group to add the value at
 * @param field field whose {@code val} is written
 * @param group target Parquet group
 * @param type  Parquet type of the column; must be primitive
 * @throws IOException if the primitive type is not one of the handled types
 */
private void fillGroup(int index, OneField field, Group group, Type type) throws IOException {
    if (field.val == null) {
        return;
    }

    switch (type.asPrimitiveType().getPrimitiveTypeName()) {
        case BINARY: {
            if (type.getOriginalType() != OriginalType.UTF8) {
                group.add(index, Binary.fromReusedByteArray((byte[]) field.val));
            } else {
                group.add(index, (String) field.val);
            }
            break;
        }
        case INT32: {
            if (type.getOriginalType() != OriginalType.INT_16) {
                group.add(index, (Integer) field.val);
            } else {
                group.add(index, (Short) field.val);
            }
            break;
        }
        case INT64: {
            group.add(index, (Long) field.val);
            break;
        }
        case DOUBLE: {
            group.add(index, (Double) field.val);
            break;
        }
        case FLOAT: {
            group.add(index, (Float) field.val);
            break;
        }
        case FIXED_LEN_BYTE_ARRAY: {
            // From org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter.DecimalDataWriter#decimalToBinary
            String decimalText = (String) field.val;
            int precision = Math.min(HiveDecimal.MAX_PRECISION, type.asPrimitiveType().getDecimalMetadata().getPrecision());
            int scale = Math.min(HiveDecimal.MAX_SCALE, type.asPrimitiveType().getDecimalMetadata().getScale());
            HiveDecimal decimal = HiveDecimal.enforcePrecisionScale(HiveDecimal.create(decimalText), precision, scale);

            if (decimal == null) {
                // enforcePrecisionScale returns null when the value does not fit the
                // (capped) precision, so it cannot be stored in Parquet. To stay
                // consistent with Hive's behavior on a Parquet-backed table, the
                // value is stored as null, i.e. nothing is added.
                return;
            }

            byte[] unscaledBytes = decimal.bigIntegerBytesScaled(scale);

            // Estimated number of bytes needed.
            int targetLength = ParquetFileAccessor.PRECISION_TO_BYTE_COUNT[precision - 1];
            if (unscaledBytes.length != targetLength) {
                byte[] padded = new byte[targetLength];
                if (decimal.signum() == -1) {
                    // Negative numbers are sign-extended: start with every bit set to 1.
                    for (int pos = 0; pos < targetLength; pos++) {
                        padded[pos] |= 0xFF;
                    }
                }
                // Right-align the value; the leading bytes stay as zero/one padding.
                int offset = targetLength - unscaledBytes.length;
                System.arraycopy(unscaledBytes, 0, padded, offset, unscaledBytes.length);
                group.add(index, Binary.fromReusedByteArray(padded));
            } else {
                // Already the right width, no padding needed.
                group.add(index, Binary.fromReusedByteArray(unscaledBytes));
            }
            // end -- org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter.DecimalDataWriter#decimalToBinary
            break;
        }
        case INT96: {
            // SQL standard timestamp string value with or without time zone literals:
            // https://www.postgresql.org/docs/9.4/datatype-datetime.html
            String timestampText = (String) field.val;
            boolean matchesTimestampPattern = TIMESTAMP_PATTERN.matcher(timestampText).find();
            if (!matchesTimestampPattern) {
                group.add(index, ParquetTypeConverter.getBinaryFromTimestamp(timestampText));
            } else {
                // Note: converting a "timestamp with time zone" value loses the time zone
                // information while preserving the correct value (Parquet doesn't support
                // timestamp with time zone).
                group.add(index, ParquetTypeConverter.getBinaryFromTimestampWithTimeZone(timestampText));
            }
            break;
        }
        case BOOLEAN: {
            group.add(index, (Boolean) field.val);
            break;
        }
        default:
            throw new IOException("Not supported type " + type.asPrimitiveType().getPrimitiveTypeName());
    }
}
 
Example 11
Source File: LogicalListL1Converter.java    From dremio-oss with Apache License 2.0 3 votes vote down vote up
/**
 * Tells whether the given group has the following layout:
 * <pre>
 * optional group <name> (LIST) {
 *   repeated group <list-name> {
 *     <element-repetition> <element-type> <element-name>;
 *   }
 * }
 * </pre>
 * Concretely: the group has exactly one field, that field is a repeated group without an
 * original type, and it in turn has exactly one field. The repetition and annotation of
 * {@code schema} itself are not examined here.
 *
 * @param schema parquet group type
 * @return true if the layout is supported
 */
public static boolean isSupportedSchema(GroupType schema) {
  if (schema.getFieldCount() != 1) {
    return false;
  }

  Type inner = schema.getType(0);
  // check: repeated group, with no annotation of its own
  boolean isPlainRepeatedGroup =
    !inner.isPrimitive() && inner.isRepetition(REPEATED) && inner.getOriginalType() == null;
  return isPlainRepeatedGroup && inner.asGroupType().getFieldCount() == 1;
}