Java Code Examples for org.apache.parquet.schema.PrimitiveType#PrimitiveTypeName

The following examples show how to use org.apache.parquet.schema.PrimitiveType#PrimitiveTypeName. You can vote up the examples you like or vote down the ones you don't, and you can go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: TestPigSchemaConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Smoke test: for every combination of group repetition, element repetition and
 * primitive type name (INT96 excluded), builds a LIST-annotated group "a" holding a
 * single primitive field "b" and hands it to the Pig schema converter.
 * The test passes as long as the conversion does not throw.
 */
@Test
public void testListsOfPrimitive() throws Exception {
  for (Type.Repetition groupRepetition : Type.Repetition.values()) {
    for (Type.Repetition elementRepetition : Type.Repetition.values()) {
      for (PrimitiveType.PrimitiveTypeName typeName : PrimitiveType.PrimitiveTypeName.values()) {
        // INT96 is not yet implemented, so it is skipped
        if (typeName == PrimitiveType.PrimitiveTypeName.INT96) {
          continue;
        }

        Types.PrimitiveBuilder<PrimitiveType> elementBuilder = Types.primitive(typeName, elementRepetition);
        if (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
          // a length is set on the builder for fixed-length arrays before the field is named
          elementBuilder.length(1);
        }

        GroupType listGroup = Types.buildGroup(groupRepetition)
            .addField(elementBuilder.named("b"))
            .as(OriginalType.LIST)
            .named("a");

        // no exceptions, please
        pigSchemaConverter.convertField(listGroup);
      }
    }
  }
}
 
Example 2
Source File: AvroSchemaConverter190Int96Avro17.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Adds the logical-type properties derived from {@code annotation} to {@code schema}.
 *
 * <p>Nothing is added when no logical-type map is produced, or when the annotation is
 * {@code DECIMAL} but the Parquet primitive type is neither {@code BINARY} nor
 * {@code FIXED_LEN_BYTE_ARRAY}.
 *
 * @param schema                   schema that receives the properties
 * @param annotation               original type annotation of the Parquet field
 * @param asPrimitive              primitive type whose decimal metadata is passed to the conversion
 * @param parquetPrimitiveTypeName Parquet primitive type name of the field
 * @return the same {@code schema} instance that was passed in
 */
private Schema addLogicalTypeStrToSchema(
    Schema schema,
    OriginalType annotation,
    PrimitiveType asPrimitive,
    PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName
) {
  Map<String, String> logicalTypeProps = convertOriginalTypeToMap(annotation, asPrimitive.getDecimalMetadata());
  if (logicalTypeProps == null) {
    return schema;
  }

  // DECIMAL properties are only written for BINARY and FIXED_LEN_BYTE_ARRAY columns
  if (annotation == DECIMAL
      && parquetPrimitiveTypeName != BINARY
      && parquetPrimitiveTypeName != FIXED_LEN_BYTE_ARRAY) {
    return schema;
  }

  for (Map.Entry<String, String> prop : logicalTypeProps.entrySet()) {
    schema.addProp(prop.getKey(), prop.getValue());
  }
  return schema;
}
 
Example 3
Source File: ParquetToDrillTypeConverter.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link TypeProtos.MajorType} for a Parquet column.
 *
 * <p>The minor type is resolved by {@code getMinorType} from the primitive type name,
 * length, converted type and options. Precision and scale are copied from
 * {@code schemaElement} only when the resolved minor type is a decimal type.
 *
 * @param primitiveTypeName Parquet primitive type name of the column
 * @param length            type length, forwarded to {@code getMinorType}
 * @param mode              data mode to set on the resulting type
 * @param schemaElement     schema element supplying converted type, precision and scale
 * @param options           option manager, forwarded to {@code getMinorType}
 * @return the assembled major type
 */
public static TypeProtos.MajorType toMajorType(PrimitiveType.PrimitiveTypeName primitiveTypeName, int length,
                                        TypeProtos.DataMode mode, SchemaElement schemaElement,
                                        OptionManager options) {
  MinorType resolvedMinorType =
      getMinorType(primitiveTypeName, length, schemaElement.getConverted_type(), options);

  TypeProtos.MajorType.Builder majorTypeBuilder = TypeProtos.MajorType.newBuilder();
  majorTypeBuilder.setMinorType(resolvedMinorType);
  majorTypeBuilder.setMode(mode);

  if (Types.isDecimalType(resolvedMinorType)) {
    majorTypeBuilder.setPrecision(schemaElement.getPrecision());
    majorTypeBuilder.setScale(schemaElement.getScale());
  }

  return majorTypeBuilder.build();
}
 
Example 4
Source File: DeprecatedParquetVectorizedReader.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the width, in bits, of a fixed-width Parquet primitive type.
 *
 * @param type a fixed length type from the parquet library enum
 * @return 1 for BOOLEAN, 32 for INT32/FLOAT, 64 for INT64/DOUBLE, 96 for INT96
 * @throws IllegalStateException for any other type (binary and fixed length byte array)
 */
public static int getTypeLengthInBits(PrimitiveType.PrimitiveTypeName type) {
  switch (type) {
    case BOOLEAN:
      return 1;
    case INT32:
    case FLOAT:
      return 32;
    case INT64:
    case DOUBLE:
      return 64;
    case INT96:
      return 96;
    default:
      // binary and fixed length byte array have no single fixed width
      throw new IllegalStateException("Length cannot be determined for type " + type);
  }
}
 
Example 5
Source File: Metadata_V3.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Creates column metadata by storing each argument in the field of the same name.
 *
 * @param name          column name parts
 * @param primitiveType Parquet primitive type name of the column
 * @param minValue      minimum value recorded for the column
 * @param maxValue      maximum value recorded for the column
 * @param nulls         null count recorded for the column
 */
public ColumnMetadata_v3(String[] name, PrimitiveType.PrimitiveTypeName primitiveType, Object minValue, Object maxValue, Long nulls) {
  // identity of the column
  this.name = name;
  this.primitiveType = primitiveType;

  // recorded statistics
  this.minValue = minValue;
  this.maxValue = maxValue;
  this.nulls = nulls;
}
 
Example 6
Source File: Metadata_V4.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Creates column type metadata by storing each argument in the field of the same name
 * and deriving {@code key} from {@code name}.
 *
 * @param name            column name parts; also used to build the {@code Key}
 * @param primitiveType   Parquet primitive type name of the column
 * @param originalType    original (logical) type annotation of the column
 * @param precision       precision value to store
 * @param scale           scale value to store
 * @param repetitionLevel repetition level to store
 * @param definitionLevel definition level to store
 * @param totalNullCount  total null count to store
 * @param isInteresting   flag stored in {@code isInteresting}
 */
public ColumnTypeMetadata_v4(String[] name, PrimitiveType.PrimitiveTypeName primitiveType, OriginalType originalType, int precision, int scale, int repetitionLevel, int definitionLevel, long totalNullCount, boolean isInteresting) {
  // column identity
  this.name = name;
  this.key = new Key(name);

  // type description
  this.primitiveType = primitiveType;
  this.originalType = originalType;
  this.precision = precision;
  this.scale = scale;

  // nesting levels
  this.repetitionLevel = repetitionLevel;
  this.definitionLevel = definitionLevel;

  // remaining attributes
  this.totalNullCount = totalNullCount;
  this.isInteresting = isInteresting;
}
 
Example 7
Source File: ParquetTableMetadataUtils.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the {@link PrimitiveType.PrimitiveTypeName} of the given column.
 *
 * <p>The type held by the column itself is preferred; when the column reports
 * {@code null}, the type is looked up in the table metadata by column name.
 *
 * @param parquetTableMetadata table metadata consulted when the column carries no type
 * @param column               column whose {@link PrimitiveType.PrimitiveTypeName} should be returned
 * @return {@link PrimitiveType.PrimitiveTypeName} type for the specified column
 */
public static PrimitiveType.PrimitiveTypeName getPrimitiveTypeName(MetadataBase.ParquetTableMetadataBase parquetTableMetadata, MetadataBase.ColumnMetadata column) {
  // with parquet metadata v1 the type information lives in ColumnMetadata rather than
  // in parquetTableMetadata, so the column is asked first
  PrimitiveType.PrimitiveTypeName typeFromColumn = column.getPrimitiveType();
  if (typeFromColumn != null) {
    return typeFromColumn;
  }
  return parquetTableMetadata.getPrimitiveType(column.getName());
}
 
Example 8
Source File: Metadata_V1.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Creates column metadata by storing each argument in the field of the same name.
 *
 * @param name          schema path of the column
 * @param primitiveType Parquet primitive type name of the column
 * @param originalType  original (logical) type annotation of the column
 * @param max           maximum value recorded for the column
 * @param min           minimum value recorded for the column
 * @param nulls         null count recorded for the column
 */
public ColumnMetadata_v1(SchemaPath name, PrimitiveType.PrimitiveTypeName primitiveType, OriginalType originalType,
                         Object max, Object min, Long nulls) {
  // column identity and type
  this.name = name;
  this.primitiveType = primitiveType;
  this.originalType = originalType;

  // recorded statistics
  this.min = min;
  this.max = max;
  this.nulls = nulls;
}
 
Example 9
Source File: ParquetToMinorTypeConverter.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Maps a Parquet primitive type, refined by the converted type read from
 * {@code schemaElement}, to a {@link TypeProtos.MinorType}.
 *
 * @param primitiveTypeName    Parquet primitive type name to map
 * @param length               type length; only checked (must be &gt; 0) for
 *                             FIXED_LEN_BYTE_ARRAY without a converted type
 * @param schemaElement        schema element supplying the converted type and, for decimals,
 *                             the input to {@code getDecimalType}
 * @param options              options passed to {@code ParquetReaderUtility.checkDecimalTypeEnabled}
 * @param arrowField           optional Arrow field; when non-null it decides which interval
 *                             minor type an INTERVAL column maps to
 * @param readInt96AsTimeStamp when true INT96 maps to TIMESTAMP, otherwise to VARBINARY
 * @return the minor type for the given primitive/converted type combination
 * @throws UnsupportedOperationException for INT32/INT64 converted types not listed below, for
 *                                       FIXED_LEN_BYTE_ARRAY converted types other than DECIMAL
 *                                       and INTERVAL, and for unknown primitive types
 * @throws IllegalArgumentException      when {@code arrowField} is given for an INTERVAL column
 *                                       but is not a DAY_TIME or YEAR_MONTH interval
 */
private static TypeProtos.MinorType getMinorType(PrimitiveType.PrimitiveTypeName primitiveTypeName, int length,
                                                   SchemaElement schemaElement, OptionManager options, Field arrowField,
                                                   final boolean readInt96AsTimeStamp) {

    // may be null when the column carries no converted type
    ConvertedType convertedType = schemaElement.getConverted_type();

    switch (primitiveTypeName) {
      case BINARY:
        if (convertedType == null) {
          return TypeProtos.MinorType.VARBINARY;
        }
        switch (convertedType) {
          case UTF8:
            return TypeProtos.MinorType.VARCHAR;
          case DECIMAL:
            ParquetReaderUtility.checkDecimalTypeEnabled(options);
            return getDecimalType(schemaElement);
          default:
            // any other annotation on BINARY is read as raw bytes
            return TypeProtos.MinorType.VARBINARY;
        }
      case INT64:
        if (convertedType == null) {
          return TypeProtos.MinorType.BIGINT;
        }
        switch(convertedType) {
          case DECIMAL:
            ParquetReaderUtility.checkDecimalTypeEnabled(options);
            return TypeProtos.MinorType.DECIMAL;
          // TODO - add this back if it is decided to be added upstream, was removed from our pull request July 2014
//              case TIME_MICROS:
//                throw new UnsupportedOperationException();
          case TIMESTAMP_MILLIS:
            return TypeProtos.MinorType.TIMESTAMP;
          default:
            throw new UnsupportedOperationException(String.format("unsupported type: %s %s", primitiveTypeName, convertedType));
        }
      case INT32:
        if (convertedType == null) {
          return TypeProtos.MinorType.INT;
        }
        switch(convertedType) {
          case DECIMAL:
            ParquetReaderUtility.checkDecimalTypeEnabled(options);
            return TypeProtos.MinorType.DECIMAL;
          case DATE:
            return TypeProtos.MinorType.DATE;
          case TIME_MILLIS:
            return TypeProtos.MinorType.TIME;
          default:
            throw new UnsupportedOperationException(String.format("unsupported type: %s %s", primitiveTypeName, convertedType));
        }
      case BOOLEAN:
        return TypeProtos.MinorType.BIT;
      case FLOAT:
        return TypeProtos.MinorType.FLOAT4;
      case DOUBLE:
        return TypeProtos.MinorType.FLOAT8;
      // TODO - Both of these are not supported by the parquet library yet (7/3/13),
      // but they are declared here for when they are implemented
      // NOTE(review): the TODO above looks stale - both cases below have real mappings; confirm and remove
      case INT96:
        if (readInt96AsTimeStamp) {
          return TypeProtos.MinorType.TIMESTAMP;
        } else {
          return TypeProtos.MinorType.VARBINARY;
        }
      case FIXED_LEN_BYTE_ARRAY:
        if (convertedType == null) {
          checkArgument(length > 0, "A length greater than zero must be provided for a FixedBinary type.");
          return TypeProtos.MinorType.VARBINARY;
        } else if (convertedType == ConvertedType.DECIMAL) {
          ParquetReaderUtility.checkDecimalTypeEnabled(options);
          return getDecimalType(schemaElement);
        } else if (convertedType == ConvertedType.INTERVAL) {
          if (arrowField != null) {
            if (arrowField.getType().getTypeID() == ArrowTypeID.Interval) {
              switch (((Interval)arrowField.getType()).getUnit()) {
                case DAY_TIME:
                  return TypeProtos.MinorType.INTERVALDAY;
                case YEAR_MONTH:
                  return TypeProtos.MinorType.INTERVALYEAR;
              }
            }
            // reached for a non-interval Arrow type or an interval unit not handled above
            throw new IllegalArgumentException("incompatible type " + arrowField);
          }
          // TODO: older versions of Drill generated this
          return TypeProtos.MinorType.VARBINARY;
        }
        // NOTE: no break/return here - any other converted type on FIXED_LEN_BYTE_ARRAY
        // falls through into the default branch and is rejected
      default:
        throw new UnsupportedOperationException("Type not supported: " + primitiveTypeName);
    }
  }
 
Example 10
Source File: Metadata_V4.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Creates column metadata by forwarding all arguments, unchanged, to the superclass constructor.
 *
 * @param name          column name parts
 * @param primitiveType Parquet primitive type name of the column
 * @param minValue      minimum value recorded for the column
 * @param maxValue      maximum value recorded for the column
 * @param nulls         null count recorded for the column
 */
public ColumnMetadata_v4(String[] name, PrimitiveType.PrimitiveTypeName primitiveType, Object minValue, Object maxValue, Long nulls) {
  super(name, primitiveType, minValue, maxValue, nulls);
}
 
Example 11
Source File: Metadata_V4.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the value of the {@code primitiveType} field.
 * The accessor is annotated with {@code @JsonIgnore}.
 *
 * @return the Parquet primitive type name held by this object
 */
@JsonIgnore
@Override
public PrimitiveType.PrimitiveTypeName getPrimitiveType() {
  return primitiveType;
}
 
Example 12
Source File: Metadata_V4.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the primitive type of the named column, read from the {@code primitiveType}
 * field of the object returned by {@code getColumnTypeInfo(columnName)}.
 *
 * @param columnName column name parts used for the lookup
 * @return the Parquet primitive type name of that column
 */
@Override
public PrimitiveType.PrimitiveTypeName getPrimitiveType(String[] columnName) {
  // NOTE(review): assumes getColumnTypeInfo never returns null for the given name - confirm
  return getColumnTypeInfo(columnName).primitiveType;
}
 
Example 13
Source File: Metadata_V3.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the value of the {@code primitiveType} field.
 *
 * @return the Parquet primitive type name held by this object
 */
@Override
public PrimitiveType.PrimitiveTypeName getPrimitiveType() {
  return primitiveType;
}
 
Example 14
Source File: PathAction.java    From iow-hadoop-streaming with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the value of the {@code type} field.
 *
 * @return the Parquet primitive type name held by this object
 */
public PrimitiveType.PrimitiveTypeName getType() {
    return this.type;
}
 
Example 15
Source File: ParquetTableMetadataUtils.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Collects the drill type of every column in the given {@code rowGroup}, keyed by the
 * column's compound schema path and kept in encounter order.
 *
 * <p>When the same path occurs more than once, the stored type is replaced by the newer
 * one only if the least restrictive of the two minor types differs from the stored one.
 *
 * @param parquetTableMetadata the source of primitive and original column types
 * @param rowGroup             row group whose columns should be discovered
 * @return map of column names with their drill types
 */
public static Map<SchemaPath, TypeProtos.MajorType> getRowGroupFields(
    MetadataBase.ParquetTableMetadataBase parquetTableMetadata, MetadataBase.RowGroupMetadata rowGroup) {
  Map<SchemaPath, TypeProtos.MajorType> fieldTypes = new LinkedHashMap<>();

  for (MetadataBase.ColumnMetadata column : rowGroup.getColumns()) {
    PrimitiveType.PrimitiveTypeName primitiveType = getPrimitiveTypeName(parquetTableMetadata, column);
    OriginalType originalType = getOriginalType(parquetTableMetadata, column);

    // defaults used when the metadata carries no per-column type details
    int scale = 0;
    int precision = 0;
    int repetitionLevel = 0;
    int definitionLevel = 1;

    MetadataVersion currentVersion = new MetadataVersion(parquetTableMetadata.getMetadataVersion());
    // only ColumnTypeMetadata_v3 and ColumnTypeMetadata_v4 store information about
    // scale, precision, repetition level and definition level
    boolean hasTypeDetails = parquetTableMetadata.hasColumnMetadata()
        && currentVersion.compareTo(new MetadataVersion(3, 0)) >= 0;
    if (hasTypeDetails) {
      scale = parquetTableMetadata.getScale(column.getName());
      precision = parquetTableMetadata.getPrecision(column.getName());
      repetitionLevel = parquetTableMetadata.getRepetitionLevel(column.getName());
      definitionLevel = parquetTableMetadata.getDefinitionLevel(column.getName());
    }

    TypeProtos.DataMode mode = repetitionLevel >= 1
        ? TypeProtos.DataMode.REPEATED
        : (repetitionLevel == 0 && definitionLevel == 0
            ? TypeProtos.DataMode.REQUIRED
            : TypeProtos.DataMode.OPTIONAL);

    TypeProtos.MajorType.Builder columnTypeBuilder =
        TypeProtos.MajorType.newBuilder(ParquetReaderUtility.getType(primitiveType, originalType, scale, precision));
    columnTypeBuilder.setMode(mode);
    TypeProtos.MajorType columnType = columnTypeBuilder.build();

    SchemaPath columnPath = SchemaPath.getCompoundPath(column.getName());
    TypeProtos.MajorType knownType = fieldTypes.get(columnPath);
    if (knownType != null) {
      TypeProtos.MinorType leastRestrictiveType = TypeCastRules.getLeastRestrictiveType(
          Arrays.asList(knownType.getMinorType(), columnType.getMinorType()));
      if (leastRestrictiveType == knownType.getMinorType()) {
        // the stored type already is the least restrictive one; keep it
        continue;
      }
    }
    fieldTypes.put(columnPath, columnType);
  }

  return fieldTypes;
}
 
Example 16
Source File: ParquetTableMetadataUtils.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Selects the {@link Comparator} for a column from its {@code primitiveType} and {@code originalType}.
 *
 * <p>When an original type is present it decides alone: {@code INTERVAL} uses the unsigned
 * lexicographical binary comparator, every other original type uses the natural
 * nulls-first comparator. Without an original type, all listed primitive types use the
 * natural nulls-first comparator.
 *
 * @param primitiveType primitive type of the column; consulted only when {@code originalType} is null
 * @param originalType  original type of the column, may be null
 * @return {@link Comparator} instance
 * @throws UnsupportedOperationException if {@code originalType} is null and {@code primitiveType}
 *                                       is not one of the listed primitive types
 */
public static Comparator getComparator(PrimitiveType.PrimitiveTypeName primitiveType, OriginalType originalType) {
  if (originalType == null) {
    switch (primitiveType) {
      case BOOLEAN:
      case INT32:
      case INT64:
      case INT96:
      case FLOAT:
      case DOUBLE:
      case BINARY:
      case FIXED_LEN_BYTE_ARRAY:
        return getNaturalNullsFirstComparator();
      default:
        throw new UnsupportedOperationException("Unsupported type: " + primitiveType);
    }
  }

  // INTERVAL is the only original type with a dedicated comparator
  if (originalType == OriginalType.INTERVAL) {
    return UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR;
  }
  return getNaturalNullsFirstComparator();
}
 
Example 17
Source File: Metadata_V3.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the primitive type of the named column, read from the {@code primitiveType}
 * field of the object returned by {@code getColumnTypeInfo(columnName)}.
 * The accessor is annotated with {@code @JsonIgnore}.
 *
 * @param columnName column name parts used for the lookup
 * @return the Parquet primitive type name of that column
 */
@JsonIgnore
@Override public PrimitiveType.PrimitiveTypeName getPrimitiveType(String[] columnName) {
  // NOTE(review): assumes getColumnTypeInfo never returns null for the given name - confirm
  return getColumnTypeInfo(columnName).primitiveType;
}
 
Example 18
Source File: ParquetSplitReaderUtil.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an empty heap column vector of {@code batchSize} for the given logical field type,
 * after checking that the Parquet primitive type is one accepted for that logical type.
 *
 * <p>For {@code DECIMAL} the vector kind depends on the precision (32-bit, 64-bit or
 * byte-backed), and the Parquet original type must be {@code DECIMAL}.
 *
 * @param batchSize     size passed to the created vector's constructor
 * @param fieldType     logical type of the field; its type root selects the vector kind
 * @param primitiveType Parquet primitive type of the column being read
 * @return a new writable column vector matching {@code fieldType}
 * @throws UnsupportedOperationException if the logical type root is not handled here
 */
public static WritableColumnVector createWritableColumnVector(
		int batchSize,
		LogicalType fieldType,
		PrimitiveType primitiveType) {
	final PrimitiveType.PrimitiveTypeName parquetType = primitiveType.getPrimitiveTypeName();

	switch (fieldType.getTypeRoot()) {
		// --- single-bit and floating point types ---
		case BOOLEAN:
			checkArgument(
					parquetType == PrimitiveType.PrimitiveTypeName.BOOLEAN,
					"Unexpected type: %s", parquetType);
			return new HeapBooleanVector(batchSize);
		case FLOAT:
			checkArgument(
					parquetType == PrimitiveType.PrimitiveTypeName.FLOAT,
					"Unexpected type: %s", parquetType);
			return new HeapFloatVector(batchSize);
		case DOUBLE:
			checkArgument(
					parquetType == PrimitiveType.PrimitiveTypeName.DOUBLE,
					"Unexpected type: %s", parquetType);
			return new HeapDoubleVector(batchSize);

		// --- logical types that must be stored as INT32 ---
		case TINYINT:
			checkArgument(
					parquetType == PrimitiveType.PrimitiveTypeName.INT32,
					"Unexpected type: %s", parquetType);
			return new HeapByteVector(batchSize);
		case SMALLINT:
			checkArgument(
					parquetType == PrimitiveType.PrimitiveTypeName.INT32,
					"Unexpected type: %s", parquetType);
			return new HeapShortVector(batchSize);
		case INTEGER:
		case DATE:
		case TIME_WITHOUT_TIME_ZONE:
			checkArgument(
					parquetType == PrimitiveType.PrimitiveTypeName.INT32,
					"Unexpected type: %s", parquetType);
			return new HeapIntVector(batchSize);

		// --- INT64 ---
		case BIGINT:
			checkArgument(
					parquetType == PrimitiveType.PrimitiveTypeName.INT64,
					"Unexpected type: %s", parquetType);
			return new HeapLongVector(batchSize);

		// --- INT96 timestamps ---
		case TIMESTAMP_WITHOUT_TIME_ZONE:
		case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
			checkArgument(
					parquetType == PrimitiveType.PrimitiveTypeName.INT96,
					"Unexpected type: %s", parquetType);
			return new HeapTimestampVector(batchSize);

		// --- strings and raw bytes ---
		case CHAR:
		case VARCHAR:
		case BINARY:
		case VARBINARY:
			checkArgument(
					parquetType == PrimitiveType.PrimitiveTypeName.BINARY,
					"Unexpected type: %s", parquetType);
			return new HeapBytesVector(batchSize);

		// --- decimals: vector kind is chosen by precision ---
		case DECIMAL: {
			final int precision = ((DecimalType) fieldType).getPrecision();

			if (DecimalDataUtils.is32BitDecimal(precision)) {
				checkArgument(
						(parquetType == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY ||
								parquetType == PrimitiveType.PrimitiveTypeName.INT32) &&
								primitiveType.getOriginalType() == OriginalType.DECIMAL,
						"Unexpected type: %s", parquetType);
				return new HeapIntVector(batchSize);
			}

			if (DecimalDataUtils.is64BitDecimal(precision)) {
				checkArgument(
						(parquetType == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY ||
								parquetType == PrimitiveType.PrimitiveTypeName.INT64) &&
								primitiveType.getOriginalType() == OriginalType.DECIMAL,
						"Unexpected type: %s", parquetType);
				return new HeapLongVector(batchSize);
			}

			// precision fits neither the 32-bit nor the 64-bit check: keep the bytes
			checkArgument(
					(parquetType == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY ||
							parquetType == PrimitiveType.PrimitiveTypeName.BINARY) &&
							primitiveType.getOriginalType() == OriginalType.DECIMAL,
					"Unexpected type: %s", parquetType);
			return new HeapBytesVector(batchSize);
		}

		default:
			throw new UnsupportedOperationException(fieldType + " is not supported now.");
	}
}
 
Example 19
Source File: MetadataBase.java    From Bats with Apache License 2.0 votes vote down vote up
/**
 * Returns the Parquet primitive type name associated with this object.
 * Abstract; the value is supplied by subclasses.
 *
 * @return the primitive type name
 */
public abstract PrimitiveType.PrimitiveTypeName getPrimitiveType(); 
Example 20
Source File: MetadataBase.java    From Bats with Apache License 2.0 votes vote down vote up
/**
 * Returns the Parquet primitive type name for the given column.
 * Abstract; the lookup is supplied by subclasses. The accessor is annotated with {@code @JsonIgnore}.
 *
 * @param columnName column name parts identifying the column
 * @return the primitive type name of that column
 */
@JsonIgnore public abstract PrimitiveType.PrimitiveTypeName getPrimitiveType(String[] columnName);