Java Code Examples for org.apache.parquet.schema.PrimitiveType#getOriginalType()

The following examples show how to use org.apache.parquet.schema.PrimitiveType#getOriginalType(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetRecordWriter.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Changes the list inner '$data$' vector name to 'element' in the schema.
 *
 * <p>Builds a new type named "element" from the given child type. A primitive child keeps
 * its repetition, primitive type name, type length, original type, decimal metadata and id.
 * A group child keeps its repetition, original type and fields; its id is re-applied only
 * when one is present.
 *
 * @param childType the list child type to rename
 * @return a new type named "element" carrying the attributes copied from {@code childType}
 */
private Type renameChildTypeToElement(Type childType) {
  if (childType.isPrimitive()) {
    PrimitiveType childPrimitiveType = childType.asPrimitiveType();
    return new PrimitiveType(childType.getRepetition(),
      childPrimitiveType.getPrimitiveTypeName(),
      childPrimitiveType.getTypeLength(),
      "element",
      childPrimitiveType.getOriginalType(),
      childPrimitiveType.getDecimalMetadata(),
      childPrimitiveType.getId());
  }

  GroupType childGroupType = childType.asGroupType();
  GroupType groupType = new GroupType(childType.getRepetition(),
    "element",
    childType.getOriginalType(),
    childGroupType.getFields());

  Type.ID id = childGroupType.getId();
  if (id != null) {
    // Read the numeric id through Type.ID's accessor instead of relying on
    // hashCode() happening to return the same number.
    groupType = groupType.withId(id.intValue());
  }
  return groupType;
}
 
Example 2
Source File: MetadataUtils.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a one-line description of a primitive field to {@code out}.
 *
 * <p>The line contains the field name prefixed with {@code depth} dots, its repetition and
 * primitive type name, the original type (as {@code O:...}) when one is set, and - when a
 * container schema is supplied - the column's max repetition and definition levels.
 *
 * @param out       writer receiving the formatted line
 * @param type      primitive field being described
 * @param depth     number of "." characters printed before the field name
 * @param container schema used to look up the column descriptor; may be null to skip levels
 * @param cpath     path of the enclosing fields; left unchanged on return
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath) {
  String label = Strings.repeat(".", depth) + type.getName();
  OriginalType originalType = type.getOriginalType();
  Repetition repetition = type.getRepetition();
  PrimitiveTypeName primitiveName = type.getPrimitiveTypeName();

  out.format("%s: %s %s", label, repetition, primitiveName);
  if (originalType != null) {
    out.format(" O:%s", originalType);
  }

  if (container == null) {
    out.println();
    return;
  }

  // Push this field onto the shared path just long enough to snapshot the full column path.
  cpath.add(type.getName());
  String[] columnPath = cpath.toArray(new String[0]);
  cpath.remove(cpath.size() - 1);

  ColumnDescriptor column = container.getColumnDescription(columnPath);
  int maxDefinition = column.getMaxDefinitionLevel();
  int maxRepetition = column.getMaxRepetitionLevel();
  out.format(" R:%d D:%d", maxRepetition, maxDefinition);
  out.println();
}
 
Example 3
Source File: ParquetUtil.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether the given primitive type is treated as an int type.
 *
 * <p>When the type has no original type, the answer is whether its primitive type name is
 * INT32. Otherwise only the original types INT_8, INT_16, INT_32 and DATE count.
 *
 * @param primitiveType the Parquet primitive type to inspect
 * @return true if the type is treated as an int type, false otherwise
 */
public static boolean isIntType(PrimitiveType primitiveType) {
  // No annotation: decide purely on the physical type.
  if (primitiveType.getOriginalType() == null) {
    return primitiveType.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT32;
  }

  switch (primitiveType.getOriginalType()) {
    case DATE:
    case INT_8:
    case INT_16:
    case INT_32:
      return true;
    default:
      return false;
  }
}
 
Example 4
Source File: ParquetConversions.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a function that converts a value read from Parquet for the given primitive type.
 *
 * <ul>
 *   <li>Original type UTF8: decodes the {@code Binary} as UTF-8 into a CharSequence.
 *   <li>Original type DECIMAL: builds a {@code BigDecimal} with the type's scale, from the
 *       long value for INT32/INT64 or from the raw bytes for FIXED_LEN_BYTE_ARRAY/BINARY.
 *   <li>Any other FIXED_LEN_BYTE_ARRAY/BINARY: wraps the bytes in a {@code ByteBuffer}.
 *   <li>Everything else: the value is passed through unchanged.
 * </ul>
 *
 * @param type the Parquet primitive type of the values to convert
 * @return the conversion function for values of that type
 * @throws IllegalArgumentException if a DECIMAL is backed by an unsupported primitive type
 */
static Function<Object, Object> converterFromParquet(PrimitiveType type) {
  if (type.getOriginalType() != null) {
    switch (type.getOriginalType()) {
      case UTF8:
        // decode to CharSequence to avoid copying into a new String
        return value -> StandardCharsets.UTF_8.decode(((Binary) value).toByteBuffer());
      case DECIMAL: {
        int scale = type.getDecimalMetadata().getScale();
        switch (type.getPrimitiveTypeName()) {
          case INT32:
          case INT64:
            return unscaled -> BigDecimal.valueOf(((Number) unscaled).longValue(), scale);
          case BINARY:
          case FIXED_LEN_BYTE_ARRAY:
            return bytes -> new BigDecimal(new BigInteger(((Binary) bytes).getBytes()), scale);
          default:
            throw new IllegalArgumentException(
                "Unsupported primitive type for decimal: " + type.getPrimitiveTypeName());
        }
      }
      default:
        // other original types are handled by the primitive-type switch below
        break;
    }
  }

  switch (type.getPrimitiveTypeName()) {
    case BINARY:
    case FIXED_LEN_BYTE_ARRAY:
      return value -> ByteBuffer.wrap(((Binary) value).getBytes());
    default:
      break;
  }

  // no conversion needed
  return value -> value;
}
 
Example 5
Source File: ParquetConversions.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Picks the conversion applied to values read from Parquet for the given primitive type.
 *
 * <p>UTF8-annotated values are decoded as UTF-8 into a CharSequence. DECIMAL-annotated
 * values become a {@code BigDecimal} with the type's scale (from the long value for
 * INT32/INT64, from the raw bytes for FIXED_LEN_BYTE_ARRAY/BINARY). Remaining
 * FIXED_LEN_BYTE_ARRAY/BINARY values are wrapped in a {@code ByteBuffer}; all other values
 * are returned as-is.
 *
 * @param type the Parquet primitive type of the values to convert
 * @return the conversion function for values of that type
 * @throws IllegalArgumentException if a DECIMAL is backed by an unsupported primitive type
 */
static Function<Object, Object> converterFromParquet(PrimitiveType type) {
  if (type.getOriginalType() != null) {
    switch (type.getOriginalType()) {
      case UTF8:
        // decode to CharSequence to avoid copying into a new String
        return raw -> Charsets.UTF_8.decode(((Binary) raw).toByteBuffer());
      case DECIMAL: {
        int scale = type.getDecimalMetadata().getScale();
        switch (type.getPrimitiveTypeName()) {
          case INT64:
          case INT32:
            return raw -> BigDecimal.valueOf(((Number) raw).longValue(), scale);
          case BINARY:
          case FIXED_LEN_BYTE_ARRAY:
            return raw -> new BigDecimal(new BigInteger(((Binary) raw).getBytes()), scale);
          default:
            throw new IllegalArgumentException(
                "Unsupported primitive type for decimal: " + type.getPrimitiveTypeName());
        }
      }
      default:
        // any other annotation falls back to the physical-type handling below
        break;
    }
  }

  switch (type.getPrimitiveTypeName()) {
    case BINARY:
    case FIXED_LEN_BYTE_ARRAY:
      return raw -> ByteBuffer.wrap(((Binary) raw).getBytes());
    default:
      break;
  }

  // identity: the value is already in its final form
  return raw -> raw;
}
 
Example 6
Source File: MetadataUtils.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a one-line description of a primitive field to {@code out}.
 *
 * <p>The line contains the field name prefixed with {@code depth} dots, its repetition and
 * primitive type name, then either the original type ({@code O:...}) or the logical type
 * annotation ({@code L:...}) depending on {@code showOriginalTypes}, and - when a container
 * schema is supplied - the column's max repetition and definition levels.
 *
 * @param out               writer receiving the formatted line
 * @param type              primitive field being described
 * @param depth             number of "." characters printed before the field name
 * @param container         schema used to look up the column descriptor; may be null
 * @param cpath             path of the enclosing fields; left unchanged on return
 * @param showOriginalTypes true to print the original type, false to print the logical type
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
  String label = Strings.repeat(".", depth) + type.getName();
  Repetition repetition = type.getRepetition();
  PrimitiveTypeName primitiveName = type.getPrimitiveTypeName();

  out.format("%s: %s %s", label, repetition, primitiveName);
  if (showOriginalTypes) {
    OriginalType originalType = null;
    try {
      originalType = type.getOriginalType();
    } catch (Exception ignored) {
      // Best effort: if the original type cannot be obtained, print nothing for it.
    }
    if (originalType != null) {
      out.format(" O:%s", originalType);
    }
  } else {
    LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
    if (logicalType != null) {
      out.format(" L:%s", logicalType);
    }
  }

  if (container == null) {
    out.println();
    return;
  }

  // Push this field onto the shared path just long enough to snapshot the full column path.
  cpath.add(type.getName());
  String[] columnPath = cpath.toArray(new String[0]);
  cpath.remove(cpath.size() - 1);

  ColumnDescriptor column = container.getColumnDescription(columnPath);
  int maxDefinition = column.getMaxDefinitionLevel();
  int maxRepetition = column.getMaxRepetitionLevel();
  out.format(" R:%d D:%d", maxRepetition, maxDefinition);
  out.println();
}
 
Example 7
Source File: ArrowVectorAccessors.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Selects the dictionary-backed accessor matching the given Parquet primitive type.
 *
 * <p>When {@code primitive} carries an original type, the accessor is chosen from that
 * annotation (string-like, long-like, or decimal by backing primitive type). Otherwise it is
 * chosen from the primitive type name alone.
 *
 * @param dictionary dictionary passed to the created accessor
 * @param desc       column descriptor; not consulted for dispatch, kept so the signature
 *                   stays unchanged for callers
 * @param vector     vector holding the dictionary ids; must be an {@code IntVector}
 * @param primitive  Parquet primitive type that drives accessor selection
 * @return the accessor for the given type
 * @throws UnsupportedOperationException if the original type, the decimal's base type, or
 *     the un-annotated primitive type is not supported
 */
@NotNull
private static ArrowVectorAccessor getDictionaryVectorAccessor(
    Dictionary dictionary,
    ColumnDescriptor desc,
    FieldVector vector, PrimitiveType primitive) {
  Preconditions.checkState(vector instanceof IntVector, "Dictionary ids should be stored in IntVectors only");
  IntVector ids = (IntVector) vector;

  if (primitive.getOriginalType() != null) {
    // Switch on the same value that was just null-checked (and that the error message
    // below reports). Reading it from desc.getPrimitiveType() instead would leave the
    // switch unguarded against a null original type.
    switch (primitive.getOriginalType()) {
      case ENUM:
      case JSON:
      case UTF8:
      case BSON:
        return new DictionaryStringAccessor(ids, dictionary);
      case INT_64:
      case TIMESTAMP_MILLIS:
      case TIMESTAMP_MICROS:
        return new DictionaryLongAccessor(ids, dictionary);
      case DECIMAL:
        switch (primitive.getPrimitiveTypeName()) {
          case BINARY:
          case FIXED_LEN_BYTE_ARRAY:
            return new DictionaryDecimalBinaryAccessor(ids, dictionary);
          case INT64:
            return new DictionaryDecimalLongAccessor(ids, dictionary);
          case INT32:
            return new DictionaryDecimalIntAccessor(ids, dictionary);
          default:
            throw new UnsupportedOperationException(
                "Unsupported base type for decimal: " + primitive.getPrimitiveTypeName());
        }
      default:
        throw new UnsupportedOperationException(
            "Unsupported logical type: " + primitive.getOriginalType());
    }
  } else {
    switch (primitive.getPrimitiveTypeName()) {
      case FIXED_LEN_BYTE_ARRAY:
      case BINARY:
        return new DictionaryBinaryAccessor(ids, dictionary);
      case FLOAT:
        return new DictionaryFloatAccessor(ids, dictionary);
      case INT64:
        return new DictionaryLongAccessor(ids, dictionary);
      case DOUBLE:
        return new DictionaryDoubleAccessor(ids, dictionary);
      default:
        throw new UnsupportedOperationException("Unsupported type: " + primitive);
    }
  }
}