Java Code Examples for org.apache.parquet.schema.OriginalType

The following examples show how to use org.apache.parquet.schema.OriginalType. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Bats   Source File: ParquetTableMetadataUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds placeholder statistics for the columns of a v4 table metadata that are not
 * listed in {@code schemaPaths}.
 *
 * <p>Each such column receives a statistics map that holds only
 * {@link ColumnStatisticsKind#NULLS_COUNT} set to {@link GroupScan#NO_COLUMN_STATS}, paired
 * with the comparator resolved from the column's primitive and original types. For any
 * metadata that is not a {@code Metadata_V4.ParquetTableMetadata_v4} the result is empty.
 *
 * @param schemaPaths columns paths which should be ignored
 * @param parquetTableMetadata the source of column metadata for non-interesting column's statistics
 * @return map from column path to its placeholder statistics
 */
@SuppressWarnings("unchecked")
public static Map<SchemaPath, ColumnStatistics> populateNonInterestingColumnsStats(
        Set<SchemaPath> schemaPaths, MetadataBase.ParquetTableMetadataBase parquetTableMetadata) {
  Map<SchemaPath, ColumnStatistics> result = new HashMap<>();
  if (!(parquetTableMetadata instanceof Metadata_V4.ParquetTableMetadata_v4)) {
    return result;
  }

  Metadata_V4.ParquetTableMetadata_v4 v4Metadata = (Metadata_V4.ParquetTableMetadata_v4) parquetTableMetadata;
  for (Metadata_V4.ColumnTypeMetadata_v4 typeInfo : v4Metadata.getColumnTypeInfoMap().values()) {
    SchemaPath path = SchemaPath.getCompoundPath(typeInfo.name);
    if (schemaPaths.contains(path)) {
      // Already covered by the caller; nothing to add for this column.
      continue;
    }
    Map<StatisticsKind, Object> stats = new HashMap<>();
    stats.put(ColumnStatisticsKind.NULLS_COUNT, GroupScan.NO_COLUMN_STATS);
    Comparator comparator = getComparator(typeInfo.primitiveType, typeInfo.originalType);
    result.put(path, new ColumnStatisticsImpl<>(stats, comparator));
  }
  return result;
}
 
Example 2
Source Project: Bats   Source File: ParquetRecordWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the Parquet {@link PrimitiveType} describing the given field.
 *
 * <p>For decimal minor types the primitive type name defaults to
 * {@code logicalTypeForDecimals}; when {@code usePrimitiveTypesForDecimals} is set and the
 * field's precision fits, INT32 or INT64 is used instead. The length of a decimal is always
 * taken from {@link DecimalUtility#getMaxBytesSizeForPrecision}.
 *
 * @param field the field to describe
 * @return the primitive type built from the field's name, mode, minor type and precision
 */
protected PrimitiveType getPrimitiveType(MaterializedField field) {
  final MinorType type = field.getType().getMinorType();
  final String fieldName = field.getName();
  int typeLength = ParquetTypeHelper.getLengthForMinorType(type);
  PrimitiveTypeName physicalType = ParquetTypeHelper.getPrimitiveTypeNameForMinorType(type);

  if (Types.isDecimalType(type)) {
    if (!usePrimitiveTypesForDecimals) {
      physicalType = logicalTypeForDecimals;
    } else if (field.getPrecision() <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT32)) {
      physicalType = PrimitiveTypeName.INT32;
    } else if (field.getPrecision() <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT64)) {
      physicalType = PrimitiveTypeName.INT64;
    } else {
      // Precision is too large for either integer type.
      physicalType = logicalTypeForDecimals;
    }
    typeLength = DecimalUtility.getMaxBytesSizeForPrecision(field.getPrecision());
  }

  return new PrimitiveType(
      ParquetTypeHelper.getRepetitionForDataMode(field.getDataMode()),
      physicalType,
      typeLength,
      fieldName,
      ParquetTypeHelper.getOriginalTypeForMinorType(type),
      ParquetTypeHelper.getDecimalMetadataForField(field),
      null);
}
 
Example 3
Source Project: presto   Source File: TestMetadataReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that min/max values of a UTF8-annotated BINARY column are read back unchanged,
 * together with the null count, for every "created by" value supplied by the data provider.
 */
@Test(dataProvider = "allCreatedBy")
public void testReadStatsBinaryUtf8(Optional<String> fileCreatedBy)
{
    PrimitiveType varcharType = new PrimitiveType(OPTIONAL, BINARY, "Test column", OriginalType.UTF8);

    // Stats written by Parquet after https://issues.apache.org/jira/browse/PARQUET-1025
    Statistics parquetStats = new Statistics();
    parquetStats.setNull_count(13);
    parquetStats.setMin_value("a".getBytes(UTF_8));
    parquetStats.setMax_value("é".getBytes(UTF_8));

    // UTF-8 encodings of "a" and "é" respectively
    byte[] expectedMin = {'a'};
    byte[] expectedMax = {(byte) 0xC3, (byte) 0xA9};

    assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(parquetStats), varcharType))
            .isInstanceOfSatisfying(BinaryStatistics.class, actual -> {
                assertEquals(actual.getNumNulls(), 13);
                assertEquals(actual.getMin().getBytes(), expectedMin);
                assertEquals(actual.getMax().getBytes(), expectedMax);
                assertEquals(actual.getMinBytes(), expectedMin);
                assertEquals(actual.getMaxBytes(), expectedMax);
                assertEquals(actual.genericGetMin().getBytes(), expectedMin);
                assertEquals(actual.genericGetMax().getBytes(), expectedMax);
            });
}
 
Example 4
Source Project: parquet-mr   Source File: TestParquetMetadataConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that min/max statistics of a UTF8 BINARY column are kept after conversion when
 * the column holds a single distinct value (so min equals max).
 *
 * <p>The Thrift statistics handed to the converter are produced through {@code helper}, so
 * that each caller-supplied variant is actually exercised rather than every invocation
 * running the same conversion.
 *
 * @param helper produces the Thrift statistics that are fed to the converter
 */
private void testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper helper) {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  BinaryStatistics stats = new BinaryStatistics();
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();

  PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");
  Statistics convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      // Use the supplied helper instead of a fixed conversion so the parameter is honored.
      helper.toParquetStatistics(stats),
      binaryType);

  Assert.assertFalse("Stats should not be empty: " + convertedStats, convertedStats.isEmpty());
  Assert.assertArrayEquals("min == max: " + convertedStats, convertedStats.getMaxBytes(), convertedStats.getMinBytes());
}
 
Example 5
Source Project: dremio-oss   Source File: ParquetGroupConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Picks the converter for a Parquet group based on the matching field of the Arrow schema.
 *
 * <p>A union field gets a {@code UnionGroupConverter}. A list field must correspond to a
 * Parquet group annotated as {@code LIST}, otherwise an {@link IllegalStateException} is
 * raised by the precondition. Everything except a union is handed to
 * {@code defaultGroupConverter}.
 */
Converter groupConverterFromArrowSchema(String fieldName, String groupTypeName, GroupType groupType, Collection<SchemaPath> c) {
  final String childName = getNameForChild(fieldName);
  final Field field = Schema.findField(arrowSchema, groupTypeName);
  final ArrowTypeID typeId = field.getType().getTypeID();
  final List<Field> children = field.getChildren();

  if (typeId == ArrowTypeID.Union) {
    // if it's a union we will add the children directly to the parent
    return new UnionGroupConverter(columnResolver, fieldName, mutator, getWriterProvider(), groupType, c, options, children, childName,
        schemaHelper);
  }

  if (typeId == ArrowTypeID.List) {
    // the parquet schema must agree with the arrow schema; the logical list itself is handled by defaultGroupConverter()
    Preconditions.checkState(OriginalType.LIST == groupType.getOriginalType(), "parquet schema doesn't match the arrow schema for LIST " + childName);
  }

  return defaultGroupConverter(fieldName, mutator, groupType, c, children);
}
 
Example 6
Source Project: parquet-mr   Source File: TestPigSchemaConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts a LIST-annotated group for every combination of list repetition, element
 * repetition and primitive element type (except INT96) and expects no exception.
 */
@Test
public void testListsOfPrimitive() throws Exception {
  for (Type.Repetition listRepetition : Type.Repetition.values()) {
    for (Type.Repetition elementRepetition : Type.Repetition.values()) {
      for (PrimitiveType.PrimitiveTypeName elementType : PrimitiveType.PrimitiveTypeName.values()) {
        if (elementType == PrimitiveType.PrimitiveTypeName.INT96) {
          continue; // INT96 is NYI
        }
        Types.PrimitiveBuilder<PrimitiveType> element = Types.primitive(elementType, elementRepetition);
        if (elementType == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
          element.length(1);
        }
        GroupType listType = Types.buildGroup(listRepetition)
            .addField(element.named("b"))
            .as(OriginalType.LIST)
            .named("a");
        pigSchemaConverter.convertField(listType); // no exceptions, please
      }
    }
  }
}
 
Example 7
Source Project: parquet-mr   Source File: TestParquetParser.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Parses a schema whose fields carry explicit ids (with varying whitespace around
 * {@code =}) and checks the result, and its re-parsed string form, against the same schema
 * assembled with the builder API.
 */
@Test
public void testIDs() {
  String schemaText =
      "message Message {\n" +
      "  required binary string (UTF8) = 6;\n" +
      "  required int32 i=1;\n" +
      "  required binary s2= 3;\n" +
      "  required binary s3 =4;\n" +
      "}\n";

  MessageType fromText = parseMessageType(schemaText);
  MessageType fromBuilder = buildMessage()
      .required(BINARY).as(OriginalType.UTF8).id(6).named("string")
      .required(INT32).id(1).named("i")
      .required(BINARY).id(3).named("s2")
      .required(BINARY).id(4).named("s3")
      .named("Message");

  assertEquals(fromBuilder, fromText);
  // Round trip: the printed schema must parse back to the same structure.
  assertEquals(fromBuilder, parseMessageType(fromText.toString()));
}
 
Example 8
Source Project: hadoop-etl-udfs   Source File: ExaParquetWriterImpl.java    License: MIT License 6 votes vote down vote up
/**
 * Converts type descriptors into Parquet primitive types.
 *
 * <p>A descriptor with a non-zero {@code length} becomes a primitive type with that length
 * and no original type; otherwise the type is built from the name and the optional original
 * type.
 *
 * @param exaParquetTypeInfos descriptors to convert
 * @return one Parquet type per descriptor, in input order
 */
private static List<Type> typeInfoToParquetTypes(final List<ExaParquetTypeInfo> exaParquetTypeInfos) {
    final List<Type> parquetTypes = new ArrayList<>();
    for (final ExaParquetTypeInfo info : exaParquetTypeInfos) {
        final Type.Repetition repetition = Type.Repetition.valueOf(info.typeRepitition);
        final PrimitiveType.PrimitiveTypeName primitiveName =
                PrimitiveType.PrimitiveTypeName.valueOf(info.primitiveTypeName);

        final PrimitiveType converted;
        if (info.length == 0) {
            OriginalType annotation = null;
            if (info.originalType != null) {
                annotation = OriginalType.valueOf(info.originalType);
            }
            converted = new PrimitiveType(repetition, primitiveName, info.name, annotation);
        } else {
            converted = new PrimitiveType(repetition, primitiveName, info.length, info.name);
        }
        parquetTypes.add(converted);
    }
    return parquetTypes;
}
 
Example 9
/**
 * Maps a logical type to the corresponding Parquet {@link OriginalType}.
 *
 * @param logicalType the logical type, may be {@code null}
 * @return the matching original type, or {@code null} when the input is {@code null} or is
 *         not one of the decimal, date, time or timestamp (millis/micros) types
 */
private OriginalType convertLogicalType(LogicalType logicalType) {
  // instanceof is false for null, so a null input falls through to the final return.
  if (logicalType instanceof LogicalTypes.Decimal) {
    return OriginalType.DECIMAL;
  }
  if (logicalType instanceof LogicalTypes.Date) {
    return OriginalType.DATE;
  }
  if (logicalType instanceof LogicalTypes.TimeMillis) {
    return OriginalType.TIME_MILLIS;
  }
  if (logicalType instanceof LogicalTypes.TimeMicros) {
    return OriginalType.TIME_MICROS;
  }
  if (logicalType instanceof LogicalTypes.TimestampMillis) {
    return OriginalType.TIMESTAMP_MILLIS;
  }
  if (logicalType instanceof LogicalTypes.TimestampMicros) {
    return OriginalType.TIMESTAMP_MICROS;
  }
  return null;
}
 
Example 10
Source Project: parquet-mr   Source File: TestParquetMetadataConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts V1-style statistics with distinct min ("A") and max ("z") for a UTF8 BINARY
 * column and expects min/max to be dropped while the null count of 3 is preserved.
 */
@Test
public void testIgnoreStatsWithSignedSortOrder() {
  BinaryStatistics written = new BinaryStatistics();
  // Interleave nulls and values: null, "A", null, "z", null.
  for (String value : new String[] {"A", "z"}) {
    written.incrementNumNulls();
    written.updateStats(Binary.fromString(value));
  }
  written.incrementNumNulls();

  PrimitiveType utf8Binary = Types.required(PrimitiveTypeName.BINARY)
      .as(OriginalType.UTF8)
      .named("b");

  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  Statistics convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      StatsHelper.V1.toParquetStatistics(written),
      utf8Binary);

  Assert.assertFalse("Stats should not include min/max: " + convertedStats, convertedStats.hasNonNullValue());
  Assert.assertTrue("Stats should have null count: " + convertedStats, convertedStats.isNumNullsSet());
  Assert.assertEquals("Stats should have 3 nulls: " + convertedStats, 3L, convertedStats.getNumNulls());
}
 
Example 11
Source Project: parquet-mr   Source File: TestParquetMetadataConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the V2-only statistics check for unsigned integer types (using values on either side
 * of the signed boundary) and for decimals stored as BINARY and FIXED_LEN_BYTE_ARRAY.
 */
@Test
public void testV2OnlyStats() {
  PrimitiveType uint8 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_8).named("");
  testV2OnlyStats(uint8, 0x7F, 0x80);

  PrimitiveType uint16 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_16).named("");
  testV2OnlyStats(uint16, 0x7FFF, 0x8000);

  PrimitiveType uint32 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_32).named("");
  testV2OnlyStats(uint32, 0x7FFFFFFF, 0x80000000);

  PrimitiveType uint64 = Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named("");
  testV2OnlyStats(uint64, 0x7FFFFFFFFFFFFFFFL, 0x8000000000000000L);

  PrimitiveType binaryDecimal =
      Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named("");
  testV2OnlyStats(binaryDecimal, new BigInteger("-765875"), new BigInteger("876856"));

  PrimitiveType fixedDecimal =
      Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(14).as(OriginalType.DECIMAL).precision(7)
          .named("");
  testV2OnlyStats(fixedDecimal, new BigInteger("-6769643"), new BigInteger("9864675"));
}
 
Example 12
/**
 * Copies the logical-type properties derived from a Parquet annotation onto the schema.
 *
 * <p>Nothing is added when the annotation yields no properties, or when the annotation is
 * {@code DECIMAL} but the primitive type is neither {@code BINARY} nor
 * {@code FIXED_LEN_BYTE_ARRAY}.
 *
 * @param schema schema to decorate
 * @param annotation Parquet original type of the field
 * @param asPrimitive the field as a primitive type; supplies the decimal metadata
 * @param parquetPrimitiveTypeName primitive type name of the field
 * @return the same {@code schema} instance
 */
private Schema addLogicalTypeStrToSchema(
    Schema schema,
    OriginalType annotation,
    PrimitiveType asPrimitive,
    PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName
) {
  Map<String, String> props = convertOriginalTypeToMap(annotation, asPrimitive.getDecimalMetadata());
  if (props == null) {
    return schema;
  }

  boolean decimalOnOtherPrimitive = annotation == DECIMAL
      && parquetPrimitiveTypeName != BINARY
      && parquetPrimitiveTypeName != FIXED_LEN_BYTE_ARRAY;
  if (decimalOnOtherPrimitive) {
    return schema;
  }

  for (Map.Entry<String, String> prop : props.entrySet()) {
    schema.addProp(prop.getKey(), prop.getValue());
  }
  return schema;
}
 
Example 13
/**
 * Maps a logical type name to the corresponding Parquet {@link OriginalType}.
 *
 * <p>Only decimal, date, time-millis and timestamp-millis are mapped.
 * NOTE(review): the time-micros and timestamp-micros mappings were commented out in this
 * method and are therefore not produced here; confirm whether that is still intended.
 *
 * @param logicalType logical type name, may be {@code null}
 * @return the matching original type, or {@code null} for {@code null} or unmapped names
 */
private OriginalType convertLogicalTypeStr(String logicalType) {
  if (logicalType == null) {
    return null;
  }
  if (AvroTypeUtil.LOGICAL_TYPE_DECIMAL.equals(logicalType)) {
    return OriginalType.DECIMAL;
  }
  if (AvroTypeUtil.LOGICAL_TYPE_DATE.equals(logicalType)) {
    return OriginalType.DATE;
  }
  if (AvroTypeUtil.LOGICAL_TYPE_TIME_MILLIS.equals(logicalType)) {
    return OriginalType.TIME_MILLIS;
  }
  if (AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType)) {
    return OriginalType.TIMESTAMP_MILLIS;
  }
  return null;
}
 
Example 14
Source Project: parquet-mr   Source File: TestParquetMetadataConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the equal-min/max V2 statistics check for unsigned integer types, decimals stored as
 * BINARY and FIXED_LEN_BYTE_ARRAY, and INT96.
 */
@Test
public void testV2StatsEqualMinMax() {
  PrimitiveType uint8 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_8).named("");
  testV2StatsEqualMinMax(uint8, 93, 93);

  PrimitiveType uint16 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_16).named("");
  testV2StatsEqualMinMax(uint16, -5892, -5892);

  PrimitiveType uint32 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_32).named("");
  testV2StatsEqualMinMax(uint32, 234998934, 234998934);

  PrimitiveType uint64 = Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named("");
  testV2StatsEqualMinMax(uint64, -2389943895984985L, -2389943895984985L);

  PrimitiveType binaryDecimal =
      Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named("");
  testV2StatsEqualMinMax(binaryDecimal, new BigInteger("823749"), new BigInteger("823749"));

  PrimitiveType fixedDecimal =
      Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(14).as(OriginalType.DECIMAL).precision(7)
          .named("");
  testV2StatsEqualMinMax(fixedDecimal, new BigInteger("-8752832"), new BigInteger("-8752832"));

  PrimitiveType int96 = Types.optional(PrimitiveTypeName.INT96).named("");
  testV2StatsEqualMinMax(int96, new BigInteger("81032984"), new BigInteger("81032984"));
}
 
Example 15
Source Project: Bats   Source File: ParquetTableMetadataUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds per-column {@link ColumnStatistics} for a row group, keyed by column path.
 *
 * <p>Every column of the row group contributes its min value, max value and null count;
 * a missing or unset null count is replaced with {@link GroupScan#NO_COLUMN_STATS}.
 * Afterwards the entries returned by {@code populateNonInterestingColumnsStats} for the
 * columns not present in the row group are merged in.
 *
 * @param tableMetadata    the source of column types
 * @param rowGroupMetadata metadata to convert
 * @return map with converted row group metadata
 */
@SuppressWarnings("unchecked")
private static Map<SchemaPath, ColumnStatistics> getRowGroupColumnStatistics(
    MetadataBase.ParquetTableMetadataBase tableMetadata, MetadataBase.RowGroupMetadata rowGroupMetadata) {

  Map<SchemaPath, ColumnStatistics> result = new HashMap<>();

  for (MetadataBase.ColumnMetadata columnMetadata : rowGroupMetadata.getColumns()) {
    SchemaPath path = SchemaPath.getCompoundPath(columnMetadata.getName());

    Long nullsCount = columnMetadata.getNulls();
    boolean nullsKnown = columnMetadata.isNumNullsSet() && nullsCount != null;
    if (!nullsKnown) {
      nullsCount = GroupScan.NO_COLUMN_STATS;
    }

    PrimitiveType.PrimitiveTypeName physicalType = getPrimitiveTypeName(tableMetadata, columnMetadata);
    OriginalType annotation = getOriginalType(tableMetadata, columnMetadata);
    Comparator comparator = getComparator(physicalType, annotation);

    Map<StatisticsKind, Object> stats = new HashMap<>();
    stats.put(ColumnStatisticsKind.MIN_VALUE, getValue(columnMetadata.getMinValue(), physicalType, annotation));
    stats.put(ColumnStatisticsKind.MAX_VALUE, getValue(columnMetadata.getMaxValue(), physicalType, annotation));
    stats.put(ColumnStatisticsKind.NULLS_COUNT, nullsCount);

    result.put(path, new ColumnStatisticsImpl(stats, comparator));
  }

  // Add placeholder statistics for columns that the row group did not report.
  result.putAll(populateNonInterestingColumnsStats(result.keySet(), tableMetadata));
  return result;
}
 
Example 16
Source Project: Bats   Source File: ParquetTableMetadataUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the {@link OriginalType} of a column.
 *
 * <p>The value stored on the column itself wins; only when the column has none is the
 * table-level metadata consulted by column name.
 *
 * @param parquetTableMetadata the source of column type
 * @param column               column whose {@link OriginalType} should be returned
 * @return {@link OriginalType} type for the specified column
 */
public static OriginalType getOriginalType(MetadataBase.ParquetTableMetadataBase parquetTableMetadata, MetadataBase.ColumnMetadata column) {
  // for the case of parquet metadata v1 version, type information isn't stored in parquetTableMetadata, but in ColumnMetadata
  OriginalType fromColumn = column.getOriginalType();
  return fromColumn != null
      ? fromColumn
      : parquetTableMetadata.getOriginalType(column.getName());
}
 
Example 17
Source Project: Bats   Source File: Metadata.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a holder for the type details of a column; every argument is stored as given.
 *
 * @param originalType    Parquet original type of the column
 * @param precision       precision to record
 * @param scale           scale to record
 * @param repetitionLevel repetition level to record
 * @param definitionLevel definition level to record
 */
ColTypeInfo(OriginalType originalType, int precision, int scale, int repetitionLevel, int definitionLevel) {
  // Levels
  this.definitionLevel = definitionLevel;
  this.repetitionLevel = repetitionLevel;
  // Type annotation and its numeric parameters
  this.scale = scale;
  this.precision = precision;
  this.originalType = originalType;
}
 
Example 18
Source Project: Bats   Source File: Metadata_V1.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates column metadata from its parts; every argument is stored as given.
 *
 * <p>Note the argument order: {@code max} precedes {@code min}.
 *
 * @param name          column path
 * @param primitiveType Parquet primitive type of the column
 * @param originalType  Parquet original type of the column
 * @param max           maximum value to record
 * @param min           minimum value to record
 * @param nulls         null count to record
 */
public ColumnMetadata_v1(SchemaPath name, PrimitiveType.PrimitiveTypeName primitiveType, OriginalType originalType,
                         Object max, Object min, Long nulls) {
  // Statistics
  this.nulls = nulls;
  this.min = min;
  this.max = max;
  // Identity and type
  this.originalType = originalType;
  this.primitiveType = primitiveType;
  this.name = name;
}
 
Example 19
Source Project: Bats   Source File: Metadata_V4.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates column type metadata; every argument is stored as given and a lookup
 * {@code Key} is derived from {@code name}.
 *
 * @param name            column name segments
 * @param primitiveType   Parquet primitive type of the column
 * @param originalType    Parquet original type of the column
 * @param precision       precision to record
 * @param scale           scale to record
 * @param repetitionLevel repetition level to record
 * @param definitionLevel definition level to record
 * @param totalNullCount  total null count to record
 * @param isInteresting   value of the "interesting" flag to record
 */
public ColumnTypeMetadata_v4(String[] name, PrimitiveType.PrimitiveTypeName primitiveType, OriginalType originalType, int precision, int scale, int repetitionLevel, int definitionLevel, long totalNullCount, boolean isInteresting) {
  // Identity
  this.name = name;
  this.key = new Key(name);
  // Type information
  this.primitiveType = primitiveType;
  this.originalType = originalType;
  this.precision = precision;
  this.scale = scale;
  // Levels
  this.repetitionLevel = repetitionLevel;
  this.definitionLevel = definitionLevel;
  // Additional column information
  this.totalNullCount = totalNullCount;
  this.isInteresting = isInteresting;
}
 
Example 20
Source Project: Bats   Source File: Metadata_V3.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates column type metadata; every argument is stored as given and a lookup
 * {@code Key} is derived from {@code name}.
 *
 * @param name            column name segments
 * @param primitiveType   Parquet primitive type of the column
 * @param originalType    Parquet original type of the column
 * @param precision       precision to record
 * @param scale           scale to record
 * @param repetitionLevel repetition level to record
 * @param definitionLevel definition level to record
 */
public ColumnTypeMetadata_v3(String[] name, PrimitiveType.PrimitiveTypeName primitiveType, OriginalType originalType, int precision, int scale, int repetitionLevel, int definitionLevel) {
  // Identity
  this.name = name;
  this.key = new Key(name);
  // Type information
  this.primitiveType = primitiveType;
  this.originalType = originalType;
  this.precision = precision;
  this.scale = scale;
  // Levels
  this.repetitionLevel = repetitionLevel;
  this.definitionLevel = definitionLevel;
}
 
Example 21
Source Project: parquet-mr   Source File: List3Levels.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps a Parquet list after checking that it has the expected three-level shape: a group
 * annotated as {@code LIST} with exactly one field, where that field is a repeated group
 * which in turn has exactly one field (the element).
 *
 * @param list the Parquet List
 * @throws IllegalArgumentException if the group does not have that shape
 */
public List3Levels(GroupType list) {
  boolean annotatedAsList = list.getOriginalType() == OriginalType.LIST;
  if (!annotatedAsList || list.getFields().size() != 1) {
    throw new IllegalArgumentException("invalid list type: " + list);
  }

  Type middle = list.getFields().get(0);
  // Order matters: asGroupType() is only reached for a non-primitive, repeated field.
  boolean wellFormedMiddle = !middle.isPrimitive()
      && middle.isRepetition(REPEATED)
      && middle.asGroupType().getFields().size() == 1;
  if (!wellFormedMiddle) {
    throw new IllegalArgumentException("invalid list type: " + list);
  }

  GroupType repeatedGroup = middle.asGroupType();
  this.list = list;
  this.repeated = repeatedGroup;
  this.element = repeatedGroup.getFields().get(0);
}
 
Example 22
Source Project: tajo   Source File: TajoSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Tajo column into the Parquet type used to store it.
 *
 * @param column the column to convert
 * @return a Parquet primitive type named after the column
 * @throws RuntimeException if the column's Tajo type has no mapping here
 */
private Type convertColumn(Column column) {
  TajoDataTypes.Type tajoType = column.getDataType().getType();
  switch (tajoType) {
    case BOOLEAN:
      return primitive(column.getSimpleName(), PrimitiveTypeName.BOOLEAN);

    // All narrow integer types are stored as 32-bit integers.
    case BIT:
    case INT2:
    case INT4:
      return primitive(column.getSimpleName(), PrimitiveTypeName.INT32);

    case INT8:
      return primitive(column.getSimpleName(), PrimitiveTypeName.INT64);

    case FLOAT4:
      return primitive(column.getSimpleName(), PrimitiveTypeName.FLOAT);

    case FLOAT8:
      return primitive(column.getSimpleName(), PrimitiveTypeName.DOUBLE);

    // Character data is binary annotated as UTF8.
    case CHAR:
    case TEXT:
      return primitive(column.getSimpleName(), PrimitiveTypeName.BINARY, OriginalType.UTF8);

    case DATE:
      return primitive(column.getSimpleName(), PrimitiveTypeName.INT32, OriginalType.DATE);

    // Opaque payloads are plain binary without annotation.
    case PROTOBUF:
    case BLOB:
      return primitive(column.getSimpleName(), PrimitiveTypeName.BINARY);

    default:
      throw new RuntimeException("Cannot convert Tajo type: " + tajoType);
  }
}
 
Example 23
/**
 * Wraps a repeated type in a group carrying the given annotation.
 *
 * @param repetition   repetition of the wrapping group
 * @param alias        name of the wrapping group
 * @param originalType annotation of the wrapping group
 * @param nested       the single child; must be repeated
 * @return the wrapping group
 * @throws IllegalArgumentException if {@code nested} is not repeated
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested)
{
    if (nested.isRepetition(Repetition.REPEATED)) {
        return new GroupType(repetition, alias, originalType, nested);
    }
    throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}
 
Example 24
Source Project: presto   Source File: TestDataWritableWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Dispatches a value to the write routine that matches its Parquet type: primitive, list
 * (single-level or regular, depending on {@code singleLevelArray}), map, or struct. The
 * inspector's category is checked against the expected one before writing.
 *
 * @param value The writable object that contains the value.
 * @param inspector The object inspector used to get the correct value type.
 * @param type Type that contains information about the type schema.
 */
private void writeValue(Object value, ObjectInspector inspector, Type type)
{
    if (type.isPrimitive()) {
        checkInspectorCategory(inspector, ObjectInspector.Category.PRIMITIVE);
        writePrimitive(value, (PrimitiveObjectInspector) inspector);
        return;
    }

    GroupType group = type.asGroupType();
    OriginalType annotation = type.getOriginalType();

    if (annotation == OriginalType.LIST) {
        checkInspectorCategory(inspector, ObjectInspector.Category.LIST);
        ListObjectInspector listInspector = (ListObjectInspector) inspector;
        if (singleLevelArray) {
            writeSingleLevelArray(value, listInspector, group);
        }
        else {
            writeArray(value, listInspector, group);
        }
        return;
    }

    // A null annotation matches neither constant, so it falls through to the struct case.
    if (annotation == OriginalType.MAP || annotation == OriginalType.MAP_KEY_VALUE) {
        checkInspectorCategory(inspector, ObjectInspector.Category.MAP);
        writeMap(value, (MapObjectInspector) inspector, group);
        return;
    }

    checkInspectorCategory(inspector, ObjectInspector.Category.STRUCT);
    writeGroup(value, (StructObjectInspector) inspector, group);
}
 
Example 25
Source Project: presto   Source File: MapKeyValuesSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps a repeated type in a group carrying the given annotation.
 *
 * @param repetition   repetition of the wrapping group
 * @param alias        name of the wrapping group
 * @param originalType annotation of the wrapping group
 * @param nested       the single child; must be repeated
 * @return the wrapping group
 * @throws IllegalArgumentException if {@code nested} is not repeated
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested)
{
    boolean repeated = nested.isRepetition(Repetition.REPEATED);
    if (repeated) {
        return new GroupType(repetition, alias, originalType, nested);
    }
    throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}
 
Example 26
Source Project: parquet-mr   Source File: TestParquetMetadataConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs the skipped-V2-statistics check for an INTERVAL stored as a 12-byte
 * FIXED_LEN_BYTE_ARRAY and for INT96.
 */
@Test
public void testSkippedV2Stats() {
  PrimitiveType interval =
      Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(12).as(OriginalType.INTERVAL).named("");
  testSkippedV2Stats(interval, new BigInteger("12345678"), new BigInteger("12345679"));

  PrimitiveType int96 = Types.optional(PrimitiveTypeName.INT96).named("");
  testSkippedV2Stats(int96, new BigInteger("-75687987"), new BigInteger("45367657"));
}
 
Example 27
Source Project: parquet-mr   Source File: MetadataUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Prints one line describing a primitive column: its name prefixed by {@code depth} dots,
 * repetition and primitive type, followed by either the original type ({@code O:}) or the
 * logical type annotation ({@code L:}) when present, and, if a container schema is given,
 * the maximum repetition and definition levels ({@code R:}/{@code D:}).
 *
 * @param out               destination writer
 * @param type              the primitive type to describe
 * @param depth             nesting depth, rendered as leading dots
 * @param container         enclosing message type used to look up levels; may be {@code null}
 * @param cpath             path of the parent; temporarily extended with this type's name
 * @param showOriginalTypes whether to print the original type instead of the logical type
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
  String label = Strings.repeat(".", depth) + type.getName();
  Repetition repetition = type.getRepetition();
  PrimitiveTypeName primitiveName = type.getPrimitiveTypeName();
  out.format("%s: %s %s", label, repetition, primitiveName);

  if (showOriginalTypes) {
    OriginalType original = null;
    try {
      original = type.getOriginalType();
    } catch (Exception e) {
      // Lookup failed: treat as "no original type" and print nothing for it.
    }
    if (original != null) {
      out.format(" O:%s", original);
    }
  } else {
    LogicalTypeAnnotation logical = type.getLogicalTypeAnnotation();
    if (logical != null) {
      out.format(" L:%s", logical);
    }
  }

  if (container != null) {
    // Push this column's name, snapshot the full path, then restore the shared list.
    cpath.add(type.getName());
    String[] fullPath = cpath.toArray(new String[0]);
    cpath.remove(cpath.size() - 1);

    ColumnDescriptor descriptor = container.getColumnDescription(fullPath);

    int maxDefinition = descriptor.getMaxDefinitionLevel();
    int maxRepetition = descriptor.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", maxRepetition, maxDefinition);
  }
  out.println();
}
 
Example 28
Source Project: presto   Source File: TestMetadataReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that reading absent statistics yields an empty statistics object of the class
 * matching the column type, for integer, bigint, varchar and varbinary columns.
 */
@Test(dataProvider = "allCreatedBy")
public void testReadNullStats(Optional<String> fileCreatedBy)
{
    // integer
    PrimitiveType integerType = new PrimitiveType(OPTIONAL, INT32, "Test column");
    assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), integerType))
            .isInstanceOfSatisfying(
                    IntStatistics.class,
                    stats -> assertTrue(stats.isEmpty()));

    // bigint
    PrimitiveType bigintType = new PrimitiveType(OPTIONAL, INT64, "Test column");
    assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), bigintType))
            .isInstanceOfSatisfying(
                    LongStatistics.class,
                    stats -> assertTrue(stats.isEmpty()));

    // varchar
    PrimitiveType varcharType = new PrimitiveType(OPTIONAL, BINARY, "Test column", OriginalType.UTF8);
    assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), varcharType))
            .isInstanceOfSatisfying(
                    BinaryStatistics.class,
                    stats -> assertTrue(stats.isEmpty()));

    // varbinary
    PrimitiveType varbinaryType = new PrimitiveType(OPTIONAL, BINARY, "Test column");
    assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), varbinaryType))
            .isInstanceOfSatisfying(
                    BinaryStatistics.class,
                    stats -> assertTrue(stats.isEmpty()));
}
 
Example 29
Source Project: iceberg   Source File: ParquetTypeVisitor.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Walks a Parquet type and dispatches to the matching visitor callback.
 *
 * <p>Message types and primitives are handled directly. A group annotated as {@code LIST}
 * or {@code MAP} goes through {@code visitList} / {@code visitMap}; any other group,
 * including one without annotation, is visited as a struct.
 *
 * @param type    the type to visit
 * @param visitor callbacks producing the result
 * @param <T>     result type of the visitor
 * @return whatever the selected callback returns
 */
public static <T> T visit(Type type, ParquetTypeVisitor<T> visitor) {
  if (type instanceof MessageType) {
    return visitor.message((MessageType) type,
        visitFields(type.asGroupType(), visitor));
  }

  if (type.isPrimitive()) {
    return visitor.primitive(type.asPrimitiveType());
  }

  // if not a primitive, the typeId must be a group
  GroupType group = type.asGroupType();
  OriginalType annotation = group.getOriginalType();
  if (annotation == OriginalType.LIST) {
    return visitList(group, visitor);
  }
  if (annotation == OriginalType.MAP) {
    return visitMap(group, visitor);
  }

  return visitor.struct(group, visitFields(group, visitor));
}
 
Example 30
Source Project: pxf   Source File: ParquetRecordFilterBuilder.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a filter operand into the integer stored in an INT32 Parquet column.
 *
 * <p>For {@code DATE} columns the operand's text is parsed as an ISO local date and
 * converted to the number of days since 1970-01-01 (narrowed to {@code int}); otherwise the
 * text is parsed as a decimal integer.
 *
 * @param originalType original type of the column
 * @param valueOperand operand holding the value, may be {@code null}
 * @return the integer value, or {@code null} when the operand is {@code null}
 */
private static Integer getIntegerForINT32(OriginalType originalType, OperandNode valueOperand) {
    if (valueOperand == null) {
        return null;
    }
    if (originalType != OriginalType.DATE) {
        return Integer.parseInt(valueOperand.toString());
    }
    // Number of days since epoch
    return (int) LocalDate.parse(valueOperand.toString()).toEpochDay();
}