Java Code Examples for org.apache.parquet.schema.OriginalType
The following examples show how to use
org.apache.parquet.schema.OriginalType. These examples are extracted from open source projects.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Bats Source File: ParquetTableMetadataUtils.java License: Apache License 2.0 | 6 votes |
/**
 * Builds placeholder statistics for the columns of a v4 table metadata that are not
 * listed in {@code schemaPaths}.
 *
 * <p>Every produced entry holds only a {@code NULLS_COUNT} of
 * {@code GroupScan.NO_COLUMN_STATS} plus the comparator chosen for the column's primitive
 * and original types. Metadata that is not a {@code Metadata_V4.ParquetTableMetadata_v4}
 * produces an empty map.
 *
 * @param schemaPaths column paths which should be ignored
 * @param parquetTableMetadata the source of column metadata for the non-interesting columns
 * @return map of non-interesting column statistics, possibly empty
 */
@SuppressWarnings("unchecked")
public static Map<SchemaPath, ColumnStatistics> populateNonInterestingColumnsStats(
        Set<SchemaPath> schemaPaths, MetadataBase.ParquetTableMetadataBase parquetTableMetadata) {
    Map<SchemaPath, ColumnStatistics> result = new HashMap<>();
    if (!(parquetTableMetadata instanceof Metadata_V4.ParquetTableMetadata_v4)) {
        return result;
    }

    Metadata_V4.ParquetTableMetadata_v4 v4Metadata =
            (Metadata_V4.ParquetTableMetadata_v4) parquetTableMetadata;
    for (Metadata_V4.ColumnTypeMetadata_v4 typeInfo : v4Metadata.getColumnTypeInfoMap().values()) {
        SchemaPath path = SchemaPath.getCompoundPath(typeInfo.name);
        if (schemaPaths.contains(path)) {
            // Already covered by the caller; only the remaining columns get placeholders.
            continue;
        }
        Map<StatisticsKind, Object> stats = new HashMap<>();
        stats.put(ColumnStatisticsKind.NULLS_COUNT, GroupScan.NO_COLUMN_STATS);
        PrimitiveType.PrimitiveTypeName physicalType = typeInfo.primitiveType;
        OriginalType annotation = typeInfo.originalType;
        Comparator ordering = getComparator(physicalType, annotation);
        result.put(path, new ColumnStatisticsImpl<>(stats, ordering));
    }
    return result;
}
Example 2
Source Project: Bats Source File: ParquetRecordWriter.java License: Apache License 2.0 | 6 votes |
/**
 * Creates the Parquet {@link PrimitiveType} used to store the given field.
 *
 * <p>Length, physical type, repetition, original type and decimal metadata are all looked
 * up through {@code ParquetTypeHelper}. For decimal minor types the physical type starts
 * as {@code logicalTypeForDecimals}; when {@code usePrimitiveTypesForDecimals} is set it
 * is narrowed to INT32 or INT64 if the field precision fits, and the length is replaced
 * by the byte size required for that precision.
 *
 * @param field the field to describe
 * @return the primitive type built from the field's name, mode and minor type
 */
protected PrimitiveType getPrimitiveType(MaterializedField field) {
    final MinorType minorType = field.getType().getMinorType();
    final String columnName = field.getName();

    int typeLength = ParquetTypeHelper.getLengthForMinorType(minorType);
    PrimitiveTypeName storageType = ParquetTypeHelper.getPrimitiveTypeNameForMinorType(minorType);

    if (Types.isDecimalType(minorType)) {
        final int precision = field.getPrecision();
        storageType = logicalTypeForDecimals;
        if (usePrimitiveTypesForDecimals) {
            if (precision <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT32)) {
                storageType = PrimitiveTypeName.INT32;
            } else if (precision <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT64)) {
                storageType = PrimitiveTypeName.INT64;
            }
        }
        typeLength = DecimalUtility.getMaxBytesSizeForPrecision(precision);
    }

    final Repetition repetition = ParquetTypeHelper.getRepetitionForDataMode(field.getDataMode());
    final OriginalType annotation = ParquetTypeHelper.getOriginalTypeForMinorType(minorType);
    final DecimalMetadata decimalInfo = ParquetTypeHelper.getDecimalMetadataForField(field);
    return new PrimitiveType(
            repetition, storageType, typeLength, columnName, annotation, decimalInfo, null);
}
Example 3
Source Project: presto Source File: TestMetadataReader.java License: Apache License 2.0 | 6 votes |
@Test(dataProvider = "allCreatedBy") public void testReadStatsBinaryUtf8(Optional<String> fileCreatedBy) { PrimitiveType varchar = new PrimitiveType(OPTIONAL, BINARY, "Test column", OriginalType.UTF8); Statistics statistics; // Stats written by Parquet after https://issues.apache.org/jira/browse/PARQUET-1025 statistics = new Statistics(); statistics.setNull_count(13); statistics.setMin_value("a".getBytes(UTF_8)); statistics.setMax_value("é".getBytes(UTF_8)); assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(statistics), varchar)) .isInstanceOfSatisfying(BinaryStatistics.class, columnStatistics -> { assertEquals(columnStatistics.getNumNulls(), 13); assertEquals(columnStatistics.getMin().getBytes(), new byte[] {'a'}); assertEquals(columnStatistics.getMax().getBytes(), new byte[] {(byte) 0xC3, (byte) 0xA9}); assertEquals(columnStatistics.getMinBytes(), new byte[] {'a'}); assertEquals(columnStatistics.getMaxBytes(), new byte[] {(byte) 0xC3, (byte) 0xA9}); assertEquals(columnStatistics.genericGetMin().getBytes(), new byte[] {'a'}); assertEquals(columnStatistics.genericGetMax().getBytes(), new byte[] {(byte) 0xC3, (byte) 0xA9}); }); }
Example 4
Source Project: parquet-mr Source File: TestParquetMetadataConverter.java License: Apache License 2.0 | 6 votes |
/**
 * Records the single value {@code "A"} twice together with three nulls, converts the
 * statistics to the Parquet format and back for a required UTF8 binary column, and checks
 * that the converted statistics are non-empty with identical min and max bytes.
 *
 * @param helper strategy used to turn the in-memory statistics into format statistics
 */
private void testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper helper) {
    ParquetMetadataConverter converter = new ParquetMetadataConverter();
    BinaryStatistics stats = new BinaryStatistics();
    stats.incrementNumNulls();
    stats.updateStats(Binary.fromString("A"));
    stats.incrementNumNulls();
    stats.updateStats(Binary.fromString("A"));
    stats.incrementNumNulls();

    PrimitiveType binaryType =
            Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");

    // Convert through the supplied helper: the parameter was previously ignored in favour
    // of ParquetMetadataConverter.toParquetStatistics, so every caller ran the same path.
    Statistics convertedStats = converter.fromParquetStatistics(
            Version.FULL_VERSION,
            helper.toParquetStatistics(stats),
            binaryType);

    Assert.assertFalse("Stats should not be empty: " + convertedStats, convertedStats.isEmpty());
    Assert.assertArrayEquals("min == max: " + convertedStats,
            convertedStats.getMaxBytes(), convertedStats.getMinBytes());
}
Example 5
Source Project: dremio-oss Source File: ParquetGroupConverter.java License: Apache License 2.0 | 6 votes |
Converter groupConverterFromArrowSchema(String fieldName, String groupTypeName, GroupType groupType, Collection<SchemaPath> c) { final String nameForChild = getNameForChild(fieldName); final Field arrowField = Schema.findField(arrowSchema, groupTypeName); final ArrowTypeID arrowTypeType = arrowField.getType().getTypeID(); final List<Field> arrowChildren = arrowField.getChildren(); if (arrowTypeType == ArrowTypeID.Union) { // if it's a union we will add the children directly to the parent return new UnionGroupConverter(columnResolver, fieldName, mutator, getWriterProvider(), groupType, c, options, arrowChildren, nameForChild, schemaHelper); } else if (arrowTypeType == ArrowTypeID.List) { // make sure the parquet schema matches the arrow schema and delegate handling the logical list to defaultGroupConverter() Preconditions.checkState(groupType.getOriginalType() == OriginalType.LIST, "parquet schema doesn't match the arrow schema for LIST " + nameForChild); } return defaultGroupConverter(fieldName, mutator, groupType, c, arrowChildren); }
Example 6
Source Project: parquet-mr Source File: TestPigSchemaConverter.java License: Apache License 2.0 | 6 votes |
@Test public void testListsOfPrimitive() throws Exception { for (Type.Repetition repetition : Type.Repetition.values()) { for (Type.Repetition valueRepetition : Type.Repetition.values()) { for (PrimitiveType.PrimitiveTypeName primitiveTypeName : PrimitiveType.PrimitiveTypeName.values()) { if (primitiveTypeName != PrimitiveType.PrimitiveTypeName.INT96) { // INT96 is NYI Types.PrimitiveBuilder<PrimitiveType> value = Types.primitive(primitiveTypeName, valueRepetition); if (primitiveTypeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) value.length(1); GroupType type = Types.buildGroup(repetition).addField(value.named("b")).as(OriginalType.LIST).named("a"); pigSchemaConverter.convertField(type); // no exceptions, please } } } } }
Example 7
Source Project: parquet-mr Source File: TestParquetParser.java License: Apache License 2.0 | 6 votes |
/**
 * Parses a message whose fields carry explicit ids (written with varying whitespace
 * around {@code =}) and checks that both the parsed schema and the schema re-parsed from
 * its own string form equal the programmatically built one.
 */
@Test
public void testIDs() {
    final MessageType expected = buildMessage()
            .required(BINARY).as(OriginalType.UTF8).id(6).named("string")
            .required(INT32).id(1).named("i")
            .required(BINARY).id(3).named("s2")
            .required(BINARY).id(4).named("s3")
            .named("Message");

    final String schemaText =
            "message Message {\n"
            + " required binary string (UTF8) = 6;\n"
            + " required int32 i=1;\n"
            + " required binary s2= 3;\n"
            + " required binary s3 =4;\n"
            + "}\n";

    final MessageType firstPass = parseMessageType(schemaText);
    assertEquals(expected, firstPass);

    // Round trip: the printed form of the parsed schema must parse to the same schema.
    final MessageType secondPass = parseMessageType(firstPass.toString());
    assertEquals(expected, secondPass);
}
Example 8
Source Project: hadoop-etl-udfs Source File: ExaParquetWriterImpl.java License: MIT License | 6 votes |
/**
 * Converts type descriptions into Parquet primitive types.
 *
 * <p>A description with a non-zero {@code length} becomes a primitive type with that
 * length; otherwise the type is built from the name and the optional original type
 * (kept as {@code null} when the description has none).
 *
 * @param exaParquetTypeInfos the type descriptions to convert
 * @return the converted types, in input order
 */
private static List<Type> typeInfoToParquetTypes(final List<ExaParquetTypeInfo> exaParquetTypeInfos) {
    final List<Type> parquetTypes = new ArrayList<>();
    for (final ExaParquetTypeInfo info : exaParquetTypeInfos) {
        final boolean hasLength = info.length != 0;
        final Type.Repetition repetition = Type.Repetition.valueOf(info.typeRepitition);
        final PrimitiveType.PrimitiveTypeName primitiveName =
                PrimitiveType.PrimitiveTypeName.valueOf(info.primitiveTypeName);

        final PrimitiveType converted;
        if (hasLength) {
            converted = new PrimitiveType(repetition, primitiveName, info.length, info.name);
        } else {
            OriginalType annotation = null;
            if (info.originalType != null) {
                annotation = OriginalType.valueOf(info.originalType);
            }
            converted = new PrimitiveType(repetition, primitiveName, info.name, annotation);
        }
        parquetTypes.add(converted);
    }
    return parquetTypes;
}
Example 9
Source Project: datacollector Source File: AvroSchemaConverter190Int96Avro18.java License: Apache License 2.0 | 6 votes |
/**
 * Maps an Avro logical type to the corresponding Parquet {@link OriginalType}.
 *
 * @param logicalType the Avro logical type, may be {@code null}
 * @return the matching original type, or {@code null} when the argument is {@code null}
 *         or is none of the decimal, date, time or timestamp logical types handled here
 */
private OriginalType convertLogicalType(LogicalType logicalType) {
    // instanceof is false for null, so a null argument falls through to the final return.
    if (logicalType instanceof LogicalTypes.Decimal) {
        return OriginalType.DECIMAL;
    }
    if (logicalType instanceof LogicalTypes.Date) {
        return OriginalType.DATE;
    }
    if (logicalType instanceof LogicalTypes.TimeMillis) {
        return OriginalType.TIME_MILLIS;
    }
    if (logicalType instanceof LogicalTypes.TimeMicros) {
        return OriginalType.TIME_MICROS;
    }
    if (logicalType instanceof LogicalTypes.TimestampMillis) {
        return OriginalType.TIMESTAMP_MILLIS;
    }
    if (logicalType instanceof LogicalTypes.TimestampMicros) {
        return OriginalType.TIMESTAMP_MICROS;
    }
    return null;
}
Example 10
Source Project: parquet-mr Source File: TestParquetMetadataConverter.java License: Apache License 2.0 | 6 votes |
/**
 * Converts V1-style statistics holding the values {@code "A"} and {@code "z"} plus three
 * nulls for a required UTF8 binary column and checks that min/max are dropped while the
 * null count of 3 is preserved.
 */
@Test
public void testIgnoreStatsWithSignedSortOrder() {
    final PrimitiveType utf8Column =
            Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");

    final BinaryStatistics written = new BinaryStatistics();
    written.incrementNumNulls();
    written.updateStats(Binary.fromString("A"));
    written.incrementNumNulls();
    written.updateStats(Binary.fromString("z"));
    written.incrementNumNulls();

    final ParquetMetadataConverter metadataConverter = new ParquetMetadataConverter();
    Statistics convertedStats = metadataConverter.fromParquetStatistics(
            Version.FULL_VERSION,
            StatsHelper.V1.toParquetStatistics(written),
            utf8Column);

    Assert.assertFalse("Stats should not include min/max: " + convertedStats,
            convertedStats.hasNonNullValue());
    Assert.assertTrue("Stats should have null count: " + convertedStats,
            convertedStats.isNumNullsSet());
    Assert.assertEquals("Stats should have 3 nulls: " + convertedStats,
            3L, convertedStats.getNumNulls());
}
Example 11
Source Project: parquet-mr Source File: TestParquetMetadataConverter.java License: Apache License 2.0 | 6 votes |
/**
 * Runs the V2-only statistics check for the unsigned integer annotations and for decimals
 * stored as BINARY and as FIXED_LEN_BYTE_ARRAY, each with a fixed min/max pair.
 */
@Test
public void testV2OnlyStats() {
    // Unsigned integer annotations.
    PrimitiveType uint8 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_8).named("");
    testV2OnlyStats(uint8, 0x7F, 0x80);

    PrimitiveType uint16 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_16).named("");
    testV2OnlyStats(uint16, 0x7FFF, 0x8000);

    PrimitiveType uint32 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_32).named("");
    testV2OnlyStats(uint32, 0x7FFFFFFF, 0x80000000);

    PrimitiveType uint64 = Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named("");
    testV2OnlyStats(uint64, 0x7FFFFFFFFFFFFFFFL, 0x8000000000000000L);

    // Decimals backed by variable-length and fixed-length byte arrays.
    PrimitiveType binaryDecimal =
            Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named("");
    testV2OnlyStats(binaryDecimal, new BigInteger("-765875"), new BigInteger("876856"));

    PrimitiveType fixedDecimal = Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
            .length(14)
            .as(OriginalType.DECIMAL)
            .precision(7)
            .named("");
    testV2OnlyStats(fixedDecimal, new BigInteger("-6769643"), new BigInteger("9864675"));
}
Example 12
Source Project: datacollector Source File: AvroSchemaConverter190Int96Avro17.java License: Apache License 2.0 | 6 votes |
/**
 * Copies the logical-type properties derived from a Parquet annotation onto an Avro schema.
 *
 * <p>Nothing is added when no properties can be derived, or when the annotation is
 * {@code DECIMAL} but the physical type is neither {@code BINARY} nor
 * {@code FIXED_LEN_BYTE_ARRAY}.
 *
 * @param schema the schema to decorate
 * @param annotation the Parquet original type of the field
 * @param asPrimitive the primitive type supplying the decimal metadata
 * @param parquetPrimitiveTypeName the physical type of the field
 * @return the same {@code schema} instance
 */
private Schema addLogicalTypeStrToSchema(
        Schema schema,
        OriginalType annotation,
        PrimitiveType asPrimitive,
        PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName
) {
    Map<String, String> props = convertOriginalTypeToMap(annotation, asPrimitive.getDecimalMetadata());
    if (props == null) {
        return schema;
    }

    boolean decimalOnOtherPrimitive = annotation == DECIMAL
            && parquetPrimitiveTypeName != BINARY
            && parquetPrimitiveTypeName != FIXED_LEN_BYTE_ARRAY;
    if (decimalOnOtherPrimitive) {
        return schema;
    }

    for (Map.Entry<String, String> prop : props.entrySet()) {
        schema.addProp(prop.getKey(), prop.getValue());
    }
    return schema;
}
Example 13
Source Project: datacollector Source File: AvroSchemaConverter190Int96Avro17.java License: Apache License 2.0 | 6 votes |
private OriginalType convertLogicalTypeStr(String logicalType) { if (logicalType == null) { return null; } else if (AvroTypeUtil.LOGICAL_TYPE_DECIMAL.equals(logicalType)) { return OriginalType.DECIMAL; } else if (AvroTypeUtil.LOGICAL_TYPE_DATE.equals(logicalType)) { return OriginalType.DATE; } else if (AvroTypeUtil.LOGICAL_TYPE_TIME_MILLIS.equals(logicalType)) { return OriginalType.TIME_MILLIS; // } else if (AvroTypeUtil.LOGICAL_TYPE_TIME_MICROS.equals(logicalType)) { // return OriginalType.TIME_MICROS; } else if (AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType)) { return OriginalType.TIMESTAMP_MILLIS; // } else if (AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MICROS.equals(logicalType)) { // return OriginalType.TIMESTAMP_MICROS; } return null; }
Example 14
Source Project: parquet-mr Source File: TestParquetMetadataConverter.java License: Apache License 2.0 | 6 votes |
/**
 * Runs the equal-min/max V2 statistics check for the unsigned integer annotations, for
 * decimals stored as BINARY and FIXED_LEN_BYTE_ARRAY, and for a plain INT96 column.
 */
@Test
public void testV2StatsEqualMinMax() {
    // Unsigned integer annotations.
    PrimitiveType uint8 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_8).named("");
    testV2StatsEqualMinMax(uint8, 93, 93);

    PrimitiveType uint16 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_16).named("");
    testV2StatsEqualMinMax(uint16, -5892, -5892);

    PrimitiveType uint32 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_32).named("");
    testV2StatsEqualMinMax(uint32, 234998934, 234998934);

    PrimitiveType uint64 = Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named("");
    testV2StatsEqualMinMax(uint64, -2389943895984985L, -2389943895984985L);

    // Decimals backed by variable-length and fixed-length byte arrays.
    PrimitiveType binaryDecimal =
            Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named("");
    testV2StatsEqualMinMax(binaryDecimal, new BigInteger("823749"), new BigInteger("823749"));

    PrimitiveType fixedDecimal = Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
            .length(14)
            .as(OriginalType.DECIMAL)
            .precision(7)
            .named("");
    testV2StatsEqualMinMax(fixedDecimal, new BigInteger("-8752832"), new BigInteger("-8752832"));

    // INT96 without any annotation.
    PrimitiveType int96 = Types.optional(PrimitiveTypeName.INT96).named("");
    testV2StatsEqualMinMax(int96, new BigInteger("81032984"), new BigInteger("81032984"));
}
Example 15
Source Project: Bats Source File: ParquetTableMetadataUtils.java License: Apache License 2.0 | 5 votes |
/**
 * Turns the column metadata of one row group into a map of {@link ColumnStatistics}
 * keyed by column path.
 *
 * <p>Each column contributes its min value, max value and null count; the null count
 * falls back to {@code GroupScan.NO_COLUMN_STATS} when it is unset or {@code null}.
 * Entries from {@code populateNonInterestingColumnsStats} for the columns not present
 * in the row group are added at the end.
 *
 * @param tableMetadata the source of column types
 * @param rowGroupMetadata metadata to convert
 * @return map with converted row group metadata
 */
@SuppressWarnings("unchecked")
private static Map<SchemaPath, ColumnStatistics> getRowGroupColumnStatistics(
        MetadataBase.ParquetTableMetadataBase tableMetadata, MetadataBase.RowGroupMetadata rowGroupMetadata) {

    Map<SchemaPath, ColumnStatistics> result = new HashMap<>();

    for (MetadataBase.ColumnMetadata columnMetadata : rowGroupMetadata.getColumns()) {
        SchemaPath path = SchemaPath.getCompoundPath(columnMetadata.getName());

        Long nullCount = columnMetadata.getNulls();
        boolean nullCountKnown = columnMetadata.isNumNullsSet() && nullCount != null;
        if (!nullCountKnown) {
            nullCount = GroupScan.NO_COLUMN_STATS;
        }

        PrimitiveType.PrimitiveTypeName physicalType = getPrimitiveTypeName(tableMetadata, columnMetadata);
        OriginalType annotation = getOriginalType(tableMetadata, columnMetadata);
        Comparator ordering = getComparator(physicalType, annotation);

        Map<StatisticsKind, Object> stats = new HashMap<>();
        stats.put(ColumnStatisticsKind.MIN_VALUE,
                getValue(columnMetadata.getMinValue(), physicalType, annotation));
        stats.put(ColumnStatisticsKind.MAX_VALUE,
                getValue(columnMetadata.getMaxValue(), physicalType, annotation));
        stats.put(ColumnStatisticsKind.NULLS_COUNT, nullCount);

        result.put(path, new ColumnStatisticsImpl(stats, ordering));
    }

    // Fill in placeholders for the columns this row group did not report.
    result.putAll(populateNonInterestingColumnsStats(result.keySet(), tableMetadata));
    return result;
}
Example 16
Source Project: Bats Source File: ParquetTableMetadataUtils.java License: Apache License 2.0 | 5 votes |
/** * Returns {@link OriginalType} type for the specified column. * * @param parquetTableMetadata the source of column type * @param column column whose {@link OriginalType} should be returned * @return {@link OriginalType} type for the specified column */ public static OriginalType getOriginalType(MetadataBase.ParquetTableMetadataBase parquetTableMetadata, MetadataBase.ColumnMetadata column) { OriginalType originalType = column.getOriginalType(); // for the case of parquet metadata v1 version, type information isn't stored in parquetTableMetadata, but in ColumnMetadata if (originalType == null) { originalType = parquetTableMetadata.getOriginalType(column.getName()); } return originalType; }
Example 17
Source Project: Bats Source File: Metadata.java License: Apache License 2.0 | 5 votes |
/**
 * Creates a column type description from its individual attributes.
 *
 * @param originalType the Parquet original type of the column
 * @param precision the precision stored for the column
 * @param scale the scale stored for the column
 * @param repetitionLevel the repetition level stored for the column
 * @param definitionLevel the definition level stored for the column
 */
ColTypeInfo(OriginalType originalType, int precision, int scale, int repetitionLevel, int definitionLevel) {
    // Nesting levels.
    this.definitionLevel = definitionLevel;
    this.repetitionLevel = repetitionLevel;
    // Type annotation and its numeric attributes.
    this.scale = scale;
    this.precision = precision;
    this.originalType = originalType;
}
Example 18
Source Project: Bats Source File: Metadata_V1.java License: Apache License 2.0 | 5 votes |
/**
 * Creates the metadata of a single column.
 *
 * @param name the schema path of the column
 * @param primitiveType the Parquet primitive type of the column
 * @param originalType the Parquet original type of the column
 * @param max the maximum value recorded for the column
 * @param min the minimum value recorded for the column
 * @param nulls the number of nulls recorded for the column
 */
public ColumnMetadata_v1(SchemaPath name, PrimitiveType.PrimitiveTypeName primitiveType,
        OriginalType originalType, Object max, Object min, Long nulls) {
    // Statistics.
    this.nulls = nulls;
    this.min = min;
    this.max = max;
    // Identity and type.
    this.originalType = originalType;
    this.primitiveType = primitiveType;
    this.name = name;
}
Example 19
Source Project: Bats Source File: Metadata_V4.java License: Apache License 2.0 | 5 votes |
/**
 * Creates the type metadata of a column, including the {@code Key} built from its name.
 *
 * @param name the name segments of the column
 * @param primitiveType the Parquet primitive type of the column
 * @param originalType the Parquet original type of the column
 * @param precision the precision stored for the column
 * @param scale the scale stored for the column
 * @param repetitionLevel the repetition level stored for the column
 * @param definitionLevel the definition level stored for the column
 * @param totalNullCount the total null count stored for the column
 * @param isInteresting the interesting-column flag stored for the column
 */
public ColumnTypeMetadata_v4(String[] name, PrimitiveType.PrimitiveTypeName primitiveType,
        OriginalType originalType, int precision, int scale, int repetitionLevel, int definitionLevel,
        long totalNullCount, boolean isInteresting) {
    // Identity: the lookup key is built from the same name segments.
    this.key = new Key(name);
    this.name = name;
    // Type information.
    this.originalType = originalType;
    this.primitiveType = primitiveType;
    this.scale = scale;
    this.precision = precision;
    // Nesting levels.
    this.definitionLevel = definitionLevel;
    this.repetitionLevel = repetitionLevel;
    // Remaining attributes.
    this.isInteresting = isInteresting;
    this.totalNullCount = totalNullCount;
}
Example 20
Source Project: Bats Source File: Metadata_V3.java License: Apache License 2.0 | 5 votes |
/**
 * Creates the type metadata of a column, including the {@code Key} built from its name.
 *
 * @param name the name segments of the column
 * @param primitiveType the Parquet primitive type of the column
 * @param originalType the Parquet original type of the column
 * @param precision the precision stored for the column
 * @param scale the scale stored for the column
 * @param repetitionLevel the repetition level stored for the column
 * @param definitionLevel the definition level stored for the column
 */
public ColumnTypeMetadata_v3(String[] name, PrimitiveType.PrimitiveTypeName primitiveType,
        OriginalType originalType, int precision, int scale, int repetitionLevel, int definitionLevel) {
    // Identity: the lookup key is built from the same name segments.
    this.key = new Key(name);
    this.name = name;
    // Type information.
    this.originalType = originalType;
    this.primitiveType = primitiveType;
    this.scale = scale;
    this.precision = precision;
    // Nesting levels.
    this.definitionLevel = definitionLevel;
    this.repetitionLevel = repetitionLevel;
}
Example 21
Source Project: parquet-mr Source File: List3Levels.java License: Apache License 2.0 | 5 votes |
/**
 * Wraps a Parquet list after validating its three-level shape.
 *
 * <p>The group must be annotated as {@code LIST} and contain exactly one field; that
 * field must be a repeated group which itself contains exactly one field, the element.
 *
 * @param list the Parquet List
 * @throws IllegalArgumentException if the group does not have that structure
 */
public List3Levels(GroupType list) {
    final boolean singleFieldList =
            list.getOriginalType() == OriginalType.LIST && list.getFields().size() == 1;
    if (!singleFieldList) {
        throw new IllegalArgumentException("invalid list type: " + list);
    }

    final Type middle = list.getFields().get(0);
    final boolean repeatedSingleFieldGroup = !middle.isPrimitive()
            && middle.isRepetition(REPEATED)
            && middle.asGroupType().getFields().size() == 1;
    if (!repeatedSingleFieldGroup) {
        throw new IllegalArgumentException("invalid list type: " + list);
    }

    final GroupType repeatedGroup = middle.asGroupType();
    this.list = list;
    this.repeated = repeatedGroup;
    this.element = repeatedGroup.getFields().get(0);
}
Example 22
Source Project: tajo Source File: TajoSchemaConverter.java License: Apache License 2.0 | 5 votes |
/**
 * Converts a Tajo column into the Parquet primitive type used to store it.
 *
 * <p>Character and text columns become UTF8-annotated binaries, dates become
 * DATE-annotated INT32 values, and the remaining supported types map to a plain
 * primitive type.
 *
 * @param column the column to convert
 * @return the Parquet type named after the column's simple name
 * @throws RuntimeException if the column's data type has no mapping
 */
private Type convertColumn(Column column) {
    final TajoDataTypes.Type tajoType = column.getDataType().getType();
    switch (tajoType) {
        case BOOLEAN:
            return primitive(column.getSimpleName(), PrimitiveTypeName.BOOLEAN);

        // 32-bit integer family
        case BIT:
        case INT2:
        case INT4:
            return primitive(column.getSimpleName(), PrimitiveTypeName.INT32);
        case INT8:
            return primitive(column.getSimpleName(), PrimitiveTypeName.INT64);

        case FLOAT4:
            return primitive(column.getSimpleName(), PrimitiveTypeName.FLOAT);
        case FLOAT8:
            return primitive(column.getSimpleName(), PrimitiveTypeName.DOUBLE);

        // annotated types
        case CHAR:
        case TEXT:
            return primitive(column.getSimpleName(), PrimitiveTypeName.BINARY, OriginalType.UTF8);
        case DATE:
            return primitive(column.getSimpleName(), PrimitiveTypeName.INT32, OriginalType.DATE);

        // plain binaries
        case PROTOBUF:
        case BLOB:
            return primitive(column.getSimpleName(), PrimitiveTypeName.BINARY);

        default:
            throw new RuntimeException("Cannot convert Tajo type: " + tajoType);
    }
}
Example 23
Source Project: presto Source File: SingleLevelArrayMapKeyValuesSchemaConverter.java License: Apache License 2.0 | 5 votes |
/**
 * Wraps a repeated type in a group carrying the given annotation.
 *
 * @param repetition the repetition of the wrapping group
 * @param alias the name of the wrapping group
 * @param originalType the annotation of the wrapping group
 * @param nested the single child of the group; must be repeated
 * @return the wrapping group
 * @throws IllegalArgumentException if {@code nested} is not repeated
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) {
    if (nested.isRepetition(Repetition.REPEATED)) {
        return new GroupType(repetition, alias, originalType, nested);
    }
    throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}
Example 24
Source Project: presto Source File: TestDataWritableWriter.java License: Apache License 2.0 | 5 votes |
/**
 * Writes one field value to the Parquet RecordConsumer, dispatching on the schema type.
 *
 * <p>Primitive types go to {@code writePrimitive}. Groups annotated as {@code LIST} are
 * written as arrays (single-level when {@code singleLevelArray} is set), groups annotated
 * as {@code MAP} or {@code MAP_KEY_VALUE} as maps, and every other group as a struct.
 * The inspector category is checked before each write.
 *
 * @param value The writable object that contains the value.
 * @param inspector The object inspector used to get the correct value type.
 * @param type Type that contains information about the type schema.
 */
private void writeValue(Object value, ObjectInspector inspector, Type type) {
    if (type.isPrimitive()) {
        checkInspectorCategory(inspector, ObjectInspector.Category.PRIMITIVE);
        writePrimitive(value, (PrimitiveObjectInspector) inspector);
        return;
    }

    final GroupType group = type.asGroupType();
    final OriginalType annotation = type.getOriginalType();

    if (annotation == OriginalType.LIST) {
        checkInspectorCategory(inspector, ObjectInspector.Category.LIST);
        if (singleLevelArray) {
            writeSingleLevelArray(value, (ListObjectInspector) inspector, group);
        } else {
            writeArray(value, (ListObjectInspector) inspector, group);
        }
        return;
    }

    // A null annotation matches neither constant, so no separate null test is needed.
    if (annotation == OriginalType.MAP || annotation == OriginalType.MAP_KEY_VALUE) {
        checkInspectorCategory(inspector, ObjectInspector.Category.MAP);
        writeMap(value, (MapObjectInspector) inspector, group);
        return;
    }

    checkInspectorCategory(inspector, ObjectInspector.Category.STRUCT);
    writeGroup(value, (StructObjectInspector) inspector, group);
}
Example 25
Source Project: presto Source File: MapKeyValuesSchemaConverter.java License: Apache License 2.0 | 5 votes |
/**
 * Builds the annotated group that encloses a repeated nested type.
 *
 * @param repetition the repetition of the enclosing group
 * @param alias the name of the enclosing group
 * @param originalType the annotation placed on the enclosing group
 * @param nested the only child of the group; it has to be repeated
 * @return the enclosing group
 * @throws IllegalArgumentException if {@code nested} is not repeated
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) {
    final boolean nestedIsRepeated = nested.isRepetition(Repetition.REPEATED);
    if (nestedIsRepeated) {
        return new GroupType(repetition, alias, originalType, nested);
    }
    throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}
Example 26
Source Project: parquet-mr Source File: TestParquetMetadataConverter.java License: Apache License 2.0 | 5 votes |
/**
 * Runs the skipped-V2-statistics check for an INTERVAL-annotated 12-byte fixed-length
 * column and for a plain INT96 column.
 */
@Test
public void testSkippedV2Stats() {
    PrimitiveType interval = Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
            .length(12)
            .as(OriginalType.INTERVAL)
            .named("");
    testSkippedV2Stats(interval, new BigInteger("12345678"), new BigInteger("12345679"));

    PrimitiveType int96 = Types.optional(PrimitiveTypeName.INT96).named("");
    testSkippedV2Stats(int96, new BigInteger("-75687987"), new BigInteger("45367657"));
}
Example 27
Source Project: parquet-mr Source File: MetadataUtils.java License: Apache License 2.0 | 5 votes |
/**
 * Prints a one-line description of a primitive column.
 *
 * <p>The line holds the name prefixed by {@code depth} dots, the repetition and the
 * primitive type name, followed by either the original type ({@code O:}) or the logical
 * type annotation ({@code L:}) when present, and - when a container schema is given -
 * the maximum repetition and definition levels ({@code R:}/{@code D:}) of the column.
 *
 * @param out the writer receiving the line
 * @param type the primitive type to describe
 * @param depth nesting depth, used as the number of leading dots
 * @param container the enclosing message type, or {@code null} to omit the levels
 * @param cpath path of the enclosing groups; temporarily extended with the column name
 *        and restored before the column description is looked up
 * @param showOriginalTypes whether to print the original type instead of the logical one
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container,
        List<String> cpath, boolean showOriginalTypes) {
    final String dottedName = Strings.repeat(".", depth) + type.getName();
    final Repetition repetition = type.getRepetition();
    final PrimitiveTypeName physicalType = type.getPrimitiveTypeName();
    out.format("%s: %s %s", dottedName, repetition, physicalType);

    if (showOriginalTypes) {
        OriginalType originalType = null;
        try {
            originalType = type.getOriginalType();
        } catch (Exception ignored) {
            // Any failure while reading the original type is treated as "no original type".
            originalType = null;
        }
        if (originalType != null) {
            out.format(" O:%s", originalType);
        }
    } else {
        LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
        if (logicalType != null) {
            out.format(" L:%s", logicalType);
        }
    }

    if (container != null) {
        // Append the column name only long enough to snapshot the full path.
        cpath.add(type.getName());
        String[] fullPath = cpath.toArray(new String[0]);
        cpath.remove(cpath.size() - 1);

        ColumnDescriptor descriptor = container.getColumnDescription(fullPath);
        int maxDefinition = descriptor.getMaxDefinitionLevel();
        int maxRepetition = descriptor.getMaxRepetitionLevel();
        out.format(" R:%d D:%d", maxRepetition, maxDefinition);
    }

    out.println();
}
Example 28
Source Project: presto Source File: TestMetadataReader.java License: Apache License 2.0 | 5 votes |
@Test(dataProvider = "allCreatedBy") public void testReadNullStats(Optional<String> fileCreatedBy) { // integer assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), new PrimitiveType(OPTIONAL, INT32, "Test column"))) .isInstanceOfSatisfying( IntStatistics.class, columnStatistics -> assertTrue(columnStatistics.isEmpty())); // bigint assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), new PrimitiveType(OPTIONAL, INT64, "Test column"))) .isInstanceOfSatisfying( LongStatistics.class, columnStatistics -> assertTrue(columnStatistics.isEmpty())); // varchar assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), new PrimitiveType(OPTIONAL, BINARY, "Test column", OriginalType.UTF8))) .isInstanceOfSatisfying( BinaryStatistics.class, columnStatistics -> assertTrue(columnStatistics.isEmpty())); // varbinary assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), new PrimitiveType(OPTIONAL, BINARY, "Test column"))) .isInstanceOfSatisfying( BinaryStatistics.class, columnStatistics -> assertTrue(columnStatistics.isEmpty())); }
Example 29
Source Project: iceberg Source File: ParquetTypeVisitor.java License: Apache License 2.0 | 5 votes |
public static <T> T visit(Type type, ParquetTypeVisitor<T> visitor) { if (type instanceof MessageType) { return visitor.message((MessageType) type, visitFields(type.asGroupType(), visitor)); } else if (type.isPrimitive()) { return visitor.primitive(type.asPrimitiveType()); } else { // if not a primitive, the typeId must be a group GroupType group = type.asGroupType(); OriginalType annotation = group.getOriginalType(); if (annotation != null) { switch (annotation) { case LIST: return visitList(group, visitor); case MAP: return visitMap(group, visitor); default: } } return visitor.struct(group, visitFields(group, visitor)); } }
Example 30
Source Project: pxf Source File: ParquetRecordFilterBuilder.java License: Apache License 2.0 | 5 votes |
private static Integer getIntegerForINT32(OriginalType originalType, OperandNode valueOperand) { if (valueOperand == null) return null; if (originalType == OriginalType.DATE) { // Number of days since epoch LocalDate localDateValue = LocalDate.parse(valueOperand.toString()); LocalDate epoch = LocalDate.ofEpochDay(0); return (int) ChronoUnit.DAYS.between(epoch, localDateValue); } return Integer.parseInt(valueOperand.toString()); }