org.apache.parquet.schema.Types Java Examples

The following examples show how to use org.apache.parquet.schema.Types. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that when a schema element's converted type disagrees with its logical type,
 * the schema read back is the one described by the converted type.
 */
@Test
public void testIncompatibleLogicalAndConvertedTypes() {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();

  // Schema that gets serialized: a required BINARY column annotated as DECIMAL(9, 2).
  MessageType decimalSchema = Types.buildMessage()
    .required(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(9).scale(2).named("aBinary")
    .named("Message");

  // Schema expected after the round trip: the same column, annotated as JSON instead.
  MessageType jsonSchema = Types.buildMessage()
    .required(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.jsonType()).named("aBinary")
    .named("Message");

  List<SchemaElement> elements = converter.toParquetSchema(decimalSchema);
  // Overwrite the converted type of the element at index 1 so it no longer matches the
  // logical type; on such a mismatch the converted type is expected to take precedence.
  elements.get(1).setConverted_type(ConvertedType.JSON);

  MessageType roundTripped = converter.fromParquetSchema(elements, null);
  assertEquals(jsonSchema, roundTripped);
}
 
Example #2
Source File: TestStatistics.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that the string form of FLOAT and DOUBLE statistics keeps '.' as the decimal
 * separator while the JVM default locale is French.
 */
@Test
public void testFloatingPointStringIndependentFromLocale() {
  Statistics<?> floatStatistics =
      Statistics.createStats(Types.optional(PrimitiveTypeName.FLOAT).named("test-float"));
  floatStatistics.updateStats(123.456f);

  Statistics<?> doubleStatistics =
      Statistics.createStats(Types.optional(PrimitiveTypeName.DOUBLE).named("test-double"));
  doubleStatistics.updateStats(12345.6789);

  final String expectedFloatText = "min: 123.456, max: 123.456, num_nulls: 0";
  final String expectedDoubleText = "min: 12345.6789, max: 12345.6789, num_nulls: 0";

  // Remember the current default so it can be restored no matter how the assertions end.
  final Locale originalLocale = Locale.getDefault();
  try {
    // French formatting would use ',' as the decimal separator instead of '.'.
    Locale.setDefault(Locale.FRENCH);
    assertEquals(expectedFloatText, floatStatistics.toString());
    assertEquals(expectedDoubleText, doubleStatistics.toString());
  } finally {
    Locale.setDefault(originalLocale);
  }
}
 
Example #3
Source File: TestParquetWriterAppendBlocks.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Appending the row groups of {@code file1} into a writer whose schema only declares the
 * {@code string} column must be rejected with an {@link IllegalArgumentException}.
 *
 * <p>The input stream opened on {@code file1} is managed by try-with-resources so it is
 * closed even when the assertion fails; previously it was never closed.
 *
 * @throws IOException if reading the footer, opening the file or starting the writer fails
 */
@Test
public void testFailDroppingColumns() throws IOException {
  MessageType droppedColumnSchema = Types.buildMessage()
      .required(BINARY).as(UTF8).named("string")
      .named("AppendTest");

  final ParquetMetadata footer = ParquetFileReader.readFooter(
      CONF, file1, NO_FILTER);

  // The resource variable is implicitly final, so the lambda below can still capture it.
  try (FSDataInputStream incoming = file1.getFileSystem(CONF).open(file1)) {
    Path droppedColumnFile = newTemp();
    final ParquetFileWriter writer = new ParquetFileWriter(
        CONF, droppedColumnSchema, droppedColumnFile);
    writer.start();

    TestUtils.assertThrows("Should complain that id column is dropped",
        IllegalArgumentException.class,
        (Callable<Void>) () -> {
          writer.appendRowGroups(incoming, footer.getBlocks(), false);
          return null;
        });
  }
}
 
Example #4
Source File: TestParquetWriterAppendBlocks.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Appending {@code file1} to a writer whose schema declares an additional required
 * {@code value} column must be rejected with an {@link IllegalArgumentException}.
 *
 * @throws IOException if creating the temp path or starting the writer fails
 */
@Test
public void testFailMissingColumn() throws IOException {
  MessageType schemaWithExtraColumn = Types.buildMessage()
      .required(INT32).named("id")
      .required(BINARY).as(UTF8).named("string")
      .required(FLOAT).named("value")
      .named("AppendTest");

  Path targetFile = newTemp();
  final ParquetFileWriter fileWriter = new ParquetFileWriter(CONF, schemaWithExtraColumn, targetFile);
  fileWriter.start();

  // The append itself is the operation expected to throw.
  Callable<Void> appendAttempt = () -> {
    fileWriter.appendFile(CONF, file1);
    return null;
  };
  TestUtils.assertThrows("Should complain that value column is missing",
      IllegalArgumentException.class,
      appendAttempt);
}
 
Example #5
Source File: JsonElementConversionFactory.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the schema for this column: a group named after the JSON column that wraps a
 * single repeated group ({@code MAP_KEY}) holding the key schema followed by the element schema.
 *
 * @return the optional or required wrapping group, or {@code null} when the column's
 *         repetition is neither {@code OPTIONAL} nor {@code REQUIRED}
 */
@Override
protected Type buildSchema() {
  JsonElementConverter valueConverter = this.elementConverter;
  JsonElementConverter keyConverter = getKeyConverter();

  // Inner repeated group: key schema first, then element schema.
  GroupType repeatedEntries = Types.repeatedGroup()
      .addFields(keyConverter.schema(), valueConverter.schema())
      .named(MAP_KEY)
      .asGroupType();

  String name = this.jsonSchema.getColumnName();
  switch (optionalOrRequired(this.jsonSchema)) {
    case OPTIONAL: {
      return Types.optionalGroup().addFields(repeatedEntries).named(name).asGroupType();
    }
    case REQUIRED: {
      return Types.requiredGroup().addFields(repeatedEntries).named(name).asGroupType();
    }
    default: {
      return null;
    }
  }
}
 
Example #6
Source File: TestParquetWriter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Building a writer for a message that contains a group without fields must fail with
 * {@link InvalidSchemaException}, and no file may exist at the target path afterwards.
 *
 * @throws IOException if the temporary file cannot be created
 */
@Test
public void testBadWriteSchema() throws IOException {
  final File target = temp.newFile("test.parquet");
  // Remove the placeholder created above so the final existence check is meaningful.
  target.delete();

  Callable<Void> buildWriter = () -> {
    ExampleParquetWriter.builder(new Path(target.toString()))
        .withType(Types.buildMessage()
            .addField(new GroupType(REQUIRED, "invalid_group"))
            .named("invalid_message"))
        .build();
    return null;
  };
  TestUtils.assertThrows("Should reject a schema with an empty group",
      InvalidSchemaException.class, buildWriter);

  Assert.assertFalse("Should not create a file when schema is rejected", target.exists());
}
 
Example #7
Source File: TestStatistics.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the parameterized {@code testBuilder} helper once per primitive type, passing a
 * minimum and a maximum value together with the byte representation supplied for each.
 */
@Test
public void testBuilder() {
  // Fixed-width primitives.
  testBuilder(Types.required(BOOLEAN).named("test_boolean"),
      false, new byte[] { 0 },
      true, new byte[] { 1 });
  testBuilder(Types.required(INT32).named("test_int32"),
      -42, intToBytes(-42),
      42, intToBytes(42));
  testBuilder(Types.required(INT64).named("test_int64"),
      -42L, longToBytes(-42),
      42L, longToBytes(42));
  testBuilder(Types.required(FLOAT).named("test_float"),
      -42.0f, intToBytes(floatToIntBits(-42.0f)),
      42.0f, intToBytes(floatToIntBits(42.0f)));
  testBuilder(Types.required(DOUBLE).named("test_double"),
      -42.0, longToBytes(doubleToLongBits(-42.0)),
      42.0, longToBytes(doubleToLongBits(42.0)));

  // Binary-backed types all reuse the same two 12-byte arrays.
  byte[] lower = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
  byte[] upper = { 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 };
  testBuilder(Types.required(INT96).named("test_int96"),
      Binary.fromConstantByteArray(lower), lower,
      Binary.fromConstantByteArray(upper), upper);
  testBuilder(Types.required(FIXED_LEN_BYTE_ARRAY).length(12).named("test_fixed"),
      Binary.fromConstantByteArray(lower), lower,
      Binary.fromConstantByteArray(upper), upper);
  testBuilder(Types.required(BINARY).named("test_binary"),
      Binary.fromConstantByteArray(lower), lower,
      Binary.fromConstantByteArray(upper), upper);
}
 
Example #8
Source File: TestParquetParser.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a message whose binary fields carry the JSON and BSON annotations and checks that
 * both the parsed schema and the schema re-parsed from its string form equal the schema
 * assembled with the {@code Types} builder.
 */
@Test
public void testEmbeddedAnnotations() {
  String schemaText = "message EmbeddedMessage {" +
      "  required binary json (JSON);" +
      "  required binary bson (BSON);" +
      "}\n";
  MessageType firstPass = MessageTypeParser.parseMessageType(schemaText);

  MessageType built = Types.buildMessage()
      .required(BINARY).as(JSON).named("json")
      .required(BINARY).as(BSON).named("bson")
      .named("EmbeddedMessage");
  assertEquals(built, firstPass);

  // Round trip through toString() to make sure the annotations survive printing as well.
  MessageType secondPass = MessageTypeParser.parseMessageType(firstPass.toString());
  assertEquals(built, secondPass);
}
 
Example #9
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * With a default converter, V1 statistics of a UTF8 binary column holding two different
 * values must come back without min/max, while the null count (3) is still reported.
 */
@Test
public void testIgnoreStatsWithSignedSortOrder() {
  ParquetMetadataConverter metadataConverter = new ParquetMetadataConverter();

  // Two distinct values interleaved with three nulls.
  BinaryStatistics binaryStats = new BinaryStatistics();
  binaryStats.incrementNumNulls();
  binaryStats.updateStats(Binary.fromString("A"));
  binaryStats.incrementNumNulls();
  binaryStats.updateStats(Binary.fromString("z"));
  binaryStats.incrementNumNulls();

  PrimitiveType utf8Type = Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");
  Statistics converted = metadataConverter.fromParquetStatistics(
      Version.FULL_VERSION, StatsHelper.V1.toParquetStatistics(binaryStats), utf8Type);

  Assert.assertFalse("Stats should not include min/max: " + converted, converted.hasNonNullValue());
  Assert.assertTrue("Stats should have null count: " + converted, converted.isNumNullsSet());
  Assert.assertEquals("Stats should have 3 nulls: " + converted, 3L, converted.getNumNulls());
}
 
Example #10
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that statistics of a UTF8 binary column are still used when the column holds a
 * single distinct value: the converted statistics must be non-empty and have min == max.
 *
 * <p>The Parquet-format statistics are now produced through {@code helper}. Previously the
 * parameter was ignored and {@code ParquetMetadataConverter.toParquetStatistics} was called
 * directly, so every helper variant exercised the same conversion path.
 *
 * @param helper strategy used to turn the in-memory statistics into Parquet-format statistics
 */
private void testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper helper) {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();

  // The same value twice, interleaved with three nulls, so that min and max coincide.
  BinaryStatistics stats = new BinaryStatistics();
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();

  PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");
  Statistics convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      helper.toParquetStatistics(stats),
      binaryType);

  Assert.assertFalse("Stats should not be empty: " + convertedStats, convertedStats.isEmpty());
  Assert.assertArrayEquals("min == max: " + convertedStats, convertedStats.getMaxBytes(), convertedStats.getMinBytes());
}
 
Example #11
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the parameterized {@code testV2OnlyStats} helper for unsigned integer and decimal
 * types, each with a pair of values supplied as minimum and maximum.
 */
@Test
public void testV2OnlyStats() {
  // Unsigned integers: largest value with the top bit clear vs. smallest with it set.
  PrimitiveType uint8 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_8).named("");
  testV2OnlyStats(uint8, 0x7F, 0x80);

  PrimitiveType uint16 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_16).named("");
  testV2OnlyStats(uint16, 0x7FFF, 0x8000);

  PrimitiveType uint32 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_32).named("");
  testV2OnlyStats(uint32, 0x7FFFFFFF, 0x80000000);

  PrimitiveType uint64 = Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named("");
  testV2OnlyStats(uint64, 0x7FFFFFFFFFFFFFFFL, 0x8000000000000000L);

  // Decimals stored as variable-length and fixed-length binary.
  PrimitiveType binaryDecimal =
      Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named("");
  testV2OnlyStats(binaryDecimal, new BigInteger("-765875"), new BigInteger("876856"));

  PrimitiveType fixedDecimal = Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(14)
      .as(OriginalType.DECIMAL).precision(7).named("");
  testV2OnlyStats(fixedDecimal, new BigInteger("-6769643"), new BigInteger("9864675"));
}
 
Example #12
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the parameterized {@code testV2StatsEqualMinMax} helper for unsigned integer,
 * decimal and INT96 types, each time passing the same value as minimum and maximum.
 */
@Test
public void testV2StatsEqualMinMax() {
  PrimitiveType uint8 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_8).named("");
  testV2StatsEqualMinMax(uint8, 93, 93);

  PrimitiveType uint16 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_16).named("");
  testV2StatsEqualMinMax(uint16, -5892, -5892);

  PrimitiveType uint32 = Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_32).named("");
  testV2StatsEqualMinMax(uint32, 234998934, 234998934);

  PrimitiveType uint64 = Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named("");
  testV2StatsEqualMinMax(uint64, -2389943895984985L, -2389943895984985L);

  PrimitiveType binaryDecimal =
      Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named("");
  testV2StatsEqualMinMax(binaryDecimal, new BigInteger("823749"), new BigInteger("823749"));

  PrimitiveType fixedDecimal = Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(14)
      .as(OriginalType.DECIMAL).precision(7).named("");
  testV2StatsEqualMinMax(fixedDecimal, new BigInteger("-8752832"), new BigInteger("-8752832"));

  PrimitiveType int96 = Types.optional(PrimitiveTypeName.INT96).named("");
  testV2StatsEqualMinMax(int96, new BigInteger("81032984"), new BigInteger("81032984"));
}
 
Example #13
Source File: TestColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Covers two FLOAT column-index cases: signed zeros at the min/max boundaries, and a NaN
 * maximum, for which no column index is expected to be built.
 */
@Test
public void testBuildFloatZeroNaN() {
  PrimitiveType floatType = Types.required(FLOAT).named("test_float");
  StatsBuilder statsBuilder = new StatsBuilder();

  // Case 1: the expected min/max lists below show -0.0f for a zero minimum and 0.0f for a
  // zero maximum.
  ColumnIndexBuilder zeroBuilder = ColumnIndexBuilder.getBuilder(floatType, Integer.MAX_VALUE);
  zeroBuilder.add(statsBuilder.stats(floatType, -1.0f, -0.0f));
  zeroBuilder.add(statsBuilder.stats(floatType, 0.0f, 1.0f));
  zeroBuilder.add(statsBuilder.stats(floatType, 1.0f, 100.0f));
  ColumnIndex index = zeroBuilder.build();
  assertCorrectValues(index.getMinValues(), -1.0f, -0.0f, 1.0f);
  assertCorrectValues(index.getMaxValues(), 0.0f, 1.0f, 100.0f);

  // Case 2: one statistics entry has NaN as its maximum, so build() must return null.
  ColumnIndexBuilder nanBuilder = ColumnIndexBuilder.getBuilder(floatType, Integer.MAX_VALUE);
  nanBuilder.add(statsBuilder.stats(floatType, -1.0f, -0.0f));
  nanBuilder.add(statsBuilder.stats(floatType, 0.0f, Float.NaN));
  nanBuilder.add(statsBuilder.stats(floatType, 1.0f, 100.0f));
  assertNull(nanBuilder.build());
}
 
Example #14
Source File: TestColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Covers two DOUBLE column-index cases: signed zeros at the min/max boundaries, and a NaN
 * maximum, for which no column index is expected to be built.
 */
@Test
public void testBuildDoubleZeroNaN() {
  PrimitiveType doubleType = Types.required(DOUBLE).named("test_double");
  StatsBuilder statsBuilder = new StatsBuilder();

  // Case 1: the expected min/max lists below show -0.0 for a zero minimum and 0.0 for a
  // zero maximum.
  ColumnIndexBuilder zeroBuilder = ColumnIndexBuilder.getBuilder(doubleType, Integer.MAX_VALUE);
  zeroBuilder.add(statsBuilder.stats(doubleType, -1.0, -0.0));
  zeroBuilder.add(statsBuilder.stats(doubleType, 0.0, 1.0));
  zeroBuilder.add(statsBuilder.stats(doubleType, 1.0, 100.0));
  ColumnIndex index = zeroBuilder.build();
  assertCorrectValues(index.getMinValues(), -1.0, -0.0, 1.0);
  assertCorrectValues(index.getMaxValues(), 0.0, 1.0, 100.0);

  // Case 2: one statistics entry has NaN as its maximum, so build() must return null.
  ColumnIndexBuilder nanBuilder = ColumnIndexBuilder.getBuilder(doubleType, Integer.MAX_VALUE);
  nanBuilder.add(statsBuilder.stats(doubleType, -1.0, -0.0));
  nanBuilder.add(statsBuilder.stats(doubleType, 0.0, Double.NaN));
  nanBuilder.add(statsBuilder.stats(doubleType, 1.0, 100.0));
  assertNull(nanBuilder.build());
}
 
Example #15
Source File: TestSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converting an Arrow 64-bit microsecond {@code Time} field named {@code a} must yield a
 * Parquet message {@code root} with an optional INT64 field annotated
 * {@code timeType(false, MICROS)}.
 *
 * <p>The hand-built schema is passed as the expected value and the conversion result as the
 * actual value, so a failure message labels the two sides correctly.
 */
@Test
public void testArrowTimeMicrosecondToParquet() {
  MessageType actual = converter.fromArrow(new Schema(asList(
    field("a", new ArrowType.Time(TimeUnit.MICROSECOND, 64))
  ))).getParquetSchema();
  MessageType expected =
    Types.buildMessage().addField(Types.optional(INT64).as(timeType(false, MICROS)).named("a")).named("root");
  Assert.assertEquals(expected, actual);
}
 
Example #16
Source File: ParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds a {@link MessageType} from a flat list of schema elements.
 *
 * <p>The first element is taken as the root: its field id (when set) and its name are
 * applied to the message. The iterator positioned after the root is handed to
 * {@code buildChildren} together with the root's child count.
 *
 * @param schema the schema elements, root first; must contain at least one element,
 *               otherwise {@code Iterator.next()} throws
 * @param columnOrders passed through unchanged to {@code buildChildren}
 * @return the message type named after the root element
 */
MessageType fromParquetSchema(List<SchemaElement> schema, List<ColumnOrder> columnOrders) {
  Iterator<SchemaElement> elements = schema.iterator();
  SchemaElement rootElement = elements.next();

  Types.MessageTypeBuilder messageBuilder = Types.buildMessage();
  if (rootElement.isSetField_id()) {
    messageBuilder.id(rootElement.field_id);
  }

  // NOTE(review): the trailing 0 looks like a starting column index -- confirm against buildChildren.
  buildChildren(messageBuilder, elements, rootElement.getNum_children(), columnOrders, 0);
  return messageBuilder.named(rootElement.name);
}
 
Example #17
Source File: SchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an Arrow schema into a Parquet message type named {@code "root"} and bundles
 * both schemas with the per-field mappings.
 *
 * @param arrowSchema the Arrow schema to convert
 * @return a mapping holding the Arrow schema, the resulting Parquet schema and the field mappings
 */
public SchemaMapping fromArrow(Schema arrowSchema) {
  List<Field> arrowFields = arrowSchema.getFields();
  List<TypeMapping> fieldMappings = fromArrow(arrowFields);

  // Every mapped field is added to one message builder before the message is named.
  MessageType parquetSchema = addToBuilder(fieldMappings, Types.buildMessage()).named("root");

  return new SchemaMapping(arrowSchema, parquetSchema, fieldMappings);
}
 
Example #18
Source File: TestSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converting an Arrow 32-bit millisecond {@code Time} field named {@code a} must yield a
 * Parquet message {@code root} with an optional INT32 field annotated
 * {@code timeType(false, MILLIS)}.
 *
 * <p>The hand-built schema is passed as the expected value and the conversion result as the
 * actual value, so a failure message labels the two sides correctly.
 */
@Test
public void testArrowTimeMillisecondToParquet() {
  MessageType actual = converter.fromArrow(new Schema(asList(
    field("a", new ArrowType.Time(TimeUnit.MILLISECOND, 32))
  ))).getParquetSchema();
  MessageType expected =
    Types.buildMessage().addField(Types.optional(INT32).as(timeType(false, MILLIS)).named("a")).named("root");
  Assert.assertEquals(expected, actual);
}
 
Example #19
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * With {@code parquet.strings.signed-min-max.enabled} switched on, statistics of a UTF8
 * binary column must come back non-empty with a null count of 3. For the V1 helper no
 * min/max is expected; for any other helper min must be "A" and max must be "z".
 *
 * @param helper strategy used to turn the in-memory statistics into Parquet-format statistics
 */
private void testUseStatsWithSignedSortOrder(StatsHelper helper) {
  // Override the defaults: accept statistics that were accumulated using signed order.
  Configuration signedOrderConf = new Configuration();
  signedOrderConf.setBoolean("parquet.strings.signed-min-max.enabled", true);
  ParquetMetadataConverter metadataConverter = new ParquetMetadataConverter(signedOrderConf);

  // Two distinct values interleaved with three nulls.
  BinaryStatistics binaryStats = new BinaryStatistics();
  binaryStats.incrementNumNulls();
  binaryStats.updateStats(Binary.fromString("A"));
  binaryStats.incrementNumNulls();
  binaryStats.updateStats(Binary.fromString("z"));
  binaryStats.incrementNumNulls();

  PrimitiveType utf8Type = Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");
  Statistics converted = metadataConverter.fromParquetStatistics(
      Version.FULL_VERSION, helper.toParquetStatistics(binaryStats), utf8Type);

  Assert.assertFalse("Stats should not be empty", converted.isEmpty());
  Assert.assertTrue(converted.isNumNullsSet());
  Assert.assertEquals("Should have 3 nulls", 3, converted.getNumNulls());

  if (helper != StatsHelper.V1) {
    Assert.assertEquals("Should have correct min (unsigned sort)",
        Binary.fromString("A"), converted.genericGetMin());
    Assert.assertEquals("Should have correct max (unsigned sort)",
        Binary.fromString("z"), converted.genericGetMax());
  } else {
    assertFalse("Min-max should be null for V1 stats", converted.hasNonNullValue());
  }
}
 
Example #20
Source File: ProtoSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a protocol buffer message class into a Parquet message type named after the
 * descriptor's full name, logging the input class and the conversion result at debug level.
 *
 * @param protobufClass the protocol buffer message class to convert
 * @return the Parquet schema built from the fields of the class's descriptor
 */
public MessageType convert(Class<? extends Message> protobufClass) {
  LOG.debug("Converting protocol buffer class \"" + protobufClass + "\" to parquet schema.");

  final Descriptors.Descriptor protoDescriptor = Protobufs.getMessageDescriptor(protobufClass);
  final MessageType parquetSchema = convertFields(Types.buildMessage(), protoDescriptor.getFields())
      .named(protoDescriptor.getFullName());

  LOG.debug("Converter info:\n " + protoDescriptor.toProto() + " was converted to \n" + parquetSchema);
  return parquetSchema;
}
 
Example #21
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips a three-entry INT64 column index through the Parquet-format representation
 * and checks boundary order, null pages, null counts and min/max values. Also checks that
 * a null column index and the INT96 / INTERVAL types convert to {@code null}.
 *
 * <p>List comparisons use {@code assertEquals} rather than {@code assertTrue(a.equals(b))}
 * so that a failure reports both lists instead of a bare assertion error.
 */
@Test
public void testColumnIndexConversion() {
  PrimitiveType type = Types.required(PrimitiveTypeName.INT64).named("test_int64");
  ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);

  // First entry: 16 nulls plus the values -100 and 100.
  Statistics<?> stats = Statistics.createStats(type);
  stats.incrementNumNulls(16);
  stats.updateStats(-100L);
  stats.updateStats(100L);
  builder.add(stats);

  // Second entry: 111 nulls and no values.
  stats = Statistics.createStats(type);
  stats.incrementNumNulls(111);
  builder.add(stats);

  // Third entry: no nulls, the values 200 and 500.
  stats = Statistics.createStats(type);
  stats.updateStats(200L);
  stats.updateStats(500L);
  builder.add(stats);

  org.apache.parquet.format.ColumnIndex parquetColumnIndex =
      ParquetMetadataConverter.toParquetColumnIndex(type, builder.build());
  ColumnIndex columnIndex = ParquetMetadataConverter.fromParquetColumnIndex(type, parquetColumnIndex);

  assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
  assertEquals(Arrays.asList(false, true, false), columnIndex.getNullPages());
  assertEquals(Arrays.asList(16L, 111L, 0L), columnIndex.getNullCounts());
  // The second (all-null) entry is expected to be represented by an empty buffer.
  assertEquals(Arrays.asList(
      ByteBuffer.wrap(BytesUtils.longToBytes(-100L)),
      ByteBuffer.allocate(0),
      ByteBuffer.wrap(BytesUtils.longToBytes(200L))), columnIndex.getMinValues());
  assertEquals(Arrays.asList(
      ByteBuffer.wrap(BytesUtils.longToBytes(100L)),
      ByteBuffer.allocate(0),
      ByteBuffer.wrap(BytesUtils.longToBytes(500L))), columnIndex.getMaxValues());

  assertNull("Should handle null column index", ParquetMetadataConverter
      .toParquetColumnIndex(Types.required(PrimitiveTypeName.INT32).named("test_int32"), null));
  assertNull("Should ignore unsupported types", ParquetMetadataConverter
      .toParquetColumnIndex(Types.required(PrimitiveTypeName.INT96).named("test_int96"), columnIndex));
  assertNull("Should ignore unsupported types",
      ParquetMetadataConverter.fromParquetColumnIndex(Types.required(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
          .length(12).as(OriginalType.INTERVAL).named("test_interval"), parquetColumnIndex));
}
 
Example #22
Source File: TestSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * A Parquet optional FIXED_LEN_BYTE_ARRAY(12) field without annotation must convert to an
 * Arrow {@code Binary} field of the same name.
 */
@Test
public void testParquetFixedBinaryToArrow() {
  MessageType parquetSchema = Types.buildMessage()
    .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).named("a"))
    .named("root");
  Schema expectedArrow = new Schema(asList(field("a", new ArrowType.Binary())));

  Schema actualArrow = converter.fromParquet(parquetSchema).getArrowSchema();
  Assert.assertEquals(expectedArrow, actualArrow);
}
 
Example #23
Source File: TestSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converting an Arrow microsecond {@code Timestamp} field (time zone "UTC") named {@code a}
 * must yield a Parquet message {@code root} with an optional INT64 field annotated
 * {@code TIMESTAMP_MICROS}.
 *
 * <p>The hand-built schema is passed as the expected value and the conversion result as the
 * actual value, so a failure message labels the two sides correctly.
 */
@Test
public void testArrowTimestampMicrosecondToParquet() {
  MessageType actual = converter.fromArrow(new Schema(asList(
    field("a", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"))
  ))).getParquetSchema();
  MessageType expected =
    Types.buildMessage().addField(Types.optional(INT64).as(TIMESTAMP_MICROS).named("a")).named("root");
  Assert.assertEquals(expected, actual);
}
 
Example #24
Source File: TestColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * The no-op column index builder must accept statistics of any type yet report zero pages,
 * zero min/max size and build no column index.
 */
@Test
public void testNoOpBuilder() {
  ColumnIndexBuilder noOpBuilder = ColumnIndexBuilder.getNoOpBuilder();
  StatsBuilder statsBuilder = new StatsBuilder();

  PrimitiveType utf8Type = Types.required(BINARY).as(UTF8).named("test_binary_utf8");
  noOpBuilder.add(statsBuilder.stats(utf8Type, stringBinary("Jeltz"), stringBinary("Slartibartfast"), null, null));

  PrimitiveType booleanType = Types.required(BOOLEAN).named("test_boolean");
  noOpBuilder.add(statsBuilder.stats(booleanType, true, true, null, null));

  PrimitiveType doubleType = Types.required(DOUBLE).named("test_double");
  noOpBuilder.add(statsBuilder.stats(doubleType, null, null, null));

  PrimitiveType int32Type = Types.required(INT32).named("test_int32");
  noOpBuilder.add(statsBuilder.stats(int32Type, null, null));

  PrimitiveType int64Type = Types.required(INT64).named("test_int64");
  noOpBuilder.add(statsBuilder.stats(int64Type, -234L, -42L, null));

  // Nothing that was added may be reflected in the builder's state or output.
  assertEquals(0, noOpBuilder.getPageCount());
  assertEquals(0, noOpBuilder.getMinMaxSize());
  assertNull(noOpBuilder.build());
}
 
Example #25
Source File: TestColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an UNORDERED INT64 column index directly from explicit null pages, null counts,
 * min values and max values, and checks that every component is reported back unchanged.
 */
@Test
public void testStaticBuildInt64() {
  PrimitiveType int64Type = Types.required(INT64).named("test_int64");

  // Argument order of build(): type, boundary order, null pages, null counts, mins, maxes.
  ColumnIndex index = ColumnIndexBuilder.build(
      int64Type,
      BoundaryOrder.UNORDERED,
      asList(true, false, true, false, true, false),
      asList(1L, 2L, 3L, 4L, 5L, 6L),
      toBBList(null, 2L, null, 4L, null, 9L),
      toBBList(null, 3L, null, 15L, null, 10L));

  assertEquals(BoundaryOrder.UNORDERED, index.getBoundaryOrder());
  assertCorrectNullCounts(index, 1, 2, 3, 4, 5, 6);
  assertCorrectNullPages(index, true, false, true, false, true, false);
  assertCorrectValues(index.getMaxValues(), null, 3L, null, 15L, null, 10L);
  assertCorrectValues(index.getMinValues(), null, 2L, null, 4L, null, 9L);
}
 
Example #26
Source File: TestSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * A Parquet optional INT96 field must convert to an Arrow {@code Binary} field of the same name.
 */
@Test
public void testParquetInt96ToArrowBinary() {
  MessageType parquetSchema = Types.buildMessage()
    .addField(Types.optional(INT96).named("a"))
    .named("root");
  Schema expectedArrow = new Schema(asList(field("a", new ArrowType.Binary())));

  Schema actualArrow = converter.fromParquet(parquetSchema).getArrowSchema();
  Assert.assertEquals(expectedArrow, actualArrow);
}
 
Example #27
Source File: TestColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a DESCENDING INT32 column index directly from explicit null pages, null counts,
 * min values and max values, and checks that every component is reported back unchanged.
 */
@Test
public void testStaticBuildInt32() {
  PrimitiveType int32Type = Types.required(INT32).named("test_int32");

  // Argument order of build(): type, boundary order, null pages, null counts, mins, maxes.
  ColumnIndex index = ColumnIndexBuilder.build(
      int32Type,
      BoundaryOrder.DESCENDING,
      asList(false, false, false, true, true, true),
      asList(0L, 10L, 0L, 3L, 5L, 7L),
      toBBList(10, 8, 6, null, null, null),
      toBBList(9, 7, 5, null, null, null));

  assertEquals(BoundaryOrder.DESCENDING, index.getBoundaryOrder());
  assertCorrectNullCounts(index, 0, 10, 0, 3, 5, 7);
  assertCorrectNullPages(index, false, false, false, true, true, true);
  assertCorrectValues(index.getMaxValues(), 9, 7, 5, null, null, null);
  assertCorrectValues(index.getMinValues(), 10, 8, 6, null, null, null);
}
 
Example #28
Source File: TestSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * A Parquet optional FIXED_LEN_BYTE_ARRAY(5) field annotated DECIMAL(8, 2) must convert to
 * an Arrow {@code Decimal(8, 2)} field of the same name.
 */
@Test
public void testParquetFixedBinaryToArrowDecimal() {
  MessageType parquetSchema = Types.buildMessage()
    .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(5).as(DECIMAL).precision(8).scale(2).named("a"))
    .named("root");
  Schema expectedArrow = new Schema(asList(field("a", new ArrowType.Decimal(8, 2))));

  Schema actualArrow = converter.fromParquet(parquetSchema).getArrowSchema();
  Assert.assertEquals(expectedArrow, actualArrow);
}
 
Example #29
Source File: TestColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an UNORDERED DOUBLE column index directly from explicit null pages, null counts,
 * min values and max values, and checks that every component is reported back unchanged.
 */
@Test
public void testStaticBuildDouble() {
  PrimitiveType doubleType = Types.required(DOUBLE).named("test_double");

  // Argument order of build(): type, boundary order, null pages, null counts, mins, maxes.
  ColumnIndex index = ColumnIndexBuilder.build(
      doubleType,
      BoundaryOrder.UNORDERED,
      asList(false, false, false, false, false, false),
      asList(0L, 1L, 2L, 3L, 4L, 5L),
      toBBList(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0),
      toBBList(1.0, 2.0, 3.0, 4.0, 5.0, 6.0));

  assertEquals(BoundaryOrder.UNORDERED, index.getBoundaryOrder());
  assertCorrectNullCounts(index, 0, 1, 2, 3, 4, 5);
  assertCorrectNullPages(index, false, false, false, false, false, false);
  assertCorrectValues(index.getMaxValues(), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
  assertCorrectValues(index.getMinValues(), -1.0, -2.0, -3.0, -4.0, -5.0, -6.0);
}
 
Example #30
Source File: TestColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an ASCENDING FLOAT column index directly from explicit null pages, null counts,
 * min values and max values, and checks that every component is reported back unchanged.
 */
@Test
public void testStaticBuildFloat() {
  PrimitiveType floatType = Types.required(FLOAT).named("test_float");

  // Argument order of build(): type, boundary order, null pages, null counts, mins, maxes.
  ColumnIndex index = ColumnIndexBuilder.build(
      floatType,
      BoundaryOrder.ASCENDING,
      asList(true, true, true, false, false, false),
      asList(9L, 8L, 7L, 6L, 0L, 0L),
      toBBList(null, null, null, -3.0f, -2.0f, 0.1f),
      toBBList(null, null, null, -2.0f, 0.0f, 6.0f));

  assertEquals(BoundaryOrder.ASCENDING, index.getBoundaryOrder());
  assertCorrectNullCounts(index, 9, 8, 7, 6, 0, 0);
  assertCorrectNullPages(index, true, true, true, false, false, false);
  assertCorrectValues(index.getMaxValues(), null, null, null, -2.0f, 0.0f, 6.0f);
  assertCorrectValues(index.getMinValues(), null, null, null, -3.0f, -2.0f, 0.1f);
}