Java Code Examples for org.apache.parquet.schema.Types

The following examples show how to use org.apache.parquet.schema.Types. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Builds the Parquet schema for this map column.
 *
 * <p>The key and value schemas are placed in a repeated group named {@code MAP_KEY}, and that
 * group is nested inside an optional or required group named after the column.
 *
 * @return the group type for the column, or {@code null} when the repetition is neither
 *         {@code OPTIONAL} nor {@code REQUIRED}
 */
@Override
protected Type buildSchema() {
  JsonElementConverter valueConverter = this.elementConverter;
  JsonElementConverter mapKeyConverter = getKeyConverter();
  Type keySchema = mapKeyConverter.schema();
  Type valueSchema = valueConverter.schema();
  GroupType entries = Types.repeatedGroup()
      .addFields(keySchema, valueSchema)
      .named(MAP_KEY)
      .asGroupType();
  String name = this.jsonSchema.getColumnName();
  switch (optionalOrRequired(this.jsonSchema)) {
    case REQUIRED:
      return Types.requiredGroup().addFields(entries).named(name).asGroupType();
    case OPTIONAL:
      return Types.optionalGroup().addFields(entries).named(name).asGroupType();
    default:
      // Any other repetition is reported to the caller as "no schema".
      return null;
  }
}
 
Example 2
Source Project: parquet-mr   Source File: TestParquetParser.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that JSON and BSON annotations on binary fields are parsed into the same schema the
 * builder produces, and that the schema survives a toString/re-parse round trip.
 */
@Test
public void testEmbeddedAnnotations() {
  String schemaText = String.join("",
      "message EmbeddedMessage {",
      "  required binary json (JSON);",
      "  required binary bson (BSON);",
      "}\n");

  MessageType fromText = MessageTypeParser.parseMessageType(schemaText);
  MessageType fromBuilder = Types.buildMessage()
      .required(BINARY).as(JSON).named("json")
      .required(BINARY).as(BSON).named("bson")
      .named("EmbeddedMessage");
  assertEquals(fromBuilder, fromText);

  // Round trip: print the parsed schema and parse it again.
  String printed = fromText.toString();
  MessageType fromPrinted = MessageTypeParser.parseMessageType(printed);
  assertEquals(fromBuilder, fromPrinted);
}
 
Example 3
Source Project: parquet-mr   Source File: TestStatistics.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the string form of float and double statistics keeps '.' as the decimal
 * separator while the default locale is French.
 */
@Test
public void testFloatingPointStringIndependentFromLocale() {
  Statistics<?> floatStatistics =
      Statistics.createStats(Types.optional(PrimitiveTypeName.FLOAT).named("test-float"));
  floatStatistics.updateStats(123.456f);
  Statistics<?> doubleStatistics =
      Statistics.createStats(Types.optional(PrimitiveTypeName.DOUBLE).named("test-double"));
  doubleStatistics.updateStats(12345.6789);

  final String expectedFloat = "min: 123.456, max: 123.456, num_nulls: 0";
  final String expectedDouble = "min: 12345.6789, max: 12345.6789, num_nulls: 0";

  Locale originalLocale = Locale.getDefault();
  try {
    // French formatting would use ',' as the decimal separator instead of '.'
    Locale.setDefault(Locale.FRENCH);
    assertEquals(expectedFloat, floatStatistics.toString());
    assertEquals(expectedDouble, doubleStatistics.toString());
  } finally {
    // Always restore the process-wide default so other tests are unaffected.
    Locale.setDefault(originalLocale);
  }
}
 
Example 4
Source Project: parquet-mr   Source File: TestStatistics.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the {@code testBuilder} helper once per primitive type, passing a low value with its
 * byte encoding followed by a high value with its byte encoding.
 *
 * <p>Long literals use the upper-case {@code L} suffix (a lower-case {@code l} is easily
 * misread as the digit 1), and the double case uses double literals throughout.
 */
@Test
public void testBuilder() {
  testBuilder(Types.required(BOOLEAN).named("test_boolean"), false, new byte[] { 0 }, true, new byte[] { 1 });
  testBuilder(Types.required(INT32).named("test_int32"), -42, intToBytes(-42), 42, intToBytes(42));
  testBuilder(Types.required(INT64).named("test_int64"), -42L, longToBytes(-42), 42L, longToBytes(42));
  testBuilder(Types.required(FLOAT).named("test_float"), -42.0f, intToBytes(floatToIntBits(-42.0f)), 42.0f,
      intToBytes(floatToIntBits(42.0f)));
  testBuilder(Types.required(DOUBLE).named("test_double"), -42.0, longToBytes(doubleToLongBits(-42.0)), 42.0,
      longToBytes(doubleToLongBits(42.0)));

  // The same 12-byte arrays serve as low/high values for every binary-like type below.
  byte[] min = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
  byte[] max = { 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 };
  testBuilder(Types.required(INT96).named("test_int96"), Binary.fromConstantByteArray(min), min,
      Binary.fromConstantByteArray(max), max);
  testBuilder(Types.required(FIXED_LEN_BYTE_ARRAY).length(12).named("test_fixed"), Binary.fromConstantByteArray(min),
      min,
      Binary.fromConstantByteArray(max), max);
  testBuilder(Types.required(BINARY).named("test_binary"), Binary.fromConstantByteArray(min), min,
      Binary.fromConstantByteArray(max), max);
}
 
Example 5
Source Project: parquet-mr   Source File: TestColumnIndexBuilder.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Covers two special cases for double column indexes.
 *
 * <p>First, pages whose statistics contain zero bounds: the expected minimums are
 * {@code -1.0, -0.0, 1.0} and the expected maximums are {@code 0.0, 1.0, 100.0}. Second, when
 * one page's statistics contain {@code NaN}, {@code build()} is expected to return
 * {@code null}.
 */
@Test
public void testBuildDoubleZeroNaN() {
  PrimitiveType doubleType = Types.required(DOUBLE).named("test_double");
  StatsBuilder pageStats = new StatsBuilder();

  ColumnIndexBuilder zeroBuilder = ColumnIndexBuilder.getBuilder(doubleType, Integer.MAX_VALUE);
  zeroBuilder.add(pageStats.stats(doubleType, -1.0, -0.0));
  zeroBuilder.add(pageStats.stats(doubleType, 0.0, 1.0));
  zeroBuilder.add(pageStats.stats(doubleType, 1.0, 100.0));
  ColumnIndex zeroIndex = zeroBuilder.build();
  assertCorrectValues(zeroIndex.getMinValues(), -1.0, -0.0, 1.0);
  assertCorrectValues(zeroIndex.getMaxValues(), 0.0, 1.0, 100.0);

  // Same pages, except the second one now carries NaN.
  ColumnIndexBuilder nanBuilder = ColumnIndexBuilder.getBuilder(doubleType, Integer.MAX_VALUE);
  nanBuilder.add(pageStats.stats(doubleType, -1.0, -0.0));
  nanBuilder.add(pageStats.stats(doubleType, 0.0, Double.NaN));
  nanBuilder.add(pageStats.stats(doubleType, 1.0, 100.0));
  assertNull(nanBuilder.build());
}
 
Example 6
Source Project: parquet-mr   Source File: TestColumnIndexBuilder.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Covers two special cases for float column indexes.
 *
 * <p>First, pages whose statistics contain zero bounds: the expected minimums are
 * {@code -1.0f, -0.0f, 1.0f} and the expected maximums are {@code 0.0f, 1.0f, 100.0f}.
 * Second, when one page's statistics contain {@code NaN}, {@code build()} is expected to
 * return {@code null}.
 */
@Test
public void testBuildFloatZeroNaN() {
  PrimitiveType floatType = Types.required(FLOAT).named("test_float");
  StatsBuilder pageStats = new StatsBuilder();

  ColumnIndexBuilder zeroBuilder = ColumnIndexBuilder.getBuilder(floatType, Integer.MAX_VALUE);
  zeroBuilder.add(pageStats.stats(floatType, -1.0f, -0.0f));
  zeroBuilder.add(pageStats.stats(floatType, 0.0f, 1.0f));
  zeroBuilder.add(pageStats.stats(floatType, 1.0f, 100.0f));
  ColumnIndex zeroIndex = zeroBuilder.build();
  assertCorrectValues(zeroIndex.getMinValues(), -1.0f, -0.0f, 1.0f);
  assertCorrectValues(zeroIndex.getMaxValues(), 0.0f, 1.0f, 100.0f);

  // Same pages, except the second one now carries NaN.
  ColumnIndexBuilder nanBuilder = ColumnIndexBuilder.getBuilder(floatType, Integer.MAX_VALUE);
  nanBuilder.add(pageStats.stats(floatType, -1.0f, -0.0f));
  nanBuilder.add(pageStats.stats(floatType, 0.0f, Float.NaN));
  nanBuilder.add(pageStats.stats(floatType, 1.0f, 100.0f));
  assertNull(nanBuilder.build());
}
 
Example 7
Source Project: parquet-mr   Source File: TestParquetWriterAppendBlocks.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that appending row groups from {@code file1} into a writer whose schema contains only
 * the "string" column is rejected with an {@link IllegalArgumentException}.
 *
 * <p>The input stream is opened in a try-with-resources block so it is closed whether or not
 * the assertion passes.
 *
 * @throws IOException if reading the footer, opening the input, or starting the writer fails
 */
@Test
public void testFailDroppingColumns() throws IOException {
  MessageType droppedColumnSchema = Types.buildMessage()
      .required(BINARY).as(UTF8).named("string")
      .named("AppendTest");

  final ParquetMetadata footer = ParquetFileReader.readFooter(
      CONF, file1, NO_FILTER);

  try (FSDataInputStream incoming = file1.getFileSystem(CONF).open(file1)) {
    Path droppedColumnFile = newTemp();
    final ParquetFileWriter writer = new ParquetFileWriter(
        CONF, droppedColumnSchema, droppedColumnFile);
    writer.start();

    TestUtils.assertThrows("Should complain that id column is dropped",
        IllegalArgumentException.class,
      (Callable<Void>) () -> {
        writer.appendRowGroups(incoming, footer.getBlocks(), false);
        return null;
      });
  }
}
 
Example 8
Source Project: parquet-mr   Source File: TestParquetWriterAppendBlocks.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that appending {@code file1} to a writer whose schema declares the columns "id",
 * "string" and "value" is rejected with an {@link IllegalArgumentException}.
 *
 * @throws IOException if the writer cannot be created or started
 */
@Test
public void testFailMissingColumn() throws IOException {
  MessageType writerSchema = Types.buildMessage()
      .required(INT32).named("id")
      .required(BINARY).as(UTF8).named("string")
      .required(FLOAT).named("value")
      .named("AppendTest");

  Path target = newTemp();
  final ParquetFileWriter fileWriter = new ParquetFileWriter(CONF, writerSchema, target);
  fileWriter.start();

  Callable<Void> appendExistingFile = () -> {
    fileWriter.appendFile(CONF, file1);
    return null;
  };
  TestUtils.assertThrows("Should complain that value column is missing",
      IllegalArgumentException.class,
      appendExistingFile);
}
 
Example 9
Source Project: parquet-mr   Source File: TestParquetWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that building a writer for a schema containing an empty group fails with
 * {@code InvalidSchemaException} and leaves no file behind.
 *
 * @throws IOException if the temporary file cannot be created
 */
@Test
public void testBadWriteSchema() throws IOException {
  final File file = temp.newFile("test.parquet");
  // The final "file does not exist" check is only meaningful if the placeholder is really
  // gone, so fail here instead of silently ignoring an unsuccessful delete.
  Assert.assertTrue("Should delete the placeholder file before writing",
      file.delete());

  TestUtils.assertThrows("Should reject a schema with an empty group",
      InvalidSchemaException.class, (Callable<Void>) () -> {
        ExampleParquetWriter.builder(new Path(file.toString()))
            .withType(Types.buildMessage()
                .addField(new GroupType(REQUIRED, "invalid_group"))
                .named("invalid_message"))
            .build();
        return null;
      });

  Assert.assertFalse("Should not create a file when schema is rejected",
      file.exists());
}
 
Example 10
Source Project: parquet-mr   Source File: TestParquetMetadataConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks which annotation wins when a schema element's converted type disagrees with its
 * logical type: a DECIMAL(9, 2) binary field whose converted type is overwritten with JSON
 * must be read back as a JSON-annotated binary field.
 */
@Test
public void testIncompatibleLogicalAndConvertedTypes() {
  ParquetMetadataConverter metadataConverter = new ParquetMetadataConverter();
  MessageType decimalSchema = Types.buildMessage()
    .required(PrimitiveTypeName.BINARY)
    .as(OriginalType.DECIMAL).precision(9).scale(2)
    .named("aBinary")
    .named("Message");
  MessageType jsonSchema = Types.buildMessage()
    .required(PrimitiveTypeName.BINARY)
    .as(LogicalTypeAnnotation.jsonType())
    .named("aBinary")
    .named("Message");

  List<SchemaElement> elements = metadataConverter.toParquetSchema(decimalSchema);
  // Force a mismatch on the element at index 1: the converted type is expected to override
  // the logical type when the two disagree.
  SchemaElement mismatched = elements.get(1);
  mismatched.setConverted_type(ConvertedType.JSON);

  MessageType readBack = metadataConverter.fromParquetSchema(elements, null);
  assertEquals(jsonSchema, readBack);
}
 
Example 11
Source Project: parquet-mr   Source File: TestParquetMetadataConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that statistics with different min ("A") and max ("z") values, serialized through
 * {@code StatsHelper.V1}, are converted for a UTF8 binary column without min/max values but
 * with the null count (3) preserved.
 */
@Test
public void testIgnoreStatsWithSignedSortOrder() {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  BinaryStatistics stats = new BinaryStatistics();
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("z"));
  stats.incrementNumNulls();

  PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY)
      .as(OriginalType.UTF8).named("b");
  // Wildcard instead of the raw type: only type-independent accessors are used below.
  Statistics<?> convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      StatsHelper.V1.toParquetStatistics(stats),
      binaryType);

  Assert.assertFalse("Stats should not include min/max: " + convertedStats, convertedStats.hasNonNullValue());
  Assert.assertTrue("Stats should have null count: " + convertedStats, convertedStats.isNumNullsSet());
  Assert.assertEquals("Stats should have 3 nulls: " + convertedStats, 3L, convertedStats.getNumNulls());
}
 
Example 12
Source Project: parquet-mr   Source File: TestParquetMetadataConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that statistics whose min and max are the same value ("A") are still kept when
 * converted for a UTF8 binary column.
 *
 * @param helper the statistics serializer under test; it is used to produce the Parquet-format
 *               statistics so that each helper variant is really exercised (previously the
 *               parameter was ignored and one fixed conversion ran for every variant)
 */
private void testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper helper) {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  BinaryStatistics stats = new BinaryStatistics();
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();

  PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");
  // Wildcard instead of the raw type: only type-independent accessors are used below.
  Statistics<?> convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      helper.toParquetStatistics(stats),
      binaryType);

  Assert.assertFalse("Stats should not be empty: " + convertedStats, convertedStats.isEmpty());
  Assert.assertArrayEquals("min == max: " + convertedStats, convertedStats.getMaxBytes(), convertedStats.getMinBytes());
}
 
Example 13
Source Project: parquet-mr   Source File: TestParquetMetadataConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the {@code testV2OnlyStats} helper for unsigned-integer and decimal annotated types.
 *
 * <p>For each unsigned type the two values straddle the signed maximum of that width (for
 * example {@code 0x7F} and {@code 0x80} for UINT_8); the decimal cases use one negative and one
 * positive unscaled value.
 */
@Test
public void testV2OnlyStats() {
  // Unsigned integers.
  testV2OnlyStats(
      Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_8).named(""), 0x7F, 0x80);
  testV2OnlyStats(
      Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_16).named(""), 0x7FFF, 0x8000);
  testV2OnlyStats(
      Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_32).named(""), 0x7FFFFFFF, 0x80000000);
  testV2OnlyStats(
      Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named(""),
      0x7FFFFFFFFFFFFFFFL, 0x8000000000000000L);

  // Decimal stored as variable-length binary.
  BinaryDecimalBounds:
  {
    BigInteger low = new BigInteger("-765875");
    BigInteger high = new BigInteger("876856");
    testV2OnlyStats(
        Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named(""), low, high);
  }

  // Decimal stored as a 14-byte fixed-length array.
  FixedDecimalBounds:
  {
    BigInteger low = new BigInteger("-6769643");
    BigInteger high = new BigInteger("9864675");
    testV2OnlyStats(
        Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(14).as(OriginalType.DECIMAL).precision(7)
            .named(""),
        low, high);
  }
}
 
Example 14
Source Project: parquet-mr   Source File: TestParquetMetadataConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the {@code testV2StatsEqualMinMax} helper for unsigned-integer, decimal and INT96 types,
 * always passing the same value twice.
 */
@Test
public void testV2StatsEqualMinMax() {
  // Unsigned integers.
  testV2StatsEqualMinMax(
      Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_8).named(""), 93, 93);
  testV2StatsEqualMinMax(
      Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_16).named(""), -5892, -5892);
  testV2StatsEqualMinMax(
      Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_32).named(""), 234998934, 234998934);
  testV2StatsEqualMinMax(
      Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named(""),
      -2389943895984985L, -2389943895984985L);

  // Decimals: two equal but distinct BigInteger instances are passed on purpose.
  testV2StatsEqualMinMax(
      Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named(""),
      new BigInteger("823749"), new BigInteger("823749"));
  testV2StatsEqualMinMax(
      Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(14).as(OriginalType.DECIMAL).precision(7)
          .named(""),
      new BigInteger("-8752832"), new BigInteger("-8752832"));

  // INT96 without annotation.
  testV2StatsEqualMinMax(
      Types.optional(PrimitiveTypeName.INT96).named(""),
      new BigInteger("81032984"), new BigInteger("81032984"));
}
 
Example 15
Source Project: parquet-mr   Source File: TestSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an optional 5-byte fixed-length DECIMAL(8, 2) Parquet field named "a" converts to
 * an Arrow schema with a single {@code Decimal(8, 2)} field named "a".
 */
@Test
public void testParquetFixedBinaryToArrowDecimal() {
  MessageType parquetSchema = Types.buildMessage()
    .addField(
      Types.optional(FIXED_LEN_BYTE_ARRAY).length(5).as(DECIMAL).precision(8).scale(2).named("a"))
    .named("root");
  Schema expectedArrow = new Schema(asList(field("a", new ArrowType.Decimal(8, 2))));

  Assert.assertEquals(expectedArrow, converter.fromParquet(parquetSchema).getArrowSchema());
}
 
Example 16
Source Project: presto   Source File: ParquetSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a Parquet message named "presto_schema" with one field per column.
 *
 * @param types column types; the element at index i is paired with {@code columnNames.get(i)}
 * @param columnNames column names, read at every index of {@code types}
 * @return the assembled message type
 */
private MessageType convert(List<Type> types, List<String> columnNames)
{
    Types.MessageTypeBuilder messageBuilder = Types.buildMessage();
    int column = 0;
    while (column < types.size()) {
        Type columnType = types.get(column);
        String columnName = columnNames.get(column);
        // Top-level columns have no parent path.
        messageBuilder.addField(convert(columnType, columnName, ImmutableList.of()));
        column++;
    }
    return messageBuilder.named("presto_schema");
}
 
Example 17
Source Project: presto   Source File: ParquetSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds an optional Parquet map type for the given map type.
 *
 * @param type the map type supplying the key and value types
 * @param name the name of the resulting Parquet field
 * @param parent path of the enclosing fields; {@code name} and "map" are appended to it before
 *               the key and value are converted
 * @return the Parquet map type named {@code name}
 */
private org.apache.parquet.schema.Type getMapType(MapType type, String name, List<String> parent)
{
    List<String> mapPath = ImmutableList.<String>builder()
            .addAll(parent)
            .add(name)
            .add("map")
            .build();
    org.apache.parquet.schema.Type keyField = convert(type.getKeyType(), "key", mapPath);
    org.apache.parquet.schema.Type valueField = convert(type.getValueType(), "value", mapPath);
    return Types.map(OPTIONAL)
            .key(keyField)
            .value(valueField)
            .named(name);
}
 
Example 18
Source Project: presto   Source File: MetadataReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds a message type from a flat list of schema elements.
 *
 * <p>The first element is taken as the root: its child count says how many fields to read from
 * the rest of the list, and its name becomes the message name.
 *
 * @param schema schema elements, root first
 * @return the reconstructed message type
 */
private static MessageType readParquetSchema(List<SchemaElement> schema)
{
    Iterator<SchemaElement> remaining = schema.iterator();
    SchemaElement root = remaining.next();
    Types.MessageTypeBuilder messageBuilder = Types.buildMessage();
    int childCount = root.getNum_children();
    readTypeSchema(messageBuilder, remaining, childCount);
    String messageName = root.name;
    return messageBuilder.named(messageName);
}
 
Example 19
Source Project: presto   Source File: MetadataReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code typeCount} fields from the iterator and adds them to {@code builder}.
 *
 * <p>An element without a primitive type is treated as a group and its children are read
 * recursively; any other element becomes a primitive field with its optional length, precision
 * and scale. Converted type and field id are applied when set, and every field name is
 * lower-cased with {@link Locale#ENGLISH}.
 *
 * @param builder the group builder receiving the fields
 * @param schemaIterator source of schema elements, consumed in order
 * @param typeCount number of direct children to read
 */
private static void readTypeSchema(Types.GroupBuilder<?> builder, Iterator<SchemaElement> schemaIterator, int typeCount)
{
    int remaining = typeCount;
    while (remaining > 0) {
        SchemaElement current = schemaIterator.next();
        Types.Builder<?, ?> fieldBuilder;
        if (current.type != null) {
            Types.PrimitiveBuilder<?> primitive = builder.primitive(getTypeName(current.type), Repetition.valueOf(current.repetition_type.name()));
            if (current.isSetType_length()) {
                primitive.length(current.type_length);
            }
            if (current.isSetPrecision()) {
                primitive.precision(current.precision);
            }
            if (current.isSetScale()) {
                primitive.scale(current.scale);
            }
            fieldBuilder = primitive;
        }
        else {
            // No primitive type: this element is a group, so its children follow in the iterator.
            fieldBuilder = builder.group(Repetition.valueOf(current.repetition_type.name()));
            readTypeSchema((Types.GroupBuilder<?>) fieldBuilder, schemaIterator, current.num_children);
        }

        if (current.isSetConverted_type()) {
            fieldBuilder.as(getOriginalType(current.converted_type));
        }
        if (current.isSetField_id()) {
            fieldBuilder.id(current.field_id);
        }
        String lowerCasedName = current.name.toLowerCase(Locale.ENGLISH);
        fieldBuilder.named(lowerCasedName);
        remaining--;
    }
}
 
Example 20
Source Project: iceberg   Source File: TypeToMessageType.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a schema to a Parquet message type, one field per schema column.
 *
 * @param schema the schema whose columns are converted
 * @param name the requested message name; it is passed through
 *             {@code AvroSchemaUtil.makeCompatibleName} before being used
 * @return the Parquet message type
 */
public MessageType convert(Schema schema, String name) {
  Types.MessageTypeBuilder messageBuilder = Types.buildMessage();

  for (NestedField column : schema.columns()) {
    messageBuilder.addField(field(column));
  }

  String compatibleName = AvroSchemaUtil.makeCompatibleName(name);
  return messageBuilder.named(compatibleName);
}
 
Example 21
Source Project: iceberg   Source File: TypeToMessageType.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a schema to a Parquet message type, one field per schema column.
 *
 * @param schema the schema whose columns are converted
 * @param name the message name, used as given
 * @return the Parquet message type
 */
public MessageType convert(Schema schema, String name) {
  Types.MessageTypeBuilder messageBuilder = Types.buildMessage();

  for (NestedField column : schema.columns()) {
    messageBuilder.addField(field(column));
  }

  MessageType message = messageBuilder.named(name);
  return message;
}
 
Example 22
/**
 * Builds the Parquet schema for this column as a UTF8-annotated binary.
 *
 * <p>A repeated converter always yields a repeated field; otherwise the repetition is derived
 * from the JSON schema.
 *
 * @return the primitive type named after the column
 * @throws RuntimeException if the repetition is neither {@code OPTIONAL} nor {@code REQUIRED}
 */
@Override
protected Type buildSchema() {
  String name = this.jsonSchema.getColumnName();
  if (!this.repeated) {
    switch (optionalOrRequired(this.jsonSchema)) {
      case REQUIRED:
        return Types.required(BINARY).as(UTF8).named(name);
      case OPTIONAL:
        return Types.optional(BINARY).as(UTF8).named(name);
      default:
        throw new RuntimeException("Unsupported Repetition type");
    }
  }
  return Types.repeated(BINARY).as(UTF8).named(name);
}
 
Example 23
Source Project: parquet-mr   Source File: TestParquetParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that date, time, timestamp and interval annotations, in both the legacy form (such as
 * {@code TIME_MILLIS}) and the parameterized form (such as {@code TIME(MILLIS,true)}), parse to
 * the schema the builder produces and survive a toString/re-parse round trip.
 */
@Test
public void testTimeAnnotations() {
  String schemaText = String.join("",
      "message TimeMessage {",
      "  required int32 date (DATE);",
      "  required int32 time (TIME_MILLIS);",
      "  required int64 timestamp (TIMESTAMP_MILLIS);",
      "  required FIXED_LEN_BYTE_ARRAY(12) interval (INTERVAL);",
      "  required int32 newTime (TIME(MILLIS,true));",
      "  required int64 nanoTime (TIME(NANOS,true));",
      "  required int64 newTimestamp (TIMESTAMP(MILLIS,false));",
      "  required int64 nanoTimestamp (TIMESTAMP(NANOS,false));",
      "}\n");

  MessageType fromText = MessageTypeParser.parseMessageType(schemaText);
  MessageType fromBuilder = Types.buildMessage()
      .required(INT32).as(DATE).named("date")
      .required(INT32).as(TIME_MILLIS).named("time")
      .required(INT64).as(TIMESTAMP_MILLIS).named("timestamp")
      .required(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("interval")
      .required(INT32).as(timeType(true, MILLIS)).named("newTime")
      .required(INT64).as(timeType(true, NANOS)).named("nanoTime")
      .required(INT64).as(timestampType(false, MILLIS)).named("newTimestamp")
      .required(INT64).as(timestampType(false, NANOS)).named("nanoTimestamp")
      .named("TimeMessage");
  assertEquals(fromBuilder, fromText);

  // Round trip: print the parsed schema and parse it again.
  String printed = fromText.toString();
  MessageType fromPrinted = MessageTypeParser.parseMessageType(printed);
  assertEquals(fromBuilder, fromPrinted);
}
 
Example 24
Source Project: parquet-mr   Source File: TestParquetParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the legacy integer annotations ({@code INT_8} through {@code UINT_64}) parse to
 * the schema the builder produces and survive a toString/re-parse round trip.
 */
@Test
public void testIntAnnotations() {
  String schemaText = String.join("",
      "message IntMessage {",
      "  required int32 i8 (INT_8);",
      "  required int32 i16 (INT_16);",
      "  required int32 i32 (INT_32);",
      "  required int64 i64 (INT_64);",
      "  required int32 u8 (UINT_8);",
      "  required int32 u16 (UINT_16);",
      "  required int32 u32 (UINT_32);",
      "  required int64 u64 (UINT_64);",
      "}\n");

  MessageType fromText = MessageTypeParser.parseMessageType(schemaText);
  MessageType fromBuilder = Types.buildMessage()
      .required(INT32).as(INT_8).named("i8")
      .required(INT32).as(INT_16).named("i16")
      .required(INT32).as(INT_32).named("i32")
      .required(INT64).as(INT_64).named("i64")
      .required(INT32).as(UINT_8).named("u8")
      .required(INT32).as(UINT_16).named("u16")
      .required(INT32).as(UINT_32).named("u32")
      .required(INT64).as(UINT_64).named("u64")
      .named("IntMessage");
  assertEquals(fromBuilder, fromText);

  // Round trip: print the parsed schema and parse it again.
  String printed = fromText.toString();
  MessageType fromPrinted = MessageTypeParser.parseMessageType(printed);
  assertEquals(fromBuilder, fromPrinted);
}
 
Example 25
Source Project: parquet-mr   Source File: TestParquetParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the parameterized {@code INTEGER(bitWidth,flag)} annotations parse to the schema
 * the builder produces with {@code intType(bitWidth, flag)} and survive a toString/re-parse
 * round trip.
 */
@Test
public void testIntegerAnnotations() {
  String schemaText = String.join("",
    "message IntMessage {",
    "  required int32 i8 (INTEGER(8,true));",
    "  required int32 i16 (INTEGER(16,true));",
    "  required int32 i32 (INTEGER(32,true));",
    "  required int64 i64 (INTEGER(64,true));",
    "  required int32 u8 (INTEGER(8,false));",
    "  required int32 u16 (INTEGER(16,false));",
    "  required int32 u32 (INTEGER(32,false));",
    "  required int64 u64 (INTEGER(64,false));",
    "}\n");

  MessageType fromText = MessageTypeParser.parseMessageType(schemaText);
  MessageType fromBuilder = Types.buildMessage()
    .required(INT32).as(intType(8, true)).named("i8")
    .required(INT32).as(intType(16, true)).named("i16")
    .required(INT32).as(intType(32, true)).named("i32")
    .required(INT64).as(intType(64, true)).named("i64")
    .required(INT32).as(intType(8, false)).named("u8")
    .required(INT32).as(intType(16, false)).named("u16")
    .required(INT32).as(intType(32, false)).named("u32")
    .required(INT64).as(intType(64, false)).named("u64")
    .named("IntMessage");
  assertEquals(fromBuilder, fromText);

  // Round trip: print the parsed schema and parse it again.
  String printed = fromText.toString();
  MessageType fromPrinted = MessageTypeParser.parseMessageType(printed);
  assertEquals(fromBuilder, fromPrinted);
}
 
Example 26
Source Project: parquet-mr   Source File: TestBinaryTruncator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the truncator obtained for a DECIMAL-annotated binary type returns its input
 * unchanged from both {@code truncateMin} and {@code truncateMax}, even though the 6-byte
 * inputs are longer than the requested 2.
 */
@Test
public void testNonStringTruncate() {
  // Second argument of truncateMin/truncateMax; presumably the target length in bytes.
  final int requested = 2;
  BinaryTruncator decimalTruncator = BinaryTruncator.getTruncator(
      Types.required(BINARY).as(DECIMAL).precision(10).scale(2).named("test_binary_decimal"));

  assertEquals(
      binary(0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA),
      decimalTruncator.truncateMin(binary(0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA), requested));
  assertEquals(
      binary(0x01, 0x02, 0x03, 0x04, 0x05, 0x06),
      decimalTruncator.truncateMax(binary(0x01, 0x02, 0x03, 0x04, 0x05, 0x06), requested));
}
 
Example 27
Source Project: parquet-mr   Source File: TestBinaryTruncator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs the {@code testTruncator} helper with {@code false} for types that are not treated as
 * strings: fixed-length decimal, fixed-length interval, binary decimal and INT96.
 */
@Test
public void testContractNonStringTypes() {
  // Every call in this test passes the same flag; its meaning is defined by testTruncator.
  final boolean flag = false;

  testTruncator(
      Types.required(FIXED_LEN_BYTE_ARRAY).length(8).as(DECIMAL).precision(18).scale(4).named("test_fixed_decimal"),
      flag);
  testTruncator(
      Types.required(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("test_fixed_interval"),
      flag);
  testTruncator(
      Types.required(BINARY).as(DECIMAL).precision(10).scale(2).named("test_binary_decimal"),
      flag);
  testTruncator(
      Types.required(INT96).named("test_int96"),
      flag);
}
 
Example 28
Source Project: parquet-mr   Source File: TestBinaryTruncator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs the {@code testTruncator} helper with {@code true} for plain binary, the UTF8, ENUM,
 * JSON and BSON annotated binaries, and a plain 5-byte fixed-length array.
 */
@Test
public void testContractStringTypes() {
  // Every call in this test passes the same flag; its meaning is defined by testTruncator.
  final boolean flag = true;

  testTruncator(
      Types.required(BINARY).named("test_binary"),
      flag);
  testTruncator(
      Types.required(BINARY).as(UTF8).named("test_utf8"),
      flag);
  testTruncator(
      Types.required(BINARY).as(ENUM).named("test_enum"),
      flag);
  testTruncator(
      Types.required(BINARY).as(JSON).named("test_json"),
      flag);
  testTruncator(
      Types.required(BINARY).as(BSON).named("test_bson"),
      flag);
  testTruncator(
      Types.required(FIXED_LEN_BYTE_ARRAY).length(5).named("test_fixed"),
      flag);
}
 
Example 29
Source Project: parquet-mr   Source File: TestSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an optional INT64 field annotated with {@code TIMESTAMP_MILLIS} converts to an
 * Arrow schema with a single millisecond timestamp field in the "UTC" time zone.
 */
@Test
public void testParquetInt64TimestampMillisToArrow() {
  MessageType parquetSchema = Types.buildMessage()
    .addField(
      Types.optional(INT64).as(TIMESTAMP_MILLIS).named("a"))
    .named("root");
  Schema expectedArrow = new Schema(asList(field("a", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"))));

  Assert.assertEquals(expectedArrow, converter.fromParquet(parquetSchema).getArrowSchema());
}
 
Example 30
Source Project: parquet-mr   Source File: TestColumnIndexBuilder.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a boolean column index directly through the static {@code ColumnIndexBuilder.build}
 * factory and checks that boundary order, null counts, null-page flags and min/max values are
 * reported back as given.
 *
 * <p>Long literals use the upper-case {@code L} suffix; a lower-case {@code l} is easily
 * misread as the digit 1 (for example {@code 9l} versus {@code 91}).
 */
@Test
public void testStaticBuildBoolean() {
  ColumnIndex columnIndex = ColumnIndexBuilder.build(
      Types.required(BOOLEAN).named("test_boolean"),
      BoundaryOrder.DESCENDING,
      asList(false, true, false, true, false, true),
      asList(9L, 8L, 7L, 6L, 5L, 0L),
      toBBList(false, null, false, null, true, null),
      toBBList(true, null, false, null, true, null));
  assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
  assertCorrectNullCounts(columnIndex, 9, 8, 7, 6, 5, 0);
  assertCorrectNullPages(columnIndex, false, true, false, true, false, true);
  assertCorrectValues(columnIndex.getMaxValues(), true, null, false, null, true, null);
  assertCorrectValues(columnIndex.getMinValues(), false, null, false, null, true, null);
}