org.apache.iceberg.types.Types.StringType Java Examples

The following examples show how to use org.apache.iceberg.types.Types.StringType. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ArrowSchemaUtilTest.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an Iceberg schema to an Arrow schema and hands both to {@code validate}.
 *
 * <p>Despite the name, the schema also carries a list column and a map column next to the
 * primitive ones. Field ids are not contiguous (11 is unused).
 */
@Test
public void convertPrimitive() {
  // The two container columns are built first so the column list below stays one entry per line.
  Types.ListType intList = Types.ListType.ofOptional(13, Types.IntegerType.get());
  Types.MapType stringToInt = Types.MapType.ofOptional(15, 16, StringType.get(), IntegerType.get());

  Schema icebergSchema = new Schema(
      Types.NestedField.optional(0, INTEGER_FIELD, IntegerType.get()),
      Types.NestedField.optional(1, BOOLEAN_FIELD, BooleanType.get()),
      Types.NestedField.required(2, DOUBLE_FIELD, DoubleType.get()),
      Types.NestedField.required(3, STRING_FIELD, StringType.get()),
      Types.NestedField.optional(4, DATE_FIELD, DateType.get()),
      Types.NestedField.optional(5, TIMESTAMP_FIELD, TimestampType.withZone()),
      Types.NestedField.optional(6, LONG_FIELD, LongType.get()),
      Types.NestedField.optional(7, FLOAT_FIELD, FloatType.get()),
      Types.NestedField.optional(8, TIME_FIELD, TimeType.get()),
      Types.NestedField.optional(9, BINARY_FIELD, Types.BinaryType.get()),
      Types.NestedField.optional(10, DECIMAL_FIELD, Types.DecimalType.of(1, 1)),
      Types.NestedField.optional(12, LIST_FIELD, intList),
      Types.NestedField.required(14, MAP_FIELD, stringToInt),
      Types.NestedField.optional(17, FIXED_WIDTH_BINARY_FIELD, Types.FixedType.ofLength(10)));

  // Fully qualified because the Iceberg Schema class already owns the simple name.
  org.apache.arrow.vector.types.pojo.Schema arrowSchema = ArrowSchemaUtil.convert(icebergSchema);

  validate(icebergSchema, arrowSchema);
}
 
Example #2
Source File: SchemaUtilTest.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the Pig schema string produced for a flat Iceberg schema with one column per
 * primitive type used here (boolean, int, long, float, double, decimal, string, binary).
 */
@Test
public void testPrimitive() throws IOException {
  String expectedPigSchema =
      "b:boolean,i:int,l:long,f:float,d:double,dec:bigdecimal,s:chararray,bi:bytearray";

  Schema primitives = new Schema(
      optional(1, "b", BooleanType.get()),
      optional(2, "i", IntegerType.get()),
      optional(3, "l", LongType.get()),
      optional(4, "f", FloatType.get()),
      optional(5, "d", DoubleType.get()),
      optional(6, "dec", DecimalType.of(0, 2)),
      optional(7, "s", StringType.get()),
      optional(8, "bi", BinaryType.get())
  );

  String actualPigSchema = SchemaUtil.convert(primitives).toString();

  assertEquals(expectedPigSchema, actualPigSchema);
}
 
Example #3
Source File: SchemaUtilTest.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Passes a schema mixing structs, lists and maps at several nesting depths to
 * {@code convertToPigSchema} together with the expected Pig schema string.
 */
@Test
public void complexNested() throws IOException {
  // Column "t": a struct whose only field "b" is a list of (i:int, s:string) structs.
  StructType listElement = StructType.of(
      optional(4, "i", IntegerType.get()),
      optional(5, "s", StringType.get()));
  StructType tupleColumn = StructType.of(
      optional(2, "b", ListType.ofOptional(3, listElement)));

  // Column "m1": a string-keyed map whose values are structs holding a binary list and an int map.
  StructType mapValue = StructType.of(
      optional(9, "b", ListType.ofOptional(10, BinaryType.get())),
      optional(11, "m2", MapType.ofOptional(12, 13, StringType.get(), IntegerType.get())));
  MapType mapColumn = MapType.ofOptional(7, 8, StringType.get(), mapValue);

  // Column "b1": a list of string-keyed maps whose values are float lists.
  ListType bagColumn = ListType.ofOptional(15,
      MapType.ofOptional(16, 17, StringType.get(),
          ListType.ofOptional(18, FloatType.get())));

  Schema schema = new Schema(
      optional(1, "t", tupleColumn),
      optional(6, "m1", mapColumn),
      optional(14, "b1", bagColumn));

  convertToPigSchema(
      schema,
      "t:(b:{(i:int,s:chararray)}),m1:[(b:{(bytearray)},m2:[int])],b1:{([{(float)}])}",
      "");
}
 
Example #4
Source File: SchemaUtilTest.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the Pig schema string for a map whose values are lists of structs.
 */
@Test
public void testTupleInMap() throws IOException {
  StructType element = StructType.of(
      required(5, "id", LongType.get()),
      optional(6, "data", StringType.get()));
  ListType elements = ListType.ofOptional(4, element);
  MapType nestedList = MapType.ofOptional(2, 3, StringType.get(), elements);

  Schema schema = new Schema(optional(1, "nested_list", nestedList));

  String pigSchema = SchemaUtil.convert(schema).toString();

  // Expected shape: map [ ... ] wrapping a bag { ... } wrapping the struct's tuple ( ... ).
  assertEquals("nested_list:[{(id:long,data:chararray)}]", pigSchema);
}
 
Example #5
Source File: SchemaUtilTest.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Covers the two list-element cases handed to {@code convertToPigSchema}: a struct element,
 * expected as a single tuple inside the bag, and a non-struct element, expected to be wrapped
 * in a tuple.
 */
@Test
public void doubleWrappingTuples() throws IOException {
  // struct<a:array<struct<b:string>>> -> (a:{(b:chararray)})
  StructType tupleElement = StructType.of(required(3, "b", StringType.get()));
  StructType bagOfTuples = StructType.of(
      required(1, "a", ListType.ofRequired(2, tupleElement)));
  convertToPigSchema(
      new Schema(bagOfTuples.fields()),
      "a:{(b:chararray)}",
      "A tuple inside a bag should not be double wrapped");

  // struct<a:array<boolean>> -> (a:{(boolean)})
  StructType bagOfBooleans = StructType.of(
      required(1, "a", ListType.ofRequired(2, BooleanType.get())));
  convertToPigSchema(
      new Schema(bagOfBooleans.fields()),
      "a:{(boolean)}",
      "boolean (or anything non-tuple) element inside a bag should be wrapped inside a tuple");
}
 
Example #6
Source File: TestPartitionTransforms.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Pins the {@code toString()} text of each partition transform exercised here:
 * identity, bucket, truncate, year, month, day and hour.
 */
@Test
public void testToStringMatchesSpecification()
{
    // Source types are fetched once and shared by the transforms that take them.
    StringType string = StringType.get();
    DateType date = DateType.get();
    TimestampType timestamp = TimestampType.withoutZone();

    // String-sourced transforms; parameterized ones show their argument in brackets.
    assertEquals(Transforms.identity(string).toString(), "identity");
    assertEquals(Transforms.bucket(string, 13).toString(), "bucket[13]");
    assertEquals(Transforms.truncate(string, 19).toString(), "truncate[19]");

    // Date- and timestamp-sourced transforms.
    assertEquals(Transforms.year(date).toString(), "year");
    assertEquals(Transforms.month(date).toString(), "month");
    assertEquals(Transforms.day(date).toString(), "day");
    assertEquals(Transforms.hour(timestamp).toString(), "hour");
}
 
Example #7
Source File: TestPartitionFields.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link PartitionSpec} over a fixed five-column schema.
 *
 * @param consumer callback that receives the spec builder and configures it before
 *     {@code build()} is called
 * @return the spec built after {@code consumer} has run
 */
private static PartitionSpec partitionSpec(Consumer<PartitionSpec.Builder> consumer)
{
    NestedField orderKey = NestedField.required(1, "order_key", LongType.get());
    NestedField timestamp = NestedField.required(2, "ts", TimestampType.withoutZone());
    NestedField price = NestedField.required(3, "price", DoubleType.get());
    NestedField comment = NestedField.optional(4, "comment", StringType.get());
    NestedField notes = NestedField.optional(5, "notes", ListType.ofRequired(6, StringType.get()));

    PartitionSpec.Builder specBuilder =
            PartitionSpec.builderFor(new Schema(orderKey, timestamp, price, comment, notes));
    consumer.accept(specBuilder);
    return specBuilder.build();
}
 
Example #8
Source File: ArrowSchemaUtilTest.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a schema with two map columns to Arrow and checks that the Arrow schema has as
 * many top-level fields as the Iceberg schema has columns.
 */
@Test
public void convertComplex() {
  // "m": string -> long; "m2": string -> list of zone-less timestamps.
  MapType stringToLong = MapType.ofOptional(1, 2, StringType.get(), LongType.get());
  MapType stringToTimestamps = MapType.ofOptional(
      4, 5, StringType.get(),
      ListType.ofOptional(6, TimestampType.withoutZone()));

  Schema icebergSchema = new Schema(
      Types.NestedField.optional(0, "m", stringToLong),
      Types.NestedField.required(3, "m2", stringToTimestamps));

  org.apache.arrow.vector.types.pojo.Schema arrowSchema = ArrowSchemaUtil.convert(icebergSchema);

  int expectedColumns = icebergSchema.columns().size();
  int actualFields = arrowSchema.getFields().size();
  Assert.assertEquals(expectedColumns, actualFields);
}
 
Example #9
Source File: SchemaUtilTest.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Passes a schema with one list, one map and one struct column to
 * {@code convertToPigSchema} together with the expected Pig schema string.
 */
@Test
public void testComplex() throws IOException {
  ListType bag = ListType.ofOptional(2, BooleanType.get());
  MapType map = MapType.ofOptional(4, 5, StringType.get(), DoubleType.get());
  StructType tuple = StructType.of(
      optional(7, "i", IntegerType.get()),
      optional(8, "f", FloatType.get()));

  Schema schema = new Schema(
      optional(1, "bag", bag),
      optional(3, "map", map),
      optional(6, "tuple", tuple));

  // The third argument is deliberately null, unlike the message strings passed elsewhere.
  convertToPigSchema(schema, "bag:{(boolean)},map:[double],tuple:(i:int,f:float)", null);
}
 
Example #10
Source File: SchemaUtilTest.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Passes a three-level map-of-map-of-map schema to {@code convertToPigSchema}; the expected
 * Pig string nests one pair of brackets per map level around the decimal value type.
 */
@Test
public void nestedMaps() throws IOException {
  // Built inside-out: the innermost map holds the decimal(10, 2) values.
  MapType innermost = MapType.ofOptional(6, 7, StringType.get(), DecimalType.of(10, 2));
  MapType middle = MapType.ofOptional(4, 5, StringType.get(), innermost);
  MapType outermost = MapType.ofOptional(2, 3, StringType.get(), middle);

  Schema schema = new Schema(optional(1, "nested", outermost));

  convertToPigSchema(schema, "nested:[[[bigdecimal]]]", "");
}
 
Example #11
Source File: SchemaUtilTest.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Passes a struct nested three levels deep to {@code convertToPigSchema}; the expected Pig
 * string nests one pair of parentheses per struct level.
 */
@Test
public void nestedTuples() throws IOException {
  // Built inside-out, starting from the struct that holds the string leaf.
  StructType third = StructType.of(optional(4, "val", StringType.get()));
  StructType second = StructType.of(optional(3, "third", third));
  StructType first = StructType.of(optional(2, "second", second));

  Schema schema = new Schema(optional(1, "first", first));

  convertToPigSchema(schema, "first:(second:(third:(val:chararray)))", "");
}
 
Example #12
Source File: SchemaUtilTest.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Covers two map shapes handed to {@code convertToPigSchema}: a map of struct lists and a map
 * of maps. In both expected strings only the map value type appears, never the key type.
 */
@Test
public void mapConversions() throws IOException {
  // Case 1: map<string, list<struct<b:long, c:string>>>. The expected string omits the key type.
  StructType listElement = StructType.of(
      required(5, "b", LongType.get()),
      required(6, "c", StringType.get()));
  MapType mapOfStructLists = MapType.ofRequired(
      2, 3,
      StringType.get(),
      ListType.ofRequired(4, listElement));
  convertToPigSchema(
      new Schema(required(1, "a", mapOfStructLists)),
      "a:[{(b:long,c:chararray)}]",
      "We do not specify the map key type here");

  // Case 2: struct<a:map<string,map<string,double>>> -> (a:[[double]])
  // Per https://pig.apache.org/docs/latest/basic.html#map-schema, it seems only the value
  // type needs to be specified, since keys are always of type chararray.
  MapType innerMap = MapType.ofRequired(4, 5, StringType.get(), DoubleType.get());
  StructType mapOfMaps = StructType.of(
      required(1, "a", MapType.ofRequired(2, 3, StringType.get(), innerMap)));
  convertToPigSchema(
      new Schema(mapOfMaps.fields()),
      "a:[[double]]",
      "A map key type does not need to be specified");
}
 
Example #13
Source File: SchemaUtilTest.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code SchemaUtil.convert} on a map whose values are lists of longs.
 *
 * <p>The result is discarded and nothing is asserted, so the test passes whenever the
 * conversion returns without throwing.
 */
@Test
public void testLongInBag() throws IOException {
  ListType longs = ListType.ofRequired(5, LongType.get());
  MapType stringToLongs = MapType.ofOptional(2, 3, StringType.get(), longs);

  Schema schema = new Schema(optional(1, "nested_list", stringToLongs));

  SchemaUtil.convert(schema);
}
 
Example #14
Source File: TestMetrics.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Writes a single record holding an int list and a string-to-struct map, then checks the
 * resulting file metrics: the record count, the per-field counts (expected to be absent when
 * the file format is ORC), and that no bounds are reported for any list or map sub-field.
 */
@Test
public void testMetricsForListAndMapElements() throws IOException {
  StructType leafStruct = StructType.of(
      required(1, "leafIntCol", IntegerType.get()),
      optional(2, "leafStringCol", StringType.get())
  );
  Schema schema = new Schema(
      optional(3, "intListCol", ListType.ofRequired(4, IntegerType.get())),
      optional(5, "mapCol", MapType.ofRequired(6, 7, StringType.get(), leafStruct))
  );

  // The only map entry: key "4" -> {leafIntCol: 1, leafStringCol: "BBB"}.
  Record mapValue = GenericRecord.create(leafStruct);
  mapValue.setField("leafIntCol", 1);
  mapValue.setField("leafStringCol", "BBB");
  Map<String, Record> mapCol = Maps.newHashMap();
  mapCol.put("4", mapValue);

  Record row = GenericRecord.create(schema);
  row.setField("intListCol", Lists.newArrayList(10, 11, 12));
  // Set by position; position 1 is the second column declared above ("mapCol").
  row.set(1, mapCol);

  Metrics metrics = getMetrics(writeRecords(schema, row));

  Assert.assertEquals(1L, (long) metrics.recordCount());
  if (fileFormat() == FileFormat.ORC) {
    // For ORC, neither a value count nor a null count is expected for these fields.
    for (int fieldId : new int[] {1, 2, 4, 6}) {
      assertCounts(fieldId, null, null, metrics);
    }
  } else {
    // One struct, one map key, three list elements; no nulls anywhere.
    assertCounts(1, 1L, 0L, metrics);
    assertCounts(2, 1L, 0L, metrics);
    assertCounts(4, 3L, 0L, metrics);
    assertCounts(6, 1L, 0L, metrics);
  }

  // Lower and upper bounds are expected to be absent for every nested field.
  assertBounds(1, IntegerType.get(), null, null, metrics);
  assertBounds(2, StringType.get(), null, null, metrics);
  assertBounds(4, IntegerType.get(), null, null, metrics);
  assertBounds(6, StringType.get(), null, null, metrics);
  assertBounds(7, leafStruct, null, null, metrics);
}