Java Code Examples for org.apache.iceberg.types.Types#StructType

The following examples show how to use org.apache.iceberg.types.Types#StructType. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: V2Metadata.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the struct type describing a data file, embedding the given partition type.
 *
 * @param partitionType struct type used as the type of the required partition field
 * @return a struct made of the content field (as required), the partition field, and the
 *         remaining {@code DataFile} fields listed below
 */
static Types.StructType fileType(Types.StructType partitionType) {
  Types.StructType dataFileType = Types.StructType.of(
      DataFile.CONTENT.asRequired(),
      DataFile.FILE_PATH,
      DataFile.FILE_FORMAT,
      // the partition field is the only one whose type depends on the caller
      required(DataFile.PARTITION_ID, DataFile.PARTITION_NAME, partitionType, DataFile.PARTITION_DOC),
      DataFile.RECORD_COUNT,
      DataFile.FILE_SIZE,
      DataFile.COLUMN_SIZES,
      DataFile.VALUE_COUNTS,
      DataFile.NULL_VALUE_COUNTS,
      DataFile.LOWER_BOUNDS,
      DataFile.UPPER_BOUNDS,
      DataFile.KEY_METADATA,
      DataFile.SPLIT_OFFSETS
  );
  return dataFileType;
}
 
Example 2
Source File: V1Metadata.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the struct type describing a data file, embedding the given partition type.
 *
 * @param partitionType struct type used as the type of the required partition field
 * @return a struct made of the {@code DataFile} fields listed below plus {@code BLOCK_SIZE}
 */
static Types.StructType dataFileSchema(Types.StructType partitionType) {
  Types.StructType schema = Types.StructType.of(
      DataFile.FILE_PATH,
      DataFile.FILE_FORMAT,
      // the partition field is the only one whose type depends on the caller
      required(DataFile.PARTITION_ID, DataFile.PARTITION_NAME, partitionType),
      DataFile.RECORD_COUNT,
      DataFile.FILE_SIZE,
      BLOCK_SIZE,
      DataFile.COLUMN_SIZES,
      DataFile.VALUE_COUNTS,
      DataFile.NULL_VALUE_COUNTS,
      DataFile.LOWER_BOUNDS,
      DataFile.UPPER_BOUNDS,
      DataFile.KEY_METADATA,
      DataFile.SPLIT_OFFSETS
  );
  return schema;
}
 
Example 3
Source File: TestBuildOrcProjection.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Projects an ORC schema with the same Iceberg schema it was converted from and checks that the
 * nested struct column keeps its children, ids and categories.
 */
@Test
public void testProjectionNestedNoOp() {
  // Iceberg schema: one optional struct column "a" holding string "b" and date "c"
  Types.StructType innerStruct = Types.StructType.of(
      optional(2, "b", Types.StringType.get()),
      optional(3, "c", Types.DateType.get()));
  Schema icebergSchema = new Schema(optional(1, "a", innerStruct));

  // Original mapping (stored in ORC)
  TypeDescription fileSchema = ORCSchemaUtil.convert(icebergSchema);

  TypeDescription projected = ORCSchemaUtil.buildOrcProjection(icebergSchema, fileSchema);
  assertEquals(1, projected.getChildren().size());
  assertEquals(TypeDescription.Category.STRUCT, projected.findSubtype("a").getCategory());

  TypeDescription structCol = projected.findSubtype("a");

  TypeDescription colB = structCol.findSubtype("b");
  assertEquals(2, colB.getId());
  assertEquals(TypeDescription.Category.STRING, colB.getCategory());

  TypeDescription colC = structCol.findSubtype("c");
  assertEquals(3, colC.getId());
  assertEquals(TypeDescription.Category.DATE, colC.getCategory());
}
 
Example 4
Source File: TestHelpers.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Compares a record with a row field by field, in the order of the struct's fields.
 *
 * @param struct struct type whose fields drive the comparison
 * @param rec record providing the expected value at each position
 * @param row row providing the actual value at each position; null positions compare as null
 */
public static void assertEqualsUnsafe(Types.StructType struct, Record rec, InternalRow row) {
  int pos = 0;
  for (Types.NestedField field : struct.fields()) {
    Type fieldType = field.type();

    Object expectedValue = rec.get(pos);

    Object actualValue;
    if (row.isNullAt(pos)) {
      actualValue = null;
    } else {
      actualValue = row.get(pos, convert(fieldType));
    }

    assertEqualsUnsafe(fieldType, expectedValue, actualValue);
    pos += 1;
  }
}
 
Example 5
Source File: PartitionKey.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Chooses an accessor for a position inside a nested struct, based on the parent accessor.
 *
 * @param position position passed to the created accessor
 * @param isOptional when true, a {@code WrappedPositionAccessor} is always used
 * @param type struct type; only its field count is read
 * @param accessor accessor to wrap or extend
 * @return a {@code Position2Accessor} when the parent is exactly a {@code PositionAccessor}, a
 *         {@code Position3Accessor} when it is a {@code Position2Accessor}, otherwise a
 *         {@code WrappedPositionAccessor}
 */
private static Accessor<InternalRow> newAccessor(int position, boolean isOptional, Types.StructType type,
                                                 Accessor<InternalRow> accessor) {
  int fieldCount = type.fields().size();

  if (!isOptional) {
    // exact class match for the first level, instanceof for the second, as in the original checks
    if (accessor.getClass() == PositionAccessor.class) {
      return new Position2Accessor(position, fieldCount, (PositionAccessor) accessor);
    }
    if (accessor instanceof Position2Accessor) {
      return new Position3Accessor(position, fieldCount, (Position2Accessor) accessor);
    }
  }

  // optional structs land here too: the wrapped position handles null layers
  return new WrappedPositionAccessor(position, fieldCount, accessor);
}
 
Example 6
Source File: VectorizedSparkParquetReaders.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Rejects struct fields: returns null when no struct is expected and throws otherwise.
 *
 * @param expected expected struct type, or null
 * @param groupType unused by this implementation
 * @param fieldReaders unused by this implementation
 * @return always null when {@code expected} is null
 * @throws UnsupportedOperationException if {@code expected} is not null
 */
@Override
public VectorizedReader<?> struct(
    Types.StructType expected, GroupType groupType,
    List<VectorizedReader<?>> fieldReaders) {
  if (expected == null) {
    return null;
  }
  throw new UnsupportedOperationException("Vectorized reads are not supported yet for struct fields");
}
 
Example 7
Source File: SchemaParser.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a struct type from its JSON representation.
 *
 * <p>Each element of the {@code FIELDS} array must be an object; its id, name, type, optional doc
 * and required flag are read to build a required or optional nested field.
 *
 * @param json JSON object expected to carry the {@code FIELDS} array
 * @return a struct type holding the parsed fields in array order
 * @throws IllegalArgumentException if the {@code FIELDS} entry is missing or not an array, or if an
 *         element of the array is not an object
 */
private static Types.StructType structFromJson(JsonNode json) {
  JsonNode fieldArray = json.get(FIELDS);
  // json.get returns null for a missing key; check it here so the caller gets the descriptive
  // precondition failure instead of a bare NullPointerException
  Preconditions.checkArgument(fieldArray != null && fieldArray.isArray(),
      "Cannot parse struct fields from non-array: %s", fieldArray);

  List<Types.NestedField> fields = Lists.newArrayListWithExpectedSize(fieldArray.size());
  for (JsonNode field : fieldArray) {
    Preconditions.checkArgument(field.isObject(),
        "Cannot parse struct field from non-object: %s", field);

    int id = JsonUtil.getInt(ID, field);
    String name = JsonUtil.getString(NAME, field);
    Type type = typeFromJson(field.get(TYPE));

    String doc = JsonUtil.getStringOrNull(DOC, field);
    boolean isRequired = JsonUtil.getBool(REQUIRED, field);
    if (isRequired) {
      fields.add(Types.NestedField.required(id, name, type, doc));
    } else {
      fields.add(Types.NestedField.optional(id, name, type, doc));
    }
  }

  return Types.StructType.of(fields);
}
 
Example 8
Source File: DataTestHelpers.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Compares two records field by field, in the order of the struct's fields.
 *
 * @param struct struct type whose fields drive the comparison
 * @param expected record providing the expected value at each position
 * @param actual record providing the actual value at each position
 */
public static void assertEquals(Types.StructType struct, Record expected, Record actual) {
  int pos = 0;
  for (Types.NestedField field : struct.fields()) {
    Type fieldType = field.type();
    Object expectedValue = expected.get(pos);
    Object actualValue = actual.get(pos);

    // delegate to the per-type comparison
    assertEquals(fieldType, expectedValue, actualValue);
    pos++;
  }
}
 
Example 9
Source File: AvroTestHelpers.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Compares two records position by position using the types of the struct's fields.
 *
 * @param struct struct type supplying the field type for each position
 * @param expected record holding the expected values
 * @param actual record holding the actual values
 */
static void assertEquals(Types.StructType struct, Record expected, Record actual) {
  List<Types.NestedField> structFields = struct.fields();
  for (int pos = 0, count = structFields.size(); pos < count; pos++) {
    Type typeAtPos = structFields.get(pos).type();
    Object want = expected.get(pos);
    Object got = actual.get(pos);

    // delegate to the per-type comparison
    assertEquals(typeAtPos, want, got);
  }
}
 
Example 10
Source File: TestFilteredScan.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Compares two lists of rows pairwise, then checks that both lists have the same size.
 *
 * <p>Only positions present in both lists are compared element-wise; a size mismatch is reported
 * afterwards by the final assertion.
 *
 * @param struct struct type passed to the per-record comparison
 * @param expected expected records
 * @param actual actual rows
 */
public static void assertEqualsSafe(Types.StructType struct,
                                    List<Record> expected, List<Row> actual) {
  // TODO: match records by ID
  for (int idx = 0; idx < expected.size() && idx < actual.size(); idx++) {
    TestHelpers.assertEqualsSafe(struct, expected.get(idx), actual.get(idx));
  }

  Assert.assertEquals("Number of results should match expected", expected.size(), actual.size());
}
 
Example 11
Source File: TypeWithSchemaVisitor.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Visits every field of a group, pairing each with the struct field that has the same id.
 *
 * @param struct struct to look fields up in; may be null, in which case no struct field is paired
 * @param group group whose fields are visited in order
 * @param visitor visitor passed to {@code visitField}
 * @return the results of {@code visitField}, one per group field, in field order
 */
private static <T> List<T> visitFields(Types.StructType struct, GroupType group, TypeWithSchemaVisitor<T> visitor) {
  List<T> results = Lists.newArrayListWithExpectedSize(group.getFieldCount());

  for (Type field : group.getFields()) {
    // -1 stands for "this field carries no id"
    int id = field.getId() == null ? -1 : field.getId().intValue();

    Types.NestedField iField = null;
    if (struct != null && id >= 0) {
      iField = struct.field(id);
    }

    results.add(visitField(iField, field, visitor));
  }

  return results;
}
 
Example 12
Source File: IcebergPartitionData.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Copies an array of partition values, duplicating values whose field type is binary, fixed or
 * string.
 *
 * @param type struct type whose fields give the type of each position in {@code data}
 * @param data values to copy; null entries stay null
 * @return a new array of the same length as {@code data}
 * @throws IllegalArgumentException if a non-null value belongs to a struct, list or map field
 */
public static Object[] copyData(Types.StructType type, Object[] data) {
  List<Types.NestedField> fields = type.fields();
  Object[] copy = new Object[data.length];

  for (int pos = 0; pos < data.length; pos++) {
    Object value = data[pos];
    if (value == null) {
      // slots of a freshly allocated array are already null
      continue;
    }

    switch (fields.get(pos).type().typeId()) {
      case STRUCT:
      case LIST:
      case MAP:
        throw new IllegalArgumentException("Unsupported type in partition data: " + type);
      case BINARY:
      case FIXED: {
        // byte arrays are mutable, so hand out a private copy
        byte[] bytes = (byte[]) value;
        copy[pos] = Arrays.copyOf(bytes, bytes.length);
        break;
      }
      case STRING:
        copy[pos] = value.toString();
        break;
      default:
        // no need to copy the object
        copy[pos] = value;
        break;
    }
  }

  return copy;
}
 
Example 13
Source File: V1Metadata.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Wraps a file struct type in a schema with the manifest entry status and snapshot id fields.
 *
 * @param fileSchema struct type used for the required {@code "data_file"} field
 * @return a schema of status, snapshot id and the data file field
 */
static Schema wrapFileSchema(Types.StructType fileSchema) {
  // this is used to build projection schemas
  Schema wrapped = new Schema(
      ManifestEntry.STATUS,
      ManifestEntry.SNAPSHOT_ID,
      required(ManifestEntry.DATA_FILE_ID, "data_file", fileSchema));
  return wrapped;
}
 
Example 14
Source File: StructInternalRow.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an instance that keeps the given struct together with its type.
 *
 * @param type struct type stored in this instance
 * @param struct struct-like value stored in this instance
 */
private StructInternalRow(Types.StructType type, StructLike struct) {
  this.struct = struct;
  this.type = type;
}
 
Example 15
Source File: V2Metadata.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the entry schema for the given partition type by wrapping the file type.
 *
 * @param partitionType partition struct type passed to {@code fileType}
 * @return the result of {@code wrapFileSchema} applied to the file type
 */
static Schema entrySchema(Types.StructType partitionType) {
  Types.StructType fileSchema = fileType(partitionType);
  return wrapFileSchema(fileSchema);
}
 
Example 16
Source File: TestSchemaUpdate.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Renames top-level and nested columns through {@code SchemaUpdate} and checks the resulting
 * struct against the expected one.
 */
@Test
public void testRename() {
  // nested struct types of the expected schema, built bottom-up
  Types.StructType optionsStruct = Types.StructType.of(
      required(8, "feature1", Types.BooleanType.get()),
      optional(9, "newfeature", Types.BooleanType.get()));

  Types.StructType addressStruct = Types.StructType.of(
      required(20, "address", Types.StringType.get()),
      required(21, "city", Types.StringType.get()),
      required(22, "state", Types.StringType.get()),
      required(23, "zip", Types.IntegerType.get()));

  Types.StructType coordinateStruct = Types.StructType.of(
      required(12, "latitude", Types.FloatType.get()),
      required(13, "long", Types.FloatType.get()));

  Types.StructType pointStruct = Types.StructType.of(
      required(15, "X", Types.LongType.get()),
      required(16, "y.y", Types.LongType.get()));

  Types.StructType expected = Types.StructType.of(
      required(1, "id", Types.IntegerType.get()),
      optional(2, "json", Types.StringType.get()),
      optional(3, "options", optionsStruct, "struct of named boolean options"),
      required(4, "locations",
          Types.MapType.ofRequired(10, 11, addressStruct, coordinateStruct),
          "map of address to coordinate"),
      optional(5, "points",
          Types.ListType.ofOptional(14, pointStruct),
          "2-D cartesian points"),
      required(6, "doubles",
          Types.ListType.ofRequired(17, Types.DoubleType.get())),
      optional(7, "properties",
          Types.MapType.ofOptional(18, 19, Types.StringType.get(), Types.StringType.get()),
          "string map of properties"));

  Schema renamed = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
      .renameColumn("data", "json")
      .renameColumn("preferences", "options")
      // inside a renamed column
      .renameColumn("preferences.feature2", "newfeature")
      .renameColumn("locations.lat", "latitude")
      .renameColumn("points.x", "X")
      // has a '.' in the field name
      .renameColumn("points.y", "y.y")
      .apply();

  Assert.assertEquals("Should rename all fields", expected, renamed.asStruct());
}
 
Example 17
Source File: GenericOrcReaders.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a struct reader and keeps a reference to the struct type.
 *
 * @param readers readers forwarded to the superclass constructor
 * @param structType struct type forwarded to the superclass and stored in this instance
 * @param idToConstant map forwarded to the superclass constructor
 */
protected StructReader(
    List<OrcValueReader<?>> readers,
    Types.StructType structType,
    Map<Integer, ?> idToConstant) {
  super(readers, structType, idToConstant);
  this.structType = structType;
}
 
Example 18
Source File: TestHelpers.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Compares an expected {@code MapData} with an actual map, entry by entry.
 *
 * <p>When either side is null the two references are compared directly. Otherwise the sizes are
 * compared, and for every expected key the actual value is looked up and compared according to
 * the map's value type.
 *
 * @param prefix text prepended to every assertion message
 * @param type map type supplying the key and value types
 * @param expected expected map data; may be null
 * @param actual actual map; may be null
 * @throws IllegalArgumentException if the value type is not handled by the switch below
 */
private static void assertEqualsMaps(String prefix, Types.MapType type,
                                     MapData expected, Map<?, ?> actual) {
  if (expected == null || actual == null) {
    Assert.assertEquals(prefix, expected, actual);
    return;
  }

  Type keyType = type.keyType();
  Type valueType = type.valueType();
  ArrayData expectedKeys = expected.keyArray();
  ArrayData expectedValues = expected.valueArray();
  Assert.assertEquals(prefix + " length", expected.numElements(), actual.size());

  for (int idx = 0; idx < expected.numElements(); ++idx) {
    Object expectedKey = getValue(expectedKeys, idx, keyType);
    Object actualValue = actual.get(expectedKey);
    String context = prefix + ".key=" + expectedKey;

    if (actualValue == null) {
      // a missing or null actual value is only acceptable when the expected value is null too
      Assert.assertEquals(context + " has null", true, expectedValues.isNullAt(idx));
      continue;
    }

    switch (valueType.typeId()) {
      case BOOLEAN:
      case INTEGER:
      case LONG:
      case FLOAT:
      case DOUBLE:
      case STRING:
      case DECIMAL:
      case DATE:
      case TIMESTAMP:
        Assert.assertEquals(context + " - " + valueType,
            getValue(expectedValues, idx, valueType),
            actualValue);
        break;
      case UUID:
      case FIXED:
      case BINARY:
        assertEqualBytes(context,
            (byte[]) getValue(expectedValues, idx, valueType),
            (byte[]) actualValue);
        break;
      case STRUCT: {
        Types.StructType structType = (Types.StructType) valueType;
        assertEquals(context, structType,
            expectedValues.getStruct(idx, structType.fields().size()),
            (Row) actualValue);
        break;
      }
      case LIST:
        assertEqualsLists(context,
            valueType.asListType(),
            expectedValues.getArray(idx),
            toList((Seq<?>) actualValue));
        break;
      case MAP:
        assertEqualsMaps(context, valueType.asMapType(),
            expectedValues.getMap(idx),
            toJavaMap((scala.collection.Map<?, ?>) actualValue));
        break;
      default:
        throw new IllegalArgumentException("Unhandled type " + valueType);
    }
  }
}
 
Example 19
Source File: TestHadoopCommits.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Adds a struct column containing a map to the table and checks the new metadata version, the
 * version hint, the resulting schema with reassigned ids, and that no scan tasks or manifest
 * files exist.
 */
@Test
public void testSchemaUpdateComplexType() throws Exception {
  Assert.assertTrue("Should create v1 metadata",
      version(1).exists() && version(1).isFile());
  Assert.assertFalse("Should not create v2 or newer versions",
      version(2).exists());

  // column to add; its ids (0-3, map ids 0 and 1) differ from the ids expected after the update
  Types.MapType addedMap = Types.MapType.ofOptional(
      0, 1, Types.IntegerType.get(), Types.StringType.get());
  Types.StructType complexColumn = Types.StructType.of(
      required(0, "w", Types.IntegerType.get()),
      required(1, "x", Types.StringType.get()),
      required(2, "y", Types.BooleanType.get()),
      optional(3, "z", addedMap));

  table.updateSchema()
      .addColumn("complex", complexColumn)
      .commit();

  Assert.assertTrue("Should create v2 for the update",
      version(2).exists() && version(2).isFile());
  Assert.assertEquals("Should write the current version to the hint file",
      2, readVersionHint());

  // schema expected after the update, with ids 4-9 inside the new column
  Types.MapType expectedMap = Types.MapType.ofOptional(
      8, 9, Types.IntegerType.get(), Types.StringType.get());
  Types.StructType expectedComplex = Types.StructType.of(
      required(4, "w", Types.IntegerType.get()),
      required(5, "x", Types.StringType.get()),
      required(6, "y", Types.BooleanType.get()),
      optional(7, "z", expectedMap));
  Schema updatedSchema = new Schema(
      required(1, "id", Types.IntegerType.get(), "unique ID"),
      required(2, "data", Types.StringType.get()),
      optional(3, "complex", expectedComplex));

  Assert.assertEquals("Table schema should match schema with reassigned ids",
      updatedSchema.asStruct(), table.schema().asStruct());

  List<FileScanTask> scanTasks = Lists.newArrayList(table.newScan().planFiles());
  Assert.assertEquals("Should not create any scan tasks", 0, scanTasks.size());

  List<File> manifestFiles = listManifestFiles();
  Assert.assertEquals("Should contain 0 Avro manifest files", 0, manifestFiles.size());
}
 
Example 20
Source File: BaseFile.java    From iceberg with Apache License 2.0 votes vote down vote up
/**
 * Returns the schema for the given partition struct; implemented by subclasses.
 *
 * <p>NOTE(review): going by the method name, the returned schema is presumably the Avro schema
 * used for this file type - confirm against the implementations.
 *
 * @param partitionStruct struct type of the partition
 * @return the schema produced by the subclass
 */
protected abstract Schema getAvroSchema(Types.StructType partitionStruct);