org.apache.iceberg.types.TypeUtil Java Examples

The following examples show how to use org.apache.iceberg.types.TypeUtil. They are taken from open source projects; the source file, project, and license are noted above each example.
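Before the project examples, here is a minimal, self-contained sketch of three TypeUtil calls that recur throughout them: select for projecting a schema by field id, assignIncreasingFreshIds for reassigning column ids, and indexByName for mapping field names to ids. The class name and the two-column schema are illustrative only and are not taken from any of the projects below.

import java.util.Collections;
import java.util.Map;

import org.apache.iceberg.Schema;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types;

public class TypeUtilSketch {
  public static void main(String[] args) {
    // Illustrative schema with explicitly assigned field ids
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "data", Types.StringType.get()));

    // Project only the column with field id 1, as in the select() examples below
    Schema idOnly = TypeUtil.select(schema, Collections.singleton(1));

    // Reassign fresh, increasing field ids, as in the test schemas below
    Schema reassigned = TypeUtil.assignIncreasingFreshIds(schema);

    // Build a name-to-id index over the schema's struct type
    Map<String, Integer> nameToId = TypeUtil.indexByName(schema.asStruct());

    System.out.println(idOnly.asStruct());
    System.out.println(reassigned.asStruct());
    System.out.println(nameToId);
  }
}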
Example #1
Source File: IcebergCatalog.java    From dremio-oss with Apache License 2.0
public void changeColumn(String columnToChange, Types.NestedField newDef) {
  IcebergTableOperations tableOperations = new IcebergTableOperations(fsPath, configuration);
  table = new BaseTable(tableOperations, fsPath.getName());
  Types.NestedField columnToChangeInIceberg = table.schema().caseInsensitiveFindField(columnToChange);
  if (!table.spec().getFieldsBySourceId(columnToChangeInIceberg.fieldId()).isEmpty()) { // column is part of partitionspec
    throw UserException.unsupportedError().message("[%s] is a partition column. Partition spec change is not supported.",
        columnToChangeInIceberg.name()).buildSilently();
  }

  if (!TypeUtil.isPromotionAllowed(columnToChangeInIceberg.type(), newDef.type()
      .asPrimitiveType())) {
    throw UserException.validationError()
        .message("Cannot change data type of column [%s] from %s to %s",
            columnToChangeInIceberg.name(),
            sqlTypeNameWithPrecisionAndScale(columnToChangeInIceberg.type()),
            sqlTypeNameWithPrecisionAndScale(newDef.type()))
        .buildSilently();
  }

  table.updateSchema()
      .renameColumn(columnToChangeInIceberg.name(), newDef.name())
      .updateColumn(columnToChangeInIceberg.name(), newDef.type().asPrimitiveType())
      .commit();
}
 
Example #2
Source File: IcebergSource.java    From iceberg with Apache License 2.0
@Override
public StreamWriter createStreamWriter(String runId, StructType dsStruct,
                                       OutputMode mode, DataSourceOptions options) {
  Preconditions.checkArgument(
      mode == OutputMode.Append() || mode == OutputMode.Complete(),
      "Output mode %s is not supported", mode);
  Configuration conf = new Configuration(lazyBaseConf());
  Table table = getTableAndResolveHadoopConfiguration(options, conf);
  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsStruct);
  TypeUtil.validateWriteSchema(table.schema(), writeSchema, checkNullability(options), checkOrdering(options));
  SparkUtil.validatePartitionTransforms(table.spec());
  // Spark 2.4.x passes runId to createStreamWriter instead of real queryId,
  // so we fetch it directly from sparkContext to make writes idempotent
  String queryId = lazySparkSession().sparkContext().getLocalProperty(StreamExecution.QUERY_ID_KEY());
  String appId = lazySparkSession().sparkContext().applicationId();

  Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
  Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());

  return new StreamingWriter(table, io, encryptionManager, options, queryId, mode, appId, writeSchema, dsStruct);
}
 
Example #3
Source File: TestIcebergInputFormat.java    From iceberg with Apache License 2.0
@Test
public void testProjection() throws Exception {
  File location = temp.newFolder(format.name());
  Assert.assertTrue(location.delete());
  Schema projectedSchema = TypeUtil.select(SCHEMA, ImmutableSet.of(1));
  Table table = tables.create(SCHEMA, SPEC,
                              ImmutableMap.of(TableProperties.DEFAULT_FILE_FORMAT, format.name()),
                              location.toString());
  List<Record> inputRecords = RandomGenericData.generate(table.schema(), 1, 0L);
  DataFile dataFile = writeFile(table, Row.of("2020-03-20", 0), format, inputRecords);
  table.newAppend()
       .appendFile(dataFile)
       .commit();

  Job job = Job.getInstance(conf);
  IcebergInputFormat.ConfigBuilder configBuilder = IcebergInputFormat.configure(job);
  configBuilder
      .readFrom(location.toString())
      .project(projectedSchema);
  List<Record> outputRecords = readRecords(job.getConfiguration());
  Assert.assertEquals(inputRecords.size(), outputRecords.size());
  Assert.assertEquals(projectedSchema.asStruct(), outputRecords.get(0).struct());
}
 
Example #4
Source File: IcebergSource.java    From iceberg with Apache License 2.0
@Override
public Optional<DataSourceWriter> createWriter(String jobId, StructType dsStruct, SaveMode mode,
                                               DataSourceOptions options) {
  Preconditions.checkArgument(mode == SaveMode.Append || mode == SaveMode.Overwrite,
      "Save mode %s is not supported", mode);
  Configuration conf = new Configuration(lazyBaseConf());
  Table table = getTableAndResolveHadoopConfiguration(options, conf);
  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsStruct);
  TypeUtil.validateWriteSchema(table.schema(), writeSchema, checkNullability(options), checkOrdering(options));
  SparkUtil.validatePartitionTransforms(table.spec());
  String appId = lazySparkSession().sparkContext().applicationId();
  String wapId = lazySparkSession().conf().get("spark.wap.id", null);
  boolean replacePartitions = mode == SaveMode.Overwrite;

  Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
  Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());

  return Optional.of(new Writer(
      table, io, encryptionManager, options, replacePartitions, appId, wapId, writeSchema, dsStruct));
}
 
Example #5
Source File: Schema.java    From iceberg with Apache License 2.0
private Schema internalSelect(Collection<String> names, boolean caseSensitive) {
  if (names.contains(ALL_COLUMNS)) {
    return this;
  }

  Set<Integer> selected = Sets.newHashSet();
  for (String name : names) {
    Integer id;
    if (caseSensitive) {
      id = lazyNameToId().get(name);
    } else {
      id = lazyLowerCaseNameToId().get(name.toLowerCase(Locale.ROOT));
    }

    if (id != null) {
      selected.add(id);
    }
  }

  return TypeUtil.select(this, selected);
}
 
Example #6
Source File: TestParquetVectorizedReads.java    From iceberg with Apache License 2.0
private void writeAndValidate(
    Schema schema, int numRecords, long seed, float nullPercentage,
    boolean setAndCheckArrowValidityVector, boolean reuseContainers)
    throws IOException {
  // Write test data
  Assume.assumeTrue("Parquet Avro cannot write non-string map keys", null == TypeUtil.find(
      schema,
      type -> type.isMapType() && type.asMapType().keyType() != Types.StringType.get()));

  Iterable<GenericData.Record> expected = generateData(schema, numRecords, seed, nullPercentage);

  // write a test parquet file using iceberg writer
  File testFile = temp.newFile();
  Assert.assertTrue("Delete should succeed", testFile.delete());

  try (FileAppender<GenericData.Record> writer = getParquetWriter(schema, testFile)) {
    writer.addAll(expected);
  }
  assertRecordsMatch(schema, numRecords, expected, testFile, setAndCheckArrowValidityVector, reuseContainers);
}
 
Example #7
Source File: TestIcebergInputFormat.java    From iceberg with Apache License 2.0
private void validateIdentityPartitionProjections(
    String tablePath, Schema projectedSchema, List<Record> inputRecords) throws Exception {
  Job job = Job.getInstance(conf);
  IcebergInputFormat.ConfigBuilder configBuilder = IcebergInputFormat.configure(job);
  configBuilder
      .readFrom(tablePath)
      .project(projectedSchema);
  List<Record> actualRecords = readRecords(job.getConfiguration());

  Set<String> fieldNames = TypeUtil.indexByName(projectedSchema.asStruct()).keySet();
  for (int pos = 0; pos < inputRecords.size(); pos++) {
    Record inputRecord = inputRecords.get(pos);
    Record actualRecord = actualRecords.get(pos);
    Assert.assertEquals("Projected schema should match", projectedSchema.asStruct(), actualRecord.struct());
    for (String name : fieldNames) {
      Assert.assertEquals(
          "Projected field " + name + " should match", inputRecord.getField(name), actualRecord.getField(name));
    }
  }
}
 
Example #8
Source File: RowDataReader.java    From iceberg with Apache License 2.0
@Override
CloseableIterator<InternalRow> open(FileScanTask task) {
  DataFile file = task.file();

  // update the current file for Spark's filename() function
  InputFileBlockHolder.set(file.path().toString(), task.start(), task.length());

  // schema or rows returned by readers
  PartitionSpec spec = task.spec();
  Set<Integer> idColumns = spec.identitySourceIds();
  Schema partitionSchema = TypeUtil.select(expectedSchema, idColumns);
  boolean projectsIdentityPartitionColumns = !partitionSchema.columns().isEmpty();

  if (projectsIdentityPartitionColumns) {
    return open(task, expectedSchema, PartitionUtil.constantsMap(task, RowDataReader::convertConstant))
        .iterator();
  }
  // return the base iterator
  return open(task, expectedSchema, ImmutableMap.of()).iterator();
}
 
Example #9
Source File: RandomData.java    From iceberg with Apache License 2.0
public static Iterable<InternalRow> generateSpark(Schema schema, int numRecords, long seed) {
  return () -> new Iterator<InternalRow>() {
    private SparkRandomDataGenerator generator = new SparkRandomDataGenerator(seed);
    private int count = 0;

    @Override
    public boolean hasNext() {
      return count < numRecords;
    }

    @Override
    public InternalRow next() {
      if (count >= numRecords) {
        throw new NoSuchElementException();
      }
      count += 1;
      return (InternalRow) TypeUtil.visit(schema, generator);
    }
  };
}
 
Example #10
Source File: RandomData.java    From iceberg with Apache License 2.0
private static Iterable<Record> newIterable(Supplier<RandomDataGenerator> newGenerator,
                                            Schema schema, int numRecords) {
  return () -> new Iterator<Record>() {
    private int count = 0;
    private RandomDataGenerator generator = newGenerator.get();

    @Override
    public boolean hasNext() {
      return count < numRecords;
    }

    @Override
    public Record next() {
      if (count >= numRecords) {
        throw new NoSuchElementException();
      }
      count += 1;
      return (Record) TypeUtil.visit(schema, generator);
    }
  };
}
 
Example #11
Source File: IcebergMetadata.java    From presto with Apache License 2.0
private static Schema toIcebergSchema(List<ColumnMetadata> columns)
{
    List<NestedField> icebergColumns = new ArrayList<>();
    for (ColumnMetadata column : columns) {
        if (!column.isHidden()) {
            int index = icebergColumns.size();
            Type type = toIcebergType(column.getType());
            NestedField field = column.isNullable()
                    ? NestedField.optional(index, column.getName(), type, column.getComment())
                    : NestedField.required(index, column.getName(), type, column.getComment());
            icebergColumns.add(field);
        }
    }
    Schema schema = new Schema(icebergColumns);
    AtomicInteger nextFieldId = new AtomicInteger(1);
    return TypeUtil.assignFreshIds(schema, nextFieldId::getAndIncrement);
}
 
Example #12
Source File: RandomData.java    From iceberg with Apache License 2.0
private static Iterable<Row> generateData(Schema schema, int numRecords, Supplier<RandomRowGenerator> supplier) {
  return () -> new Iterator<Row>() {
    private final RandomRowGenerator generator = supplier.get();
    private int count = 0;

    @Override
    public boolean hasNext() {
      return count < numRecords;
    }

    @Override
    public Row next() {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      ++count;
      return (Row) TypeUtil.visit(schema, generator);
    }
  };
}
 
Example #13
Source File: SparkWriteBuilder.java    From iceberg with Apache License 2.0
@Override
public BatchWrite buildForBatch() {
  // Validate
  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsSchema);
  TypeUtil.validateWriteSchema(table.schema(), writeSchema,
      checkNullability(spark, options), checkOrdering(spark, options));
  SparkUtil.validatePartitionTransforms(table.spec());

  // Get application id
  String appId = spark.sparkContext().applicationId();

  // Get write-audit-publish id
  String wapId = spark.conf().get("spark.wap.id", null);

  Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
  Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());

  return new SparkBatchWrite(
      table, io, encryptionManager, options, overwriteDynamic, overwriteByFilter, overwriteExpr, appId, wapId,
      writeSchema, dsSchema);
}
 
Example #14
Source File: SparkWriteBuilder.java    From iceberg with Apache License 2.0
@Override
public StreamingWrite buildForStreaming() {
  // Validate
  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsSchema);
  TypeUtil.validateWriteSchema(table.schema(), writeSchema,
      checkNullability(spark, options), checkOrdering(spark, options));
  SparkUtil.validatePartitionTransforms(table.spec());

  // Change to streaming write if it is just append
  Preconditions.checkState(!overwriteDynamic,
      "Unsupported streaming operation: dynamic partition overwrite");
  Preconditions.checkState(!overwriteByFilter || overwriteExpr == Expressions.alwaysTrue(),
      "Unsupported streaming operation: overwrite by filter: %s", overwriteExpr);

  // Get application id
  String appId = spark.sparkContext().applicationId();

  // Get write-audit-publish id
  String wapId = spark.conf().get("spark.wap.id", null);

  Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
  Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());

  return new SparkStreamingWrite(
      table, io, encryptionManager, options, overwriteByFilter, writeQueryId, appId, wapId, writeSchema, dsSchema);
}
 
Example #15
Source File: BaseTableScan.java    From iceberg with Apache License 2.0
/**
 * So that the refinements {@link #select(Collection)} and {@link #caseSensitive(boolean)} can be applied in any
 * order, the schema to be projected is resolved lazily here.
 *
 * @return the Schema to project
 */
private Schema lazyColumnProjection() {
  Collection<String> selectedColumns = context.selectedColumns();
  if (selectedColumns != null) {
    Set<Integer> requiredFieldIds = Sets.newHashSet();

    // all of the filter columns are required
    requiredFieldIds.addAll(
        Binder.boundReferences(table.schema().asStruct(),
            Collections.singletonList(context.rowFilter()), context.caseSensitive()));

    // all of the projection columns are required
    Set<Integer> selectedIds;
    if (context.caseSensitive()) {
      selectedIds = TypeUtil.getProjectedIds(table.schema().select(selectedColumns));
    } else {
      selectedIds = TypeUtil.getProjectedIds(table.schema().caseInsensitiveSelect(selectedColumns));
    }
    requiredFieldIds.addAll(selectedIds);

    return TypeUtil.select(table.schema(), requiredFieldIds);
  }

  return schema;
}
 
Example #16
Source File: TestCreateTransaction.java    From iceberg with Apache License 2.0
@Test
public void testCreateTransaction() throws IOException {
  File tableDir = temp.newFolder();
  Assert.assertTrue(tableDir.delete());

  Transaction txn = TestTables.beginCreate(tableDir, "test_create", SCHEMA, unpartitioned());

  Assert.assertNull("Starting a create transaction should not commit metadata",
      TestTables.readMetadata("test_create"));
  Assert.assertNull("Should have no metadata version",
      TestTables.metadataVersion("test_create"));

  txn.commitTransaction();

  TableMetadata meta = TestTables.readMetadata("test_create");
  Assert.assertNotNull("Table metadata should be created after transaction commits", meta);
  Assert.assertEquals("Should have metadata version 0",
      0, (int) TestTables.metadataVersion("test_create"));
  Assert.assertEquals("Should have 0 manifest files",
      0, listManifestFiles(tableDir).size());

  Assert.assertEquals("Table schema should match with reassigned IDs",
      TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct(), meta.schema().asStruct());
  Assert.assertEquals("Table spec should match", unpartitioned(), meta.spec());
  Assert.assertEquals("Table should not have any snapshots", 0, meta.snapshots().size());
}
 
Example #17
Source File: TestSchemaUpdate.java    From iceberg with Apache License 2.0
@Test
public void testDeleteFields() {
  // use schema projection to test column deletes
  List<String> columns = Lists.newArrayList("id", "data", "preferences", "preferences.feature1",
      "preferences.feature2", "locations", "locations.lat", "locations.long", "points",
      "points.x", "points.y", "doubles", "properties");
  for (String name : columns) {
    Set<Integer> selected = Sets.newHashSet(ALL_IDS);
    // remove the id and any nested fields from the projection
    Types.NestedField nested = SCHEMA.findField(name);
    selected.remove(nested.fieldId());
    selected.removeAll(TypeUtil.getProjectedIds(nested.type()));

    Schema del = new SchemaUpdate(SCHEMA, 19).deleteColumn(name).apply();

    Assert.assertEquals("Should match projection with '" + name + "' removed",
        TypeUtil.select(SCHEMA, selected).asStruct(), del.asStruct());
  }
}
 
Example #18
Source File: SchemaUpdate.java    From iceberg with Apache License 2.0
private static Schema applyChanges(Schema schema, List<Integer> deletes,
                                   Map<Integer, Types.NestedField> updates,
                                   Multimap<Integer, Types.NestedField> adds,
                                   Multimap<Integer, Move> moves) {
  Types.StructType struct = TypeUtil
      .visit(schema, new ApplyChanges(deletes, updates, adds, moves))
      .asNestedType().asStructType();
  return new Schema(struct.fields());
}
 
Example #19
Source File: SparkParquetWriters.java    From iceberg with Apache License 2.0
private FixedDecimalWriter(ColumnDescriptor desc, int precision, int scale) {
  super(desc);
  this.precision = precision;
  this.scale = scale;
  this.length = TypeUtil.decimalRequiredBytes(precision);
  this.bytes = ThreadLocal.withInitial(() -> new byte[length]);
}
 
Example #20
Source File: AvroDataTest.java    From iceberg with Apache License 2.0
@Test
public void testArrayOfStructs() throws IOException {
  Schema schema = TypeUtil.assignIncreasingFreshIds(new Schema(
      required(0, "id", LongType.get()),
      optional(1, "data", ListType.ofOptional(2, SUPPORTED_PRIMITIVES))));

  writeAndValidate(schema);
}
 
Example #21
Source File: AvroDataTest.java    From iceberg with Apache License 2.0
@Test
public void testMapOfStructs() throws IOException {
  Schema schema = TypeUtil.assignIncreasingFreshIds(new Schema(
      required(0, "id", LongType.get()),
      optional(1, "data", MapType.ofOptional(2, 3,
          Types.StringType.get(),
          SUPPORTED_PRIMITIVES))));

  writeAndValidate(schema);
}
 
Example #22
Source File: AvroDataTest.java    From iceberg with Apache License 2.0
@Test
public void testMixedTypes() throws IOException {
  StructType structType = StructType.of(
      required(0, "id", LongType.get()),
      optional(1, "list_of_maps",
          ListType.ofOptional(2, MapType.ofOptional(3, 4,
              Types.StringType.get(),
              SUPPORTED_PRIMITIVES))),
      optional(5, "map_of_lists",
          MapType.ofOptional(6, 7,
              Types.StringType.get(),
              ListType.ofOptional(8, SUPPORTED_PRIMITIVES))),
      required(9, "list_of_lists",
          ListType.ofOptional(10, ListType.ofOptional(11, SUPPORTED_PRIMITIVES))),
      required(12, "map_of_maps",
          MapType.ofOptional(13, 14,
              Types.StringType.get(),
              MapType.ofOptional(15, 16,
                  Types.StringType.get(),
                  SUPPORTED_PRIMITIVES))),
      required(17, "list_of_struct_of_nested_types", ListType.ofOptional(19, StructType.of(
          Types.NestedField.required(20, "m1", MapType.ofOptional(21, 22,
              Types.StringType.get(),
              SUPPORTED_PRIMITIVES)),
          Types.NestedField.optional(23, "l1", ListType.ofRequired(24, SUPPORTED_PRIMITIVES)),
          Types.NestedField.required(25, "l2", ListType.ofRequired(26, SUPPORTED_PRIMITIVES)),
          Types.NestedField.optional(27, "m2", MapType.ofOptional(28, 29,
              Types.StringType.get(),
              SUPPORTED_PRIMITIVES))
      )))
  );

  Schema schema = new Schema(TypeUtil.assignFreshIds(structType, new AtomicInteger(0)::incrementAndGet)
      .asStructType().fields());

  writeAndValidate(schema);
}
 
Example #23
Source File: TestParquetVectorizedReads.java    From iceberg with Apache License 2.0
@Test
@Override
public void testNestedStruct() {
  AssertHelpers.assertThrows(
      "Vectorized reads are not supported yet for struct fields",
      UnsupportedOperationException.class,
      "Vectorized reads are not supported yet for struct fields",
      () -> VectorizedSparkParquetReaders.buildReader(
          TypeUtil.assignIncreasingFreshIds(new Schema(required(
              1,
              "struct",
              SUPPORTED_PRIMITIVES))),
          new MessageType("struct", new GroupType(Type.Repetition.OPTIONAL, "struct").withId(1)),
          false));
}
 
Example #24
Source File: TestParquetVectorizedReads.java    From iceberg with Apache License 2.0
@Test
public void testMostlyNullsForOptionalFields() throws IOException {
  writeAndValidate(
      TypeUtil.assignIncreasingFreshIds(new Schema(SUPPORTED_PRIMITIVES.fields())),
      getNumRows(),
      0L,
      0.99f,
      false,
      true);
}
 
Example #25
Source File: SchemaUpdate.java    From iceberg with Apache License 2.0
/**
 * For testing only.
 */
SchemaUpdate(Schema schema, int lastColumnId) {
  this.ops = null;
  this.base = null;
  this.schema = schema;
  this.lastColumnId = lastColumnId;
  this.idToParent = Maps.newHashMap(TypeUtil.indexParents(schema.asStruct()));
}
 
Example #26
Source File: SchemaUpdate.java    From iceberg with Apache License 2.0
SchemaUpdate(TableOperations ops) {
  this.ops = ops;
  this.base = ops.current();
  this.schema = base.schema();
  this.lastColumnId = base.lastColumnId();
  this.idToParent = Maps.newHashMap(TypeUtil.indexParents(schema.asStruct()));
}
 
Example #27
Source File: ProjectionDatumReader.java    From iceberg with Apache License 2.0
@Override
public void setSchema(Schema newFileSchema) {
  this.fileSchema = newFileSchema;
  if (nameMapping == null && !AvroSchemaUtil.hasIds(fileSchema)) {
    nameMapping = MappingUtil.create(expectedSchema);
  }
  Set<Integer> projectedIds = TypeUtil.getProjectedIds(expectedSchema);
  Schema prunedSchema = AvroSchemaUtil.pruneColumns(newFileSchema, projectedIds, nameMapping);
  this.readSchema = AvroSchemaUtil.buildAvroProjection(prunedSchema, expectedSchema, renames);
  this.wrapped = newDatumReader();
}
 
Example #28
Source File: TableMetadata.java    From iceberg with Apache License 2.0
static TableMetadata newTableMetadata(Schema schema,
                                      PartitionSpec spec,
                                      String location,
                                      Map<String, String> properties,
                                      int formatVersion) {
  // reassign all column ids to ensure consistency
  AtomicInteger lastColumnId = new AtomicInteger(0);
  Schema freshSchema = TypeUtil.assignFreshIds(schema, lastColumnId::incrementAndGet);

  // rebuild the partition spec using the new column ids
  PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(freshSchema)
      .withSpecId(INITIAL_SPEC_ID);
  for (PartitionField field : spec.fields()) {
    // look up the name of the source field in the old schema to get the new schema's id
    String sourceName = schema.findColumnName(field.sourceId());
    // reassign all partition fields with fresh partition field Ids to ensure consistency
    specBuilder.add(
        freshSchema.findField(sourceName).fieldId(),
        field.name(),
        field.transform().toString());
  }
  PartitionSpec freshSpec = specBuilder.build();

  return new TableMetadata(null, formatVersion, UUID.randomUUID().toString(), location,
      INITIAL_SEQUENCE_NUMBER, System.currentTimeMillis(),
      lastColumnId.get(), freshSchema, INITIAL_SPEC_ID, ImmutableList.of(freshSpec),
      ImmutableMap.copyOf(properties), -1, ImmutableList.of(),
      ImmutableList.of(), ImmutableList.of());
}
 
Example #29
Source File: ParquetValueWriters.java    From iceberg with Apache License 2.0
private FixedDecimalWriter(ColumnDescriptor desc, int precision, int scale) {
  super(desc);
  this.precision = precision;
  this.scale = scale;
  this.length = TypeUtil.decimalRequiredBytes(precision);
  this.bytes = ThreadLocal.withInitial(() -> new byte[length]);
}
 
Example #30
Source File: TestCreateTransaction.java    From iceberg with Apache License 2.0
@Test
public void testCreateAndAppendWithTransaction() throws IOException {
  File tableDir = temp.newFolder();
  Assert.assertTrue(tableDir.delete());

  Transaction txn = TestTables.beginCreate(tableDir, "test_append", SCHEMA, unpartitioned());

  Assert.assertNull("Starting a create transaction should not commit metadata",
      TestTables.readMetadata("test_append"));
  Assert.assertNull("Should have no metadata version",
      TestTables.metadataVersion("test_append"));

  txn.newAppend()
      .appendFile(FILE_A)
      .appendFile(FILE_B)
      .commit();

  Assert.assertNull("Appending in a transaction should not commit metadata",
      TestTables.readMetadata("test_append"));
  Assert.assertNull("Should have no metadata version",
      TestTables.metadataVersion("test_append"));

  txn.commitTransaction();

  TableMetadata meta = TestTables.readMetadata("test_append");
  Assert.assertNotNull("Table metadata should be created after transaction commits", meta);
  Assert.assertEquals("Should have metadata version 0",
      0, (int) TestTables.metadataVersion("test_append"));
  Assert.assertEquals("Should have 1 manifest file",
      1, listManifestFiles(tableDir).size());

  Assert.assertEquals("Table schema should match with reassigned IDs",
      TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct(), meta.schema().asStruct());
  Assert.assertEquals("Table spec should match", unpartitioned(), meta.spec());
  Assert.assertEquals("Table should have one snapshot", 1, meta.snapshots().size());

  validateSnapshot(null, meta.currentSnapshot(), FILE_A, FILE_B);
}