org.apache.iceberg.TableMetadata Java Examples

The following examples show how to use org.apache.iceberg.TableMetadata. Each example is taken from an open source project; the originating source file and project are noted above the code.
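TableMetadata is not constructed directly in most of these examples; it is read through a table's TableOperations. The following is a minimal sketch of that pattern, assuming a path-based HadoopTables catalog and an illustrative warehouse location:

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.hadoop.HadoopTables;

// load an existing table from a path-based catalog; the location is an assumption
Table table = new HadoopTables(new Configuration()).load("hdfs:///warehouse/my_table");

// the table exposes its TableOperations, which hold the current TableMetadata
TableOperations ops = ((HasTableOperations) table).operations();
TableMetadata metadata = ops.current();

metadata.location();          // table root location
metadata.currentSnapshot();   // latest committed snapshot, or null for an empty table
metadata.properties();        // table properties map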
Example #1
Source File: HiveTableOperations.java    From presto with Apache License 2.0
@Override
public TableMetadata refresh()
{
    if (location.isPresent()) {
        // the location is only set while a table is being created (see Example #30),
        // so there is no metadata to load yet and currentMetadata stays null
        refreshFromMetadataLocation(null);
        return currentMetadata;
    }

    Table table = getTable();

    if (!isIcebergTable(table)) {
        throw new UnknownTableTypeException(getSchemaTableName());
    }

    String metadataLocation = table.getParameters().get(METADATA_LOCATION);
    if (metadataLocation == null) {
        throw new PrestoException(ICEBERG_INVALID_METADATA, format("Table is missing [%s] property: %s", METADATA_LOCATION, getSchemaTableName()));
    }

    refreshFromMetadataLocation(metadataLocation);

    return currentMetadata;
}
 
Example #2
Source File: HiveTableOperations.java    From presto with Apache License 2.0
@Override
public String metadataFileLocation(String filename)
{
    TableMetadata metadata = current();
    String location;
    if (metadata != null) {
        String writeLocation = metadata.properties().get(WRITE_METADATA_LOCATION);
        if (writeLocation != null) {
            return format("%s/%s", writeLocation, filename);
        }
        location = metadata.location();
    }
    else {
        location = this.location.orElseThrow(() -> new IllegalStateException("Location not set"));
    }
    return format("%s/%s/%s", location, METADATA_FOLDER_NAME, filename);
}
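The lookup above honors the WRITE_METADATA_LOCATION table property. A minimal sketch of redirecting metadata files for a metastore-backed table, assuming the constant resolves to the key "write.metadata.path" and using an illustrative target path (Example #25 shows that Hadoop path-based tables reject this property):

// assumed key for TableProperties.WRITE_METADATA_LOCATION; the target path is illustrative
table.updateProperties()
        .set("write.metadata.path", "s3://my-bucket/custom/metadata")
        .commit();

// subsequent metadata files are then written under the custom path rather than
// <table location>/metadata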
 
Example #3
Source File: IcebergCatalog.java    From dremio-oss with Apache License 2.0
public void beginCreateTable(BatchSchema writerSchema, List<String> partitionColumns) {
  Preconditions.checkState(transaction == null, "Unexpected state");
  IcebergTableOperations tableOperations = new IcebergTableOperations(fsPath, configuration);
  SchemaConverter schemaConverter = new SchemaConverter();
  Schema schema;
  try {
    schema = schemaConverter.toIceberg(writerSchema);
  } catch (Exception ex) {
    throw UserException.validationError(ex).buildSilently();
  }
  PartitionSpec partitionSpec = getIcebergPartitionSpec(writerSchema, partitionColumns);
  TableMetadata metadata = TableMetadata.newTableMetadata(tableOperations, schema, partitionSpec, fsPath.toString());
  transaction = createTableTransaction(tableOperations, metadata);
  table = transaction.table();
  beginInsert();
}
 
Example #4
Source File: TestHadoopCommits.java    From iceberg with Apache License 2.0
@Test
public void testMergeAppend() throws Exception {
  testFastAppend(); // create 2 compatible manifest files that will be merged

  // merge all manifests for this test
  table.updateProperties().set("commit.manifest.min-count-to-merge", "1").commit();

  // third append
  table.newAppend()
      .appendFile(FILE_C)
      .commit();

  List<FileScanTask> tasks = Lists.newArrayList(table.newScan().planFiles());
  Assert.assertEquals("Should scan 3 files", 3, tasks.size());

  Assert.assertEquals("Should contain 3 Avro manifest files",
      3, listManifestFiles().size());

  TableMetadata metadata = readMetadataVersion(5);
  Assert.assertEquals("Current snapshot should contain 1 merged manifest",
      1, metadata.currentSnapshot().allManifests().size());
}
 
Example #5
Source File: HadoopTables.java    From iceberg with Apache License 2.0
/**
 * Create a table using the FileSystem implementation resolved from the
 * location.
 *
 * @param schema iceberg schema used to create the table
 * @param spec partitioning spec, if null the table will be unpartitioned
 * @param properties a string map of table properties, initialized to empty if null
 * @param location a path URI (e.g. hdfs:///warehouse/my_table)
 * @return newly created table implementation
 */
@Override
public Table create(Schema schema, PartitionSpec spec, Map<String, String> properties,
                    String location) {
  Preconditions.checkNotNull(schema, "A table schema is required");

  TableOperations ops = newTableOps(location);
  if (ops.current() != null) {
    throw new AlreadyExistsException("Table already exists at location: " + location);
  }

  Map<String, String> tableProps = properties == null ? ImmutableMap.of() : properties;
  PartitionSpec partitionSpec = spec == null ? PartitionSpec.unpartitioned() : spec;
  TableMetadata metadata = TableMetadata.newTableMetadata(schema, partitionSpec, location, tableProps);
  ops.commit(null, metadata);

  return new BaseTable(ops, location);
}
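A minimal usage sketch of the create method above; the schema, table property, and warehouse path are illustrative assumptions:

import com.google.common.collect.ImmutableMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.types.Types;

Schema schema = new Schema(
    Types.NestedField.required(1, "id", Types.LongType.get()),
    Types.NestedField.optional(2, "data", Types.StringType.get()));

HadoopTables tables = new HadoopTables(new Configuration());

// a null spec creates an unpartitioned table, as described in the Javadoc above
Table created = tables.create(schema, null,
    ImmutableMap.of("commit.retry.num-retries", "5"), "hdfs:///warehouse/my_table");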
 
Example #6
Source File: HadoopTableOperations.java    From iceberg with Apache License 2.0
/**
 * Deletes the oldest metadata files if {@link TableProperties#METADATA_DELETE_AFTER_COMMIT_ENABLED} is true.
 *
 * @param base     table metadata on which previous versions were based
 * @param metadata new table metadata with updated previous versions
 */
private void deleteRemovedMetadataFiles(TableMetadata base, TableMetadata metadata) {
  if (base == null) {
    return;
  }

  boolean deleteAfterCommit = metadata.propertyAsBoolean(
      TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED,
      TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT);

  Set<TableMetadata.MetadataLogEntry> removedPreviousMetadataFiles = Sets.newHashSet(base.previousFiles());
  removedPreviousMetadataFiles.removeAll(metadata.previousFiles());

  if (deleteAfterCommit) {
    Tasks.foreach(removedPreviousMetadataFiles)
        .noRetry().suppressFailureWhenFinished()
        .onFailure((previousMetadataFile, exc) ->
            LOG.warn("Delete failed for previous metadata file: {}", previousMetadataFile, exc))
        .run(previousMetadataFile -> io().deleteFile(previousMetadataFile.file()));
  }
}
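The cleanup above only runs when delete-after-commit is enabled on the table. A minimal sketch of turning it on, assuming the TableProperties constants resolve to the literal keys shown:

// assumed keys for METADATA_DELETE_AFTER_COMMIT_ENABLED and METADATA_PREVIOUS_VERSIONS_MAX
table.updateProperties()
    .set("write.metadata.delete-after-commit.enabled", "true")
    .set("write.metadata.previous-versions-max", "10")
    .commit();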
 
Example #7
Source File: TestTables.java    From iceberg with Apache License 2.0
@Override
public void commit(TableMetadata base, TableMetadata metadata) {
  if (base != current) {
    throw new CommitFailedException("Cannot commit changes based on stale metadata");
  }
  synchronized (METADATA) {
    refresh();
    if (base == current) {
      if (failCommits > 0) {
        this.failCommits -= 1;
        throw new CommitFailedException("Injected failure");
      }
      METADATA.put(tableName, metadata);
      this.current = metadata;
    } else {
      throw new CommitFailedException(
          "Commit failed: table was updated at %d", base.lastUpdatedMillis());
    }
  }
}
 
Example #8
Source File: HadoopTableOperations.java    From iceberg with Apache License 2.0
@Override
public TableMetadata current() {
  if (shouldRefresh) {
    return refresh();
  }
  return currentMetadata;
}
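Examples #7, #8, and #25 together illustrate the TableOperations contract: read the current metadata, derive new metadata from it, and pass both to commit so that a stale base is rejected. A minimal sketch of that flow, assuming TableMetadata.replaceProperties is available in this Iceberg version and using a hypothetical property change:

import java.util.Map;
import com.google.common.collect.Maps;

// ops is a TableOperations instance for an existing table, e.g. obtained as in the
// sketch after the introduction
TableMetadata base = ops.current();               // triggers refresh() if needed

Map<String, String> updated = Maps.newHashMap(base.properties());
updated.put("commit.retry.num-retries", "10");    // hypothetical property change
TableMetadata newMetadata = base.replaceProperties(updated);

// throws CommitFailedException if another writer committed after base was read
ops.commit(base, newMetadata);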
 
Example #9
Source File: IcebergTableOps.java    From metacat with Apache License 2.0
@Override
public void commit(final TableMetadata base, final TableMetadata metadata) {
    if (!base.equals(metadata)) {
        location = writeNewMetadata(metadata, currentVersion() + 1);
        tableMetadata = null;
        this.requestRefresh();
    }
}
 
Example #10
Source File: IcebergTableOps.java    From metacat with Apache License 2.0
@Override
public TableMetadata current() {
    if (tableMetadata == null) {
        tableMetadata = icebergTableOpsProxy.getMetadata(this, config.isIcebergCacheEnabled());
    }
    return tableMetadata;
}
 
Example #11
Source File: TestHadoopCommits.java    From iceberg with Apache License 2.0
@Test
public void testFastAppend() throws Exception {
  // first append
  table.newFastAppend()
      .appendFile(FILE_A)
      .commit();

  Assert.assertTrue("Should create v2 for the update",
      version(2).exists() && version(2).isFile());
  Assert.assertEquals("Should write the current version to the hint file",
      2, readVersionHint());

  List<FileScanTask> tasks = Lists.newArrayList(table.newScan().planFiles());
  Assert.assertEquals("Should scan 1 file", 1, tasks.size());

  List<File> manifests = listManifestFiles();
  Assert.assertEquals("Should contain only one Avro manifest file", 1, manifests.size());

  // second append
  table.newFastAppend()
      .appendFile(FILE_B)
      .commit();

  Assert.assertTrue("Should create v3 for the update",
      version(3).exists() && version(3).isFile());
  Assert.assertEquals("Should write the current version to the hint file",
      3, readVersionHint());

  tasks = Lists.newArrayList(table.newScan().planFiles());
  Assert.assertEquals("Should scan 2 files", 2, tasks.size());

  Assert.assertEquals("Should contain 2 Avro manifest files",
      2, listManifestFiles().size());

  TableMetadata metadata = readMetadataVersion(3);
  Assert.assertEquals("Current snapshot should contain 2 manifests",
      2, metadata.currentSnapshot().allManifests().size());
}
 
Example #12
Source File: WapUtil.java    From iceberg with Apache License 2.0
/**
 * Check if a given staged snapshot's associated wap-id was already published. Does not fail for non-WAP workflows.
 *
 * @param current the current {@link TableMetadata metadata} for the target table
 * @param wapSnapshotId a snapshot id which could have been staged and is associated with a wap id
 * @return the WAP ID that will be published, if the snapshot has one
 */
public static String validateWapPublish(TableMetadata current, long wapSnapshotId) {
  Snapshot cherryPickSnapshot = current.snapshot(wapSnapshotId);
  String wapId = stagedWapId(cherryPickSnapshot);
  if (wapId != null && !wapId.isEmpty()) {
    if (WapUtil.isWapIdPublished(current, wapId)) {
      throw new DuplicateWAPCommitException(wapId);
    }
  }

  return wapId;
}
 
Example #13
Source File: HadoopCatalog.java    From iceberg with Apache License 2.0
@Override
public boolean dropTable(TableIdentifier identifier, boolean purge) {
  if (!isValidIdentifier(identifier)) {
    throw new NoSuchTableException("Invalid identifier: %s", identifier);
  }

  Path tablePath = new Path(defaultWarehouseLocation(identifier));
  TableOperations ops = newTableOps(identifier);
  TableMetadata lastMetadata;
  if (purge && ops.current() != null) {
    lastMetadata = ops.current();
  } else {
    lastMetadata = null;
  }

  try {
    if (purge && lastMetadata != null) {
      // Since the data files and the metadata files may be stored in different locations,
      // dropTableData must be called to force-delete the data files.
      dropTableData(ops.io(), lastMetadata);
    }
    fs.delete(tablePath, true /* recursive */);
    return true;
  } catch (IOException e) {
    throw new RuntimeIOException(e, "Failed to delete file: %s", tablePath);
  }
}
 
Example #14
Source File: HadoopTableOperations.java    From iceberg with Apache License 2.0
private static TableMetadata checkUUID(TableMetadata currentMetadata, TableMetadata newMetadata) {
  String newUUID = newMetadata.uuid();
  if (currentMetadata != null && currentMetadata.uuid() != null && newUUID != null) {
    Preconditions.checkState(newUUID.equals(currentMetadata.uuid()),
        "Table UUID does not match: current=%s != refreshed=%s", currentMetadata.uuid(), newUUID);
  }
  return newMetadata;
}
 
Example #15
Source File: HadoopTableOperations.java    From iceberg with Apache License 2.0
@Override
public TableMetadata refresh() {
  int ver = version != null ? version : readVersionHint();
  try {
    Path metadataFile = getMetadataFile(ver);
    if (version == null && metadataFile == null && ver == 0) {
      // no v0 metadata means the table doesn't exist yet
      return null;
    } else if (metadataFile == null) {
      throw new ValidationException("Metadata file for version %d is missing", ver);
    }

    Path nextMetadataFile = getMetadataFile(ver + 1);
    while (nextMetadataFile != null) {
      ver += 1;
      metadataFile = nextMetadataFile;
      nextMetadataFile = getMetadataFile(ver + 1);
    }

    updateVersionAndMetadata(ver, metadataFile.toString());

    this.shouldRefresh = false;
    return currentMetadata;
  } catch (IOException e) {
    throw new RuntimeIOException(e, "Failed to refresh the table");
  }
}
 
Example #16
Source File: TestTables.java    From iceberg with Apache License 2.0
@Override
public TableMetadata refresh() {
  synchronized (METADATA) {
    this.current = METADATA.get(tableName);
  }
  return current;
}
 
Example #17
Source File: HiveTableOperations.java    From presto with Apache License 2.0
private static String metadataFileLocation(TableMetadata metadata, String filename)
{
    String location = metadata.properties().get(WRITE_METADATA_LOCATION);
    if (location != null) {
        return format("%s/%s", location, filename);
    }
    return format("%s/%s/%s", metadata.location(), METADATA_FOLDER_NAME, filename);
}
 
Example #18
Source File: HiveTableOperations.java    From presto with Apache License 2.0
@Override
public TableMetadata current()
{
    if (shouldRefresh) {
        return refresh();
    }
    return currentMetadata;
}
 
Example #19
Source File: HiveTableOperations.java    From presto with Apache License 2.0
private String writeNewMetadata(TableMetadata metadata, int newVersion)
{
    String newTableMetadataFilePath = newTableMetadataFilePath(metadata, newVersion);
    OutputFile newMetadataLocation = fileIo.newOutputFile(newTableMetadataFilePath);

    // write the new metadata
    TableMetadataParser.write(metadata, newMetadataLocation);

    return newTableMetadataFilePath;
}
 
Example #20
Source File: HiveTableOperations.java    From presto with Apache License 2.0
private void refreshFromMetadataLocation(String newLocation)
{
    // use null-safe equality check because new tables have a null metadata location
    if (Objects.equals(currentMetadataLocation, newLocation)) {
        shouldRefresh = false;
        return;
    }

    AtomicReference<TableMetadata> newMetadata = new AtomicReference<>();
    Tasks.foreach(newLocation)
            .retry(20)
            .exponentialBackoff(100, 5000, 600000, 4.0)
            .suppressFailureWhenFinished()
            .run(metadataLocation -> newMetadata.set(
                    TableMetadataParser.read(this, io().newInputFile(metadataLocation))));

    String newUUID = newMetadata.get().uuid();
    if (currentMetadata != null) {
        checkState(newUUID == null || newUUID.equals(currentMetadata.uuid()),
                "Table UUID does not match: current=%s != refreshed=%s", currentMetadata.uuid(), newUUID);
    }

    currentMetadata = newMetadata.get();
    currentMetadataLocation = newLocation;
    version = parseVersion(newLocation);
    shouldRefresh = false;
}
 
Example #21
Source File: TestHiveCommits.java    From iceberg with Apache License 2.0
@Test
public void testSuppressUnlockExceptions() throws TException, InterruptedException {
  Table table = catalog.loadTable(TABLE_IDENTIFIER);
  HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations();

  TableMetadata metadataV1 = ops.current();

  table.updateSchema()
      .addColumn("n", Types.IntegerType.get())
      .commit();

  ops.refresh();

  TableMetadata metadataV2 = ops.current();

  Assert.assertEquals(2, ops.current().schema().columns().size());

  HiveTableOperations spyOps = spy(ops);

  ArgumentCaptor<Long> lockId = ArgumentCaptor.forClass(Long.class);
  doThrow(new RuntimeException()).when(spyOps).doUnlock(lockId.capture());

  try {
    spyOps.commit(metadataV2, metadataV1);
  } finally {
    ops.doUnlock(lockId.getValue());
  }

  ops.refresh();

  // the commit must succeed
  Assert.assertEquals(1, ops.current().schema().columns().size());
}
 
Example #22
Source File: RemoveOrphanFilesAction.java    From iceberg with Apache License 2.0
private Dataset<Row> buildValidMetadataFileDF() {
  String allManifestsMetadataTable = metadataTableName(MetadataTableType.ALL_MANIFESTS);
  Dataset<Row> manifestDF = spark.read().format("iceberg")
      .load(allManifestsMetadataTable)
      .selectExpr("path as file_path");

  List<String> otherMetadataFiles = Lists.newArrayList();

  for (Snapshot snapshot : table.snapshots()) {
    String manifestListLocation = snapshot.manifestListLocation();
    if (manifestListLocation != null) {
      otherMetadataFiles.add(manifestListLocation);
    }
  }

  otherMetadataFiles.add(ops.metadataFileLocation("version-hint.text"));

  TableMetadata metadata = ops.current();
  otherMetadataFiles.add(metadata.metadataFileLocation());
  for (TableMetadata.MetadataLogEntry previousMetadataFile : metadata.previousFiles()) {
    otherMetadataFiles.add(previousMetadataFile.file());
  }

  Dataset<Row> otherMetadataFileDF = spark
      .createDataset(otherMetadataFiles, Encoders.STRING())
      .toDF("file_path");

  return manifestDF.union(otherMetadataFileDF);
}
 
Example #23
Source File: TestTables.java    From iceberg with Apache License 2.0
static TestTable create(File temp, String name, Schema schema, PartitionSpec spec) {
  TestTableOperations ops = new TestTableOperations(name);
  if (ops.current() != null) {
    throw new AlreadyExistsException("Table %s already exists at location: %s", name, temp);
  }
  ops.commit(null, TableMetadata.newTableMetadata(schema, spec, temp.toString(), ImmutableMap.of()));
  return new TestTable(ops, name);
}
 
Example #24
Source File: HadoopTableOperations.java    From iceberg with Apache License 2.0
@Override
public TableOperations temp(TableMetadata uncommittedMetadata) {
  return new TableOperations() {
    @Override
    public TableMetadata current() {
      return uncommittedMetadata;
    }

    @Override
    public TableMetadata refresh() {
      throw new UnsupportedOperationException("Cannot call refresh on temporary table operations");
    }

    @Override
    public void commit(TableMetadata base, TableMetadata metadata) {
      throw new UnsupportedOperationException("Cannot call commit on temporary table operations");
    }

    @Override
    public String metadataFileLocation(String fileName) {
      return HadoopTableOperations.this.metadataFileLocation(fileName);
    }

    @Override
    public LocationProvider locationProvider() {
      return LocationProviders.locationsFor(uncommittedMetadata.location(), uncommittedMetadata.properties());
    }

    @Override
    public FileIO io() {
      return HadoopTableOperations.this.io();
    }

    @Override
    public EncryptionManager encryption() {
      return HadoopTableOperations.this.encryption();
    }

    @Override
    public long newSnapshotId() {
      return HadoopTableOperations.this.newSnapshotId();
    }
  };
}
 
Example #25
Source File: HadoopTableOperations.java    From iceberg with Apache License 2.0
@Override
public void commit(TableMetadata base, TableMetadata metadata) {
  Pair<Integer, TableMetadata> current = versionAndMetadata();
  if (base != current.second()) {
    throw new CommitFailedException("Cannot commit changes based on stale table metadata");
  }

  if (base == metadata) {
    LOG.info("Nothing to commit.");
    return;
  }

  Preconditions.checkArgument(base == null || base.location().equals(metadata.location()),
      "Hadoop path-based tables cannot be relocated");
  Preconditions.checkArgument(
      !metadata.properties().containsKey(TableProperties.WRITE_METADATA_LOCATION),
      "Hadoop path-based tables cannot relocate metadata");

  String codecName = metadata.property(
      TableProperties.METADATA_COMPRESSION, TableProperties.METADATA_COMPRESSION_DEFAULT);
  TableMetadataParser.Codec codec = TableMetadataParser.Codec.fromName(codecName);
  String fileExtension = TableMetadataParser.getFileExtension(codec);
  Path tempMetadataFile = metadataPath(UUID.randomUUID().toString() + fileExtension);
  TableMetadataParser.write(metadata, io().newOutputFile(tempMetadataFile.toString()));

  int nextVersion = (current.first() != null ? current.first() : 0) + 1;
  Path finalMetadataFile = metadataFilePath(nextVersion, codec);
  FileSystem fs = getFileSystem(tempMetadataFile, conf);

  try {
    if (fs.exists(finalMetadataFile)) {
      throw new CommitFailedException(
          "Version %d already exists: %s", nextVersion, finalMetadataFile);
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e,
        "Failed to check if next version exists: " + finalMetadataFile);
  }

  // this rename operation is the atomic commit operation
  renameToFinal(fs, tempMetadataFile, finalMetadataFile);

  // update the best-effort version pointer
  writeVersionHint(nextVersion);

  deleteRemovedMetadataFiles(base, metadata);

  this.shouldRefresh = true;
}
 
Example #26
Source File: TestTables.java    From iceberg with Apache License 2.0
static TableMetadata readMetadata(String tableName) {
  synchronized (METADATA) {
    return METADATA.get(tableName);
  }
}
 
Example #27
Source File: TestTables.java    From iceberg with Apache License 2.0
@Override
public TableMetadata current() {
  return current;
}
 
Example #28
Source File: HiveTableOperations.java    From presto with Apache License 2.0
private static String newTableMetadataFilePath(TableMetadata meta, int newVersion)
{
    String codec = meta.property(METADATA_COMPRESSION, METADATA_COMPRESSION_DEFAULT);
    return metadataFileLocation(meta, format("%05d-%s%s", newVersion, randomUUID(), getFileExtension(codec)));
}
 
Example #29
Source File: HadoopTableOperations.java    From iceberg with Apache License 2.0
private synchronized Pair<Integer, TableMetadata> versionAndMetadata() {
  return Pair.of(version, currentMetadata);
}
 
Example #30
Source File: IcebergMetadata.java    From presto with Apache License 2.0
@Override
public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorNewTableLayout> layout)
{
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();

    Schema schema = toIcebergSchema(tableMetadata.getColumns());

    PartitionSpec partitionSpec = parsePartitionFields(schema, getPartitioning(tableMetadata.getProperties()));

    Database database = metastore.getDatabase(schemaName)
            .orElseThrow(() -> new SchemaNotFoundException(schemaName));

    HdfsContext hdfsContext = new HdfsContext(session, schemaName, tableName);
    HiveIdentity identity = new HiveIdentity(session);
    String targetPath = getTableLocation(tableMetadata.getProperties());
    if (targetPath == null) {
        targetPath = getTableDefaultLocation(database, hdfsContext, hdfsEnvironment, schemaName, tableName).toString();
    }

    TableOperations operations = new HiveTableOperations(metastore, hdfsEnvironment, hdfsContext, identity, schemaName, tableName, session.getUser(), targetPath);
    if (operations.current() != null) {
        throw new TableAlreadyExistsException(schemaTableName);
    }

    ImmutableMap.Builder<String, String> propertiesBuilder = ImmutableMap.builderWithExpectedSize(2);
    FileFormat fileFormat = getFileFormat(tableMetadata.getProperties());
    propertiesBuilder.put(DEFAULT_FILE_FORMAT, fileFormat.toString());
    if (tableMetadata.getComment().isPresent()) {
        propertiesBuilder.put(TABLE_COMMENT, tableMetadata.getComment().get());
    }

    TableMetadata metadata = newTableMetadata(operations, schema, partitionSpec, targetPath, propertiesBuilder.build());

    transaction = createTableTransaction(operations, metadata);

    return new IcebergWritableTableHandle(
            schemaName,
            tableName,
            SchemaParser.toJson(metadata.schema()),
            PartitionSpecParser.toJson(metadata.spec()),
            getColumns(metadata.schema(), typeManager),
            targetPath,
            fileFormat);
}