org.apache.iceberg.TableOperations Java Examples

The following examples show how to use org.apache.iceberg.TableOperations. Each example is drawn from an open source project; the source file and project are noted above the code.
Example #1
Source File: RewriteManifestsAction.java    From iceberg with Apache License 2.0
RewriteManifestsAction(SparkSession spark, Table table) {
  this.spark = spark;
  this.sparkContext = new JavaSparkContext(spark.sparkContext());
  this.manifestEncoder = Encoders.javaSerialization(ManifestFile.class);
  this.table = table;
  this.spec = table.spec();
  this.targetManifestSizeBytes = PropertyUtil.propertyAsLong(
      table.properties(),
      TableProperties.MANIFEST_TARGET_SIZE_BYTES,
      TableProperties.MANIFEST_TARGET_SIZE_BYTES_DEFAULT);
  this.fileIO = SparkUtil.serializableFileIO(table);

  // default the staging location to the metadata location
  TableOperations ops = ((HasTableOperations) table).operations();
  Path metadataFilePath = new Path(ops.metadataFileLocation("file"));
  this.stagingLocation = metadataFilePath.getParent().toString();

  // use the current table format version for new manifests
  this.formatVersion = ops.current().formatVersion();
}
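
Note the cast: HasTableOperations is the supported way to reach a table's low-level operations from the public Table API. Resolving a placeholder file name through metadataFileLocation("file") and taking its parent is a small trick for recovering the metadata directory itself, which is then reused as the staging location.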
 
Example #2
Source File: HadoopTables.java    From iceberg with Apache License 2.0
/**
 * Loads a table from a FileSystem path location.
 *
 * @param location a path URI (e.g. hdfs:///warehouse/my_table/)
 * @return table implementation
 */
@Override
public Table load(String location) {
  TableOperations ops = newTableOps(location);
  if (ops.current() == null) {
    // try to resolve a metadata table, which we encode as URI fragments
    // e.g. hdfs:///warehouse/my_table#snapshots
    int hashIndex = location.lastIndexOf('#');
    if (hashIndex != -1 && location.length() - 1 != hashIndex) {
      // found '#' and it is not the last character of the location
      String baseTable = location.substring(0, hashIndex);
      String metaTable = location.substring(hashIndex + 1);
      MetadataTableType type = MetadataTableType.from(metaTable);
      if (type != null) {
        return loadMetadataTable(baseTable, type);
      } else {
        throw new NoSuchTableException("Table does not exist at location: " + location);
      }
    } else {
      throw new NoSuchTableException("Table does not exist at location: " + location);
    }
  }

  return new BaseTable(ops, location);
}
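
A minimal usage sketch of the fragment handling above (the warehouse path is a placeholder, and a Hadoop Configuration is assumed to be available):

Configuration conf = new Configuration();
HadoopTables tables = new HadoopTables(conf);

// Load the base table, then its snapshots metadata table via a '#' fragment.
Table table = tables.load("hdfs:///warehouse/my_table");
Table snapshots = tables.load("hdfs:///warehouse/my_table#snapshots");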
 
Example #3
Source File: HadoopTables.java    From iceberg with Apache License 2.0
/**
 * Create a table using the FileSystem implementation resolved from
 * the location.
 *
 * @param schema iceberg schema used to create the table
 * @param spec partition spec; if null, the table will be unpartitioned
 * @param properties a string map of table properties; initialized to empty if null
 * @param location a path URI (e.g. hdfs:///warehouse/my_table)
 * @return newly created table implementation
 */
@Override
public Table create(Schema schema, PartitionSpec spec, Map<String, String> properties,
                    String location) {
  Preconditions.checkNotNull(schema, "A table schema is required");

  TableOperations ops = newTableOps(location);
  if (ops.current() != null) {
    throw new AlreadyExistsException("Table already exists at location: " + location);
  }

  Map<String, String> tableProps = properties == null ? ImmutableMap.of() : properties;
  PartitionSpec partitionSpec = spec == null ? PartitionSpec.unpartitioned() : spec;
  TableMetadata metadata = TableMetadata.newTableMetadata(schema, partitionSpec, location, tableProps);
  ops.commit(null, metadata);

  return new BaseTable(ops, location);
}
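
A short creation sketch (the schema fields and location are illustrative): passing null for the spec and properties exercises the fallbacks above, producing an unpartitioned table with empty properties.

Schema schema = new Schema(
    Types.NestedField.required(1, "id", Types.LongType.get()),
    Types.NestedField.optional(2, "data", Types.StringType.get()));

HadoopTables tables = new HadoopTables(new Configuration());
// null spec -> unpartitioned; null properties -> empty map
Table created = tables.create(schema, null, null, "hdfs:///warehouse/my_table");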
 
Example #4
Source File: IcebergUtil.java    From presto with Apache License 2.0
public static Table getIcebergTable(HiveMetastore metastore, HdfsEnvironment hdfsEnvironment, ConnectorSession session, SchemaTableName table)
{
    HdfsContext hdfsContext = new HdfsContext(session, table.getSchemaName(), table.getTableName());
    HiveIdentity identity = new HiveIdentity(session);
    TableOperations operations = new HiveTableOperations(metastore, hdfsEnvironment, hdfsContext, identity, table.getSchemaName(), table.getTableName());
    return new BaseTable(operations, table.getSchemaName() + "." + table.getTableName());
}
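
BaseTable is the generic Table implementation: it delegates everything to the TableOperations it is given, which is why the same class serves the Hive-backed operations here and the filesystem-backed operations in the HadoopTables examples.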
 
Example #5
Source File: HadoopTables.java    From iceberg with Apache License 2.0
private Table loadMetadataTable(String location, MetadataTableType type) {
  TableOperations ops = newTableOps(location);
  if (ops.current() == null) {
    throw new NoSuchTableException("Table does not exist at location: " + location);
  }

  Table baseTable = new BaseTable(ops, location);

  switch (type) {
    case ENTRIES:
      return new ManifestEntriesTable(ops, baseTable);
    case FILES:
      return new DataFilesTable(ops, baseTable);
    case HISTORY:
      return new HistoryTable(ops, baseTable);
    case SNAPSHOTS:
      return new SnapshotsTable(ops, baseTable);
    case MANIFESTS:
      return new ManifestsTable(ops, baseTable);
    case PARTITIONS:
      return new PartitionsTable(ops, baseTable);
    case ALL_DATA_FILES:
      return new AllDataFilesTable(ops, baseTable);
    case ALL_MANIFESTS:
      return new AllManifestsTable(ops, baseTable);
    case ALL_ENTRIES:
      return new AllEntriesTable(ops, baseTable);
    default:
      throw new NoSuchTableException(String.format("Unknown metadata table type: %s for %s", type, location));
  }
}
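
Each case wraps the same TableOperations in a read-only metadata table, so queries over snapshots, manifests, partitions, and so on are answered from the table's metadata files rather than its data.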
 
Example #6
Source File: HadoopCatalog.java    From iceberg with Apache License 2.0
@Override
public boolean dropTable(TableIdentifier identifier, boolean purge) {
  if (!isValidIdentifier(identifier)) {
    throw new NoSuchTableException("Invalid identifier: %s", identifier);
  }

  Path tablePath = new Path(defaultWarehouseLocation(identifier));
  TableOperations ops = newTableOps(identifier);
  TableMetadata lastMetadata;
  if (purge && ops.current() != null) {
    lastMetadata = ops.current();
  } else {
    lastMetadata = null;
  }

  try {
    if (purge && lastMetadata != null) {
      // The data files and the metadata files may be stored in different
      // locations, so dropTableData must be called to force-delete the data files.
      dropTableData(ops.io(), lastMetadata);
    }
    fs.delete(tablePath, true /* recursive */);
    return true;
  } catch (IOException e) {
    throw new RuntimeIOException(e, "Failed to delete file: %s", tablePath);
  }
}
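
A hedged usage sketch (the two-argument HadoopCatalog constructor and the identifier are assumptions for illustration):

HadoopCatalog catalog = new HadoopCatalog(new Configuration(), "hdfs:///warehouse");
// purge = true also force-deletes data files that may live outside the table path
boolean dropped = catalog.dropTable(TableIdentifier.of("db", "logs"), true);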
 
Example #7
Source File: IcebergMetadata.java    From presto with Apache License 2.0
@Override
public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorNewTableLayout> layout)
{
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();

    Schema schema = toIcebergSchema(tableMetadata.getColumns());

    PartitionSpec partitionSpec = parsePartitionFields(schema, getPartitioning(tableMetadata.getProperties()));

    Database database = metastore.getDatabase(schemaName)
            .orElseThrow(() -> new SchemaNotFoundException(schemaName));

    HdfsContext hdfsContext = new HdfsContext(session, schemaName, tableName);
    HiveIdentity identity = new HiveIdentity(session);
    String targetPath = getTableLocation(tableMetadata.getProperties());
    if (targetPath == null) {
        targetPath = getTableDefaultLocation(database, hdfsContext, hdfsEnvironment, schemaName, tableName).toString();
    }

    TableOperations operations = new HiveTableOperations(metastore, hdfsEnvironment, hdfsContext, identity, schemaName, tableName, session.getUser(), targetPath);
    if (operations.current() != null) {
        throw new TableAlreadyExistsException(schemaTableName);
    }

    ImmutableMap.Builder<String, String> propertiesBuilder = ImmutableMap.builderWithExpectedSize(2);
    FileFormat fileFormat = getFileFormat(tableMetadata.getProperties());
    propertiesBuilder.put(DEFAULT_FILE_FORMAT, fileFormat.toString());
    if (tableMetadata.getComment().isPresent()) {
        propertiesBuilder.put(TABLE_COMMENT, tableMetadata.getComment().get());
    }

    TableMetadata metadata = newTableMetadata(operations, schema, partitionSpec, targetPath, propertiesBuilder.build());

    transaction = createTableTransaction(operations, metadata);

    return new IcebergWritableTableHandle(
            schemaName,
            tableName,
            SchemaParser.toJson(metadata.schema()),
            PartitionSpecParser.toJson(metadata.spec()),
            getColumns(metadata.schema(), typeManager),
            targetPath,
            fileFormat);
}
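
Note the staging pattern: newTableMetadata only builds the initial metadata and createTableTransaction stages it against the operations; nothing is written until the transaction is committed when the create finishes, which is why a conflicting table is detected up front with operations.current().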
 
Example #8
Source File: HiveCatalog.java    From iceberg with Apache License 2.0
@Override
public TableOperations newTableOps(TableIdentifier tableIdentifier) {
  String dbName = tableIdentifier.namespace().level(0);
  String tableName = tableIdentifier.name();
  return new HiveTableOperations(conf, clients, dbName, tableName);
}
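
Overriding newTableOps is how each catalog plugs in its metadata store: HiveCatalog returns operations that track the current metadata pointer in the Hive Metastore, while HadoopCatalog (Example #12) tracks it on the filesystem.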
 
Example #9
Source File: HadoopTableOperations.java    From iceberg with Apache License 2.0
@Override
public TableOperations temp(TableMetadata uncommittedMetadata) {
  return new TableOperations() {
    @Override
    public TableMetadata current() {
      return uncommittedMetadata;
    }

    @Override
    public TableMetadata refresh() {
      throw new UnsupportedOperationException("Cannot call refresh on temporary table operations");
    }

    @Override
    public void commit(TableMetadata base, TableMetadata metadata) {
      throw new UnsupportedOperationException("Cannot call commit on temporary table operations");
    }

    @Override
    public String metadataFileLocation(String fileName) {
      return HadoopTableOperations.this.metadataFileLocation(fileName);
    }

    @Override
    public LocationProvider locationProvider() {
      return LocationProviders.locationsFor(uncommittedMetadata.location(), uncommittedMetadata.properties());
    }

    @Override
    public FileIO io() {
      return HadoopTableOperations.this.io();
    }

    @Override
    public EncryptionManager encryption() {
      return HadoopTableOperations.this.encryption();
    }

    @Override
    public long newSnapshotId() {
      return HadoopTableOperations.this.newSnapshotId();
    }
  };
}
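
The temporary operations expose uncommitted metadata read-only: current() returns it, the location and I/O plumbing delegate to the enclosing HadoopTableOperations, and refresh() and commit() deliberately throw. This lets callers resolve file locations for a pending commit without any way to mutate table state.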
 
Example #10
Source File: HadoopTables.java    From iceberg with Apache License 2.0
private TableOperations newTableOps(String location) {
  return new HadoopTableOperations(new Path(location), conf);
}
 
Example #11
Source File: IcebergMetastoreTables.java    From metacat with Apache License 2.0
@Override
protected TableOperations newTableOps(final TableIdentifier tableIdentifier) {
    return getTableOps();
}
 
Example #12
Source File: HadoopCatalog.java    From iceberg with Apache License 2.0
@Override
protected TableOperations newTableOps(TableIdentifier identifier) {
  return new HadoopTableOperations(new Path(defaultWarehouseLocation(identifier)), conf);
}
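
Taken together, the examples show the contract every TableOperations implementation satisfies: current() returns the last known metadata (null if the table does not exist), commit(base, metadata) atomically replaces it, and io() and metadataFileLocation() handle file access, regardless of whether the metadata pointer lives in HDFS, the Hive Metastore, or Metacat.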