Java Code Examples for org.apache.iceberg.io.FileIO

The following examples show how to use org.apache.iceberg.io.FileIO. These examples are extracted from open source projects; the source project, source file, and license are noted above each example.
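
Before diving into the examples, it may help to see the FileIO surface itself: the interface exposes newInputFile, newOutputFile, and deleteFile, and the examples below mostly construct an implementation (often HadoopFileIO) or ship one to Spark executors as a broadcast variable. The snippet below is a minimal sketch rather than code from any of the projects listed; the method name and file path are made up for illustration, and it assumes the usual org.apache.iceberg.io, org.apache.iceberg.hadoop, org.apache.hadoop.conf, java.io, and java.nio.charset imports.

// A minimal sketch (not from the projects below): write, read back, and delete a
// file through FileIO. The path and method name are hypothetical.
static void fileIoRoundTrip() throws IOException {
  FileIO io = new HadoopFileIO(new Configuration());

  OutputFile outputFile = io.newOutputFile("file:/tmp/iceberg-example/data.txt");
  try (PositionOutputStream out = outputFile.createOrOverwrite()) {
    out.write("hello".getBytes(StandardCharsets.UTF_8));
  }

  InputFile inputFile = io.newInputFile(outputFile.location());
  try (SeekableInputStream in = inputFile.newStream()) {
    byte[] data = new byte[(int) inputFile.getLength()];
    new DataInputStream(in).readFully(data);  // read the whole file back
  }

  io.deleteFile(inputFile.location());
}
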
Example 1
Source Project: iceberg   Source File: Writer.java    License: Apache License 2.0
Writer(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager,
       DataSourceOptions options, boolean replacePartitions, String applicationId, String wapId,
       Schema writeSchema, StructType dsSchema) {
  this.table = table;
  this.format = getFileFormat(table.properties(), options);
  this.io = io;
  this.encryptionManager = encryptionManager;
  this.replacePartitions = replacePartitions;
  this.applicationId = applicationId;
  this.wapId = wapId;
  this.writeSchema = writeSchema;
  this.dsSchema = dsSchema;

  long tableTargetFileSize = PropertyUtil.propertyAsLong(
      table.properties(), WRITE_TARGET_FILE_SIZE_BYTES, WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT);
  this.targetFileSize = options.getLong("target-file-size-bytes", tableTargetFileSize);
}
 
Example 2
Source Project: iceberg   Source File: IcebergSource.java    License: Apache License 2.0
@Override
public DataSourceReader createReader(StructType readSchema, DataSourceOptions options) {
  Configuration conf = new Configuration(lazyBaseConf());
  Table table = getTableAndResolveHadoopConfiguration(options, conf);
  String caseSensitive = lazySparkSession().conf().get("spark.sql.caseSensitive");

  Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
  Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());

  Reader reader = new Reader(table, io, encryptionManager, Boolean.parseBoolean(caseSensitive), options);
  if (readSchema != null) {
    // convert() will fail if readSchema contains fields not in table.schema()
    SparkSchemaUtil.convert(table.schema(), readSchema);
    reader.pruneColumns(readSchema);
  }

  return reader;
}
 
Example 3
Source Project: iceberg   Source File: IcebergSource.java    License: Apache License 2.0
@Override
public Optional<DataSourceWriter> createWriter(String jobId, StructType dsStruct, SaveMode mode,
                                               DataSourceOptions options) {
  Preconditions.checkArgument(mode == SaveMode.Append || mode == SaveMode.Overwrite,
      "Save mode %s is not supported", mode);
  Configuration conf = new Configuration(lazyBaseConf());
  Table table = getTableAndResolveHadoopConfiguration(options, conf);
  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsStruct);
  TypeUtil.validateWriteSchema(table.schema(), writeSchema, checkNullability(options), checkOrdering(options));
  SparkUtil.validatePartitionTransforms(table.spec());
  String appId = lazySparkSession().sparkContext().applicationId();
  String wapId = lazySparkSession().conf().get("spark.wap.id", null);
  boolean replacePartitions = mode == SaveMode.Overwrite;

  Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
  Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());

  return Optional.of(new Writer(
      table, io, encryptionManager, options, replacePartitions, appId, wapId, writeSchema, dsStruct));
}
 
Example 4
Source Project: iceberg   Source File: IcebergSource.java    License: Apache License 2.0
@Override
public StreamWriter createStreamWriter(String runId, StructType dsStruct,
                                       OutputMode mode, DataSourceOptions options) {
  Preconditions.checkArgument(
      mode == OutputMode.Append() || mode == OutputMode.Complete(),
      "Output mode %s is not supported", mode);
  Configuration conf = new Configuration(lazyBaseConf());
  Table table = getTableAndResolveHadoopConfiguration(options, conf);
  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsStruct);
  TypeUtil.validateWriteSchema(table.schema(), writeSchema, checkNullability(options), checkOrdering(options));
  SparkUtil.validatePartitionTransforms(table.spec());
  // Spark 2.4.x passes runId to createStreamWriter instead of real queryId,
  // so we fetch it directly from sparkContext to make writes idempotent
  String queryId = lazySparkSession().sparkContext().getLocalProperty(StreamExecution.QUERY_ID_KEY());
  String appId = lazySparkSession().sparkContext().applicationId();

  Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
  Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());

  return new StreamingWriter(table, io, encryptionManager, options, queryId, mode, appId, writeSchema, dsStruct);
}
 
Example 5
Source Project: iceberg   Source File: RewriteManifestsAction.java    License: Apache License 2.0
private List<ManifestFile> writeManifestsForUnpartitionedTable(Dataset<Row> manifestEntryDF, int numManifests) {
  Broadcast<FileIO> io = sparkContext.broadcast(fileIO);
  StructType sparkType = (StructType) manifestEntryDF.schema().apply("data_file").dataType();

  // we rely only on the target number of manifests for unpartitioned tables
  // as we should not worry about having too much metadata per partition
  long maxNumManifestEntries = Long.MAX_VALUE;

  return manifestEntryDF
      .repartition(numManifests)
      .mapPartitions(
          toManifests(io, maxNumManifestEntries, stagingLocation, formatVersion, spec, sparkType),
          manifestEncoder
      )
      .collectAsList();
}
 
Example 6
Source Project: iceberg   Source File: RewriteManifestsAction.java    License: Apache License 2.0
private List<ManifestFile> writeManifestsForPartitionedTable(
    Dataset<Row> manifestEntryDF, int numManifests,
    int targetNumManifestEntries) {

  Broadcast<FileIO> io = sparkContext.broadcast(fileIO);
  StructType sparkType = (StructType) manifestEntryDF.schema().apply("data_file").dataType();

  // we allow the actual size of manifests to be 10% higher if the estimation is not precise enough
  long maxNumManifestEntries = (long) (1.1 * targetNumManifestEntries);

  return withReusableDS(manifestEntryDF, df -> {
    Column partitionColumn = df.col("data_file.partition");
    return df.repartitionByRange(numManifests, partitionColumn)
        .sortWithinPartitions(partitionColumn)
        .mapPartitions(
            toManifests(io, maxNumManifestEntries, stagingLocation, formatVersion, spec, sparkType),
            manifestEncoder
        )
        .collectAsList();
  });
}
 
Example 7
Source Project: iceberg   Source File: RewriteManifestsAction.java    License: Apache License 2.0
private static MapPartitionsFunction<Row, ManifestFile> toManifests(
    Broadcast<FileIO> io, long maxNumManifestEntries, String location,
    int format, PartitionSpec spec, StructType sparkType) {

  return (MapPartitionsFunction<Row, ManifestFile>) rows -> {
    List<Row> rowsAsList = Lists.newArrayList(rows);

    if (rowsAsList.isEmpty()) {
      return Collections.emptyIterator();
    }

    List<ManifestFile> manifests = Lists.newArrayList();
    if (rowsAsList.size() <= maxNumManifestEntries) {
      manifests.add(writeManifest(rowsAsList, 0, rowsAsList.size(), io, location, format, spec, sparkType));
    } else {
      int midIndex = rowsAsList.size() / 2;
      manifests.add(writeManifest(rowsAsList, 0, midIndex, io, location, format, spec, sparkType));
      manifests.add(writeManifest(rowsAsList, midIndex, rowsAsList.size(), io, location, format, spec, sparkType));
    }

    return manifests.iterator();
  };
}
 
Example 8
Source Project: iceberg   Source File: SparkTableUtil.java    License: Apache License 2.0
private static Iterator<ManifestFile> buildManifest(SerializableConfiguration conf, PartitionSpec spec,
                                                    String basePath, Iterator<Tuple2<String, DataFile>> fileTuples) {
  if (fileTuples.hasNext()) {
    FileIO io = new HadoopFileIO(conf.get());
    TaskContext ctx = TaskContext.get();
    String suffix = String.format("stage-%d-task-%d-manifest", ctx.stageId(), ctx.taskAttemptId());
    Path location = new Path(basePath, suffix);
    String outputPath = FileFormat.AVRO.addExtension(location.toString());
    OutputFile outputFile = io.newOutputFile(outputPath);
    ManifestWriter<DataFile> writer = ManifestFiles.write(spec, outputFile);

    try (ManifestWriter<DataFile> writerRef = writer) {
      fileTuples.forEachRemaining(fileTuple -> writerRef.add(fileTuple._2));
    } catch (IOException e) {
      throw SparkExceptionUtil.toUncheckedException(e, "Unable to close the manifest writer: %s", outputPath);
    }

    ManifestFile manifestFile = writer.toManifestFile();
    return ImmutableList.of(manifestFile).iterator();
  } else {
    return Collections.emptyIterator();
  }
}
 
Example 9
Source Project: iceberg   Source File: RowDataRewriter.java    License: Apache License 2.0
public RowDataRewriter(Table table, PartitionSpec spec, boolean caseSensitive,
                       Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager) {
  this.schema = table.schema();
  this.spec = spec;
  this.locations = table.locationProvider();
  this.properties = table.properties();
  this.io = io;
  this.encryptionManager = encryptionManager;

  this.caseSensitive = caseSensitive;
  this.nameMapping = table.properties().get(DEFAULT_NAME_MAPPING);

  String formatString = table.properties().getOrDefault(
      TableProperties.DEFAULT_FILE_FORMAT, TableProperties.DEFAULT_FILE_FORMAT_DEFAULT);
  this.format = FileFormat.valueOf(formatString.toUpperCase(Locale.ENGLISH));
}
 
Example 10
Source Project: iceberg   Source File: SparkBatchWrite.java    License: Apache License 2.0
SparkBatchWrite(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager,
                CaseInsensitiveStringMap options, boolean overwriteDynamic, boolean overwriteByFilter,
                Expression overwriteExpr, String applicationId, String wapId, Schema writeSchema,
                StructType dsSchema) {
  this.table = table;
  this.format = getFileFormat(table.properties(), options);
  this.io = io;
  this.encryptionManager = encryptionManager;
  this.overwriteDynamic = overwriteDynamic;
  this.overwriteByFilter = overwriteByFilter;
  this.overwriteExpr = overwriteExpr;
  this.applicationId = applicationId;
  this.wapId = wapId;
  this.genieId = options.get("genie-id");
  this.writeSchema = writeSchema;
  this.dsSchema = dsSchema;

  long tableTargetFileSize = PropertyUtil.propertyAsLong(
      table.properties(), WRITE_TARGET_FILE_SIZE_BYTES, WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT);
  this.targetFileSize = options.getLong("target-file-size-bytes", tableTargetFileSize);
}
 
Example 11
Source Project: iceberg   Source File: SparkBatchScan.java    License: Apache License 2.0
ReadTask(CombinedScanTask task, String tableSchemaString, String expectedSchemaString, String nameMappingString,
         Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager, boolean caseSensitive,
         boolean localityPreferred) {
  this.task = task;
  this.tableSchemaString = tableSchemaString;
  this.expectedSchemaString = expectedSchemaString;
  this.nameMappingString = nameMappingString;
  this.io = io;
  this.encryptionManager = encryptionManager;
  this.caseSensitive = caseSensitive;
  if (localityPreferred) {
    this.preferredLocations = Util.blockLocations(io.value(), task);
  } else {
    this.preferredLocations = HadoopInputFile.NO_LOCATION_PREFERENCE;
  }
}
 
Example 12
Source Project: iceberg   Source File: SparkWriteBuilder.java    License: Apache License 2.0
@Override
public BatchWrite buildForBatch() {
  // Validate
  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsSchema);
  TypeUtil.validateWriteSchema(table.schema(), writeSchema,
      checkNullability(spark, options), checkOrdering(spark, options));
  SparkUtil.validatePartitionTransforms(table.spec());

  // Get application id
  String appId = spark.sparkContext().applicationId();

  // Get write-audit-publish id
  String wapId = spark.conf().get("spark.wap.id", null);

  Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
  Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());

  return new SparkBatchWrite(
      table, io, encryptionManager, options, overwriteDynamic, overwriteByFilter, overwriteExpr, appId, wapId,
      writeSchema, dsSchema);
}
 
Example 13
Source Project: iceberg   Source File: SparkWriteBuilder.java    License: Apache License 2.0
@Override
public StreamingWrite buildForStreaming() {
  // Validate
  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsSchema);
  TypeUtil.validateWriteSchema(table.schema(), writeSchema,
      checkNullability(spark, options), checkOrdering(spark, options));
  SparkUtil.validatePartitionTransforms(table.spec());

  // Change to streaming write if it is just append
  Preconditions.checkState(!overwriteDynamic,
      "Unsupported streaming operation: dynamic partition overwrite");
  Preconditions.checkState(!overwriteByFilter || overwriteExpr == Expressions.alwaysTrue(),
      "Unsupported streaming operation: overwrite by filter: %s", overwriteExpr);

  // Get application id
  String appId = spark.sparkContext().applicationId();

  // Get write-audit-publish id
  String wapId = spark.conf().get("spark.wap.id", null);

  Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
  Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());

  return new SparkStreamingWrite(
      table, io, encryptionManager, options, overwriteByFilter, writeQueryId, appId, wapId, writeSchema, dsSchema);
}
 
Example 14
Source Project: iceberg   Source File: BaseSnapshot.java    License: Apache License 2.0
BaseSnapshot(FileIO io,
             long sequenceNumber,
             long snapshotId,
             Long parentId,
             long timestampMillis,
             String operation,
             Map<String, String> summary,
             String manifestList) {
  this.io = io;
  this.sequenceNumber = sequenceNumber;
  this.snapshotId = snapshotId;
  this.parentId = parentId;
  this.timestampMillis = timestampMillis;
  this.operation = operation;
  this.summary = summary;
  this.manifestListLocation = manifestList;
}
 
Example 15
Source Project: presto   Source File: HiveTableOperations.java    License: Apache License 2.0
private HiveTableOperations(FileIO fileIo, HiveMetastore metastore, HiveIdentity identity, String database, String table, Optional<String> owner, Optional<String> location)
{
    this.fileIo = requireNonNull(fileIo, "fileIo is null");
    this.metastore = requireNonNull(metastore, "metastore is null");
    this.identity = requireNonNull(identity, "identity is null");
    this.database = requireNonNull(database, "database is null");
    this.tableName = requireNonNull(table, "table is null");
    this.owner = requireNonNull(owner, "owner is null");
    this.location = requireNonNull(location, "location is null");
}
 
Example 16
Source Project: iceberg   Source File: HiveTableOperations.java    License: Apache License 2.0
@Override
public FileIO io() {
  if (fileIO == null) {
    fileIO = new HadoopFileIO(conf);
  }

  return fileIO;
}
 
Example 17
Source Project: iceberg   Source File: StreamingWriter.java    License: Apache License 2.0
StreamingWriter(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager,
                DataSourceOptions options, String queryId, OutputMode mode, String applicationId,
                Schema writeSchema, StructType dsSchema) {
  super(table, io, encryptionManager, options, false, applicationId, writeSchema, dsSchema);
  this.queryId = queryId;
  this.mode = mode;
}
 
Example 18
Source Project: iceberg   Source File: Writer.java    License: Apache License 2.0
WriterFactory(PartitionSpec spec, FileFormat format, LocationProvider locations,
              Map<String, String> properties, Broadcast<FileIO> io,
              Broadcast<EncryptionManager> encryptionManager, long targetFileSize,
              Schema writeSchema, StructType dsSchema) {
  this.spec = spec;
  this.format = format;
  this.locations = locations;
  this.properties = properties;
  this.io = io;
  this.encryptionManager = encryptionManager;
  this.targetFileSize = targetFileSize;
  this.writeSchema = writeSchema;
  this.dsSchema = dsSchema;
}
 
Example 19
Source Project: iceberg   Source File: Reader.java    License: Apache License 2.0
private ReadTask(CombinedScanTask task, String tableSchemaString, String expectedSchemaString,
                 String nameMappingString, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager,
                 boolean caseSensitive, boolean localityPreferred, ReaderFactory<T> readerFactory) {
  this.task = task;
  this.tableSchemaString = tableSchemaString;
  this.expectedSchemaString = expectedSchemaString;
  this.io = io;
  this.encryptionManager = encryptionManager;
  this.caseSensitive = caseSensitive;
  this.localityPreferred = localityPreferred;
  this.preferredLocations = getPreferredLocations();
  this.readerFactory = readerFactory;
  this.nameMappingString = nameMappingString;
}
 
Example 20
Source Project: iceberg   Source File: RewriteManifestsAction.java    License: Apache License 2.0
private static ManifestFile writeManifest(
    List<Row> rows, int startIndex, int endIndex, Broadcast<FileIO> io,
    String location, int format, PartitionSpec spec, StructType sparkType) throws IOException {

  String manifestName = "optimized-m-" + UUID.randomUUID();
  Path manifestPath = new Path(location, manifestName);
  OutputFile outputFile = io.value().newOutputFile(FileFormat.AVRO.addExtension(manifestPath.toString()));

  Types.StructType dataFileType = DataFile.getType(spec.partitionType());
  SparkDataFile wrapper = new SparkDataFile(dataFileType, sparkType);

  ManifestWriter<DataFile> writer = ManifestFiles.write(format, spec, outputFile, null);

  try {
    for (int index = startIndex; index < endIndex; index++) {
      Row row = rows.get(index);
      long snapshotId = row.getLong(0);
      long sequenceNumber = row.getLong(1);
      Row file = row.getStruct(2);
      writer.existing(wrapper.wrap(file), snapshotId, sequenceNumber);
    }
  } finally {
    writer.close();
  }

  return writer.toManifestFile();
}
 
Example 21
Source Project: iceberg   Source File: BatchDataReader.java    License: Apache License 2.0
BatchDataReader(
    CombinedScanTask task, Schema expectedSchema, String nameMapping, FileIO fileIo,
    EncryptionManager encryptionManager, boolean caseSensitive, int size) {
  super(task, fileIo, encryptionManager);
  this.expectedSchema = expectedSchema;
  this.nameMapping = nameMapping;
  this.caseSensitive = caseSensitive;
  this.batchSize = size;
}
 
Example 22
Source Project: iceberg   Source File: OutputFileFactory.java    License: Apache License 2.0
OutputFileFactory(PartitionSpec spec, FileFormat format, LocationProvider locations, FileIO io,
                  EncryptionManager encryptionManager, int partitionId, long taskId) {
  this.spec = spec;
  this.format = format;
  this.locations = locations;
  this.io = io;
  this.encryptionManager = encryptionManager;
  this.partitionId = partitionId;
  this.taskId = taskId;
}
 
Example 23
Source Project: iceberg   Source File: BaseDataReader.java    License: Apache License 2.0
BaseDataReader(CombinedScanTask task, FileIO fileIo, EncryptionManager encryptionManager) {
  this.fileIo = fileIo;
  this.tasks = task.files().iterator();
  Iterable<InputFile> decryptedFiles = encryptionManager.decrypt(Iterables.transform(
      task.files(),
      fileScanTask ->
          EncryptedFiles.encryptedInput(
              this.fileIo.newInputFile(fileScanTask.file().path().toString()),
              fileScanTask.file().keyMetadata())));
  ImmutableMap.Builder<String, InputFile> inputFileBuilder = ImmutableMap.builder();
  decryptedFiles.forEach(decrypted -> inputFileBuilder.put(decrypted.location(), decrypted));
  this.inputFiles = inputFileBuilder.build();
  this.currentIterator = CloseableIterator.empty();
}
 
Example 24
Source Project: iceberg   Source File: RowDataReader.java    License: Apache License 2.0
RowDataReader(
    CombinedScanTask task, Schema tableSchema, Schema expectedSchema, String nameMapping, FileIO fileIo,
    EncryptionManager encryptionManager, boolean caseSensitive) {
  super(task, fileIo, encryptionManager);
  this.tableSchema = tableSchema;
  this.expectedSchema = expectedSchema;
  this.nameMapping = nameMapping;
  this.caseSensitive = caseSensitive;
}
 
Example 25
Source Project: iceberg   Source File: BaseWriter.java    License: Apache License 2.0
BaseWriter(PartitionSpec spec, FileFormat format, SparkAppenderFactory appenderFactory,
           OutputFileFactory fileFactory, FileIO io, long targetFileSize) {
  this.spec = spec;
  this.format = format;
  this.appenderFactory = appenderFactory;
  this.fileFactory = fileFactory;
  this.io = io;
  this.targetFileSize = targetFileSize;
}
 
Example 26
Source Project: iceberg   Source File: SparkUtil.java    License: Apache License 2.0
public static FileIO serializableFileIO(Table table) {
  if (table.io() instanceof HadoopFileIO) {
    // we need to use Spark's SerializableConfiguration to avoid issues with Kryo serialization
    SerializableConfiguration conf = new SerializableConfiguration(((HadoopFileIO) table.io()).conf());
    return new HadoopFileIO(conf::value);
  } else {
    return table.io();
  }
}
 
Example 27
Source Project: iceberg   Source File: Spark3Util.java    License: Apache License 2.0
public static boolean isLocalityEnabled(FileIO io, String location, CaseInsensitiveStringMap readOptions) {
  InputFile in = io.newInputFile(location);
  if (in instanceof HadoopInputFile) {
    String scheme = ((HadoopInputFile) in).getFileSystem().getScheme();
    return readOptions.getBoolean("locality", LOCALITY_WHITELIST_FS.contains(scheme));
  }
  return false;
}
 
Example 28
Source Project: iceberg   Source File: SparkBatchWrite.java    License: Apache License 2.0
protected WriterFactory(PartitionSpec spec, FileFormat format, LocationProvider locations,
                        Map<String, String> properties, Broadcast<FileIO> io,
                        Broadcast<EncryptionManager> encryptionManager, long targetFileSize,
                        Schema writeSchema, StructType dsSchema) {
  this.spec = spec;
  this.format = format;
  this.locations = locations;
  this.properties = properties;
  this.io = io;
  this.encryptionManager = encryptionManager;
  this.targetFileSize = targetFileSize;
  this.writeSchema = writeSchema;
  this.dsSchema = dsSchema;
}
 
Example 29
Source Project: iceberg   Source File: SparkBatchScan.java    License: Apache License 2.0
SparkBatchScan(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryption, boolean caseSensitive,
               Schema expectedSchema, List<Expression> filters, CaseInsensitiveStringMap options) {
  this.table = table;
  this.io = io;
  this.encryptionManager = encryption;
  this.caseSensitive = caseSensitive;
  this.expectedSchema = expectedSchema;
  this.filterExpressions = filters;
  this.snapshotId = Spark3Util.propertyAsLong(options, "snapshot-id", null);
  this.asOfTimestamp = Spark3Util.propertyAsLong(options, "as-of-timestamp", null);

  if (snapshotId != null && asOfTimestamp != null) {
    throw new IllegalArgumentException(
        "Cannot scan using both snapshot-id and as-of-timestamp to select the table snapshot");
  }

  this.startSnapshotId = Spark3Util.propertyAsLong(options, "start-snapshot-id", null);
  this.endSnapshotId = Spark3Util.propertyAsLong(options, "end-snapshot-id", null);
  if (snapshotId != null || asOfTimestamp != null) {
    if (startSnapshotId != null || endSnapshotId != null) {
      throw new IllegalArgumentException(
          "Cannot specify start-snapshot-id and end-snapshot-id to do incremental scan when either snapshot-id or " +
              "as-of-timestamp is specified");
    }
  } else if (startSnapshotId == null && endSnapshotId != null) {
    throw new IllegalArgumentException("Cannot only specify option end-snapshot-id to do incremental scan");
  }

  // look for split behavior overrides in options
  this.splitSize = Spark3Util.propertyAsLong(options, "split-size", null);
  this.splitLookback = Spark3Util.propertyAsInt(options, "lookback", null);
  this.splitOpenFileCost = Spark3Util.propertyAsLong(options, "file-open-cost", null);
  this.localityPreferred = Spark3Util.isLocalityEnabled(io.value(), table.location(), options);
  this.batchReadsEnabled = Spark3Util.isVectorizationEnabled(table.properties(), options);
  this.batchSize = Spark3Util.batchSize(table.properties(), options);
}
 
Example 30
Source Project: iceberg   Source File: SparkStreamingWrite.java    License: Apache License 2.0
SparkStreamingWrite(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager,
                    CaseInsensitiveStringMap options, boolean truncateBatches, String queryId,
                    String applicationId, String wapId, Schema writeSchema, StructType dsSchema) {
  super(
      table, io, encryptionManager, options, false, truncateBatches, Expressions.alwaysTrue(), applicationId, wapId,
      writeSchema, dsSchema);
  this.truncateBatches = truncateBatches;
  this.queryId = queryId;
}