org.apache.spark.sql.sources.v2.writer.WriterCommitMessage Java Examples

The following examples show how to use org.apache.spark.sql.sources.v2.writer.WriterCommitMessage. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParallelRowReadWriteDataSource.java    From spark-data-sources with MIT License 6 votes vote down vote up
/**
 * Performs the global commit: collects the temp table name carried by each
 * local commit message and bulk-inserts those tables into the destination table.
 *
 * @param messages commit messages from the individual writers; each one is
 *                 cast to {@code PartialCommit}
 */
@Override
public void commit(WriterCommitMessage[] messages) {
    final int localCommitCount = messages.length;
    log.info("before global commit: table " + _table +
            " from " + localCommitCount + " local commits");

    // gather the temp table name recorded by every local commit
    List<String> tempTables = new ArrayList<>();
    for (int i = 0; i < localCommitCount; i++) {
        PartialCommit partial = (PartialCommit) messages[i];
        tempTables.add(partial.getTempTableName());
    }

    // hand all temp tables to the database in a single bulk-insert call
    DBClientWrapper client = new DBClientWrapper(_host, _port);
    client.connect();
    client.bulkInsertFromTables(_table, _truncateOnCommit, tempTables);

    log.info("after global commit: table " + _table +
            " from " + localCommitCount + " local commits");
}
 
Example #2
Source File: Writer.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Closes this writer and returns a commit message describing what it wrote.
 * When the metrics report zero records, the output file is deleted and an
 * empty {@code TaskCommit} is returned; otherwise the commit wraps a
 * {@code DataFile} built from the written file and its metrics.
 *
 * @return the commit message for this task
 * @throws IOException declared for the close and file system calls
 */
@Override
public WriterCommitMessage commit() throws IOException {
  Preconditions.checkArgument(appender != null, "Commit called on a closed writer: %s", this);

  close();

  boolean wroteRecords = metrics.recordCount() != 0L;
  if (wroteRecords) {
    DataFile written = DataFiles.fromInputFile(HadoopInputFile.fromPath(file, conf), null, metrics);
    return new TaskCommit(written);
  }

  // no records: remove the file (non-recursive delete) and report an empty commit
  file.getFileSystem(conf).delete(file, false);
  return new TaskCommit();
}
 
Example #3
Source File: Writer.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Appends every data file found in the commit messages to the table in a
 * single append operation, logging the file count and the commit duration.
 *
 * @param messages commit messages from the individual writers
 */
@Override
public void commit(WriterCommitMessage[] messages) {
  AppendFiles pendingAppend = table.newAppend();

  int fileCount = 0;
  for (DataFile dataFile : files(messages)) {
    pendingAppend.appendFile(dataFile);
    fileCount++;
  }

  LOG.info("Appending {} files to {}", fileCount, table);

  long startedAt = System.currentTimeMillis();
  pendingAppend.commit(); // abort is automatically called if this fails
  long elapsedMs = System.currentTimeMillis() - startedAt;

  LOG.info("Committed in {} ms", elapsedMs);
}
 
Example #4
Source File: ParallelRowReadWriteDataSource.java    From spark-data-sources with MIT License 5 votes vote down vote up
/**
 * Global abort hook. At present it only logs before and after; removal of the
 * temp tables is still to be implemented (see the TODO).
 *
 * @param messages commit messages from the individual writers; only the count is used
 */
@Override
public void abort(WriterCommitMessage[] messages) {
    final int localCommitCount = messages.length;

    String beforeMessage = "before global abort: table " + _table +
            " from " + localCommitCount + " local commits";
    log.info(beforeMessage);

    // TODO: blow away all the temp tables

    String afterMessage = "after global abort: table " + _table +
            " from " + localCommitCount + " local commits";
    log.info(afterMessage);
}
 
Example #5
Source File: HiveWarehouseDataSourceWriter.java    From spark-llap with Apache License 2.0 5 votes vote down vote up
/**
 * Aborts the write job by recursively deleting {@code path} and then logging the abort.
 * Cleanup is best-effort: a failure while deleting is logged together with its cause
 * and is not propagated to the caller.
 *
 * @param messages commit messages from the individual writers; not used here
 */
@Override public void abort(WriterCommitMessage[] messages) {
  try {
    path.getFileSystem(conf).delete(path, true);
  } catch(Exception e) {
    // Pass the exception as the trailing argument so the logger records the cause
    // and stack trace instead of silently dropping it.
    LOG.warn("Failed to cleanup temp dir {}", path.toString(), e);
  }
  LOG.error("Aborted DataWriter job {}", jobId);
}
 
Example #6
Source File: HiveStreamingDataWriter.java    From spark-llap with Apache License 2.0 5 votes vote down vote up
/**
 * Commits the current streaming transaction, closes the streaming connection and
 * returns a commit message summarising the job, partition, attempt and connection stats.
 *
 * @return a {@code SimpleWriterCommitMessage} carrying the summary string
 * @throws IOException wrapping any {@code StreamingException} raised by the transaction commit
 */
@Override
public WriterCommitMessage commit() throws IOException {
  try {
    streamingConnection.commitTransaction();
  } catch (StreamingException e) {
    throw new IOException(e);
  }
  // Build the summary before closing, since it reads the connection stats.
  String msg = "Committed jobId: " + jobId + " partitionId: " + partitionId + " attemptNumber: " + attemptNumber +
    " connectionStats: " + streamingConnection.getConnectionStats();
  streamingConnection.close();
  // The format has two placeholders, so supply both the summary and the row count.
  LOG.info("Closing streaming connection on commit. Msg: {} rowsWritten: {}", msg, rowsWritten);
  return new SimpleWriterCommitMessage(msg);
}
 
Example #7
Source File: Writer.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Flattens the data files carried by the given commit messages into one iterable.
 * A {@code null} message contributes no files; an empty array yields an empty list.
 *
 * @param messages commit messages, each cast to {@code TaskCommit} when non-null
 * @return the concatenation of every message's files
 */
private Iterable<DataFile> files(WriterCommitMessage[] messages) {
  if (messages.length == 0) {
    return ImmutableList.of();
  }

  return concat(transform(Arrays.asList(messages), message -> message == null
      ? ImmutableList.of()
      : ImmutableList.copyOf(((TaskCommit) message).files())));
}
 
Example #8
Source File: Writer.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Flattens the data files carried by the given commit messages into one iterable.
 * A {@code null} message contributes no files; an empty array yields an empty list.
 *
 * @param messages commit messages, each cast to {@code TaskResult} when non-null
 * @return the concatenation of every message's files
 */
protected Iterable<DataFile> files(WriterCommitMessage[] messages) {
  if (messages.length == 0) {
    return ImmutableList.of();
  }

  return Iterables.concat(Iterables.transform(Arrays.asList(messages), message -> message == null ?
      ImmutableList.of() :
      ImmutableList.copyOf(((TaskResult) message).files())));
}
 
Example #9
Source File: Writer.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Aborts the job by deleting each data file referenced by the commit messages.
 * The delete task is configured with a retry count and exponential backoff read
 * from the table's commit retry properties, and with {@code throwFailureWhenFinished()}.
 *
 * @param messages commit messages from the individual writers
 */
@Override
public void abort(WriterCommitMessage[] messages) {
  Map<String, String> tableProps = table.properties();

  // retry settings, each falling back to its default constant
  int numRetries = PropertyUtil.propertyAsInt(tableProps, COMMIT_NUM_RETRIES, COMMIT_NUM_RETRIES_DEFAULT);
  int minWaitMs = PropertyUtil.propertyAsInt(
      tableProps, COMMIT_MIN_RETRY_WAIT_MS, COMMIT_MIN_RETRY_WAIT_MS_DEFAULT);
  int maxWaitMs = PropertyUtil.propertyAsInt(
      tableProps, COMMIT_MAX_RETRY_WAIT_MS, COMMIT_MAX_RETRY_WAIT_MS_DEFAULT);
  int totalRetryMs = PropertyUtil.propertyAsInt(
      tableProps, COMMIT_TOTAL_RETRY_TIME_MS, COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT);

  Tasks.foreach(files(messages))
      .retry(numRetries)
      .exponentialBackoff(minWaitMs, maxWaitMs, totalRetryMs, 2.0 /* exponential */)
      .throwFailureWhenFinished()
      .run(dataFile -> {
        io.value().deleteFile(dataFile.path().toString());
      });
}
 
Example #10
Source File: Writer.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Adds every data file from the commit messages to a replace-partitions
 * operation and commits it through {@code commitOperation}.
 *
 * @param messages commit messages from the individual writers
 */
private void replacePartitions(WriterCommitMessage[] messages) {
  ReplacePartitions overwrite = table.newReplacePartitions();

  int addedCount = 0;
  for (DataFile dataFile : files(messages)) {
    overwrite.addFile(dataFile);
    addedCount++;
  }

  commitOperation(overwrite, addedCount, "dynamic partition overwrite");
}
 
Example #11
Source File: Writer.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Commits the job, choosing the operation from the {@code replacePartitions}
 * flag: a partition replacement when it is set, a plain append otherwise.
 *
 * @param messages commit messages from the individual writers
 */
@Override
public void commit(WriterCommitMessage[] messages) {
  if (!replacePartitions) {
    append(messages);
    return;
  }
  replacePartitions(messages);
}
 
Example #12
Source File: ParallelRowReadWriteDataSource.java    From spark-data-sources with MIT License 5 votes vote down vote up
/**
 * Performs the local (per-writer) commit: saves the uncommitted rows to a temp
 * table, clears the uncommitted buffer and returns a {@code PartialCommit}
 * carrying the temp table's name.
 *
 * @return the partial commit naming the temp table that was written
 */
@Override
public WriterCommitMessage commit() {
    log.info("before local commit: partition " + _partitionId + " attempt " + _attemptNumber);
    _tempTableName = _db.saveToTempTable(_uncommitted, _schema);
    _uncommitted.clear();
    PartialCommit pc = new PartialCommit(_tempTableName);
    // This runs after the rows were saved, so it is labelled "after" rather than "before".
    log.info("after local commit: table " + _tempTableName +
            " partition " + _partitionId + " attempt " + _attemptNumber);
    return pc;
}
 
Example #13
Source File: Writer.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Commits this writer by wrapping the result of {@code complete()} in a {@code TaskCommit}.
 *
 * @return the commit message for this task
 * @throws IOException declared by the method signature
 */
@Override
public WriterCommitMessage commit() throws IOException {
  return new TaskCommit(complete());
}
 
Example #14
Source File: Writer.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Commits this writer by wrapping the result of {@code complete()} in a {@code TaskCommit}.
 *
 * @return the commit message for this task
 * @throws IOException declared by the method signature
 */
@Override
public WriterCommitMessage commit() throws IOException {
  return new TaskCommit(complete());
}
 
Example #15
Source File: Writer.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Closes the current output via {@code closeCurrent()} and then reports the
 * accumulated {@code completedFiles} in a {@code TaskCommit}.
 *
 * @return the commit message for this task
 * @throws IOException declared by the method signature
 */
@Override
public WriterCommitMessage commit() throws IOException {
  // the current output must be closed before the commit message is built
  closeCurrent();

  WriterCommitMessage taskCommit = new TaskCommit(completedFiles);
  return taskCommit;
}
 
Example #16
Source File: StreamingWriter.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Epoch-aware abort that delegates to {@code abort(messages)}; the epoch id is not used.
 *
 * @param epochId  the epoch being aborted (ignored by this method)
 * @param messages commit messages from the individual writers
 */
@Override
public void abort(long epochId, WriterCommitMessage[] messages) {
  abort(messages);
}
 
Example #17
Source File: HiveStreamingDataSourceWriter.java    From spark-llap with Apache License 2.0 4 votes vote down vote up
/**
 * Job-level commit hook; this method only logs the job id.
 *
 * @param messages commit messages from the individual writers (not used here)
 */
@Override
public void commit(WriterCommitMessage[] messages) {
  LOG.info("Commit job {}", jobId);
}
 
Example #18
Source File: HiveStreamingDataSourceWriter.java    From spark-llap with Apache License 2.0 4 votes vote down vote up
/**
 * Job-level abort hook; this method only logs the job id.
 *
 * @param messages commit messages from the individual writers (not used here)
 */
@Override
public void abort(WriterCommitMessage[] messages) {
  LOG.info("Abort job {}", jobId);
}
 
Example #19
Source File: HiveWarehouseDataWriter.java    From spark-llap with Apache License 2.0 4 votes vote down vote up
/**
 * Closes the output and returns a commit message whose text is
 * {@code COMMIT <jobId>_<partitionId>_<attemptNumber>}.
 *
 * @return a {@code SimpleWriterCommitMessage} carrying that text
 * @throws IOException declared by the method signature
 */
@Override public WriterCommitMessage commit() throws IOException {
  out.close();

  String commitText = String.format("COMMIT %s_%s_%s", jobId, partitionId, attemptNumber);
  return new SimpleWriterCommitMessage(commitText);
}
 
Example #20
Source File: HiveStreamingDataSourceWriter.java    From spark-llap with Apache License 2.0 4 votes vote down vote up
/**
 * Epoch-level commit hook; this method only logs the job id.
 *
 * @param epochId  the epoch being committed (not used here)
 * @param messages commit messages from the individual writers (not used here)
 */
@Override
public void commit(final long epochId, final WriterCommitMessage[] messages) {
  LOG.info("Commit job {}", jobId);
}
 
Example #21
Source File: HiveStreamingDataSourceWriter.java    From spark-llap with Apache License 2.0 4 votes vote down vote up
/**
 * Epoch-level abort hook; this method only logs the job id.
 *
 * @param epochId  the epoch being aborted (not used here)
 * @param messages commit messages from the individual writers (not used here)
 */
@Override
public void abort(final long epochId, final WriterCommitMessage[] messages) {
  LOG.info("Abort job {}", jobId);
}
 
Example #22
Source File: MockWriteSupport.java    From spark-llap with Apache License 2.0 2 votes vote down vote up
/**
 * Empty commit implementation: the method body does nothing with the messages.
 *
 * @param messages commit messages from the individual writers (not used here)
 */
@Override public void commit(WriterCommitMessage[] messages) {

    }