Java Code Examples for org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition#CREATE_NEVER

The following examples show how to use org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition#CREATE_NEVER. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FakeJobService.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks whether a job with the given dispositions may proceed against {@code table}.
 *
 * <ul>
 *   <li>No table: allowed unless the create disposition is {@code CREATE_NEVER}.
 *   <li>Table present and {@code WRITE_TRUNCATE}: the table is deleted through {@code
 *       datasetService} and the job is allowed.
 *   <li>Table present and {@code WRITE_EMPTY}: allowed only when the table has no rows.
 *   <li>Any other write disposition on an existing table: allowed.
 * </ul>
 *
 * @param table the existing table, or {@code null} if there is none
 * @param createDisposition create disposition requested by the job
 * @param writeDisposition write disposition requested by the job
 * @return {@code true} if the dispositions are compatible with the table's state
 */
private boolean validateDispositions(
    Table table, CreateDisposition createDisposition, WriteDisposition writeDisposition)
    throws InterruptedException, IOException {
  if (table == null) {
    // A missing table is only a problem when the job is forbidden from creating one.
    return createDisposition != CreateDisposition.CREATE_NEVER;
  }

  if (writeDisposition == WriteDisposition.WRITE_TRUNCATE) {
    datasetService.deleteTable(table.getTableReference());
    return true;
  }

  if (writeDisposition == WriteDisposition.WRITE_EMPTY) {
    // WRITE_EMPTY is satisfied only by a table that currently holds no rows.
    return datasetService
        .getAllRows(
            table.getTableReference().getProjectId(),
            table.getTableReference().getDatasetId(),
            table.getTableReference().getTableId())
        .isEmpty();
  }

  return true;
}
 
Example 2
Source File: WriteRename.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Starts a copy of the given temp tables into the final destination table and returns the
 * bookkeeping record for the pending job.
 *
 * <p>The configured first-pane dispositions are used only when the pane index is 0; for every
 * other pane the copy uses {@code WRITE_APPEND} and {@code CREATE_NEVER}.
 *
 * @param finalTableDestination destination the temp tables are copied into
 * @param tempTableNames JSON-serialized {@code TableReference}s of the temp tables
 * @param c process context supplying the pane, side inputs and pipeline options
 * @return the pending copy job together with its destination and source temp tables
 */
private PendingJobData startWriteRename(
    TableDestination finalTableDestination, Iterable<String> tempTableNames, ProcessContext c)
    throws Exception {
  final WriteDisposition writeDisposition;
  final CreateDisposition createDisposition;
  if (c.pane().getIndex() == 0) {
    writeDisposition = firstPaneWriteDisposition;
    createDisposition = firstPaneCreateDisposition;
  } else {
    writeDisposition = WriteDisposition.WRITE_APPEND;
    createDisposition = CreateDisposition.CREATE_NEVER;
  }

  // Decode each temp table name from its JSON form.
  List<TableReference> tempTables =
      StreamSupport.stream(tempTableNames.spliterator(), false)
          .map(json -> BigQueryHelpers.fromJsonString(json, TableReference.class))
          .collect(Collectors.toList());

  // Make sure each destination table gets a unique job id.
  String jobIdPrefix =
      BigQueryHelpers.createJobId(
          c.sideInput(jobIdToken), finalTableDestination, -1, c.pane().getIndex());

  BigQueryOptions bqOptions = c.getPipelineOptions().as(BigQueryOptions.class);
  BigQueryHelpers.PendingJob copyJob =
      startCopy(
          bqServices.getJobService(bqOptions),
          bqServices.getDatasetService(bqOptions),
          jobIdPrefix,
          finalTableDestination.getTableReference(),
          tempTables,
          writeDisposition,
          createDisposition,
          kmsKey);
  return new PendingJobData(copyJob, finalTableDestination, tempTables);
}
 
Example 3
Source File: WriteTables.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Handles one element: resolves the destination's schema and table, fills in a missing project
 * id, picks the write/create dispositions for this pane, calls {@code startLoad} for the
 * element's files, and records the returned pending job in {@code pendingJobs}.
 *
 * @param c process context; supplies the element, pane, side inputs and pipeline options
 * @param window window of the element, stored alongside the pending job
 * @throws Exception declared by the method; also note {@code checkArgument} is used to reject a
 *     null schema, a null table destination, or clustering with an unsupported coder
 */
@ProcessElement
public void processElement(ProcessContext c, BoundedWindow window) throws Exception {
  dynamicDestinations.setSideInputAccessorFromProcessContext(c);
  // The element key is itself keyed: outer key carries the shard number, inner key the destination.
  DestinationT destination = c.element().getKey().getKey();
  TableSchema tableSchema;
  if (firstPaneCreateDisposition == CreateDisposition.CREATE_NEVER) {
    // No schema is looked up when the table is never to be created.
    tableSchema = null;
  } else if (jsonSchemas.containsKey(destination)) {
    // Reuse the schema cached (as JSON) by an earlier element for this destination.
    tableSchema =
        BigQueryHelpers.fromJsonString(jsonSchemas.get(destination), TableSchema.class);
  } else {
    tableSchema = dynamicDestinations.getSchema(destination);
    checkArgument(
        tableSchema != null,
        "Unless create disposition is %s, a schema must be specified, i.e. "
            + "DynamicDestinations.getSchema() may not return null. "
            + "However, create disposition is %s, and %s returned null for destination %s",
        CreateDisposition.CREATE_NEVER,
        firstPaneCreateDisposition,
        dynamicDestinations,
        destination);
    // Cache the schema in JSON form for later elements with the same destination.
    jsonSchemas.put(destination, BigQueryHelpers.toJsonString(tableSchema));
  }

  TableDestination tableDestination = dynamicDestinations.getTable(destination);
  checkArgument(
      tableDestination != null,
      "DynamicDestinations.getTable() may not return null, "
          + "but %s returned null for destination %s",
      dynamicDestinations,
      destination);
  // Any destination coder other than TableDestinationCoderV2 is treated as clustering-capable.
  boolean destinationCoderSupportsClustering =
      !(dynamicDestinations.getDestinationCoder() instanceof TableDestinationCoderV2);
  checkArgument(
      tableDestination.getClustering() == null || destinationCoderSupportsClustering,
      "DynamicDestinations.getTable() may only return destinations with clustering configured"
          + " if a destination coder is supplied that supports clustering, but %s is configured"
          + " to use TableDestinationCoderV2. Set withClustering() on BigQueryIO.write() and, "
          + " if you provided a custom DynamicDestinations instance, override"
          + " getDestinationCoder() to return TableDestinationCoderV3.",
      dynamicDestinations);
  // NOTE(review): the reference returned here is mutated in place below (setProjectId,
  // setTableId). Whether getTableReference() returns a fresh object or a shared one is not
  // visible from this method -- confirm before relying on it.
  TableReference tableReference = tableDestination.getTableReference();
  if (Strings.isNullOrEmpty(tableReference.getProjectId())) {
    // Fall back to the project configured in the pipeline's BigQueryOptions.
    tableReference.setProjectId(c.getPipelineOptions().as(BigQueryOptions.class).getProject());
    tableDestination = tableDestination.withTableReference(tableReference);
  }

  Integer partition = c.element().getKey().getShardNumber();
  // Copy the element's values into a list of files to hand to startLoad.
  List<String> partitionFiles = Lists.newArrayList(c.element().getValue());
  String jobIdPrefix =
      BigQueryHelpers.createJobId(
          c.sideInput(loadJobIdPrefixView), tableDestination, partition, c.pane().getIndex());

  if (tempTable) {
    // This is a temp table. Create a new one for each partition and each pane.
    // Only tableReference is changed here; tableDestination is not rebuilt afterwards.
    tableReference.setTableId(jobIdPrefix);
  }

  WriteDisposition writeDisposition = firstPaneWriteDisposition;
  CreateDisposition createDisposition = firstPaneCreateDisposition;
  if (c.pane().getIndex() > 0 && !tempTable) {
    // If writing directly to the destination, then the table is created on the first write
    // and we should change the disposition for subsequent writes.
    writeDisposition = WriteDisposition.WRITE_APPEND;
    createDisposition = CreateDisposition.CREATE_NEVER;
  } else if (tempTable) {
    // In this case, we are writing to a temp table and always need to create it.
    // WRITE_TRUNCATE is set so that we properly handle retries of this pane.
    writeDisposition = WriteDisposition.WRITE_TRUNCATE;
    createDisposition = CreateDisposition.CREATE_IF_NEEDED;
  }
  // Remaining case (first pane, not a temp table): the configured first-pane dispositions apply.

  BigQueryHelpers.PendingJob retryJob =
      startLoad(
          bqServices.getJobService(c.getPipelineOptions().as(BigQueryOptions.class)),
          bqServices.getDatasetService(c.getPipelineOptions().as(BigQueryOptions.class)),
          jobIdPrefix,
          tableReference,
          tableDestination.getTimePartitioning(),
          tableDestination.getClustering(),
          tableSchema,
          partitionFiles,
          writeDisposition,
          createDisposition,
          schemaUpdateOptions);
  // Record the job together with its window, input files and target table.
  pendingJobs.add(
      new PendingJobData(window, retryJob, partitionFiles, tableDestination, tableReference));
}
 
Example 4
Source File: CreateTables.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Resolves and validates the {@code TableDestination} for {@code destination}, filling in the
 * project id from the pipeline options when the table reference has none, and attempting table
 * creation unless the create disposition is {@code CREATE_NEVER}.
 *
 * @param context process context used to read pipeline options and passed to table creation
 * @param destination the logical destination to resolve
 * @return the validated destination, with a project id set on its table reference
 */
private TableDestination getTableDestination(ProcessContext context, DestinationT destination) {
  TableDestination resolved = dynamicDestinations.getTable(destination);
  checkArgument(
      resolved != null,
      "DynamicDestinations.getTable() may not return null, "
          + "but %s returned null for destination %s",
      dynamicDestinations,
      destination);
  checkArgument(
      resolved.getTableSpec() != null,
      "DynamicDestinations.getTable() must return a TableDestination "
          + "with a non-null table spec, but %s returned %s for destination %s,"
          + "which has a null table spec",
      dynamicDestinations,
      resolved,
      destination);

  // Clustering is rejected only when the destination coder is TableDestinationCoderV2.
  boolean usesCoderV2 =
      dynamicDestinations.getDestinationCoder() instanceof TableDestinationCoderV2;
  checkArgument(
      resolved.getClustering() == null || !usesCoderV2,
      "DynamicDestinations.getTable() may only return destinations with clustering configured"
          + " if a destination coder is supplied that supports clustering, but %s is configured"
          + " to use TableDestinationCoderV2. Set withClustering() on BigQueryIO.write() and, "
          + " if you provided a custom DynamicDestinations instance, override"
          + " getDestinationCoder() to return TableDestinationCoderV3.",
      dynamicDestinations);

  // Work on a clone so the project id can be set without touching the returned reference.
  TableReference reference = resolved.getTableReference().clone();
  if (Strings.isNullOrEmpty(reference.getProjectId())) {
    String defaultProject = context.getPipelineOptions().as(BigQueryOptions.class).getProject();
    reference.setProjectId(defaultProject);
    resolved = resolved.withTableReference(reference);
  }

  if (createDisposition != CreateDisposition.CREATE_NEVER) {
    String tableSpec = BigQueryHelpers.stripPartitionDecorator(resolved.getTableSpec());
    if (!createdTables.contains(tableSpec)) {
      // Re-check under the lock: another thread may have created the table in the meantime.
      // The second check is not required for correctness; it keeps every thread from issuing
      // its own create call and overwhelming the BigQuery quota.
      synchronized (createdTables) {
        if (!createdTables.contains(tableSpec)) {
          tryCreateTable(context, destination, resolved, tableSpec, kmsKey);
        }
      }
    }
  }
  return resolved;
}