com.google.cloud.bigquery.BigQueryException Java Examples

The following examples show how to use com.google.cloud.bigquery.BigQueryException. Each example is taken from the open-source project noted in the header above it, together with its source file and license.
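Most of the examples below rely on a handful of accessors that BigQueryException exposes: getCode() for the HTTP-style status code, getReason() for the service's short reason string, getError() for the structured BigQueryError detail, and isRetryable(). As a minimal, self-contained sketch of how they fit together (the dataset name is a placeholder, as in the snippets below):

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.DatasetInfo;

public class BigQueryExceptionSketch {
  public static void main(String[] args) {
    BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
    try {
      bigquery.create(DatasetInfo.newBuilder("my_dataset_name").build());
    } catch (BigQueryException e) {
      int code = e.getCode();        // HTTP-style status code, e.g. 409 for an existing dataset
      String reason = e.getReason(); // short reason string, e.g. "duplicate" or "responseTooLarge"
      if (e.getError() != null) {    // structured detail; may be null for client-side failures
        System.err.println(e.getError().getMessage());
      }
      if (e.isRetryable()) {
        // transient failure; safe to retry with backoff
      }
      System.err.printf("BigQuery call failed: code=%d reason=%s%n", code, reason);
    }
  }
}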
Example #1
Source File: PutBigQueryBatchTest.java    From nifi with Apache License 2.0
@Test
public void testFailedLoad() throws Exception {
    when(table.exists()).thenReturn(Boolean.TRUE);
    when(bq.create(ArgumentMatchers.isA(JobInfo.class))).thenReturn(job);
    when(bq.writer(ArgumentMatchers.isA(WriteChannelConfiguration.class))).thenReturn(tableDataWriteChannel);
    when(tableDataWriteChannel.getJob()).thenReturn(job);
    when(job.waitFor(ArgumentMatchers.isA(RetryOption.class))).thenThrow(BigQueryException.class);
    when(job.getStatus()).thenReturn(jobStatus);
    when(job.getStatistics()).thenReturn(stats);

    when(stats.getCreationTime()).thenReturn(0L);
    when(stats.getStartTime()).thenReturn(1L);
    when(stats.getEndTime()).thenReturn(2L);

    final TestRunner runner = buildNewRunner(getProcessor());
    addRequiredPropertiesToRunner(runner);
    runner.assertValid();

    runner.enqueue("{ \"data\": \"datavalue\" }");

    runner.run();

    runner.assertAllFlowFilesTransferred(PutBigQueryBatch.REL_FAILURE);
}
 
Example #2
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0
/** Example of creating a query job. */
// [TARGET create(JobInfo, JobOption...)]
// [VARIABLE "SELECT field FROM my_dataset_name.my_table_name"]
public Job createJob(String query) {
  // [START ]
  Job job = null;
  JobConfiguration jobConfiguration = QueryJobConfiguration.of(query);
  JobInfo jobInfo = JobInfo.of(jobConfiguration);
  try {
    job = bigquery.create(jobInfo);
  } catch (BigQueryException e) {
    // the job was not created
  }
  // [END ]
  return job;
}
 
Example #3
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0
/** Example of creating a dataset. */
// [TARGET create(DatasetInfo, DatasetOption...)]
// [VARIABLE "my_dataset_name"]
public Dataset createDataset(String datasetName) {
  // [START bigquery_create_dataset]
  Dataset dataset = null;
  DatasetInfo datasetInfo = DatasetInfo.newBuilder(datasetName).build();
  try {
    dataset = bigquery.create(datasetInfo); // the dataset was created
  } catch (BigQueryException e) {
    // the dataset was not created
  }
  // [END bigquery_create_dataset]
  return dataset;
}
 
Example #4
Source File: TableSnippets.java    From google-cloud-java with Apache License 2.0
/** Example of copying the table to a destination table. */
// [TARGET copy(TableId, JobOption...)]
// [VARIABLE "my_dataset"]
// [VARIABLE "my_destination_table"]
public Job copyTableId(String dataset, String tableName) throws BigQueryException {
  // [START bigquery_copy_table]
  TableId destinationId = TableId.of(dataset, tableName);
  JobOption options = JobOption.fields(JobField.STATUS, JobField.USER_EMAIL);
  Job job = table.copy(destinationId, options);
  // Wait for the job to complete.
  try {
    Job completedJob =
        job.waitFor(
            RetryOption.initialRetryDelay(Duration.ofSeconds(1)),
            RetryOption.totalTimeout(Duration.ofMinutes(3)));
    if (completedJob != null && completedJob.getStatus().getError() == null) {
      // Job completed successfully.
    } else {
      // Handle error case.
    }
  } catch (InterruptedException e) {
    // Handle interrupted wait
  }
  // [END bigquery_copy_table]
  return job;
}
 
Example #5
Source File: JobSnippets.java    From google-cloud-java with Apache License 2.0
/** Example usage of {@code waitFor()} with checking period and timeout. */
// [TARGET waitFor(RetryOption...)]
public boolean waitForWithOptions() throws InterruptedException {
  try {
    // [START ]
    Job completedJob =
        job.waitFor(
            RetryOption.initialRetryDelay(Duration.ofSeconds(1)),
            RetryOption.totalTimeout(Duration.ofMinutes(1)));
    if (completedJob == null) {
      // job no longer exists
    } else if (completedJob.getStatus().getError() != null) {
      // job failed, handle error
    } else {
      // job completed successfully
    }
    // [END ]
  } catch (BigQueryException e) {
    if (e.getCause() instanceof PollException) {
      return false;
    }
    throw e;
  }
  return true;
}
 
Example #6
Source File: JobSnippets.java    From google-cloud-java with Apache License 2.0
/** Example usage of {@code waitFor()}. */
// [TARGET waitFor(RetryOption...)]
public boolean waitFor() throws InterruptedException {
  try {
    // [START ]
    Job completedJob = job.waitFor();
    if (completedJob == null) {
      // job no longer exists
    } else if (completedJob.getStatus().getError() != null) {
      // job failed, handle error
    } else {
      // job completed successfully
    }
    // [END ]
  } catch (BigQueryException e) {
    // Timeouts shouldn't happen without a timeout option.
    if (e.getCause() instanceof PollException) {
      return false;
    }
    throw e;
  }
  return true;
}
 
Example #7
Source File: BigQueryStatementIssuingFn.java    From DataflowTemplates with Apache License 2.0
private Table createBigQueryTable(BigQueryAction action) {
  TableDefinition definition = StandardTableDefinition.of(
      BigQuerySchemaUtils.beamSchemaToBigQueryClientSchema(action.tableSchema));

  TableId tableId = TableId.of(action.projectId, action.dataset, action.tableName);
  TableInfo tableInfo = TableInfo.newBuilder(tableId, definition).build();

  LOG.info("Creating a new BigQuery table: {}", tableInfo);

  try {
    return bigQueryClient.create(tableInfo);
  } catch (BigQueryException e) {
    if (e.getMessage().startsWith("Already Exists")) {
      return null;
    } else {
      throw e;
    }
  }
}
 
Example #8
Source File: BqQueueWorkerTest.java    From beast with Apache License 2.0
@Test
public void shouldCloseCommitterWhenBigQueryExceptionHappens() throws InterruptedException {
    BlockingQueue<Records> queue = new LinkedBlockingQueue<>();
    queue.put(messages);
    doThrow(new BigQueryException(10, "Some Error")).when(failureSink).push(messages);
    BqQueueWorker worker = new BqQueueWorker("bq-worker", failureSink, queueConfig, committer, queue, workerState);
    Thread workerThread = new Thread(worker);

    workerThread.start();
    workerThread.join();
    //TODO: change Worker run to callable and verify return value
}
 
Example #9
Source File: BigQueryDatasetRuntime.java    From components with Apache License 2.0
private TableResult query(BigQuery bigquery, QueryJobConfiguration queryRequest, String projectId,
                          BigQuery.JobOption... options) {
    TableResult queryResponse = null;
    try {
        queryResponse = bigquery.query(queryRequest, options);
    } catch (BigQueryException exception) {
        if ("responseTooLarge".equals(exception.getReason())) {
            return queryWithLarge(bigquery, queryRequest, projectId, options);
        }
    } catch (final InterruptedException e) {
        Thread.currentThread().interrupt();
    }
    return loopQueryResponse(queryResponse);
}
 
Example #10
Source File: BigQueryOutputTest.java    From flo with Apache License 2.0
@Test
public void shouldBeRunnable() throws Exception {
  final String nonExistentProject = UUID.randomUUID().toString();
  final Task<TableId> task = Task.named("task").ofType(TableId.class)
      .output(BigQueryOutput.create(nonExistentProject, "foo", "bar"))
      .process(StagingTableId::tableId);

  final Future<TableId> future = FloRunner.runTask(task).future();
  try {
    future.get(30, TimeUnit.SECONDS);
  } catch (ExecutionException e) {
    final Throwable rootCause = Throwables.getRootCause(e);
    if (rootCause instanceof GoogleJsonResponseException) {
      // Seems we managed to make a request, so the lookup context was successfully invoked. We're done here.
    } else if (rootCause instanceof IllegalArgumentException
        && rootCause.getMessage().startsWith("A project ID is required")) {
      // Seems we got as far as to instantiate the BigQuery client. We're done here.
    } else if (rootCause instanceof IllegalArgumentException &&
        rootCause.getMessage().startsWith("Dataset does not exist.")) {
      // Seems we managed to make a request, so the lookup context was successfully invoked. We're done here.
    } else if (rootCause instanceof BigQueryException &&
        rootCause.getMessage().equals("The project " + nonExistentProject + " has not enabled BigQuery.")) {
      // Seems we managed to make a request, so the lookup context was successfully invoked. We're done here.
    } else {
      // Not sure what error we got here, might be a serialization problem. Be conservative and fail.
      throw new AssertionError("Unknown error, might be serialization problem that needs fixing?", e);
    }
  }
}
 
Example #11
Source File: BigQueryOutputTest.java    From flo with Apache License 2.0
@Test(expected = BigQueryException.class)
public void shouldFailWhenJobTerminatesExceptionally() throws InterruptedException {
  when(bigQuery.getDataset(DATASET_ID)).thenReturn(mock(Dataset.class));

  when(bigQuery.create(any(JobInfo.class))).thenReturn(job);
  doThrow(new BigQueryException(mock(IOException.class))).when(job)
      .waitFor(any(RetryOption.class));

  BigQueryOutput.create(() -> floBigQueryClient, TABLE_ID).provide(null).publish();
}
 
Example #12
Source File: BqQueueWorkerIntegrationTest.java    From beast with Apache License 2.0
@Test
public void shouldKeepElementInQueueOnException() throws InterruptedException {
    queue.put(messages);
    doThrow(new BigQueryException(10, "failed to push to BQ")).when(sink).push(messages);
    startWorkers(workers);

    Thread closer = WorkerUtil.closeWorkers(workers, workerStates, 500);
    closer.join();
    assertEquals(1, queue.size());
}
 
Example #13
Source File: RetrySinkTest.java    From beast with Apache License 2.0
@Test
public void shouldRetryForMaxAttemptsIfExceptionIsThrown() {
    int maxPushAttempts = 5;
    backOffProvider = new ExponentialBackOffProvider(10, 1000, 2, new BackOff());
    when(failureSink.push(records)).thenThrow(new BigQueryException(10, "Some Error"));
    retrySink = new RetrySink(failureSink, backOffProvider, maxPushAttempts);

    Status status = retrySink.push(records);
    assertFalse(status.isSuccess());
    verify(failureSink, times(maxPushAttempts)).push(records);
}
 
Example #14
Source File: BQClientTest.java    From beast with Apache License 2.0
@Test(expected = BigQueryException.class)
public void shouldThrowExceptionIfUpdateTableFails() {
    when(bqConfig.isBQTablePartitioningEnabled()).thenReturn(false);
    when(bqConfig.getTable()).thenReturn("bq-table");
    when(bqConfig.getDataset()).thenReturn("bq-proto");

    ArrayList<Field> bqSchemaFields = new ArrayList<Field>() {{
        add(Field.newBuilder("test-1", LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder("test-2", LegacySQLTypeName.STRING).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.OFFSET_COLUMN_NAME, LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.TOPIC_COLUMN_NAME, LegacySQLTypeName.STRING).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.LOAD_TIME_COLUMN_NAME, LegacySQLTypeName.TIMESTAMP).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.TIMESTAMP_COLUMN_NAME, LegacySQLTypeName.TIMESTAMP).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.PARTITION_COLUMN_NAME, LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
    }};

    TableDefinition tableDefinition = getNonPartitionedTableDefinition(bqSchemaFields);
    ArrayList<Field> updatedBQSchemaFields = new ArrayList<>();
    updatedBQSchemaFields.addAll(bqSchemaFields);
    updatedBQSchemaFields.add(Field.newBuilder("new-field", LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
    TableDefinition updatedBQTableDefinition = getNonPartitionedTableDefinition(updatedBQSchemaFields);

    TableId tableId = TableId.of(bqConfig.getDataset(), bqConfig.getTable());
    TableInfo tableInfo = TableInfo.newBuilder(tableId, updatedBQTableDefinition).build();
    when(bigquery.getDataset(tableId.getDataset())).thenReturn(dataset);
    when(dataset.exists()).thenReturn(true);
    when(table.exists()).thenReturn(true);
    when(bigquery.getTable(tableId)).thenReturn(table);
    when(table.getDefinition()).thenReturn(mockTableDefinition);
    when(mockTableDefinition.getSchema()).thenReturn(tableDefinition.getSchema());
    when(bigquery.update(tableInfo)).thenThrow(new BigQueryException(404, "Failed to update"));

    bqClient = new BQClient(bigquery, bqConfig);
    bqClient.upsertTable(updatedBQSchemaFields);
}
 
Example #15
Source File: ProtoUpdateListenerTest.java    From beast with Apache License 2.0
@Test(expected = BQTableUpdateFailure.class)
public void shouldThrowExceptionIfConverterFails() throws IOException {
    ProtoField returnedProtoField = new ProtoField();
    when(protoFieldFactory.getProtoField()).thenReturn(returnedProtoField);
    returnedProtoField.addField(new ProtoField("order_number", 1));
    returnedProtoField.addField(new ProtoField("order_url", 2));

    HashMap<String, DescriptorAndTypeName> descriptorsMap = new HashMap<String, DescriptorAndTypeName>() {{
        put(String.format("%s.%s", TestKey.class.getPackage(), TestKey.class.getName()), new DescriptorAndTypeName(TestKey.getDescriptor(), String.format(".%s.%s", TestKey.getDescriptor().getFile().getPackage(), TestKey.getDescriptor().getName())));
    }};
    when(protoMappingParser.parseFields(returnedProtoField, stencilConfig.getProtoSchema(), StencilUtils.getAllProtobufDescriptors(descriptorsMap), StencilUtils.getTypeNameToPackageNameMap(descriptorsMap))).thenReturn(returnedProtoField);
    ObjectNode objNode = JsonNodeFactory.instance.objectNode();
    objNode.put("1", "order_number");
    objNode.put("2", "order_url");
    String expectedProtoMapping = objectMapper.writeValueAsString(objNode);
    when(protoMappingConverter.generateColumnMappings(returnedProtoField.getFields())).thenReturn(expectedProtoMapping);

    ArrayList<Field> returnedSchemaFields = new ArrayList<Field>() {{
        add(Field.newBuilder("order_number", LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder("order_url", LegacySQLTypeName.STRING).setMode(Field.Mode.NULLABLE).build());
    }};
    when(protoMappingConverter.generateBigquerySchema(returnedProtoField)).thenReturn(returnedSchemaFields);

    ArrayList<Field> bqSchemaFields = new ArrayList<Field>() {{
        add(Field.newBuilder("order_number", LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder("order_url", LegacySQLTypeName.STRING).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.OFFSET_COLUMN_NAME, LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.TOPIC_COLUMN_NAME, LegacySQLTypeName.STRING).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.LOAD_TIME_COLUMN_NAME, LegacySQLTypeName.TIMESTAMP).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.TIMESTAMP_COLUMN_NAME, LegacySQLTypeName.TIMESTAMP).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.PARTITION_COLUMN_NAME, LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
    }};
    doThrow(new BigQueryException(10, "bigquery mapping has failed")).when(bqInstance).upsertTable(bqSchemaFields);

    protoUpdateListener.onProtoUpdate(stencilConfig.getStencilUrl(), descriptorsMap);
}
 
Example #16
Source File: BqQueueWorker.java    From beast with Apache License 2.0
private Status pushToSink(Records poll) {
    Status status;
    try {
        status = sink.push(poll);
        statsClient.count("kafka.batch.records.size," + statsClient.getBqTags(), poll.getSize());
        poll.getRecordCountByPartition().forEach((partition, recordCount) -> statsClient.count("kafka.batch.records.count," + statsClient.getBqTags() + ",partition=" + partition.toString(), recordCount));
    } catch (BigQueryException e) {
        statsClient.increment("worker.queue.bq.errors");
        log.error("Exception::Failed to write to BQ: {}", e.getMessage());
        return new FailureStatus(e);
    } catch (BQErrorHandlerException bqhe) {
        statsClient.increment("worker.queue.handler.errors");
        log.error("Exception::Could not process the errors with handler sink: {}", bqhe.getMessage());
        return new FailureStatus(bqhe);
    }
    if (status.isSuccess()) {
        boolean ackStatus = acknowledger.acknowledge(poll.getPartitionsCommitOffset());
        statsClient.timeIt("batch.processing.latency.time," + statsClient.getBqTags(), poll.getPolledTime());
        if (!ackStatus) {
            statsClient.increment("batch.partition.offsets.reprocessed");
        }
        return SUCCESS_STATUS;
    } else {
        statsClient.increment("worker.queue.bq.push_failure");
        log.error("Failed to push records to sink {}", status.toString());
        return status;
    }
}
 
Example #17
Source File: BQClient.java    From beast with Apache License 2.0
public void upsertTable(List<Field> bqSchemaFields) throws BigQueryException {
    Schema schema = Schema.of(bqSchemaFields);
    TableDefinition tableDefinition = getTableDefinition(schema);
    TableInfo tableInfo = TableInfo.newBuilder(tableID, tableDefinition)
            .setLabels(bqConfig.getTableLabels())
            .build();
    upsertDatasetAndTable(tableInfo);
}
 
Example #18
Source File: ProtoUpdateListener.java    From beast with Apache License 2.0
@Override
public void onProtoUpdate(String url, Map<String, DescriptorAndTypeName> newDescriptors) {
    log.info("stencil cache was refreshed, validating if bigquery schema changed");
    try {
        ProtoField protoField = protoFieldFactory.getProtoField();
        protoField = protoMappingParser.parseFields(protoField, proto, StencilUtils.getAllProtobufDescriptors(newDescriptors), StencilUtils.getTypeNameToPackageNameMap(newDescriptors));
        updateProtoParser(protoField);
    } catch (BigQueryException | ProtoNotFoundException | BQSchemaMappingException | BQPartitionKeyNotSpecified | IOException e) {
        String errMsg = "Error while updating bigquery table on callback:" + e.getMessage();
        log.error(errMsg);
        statsClient.increment("bq.table.upsert.failures");
        throw new BQTableUpdateFailure(errMsg);
    }
}
 
Example #19
Source File: ReadSessionCreator.java    From presto with Apache License 2.0
Job waitForJob(Job job)
{
    try {
        return job.waitFor();
    }
    catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new BigQueryException(BaseServiceException.UNKNOWN_CODE, format("Job %s has been interrupted", job.getJobId()), e);
    }
}
 
Example #20
Source File: BigQuerySplitManager.java    From presto with Apache License 2.0
private List<BigQuerySplit> createEmptyProjection(TableId tableId, int actualParallelism, Optional<String> filter)
{
    log.debug("createEmptyProjection(tableId=%s, actualParallelism=%s, filter=[%s])", tableId, actualParallelism, filter);
    try {
        long numberOfRows;
        if (filter.isPresent()) {
            // count the rows based on the filter
            String sql = bigQueryClient.selectSql(tableId, "COUNT(*)", new String[] {filter.get()});
            TableResult result = bigQueryClient.query(sql);
            numberOfRows = result.iterateAll().iterator().next().get(0).getLongValue();
        }
        else {
            // no filters, so we can take the value from the table info
            numberOfRows = bigQueryClient.getTable(tableId).getNumRows().longValue();
        }

        long rowsPerSplit = numberOfRows / actualParallelism;
        long remainingRows = numberOfRows - (rowsPerSplit * actualParallelism); // needs to be added to one of the splits due to integer division
        List<BigQuerySplit> splits = range(0, actualParallelism)
                .mapToObj(ignored -> BigQuerySplit.emptyProjection(rowsPerSplit))
                .collect(toList());
        splits.set(0, BigQuerySplit.emptyProjection(rowsPerSplit + remainingRows));
        return splits;
    }
    catch (BigQueryException e) {
        throw new PrestoException(BIGQUERY_FAILED_TO_EXECUTE_QUERY, "Failed to compute empty projection", e);
    }
}
 
Example #21
Source File: BigQueryClient.java    From presto with Apache License 2.0
TableResult query(String sql)
{
    try {
        return bigQuery.query(QueryJobConfiguration.of(sql));
    }
    catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new BigQueryException(BaseHttpServiceException.UNKNOWN_CODE, format("Failed to run the query [%s]", sql), e);
    }
}
 
Example #22
Source File: BigQueryUtil.java    From spark-bigquery-connector with Apache License 2.0
static BigQueryException convertToBigQueryException(BigQueryError error) {
    return new BigQueryException(UNKNOWN_CODE, error.getMessage(), error);
}
 
Example #23
Source File: SinkConfig.java    From gcp-ingestion with Mozilla Public License 2.0
@Override
Output getOutput(Env env, Executor executor) {
  final com.google.cloud.bigquery.BigQuery bigQuery = getBigQueryService(env);
  final Storage storage = getGcsService(env);
  final Function<Blob, CompletableFuture<Void>> bigQueryLoad;
  if (env.containsKey(OUTPUT_TOPIC)) {
    // BigQuery Load API limits maximum load requests per table per day to 1,000, so if
    // OUTPUT_TOPIC is present, send blobInfo to pubsub and run load jobs separately
    final Function<PubsubMessage, CompletableFuture<Void>> pubsubOutput = pubsub
        .getOutput(env, executor);
    bigQueryLoad = blob -> pubsubOutput.apply(BlobIdToPubsubMessage.encode(blob.getBlobId()));
  } else {
    bigQueryLoad = new BigQuery.Load(bigQuery, storage,
        env.getLong(LOAD_MAX_BYTES, DEFAULT_LOAD_MAX_BYTES),
        env.getInt(LOAD_MAX_FILES, DEFAULT_LOAD_MAX_FILES),
        env.getDuration(LOAD_MAX_DELAY, DEFAULT_STREAMING_LOAD_MAX_DELAY), executor,
        // files will be recreated if not successfully loaded
        BigQuery.Load.Delete.always).withOpenCensusMetrics();
  }
  // Combine bigQueryFiles and bigQueryLoad without an intermediate PubSub topic
  Function<PubsubMessage, CompletableFuture<Void>> fileOutput = new Gcs.Write.Ndjson(storage,
      env.getLong(BATCH_MAX_BYTES, DEFAULT_BATCH_MAX_BYTES),
      env.getInt(BATCH_MAX_MESSAGES, DEFAULT_BATCH_MAX_MESSAGES),
      env.getDuration(BATCH_MAX_DELAY, DEFAULT_BATCH_MAX_DELAY),
      PubsubMessageToTemplatedString.forBigQuery(getBigQueryOutputBucket(env)), executor,
      getFormat(env), bigQueryLoad).withOpenCensusMetrics();
  // Like bigQueryStreaming, but use STREAMING_ prefix env vars for batch configuration
  Function<PubsubMessage, CompletableFuture<Void>> streamingOutput = new BigQuery.Write(
      bigQuery, env.getLong(STREAMING_BATCH_MAX_BYTES, DEFAULT_STREAMING_BATCH_MAX_BYTES),
      env.getInt(STREAMING_BATCH_MAX_MESSAGES, DEFAULT_STREAMING_BATCH_MAX_MESSAGES),
      env.getDuration(STREAMING_BATCH_MAX_DELAY, DEFAULT_STREAMING_BATCH_MAX_DELAY),
      PubsubMessageToTemplatedString.forBigQuery(env.getString(OUTPUT_TABLE)), executor,
      getFormat(env)).withOpenCensusMetrics();
  // fallbackOutput sends messages to fileOutput when rejected by streamingOutput due to size
  Function<PubsubMessage, CompletableFuture<Void>> fallbackOutput = message -> streamingOutput
      .apply(message).thenApply(CompletableFuture::completedFuture).exceptionally(t -> {
        if (t.getCause() instanceof BigQueryErrors) {
          BigQueryErrors cause = (BigQueryErrors) t.getCause();
          if (cause.errors.size() == 1 && cause.errors.get(0).getMessage()
              .startsWith("Maximum allowed row size exceeded")) {
            return fileOutput.apply(message);
          }
        } else if (t.getCause() instanceof BigQueryException && t.getCause().getMessage()
            .startsWith("Request payload size exceeds the limit")) {
          // t.getCause() was not a BatchException, so this message exceeded the
          // request payload size limit when sent individually.
          return fileOutput.apply(message);
        }
        throw (RuntimeException) t;
      }).thenCompose(v -> v);
  // Send messages not matched by STREAMING_DOCTYPES directly to fileOutput
  final Function<PubsubMessage, CompletableFuture<Void>> mixedOutput;
  if (env.containsKey(STREAMING_DOCTYPES)) {
    Predicate<PubsubMessage> streamingDoctypes = DocumentTypePredicate
        .of(env.getPattern(STREAMING_DOCTYPES));
    mixedOutput = message -> {
      if (streamingDoctypes.test(message)) {
        return fallbackOutput.apply(message);
      }
      return fileOutput.apply(message);
    };
  } else {
    mixedOutput = fallbackOutput;
  }
  return new Output(env, this, mixedOutput);
}
 
Example #24
Source File: BigQueryUtil.java    From presto with Apache License 2.0
static BigQueryException convertToBigQueryException(BigQueryError error)
{
    return new BigQueryException(UNKNOWN_CODE, error.getMessage(), error);
}