java source code of BigQueryOutputConfiguration

hadoop-connectors-master
- cloudbuild
  - cloudbuild.yaml
  - Dockerfile
  - presubmit.sh
- util-hadoop
  - src
    - main
      - java
        com
        google
        common
        flogger
        backend
        system
        DefaultPlatform.java
        cloud
        hadoop
        util
        HadoopCredentialConfiguration.java
        HadoopConfigurationProperty.java
        AccessTokenProvider.java
        ConfigurationUtil.java
        HadoopToStringUtil.java
        CredentialFromAccessTokenProviderClassFactory.java
        testing
        CredentialConfigurationUtil.java
        TestingAccessTokenProvider.java
        HadoopConfigurationUtils.java
    - test
      - resources
        test-key.txt
        test-client-credential.json
        test-credential.json
        test-key.p12
      - java
        com
        google
        cloud
        hadoop
        util
        CredentialFromAccessTokenProviderClassFactoryTest.java
        ConfigurationUtilTest.java
        HadoopCredentialConfigurationTest.java
        HadoopConfigurationPropertyTest.java
  - pom.xml
- pom.xml
- mvnw
- bigquery
  - src
    - main
      - resources
        com
        google
        cloud
        hadoop
        io
        bigquery
        bigquery.properties
      - java
        com
        google
        cloud
        hadoop
        io
        bigquery
        AbstractExportToCloudStorage.java
        BigQueryConfiguration.java
        samples
        WordCount.java
        DirectBigQueryWordCount.java
        WikipediaRequestBytes.java
        DirectBigQueryRecordReader.java
        DelegateRecordReaderFactory.java
        DynamicFileListRecordReader.java
        BigQueryFactory.java
        DirectBigQueryInputFormat.java
        Export.java
        NoopFederatedExportToCloudStorage.java
        JsonTextBigQueryInputFormat.java
        HadoopCredentialConfiguration.java
        output
        BigQueryTimePartitioning.java
        IndirectBigQueryOutputFormat.java
        BigQueryOutputConfiguration.java
        IndirectBigQueryOutputCommitter.java
        ForwardingBigQueryFileOutputCommitter.java
        FederatedBigQueryOutputCommitter.java
        BigQueryTableSchema.java
        ForwardingBigQueryFileOutputFormat.java
        FederatedBigQueryOutputFormat.java
        BigQueryTableHelper.java
        BigQueryTableFieldSchema.java
        HadoopConfigurationProperty.java
        GsonBigQueryInputFormat.java
        GsonRecordReader.java
        BigQueryHelper.java
        AbstractBigQueryInputFormat.java
        BigQueryUtils.java
        ShardedInputSplit.java
        AvroRecordReader.java
        AvroBigQueryInputFormat.java
        BigQueryStrings.java
        UnshardedExportToCloudStorage.java
        UnshardedInputSplit.java
        ExportFileFormat.java
        BigQueryFileFormat.java
    - test
      - resources
        mockito-extensions
        org.mockito.plugins.MockMaker
      - java
        com
        google
        cloud
        hadoop
        io
        bigquery
        GsonRecordReaderTest.java
        BigQueryStringsTest.java
        RegionalIntegrationTest.java
        NoopFederatedExportToCloudStorageTest.java
        output
        BigQueryOutputConfigurationTest.java
        BigQueryTimePartitioningTest.java
        IndirectBigQueryOutputCommitterTest.java
        BigQueryTableSchemaTest.java
        ForwardingBigQueryFileOutputCommitterTest.java
        FederatedBigQueryOutputFormatTest.java
        ForwardingBigQueryFileOutputFormatTest.java
        FederatedBigQueryOutputCommitterTest.java
        IndirectBigQueryOutputFormatTest.java
        GsonBigQueryInputFormatTest.java
        ShardedInputSplitTest.java
        DynamicFileListRecordReaderTest.java
        BigQueryConfigurationTest.java
        BigQueryFactoryTest.java
        BigQueryUtilsTest.java
        UnshardedInputSplitTest.java
        BigQueryHelperTest.java
        AbstractBigQueryIoIntegrationTestBase.java
        UnshardedExportToCloudStorageTest.java
        AvroBigQueryIoIntegrationTest.java
        DirectBigQueryInputFormatTest.java
        AvroRecordReaderTest.java
        GsonBigQueryIoIntegrationTest.java
        DirectBigQueryRecordReaderTest.java
  - pom.xml
  - CHANGES.md
- codecov.yml
- LICENSE
- util
  - src
    - main
      - java
        com
        google
        cloud
        hadoop
        util
        CredentialFactory.java
        PropertyUtil.java
        RequesterPaysOptions.java
        CredentialOptions.java
        BaseAbstractGoogleAsyncWriteChannel.java
        LazyExecutorService.java
        ClientRequestHelper.java
        GoogleCredentialWithIamAccessToken.java
        RetryBoundedBackOff.java
        HttpTransportFactory.java
        RetryDeterminer.java
        AsyncWriteChannelOptions.java
        RetryHttpInitializer.java
        AbstractGoogleAsyncWriteChannel.java
        RedactedString.java
        LoggingMediaHttpUploaderProgressListener.java
        ApiErrorExtractor.java
        RetryHttpInitializerOptions.java
        ChainingHttpRequestInitializer.java
        ResilientOperation.java
        testing
        MockHttpTransportHelper.java
    - test
      - resources
        log4j.properties
        com
        google
        cloud
        hadoop
        util
        test.properties
      - java
        com
        google
        cloud
        hadoop
        util
        RetryBoundedBackOffTest.java
        ApiErrorExtractorTest.java
        LazyExecutorServiceTest.java
        RetryHttpInitializerTest.java
        RetryDeterminerTest.java
        GoogleCredentialWithIamAccessTokenTest.java
        LoggingMediaHttpUploaderProgressListenerTest.java
        PropertyUtilTest.java
        ResilientOperationTest.java
        HttpTransportFactoryTest.java
  - pom.xml
- CONTRIBUTING.md
- .mvn
  - wrapper
    - MavenWrapperDownloader.java
    - maven-wrapper.properties
- README.md
- gcs
  - src
    - main
      - resources
        META-INF
        services
        org.apache.hadoop.fs.FileSystem
        com
        google
        cloud
        hadoop
        fs
        gcs
        gcs.properties
      - hadoop3
        resources
        META-INF
        services
        org.apache.hadoop.security.token.DtFetcher
        java
        com
        google
        cloud
        hadoop
        fs
        gcs
        auth
        GcsDtFetcher.java
      - java
        com
        google
        cloud
        hadoop
        fs
        gcs
        CoopLockFsckRunner.java
        FileSystemDescriptor.java
        HadoopCredentialConfiguration.java
        GoogleHadoopOutputStream.java
        HadoopConfigurationProperty.java
        InMemoryGlobberFileSystem.java
        GoogleHadoopFS.java
        GoogleHadoopSyncableOutputStream.java
        GoogleHadoopFileSystemConfiguration.java
        GoogleHadoopFileSystemBase.java
        CoopLockFsck.java
        auth
        DelegationTokenIOException.java
        GcsDelegationTokens.java
        AbstractDelegationTokenBinding.java
        GoogleHadoopFSInputStream.java
        SyncableOutputStreamOptions.java
        GoogleHadoopFileSystem.java
    - test
      - resources
        contract
        gs.xml
        META-INF
        services
        org.apache.hadoop.security.token.TokenIdentifier
        core-site.xml
      - java
        com
        google
        cloud
        hadoop
        fs
        gcs
        GoogleHadoopFileSystemTestHelper.java
        GoogleHadoopFileSystemDelegationTokensTest.java
        LocalFileSystemIntegrationTest.java
        LocalFileSystemIntegrationHelper.java
        GoogleHadoopFileSystemXAttrsIntegrationTest.java
        GoogleHadoopSyncableOutputStreamTest.java
        HadoopFileSystemIntegrationHelper.java
        HadoopFileSystemTestBase.java
        WebHdfsIntegrationTest.java
        GoogleHadoopFileSystemTestBase.java
        GoogleHadoopSyncableOutputStreamIntegrationTest.java
        GoogleHadoopFileSystemConfigurationTest.java
        HadoopFileSystemIntegrationTest.java
        contract
        GoogleContract.java
        TestInMemoryGoogleContractMkdir.java
        TestGoogleContractAppend.java
        TestInMemoryGoogleContractConcat.java
        TestGoogleContractDelete.java
        TestGoogleContractRename.java
        TestInMemoryGoogleContractCreate.java
        TestGoogleContractConcat.java
        TestGoogleContractGetFileStatus.java
        TestInMemoryGoogleContractSeek.java
        TestInMemoryGoogleContractRootDirectory.java
        TestGoogleContractOpen.java
        AbstractGoogleContractConcatTest.java
        InMemoryGoogleContract.java
        TestInMemoryGoogleContractAppend.java
        TestGoogleContractCreate.java
        TestGoogleContractSeek.java
        TestInMemoryGoogleContractDelete.java
        TestInMemoryGoogleContractRename.java
        TestInMemoryGoogleContractOpen.java
        TestGoogleContractMkdir.java
        TestGoogleContractRootDirectory.java
        GoogleHadoopFSIntegrationTest.java
        GoogleHadoopFileSystemTest.java
        GoogleHadoopFileSystemIntegrationTest.java
        hcfs
        GoogleHadoopFileSystemContractTest.java
        GoogleHadoopFSMainOperationsTest.java
        InMemoryGlobberFileSystemTest.java
        GoogleHadoopOutputStreamIntegrationTest.java
        GoogleHadoopFSInputStreamIntegrationTest.java
        InMemoryGoogleHadoopFileSystem.java
        auth
        TestTokenIdentifierImpl.java
        TestDelegationTokenBindingImpl.java
        CoopLockRepairIntegrationTest.java
        HdfsBehavior.java
        WebHdfsIntegrationHelper.java
        GoogleHadoopFileSystemIntegrationHelper.java
  - pom.xml
  - CHANGES.md
  - README.md
  - COOPERATIVE_LOCKING.md
  - INSTALL.md
  - CONFIGURATION.md
- coverage
  - pom.xml
- tools
  - run_integration_tests.sh
- gcsio
  - src
    - main
      - proto
        google
        storage
        v1
        storage.proto
        storage_resources.proto
      - java
        com
        google
        cloud
        hadoop
        gcsio
        GoogleCloudStorageFileSystem.java
        ForwardingGoogleCloudStorage.java
        UpdatableItemInfo.java
        StorageResourceId.java
        CreateFileOptions.java
        ObjectWriteConditions.java
        GoogleCloudStorageWriteChannel.java
        PerformanceCachingGoogleCloudStorageOptions.java
        GoogleCloudStorageExceptions.java
        GoogleCloudStorageStrings.java
        GoogleCloudStorageItemInfo.java
        GoogleCloudStorageImpl.java
        cooplock
        CooperativeLockingOptions.java
        RenameOperation.java
        CoopLockRecord.java
        CoopLockOperationRename.java
        CoopLockUtils.java
        CoopLockOperationDao.java
        CoopLockOperationType.java
        CoopLockRecords.java
        RenameOperationLogRecord.java
        CoopLockOperationDelete.java
        DeleteOperation.java
        CoopLockRecordsDao.java
        GoogleCloudStorage.java
        PerformanceCachingGoogleCloudStorage.java
        ThrottledGoogleCloudStorage.java
        GoogleCloudStorageOptions.java
        GoogleCloudStorageReadOptions.java
        GoogleCloudStorageGrpcWriteChannel.java
        StringPaths.java
        PrefixMappedItemCache.java
        VerificationAttributes.java
        BatchHelper.java
        CreateBucketOptions.java
        GoogleCloudStorageReadChannel.java
        UriPaths.java
        StorageStubProvider.java
        GoogleCloudStorageFileSystemOptions.java
        GoogleCloudStorageGrpcReadChannel.java
        testing
        GcsItemInfoTestBuilder.java
        InMemoryBucketEntry.java
        InMemoryObjectReadChannel.java
        InMemoryObjectEntry.java
        TestConfiguration.java
        InMemoryGoogleCloudStorage.java
        CreateObjectOptions.java
        FileInfo.java
    - test
      - resources
        log4j.properties
      - java
        com
        google
        cloud
        hadoop
        gcsio
        DeletionBehavior.java
        FileInfoTest.java
        CreateObjectOptionsTest.java
        GoogleCloudStorageMockitoTest.java
        CreateFileOptionsTest.java
        GoogleCloudStorageTestUtils.java
        GoogleCloudStorageFileSystemOptionsTestBase.java
        GoogleCloudStorageFileSystemOptionsUnitTest.java
        GoogleCloudStorageExceptionsTest.java
        integration
        GoogleCloudStorageImplTest.java
        CsekEncryptionIntegrationTest.java
        GoogleCloudStorageTestHelper.java
        GoogleCloudStorageTest.java
        GoogleCloudStorageIntegrationTest.java
        GoogleCloudStorageFileSystemIntegrationHelper.java
        GoogleCloudStorageNewIntegrationTest.java
        GoogleCloudStorageFileSystemTest.java
        PrefixMappedItemCacheTest.java
        LaggedGoogleCloudStorage.java
        GoogleCloudStorageReadOptionsTest.java
        InputStreamSeekableReadableByteChannel.java
        GoogleCloudStorageFileSystemIntegrationTest.java
        MkdirsBehavior.java
        BatchHelperTest.java
        GoogleCloudStorageTest.java
        GoogleCloudStorageFileSystemNewIntegrationTest.java
        ForwardingGoogleCloudStorageTest.java
        GoogleCloudStorageGrpcReadChannelTest.java
        PerformanceCachingGoogleCloudStorageTest.java
        GoogleCloudStorageGrpcWriteChannelTest.java
        ThrowingInputStream.java
        GoogleCloudStorageStringsTest.java
        GoogleCloudStorageReadChannelTest.java
        GoogleCloudStorageWriteChannelTest.java
        GoogleCloudStorageIntegrationHelper.java
        MethodOutcome.java
        RenameBehavior.java
        TrackingHttpRequestInitializer.java
        UriEncodingPathCodecTest.java
        CoopLockIntegrationTest.java
        CoopLockLoadIntegrationTest.java
        GoogleCloudStorageOptionsTest.java
  - pom.xml
- .gitignore
- mvnw.cmd

/*
 * Copyright 2017 Google LLC
 *
 *  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.google.cloud.hadoop.io.bigquery.output;

import static com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration.OUTPUT_CLEANUP_TEMP;
import static com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration.OUTPUT_DATASET_ID;
import static com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration.OUTPUT_FILE_FORMAT;
import static com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration.OUTPUT_FORMAT_CLASS;
import static com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration.OUTPUT_PROJECT_ID;
import static com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration.OUTPUT_TABLE_CREATE_DISPOSITION;
import static com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration.OUTPUT_TABLE_ID;
import static com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration.OUTPUT_TABLE_KMS_KEY_NAME;
import static com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration.OUTPUT_TABLE_PARTITIONING;
import static com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration.OUTPUT_TABLE_SCHEMA;
import static com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration.OUTPUT_TABLE_WRITE_DISPOSITION;
import static com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration.PROJECT_ID;
import static com.google.cloud.hadoop.util.ConfigurationUtil.getMandatoryConfig;

import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.api.services.bigquery.model.TimePartitioning;
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration;
import com.google.cloud.hadoop.io.bigquery.BigQueryFileFormat;
import com.google.cloud.hadoop.io.bigquery.BigQueryStrings;
import com.google.cloud.hadoop.io.bigquery.HadoopConfigurationProperty;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * A container for configuration keys related to BigQuery indirect output formats. Alternatively,
 * the properties can be set in the configuration xml files with proper values.
 */
@InterfaceStability.Unstable
public class BigQueryOutputConfiguration {

  /**
   * The configuration properties that must be present for this output connector: the output
   * dataset id, the output table id, the output file format and the delegate output format class.
   *
   * <p>The output project id is intentionally not part of this list; {@link
   * #getProjectId(Configuration)} resolves it with a fallback to {@link
   * BigQueryConfiguration#PROJECT_ID}.
   */
  public static final List<HadoopConfigurationProperty<?>> REQUIRED_PROPERTIES =
      ImmutableList.of(OUTPUT_DATASET_ID, OUTPUT_TABLE_ID, OUTPUT_FILE_FORMAT, OUTPUT_FORMAT_CLASS);

  /**
   * Populates the given configuration with the output keys, using an explicit JSON table schema.
   *
   * @param conf the configuration to set the keys on.
   * @param qualifiedOutputTableId the qualified id of the output table in the form: <code>(Optional
   *     ProjectId):[DatasetId].[TableId]</code>. If the project id is missing, the value of {@link
   *     BigQueryConfiguration#PROJECT_ID} is tried instead.
   * @param outputTableSchemaJson the schema of the BigQuery output table as JSON; must not be null
   *     or empty.
   * @param outputGcsPath the path in GCS to stage data in. Example: 'gs://bucket/job'.
   * @param outputFileFormat the formatting of the data being written by the output format class.
   * @param outputFormatClass the file output format that will write files to GCS.
   * @throws IllegalArgumentException if {@code outputTableSchemaJson} is null or empty.
   * @throws IOException propagated from the helpers this method delegates to.
   */
  @SuppressWarnings("rawtypes")
  public static void configure(
      Configuration conf,
      String qualifiedOutputTableId,
      String outputTableSchemaJson,
      String outputGcsPath,
      BigQueryFileFormat outputFileFormat,
      Class<? extends FileOutputFormat> outputFormatClass)
      throws IOException {
    // Reject a missing schema before touching the table id, as this overload requires one.
    boolean schemaProvided = !Strings.isNullOrEmpty(outputTableSchemaJson);
    Preconditions.checkArgument(schemaProvided, "outputTableSchemaJson must not be null or empty.");

    // Split the qualified id into its project / dataset / table components.
    TableReference tableRef = BigQueryStrings.parseTableReference(qualifiedOutputTableId);
    String projectId = tableRef.getProjectId();
    String datasetId = tableRef.getDatasetId();
    String tableId = tableRef.getTableId();

    configure(
        conf,
        projectId,
        datasetId,
        tableId,
        Optional.of(outputTableSchemaJson),
        outputGcsPath,
        outputFileFormat,
        outputFormatClass);
  }

  /**
   * Validates the individual output settings and writes them into the given configuration.
   *
   * @param conf the configuration to set the keys on.
   * @param outputProjectId the id of the output project. If it is null or empty, the value of
   *     {@link BigQueryConfiguration#PROJECT_ID} read from {@code conf} is used instead.
   * @param outputDatasetId the id of the output dataset.
   * @param outputTableId the id of the output table.
   * @param outputTableSchemaJson the JSON schema of the BigQuery output table. When absent, no
   *     schema key is written to the configuration (callers use this for schema auto-detection).
   * @param outputGcsPath the path in GCS to stage data in. Example: 'gs://bucket/job'.
   * @param outputFileFormat the formatting of the data being written by the output format class.
   * @param outputFormatClass the file output format that will write files to GCS.
   * @throws IllegalArgumentException if the resolved project id, the dataset id, the table id or
   *     the GCS path is null or empty.
   * @throws NullPointerException if the file format or the output format class is null.
   * @throws IOException propagated from setting the output path or from processing the schema.
   */
  @SuppressWarnings("rawtypes")
  private static void configure(
      Configuration conf,
      String outputProjectId,
      String outputDatasetId,
      String outputTableId,
      Optional<String> outputTableSchemaJson,
      String outputGcsPath,
      BigQueryFileFormat outputFileFormat,
      Class<? extends FileOutputFormat> outputFormatClass)
      throws IOException {

    // Resolve the project: prefer the explicit argument, otherwise the default project key.
    String projectId = outputProjectId;
    if (Strings.isNullOrEmpty(projectId)) {
      projectId = PROJECT_ID.get(conf, conf::get);
    }

    // Validate every argument up front so nothing is written for an invalid request.
    Preconditions.checkArgument(
        !Strings.isNullOrEmpty(projectId), "outputProjectId must not be null or empty.");
    Preconditions.checkArgument(
        !Strings.isNullOrEmpty(outputDatasetId), "outputDatasetId must not be null or empty.");
    Preconditions.checkArgument(
        !Strings.isNullOrEmpty(outputTableId), "outputTableId must not be null or empty.");
    Preconditions.checkArgument(
        !Strings.isNullOrEmpty(outputGcsPath), "outputGcsPath must not be null or empty.");
    Preconditions.checkNotNull(outputFileFormat, "outputFileFormat must not be null.");
    Preconditions.checkNotNull(outputFormatClass, "outputFormatClass must not be null.");

    // Record the destination table and how the staged files are produced.
    conf.set(OUTPUT_PROJECT_ID.getKey(), projectId);
    conf.set(OUTPUT_DATASET_ID.getKey(), outputDatasetId);
    conf.set(OUTPUT_TABLE_ID.getKey(), outputTableId);
    conf.set(OUTPUT_FILE_FORMAT.getKey(), outputFileFormat.name());
    conf.setClass(OUTPUT_FORMAT_CLASS.getKey(), outputFormatClass, FileOutputFormat.class);

    setFileOutputFormatOutputPath(conf, outputGcsPath);

    // Without a schema there is nothing further to store.
    if (!outputTableSchemaJson.isPresent()) {
      return;
    }

    // Only the fields portion of the parsed schema is kept in the configuration.
    TableSchema parsedSchema = BigQueryTableHelper.parseTableSchema(outputTableSchemaJson.get());
    conf.set(OUTPUT_TABLE_SCHEMA.getKey(), BigQueryTableHelper.getTableFieldsJson(parsedSchema));
  }

  /**
   * Populates the given configuration with the output keys, using a {@link BigQueryTableSchema}.
   *
   * <p>The schema is converted to JSON and handed to the JSON-schema overload of this method, which
   * requires a schema. To let BigQuery auto-detect the schema, call {@link
   * #configureWithAutoSchema(Configuration, String, String, BigQueryFileFormat, Class)} instead.
   *
   * @param conf the configuration to set the keys on.
   * @param qualifiedOutputTableId the qualified id of the output table in the form: <code>(Optional
   *     ProjectId):[DatasetId].[TableId]</code>. If the project id is missing, the value of {@link
   *     BigQueryConfiguration#PROJECT_ID} is tried instead.
   * @param outputTableSchema the schema of the BigQuery output table; must not be null.
   * @param outputGcsPath the path in GCS to stage data in. Example: 'gs://bucket/job'.
   * @param outputFileFormat the formatting of the data being written by the output format class.
   * @param outputFormatClass the file output format that will write files to GCS.
   * @throws NullPointerException if {@code outputTableSchema} is null.
   * @throws IOException propagated from serializing the schema or from the delegated overload.
   */
  @SuppressWarnings("rawtypes")
  public static void configure(
      Configuration conf,
      String qualifiedOutputTableId,
      BigQueryTableSchema outputTableSchema,
      String outputGcsPath,
      BigQueryFileFormat outputFileFormat,
      Class<? extends FileOutputFormat> outputFormatClass)
      throws IOException {
    // Fail with an actionable message instead of a bare NullPointerException on get().
    Preconditions.checkNotNull(
        outputTableSchema,
        "outputTableSchema must not be null; "
            + "use configureWithAutoSchema for schema auto-detection.");

    String schemaJson = BigQueryTableHelper.getTableSchemaJson(outputTableSchema.get());
    configure(
        conf, qualifiedOutputTableId, schemaJson, outputGcsPath, outputFileFormat, outputFormatClass);
  }

  /**
   * Populates the given configuration with the output keys without supplying a table schema.
   *
   * <p>No schema is passed on, so the output table schema is left to be auto-detected.
   *
   * @param conf the configuration to set the keys on.
   * @param qualifiedOutputTableId the qualified id of the output table in the form: <code>(Optional
   *     ProjectId):[DatasetId].[TableId]</code>. If the project id is missing, the value of {@link
   *     BigQueryConfiguration#PROJECT_ID} is tried instead.
   * @param outputGcsPath the path in GCS to stage data in. Example: 'gs://bucket/job'.
   * @param outputFileFormat the formatting of the data being written by the output format class.
   * @param outputFormatClass the file output format that will write files to GCS.
   * @throws IOException propagated from the helpers this method delegates to.
   */
  @SuppressWarnings("rawtypes")
  public static void configureWithAutoSchema(
      Configuration conf,
      String qualifiedOutputTableId,
      String outputGcsPath,
      BigQueryFileFormat outputFileFormat,
      Class<? extends FileOutputFormat> outputFormatClass)
      throws IOException {
    // Split the qualified id into its project / dataset / table components.
    TableReference tableRef = BigQueryStrings.parseTableReference(qualifiedOutputTableId);
    String projectId = tableRef.getProjectId();
    String datasetId = tableRef.getDatasetId();
    String tableId = tableRef.getTableId();

    // An empty Optional signals that no schema should be stored.
    Optional<String> noSchema = Optional.empty();
    configure(
        conf,
        projectId,
        datasetId,
        tableId,
        noSchema,
        outputGcsPath,
        outputFileFormat,
        outputFormatClass);
  }

  /**
   * Stores the KMS key name for the output table in the given configuration.
   *
   * @param conf the configuration to set the key on.
   * @param kmsKeyName the KMS key name to store; must not be null or empty.
   * @throws IllegalArgumentException if {@code kmsKeyName} is null or empty.
   */
  public static void setKmsKeyName(Configuration conf, String kmsKeyName) {
    boolean hasKeyName = !Strings.isNullOrEmpty(kmsKeyName);
    Preconditions.checkArgument(hasKeyName, "kmsKeyName must not be null or empty.");

    String configKey = OUTPUT_TABLE_KMS_KEY_NAME.getKey();
    conf.set(configKey, kmsKeyName);
  }

  /**
   * Validates the output configuration.
   *
   * <p>First checks every property in {@link #REQUIRED_PROPERTIES} (dataset id, table id, file
   * format and output format class), then invokes the individual getters for the output project
   * id, the job project id, the table schema, the file format, the file output format and the GCS
   * output path so that each one applies its own error handling. The calls run in that order, so
   * the first failing check determines the exception that is reported.
   *
   * <p>The table time partitioning and the KMS key name are not checked here.
   *
   * @param conf the configuration to validate.
   * @throws IOException if the configuration is missing a key, or there's an issue while parsing
   *     the schema in the configuration.
   */
  public static void validateConfiguration(Configuration conf) throws IOException {
    // Check all required keys in one pass before looking at individual values.
    getMandatoryConfig(conf, REQUIRED_PROPERTIES);

    // The return values are discarded on purpose: each getter is called only for the
    // validation it performs and the exception it raises on a bad value.
    getProjectId(conf);
    getJobProjectId(conf);
    getTableSchema(conf);
    getFileFormat(conf);
    getFileOutputFormat(conf);
    getGcsOutputPath(conf);
  }

  /**
   * Reads the flag that controls whether temporary data in GCS is cleaned up.
   *
   * @param conf the configuration to reference the key from.
   * @return the boolean value resolved for {@link BigQueryConfiguration#OUTPUT_CLEANUP_TEMP}. When
   *     the key is missing the property's default applies (presumably {@code true}; confirm against
   *     the property definition).
   */
  public static boolean getCleanupTemporaryDataFlag(Configuration conf) {
    boolean cleanupEnabled = OUTPUT_CLEANUP_TEMP.get(conf, conf::getBoolean);
    return cleanupEnabled;
  }

  /**
   * Resolves the project id that owns the output dataset.
   *
   * <p>{@link BigQueryConfiguration#OUTPUT_PROJECT_ID} is consulted first; if it is null or empty,
   * {@link BigQueryConfiguration#PROJECT_ID} is used as the fallback.
   *
   * <p>The load job can be configured with two project identifiers. Configuration key {@link
   * BigQueryConfiguration#PROJECT_ID} can set the project on whose behalf to perform BigQuery load
   * operation, while {@link BigQueryConfiguration#OUTPUT_PROJECT_ID} can be used to name the
   * project that the target dataset belongs to.
   *
   * @param conf the configuration to reference the keys from.
   * @return the first non-empty project id found, in the order described above.
   * @throws IOException if neither key yields a non-empty value.
   */
  public static String getProjectId(Configuration conf) throws IOException {
    // Preferred source: the project that owns the output dataset.
    String outputProject = OUTPUT_PROJECT_ID.get(conf, conf::get);
    if (!Strings.isNullOrEmpty(outputProject)) {
      return outputProject;
    }

    // Fallback: the default project.
    String defaultProject = PROJECT_ID.get(conf, conf::get);
    if (!Strings.isNullOrEmpty(defaultProject)) {
      return defaultProject;
    }

    throw new IOException(
        "Must supply a value for configuration setting: " + OUTPUT_PROJECT_ID.getKey());
  }

  /**
   * Resolves the project id used to run the BigQuery load job.
   *
   * <p>{@link BigQueryConfiguration#PROJECT_ID} is consulted first; if it is null or empty, {@link
   * BigQueryConfiguration#OUTPUT_PROJECT_ID} is used as the fallback.
   *
   * <p>The load job can be configured with two project identifiers. Configuration key {@link
   * BigQueryConfiguration#PROJECT_ID} can set the project on whose behalf to perform BigQuery load
   * operation, while {@link BigQueryConfiguration#OUTPUT_PROJECT_ID} can be used to name the
   * project that the target dataset belongs to.
   *
   * @param conf the configuration to reference the keys from.
   * @return the first non-empty project id found, in the order described above.
   * @throws IOException if neither key yields a non-empty value.
   */
  public static String getJobProjectId(Configuration conf) throws IOException {
    // Preferred source: the project on whose behalf the load job runs.
    String jobProject = PROJECT_ID.get(conf, conf::get);
    if (!Strings.isNullOrEmpty(jobProject)) {
      return jobProject;
    }

    // Fallback: the project that owns the output dataset.
    String outputProject = OUTPUT_PROJECT_ID.get(conf, conf::get);
    if (!Strings.isNullOrEmpty(outputProject)) {
      return outputProject;
    }

    throw new IOException(
        "Must supply a value for configuration setting: " + PROJECT_ID.getKey());
  }

  /**
   * Builds the output table reference from the given configuration. The project id is resolved
   * through {@link #getProjectId(Configuration)}, so a missing {@link
   * BigQueryConfiguration#OUTPUT_PROJECT_ID} falls back to {@link
   * BigQueryConfiguration#PROJECT_ID}.
   *
   * @param conf the configuration to reference the keys from.
   * @return a {@link TableReference} carrying the resolved project id, dataset id and table id.
   * @throws IOException if a required key is missing.
   */
  static TableReference getTableReference(Configuration conf) throws IOException {
    // Resolve all three components first; each lookup fails on a missing value.
    String project = getProjectId(conf);
    String dataset = getMandatoryConfig(conf, OUTPUT_DATASET_ID);
    String table = getMandatoryConfig(conf, OUTPUT_TABLE_ID);

    TableReference tableRef = new TableReference();
    tableRef.setProjectId(project);
    tableRef.setDatasetId(dataset);
    tableRef.setTableId(table);
    return tableRef;
  }

  /**
   * Reads the output table schema stored in the given configuration.
   *
   * @param conf the configuration to reference the keys from.
   * @return the wrapped table schema, or an empty {@link Optional} when the schema key holds no
   *     value.
   * @throws IOException if a table schema was set in the configuration but couldn't be parsed.
   */
  static Optional<BigQueryTableSchema> getTableSchema(Configuration conf) throws IOException {
    String schemaFieldsJson = OUTPUT_TABLE_SCHEMA.get(conf, conf::get);

    // Nothing stored means there is no schema to report.
    if (Strings.isNullOrEmpty(schemaFieldsJson)) {
      return Optional.empty();
    }

    try {
      TableSchema parsedSchema = BigQueryTableHelper.createTableSchemaFromFields(schemaFieldsJson);
      return Optional.of(BigQueryTableSchema.wrap(parsedSchema));
    } catch (IOException e) {
      // Re-throw with the offending key named, keeping the original failure as the cause.
      throw new IOException("Unable to parse key '" + OUTPUT_TABLE_SCHEMA.getKey() + "'.", e);
    }
  }

  /**
   * Reads the output table time partitioning stored in the given configuration.
   *
   * @param conf the configuration to reference the keys from.
   * @return the wrapped time partitioning, or an empty {@link Optional} when the partitioning key
   *     holds no value.
   * @throws IOException if a table time partitioning was set in the configuration but couldn't be
   *     parsed.
   */
  static Optional<BigQueryTimePartitioning> getTablePartitioning(Configuration conf)
      throws IOException {
    String partitioningJson = OUTPUT_TABLE_PARTITIONING.get(conf, conf::get);

    // Nothing stored means there is no partitioning to report.
    if (Strings.isNullOrEmpty(partitioningJson)) {
      return Optional.empty();
    }

    try {
      TimePartitioning parsedPartitioning = BigQueryTimePartitioning.getFromJson(partitioningJson);
      return Optional.of(BigQueryTimePartitioning.wrap(parsedPartitioning));
    } catch (IOException e) {
      // Re-throw with the offending key named, keeping the original failure as the cause.
      throw new IOException(
          "Unable to parse key '" + OUTPUT_TABLE_PARTITIONING.getKey() + "'.", e);
    }
  }

  /**
   * Reads the output table KMS key name from the given configuration.
   *
   * @param conf the configuration to reference the keys from.
   * @return the value resolved for the KMS key name property; presumably null when no KMS key name
   *     exists in the configuration (depends on the property's default).
   * @throws IOException declared for API compatibility; this method's own body does not raise it.
   */
  public static String getKmsKeyName(Configuration conf) throws IOException {
    String kmsKeyName = OUTPUT_TABLE_KMS_KEY_NAME.get(conf, conf::get);
    return kmsKeyName;
  }

  /**
   * Resolves the output {@link BigQueryFileFormat} recorded in the configuration.
   *
   * @param conf the configuration to read the file format from.
   * @return the {@link BigQueryFileFormat} matching the configured file format name.
   * @throws IOException if the file format value is missing from the configuration.
   */
  public static BigQueryFileFormat getFileFormat(Configuration conf) throws IOException {
    // The file format is a required setting; the mandatory lookup is what enforces its presence.
    String configuredFormatName = getMandatoryConfig(conf, OUTPUT_FILE_FORMAT);
    return BigQueryFileFormat.fromName(configuredFormatName);
  }

  /**
   * Instantiates the {@link FileOutputFormat} whose class is recorded in the configuration.
   *
   * @param conf the configuration naming the output format class; it is also handed to the newly
   *     created instance.
   * @return a new instance of the configured {@link FileOutputFormat}, created through {@link
   *     ReflectionUtils} with the given configuration.
   * @throws IOException if the output format class is not set, cannot be resolved to a class, or
   *     is not a {@link FileOutputFormat}.
   */
  @SuppressWarnings("rawtypes")
  public static FileOutputFormat getFileOutputFormat(Configuration conf) throws IOException {
    // Called only for its validation effect: the output format class is a required setting.
    getMandatoryConfig(conf, OUTPUT_FORMAT_CLASS);

    Class<?> configuredClass = OUTPUT_FORMAT_CLASS.get(conf, conf::getClass);

    // A null here means the lookup fell back to the default instead of resolving a class.
    if (configuredClass == null) {
      String message =
          "Unable to resolve value for the configuration key '"
              + OUTPUT_FORMAT_CLASS.getKey()
              + "'.";
      throw new IOException(message);
    }

    // Reject anything that cannot be used as a FileOutputFormat before narrowing the type.
    if (!FileOutputFormat.class.isAssignableFrom(configuredClass)) {
      throw new IOException(
          "The class " + configuredClass.getName() + " is not a FileOutputFormat.");
    }

    // Instantiate reflectively, passing the configuration along to the new instance.
    return ReflectionUtils.newInstance(configuredClass.asSubclass(FileOutputFormat.class), conf);
  }

  /**
   * Returns the {@link FileOutputFormat} output path stored in the configuration, after checking
   * that it lives on GCS.
   *
   * @param conf the configuration to read the output path from.
   * @return the output path stored in the configuration.
   * @throws IOException if no output path is set in the configuration, or if the file system of
   *     the output path does not report the 'gs' scheme.
   */
  public static Path getGcsOutputPath(Configuration conf) throws IOException {
    // FileOutputFormat reads the path through a job, so wrap the configuration in an adapter.
    Job jobView = new JobConfigurationAdapter(conf);
    Path configuredPath = FileOutputFormat.getOutputPath(jobView);

    // A missing output path is an error.
    if (configuredPath == null) {
      throw new IOException("FileOutputFormat output path not set.");
    }

    // Only GCS is accepted as the output file system.
    FileSystem outputFileSystem = configuredPath.getFileSystem(conf);
    boolean isGcs = "gs".equals(outputFileSystem.getScheme());
    if (!isGcs) {
      throw new IOException("Output FileSystem must be GCS ('gs' scheme).");
    }

    return configuredPath;
  }

  /**
   * Looks up the create disposition of the output table, i.e. whether the job should create a
   * table for loading data.
   *
   * @param conf the configuration to read the create disposition from.
   * @return the create disposition of the output table as stored in the configuration.
   */
  public static String getCreateDisposition(Configuration conf) {
    String createDisposition = OUTPUT_TABLE_CREATE_DISPOSITION.get(conf, conf::get);
    return createDisposition;
  }


  /**
   * Looks up the write disposition of the output table, i.e. the action taken when the
   * destination table already exists. By default, if the table already exists, BigQuery appends
   * data to the output table.
   *
   * @param conf the configuration to read the write disposition from.
   * @return the write disposition of the output table as stored in the configuration.
   */
  public static String getWriteDisposition(Configuration conf) {
    String writeDisposition = OUTPUT_TABLE_WRITE_DISPOSITION.get(conf, conf::get);
    return writeDisposition;
  }

  /**
   * Stores the given path as the {@link FileOutputFormat} output path in the configuration.
   *
   * @param conf the configuration that receives the output path.
   * @param outputPath the path, in string form, to set as the output path.
   * @throws IOException if the job adapter wrapping the configuration cannot be created, or if
   *     {@link FileOutputFormat} fails while setting the output path.
   */
  @VisibleForTesting
  static void setFileOutputFormatOutputPath(Configuration conf, String outputPath)
      throws IOException {
    // The adapter exposes `conf` itself as the job configuration, so the path set through the
    // job-based FileOutputFormat API ends up in the caller's configuration.
    Job configurationBackedJob = new JobConfigurationAdapter(conf);
    Path target = new Path(outputPath);
    FileOutputFormat.setOutputPath(configurationBackedJob, target);
  }

  /**
   * A {@link Job} that exposes a caller-supplied {@link Configuration} instead of its own.
   *
   * <p>This is a workaround for setting FileOutputFormat's output path. Creating a job from a
   * configuration makes a defensive copy of that configuration, so changes made through the job
   * are not reflected in the original (and vice versa). Because FileOutputFormat's API requires a
   * job to set an output path, this adapter returns the wrapped configuration from {@link
   * #getConfiguration()} so that such changes are propagated out to it.
   */
  private static class JobConfigurationAdapter extends Job {

    /** The configuration that reads and writes made through this job are directed to. */
    private final Configuration config;

    /**
     * Creates an adapter around the given configuration.
     *
     * @param wrappedConfiguration the configuration to expose as this job's configuration.
     * @throws IOException if the underlying {@link Job} cannot be constructed.
     */
    public JobConfigurationAdapter(Configuration wrappedConfiguration) throws IOException {
      // The no-argument Job constructor is invoked implicitly.
      this.config = wrappedConfiguration;
    }

    /** Returns the wrapped configuration itself, not a copy. */
    @Override
    public Configuration getConfiguration() {
      return config;
    }
  }
}