com.google.api.services.bigquery.model.JobConfiguration Java Examples

The following examples show how to use com.google.api.services.bigquery.model.JobConfiguration. They are drawn from open-source projects; the source file, project, and license are noted above each example.
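All of the examples share one basic pattern: a JobConfiguration carries exactly one job type (query, load, extract, or copy) and is attached to a Job, which is then inserted through the API. A minimal sketch of that pattern follows; the project ID and SQL are placeholders, not taken from any example below.

// Minimal sketch of the shared pattern: a JobConfiguration wraps exactly one
// job type and is attached to a Job before submission.
// "my-project" and the SQL below are placeholder values.
JobConfigurationQuery queryConfig = new JobConfigurationQuery()
    .setQuery("SELECT 17");
Job job = new Job()
    .setConfiguration(new JobConfiguration().setQuery(queryConfig));
// Submit with an initialized com.google.api.services.bigquery.Bigquery client:
// bigquery.jobs().insert("my-project", job).execute();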
Example #1
Source File: BqExtractOperatorFactory.java    From digdag with Apache License 2.0
@Override
protected JobConfiguration jobConfiguration(String projectId)
{
    JobConfigurationExtract cfg = new JobConfigurationExtract();

    try {
        cfg.setDestinationUris(params.getList("destination", String.class));
    }
    catch (ConfigException ignore) {
        cfg.setDestinationUri(params.get("destination", String.class));
    }

    Optional<DatasetReference> defaultDataset = params.getOptional("dataset", String.class)
            .transform(Bq::datasetReference);
    String sourceTable = params.get("_command", String.class);
    cfg.setSourceTable(tableReference(projectId, defaultDataset, sourceTable));

    params.getOptional("print_header", boolean.class).transform(cfg::setPrintHeader);
    params.getOptional("field_delimiter", String.class).transform(cfg::setFieldDelimiter);
    params.getOptional("destination_format", String.class).transform(cfg::setDestinationFormat);
    params.getOptional("compression", String.class).transform(cfg::setCompression);

    return new JobConfiguration()
            .setExtract(cfg);
}
 
Example #2
Source File: BigQueryServicesImpl.java    From beam with Apache License 2.0
@Override
public JobStatistics dryRunQuery(
    String projectId, JobConfigurationQuery queryConfig, String location)
    throws InterruptedException, IOException {
  JobReference jobRef = new JobReference().setLocation(location).setProjectId(projectId);
  Job job =
      new Job()
          .setJobReference(jobRef)
          .setConfiguration(new JobConfiguration().setQuery(queryConfig).setDryRun(true));
  return executeWithRetries(
          client.jobs().insert(projectId, job),
          String.format(
              "Unable to dry run query: %s, aborting after %d retries.",
              queryConfig, MAX_RPC_RETRIES),
          Sleeper.DEFAULT,
          createDefaultBackoff(),
          ALWAYS_RETRY)
      .getStatistics();
}
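The same dry-run pattern can also be used directly against the raw client, without Beam's retry wrapper. A hedged sketch, assuming `bigquery` is an initialized com.google.api.services.bigquery.Bigquery client and using placeholder values:

// Dry run: nothing executes, but the response carries statistics such as the
// estimated number of bytes the query would process.
Job dryRun = new Job()
    .setConfiguration(new JobConfiguration()
        .setQuery(new JobConfigurationQuery().setQuery("SELECT 17"))
        .setDryRun(true));
Job response = bigquery.jobs().insert("my-project", dryRun).execute();
Long estimatedBytes = response.getStatistics().getQuery().getTotalBytesProcessed();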
 
Example #3
Source File: FakeJobService.java    From beam with Apache License 2.0
@Override
public void startExtractJob(JobReference jobRef, JobConfigurationExtract extractConfig)
    throws IOException {
  checkArgument(
      "AVRO".equals(extractConfig.getDestinationFormat()), "Only extract to AVRO is supported");
  synchronized (allJobs) {
    verifyUniqueJobId(jobRef.getJobId());
    ++numExtractJobCalls;

    Job job = new Job();
    job.setJobReference(jobRef);
    job.setConfiguration(new JobConfiguration().setExtract(extractConfig));
    job.setKind("bigquery#job");
    job.setStatus(new JobStatus().setState("PENDING"));
    allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
  }
}
 
Example #4
Source File: BigqueryConnection.java    From nomulus with Apache License 2.0
/** Create a table from a SQL query if it doesn't already exist. */
public TableReference ensureTable(TableReference table, String sqlQuery) {
  try {
    runJob(new Job()
        .setConfiguration(new JobConfiguration()
            .setQuery(new JobConfigurationQuery()
                .setQuery(sqlQuery)
                .setDefaultDataset(getDataset())
                .setDestinationTable(table))));
  } catch (BigqueryJobFailureException e) {
    if (e.getReason().equals("duplicate")) {
      // Table already exists.
    } else {
      throw e;
    }
  }
  return table;
}
 
Example #5
Source File: BigqueryConnection.java    From nomulus with Apache License 2.0
/**
 * Starts an asynchronous job to extract the specified source table and output it to the
 * given GCS filepath in the specified destination format, optionally printing headers.
 * Returns a ListenableFuture that holds the destination GCS URI on success.
 */
private ListenableFuture<String> extractTable(
    DestinationTable sourceTable,
    String destinationUri,
    DestinationFormat destinationFormat,
    boolean printHeader) {
  checkArgument(sourceTable.type == TableType.TABLE);
  Job job = new Job()
      .setConfiguration(new JobConfiguration()
          .setExtract(new JobConfigurationExtract()
              .setSourceTable(sourceTable.getTableReference())
              .setDestinationFormat(destinationFormat.toString())
              .setDestinationUris(ImmutableList.of(destinationUri))
              .setPrintHeader(printHeader)));
  return runJobToCompletion(job, destinationUri);
}
 
Example #6
Source File: BigqueryConnection.java    From nomulus with Apache License 2.0
/**
 * Starts an asynchronous query job to populate the specified destination table with the results
 * of the specified query, or if the table is a view, to update the view to reflect that query.
 * Returns a ListenableFuture that holds the same destination table object on success.
 */
public ListenableFuture<DestinationTable> query(
    String querySql,
    DestinationTable dest) {
  if (dest.type == TableType.VIEW) {
    // Use Futures.transform() rather than calling apply() directly so that any exceptions thrown
    // by calling updateTable will be propagated on the get() call, not from here.
    return transform(
        Futures.immediateFuture(dest.withQuery(querySql)), this::updateTable, directExecutor());
  } else {
    Job job = new Job()
        .setConfiguration(new JobConfiguration()
            .setQuery(new JobConfigurationQuery()
                .setQuery(querySql)
                .setDefaultDataset(getDataset())
                .setWriteDisposition(dest.getWriteDisposition().toString())
                .setDestinationTable(dest.getTableReference())));
    return transform(runJobToCompletion(job, dest), this::updateTable, directExecutor());
  }
}
 
Example #7
Source File: BigqueryConnection.java    From nomulus with Apache License 2.0
/**
 * Starts an asynchronous query job to dump the results of the specified query into a local
 * ImmutableTable object, row-keyed by the row number (indexed from 1), column-keyed by the
 * TableFieldSchema for that column, and with the value object as the cell value.  Note that null
 * values will not actually be null, but they can be checked for using Data.isNull().
 *
 * <p>Returns a ListenableFuture that holds the ImmutableTable on success.
 */
public ListenableFuture<ImmutableTable<Integer, TableFieldSchema, Object>>
    queryToLocalTable(String querySql) {
  Job job = new Job()
      .setConfiguration(new JobConfiguration()
          .setQuery(new JobConfigurationQuery()
              .setQuery(querySql)
              .setDefaultDataset(getDataset())));
  return transform(runJobToCompletion(job), this::getQueryResults, directExecutor());
}
 
Example #8
Source File: AbstractExportToCloudStorage.java    From hadoop-connectors with Apache License 2.0
@Override
public void beginExport() throws IOException {
  // Create job and configuration.
  JobConfigurationExtract extractConfig = new JobConfigurationExtract();

  // Set source.
  extractConfig.setSourceTable(tableToExport.getTableReference());

  // Set destination.
  extractConfig.setDestinationUris(getExportPaths());
  extractConfig.set(DESTINATION_FORMAT_KEY, fileFormat.getFormatIdentifier());

  JobConfiguration config = new JobConfiguration();
  config.setExtract(extractConfig);

  JobReference jobReference =
      bigQueryHelper.createJobReference(
          projectId, "exporttocloudstorage", tableToExport.getLocation());

  Job job = new Job();
  job.setConfiguration(config);
  job.setJobReference(jobReference);

  // Insert and run job.
  try {
    Job response = bigQueryHelper.insertJobOrFetchDuplicate(projectId, job);
    logger.atFine().log("Got response '%s'", response);
    exportJobReference = response.getJobReference();
  } catch (IOException e) {
    String error = String.format(
        "Error while exporting table %s",
        BigQueryStrings.toString(tableToExport.getTableReference()));
    throw new IOException(error, e);
  }
}
 
Example #9
Source File: BqOperatorFactory.java    From digdag with Apache License 2.0
@Override
protected JobConfiguration jobConfiguration(String projectId)
{
    JobConfigurationQuery cfg = new JobConfigurationQuery()
            .setQuery(query);

    cfg.setUseLegacySql(params.get("use_legacy_sql", boolean.class, false));

    params.getOptional("allow_large_results", boolean.class).transform(cfg::setAllowLargeResults);
    params.getOptional("use_query_cache", Boolean.class).transform(cfg::setUseQueryCache);
    params.getOptional("create_disposition", String.class).transform(cfg::setCreateDisposition);
    params.getOptional("write_disposition", String.class).transform(cfg::setWriteDisposition);
    params.getOptional("flatten_results", Boolean.class).transform(cfg::setFlattenResults);
    params.getOptional("maximum_billing_tier", Integer.class).transform(cfg::setMaximumBillingTier);
    params.getOptional("priority", String.class).transform(cfg::setPriority);

    params.getOptional("table_definitions", new TypeReference<Map<String, ExternalDataConfiguration>>() {})
            .transform(cfg::setTableDefinitions);
    params.getOptional("user_defined_function_resources", new TypeReference<List<UserDefinedFunctionResource>>() {})
            .transform(cfg::setUserDefinedFunctionResources);

    Optional<DatasetReference> defaultDataset = params.getOptional("dataset", String.class)
            .transform(Bq::datasetReference);
    defaultDataset.transform(cfg::setDefaultDataset);

    params.getOptional("destination_table", String.class)
            .transform(s -> cfg.setDestinationTable(tableReference(projectId, defaultDataset, s)));

    return new JobConfiguration()
            .setQuery(cfg);
}
 
Example #10
Source File: BqLoadOperatorFactory.java    From digdag with Apache License 2.0
@Override
protected JobConfiguration jobConfiguration(String projectId)
{
    JobConfigurationLoad cfg = new JobConfigurationLoad()
            .setSourceUris(sourceUris(params));

    if (params.has("schema")) {
        cfg.setSchema(tableSchema(params));
    }

    Optional<DatasetReference> defaultDataset = params.getOptional("dataset", String.class)
            .transform(Bq::datasetReference);

    String destinationTable = params.get("destination_table", String.class);
    cfg.setDestinationTable(tableReference(projectId, defaultDataset, destinationTable));

    params.getOptional("create_disposition", String.class).transform(cfg::setCreateDisposition);
    params.getOptional("write_disposition", String.class).transform(cfg::setWriteDisposition);

    params.getOptional("source_format", String.class).transform(cfg::setSourceFormat);
    params.getOptional("field_delimiter", String.class).transform(cfg::setFieldDelimiter);
    params.getOptional("skip_leading_rows", int.class).transform(cfg::setSkipLeadingRows);
    params.getOptional("encoding", String.class).transform(cfg::setEncoding);
    params.getOptional("quote", String.class).transform(cfg::setQuote);
    params.getOptional("max_bad_records", int.class).transform(cfg::setMaxBadRecords);
    params.getOptional("allow_quoted_newlines", boolean.class).transform(cfg::setAllowQuotedNewlines);
    params.getOptional("allow_jagged_rows", boolean.class).transform(cfg::setAllowJaggedRows);
    params.getOptional("ignore_unknown_values", boolean.class).transform(cfg::setIgnoreUnknownValues);
    Optional.of(params.getListOrEmpty("projection_fields", String.class)).transform(cfg::setProjectionFields);
    params.getOptional("autodetect", boolean.class).transform(cfg::setAutodetect);
    Optional.of(params.getListOrEmpty("schema_update_options", String.class)).transform(cfg::setSchemaUpdateOptions);

    return new JobConfiguration()
            .setLoad(cfg);
}
 
Example #11
Source File: UploadDatastoreBackupAction.java    From nomulus with Apache License 2.0
private Job makeLoadJob(JobReference jobRef, String sourceUri, String tableId) {
  TableReference tableReference =
      new TableReference()
          .setProjectId(jobRef.getProjectId())
          .setDatasetId(BACKUP_DATASET)
          .setTableId(tableId);
  return new Job()
      .setJobReference(jobRef)
      .setConfiguration(new JobConfiguration()
          .setLoad(new JobConfigurationLoad()
              .setWriteDisposition(WriteDisposition.WRITE_EMPTY.toString())
              .setSourceFormat(SourceFormat.DATASTORE_BACKUP.toString())
              .setSourceUris(ImmutableList.of(sourceUri))
              .setDestinationTable(tableReference)));
}
 
Example #12
Source File: BigqueryConnection.java    From nomulus with Apache License 2.0
/**
 * Returns the result of calling queryToLocalTable, but synchronously to avoid spawning new
 * background threads, which App Engine doesn't support.
 *
 * @see <a href="https://cloud.google.com/appengine/docs/standard/java/runtime#Threads">App Engine
 *     Runtime</a>
 */
public ImmutableTable<Integer, TableFieldSchema, Object> queryToLocalTableSync(String querySql) {
  Job job = new Job()
      .setConfiguration(new JobConfiguration()
          .setQuery(new JobConfigurationQuery()
              .setQuery(querySql)
              .setDefaultDataset(getDataset())));
  return getQueryResults(runJob(job));
}
 
Example #13
Source File: BigQueryServicesImpl.java    From beam with Apache License 2.0
/**
 * {@inheritDoc}
 *
 * <p>Tries executing the RPC for at most {@code MAX_RPC_RETRIES} times until it succeeds.
 *
 * @throws IOException if it exceeds {@code MAX_RPC_RETRIES} attempts.
 */
@Override
public void startLoadJob(JobReference jobRef, JobConfigurationLoad loadConfig)
    throws InterruptedException, IOException {
  Job job =
      new Job()
          .setJobReference(jobRef)
          .setConfiguration(new JobConfiguration().setLoad(loadConfig));

  startJob(job, errorExtractor, client);
}
 
Example #14
Source File: BigqueryConnection.java    From nomulus with Apache License 2.0
/**
 * Starts an asynchronous load job to populate the specified destination table with the given
 * source URIs and source format.  Returns a ListenableFuture that holds the same destination
 * table object on success.
 */
public ListenableFuture<DestinationTable> load(
    DestinationTable dest,
    SourceFormat sourceFormat,
    Iterable<String> sourceUris) {
  Job job = new Job()
      .setConfiguration(new JobConfiguration()
          .setLoad(new JobConfigurationLoad()
              .setWriteDisposition(dest.getWriteDisposition().toString())
              .setSourceFormat(sourceFormat.toString())
              .setSourceUris(ImmutableList.copyOf(sourceUris))
              .setDestinationTable(dest.getTableReference())));
  return transform(runJobToCompletion(job, dest), this::updateTable, directExecutor());
}
 
Example #15
Source File: FakeJobService.java    From beam with Apache License 2.0
@Override
public void startCopyJob(JobReference jobRef, JobConfigurationTableCopy copyConfig)
    throws IOException {
  synchronized (allJobs) {
    verifyUniqueJobId(jobRef.getJobId());
    Job job = new Job();
    job.setJobReference(jobRef);
    job.setConfiguration(new JobConfiguration().setCopy(copyConfig));
    job.setKind("bigquery#job");
    job.setStatus(new JobStatus().setState("PENDING"));
    allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
  }
}
 
Example #16
Source File: FakeJobService.java    From beam with Apache License 2.0
@Override
public void startQueryJob(JobReference jobRef, JobConfigurationQuery query) {
  synchronized (allJobs) {
    Job job = new Job();
    job.setJobReference(jobRef);
    job.setConfiguration(new JobConfiguration().setQuery(query));
    job.setKind("bigquery#job");
    job.setStatus(new JobStatus().setState("PENDING"));
    allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
  }
}
 
Example #17
Source File: FakeJobService.java    From beam with Apache License 2.0
@Override
public void startLoadJob(JobReference jobRef, JobConfigurationLoad loadConfig)
    throws IOException {
  synchronized (allJobs) {
    verifyUniqueJobId(jobRef.getJobId());
    Job job = new Job();
    job.setJobReference(jobRef);
    job.setConfiguration(new JobConfiguration().setLoad(loadConfig));
    job.setKind("bigquery#job");
    job.setStatus(new JobStatus().setState("PENDING"));

    // Copy the files to a new location for import, as the temporary files will be deleted by
    // the caller.
    if (loadConfig.getSourceUris().size() > 0) {
      ImmutableList.Builder<ResourceId> sourceFiles = ImmutableList.builder();
      ImmutableList.Builder<ResourceId> loadFiles = ImmutableList.builder();
      for (String filename : loadConfig.getSourceUris()) {
        sourceFiles.add(FileSystems.matchNewResource(filename, false /* isDirectory */));
        loadFiles.add(
            FileSystems.matchNewResource(
                filename + ThreadLocalRandom.current().nextInt(), false /* isDirectory */));
      }

      FileSystems.copy(sourceFiles.build(), loadFiles.build());
      filesForLoadJobs.put(jobRef.getProjectId(), jobRef.getJobId(), loadFiles.build());
    }

    allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
  }
}
 
Example #18
Source File: BigQueryServicesImpl.java    From beam with Apache License 2.0
/**
 * {@inheritDoc}
 *
 * <p>Tries executing the RPC for at most {@code MAX_RPC_RETRIES} times until it succeeds.
 *
 * @throws IOException if it exceeds {@code MAX_RPC_RETRIES} attempts.
 */
@Override
public void startCopyJob(JobReference jobRef, JobConfigurationTableCopy copyConfig)
    throws IOException, InterruptedException {
  Job job =
      new Job()
          .setJobReference(jobRef)
          .setConfiguration(new JobConfiguration().setCopy(copyConfig));

  startJob(job, errorExtractor, client);
}
 
Example #19
Source File: BigQueryServicesImpl.java    From beam with Apache License 2.0
/**
 * {@inheritDoc}
 *
 * <p>Tries executing the RPC for at most {@code MAX_RPC_RETRIES} times until it succeeds.
 *
 * @throws IOException if it exceeds {@code MAX_RPC_RETRIES} attempts.
 */
@Override
public void startQueryJob(JobReference jobRef, JobConfigurationQuery queryConfig)
    throws IOException, InterruptedException {
  Job job =
      new Job()
          .setJobReference(jobRef)
          .setConfiguration(new JobConfiguration().setQuery(queryConfig));

  startJob(job, errorExtractor, client);
}
 
Example #20
Source File: BigQueryServicesImpl.java    From beam with Apache License 2.0
/**
 * {@inheritDoc}
 *
 * <p>Tries executing the RPC for at most {@code MAX_RPC_RETRIES} times until it succeeds.
 *
 * @throws IOException if it exceeds {@code MAX_RPC_RETRIES} attempts.
 */
@Override
public void startExtractJob(JobReference jobRef, JobConfigurationExtract extractConfig)
    throws InterruptedException, IOException {
  Job job =
      new Job()
          .setJobReference(jobRef)
          .setConfiguration(new JobConfiguration().setExtract(extractConfig));

  startJob(job, errorExtractor, client);
}
 
Example #21
Source File: BigqueryClient.java    From beam with Apache License 2.0
/** Performs a query without flattening results. */
@Nonnull
public List<TableRow> queryUnflattened(String query, String projectId, boolean typed)
    throws IOException, InterruptedException {
  Random rnd = new Random(System.currentTimeMillis());
  String temporaryDatasetId = "_dataflow_temporary_dataset_" + rnd.nextInt(1000000);
  String temporaryTableId = "dataflow_temporary_table_" + rnd.nextInt(1000000);
  TableReference tempTableReference =
      new TableReference()
          .setProjectId(projectId)
          .setDatasetId(temporaryDatasetId)
          .setTableId(temporaryTableId);

  createNewDataset(projectId, temporaryDatasetId);
  createNewTable(
      projectId, temporaryDatasetId, new Table().setTableReference(tempTableReference));

  JobConfigurationQuery jcQuery =
      new JobConfigurationQuery()
          .setFlattenResults(false)
          .setAllowLargeResults(true)
          .setDestinationTable(tempTableReference)
          .setQuery(query);
  JobConfiguration jc = new JobConfiguration().setQuery(jcQuery);

  Job job = new Job().setConfiguration(jc);

  Job insertedJob = bqClient.jobs().insert(projectId, job).execute();

  GetQueryResultsResponse qResponse;
  do {
    qResponse =
        bqClient
            .jobs()
            .getQueryResults(projectId, insertedJob.getJobReference().getJobId())
            .execute();

  } while (!qResponse.getJobComplete());

  final TableSchema schema = qResponse.getSchema();
  final List<TableRow> rows = qResponse.getRows();
  deleteDataset(projectId, temporaryDatasetId);
  return !typed
      ? rows
      : rows.stream()
          .map(r -> getTypedTableRow(schema.getFields(), r))
          .collect(Collectors.toList());
}
 
Example #22
Source File: BigQueryHelper.java    From hadoop-connectors with Apache License 2.0
/**
 * Imports data from GCS into BigQuery via a load job. Optionally polls for completion before
 * returning.
 *
 * @param projectId the project on whose behalf to perform the load.
 * @param tableRef the reference to the destination table.
 * @param schema the schema of the source data to populate the destination table by.
 * @param timePartitioning time partitioning to populate the destination table.
 * @param kmsKeyName the Cloud KMS encryption key used to protect the output table.
 * @param sourceFormat the file format of the source data.
 * @param createDisposition the create disposition of the output table.
 * @param writeDisposition the write disposition of the output table.
 * @param gcsPaths the location of the source data in GCS.
 * @param awaitCompletion if true, block and poll until job completes, otherwise return as soon as
 *     the job has been successfully dispatched.
 * @throws IOException on IO error.
 * @throws InterruptedException if interrupted while waiting for job completion.
 */
public void importFromGcs(
    String projectId,
    TableReference tableRef,
    @Nullable TableSchema schema,
    @Nullable TimePartitioning timePartitioning,
    @Nullable String kmsKeyName,
    BigQueryFileFormat sourceFormat,
    String createDisposition,
    String writeDisposition,
    List<String> gcsPaths,
    boolean awaitCompletion)
    throws IOException, InterruptedException {
  logger.atInfo().log(
      "Importing into table '%s' from %s paths; path[0] is '%s'; awaitCompletion: %s;"
          + " timePartitioning: %s",
      lazy(() -> BigQueryStrings.toString(tableRef)),
      gcsPaths.size(),
      gcsPaths.isEmpty() ? "(empty)" : gcsPaths.get(0),
      awaitCompletion,
      timePartitioning);

  // Create load conf with minimal requirements.
  JobConfigurationLoad loadConfig = new JobConfigurationLoad();
  loadConfig.setSchema(schema);
  loadConfig.setSourceFormat(sourceFormat.getFormatIdentifier());
  loadConfig.setSourceUris(gcsPaths);
  loadConfig.setDestinationTable(tableRef);
  loadConfig.setTimePartitioning(timePartitioning);
  loadConfig.setCreateDisposition(createDisposition);
  loadConfig.setWriteDisposition(writeDisposition);
  if (!Strings.isNullOrEmpty(kmsKeyName)) {
    loadConfig.setDestinationEncryptionConfiguration(
        new EncryptionConfiguration().setKmsKeyName(kmsKeyName));
  }
  // Auto detect the schema if we're not given one, otherwise use the passed schema.
  if (schema == null) {
    logger.atInfo().log("No import schema provided, auto detecting schema.");
    loadConfig.setAutodetect(true);
  } else {
    logger.atInfo().log("Using provided import schema '%s'.", schema);
  }

  JobConfiguration config = new JobConfiguration();
  config.setLoad(loadConfig);

  // Get the dataset to determine the location
  Dataset dataset;
  try {
    dataset = service.datasets().get(tableRef.getProjectId(), tableRef.getDatasetId()).execute();
  } catch (IOException ioe) {
    throw new IOException(
        String.format(
            "Failed to get dataset '%s' in project '%s' for table '%s'",
            tableRef.getDatasetId(), tableRef.getProjectId(), tableRef),
        ioe);
  }

  JobReference jobReference =
      createJobReference(projectId, "direct-bigqueryhelper-import", dataset.getLocation());
  Job job = new Job();
  job.setConfiguration(config);
  job.setJobReference(jobReference);

  // Insert and run job.
  insertJobOrFetchDuplicate(projectId, job);

  if (awaitCompletion) {
    // Poll until job is complete.
    BigQueryUtils.waitForJobCompletion(getRawBigquery(), projectId, jobReference, () -> {});
  }
}
 
Example #23
Source File: BigQueryHelper.java    From hadoop-connectors with Apache License 2.0
/**
 * Exports BigQuery results into GCS, polls for completion before returning.
 *
 * @param projectId the project on whose behalf to perform the export.
 * @param tableRef the table to export.
 * @param gcsPaths the GCS paths to export to.
 * @param awaitCompletion if true, block and poll until job completes, otherwise return as soon as
 *     the job has been successfully dispatched.
 * @throws IOException on IO error.
 * @throws InterruptedException on interrupt.
 */
public void exportBigQueryToGcs(
    String projectId, TableReference tableRef, List<String> gcsPaths, boolean awaitCompletion)
    throws IOException, InterruptedException {
  logger.atFine().log(
      "exportBigQueryToGcs(bigquery, '%s', '%s', '%s', '%s')",
      projectId, BigQueryStrings.toString(tableRef), gcsPaths, awaitCompletion);
  logger.atInfo().log(
      "Exporting table '%s' to %s paths; path[0] is '%s'; awaitCompletion: %s",
      BigQueryStrings.toString(tableRef),
      gcsPaths.size(),
      gcsPaths.isEmpty() ? "(empty)" : gcsPaths.get(0),
      awaitCompletion);

  // Create job and configuration.
  JobConfigurationExtract extractConfig = new JobConfigurationExtract();

  // Set source.
  extractConfig.setSourceTable(tableRef);

  // Set destination.
  extractConfig.setDestinationUris(gcsPaths);
  extractConfig.set("destinationFormat", "NEWLINE_DELIMITED_JSON");

  JobConfiguration config = new JobConfiguration();
  config.setExtract(extractConfig);

  // Get the table to determine the location
  Table table = getTable(tableRef);

  JobReference jobReference =
      createJobReference(projectId, "direct-bigqueryhelper-export", table.getLocation());

  Job job = new Job();
  job.setConfiguration(config);
  job.setJobReference(jobReference);

  // Insert and run job.
  insertJobOrFetchDuplicate(projectId, job);

  if (awaitCompletion) {
    // Poll until job is complete.
    BigQueryUtils.waitForJobCompletion(service, projectId, jobReference, () -> {});
  }
}
 
Example #24
Source File: BaseBqJobOperator.java    From digdag with Apache License 2.0
protected abstract JobConfiguration jobConfiguration(String projectId);
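This abstract template method is what Examples #1, #9, and #10 above implement for extract, query, and load jobs respectively: each subclass builds its job-type-specific configuration and wraps it in a JobConfiguration for the shared base operator to submit.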