com.google.api.services.bigquery.model.JobConfigurationLoad Java Examples

The following examples show how to use com.google.api.services.bigquery.model.JobConfigurationLoad. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
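Before the project-specific examples, here is a minimal sketch of the pattern they all share: build a JobConfigurationLoad, wrap it in a JobConfiguration, and submit it as a Job through the generated Bigquery client. Every name below (project, dataset, table, GCS URI) is a placeholder, the Bigquery client is assumed to be already built and authorized, and ImmutableList comes from Guava.

/** Minimal sketch; every identifier below is a placeholder, not taken from the examples. */
static Job startExampleLoadJob(Bigquery bigquery) throws IOException {
  JobConfigurationLoad loadConfig =
      new JobConfigurationLoad()
          .setSourceUris(ImmutableList.of("gs://example-bucket/data.json"))
          .setSourceFormat("NEWLINE_DELIMITED_JSON")
          .setDestinationTable(
              new TableReference()
                  .setProjectId("example-project")
                  .setDatasetId("example_dataset")
                  .setTableId("example_table"))
          .setCreateDisposition("CREATE_IF_NEEDED")
          .setWriteDisposition("WRITE_APPEND")
          .setAutodetect(true); // let BigQuery infer the schema from the source files

  Job job = new Job().setConfiguration(new JobConfiguration().setLoad(loadConfig));

  // jobs().insert(...) starts the load asynchronously; poll the returned job
  // (for example with jobs().get(...)) to wait for completion.
  return bigquery.jobs().insert("example-project", job).execute();
}

Examples #1 and #2 below follow this same shape, with job submission handled by the surrounding service classes.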
Example #1
Source File: BigQueryServicesImpl.java    From beam with Apache License 2.0
/**
 * {@inheritDoc}
 *
 * <p>Tries executing the RPC at most {@code MAX_RPC_RETRIES} times until it succeeds.
 *
 * @throws IOException if it exceeds {@code MAX_RPC_RETRIES} attempts.
 */
@Override
public void startLoadJob(JobReference jobRef, JobConfigurationLoad loadConfig)
    throws InterruptedException, IOException {
  Job job =
      new Job()
          .setJobReference(jobRef)
          .setConfiguration(new JobConfiguration().setLoad(loadConfig));

  startJob(job, errorExtractor, client);
}
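The bounded-retry behavior described in the Javadoc lives inside the shared startJob() helper, which is not shown here. As a rough, generic sketch only (not Beam's actual implementation, which uses its own BackOff utilities), bounded retries around the insert RPC could look like this; maxRpcRetries mirrors the MAX_RPC_RETRIES constant mentioned above:

// Generic sketch of bounded retries; the linear sleep is a placeholder policy,
// not Beam's actual backoff strategy.
static void insertWithRetries(Bigquery client, String projectId, Job job, int maxRpcRetries)
    throws IOException, InterruptedException {
  IOException lastException = null;
  for (int attempt = 0; attempt < maxRpcRetries; attempt++) {
    try {
      client.jobs().insert(projectId, job).execute();
      return; // success
    } catch (IOException e) {
      lastException = e;
      Thread.sleep(1000L * (attempt + 1)); // placeholder linear backoff
    }
  }
  throw new IOException("Unable to insert job after " + maxRpcRetries + " attempts.", lastException);
}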
 
Example #2
Source File: FakeJobService.java    From beam with Apache License 2.0
@Override
public void startLoadJob(JobReference jobRef, JobConfigurationLoad loadConfig)
    throws IOException {
  synchronized (allJobs) {
    verifyUniqueJobId(jobRef.getJobId());
    Job job = new Job();
    job.setJobReference(jobRef);
    job.setConfiguration(new JobConfiguration().setLoad(loadConfig));
    job.setKind("bigquery#job");
    job.setStatus(new JobStatus().setState("PENDING"));

    // Copy the files to a new location for import, as the temporary files will be deleted by
    // the caller.
    if (loadConfig.getSourceUris().size() > 0) {
      ImmutableList.Builder<ResourceId> sourceFiles = ImmutableList.builder();
      ImmutableList.Builder<ResourceId> loadFiles = ImmutableList.builder();
      for (String filename : loadConfig.getSourceUris()) {
        sourceFiles.add(FileSystems.matchNewResource(filename, false /* isDirectory */));
        loadFiles.add(
            FileSystems.matchNewResource(
                filename + ThreadLocalRandom.current().nextInt(), false /* isDirectory */));
      }

      FileSystems.copy(sourceFiles.build(), loadFiles.build());
      filesForLoadJobs.put(jobRef.getProjectId(), jobRef.getJobId(), loadFiles.build());
    }

    allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
  }
}
 
Example #3
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
void schemaUpdateOptionsTest(
    BigQueryIO.Write.Method insertMethod, Set<SchemaUpdateOption> schemaUpdateOptions)
    throws Exception {
  TableRow row = new TableRow().set("date", "2019-01-01").set("number", "1");

  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("date").setType("DATE"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));

  Write<TableRow> writeTransform =
      BigQueryIO.writeTableRows()
          .to("project-id:dataset-id.table-id")
          .withTestServices(fakeBqServices)
          .withMethod(insertMethod)
          .withSchema(schema)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
          .withSchemaUpdateOptions(schemaUpdateOptions);

  p.apply(Create.<TableRow>of(row)).apply(writeTransform);
  p.run();

  List<String> expectedOptions =
      schemaUpdateOptions.stream().map(Enum::name).collect(Collectors.toList());

  for (Job job : fakeJobService.getAllJobs()) {
    JobConfigurationLoad configuration = job.getConfiguration().getLoad();
    assertEquals(expectedOptions, configuration.getSchemaUpdateOptions());
  }
}
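The test above only checks that the requested options propagate into the load configuration. For reference, schema update options are plain strings on JobConfigurationLoad; a stand-alone sketch (not taken from the test, with a placeholder write disposition) looks like this:

// Sketch only: the option names are BigQuery's documented values; schema updates
// apply to jobs that append to, or overwrite a partition of, an existing table.
JobConfigurationLoad loadConfig =
    new JobConfigurationLoad()
        .setSchemaUpdateOptions(
            ImmutableList.of("ALLOW_FIELD_ADDITION", "ALLOW_FIELD_RELAXATION"))
        .setWriteDisposition("WRITE_APPEND");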
 
Example #4
Source File: BigqueryConnection.java    From nomulus with Apache License 2.0
/**
 * Starts an asynchronous load job to populate the specified destination table with the given
 * source URIs and source format.  Returns a ListenableFuture that holds the same destination
 * table object on success.
 */
public ListenableFuture<DestinationTable> load(
    DestinationTable dest,
    SourceFormat sourceFormat,
    Iterable<String> sourceUris) {
  Job job = new Job()
      .setConfiguration(new JobConfiguration()
          .setLoad(new JobConfigurationLoad()
              .setWriteDisposition(dest.getWriteDisposition().toString())
              .setSourceFormat(sourceFormat.toString())
              .setSourceUris(ImmutableList.copyOf(sourceUris))
              .setDestinationTable(dest.getTableReference())));
  return transform(runJobToCompletion(job, dest), this::updateTable, directExecutor());
}
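A hypothetical caller of load() might look like the following; connection, destinationTable, the format constant, and the GCS URI are illustrative assumptions. The returned future resolves to the same DestinationTable once the load job completes.

// Hypothetical usage; names are placeholders, and Futures/FutureCallback come from Guava.
ListenableFuture<DestinationTable> loaded =
    connection.load(
        destinationTable,
        SourceFormat.CSV, // any supported format; CSV is chosen arbitrarily here
        ImmutableList.of("gs://example-bucket/export/data.csv"));

Futures.addCallback(
    loaded,
    new FutureCallback<DestinationTable>() {
      @Override
      public void onSuccess(DestinationTable table) {
        // The load job finished; 'table' is the same destination table that was passed in.
      }

      @Override
      public void onFailure(Throwable t) {
        // The load job failed or could not be started.
      }
    },
    directExecutor());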
 
Example #5
Source File: UploadDatastoreBackupAction.java    From nomulus with Apache License 2.0
private Job makeLoadJob(JobReference jobRef, String sourceUri, String tableId) {
  TableReference tableReference =
      new TableReference()
          .setProjectId(jobRef.getProjectId())
          .setDatasetId(BACKUP_DATASET)
          .setTableId(tableId);
  return new Job()
      .setJobReference(jobRef)
      .setConfiguration(new JobConfiguration()
          .setLoad(new JobConfigurationLoad()
              .setWriteDisposition(WriteDisposition.WRITE_EMPTY.toString())
              .setSourceFormat(SourceFormat.DATASTORE_BACKUP.toString())
              .setSourceUris(ImmutableList.of(sourceUri))
              .setDestinationTable(tableReference)));
}
 
Example #6
Source File: BqLoadOperatorFactory.java    From digdag with Apache License 2.0
@Override
protected JobConfiguration jobConfiguration(String projectId)
{
    JobConfigurationLoad cfg = new JobConfigurationLoad()
            .setSourceUris(sourceUris(params));

    if (params.has("schema")) {
        cfg.setSchema(tableSchema(params));
    }

    Optional<DatasetReference> defaultDataset = params.getOptional("dataset", String.class)
            .transform(Bq::datasetReference);

    String destinationTable = params.get("destination_table", String.class);
    cfg.setDestinationTable(tableReference(projectId, defaultDataset, destinationTable));

    params.getOptional("create_disposition", String.class).transform(cfg::setCreateDisposition);
    params.getOptional("write_disposition", String.class).transform(cfg::setWriteDisposition);

    params.getOptional("source_format", String.class).transform(cfg::setSourceFormat);
    params.getOptional("field_delimiter", String.class).transform(cfg::setFieldDelimiter);
    params.getOptional("skip_leading_rows", int.class).transform(cfg::setSkipLeadingRows);
    params.getOptional("encoding", String.class).transform(cfg::setEncoding);
    params.getOptional("quote", String.class).transform(cfg::setQuote);
    params.getOptional("max_bad_records", int.class).transform(cfg::setMaxBadRecords);
    params.getOptional("allow_quoted_newlines", boolean.class).transform(cfg::setAllowQuotedNewlines);
    params.getOptional("allow_jagged_rows", boolean.class).transform(cfg::setAllowJaggedRows);
    params.getOptional("ignore_unknown_values", boolean.class).transform(cfg::setIgnoreUnknownValues);
    Optional.of(params.getListOrEmpty("projection_fields", String.class)).transform(cfg::setProjectionFields);
    params.getOptional("autodetect", boolean.class).transform(cfg::setAutodetect);
    Optional.of(params.getListOrEmpty("schema_update_options", String.class)).transform(cfg::setSchemaUpdateOptions);

    return new JobConfiguration()
            .setLoad(cfg);
}
 
Example #7
Source File: BigQueryServices.java    From beam with Apache License 2.0
/** Start a BigQuery load job. */
void startLoadJob(JobReference jobRef, JobConfigurationLoad loadConfig)
    throws InterruptedException, IOException;
 
Example #8
Source File: FakeJobService.java    From beam with Apache License 2.0
private JobStatus runLoadJob(JobReference jobRef, JobConfigurationLoad load)
    throws InterruptedException, IOException {
  TableReference destination = load.getDestinationTable();
  TableSchema schema = load.getSchema();
  checkArgument(schema != null, "No schema specified");
  List<ResourceId> sourceFiles = filesForLoadJobs.get(jobRef.getProjectId(), jobRef.getJobId());
  WriteDisposition writeDisposition = WriteDisposition.valueOf(load.getWriteDisposition());
  CreateDisposition createDisposition = CreateDisposition.valueOf(load.getCreateDisposition());

  Table existingTable = datasetService.getTable(destination);
  if (!validateDispositions(existingTable, createDisposition, writeDisposition)) {
    return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
  }
  if (existingTable == null) {
    TableReference strippedDestination =
        destination
            .clone()
            .setTableId(BigQueryHelpers.stripPartitionDecorator(destination.getTableId()));
    existingTable = new Table().setTableReference(strippedDestination).setSchema(schema);
    if (load.getTimePartitioning() != null) {
      existingTable = existingTable.setTimePartitioning(load.getTimePartitioning());
    }
    if (load.getClustering() != null) {
      existingTable = existingTable.setClustering(load.getClustering());
    }
    datasetService.createTable(existingTable);
  }

  List<TableRow> rows = Lists.newArrayList();
  for (ResourceId filename : sourceFiles) {
    if (load.getSourceFormat().equals("NEWLINE_DELIMITED_JSON")) {
      rows.addAll(readJsonTableRows(filename.toString()));
    } else if (load.getSourceFormat().equals("AVRO")) {
      rows.addAll(readAvroTableRows(filename.toString(), schema));
    }
  }

  datasetService.insertAll(destination, rows, null);
  FileSystems.delete(sourceFiles);
  return new JobStatus().setState("DONE");
}
 
Example #9
Source File: UploadDatastoreBackupActionTest.java    From nomulus with Apache License 2.0
@Test
public void testSuccess_doPost() throws Exception {
  action.run();

  // Verify that checkedBigquery was called in a way that would create the dataset if it didn't
  // already exist.
  verify(checkedBigquery).ensureDataSetExists("Project-Id", BACKUP_DATASET);

  // Capture the load jobs we inserted to do additional checking on them.
  ArgumentCaptor<Job> jobArgument = ArgumentCaptor.forClass(Job.class);
  verify(bigqueryJobs, times(3)).insert(eq("Project-Id"), jobArgument.capture());
  List<Job> jobs = jobArgument.getAllValues();
  assertThat(jobs).hasSize(3);

  // Check properties that should be common to all load jobs.
  for (Job job : jobs) {
    assertThat(job.getJobReference().getProjectId()).isEqualTo("Project-Id");
    JobConfigurationLoad config = job.getConfiguration().getLoad();
    assertThat(config.getSourceFormat()).isEqualTo("DATASTORE_BACKUP");
    assertThat(config.getDestinationTable().getProjectId()).isEqualTo("Project-Id");
    assertThat(config.getDestinationTable().getDatasetId()).isEqualTo(BACKUP_DATASET);
  }

  // Check the job IDs for each load job.
  assertThat(transform(jobs, job -> job.getJobReference().getJobId()))
      .containsExactly(
          "load-backup-2018_12_05T17_46_39_92612-one",
          "load-backup-2018_12_05T17_46_39_92612-two",
          "load-backup-2018_12_05T17_46_39_92612-three");

  // Check the source URI for each load job.
  assertThat(
          transform(
              jobs,
              job -> Iterables.getOnlyElement(job.getConfiguration().getLoad().getSourceUris())))
      .containsExactly(
          "gs://bucket/path/all_namespaces/kind_one/all_namespaces_kind_one.export_metadata",
          "gs://bucket/path/all_namespaces/kind_two/all_namespaces_kind_two.export_metadata",
          "gs://bucket/path/all_namespaces/kind_three/all_namespaces_kind_three.export_metadata");

  // Check the destination table ID for each load job.
  assertThat(
          transform(
              jobs, job -> job.getConfiguration().getLoad().getDestinationTable().getTableId()))
      .containsExactly(
          "2018_12_05T17_46_39_92612_one",
          "2018_12_05T17_46_39_92612_two",
          "2018_12_05T17_46_39_92612_three");

  // Check that we executed the inserted jobs.
  verify(bigqueryJobsInsert, times(3)).execute();

  // Check that the poll tasks for each load job were enqueued.
  verify(bigqueryPollEnqueuer)
      .enqueuePollTask(
          new JobReference()
              .setProjectId("Project-Id")
              .setJobId("load-backup-2018_12_05T17_46_39_92612-one"),
          UpdateSnapshotViewAction.createViewUpdateTask(
              BACKUP_DATASET, "2018_12_05T17_46_39_92612_one", "one", LATEST_BACKUP_VIEW_NAME),
          QueueFactory.getQueue(UpdateSnapshotViewAction.QUEUE));
  verify(bigqueryPollEnqueuer)
      .enqueuePollTask(
          new JobReference()
              .setProjectId("Project-Id")
              .setJobId("load-backup-2018_12_05T17_46_39_92612-two"),
          UpdateSnapshotViewAction.createViewUpdateTask(
              BACKUP_DATASET, "2018_12_05T17_46_39_92612_two", "two", LATEST_BACKUP_VIEW_NAME),
          QueueFactory.getQueue(UpdateSnapshotViewAction.QUEUE));
  verify(bigqueryPollEnqueuer)
      .enqueuePollTask(
          new JobReference()
              .setProjectId("Project-Id")
              .setJobId("load-backup-2018_12_05T17_46_39_92612-three"),
          UpdateSnapshotViewAction.createViewUpdateTask(
              BACKUP_DATASET,
              "2018_12_05T17_46_39_92612_three",
              "three",
              LATEST_BACKUP_VIEW_NAME),
          QueueFactory.getQueue(UpdateSnapshotViewAction.QUEUE));
}
 
Example #10
Source File: BigQueryHelper.java    From hadoop-connectors with Apache License 2.0
/**
 * Imports data from GCS into BigQuery via a load job. Optionally polls for completion before
 * returning.
 *
 * @param projectId the project on whose behalf to perform the load.
 * @param tableRef the reference to the destination table.
 * @param schema the schema of the source data used to populate the destination table.
 * @param timePartitioning the time partitioning to apply to the destination table.
 * @param kmsKeyName the Cloud KMS encryption key used to protect the output table.
 * @param sourceFormat the file format of the source data.
 * @param createDisposition the create disposition of the output table.
 * @param writeDisposition the write disposition of the output table.
 * @param gcsPaths the location of the source data in GCS.
 * @param awaitCompletion if true, block and poll until job completes, otherwise return as soon as
 *     the job has been successfully dispatched.
 * @throws IOException if the dataset lookup or job insertion fails.
 * @throws InterruptedException if interrupted while waiting for job completion.
 */
public void importFromGcs(
    String projectId,
    TableReference tableRef,
    @Nullable TableSchema schema,
    @Nullable TimePartitioning timePartitioning,
    @Nullable String kmsKeyName,
    BigQueryFileFormat sourceFormat,
    String createDisposition,
    String writeDisposition,
    List<String> gcsPaths,
    boolean awaitCompletion)
    throws IOException, InterruptedException {
  logger.atInfo().log(
      "Importing into table '%s' from %s paths; path[0] is '%s'; awaitCompletion: %s;"
          + " timePartitioning: %s",
      lazy(() -> BigQueryStrings.toString(tableRef)),
      gcsPaths.size(),
      gcsPaths.isEmpty() ? "(empty)" : gcsPaths.get(0),
      awaitCompletion,
      timePartitioning);

  // Create load conf with minimal requirements.
  JobConfigurationLoad loadConfig = new JobConfigurationLoad();
  loadConfig.setSchema(schema);
  loadConfig.setSourceFormat(sourceFormat.getFormatIdentifier());
  loadConfig.setSourceUris(gcsPaths);
  loadConfig.setDestinationTable(tableRef);
  loadConfig.setTimePartitioning(timePartitioning);
  loadConfig.setCreateDisposition(createDisposition);
  loadConfig.setWriteDisposition(writeDisposition);
  if (!Strings.isNullOrEmpty(kmsKeyName)) {
    loadConfig.setDestinationEncryptionConfiguration(
        new EncryptionConfiguration().setKmsKeyName(kmsKeyName));
  }
  // Auto detect the schema if we're not given one, otherwise use the passed schema.
  if (schema == null) {
    logger.atInfo().log("No import schema provided, auto detecting schema.");
    loadConfig.setAutodetect(true);
  } else {
    logger.atInfo().log("Using provided import schema '%s'.", schema);
  }

  JobConfiguration config = new JobConfiguration();
  config.setLoad(loadConfig);

  // Get the dataset to determine the location
  Dataset dataset;
  try {
    dataset = service.datasets().get(tableRef.getProjectId(), tableRef.getDatasetId()).execute();
  } catch (IOException ioe) {
    throw new IOException(
        String.format(
            "Failed to get dataset '%s' in project '%s' for table '%s'",
            tableRef.getDatasetId(), tableRef.getProjectId(), tableRef),
        ioe);
  }

  JobReference jobReference =
      createJobReference(projectId, "direct-bigqueryhelper-import", dataset.getLocation());
  Job job = new Job();
  job.setConfiguration(config);
  job.setJobReference(jobReference);

  // Insert and run job.
  insertJobOrFetchDuplicate(projectId, job);

  if (awaitCompletion) {
    // Poll until job is complete.
    BigQueryUtils.waitForJobCompletion(getRawBigquery(), projectId, jobReference, () -> {});
  }
}
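A hypothetical call to importFromGcs() with schema autodetection might look like this; the helper instance, table coordinates, format constant, and GCS paths are assumptions for illustration:

// Hypothetical usage; every literal below is a placeholder.
bigQueryHelper.importFromGcs(
    "example-project",
    new TableReference()
        .setProjectId("example-project")
        .setDatasetId("example_dataset")
        .setTableId("example_table"),
    /* schema= */ null, // null triggers autodetection, as handled in the method above
    /* timePartitioning= */ null,
    /* kmsKeyName= */ null,
    BigQueryFileFormat.NEWLINE_DELIMITED_JSON, // assumed enum constant for JSON source data
    "CREATE_IF_NEEDED",
    "WRITE_APPEND",
    ImmutableList.of("gs://example-bucket/output/part-*.json"),
    /* awaitCompletion= */ true);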