Java Code Examples for com.google.api.services.bigquery.model.Job#setJobReference()

The following examples show how to use com.google.api.services.bigquery.model.Job#setJobReference(). They are taken from open-source projects; follow the link above each example to view the original source file.
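In each example, setJobReference() attaches a client-chosen JobReference (a project id plus a unique job id) to a Job before it is inserted. Because the client picks the job id, a retried insert of the same job can be detected as a duplicate (HTTP 409) rather than run twice. Below is a minimal sketch of that pattern; it is not taken from any of the projects above, and the bigquery client, project id, and job id are placeholder assumptions.

import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.JobConfiguration;
import com.google.api.services.bigquery.model.JobConfigurationQuery;
import com.google.api.services.bigquery.model.JobReference;
import java.io.IOException;

public Job startQueryJob(Bigquery bigquery) throws IOException {
  // Client-chosen ids make the insert retry-safe: re-inserting the same
  // JobReference yields a 409 "already exists" instead of a second job
  // (see Examples 3 and 8 below).
  JobReference jobRef =
      new JobReference()
          .setProjectId("my-project")       // placeholder project id
          .setJobId("my-unique-job-id");    // placeholder; must be unique within the project

  Job job = new Job();
  job.setJobReference(jobRef);
  job.setConfiguration(
      new JobConfiguration()
          .setQuery(new JobConfigurationQuery().setQuery("SELECT 1")));

  // Insert and run the job; execute() returns the server's view of the Job.
  return bigquery.jobs().insert("my-project", job).execute();
}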
Example 1
Source File: FakeJobService.java    From beam with Apache License 2.0
@Override
public void startExtractJob(JobReference jobRef, JobConfigurationExtract extractConfig)
    throws IOException {
  checkArgument(
      "AVRO".equals(extractConfig.getDestinationFormat()), "Only extract to AVRO is supported");
  synchronized (allJobs) {
    verifyUniqueJobId(jobRef.getJobId());
    ++numExtractJobCalls;

    Job job = new Job();
    job.setJobReference(jobRef);
    job.setConfiguration(new JobConfiguration().setExtract(extractConfig));
  job.setKind("bigquery#job");
    job.setStatus(new JobStatus().setState("PENDING"));
    allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
  }
}
 
Example 2
Source File: BigQueryServicesImplTest.java    From beam with Apache License 2.0
/** Tests that {@link BigQueryServicesImpl.JobServiceImpl#startLoadJob} succeeds. */
@Test
public void testStartLoadJobSucceeds() throws IOException, InterruptedException {
  Job testJob = new Job();
  JobReference jobRef = new JobReference();
  jobRef.setJobId("jobId");
  jobRef.setProjectId("projectId");
  testJob.setJobReference(jobRef);

  when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
  when(response.getStatusCode()).thenReturn(200);
  when(response.getContent()).thenReturn(toStream(testJob));

  Sleeper sleeper = new FastNanoClockAndSleeper();
  JobServiceImpl.startJob(
      testJob,
      new ApiErrorExtractor(),
      bigquery,
      sleeper,
      BackOffAdapter.toGcpBackOff(FluentBackoff.DEFAULT.backoff()));

  verify(response, times(1)).getStatusCode();
  verify(response, times(1)).getContent();
  verify(response, times(1)).getContentType();
  expectedLogs.verifyInfo(String.format("Started BigQuery job: %s", jobRef));
}
 
Example 3
Source File: BigQueryServicesImplTest.java    From beam with Apache License 2.0
/**
 * Tests that {@link BigQueryServicesImpl.JobServiceImpl#startLoadJob} succeeds when the job
 * already exists.
 */
@Test
public void testStartLoadJobSucceedsAlreadyExists() throws IOException, InterruptedException {
  Job testJob = new Job();
  JobReference jobRef = new JobReference();
  jobRef.setJobId("jobId");
  jobRef.setProjectId("projectId");
  testJob.setJobReference(jobRef);

  when(response.getStatusCode()).thenReturn(409); // 409 means already exists

  Sleeper sleeper = new FastNanoClockAndSleeper();
  JobServiceImpl.startJob(
      testJob,
      new ApiErrorExtractor(),
      bigquery,
      sleeper,
      BackOffAdapter.toGcpBackOff(FluentBackoff.DEFAULT.backoff()));

  verify(response, times(1)).getStatusCode();
  verify(response, times(1)).getContent();
  verify(response, times(1)).getContentType();
  expectedLogs.verifyNotLogged("Started BigQuery job");
}
 
Example 4
Source File: BigQueryServicesImplTest.java    From beam with Apache License 2.0
/** Tests that {@link BigQueryServicesImpl.JobServiceImpl#startLoadJob} succeeds with a retry. */
@Test
public void testStartLoadJobRetry() throws IOException, InterruptedException {
  Job testJob = new Job();
  JobReference jobRef = new JobReference();
  jobRef.setJobId("jobId");
  jobRef.setProjectId("projectId");
  testJob.setJobReference(jobRef);

  // First response is 403 rate limited, second response has valid payload.
  when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
  when(response.getStatusCode()).thenReturn(403).thenReturn(200);
  when(response.getContent())
      .thenReturn(toStream(errorWithReasonAndStatus("rateLimitExceeded", 403)))
      .thenReturn(toStream(testJob));

  Sleeper sleeper = new FastNanoClockAndSleeper();
  JobServiceImpl.startJob(
      testJob,
      new ApiErrorExtractor(),
      bigquery,
      sleeper,
      BackOffAdapter.toGcpBackOff(FluentBackoff.DEFAULT.backoff()));

  verify(response, times(2)).getStatusCode();
  verify(response, times(2)).getContent();
  verify(response, times(2)).getContentType();
}
 
Example 5
Source File: FakeJobService.java    From beam with Apache License 2.0
@Override
public void startLoadJob(JobReference jobRef, JobConfigurationLoad loadConfig)
    throws IOException {
  synchronized (allJobs) {
    verifyUniqueJobId(jobRef.getJobId());
    Job job = new Job();
    job.setJobReference(jobRef);
    job.setConfiguration(new JobConfiguration().setLoad(loadConfig));
    job.setKind("bigquery#job");
    job.setStatus(new JobStatus().setState("PENDING"));

    // Copy the files to a new location for import, as the temporary files will be deleted by
    // the caller.
    if (loadConfig.getSourceUris().size() > 0) {
      ImmutableList.Builder<ResourceId> sourceFiles = ImmutableList.builder();
      ImmutableList.Builder<ResourceId> loadFiles = ImmutableList.builder();
      for (String filename : loadConfig.getSourceUris()) {
        sourceFiles.add(FileSystems.matchNewResource(filename, false /* isDirectory */));
        loadFiles.add(
            FileSystems.matchNewResource(
                filename + ThreadLocalRandom.current().nextInt(), false /* isDirectory */));
      }

      FileSystems.copy(sourceFiles.build(), loadFiles.build());
      filesForLoadJobs.put(jobRef.getProjectId(), jobRef.getJobId(), loadFiles.build());
    }

    allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
  }
}
 
Example 6
Source File: FakeJobService.java    From beam with Apache License 2.0
@Override
public void startQueryJob(JobReference jobRef, JobConfigurationQuery query) {
  synchronized (allJobs) {
    Job job = new Job();
    job.setJobReference(jobRef);
    job.setConfiguration(new JobConfiguration().setQuery(query));
    job.setKind("bigquery#job");
    job.setStatus(new JobStatus().setState("PENDING"));
    allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
  }
}
 
Example 7
Source File: FakeJobService.java    From beam with Apache License 2.0
@Override
public void startCopyJob(JobReference jobRef, JobConfigurationTableCopy copyConfig)
    throws IOException {
  synchronized (allJobs) {
    verifyUniqueJobId(jobRef.getJobId());
    Job job = new Job();
    job.setJobReference(jobRef);
    job.setConfiguration(new JobConfiguration().setCopy(copyConfig));
    job.setKind("bigquery#job");
    job.setStatus(new JobStatus().setState("PENDING"));
    allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
  }
}
 
Example 8
Source File: AbstractExportToCloudStorage.java    From hadoop-connectors with Apache License 2.0
@Override
public void beginExport() throws IOException {
  // Create job and configuration.
  JobConfigurationExtract extractConfig = new JobConfigurationExtract();

  // Set source.
  extractConfig.setSourceTable(tableToExport.getTableReference());

  // Set destination.
  extractConfig.setDestinationUris(getExportPaths());
  extractConfig.set(DESTINATION_FORMAT_KEY, fileFormat.getFormatIdentifier());

  JobConfiguration config = new JobConfiguration();
  config.setExtract(extractConfig);

  JobReference jobReference =
      bigQueryHelper.createJobReference(
          projectId, "exporttocloudstorage", tableToExport.getLocation());

  Job job = new Job();
  job.setConfiguration(config);
  job.setJobReference(jobReference);

  // Insert and run job.
  try {
    Job response = bigQueryHelper.insertJobOrFetchDuplicate(projectId, job);
    logger.atFine().log("Got response '%s'", response);
    exportJobReference = response.getJobReference();
  } catch (IOException e) {
    String error = String.format(
        "Error while exporting table %s",
        BigQueryStrings.toString(tableToExport.getTableReference()));
    throw new IOException(error, e);
  }
}
 
Example 9
Source File: BigQueryUtilsTest.java    From hadoop-connectors with Apache License 2.0
/**
 * Mocks result of BigQuery for polling for job completion.
 *
 * @throws IOException on IO error.
 */
@Before
public void setUp() throws IOException {

  jobReference = new JobReference().setJobId("test-job-id").setLocation("test-job-location");

  // Create the unfinished job result.
  notDoneJob = new Job();
  notDoneJobStatus = new JobStatus();
  notDoneJobStatus.setState("NOT DONE");
  notDoneJobStatus.setErrorResult(null);
  notDoneJob.setStatus(notDoneJobStatus);
  notDoneJob.setJobReference(jobReference);

  // Create the finished job result.
  job = new Job();
  jobStatus = new JobStatus();
  jobStatus.setState("DONE");
  jobStatus.setErrorResult(null);
  job.setStatus(jobStatus);
  job.setJobReference(jobReference);

  // Mock BigQuery.
  mockBigQuery = mock(Bigquery.class);
  mockBigQueryJobs = mock(Bigquery.Jobs.class);
  mockJobsGet = mock(Bigquery.Jobs.Get.class);
  when(mockBigQuery.jobs()).thenReturn(mockBigQueryJobs);
  when(mockBigQueryJobs.get(projectId, jobReference.getJobId()))
      .thenReturn(mockJobsGet)
      .thenReturn(mockJobsGet);
  when(mockJobsGet.setLocation(any(String.class))).thenReturn(mockJobsGet);
  when(mockJobsGet.execute()).thenReturn(job);

  // Constructor coverage
  new BigQueryUtils();

  // Mock Progressable.
  mockProgressable = mock(Progressable.class);
}
 
Example 10
Source File: BigQueryHelpersTest.java    From beam with Apache License 2.0
@Test
public void testPendingJobManager() throws Exception {
  PendingJobManager jobManager =
      new PendingJobManager(
          BackOffAdapter.toGcpBackOff(
              FluentBackoff.DEFAULT
                  .withMaxRetries(Integer.MAX_VALUE)
                  .withInitialBackoff(Duration.millis(10))
                  .withMaxBackoff(Duration.millis(10))
                  .backoff()));

  Set<String> succeeded = Sets.newHashSet();
  for (int i = 0; i < 5; i++) {
    Job currentJob = new Job();
    currentJob.setKind("bigquery#job");
    PendingJob pendingJob =
        new PendingJob(
            retryId -> {
              if (new Random().nextInt(2) == 0) {
                throw new RuntimeException("Failing to start.");
              }
              currentJob.setJobReference(
                  new JobReference()
                      .setProjectId("")
                      .setLocation("")
                      .setJobId(retryId.getJobId()));
              return null;
            },
            retryId -> {
              if (retryId.getRetryIndex() < 5) {
                currentJob.setStatus(new JobStatus().setErrorResult(new ErrorProto()));
              } else {
                currentJob.setStatus(new JobStatus().setErrorResult(null));
              }
              return currentJob;
            },
            retryId -> {
              if (retryId.getJobId().equals(currentJob.getJobReference().getJobId())) {
                return currentJob;
              } else {
                return null;
              }
            },
            100,
            "JOB_" + i);
    jobManager.addPendingJob(
        pendingJob,
        j -> {
          succeeded.add(j.currentJobId.getJobId());
          return null;
        });
  }

  jobManager.waitForDone();
  Set<String> expectedJobs =
      ImmutableSet.of("JOB_0-5", "JOB_1-5", "JOB_2-5", "JOB_3-5", "JOB_4-5");
  assertEquals(expectedJobs, succeeded);
}
 
Example 11
Source File: BigQueryHelper.java    From hadoop-connectors with Apache License 2.0
/**
 * Imports data from GCS into BigQuery via a load job. Optionally polls for completion before
 * returning.
 *
 * @param projectId the project on whose behalf to perform the load.
 * @param tableRef the reference to the destination table.
 * @param schema the schema of the source data to populate the destination table by.
 * @param timePartitioning time partitioning to populate the destination table.
 * @param kmsKeyName the Cloud KMS encryption key used to protect the output table.
 * @param sourceFormat the file format of the source data.
 * @param createDisposition the create disposition of the output table.
 * @param writeDisposition the write disposition of the output table.
 * @param gcsPaths the location of the source data in GCS.
 * @param awaitCompletion if true, block and poll until job completes, otherwise return as soon as
 *     the job has been successfully dispatched.
 * @throws IOException on IO error.
 * @throws InterruptedException if interrupted while waiting for job completion.
 */
public void importFromGcs(
    String projectId,
    TableReference tableRef,
    @Nullable TableSchema schema,
    @Nullable TimePartitioning timePartitioning,
    @Nullable String kmsKeyName,
    BigQueryFileFormat sourceFormat,
    String createDisposition,
    String writeDisposition,
    List<String> gcsPaths,
    boolean awaitCompletion)
    throws IOException, InterruptedException {
  logger.atInfo().log(
      "Importing into table '%s' from %s paths; path[0] is '%s'; awaitCompletion: %s;"
          + " timePartitioning: %s",
      lazy(() -> BigQueryStrings.toString(tableRef)),
      gcsPaths.size(),
      gcsPaths.isEmpty() ? "(empty)" : gcsPaths.get(0),
      awaitCompletion,
      timePartitioning);

  // Create load conf with minimal requirements.
  JobConfigurationLoad loadConfig = new JobConfigurationLoad();
  loadConfig.setSchema(schema);
  loadConfig.setSourceFormat(sourceFormat.getFormatIdentifier());
  loadConfig.setSourceUris(gcsPaths);
  loadConfig.setDestinationTable(tableRef);
  loadConfig.setTimePartitioning(timePartitioning);
  loadConfig.setCreateDisposition(createDisposition);
  loadConfig.setWriteDisposition(writeDisposition);
  if (!Strings.isNullOrEmpty(kmsKeyName)) {
    loadConfig.setDestinationEncryptionConfiguration(
        new EncryptionConfiguration().setKmsKeyName(kmsKeyName));
  }
  // Auto detect the schema if we're not given one, otherwise use the passed schema.
  if (schema == null) {
    logger.atInfo().log("No import schema provided, auto detecting schema.");
    loadConfig.setAutodetect(true);
  } else {
    logger.atInfo().log("Using provided import schema '%s'.", schema);
  }

  JobConfiguration config = new JobConfiguration();
  config.setLoad(loadConfig);

  // Get the dataset to determine the location
  Dataset dataset;
  try {
    dataset = service.datasets().get(tableRef.getProjectId(), tableRef.getDatasetId()).execute();
  } catch (IOException ioe) {
    throw new IOException(
        String.format(
            "Failed to get dataset '%s' in project '%s' for table '%s'",
            tableRef.getDatasetId(), tableRef.getProjectId(), tableRef),
        ioe);
  }

  JobReference jobReference =
      createJobReference(projectId, "direct-bigqueryhelper-import", dataset.getLocation());
  Job job = new Job();
  job.setConfiguration(config);
  job.setJobReference(jobReference);

  // Insert and run job.
  insertJobOrFetchDuplicate(projectId, job);

  if (awaitCompletion) {
    // Poll until job is complete.
    BigQueryUtils.waitForJobCompletion(getRawBigquery(), projectId, jobReference, () -> {});
  }
}
 
Example 12
Source File: BigQueryHelper.java    From hadoop-connectors with Apache License 2.0
/**
 * Exports BigQuery results into GCS, optionally polling for completion before returning.
 *
 * @param projectId the project on whose behalf to perform the export.
 * @param tableRef the table to export.
 * @param gcsPaths the GCS paths to export to.
 * @param awaitCompletion if true, block and poll until job completes, otherwise return as soon as
 *     the job has been successfully dispatched.
 * @throws IOException on IO error.
 * @throws InterruptedException on interrupt.
 */
public void exportBigQueryToGcs(
    String projectId, TableReference tableRef, List<String> gcsPaths, boolean awaitCompletion)
    throws IOException, InterruptedException {
  logger.atFine().log(
      "exportBigQueryToGcs(bigquery, '%s', '%s', '%s', '%s')",
      projectId, BigQueryStrings.toString(tableRef), gcsPaths, awaitCompletion);
  logger.atInfo().log(
      "Exporting table '%s' to %s paths; path[0] is '%s'; awaitCompletion: %s",
      BigQueryStrings.toString(tableRef),
      gcsPaths.size(),
      gcsPaths.isEmpty() ? "(empty)" : gcsPaths.get(0),
      awaitCompletion);

  // Create job and configuration.
  JobConfigurationExtract extractConfig = new JobConfigurationExtract();

  // Set source.
  extractConfig.setSourceTable(tableRef);

  // Set destination.
  extractConfig.setDestinationUris(gcsPaths);
  extractConfig.set("destinationFormat", "NEWLINE_DELIMITED_JSON");

  JobConfiguration config = new JobConfiguration();
  config.setExtract(extractConfig);

  // Get the table to determine the location
  Table table = getTable(tableRef);

  JobReference jobReference =
      createJobReference(projectId, "direct-bigqueryhelper-export", table.getLocation());

  Job job = new Job();
  job.setConfiguration(config);
  job.setJobReference(jobReference);

  // Insert and run job.
  insertJobOrFetchDuplicate(projectId, job);

  if (awaitCompletion) {
    // Poll until job is complete.
    BigQueryUtils.waitForJobCompletion(service, projectId, jobReference, () -> {});
  }
}
 
Example 13
Source File: BigQueryHelperTest.java    From hadoop-connectors with Apache License 2.0
@Before
public void setUp() throws IOException {
  MockitoAnnotations.initMocks(this);
  LoggerConfig.getConfig(GsonBigQueryInputFormat.class).setLevel(Level.FINE);

  // Create fake job reference.
  JobReference fakeJobReference = new JobReference().setProjectId(jobProjectId).setJobId(jobId);

  // Create the job result.
  jobStatus = new JobStatus();
  jobStatus.setState("DONE");
  jobStatus.setErrorResult(null);

  jobHandle = new Job();
  jobHandle.setStatus(jobStatus);
  jobHandle.setJobReference(fakeJobReference);

  // Mocks for Bigquery jobs.
  when(mockBigquery.jobs()).thenReturn(mockBigqueryJobs);

  // Mock getting Bigquery job.
  when(mockBigqueryJobs.get(any(String.class), any(String.class)))
      .thenReturn(mockBigqueryJobsGet);
  when(mockBigqueryJobsGet.setLocation(any(String.class))).thenReturn(mockBigqueryJobsGet);

  // Mock inserting Bigquery job.
  when(mockBigqueryJobs.insert(any(String.class), any(Job.class)))
      .thenReturn(mockBigqueryJobsInsert);

  // Fake table.
  fakeTableSchema = new TableSchema();
  fakeTable = new Table().setSchema(fakeTableSchema).setLocation("test_location");

  // Mocks for Bigquery tables.
  when(mockBigquery.tables()).thenReturn(mockBigqueryTables);
  when(mockBigqueryTables.get(any(String.class), any(String.class), any(String.class)))
      .thenReturn(mockBigqueryTablesGet);

  Datasets datasets = Mockito.mock(Datasets.class);
  Datasets.Get datasetsGet = Mockito.mock(Datasets.Get.class);
  Dataset dataset = new Dataset().setLocation("test_location");
  when(mockBigquery.datasets()).thenReturn(datasets);
  when(datasets.get(any(String.class), any(String.class))).thenReturn(datasetsGet);
  when(datasetsGet.execute()).thenReturn(dataset);

  // Create table reference.
  tableRef = new TableReference();
  tableRef.setProjectId(projectId);
  tableRef.setDatasetId(datasetId);
  tableRef.setTableId(tableId);

  helper = new BigQueryHelper(mockBigquery);
  helper.setErrorExtractor(mockErrorExtractor);
}
 
Example 14
Source File: GsonBigQueryInputFormatTest.java    From hadoop-connectors with Apache License 2.0
/**
 * Creates an in-memory GHFS.
 *
 * @throws IOException on IO error.
 */
@Before
public void setUp()
    throws IOException {
  MockitoAnnotations.initMocks(this);
  LoggerConfig.getConfig(GsonBigQueryInputFormat.class).setLevel(Level.FINE);

  // Set the Hadoop job configuration.
  config = new JobConf(InMemoryGoogleHadoopFileSystem.getSampleConfiguration());
  config.set(BigQueryConfiguration.PROJECT_ID.getKey(), jobProjectId);
  config.set(BigQueryConfiguration.INPUT_PROJECT_ID.getKey(), dataProjectId);
  config.set(BigQueryConfiguration.INPUT_DATASET_ID.getKey(), intermediateDataset);
  config.set(BigQueryConfiguration.INPUT_TABLE_ID.getKey(), intermediateTable);
  config.set(BigQueryConfiguration.TEMP_GCS_PATH.getKey(), "gs://test_bucket/other_path");
  config.setClass(
      INPUT_FORMAT_CLASS.getKey(),
      GsonBigQueryInputFormat.class,
      AbstractBigQueryInputFormat.class);
  config.setBoolean(BigQueryConfiguration.DELETE_EXPORT_FILES_FROM_GCS.getKey(), true);

  CredentialConfigurationUtil.addTestConfigurationSettings(config);

  // Create a GoogleHadoopFileSystem to use to initialize and write to
  // the in-memory GcsFs.
  ghfs = new InMemoryGoogleHadoopFileSystem();

  JobReference fakeJobReference =
      new JobReference()
          .setProjectId(jobProjectId)
          .setJobId("bigquery-job-1234")
          .setLocation("test-job-location");

  // Create the job result.
  jobStatus = new JobStatus();
  jobStatus.setState("DONE");
  jobStatus.setErrorResult(null);

  jobHandle = new Job();
  jobHandle.setStatus(jobStatus);
  jobHandle.setJobReference(fakeJobReference);

  // Create table reference.
  tableRef = new TableReference();
  tableRef.setProjectId(dataProjectId);
  tableRef.setDatasetId("test_dataset");
  tableRef.setTableId("test_table");

  table = new Table().setTableReference(tableRef).setLocation("test_location");

  when(mockBigQueryHelper.getRawBigquery())
      .thenReturn(mockBigquery);

  // Mocks for Bigquery jobs.
  when(mockBigquery.jobs())
      .thenReturn(mockBigqueryJobs);

  // Mock getting Bigquery job.
  when(mockBigqueryJobs.get(any(String.class), any(String.class)))
      .thenReturn(mockBigqueryJobsGet);
  when(mockBigqueryJobsGet.setLocation(any(String.class))).thenReturn(mockBigqueryJobsGet);
  when(mockBigqueryJobsGet.execute())
      .thenReturn(jobHandle);

  // Mock inserting Bigquery job.
  when(mockBigqueryJobs.insert(any(String.class), any(Job.class)))
      .thenReturn(mockBigqueryJobsInsert);
  when(mockBigqueryJobsInsert.execute())
      .thenReturn(jobHandle);

  // Mocks for Bigquery tables.
  when(mockBigquery.tables())
      .thenReturn(mockBigqueryTables);

  // Mocks for getting Bigquery table.
  when(mockBigqueryTables.get(any(String.class), any(String.class), any(String.class)))
      .thenReturn(mockBigqueryTablesGet);
  when(mockBigqueryTablesGet.execute())
      .thenReturn(table);

  when(mockBigQueryHelper.getTable(any(TableReference.class)))
      .thenReturn(table);

  when(mockBigQueryHelper.createJobReference(
          any(String.class), any(String.class), any(String.class)))
      .thenReturn(fakeJobReference);
  when(mockBigQueryHelper.insertJobOrFetchDuplicate(any(String.class), any(Job.class)))
      .thenReturn(jobHandle);
}