Java Code Examples for org.apache.beam.runners.dataflow.options.DataflowPipelineOptions#setDataflowClient()

The following examples show how to use org.apache.beam.runners.dataflow.options.DataflowPipelineOptions#setDataflowClient(). All of them are drawn from the Apache Beam test suite; the source file for each example is noted above its code.
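The common pattern in all of these examples is to swap the real Google Cloud Dataflow service client for a Mockito mock, so tests can construct and submit pipelines without credentials or network access. A minimal sketch of the idea, assuming Mockito and the google-api-services-dataflow client are on the test classpath:

import static org.mockito.Mockito.mock;

import com.google.api.services.dataflow.Dataflow;
import org.apache.beam.runners.dataflow.DataflowRunner;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class DataflowClientMockSketch {
  static DataflowPipelineOptions mockedOptions() {
    DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    options.setRunner(DataflowRunner.class);
    // Job submission now goes to the mock instead of the real service.
    options.setDataflowClient(mock(Dataflow.class));
    return options;
  }
}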
Example 1
Source File: DataflowPipelineTranslatorTest.java (from Apache Beam, Apache License 2.0)
private static DataflowPipelineOptions buildPipelineOptions() throws IOException {
  GcsUtil mockGcsUtil = mock(GcsUtil.class);
  when(mockGcsUtil.expand(any(GcsPath.class)))
      .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0]));
  when(mockGcsUtil.bucketAccessible(any(GcsPath.class))).thenReturn(true);

  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setGcpCredential(new TestCredential());
  options.setJobName("some-job-name");
  options.setProject("some-project");
  options.setRegion("some-region");
  options.setTempLocation(GcsPath.fromComponents("somebucket", "some/path").toString());
  options.setFilesToStage(new ArrayList<>());
  options.setDataflowClient(buildMockDataflow(new IsValidCreateRequest()));
  options.setGcsUtil(mockGcsUtil);

  // Enable the FileSystems API to know about gs:// URIs in this test.
  FileSystems.setDefaultPipelineOptions(options);

  return options;
}
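The buildMockDataflow(...) helper is not shown in this excerpt. It stubs the Dataflow client's fluent request chain so that jobs().create(...) accepts any request matching the supplied matcher (IsValidCreateRequest is assumed to be an ArgumentMatcher<Job> defined elsewhere in the test class). A sketch along those lines, with the project and region values assumed to match the options above:

private static Dataflow buildMockDataflow(ArgumentMatcher<Job> jobMatcher) throws IOException {
  Dataflow mockDataflowClient = mock(Dataflow.class);
  Dataflow.Projects mockProjects = mock(Dataflow.Projects.class);
  Dataflow.Projects.Locations mockLocations = mock(Dataflow.Projects.Locations.class);
  Dataflow.Projects.Locations.Jobs mockJobs = mock(Dataflow.Projects.Locations.Jobs.class);
  Dataflow.Projects.Locations.Jobs.Create mockRequest =
      mock(Dataflow.Projects.Locations.Jobs.Create.class);

  // Stub the fluent chain client.projects().locations().jobs().create(...).execute().
  when(mockDataflowClient.projects()).thenReturn(mockProjects);
  when(mockProjects.locations()).thenReturn(mockLocations);
  when(mockLocations.jobs()).thenReturn(mockJobs);
  when(mockJobs.create(eq("some-project"), eq("some-region"), argThat(jobMatcher)))
      .thenReturn(mockRequest);

  // The job id that tests later assert on.
  Job resultJob = new Job();
  resultJob.setId("newid");
  when(mockRequest.execute()).thenReturn(resultJob);
  return mockDataflowClient;
}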
 
Example 2
Source File: DataflowRunnerTest.java (from Apache Beam, Apache License 2.0)
private DataflowPipelineOptions buildPipelineOptions() throws IOException {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setProject(PROJECT_ID);
  options.setTempLocation(VALID_TEMP_BUCKET);
  options.setRegion(REGION_ID);
  // Leave filesToStage empty to prevent a default value from being computed from the classpath.
  options.setFilesToStage(new ArrayList<>());
  options.setDataflowClient(buildMockDataflow());
  options.setGcsUtil(mockGcsUtil);
  options.setGcpCredential(new TestCredential());

  // Configure the FileSystem registrar to use these options.
  FileSystems.setDefaultPipelineOptions(options);

  return options;
}
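Here mockGcsUtil is a fixture field on the test class, and PROJECT_ID, REGION_ID, and VALID_TEMP_BUCKET are test constants; none of them appear in the excerpt. A plausible setup, mirroring the stubbing from Example 1 (the constant values are assumptions):

private static final String PROJECT_ID = "some-project";
private static final String REGION_ID = "some-region";
private static final String VALID_TEMP_BUCKET = "gs://valid-bucket/temp";

private GcsUtil mockGcsUtil = mock(GcsUtil.class);

@Before
public void setUp() throws IOException {
  // Pretend every bucket is accessible and every path expands to itself.
  when(mockGcsUtil.bucketAccessible(any(GcsPath.class))).thenReturn(true);
  when(mockGcsUtil.expand(any(GcsPath.class)))
      .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0]));
}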
 
Example 3
Source File: DataflowGroupByKeyTest.java (from Apache Beam, Apache License 2.0)
/**
 * Create a test pipeline that uses the {@link DataflowRunner} so that {@link GroupByKey} is not
 * expanded. This is used for verifying that even without expansion the proper errors show up.
 */
private Pipeline createTestServiceRunner() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setProject("someproject");
  options.setRegion("some-region1");
  options.setGcpTempLocation("gs://staging");
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setDataflowClient(dataflow);
  return Pipeline.create(options);
}
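The dataflow reference here is a mock declared on the test class; combined with NoopPathValidator, it lets the pipeline be created against gs:// paths with no credentials or network access. A typical declaration (assumed, not shown in the excerpt):

@Mock private Dataflow dataflow;

@Before
public void setUp() {
  MockitoAnnotations.initMocks(this);
}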
 
Example 4
Source File: DataflowViewTest.java (from Apache Beam, Apache License 2.0)
private Pipeline createTestBatchRunner() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setProject("someproject");
  options.setRegion("some-region1");
  options.setGcpTempLocation("gs://staging");
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setDataflowClient(dataflow);
  return Pipeline.create(options);
}
 
Example 5
Source File: DataflowViewTest.java (from Apache Beam, Apache License 2.0)
private Pipeline createTestStreamingRunner() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setStreaming(true);
  options.setProject("someproject");
  options.setRegion("some-region1");
  options.setGcpTempLocation("gs://staging");
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setDataflowClient(dataflow);
  return Pipeline.create(options);
}
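Examples 4 and 5 differ only in setStreaming(true), so the same assertions can be exercised against both the batch and streaming translation paths. A hypothetical use of these helpers (the transform chain is illustrative, not from the original test):

Pipeline pipeline = createTestStreamingRunner();
pipeline
    .apply(GenerateSequence.from(0).to(10))
    .apply(Window.<Long>into(FixedWindows.of(Duration.standardSeconds(10))));
// Construction and expansion happen locally; the mocked Dataflow client is
// only consulted once pipeline.run() is invoked.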
 
Example 6
Source File: DataflowRunnerTest.java (from Apache Beam, Apache License 2.0)
@Test
public void testRunWithFiles() throws IOException {
  // Test that the function DataflowRunner.stageFiles works as expected.
  final String cloudDataflowDataset = "somedataset";

  // Create some temporary files.
  File temp1 = File.createTempFile("DataflowRunnerTest-", ".txt");
  temp1.deleteOnExit();
  File temp2 = File.createTempFile("DataflowRunnerTest2-", ".txt");
  temp2.deleteOnExit();

  String overridePackageName = "alias.txt";

  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(
          ImmutableList.of(
              GcsUtil.StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));

  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setFilesToStage(
      ImmutableList.of(
          temp1.getAbsolutePath(), overridePackageName + "=" + temp2.getAbsolutePath()));
  options.setStagingLocation(VALID_STAGING_BUCKET);
  options.setTempLocation(VALID_TEMP_BUCKET);
  options.setTempDatasetId(cloudDataflowDataset);
  options.setProject(PROJECT_ID);
  options.setRegion(REGION_ID);
  options.setJobName("job");
  options.setDataflowClient(buildMockDataflow());
  options.setGcsUtil(mockGcsUtil);
  options.setGcpCredential(new TestCredential());

  when(mockGcsUtil.create(any(GcsPath.class), anyString(), anyInt()))
      .then(
          invocation ->
              FileChannel.open(
                  Files.createTempFile("channel-", ".tmp"),
                  StandardOpenOption.CREATE,
                  StandardOpenOption.WRITE,
                  StandardOpenOption.DELETE_ON_CLOSE));

  Pipeline p = buildDataflowPipeline(options);

  DataflowPipelineJob job = (DataflowPipelineJob) p.run();
  assertEquals("newid", job.getJobId());

  ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
  Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
  Job workflowJob = jobCaptor.getValue();
  assertValidJob(workflowJob);

  assertEquals(2, workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().size());
  DataflowPackage workflowPackage1 =
      workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().get(0);
  assertThat(workflowPackage1.getName(), endsWith(getFileExtension(temp1.getAbsolutePath())));
  DataflowPackage workflowPackage2 =
      workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().get(1);
  assertEquals(overridePackageName, workflowPackage2.getName());

  assertEquals(
      GcsPath.fromUri(VALID_TEMP_BUCKET).toResourceName(),
      workflowJob.getEnvironment().getTempStoragePrefix());
  assertEquals(cloudDataflowDataset, workflowJob.getEnvironment().getDataset());
  assertEquals(
      DataflowRunnerInfo.getDataflowRunnerInfo().getName(),
      workflowJob.getEnvironment().getUserAgent().get("name"));
  assertEquals(
      DataflowRunnerInfo.getDataflowRunnerInfo().getVersion(),
      workflowJob.getEnvironment().getUserAgent().get("version"));
}
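The buildDataflowPipeline(options) and assertValidJob(job) helpers are defined elsewhere in DataflowRunnerTest. A sketch of what the pipeline-building helper plausibly looks like (the transform names and gs:// paths are assumptions):

private static Pipeline buildDataflowPipeline(DataflowPipelineOptions options) {
  options.setStableUniqueNames(CheckEnabled.ERROR);
  options.setRunner(DataflowRunner.class);
  Pipeline p = Pipeline.create(options);

  // A trivial read/write pair is enough to force file staging and job creation.
  p.apply("ReadMyFile", TextIO.read().from("gs://bucket/object"))
      .apply("WriteMyFile", TextIO.write().to("gs://bucket/object"));

  // Enable the FileSystems API to know about gs:// URIs in this test.
  FileSystems.setDefaultPipelineOptions(options);
  return p;
}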