org.apache.beam.runners.dataflow.options.DataflowPipelineOptions Java Examples

The following examples show how to use org.apache.beam.runners.dataflow.options.DataflowPipelineOptions. Each example notes the original project and source file it was taken from.
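Before the examples, here is a minimal sketch (not taken from any of the projects below) of the typical way DataflowPipelineOptions is constructed and used to submit a pipeline to Dataflow; the project id, region, and gs:// path are placeholders you would replace with your own values.

import org.apache.beam.runners.dataflow.DataflowRunner;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;

public class MinimalDataflowExample {
  public static void main(String[] args) {
    // Parse DataflowPipelineOptions from command-line args, e.g. --project=... --region=...
    DataflowPipelineOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(DataflowPipelineOptions.class);
    options.setRunner(DataflowRunner.class);
    options.setProject("my-project");               // placeholder project id
    options.setRegion("us-central1");               // placeholder region
    options.setTempLocation("gs://my-bucket/temp"); // placeholder GCS temp location

    // Build and submit a trivial pipeline.
    Pipeline p = Pipeline.create(options);
    p.apply(Create.of("hello", "dataflow"));
    p.run();
  }
}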
Example #1
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0
/** Tests that all reads are consumed by at least one {@link PTransform}. */
@Test
public void testUnconsumedReads() throws IOException {
  DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
  RuntimeTestOptions options = dataflowOptions.as(RuntimeTestOptions.class);
  Pipeline p = buildDataflowPipeline(dataflowOptions);
  p.apply(TextIO.read().from(options.getInput()));
  DataflowRunner.fromOptions(dataflowOptions).replaceTransforms(p);
  final AtomicBoolean unconsumedSeenAsInput = new AtomicBoolean();
  p.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          unconsumedSeenAsInput.set(true);
        }
      });
  assertThat(unconsumedSeenAsInput.get(), is(true));
}
 
Example #2
Source File: BatchStatefulParDoOverridesTest.java    From beam with Apache License 2.0
@Test
@Ignore(
    "TODO: BEAM-2902 Add support for user state in a ParDo.Multi once PTransformMatcher "
        + "exposes a way to know when the replacement is not required by checking that the "
        + "preceding ParDos to a GBK are key preserving.")
public void testFnApiMultiOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions("--experiments=beam_fn_api");
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {};
  TupleTag<Integer> sideOutputTag = new TupleTag<Integer>() {};

  DummyStatefulDoFn fn = new DummyStatefulDoFn();
  pipeline
      .apply(Create.of(KV.of(1, 2)))
      .apply(ParDo.of(fn).withOutputTags(mainOutputTag, TupleTagList.of(sideOutputTag)));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
}
 
Example #3
Source File: BatchStatefulParDoOverridesTest.java    From beam with Apache License 2.0
private static DataflowPipelineOptions buildPipelineOptions(String... args) throws IOException {
  GcsUtil mockGcsUtil = mock(GcsUtil.class);
  when(mockGcsUtil.expand(any(GcsPath.class)))
      .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0]));
  when(mockGcsUtil.bucketAccessible(any(GcsPath.class))).thenReturn(true);

  DataflowPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setGcpCredential(new TestCredential());
  options.setJobName("some-job-name");
  options.setProject("some-project");
  options.setRegion("some-region");
  options.setTempLocation(GcsPath.fromComponents("somebucket", "some/path").toString());
  options.setFilesToStage(new ArrayList<>());
  options.setGcsUtil(mockGcsUtil);

  // Enable the FileSystems API to know about gs:// URIs in this test.
  FileSystems.setDefaultPipelineOptions(options);

  return options;
}
 
Example #4
Source File: BatchStatefulParDoOverridesTest.java    From beam with Apache License 2.0
@Test
public void testMultiOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {};
  TupleTag<Integer> sideOutputTag = new TupleTag<Integer>() {};

  DummyStatefulDoFn fn = new DummyStatefulDoFn();
  pipeline
      .apply(Create.of(KV.of(1, 2)))
      .apply(ParDo.of(fn).withOutputTags(mainOutputTag, TupleTagList.of(sideOutputTag)));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
}
 
Example #5
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0
/**
 * Tests that the {@link DataflowRunner} with {@code --templateLocation} throws the appropriate
 * exception when an output file is not writable.
 */
@Test
public void testTemplateRunnerLoggedErrorForFile() throws Exception {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setJobName("TestJobName");
  options.setRunner(DataflowRunner.class);
  options.setTemplateLocation("//bad/path");
  options.setProject("test-project");
  options.setRegion(REGION_ID);
  options.setTempLocation(tmpFolder.getRoot().getPath());
  options.setGcpCredential(new TestCredential());
  options.setPathValidatorClass(NoopPathValidator.class);
  Pipeline p = Pipeline.create(options);

  thrown.expectMessage("Cannot create output file at");
  thrown.expect(RuntimeException.class);
  p.run();
}
 
Example #6
Source File: TextImportPipeline.java    From DataflowTemplates with Apache License 2.0
public static void main(String[] args) {

  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  Pipeline p = Pipeline.create(options);

  SpannerConfig spannerConfig =
      SpannerConfig.create()
          .withHost(options.getSpannerHost())
          .withInstanceId(options.getInstanceId())
          .withDatabaseId(options.getDatabaseId());

  p.apply(new TextImportTransform(spannerConfig, options.getImportManifest()));

  PipelineResult result = p.run();
  if (options.getWaitUntilFinish()
      &&
      /* There is a Dataflow job to wait for only when the template location is null; otherwise
       * this run only generates a template and does not start a Dataflow job.
       */
      options.as(DataflowPipelineOptions.class).getTemplateLocation() == null) {
    result.waitUntilFinish();
  }
}
 
Example #7
Source File: GCSUploadMain.java    From beam with Apache License 2.0
public static void main(String[] args) {
  DataflowPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class);
  FileSystems.setDefaultPipelineOptions(options);
  GcsStager stager = GcsStager.fromOptions(options);
  stager.stageFiles(
      options.getFilesToStage().stream()
          .map(
              (String source) -> {
                try {
                  File file = new File(source);
                  HashCode hashCode = Files.asByteSource(file).hash(Hashing.sha256());
                  return PackageUtil.StagedFile.of(
                      source,
                      hashCode.toString(),
                      Environments.createStagingFileName(file, hashCode));
                } catch (IOException e) {
                  throw new UncheckedIOException(e);
                }
              })
          .collect(Collectors.toList()));
}
 
Example #8
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Test
public void testInaccessibleProvider() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline pipeline = Pipeline.create(options);
  DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options);

  pipeline.apply(TextIO.read().from(new TestValueProvider()));

  // Check that translation does not fail.
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  t.translate(
      pipeline,
      pipelineProto,
      sdkComponents,
      DataflowRunner.fromOptions(options),
      Collections.emptyList());
}
 
Example #9
Source File: IsmReaderFactoryTest.java    From beam with Apache License 2.0
@Before
public void setUp() {
  options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  logicalReferenceCache = CacheBuilder.newBuilder().weakValues().build();

  executionContext =
      BatchModeExecutionContext.forTesting(
          PipelineOptionsFactory.as(DataflowPipelineOptions.class),
          NameContextsForTests.nameContextForTest().stageName());

  DataflowExecutionState state =
      executionContext
          .getExecutionStateRegistry()
          .getState(
              NameContextsForTests.nameContextForTest(),
              "test",
              null /*container */,
              NoopProfileScope.NOOP);

  operationContext =
      executionContext.createOperationContext(NameContextsForTests.nameContextForTest());
  stateCloseable = executionContext.getExecutionStateTracker().enterState(state);
}
 
Example #10
Source File: DataflowRunner.java    From beam with Apache License 2.0
/** Helper to configure the Dataflow Job Environment based on the user's job options. */
private static Map<String, Object> getEnvironmentVersion(DataflowPipelineOptions options) {
  DataflowRunnerInfo runnerInfo = DataflowRunnerInfo.getDataflowRunnerInfo();
  String majorVersion;
  String jobType;
  if (hasExperiment(options, "beam_fn_api")) {
    majorVersion = runnerInfo.getFnApiEnvironmentMajorVersion();
    jobType = options.isStreaming() ? "FNAPI_STREAMING" : "FNAPI_BATCH";
  } else {
    majorVersion = runnerInfo.getLegacyEnvironmentMajorVersion();
    jobType = options.isStreaming() ? "STREAMING" : "JAVA_BATCH_AUTOSCALING";
  }
  return ImmutableMap.of(
      PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, majorVersion,
      PropertyNames.ENVIRONMENT_VERSION_JOB_TYPE_KEY, jobType);
}
 
Example #11
Source File: DataflowRunner.java    From beam with Apache License 2.0
@VisibleForTesting
static String getContainerImageForJob(DataflowPipelineOptions options) {
  String workerHarnessContainerImage = options.getWorkerHarnessContainerImage();

  String javaVersionId =
      Float.parseFloat(System.getProperty("java.specification.version")) >= 9 ? "java11" : "java";
  if (!workerHarnessContainerImage.contains("IMAGE")) {
    return workerHarnessContainerImage;
  } else if (hasExperiment(options, "beam_fn_api")) {
    return workerHarnessContainerImage.replace("IMAGE", "java");
  } else if (options.isStreaming()) {
    return workerHarnessContainerImage.replace(
        "IMAGE", String.format("beam-%s-streaming", javaVersionId));
  } else {
    return workerHarnessContainerImage.replace(
        "IMAGE", String.format("beam-%s-batch", javaVersionId));
  }
}
 
Example #12
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0
private DataflowPipelineOptions buildPipelineOptions() throws IOException {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setProject(PROJECT_ID);
  options.setTempLocation(VALID_TEMP_BUCKET);
  options.setRegion(REGION_ID);
  // Set FILES_PROPERTY to empty to prevent a default value calculated from classpath.
  options.setFilesToStage(new ArrayList<>());
  options.setDataflowClient(buildMockDataflow());
  options.setGcsUtil(mockGcsUtil);
  options.setGcpCredential(new TestCredential());

  // Configure the FileSystem registrar to use these options.
  FileSystems.setDefaultPipelineOptions(options);

  return options;
}
 
Example #13
Source File: WorkerCustomSourcesTest.java    From beam with Apache License 2.0
static com.google.api.services.dataflow.model.Source translateIOToCloudSource(
    BoundedSource<?> io, DataflowPipelineOptions options) throws Exception {
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline p = Pipeline.create(options);
  p.begin().apply(Read.from(io));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  SdkComponents sdkComponents = SdkComponents.create();
  RunnerApi.Environment defaultEnvironmentForDataflow =
      Environments.createDockerEnvironment("dummy-image-url");
  sdkComponents.registerEnvironment(defaultEnvironmentForDataflow);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);

  Job workflow =
      translator
          .translate(p, pipelineProto, sdkComponents, runner, new ArrayList<DataflowPackage>())
          .getJob();
  Step step = workflow.getSteps().get(0);

  return stepToCloudSource(step);
}
 
Example #14
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0
@Test
public void testUploadGraph() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("upload_graph"));
  Pipeline p = buildDataflowPipeline(options);
  DataflowPipelineJob job = (DataflowPipelineJob) p.run();

  ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
  Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
  assertValidJob(jobCaptor.getValue());
  assertTrue(jobCaptor.getValue().getSteps().isEmpty());
  assertTrue(
      jobCaptor
          .getValue()
          .getStepsLocation()
          .startsWith("gs://valid-bucket/temp/staging/dataflow_graph"));
}
 
Example #15
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
private static DataflowPipelineOptions buildPipelineOptions() throws IOException {
  GcsUtil mockGcsUtil = mock(GcsUtil.class);
  when(mockGcsUtil.expand(any(GcsPath.class)))
      .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0]));
  when(mockGcsUtil.bucketAccessible(any(GcsPath.class))).thenReturn(true);

  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setGcpCredential(new TestCredential());
  options.setJobName("some-job-name");
  options.setProject("some-project");
  options.setRegion("some-region");
  options.setTempLocation(GcsPath.fromComponents("somebucket", "some/path").toString());
  options.setFilesToStage(new ArrayList<>());
  options.setDataflowClient(buildMockDataflow(new IsValidCreateRequest()));
  options.setGcsUtil(mockGcsUtil);

  // Enable the FileSystems API to know about gs:// URIs in this test.
  FileSystems.setDefaultPipelineOptions(options);

  return options;
}
 
Example #16
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Test
public void testNetworkConfig() throws IOException {
  final String testNetwork = "test-network";

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setNetwork(testNetwork);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              pipelineProto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertEquals(testNetwork, job.getEnvironment().getWorkerPools().get(0).getNetwork());
}
 
Example #17
Source File: DataflowTransport.java    From beam with Apache License 2.0
/** Returns a Google Cloud Dataflow client builder. */
public static Dataflow.Builder newDataflowClient(DataflowPipelineOptions options) {
  String servicePath = options.getDataflowEndpoint();
  ApiComponents components;
  if (servicePath.contains("://")) {
    components = apiComponentsFromUrl(servicePath);
  } else {
    components = new ApiComponents(options.getApiRootUrl(), servicePath);
  }

  return new Dataflow.Builder(
          getTransport(),
          getJsonFactory(),
          chainHttpRequestInitializer(
              options.getGcpCredential(),
              // Do not log 404. It clutters the output and is possibly even required by the
              // caller.
              new RetryHttpRequestInitializer(ImmutableList.of(404))))
      .setApplicationName(options.getAppName())
      .setRootUrl(components.rootUrl)
      .setServicePath(components.servicePath)
      .setGoogleClientRequestInitializer(options.getGoogleApiTrace());
}
 
Example #18
Source File: MonitoringUtil.java    From beam with Apache License 2.0
public static String getGcloudCancelCommand(DataflowPipelineOptions options, String jobId) {

  // If using a different Dataflow API than default, prefix command with an API override.
  String dataflowApiOverridePrefix = "";
  String apiUrl = options.getDataflowClient().getBaseUrl();
  if (!apiUrl.equals(Dataflow.DEFAULT_BASE_URL)) {
    dataflowApiOverridePrefix = String.format("%s=%s ", ENDPOINT_OVERRIDE_ENV_VAR, apiUrl);
  }

  // Assemble cancel command from optional prefix and project/job parameters.
  return String.format(
      "%s%s jobs --project=%s cancel --region=%s %s",
      dataflowApiOverridePrefix,
      GCLOUD_DATAFLOW_PREFIX,
      options.getProject(),
      options.getRegion(),
      jobId);
}
 
Example #19
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0
/**
 * Tests that the {@link DataflowRunner} with {@code --templateLocation} returns normally when the
 * runner is successfully run.
 */
@Test
public void testTemplateRunnerFullCompletion() throws Exception {
  File existingFile = tmpFolder.newFile();
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setJobName("TestJobName");
  options.setGcpCredential(new TestCredential());
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setProject("test-project");
  options.setRegion(REGION_ID);
  options.setRunner(DataflowRunner.class);
  options.setTemplateLocation(existingFile.getPath());
  options.setTempLocation(tmpFolder.getRoot().getPath());
  Pipeline p = Pipeline.create(options);

  p.run();
  expectedLogs.verifyInfo("Template successfully created");
}
 
Example #20
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Test
public void testSubnetworkConfigMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              pipelineProto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertNull(job.getEnvironment().getWorkerPools().get(0).getSubnetwork());
}
 
Example #21
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0
@Test
public void testGcsUploadBufferSizeUnchangedWhenNotDefault() throws IOException {
  int gcsUploadBufferSizeBytes = 12345678;
  DataflowPipelineOptions batchOptions = buildPipelineOptions();
  batchOptions.setGcsUploadBufferSizeBytes(gcsUploadBufferSizeBytes);
  batchOptions.setRunner(DataflowRunner.class);
  Pipeline.create(batchOptions);
  assertEquals(gcsUploadBufferSizeBytes, batchOptions.getGcsUploadBufferSizeBytes().intValue());

  DataflowPipelineOptions streamingOptions = buildPipelineOptions();
  streamingOptions.setStreaming(true);
  streamingOptions.setGcsUploadBufferSizeBytes(gcsUploadBufferSizeBytes);
  streamingOptions.setRunner(DataflowRunner.class);
  Pipeline.create(streamingOptions);
  assertEquals(
      gcsUploadBufferSizeBytes, streamingOptions.getGcsUploadBufferSizeBytes().intValue());
}
 
Example #22
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Test
public void testWorkerMachineTypeConfig() throws IOException {
  final String testMachineType = "test-machine-type";

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setWorkerMachineType(testMachineType);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              pipelineProto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());

  WorkerPool workerPool = job.getEnvironment().getWorkerPools().get(0);
  assertEquals(testMachineType, workerPool.getMachineType());
}
 
Example #23
Source File: MonitoringUtilTest.java    From beam with Apache License 2.0
@Test
public void testOverridesEndpointWithStagedDataflowEndpoint() {
  DataflowPipelineOptions options =
      PipelineOptionsFactory.create().as(DataflowPipelineOptions.class);
  options.setProject(PROJECT_ID);
  options.setRegion(REGION_ID);
  options.setGcpCredential(new TestCredential());
  String stagingDataflowEndpoint = "v0neverExisted";
  options.setDataflowEndpoint(stagingDataflowEndpoint);
  String cancelCommand = MonitoringUtil.getGcloudCancelCommand(options, JOB_ID);
  assertEquals(
      "CLOUDSDK_API_ENDPOINT_OVERRIDES_DATAFLOW=https://dataflow.googleapis.com/v0neverExisted/ "
          + "gcloud dataflow jobs --project=someProject cancel --region=thatRegion 1234",
      cancelCommand);
}
 
Example #24
Source File: DataflowGroupByKeyTest.java    From beam with Apache License 2.0
/**
 * Create a test pipeline that uses the {@link DataflowRunner} so that {@link GroupByKey} is not
 * expanded. This is used for verifying that even without expansion the proper errors show up.
 */
private Pipeline createTestServiceRunner() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setProject("someproject");
  options.setRegion("some-region1");
  options.setGcpTempLocation("gs://staging");
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setDataflowClient(dataflow);
  return Pipeline.create(options);
}
 
Example #25
Source File: BatchStatefulParDoOverridesTest.java    From beam with Apache License 2.0
@Test
public void testSingleOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  DummyStatefulDoFn fn = new DummyStatefulDoFn();
  pipeline.apply(Create.of(KV.of(1, 2))).apply(ParDo.of(fn));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
}
 
Example #26
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0
@Test
public void testTransformTranslator() throws IOException {
  // Test that we can provide a custom translation
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = Pipeline.create(options);
  TestTransform transform = new TestTransform();

  p.apply(Create.of(Arrays.asList(1, 2, 3)).withCoder(BigEndianIntegerCoder.of()))
      .apply(transform);

  DataflowPipelineTranslator translator = DataflowRunner.fromOptions(options).getTranslator();

  DataflowPipelineTranslator.registerTransformTranslator(
      TestTransform.class,
      (transform1, context) -> {
        transform1.translated = true;

        // Note: This is about the minimum needed to fake out a
        // translation. This obviously isn't a real translation.
        TransformTranslator.StepTranslationContext stepContext =
            context.addStep(transform1, "TestTranslate");
        stepContext.addOutput(PropertyNames.OUTPUT, context.getOutput(transform1));
      });

  SdkComponents sdkComponents = SdkComponents.create(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  translator.translate(
      p,
      pipelineProto,
      sdkComponents,
      DataflowRunner.fromOptions(options),
      Collections.emptyList());
  assertTrue(transform.translated);
}
 
Example #27
Source File: DataflowViewTest.java    From beam with Apache License 2.0
private Pipeline createTestStreamingRunner() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setStreaming(true);
  options.setProject("someproject");
  options.setRegion("some-region1");
  options.setGcpTempLocation("gs://staging");
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setDataflowClient(dataflow);
  return Pipeline.create(options);
}
 
Example #28
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
/**
 * Test that in translation the name for a collection (in this case just a Create output) is
 * overridden to be what the Dataflow service expects.
 */
@Test
public void testNamesOverridden() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);

  pipeline.apply("Jazzy", Create.of(3)).setName("foobizzle");

  runner.replaceTransforms(pipeline);

  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();

  // The Create step
  Step step = job.getSteps().get(0);

  // This is the name that is "set by the user" that the Dataflow translator must override
  String userSpecifiedName =
      getString(
          Structs.getListOfMaps(step.getProperties(), PropertyNames.OUTPUT_INFO, null).get(0),
          PropertyNames.USER_NAME);

  // This is the calculated name that must actually be used
  String calculatedName = getString(step.getProperties(), PropertyNames.USER_NAME) + ".out0";

  assertThat(userSpecifiedName, equalTo(calculatedName));
}
 
Example #29
Source File: DataflowViewTest.java    From beam with Apache License 2.0
private Pipeline createTestBatchRunner() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setProject("someproject");
  options.setRegion("some-region1");
  options.setGcpTempLocation("gs://staging");
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setDataflowClient(dataflow);
  return Pipeline.create(options);
}
 
Example #30
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0
@Test
public void testWorkerHarnessContainerImage() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);

  // default image set
  options.setWorkerHarnessContainerImage("some-container");
  assertThat(getContainerImageForJob(options), equalTo("some-container"));

  // batch, legacy
  options.setWorkerHarnessContainerImage("gcr.io/IMAGE/foo");
  options.setExperiments(null);
  options.setStreaming(false);
  System.setProperty("java.specification.version", "1.8");
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java-batch/foo"));
  // batch, legacy, jdk11
  options.setStreaming(false);
  System.setProperty("java.specification.version", "11");
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java11-batch/foo"));
  // streaming, legacy
  System.setProperty("java.specification.version", "1.8");
  options.setStreaming(true);
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java-streaming/foo"));
  // streaming, legacy, jdk11
  System.setProperty("java.specification.version", "11");
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java11-streaming/foo"));
  // streaming, fnapi
  options.setExperiments(ImmutableList.of("experiment1", "beam_fn_api"));
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/java/foo"));
}