Java Code Examples for org.apache.beam.runners.dataflow.options.DataflowPipelineOptions#setExperiments()

The following examples show how to use org.apache.beam.runners.dataflow.options.DataflowPipelineOptions#setExperiments(). The examples are drawn from the Apache Beam project's test suite; the source file and license are noted above each example.
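setExperiments() takes a list of experiment flag names; it is typically set before the pipeline is created, as in the examples below. The following is a minimal sketch, not taken from the Beam test suite; the project ID, region, and the particular experiment names are placeholder values:

// Minimal sketch: enable Dataflow experiments via setExperiments() before
// creating the pipeline. "my-project" and "us-central1" are placeholders.
DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
options.setProject("my-project");
options.setRegion("us-central1");
options.setRunner(DataflowRunner.class);
options.setExperiments(Arrays.asList("upload_graph", "beam_fn_api"));
Pipeline pipeline = Pipeline.create(options);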
Example 1
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0
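/**
 * Tests that the {@link DataflowRunner} with the {@code upload_graph} experiment submits a job
 * whose steps list is empty and whose stepsLocation points into the staging location.
 */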
@Test
public void testUploadGraph() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("upload_graph"));
  Pipeline p = buildDataflowPipeline(options);
  DataflowPipelineJob job = (DataflowPipelineJob) p.run();

  ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
  Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
  assertValidJob(jobCaptor.getValue());
  assertTrue(jobCaptor.getValue().getSteps().isEmpty());
  assertTrue(
      jobCaptor
          .getValue()
          .getStepsLocation()
          .startsWith("gs://valid-bucket/temp/staging/dataflow_graph"));
}
 
Example 2
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0
/**
 * Tests that the {@link DataflowRunner} with {@code --templateLocation} returns normally when the
 * runner is successfully run with upload_graph experiment turned on. The result template should
 * not contain raw steps and stepsLocation file should be set.
 */
@Test
public void testTemplateRunnerWithUploadGraph() throws Exception {
  File existingFile = tmpFolder.newFile();
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setExperiments(Arrays.asList("upload_graph"));
  options.setJobName("TestJobName");
  options.setGcpCredential(new TestCredential());
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setProject("test-project");
  options.setRegion(REGION_ID);
  options.setRunner(DataflowRunner.class);
  options.setTemplateLocation(existingFile.getPath());
  options.setTempLocation(tmpFolder.getRoot().getPath());
  Pipeline p = Pipeline.create(options);
  p.apply(Create.of(ImmutableList.of(1)));
  p.run();
  expectedLogs.verifyInfo("Template successfully created");
  ObjectMapper objectMapper = new ObjectMapper();
  JsonNode node = objectMapper.readTree(existingFile);
  assertEquals(0, node.get("steps").size());
  assertNotNull(node.get("stepsLocation"));
}
 
Example 3
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Test
public void testToSingletonTranslationWithFnApiSideInput() throws Exception {
  // A "change detector" test that makes sure the translation
  // of getting a PCollectionView<T> does not change
  // in bad ways during refactor

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("beam_fn_api"));
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1)).apply(View.asSingleton());
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(9, steps.size());

  Step collectionToSingletonStep = steps.get(steps.size() - 1);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());

  @SuppressWarnings("unchecked")
  List<Map<String, Object>> ctsOutputs =
      (List<Map<String, Object>>)
          steps.get(steps.size() - 1).getProperties().get(PropertyNames.OUTPUT_INFO);
  assertTrue(Structs.getBoolean(Iterables.getOnlyElement(ctsOutputs), "use_indexed_format"));
}
 
Example 4
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Test
public void testToIterableTranslationWithFnApiSideInput() throws Exception {
  // A "change detector" test that makes sure the translation
  // of getting a PCollectionView<Iterable<T>> does not change
  // in bad ways during refactor

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("beam_fn_api"));
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(View.asIterable());

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(5, steps.size());

  @SuppressWarnings("unchecked")
  List<Map<String, Object>> ctsOutputs =
      (List<Map<String, Object>>)
          steps.get(steps.size() - 1).getProperties().get(PropertyNames.OUTPUT_INFO);
  assertTrue(Structs.getBoolean(Iterables.getOnlyElement(ctsOutputs), "use_indexed_format"));
  Step collectionToSingletonStep = steps.get(steps.size() - 1);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());
}
 
Example 5
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0
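/**
 * Tests that the worker harness container image for a job is selected based on the configured
 * image, streaming mode, the Java specification version, and whether the {@code beam_fn_api}
 * experiment is enabled.
 */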
@Test
public void testWorkerHarnessContainerImage() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);

  // default image set
  options.setWorkerHarnessContainerImage("some-container");
  assertThat(getContainerImageForJob(options), equalTo("some-container"));

  // batch, legacy
  options.setWorkerHarnessContainerImage("gcr.io/IMAGE/foo");
  options.setExperiments(null);
  options.setStreaming(false);
  System.setProperty("java.specification.version", "1.8");
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java-batch/foo"));
  // batch, legacy, jdk11
  options.setStreaming(false);
  System.setProperty("java.specification.version", "11");
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java11-batch/foo"));
  // streaming, legacy
  System.setProperty("java.specification.version", "1.8");
  options.setStreaming(true);
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java-streaming/foo"));
  // streaming, legacy, jdk11
  System.setProperty("java.specification.version", "11");
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java11-streaming/foo"));
  // streaming, fnapi
  options.setExperiments(ImmutableList.of("experiment1", "beam_fn_api"));
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/java/foo"));
}
 
Example 6
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
/** Smoke test to fail fast if translation of a splittable ParDo in the Fn API is broken. */
@Test
public void testSplittableParDoTranslationFnApi() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("beam_fn_api"));
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);

  PCollection<String> windowedInput =
      pipeline
          .apply(Impulse.create())
          .apply(
              MapElements.via(
                  new SimpleFunction<byte[], String>() {
                    @Override
                    public String apply(byte[] input) {
                      return "";
                    }
                  }))
          .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))));
  windowedInput.apply(ParDo.of(new TestSplittableFn()));

  runner.replaceTransforms(pipeline);

  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  JobSpecification result =
      translator.translate(
          pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());

  Job job = result.getJob();

  // The job should contain a ParDo step with a "restriction_encoding" property.

  List<Step> steps = job.getSteps();
  Step splittableParDo = null;
  for (Step step : steps) {
    if ("ParallelDo".equals(step.getKind())
        && step.getProperties().containsKey(PropertyNames.RESTRICTION_ENCODING)) {
      assertNull(splittableParDo);
      splittableParDo = step;
    }
  }
  assertNotNull(splittableParDo);

  String fn = Structs.getString(splittableParDo.getProperties(), PropertyNames.SERIALIZED_FN);

  Components componentsProto = result.getPipelineProto().getComponents();
  RehydratedComponents components = RehydratedComponents.forComponents(componentsProto);
  RunnerApi.PTransform splittableTransform = componentsProto.getTransformsOrThrow(fn);
  assertEquals(
      PTransformTranslation.PAR_DO_TRANSFORM_URN, splittableTransform.getSpec().getUrn());
  ParDoPayload payload = ParDoPayload.parseFrom(splittableTransform.getSpec().getPayload());
  assertThat(
      ParDoTranslation.doFnWithExecutionInformationFromProto(payload.getDoFn()).getDoFn(),
      instanceOf(TestSplittableFn.class));
  Coder expectedRestrictionAndStateCoder =
      KvCoder.of(SerializableCoder.of(OffsetRange.class), VoidCoder.of());
  assertEquals(
      expectedRestrictionAndStateCoder, components.getCoder(payload.getRestrictionCoderId()));

  // In the Fn API case, we still translate the restriction coder into the RESTRICTION_ENCODING
  // property as a CloudObject, and it gets passed through the Dataflow backend, but in the end
  // the Dataflow worker will end up fetching it from the SDK transform payload instead.
  Coder<?> restrictionCoder =
      CloudObjects.coderFromCloudObject(
          (CloudObject)
              Structs.getObject(
                  splittableParDo.getProperties(), PropertyNames.RESTRICTION_ENCODING));
  assertEquals(expectedRestrictionAndStateCoder, restrictionCoder);
}
 
Example 7
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
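/**
 * Tests that the portable pipeline proto produced by translation carries the registered artifact
 * dependencies and the Java SDK capabilities in its environment.
 */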
@Test
public void testPortablePipelineContainsExpectedDependenciesAndCapabilities() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("beam_fn_api"));
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);

  PCollection<String> windowedInput =
      pipeline
          .apply(Impulse.create())
          .apply(
              MapElements.via(
                  new SimpleFunction<byte[], String>() {
                    @Override
                    public String apply(byte[] input) {
                      return "";
                    }
                  }))
          .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))));

  runner.replaceTransforms(pipeline);

  File file1 = File.createTempFile("file1-", ".txt");
  file1.deleteOnExit();
  File file2 = File.createTempFile("file2-", ".txt");
  file2.deleteOnExit();
  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(
      Environments.createDockerEnvironment(DataflowRunner.getContainerImageForJob(options))
          .toBuilder()
          .addAllDependencies(
              Environments.getArtifacts(
                  ImmutableList.of("file1.txt=" + file1, "file2.txt=" + file2)))
          .addAllCapabilities(Environments.getJavaCapabilities())
          .build());

  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);

  JobSpecification result =
      translator.translate(
          pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());

  Components componentsProto = result.getPipelineProto().getComponents();
  assertThat(
      Iterables.getOnlyElement(componentsProto.getEnvironmentsMap().values())
          .getCapabilitiesList(),
      containsInAnyOrder(Environments.getJavaCapabilities().toArray(new String[0])));
  assertThat(
      Iterables.getOnlyElement(componentsProto.getEnvironmentsMap().values())
          .getDependenciesList(),
      containsInAnyOrder(
          Environments.getArtifacts(ImmutableList.of("file1.txt=" + file1, "file2.txt=" + file2))
              .toArray(new ArtifactInformation[0])));
}