Java Code Examples for org.apache.beam.model.pipeline.v1.RunnerApi#Pipeline

The following examples show how to use org.apache.beam.model.pipeline.v1.RunnerApi#Pipeline. Each example is taken from an open source project; the source file and license are noted above each snippet.
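Most of the examples below obtain a RunnerApi.Pipeline by translating an SDK Pipeline into its portable proto form. Here is a minimal sketch of that round trip, assuming the PipelineTranslation helper used throughout the examples (its package has moved between Beam releases, so verify the import against your version):

import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.runners.core.construction.PipelineTranslation;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Impulse;

// Build a trivial SDK pipeline and convert it to its portable proto form.
Pipeline p = Pipeline.create();
p.apply(Impulse.create());
RunnerApi.Pipeline proto = PipelineTranslation.toProto(p);
// The proto stores the graph as components keyed by generated IDs.
System.out.println(proto.getComponents().getTransformsMap().keySet());
System.out.println(proto.getRootTransformIdsList());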
Example 1
Source File: InMemoryJobService.java    From beam with Apache License 2.0
private RunnerApi.Pipeline resolveDependencies(RunnerApi.Pipeline pipeline, String stagingToken) {
  Map<String, List<RunnerApi.ArtifactInformation>> resolvedDependencies =
      stagingService.getService().getStagedArtifacts(stagingToken);
  Map<String, RunnerApi.Environment> newEnvironments = new HashMap<>();
  // Replace each environment's declared dependencies with the staged artifacts.
  for (Map.Entry<String, RunnerApi.Environment> entry :
      pipeline.getComponents().getEnvironmentsMap().entrySet()) {
    if (entry.getValue().getDependenciesCount() > 0 && resolvedDependencies == null) {
      throw new RuntimeException(
          "Artifact dependencies provided but not staged for " + entry.getKey());
    }
    newEnvironments.put(
        entry.getKey(),
        entry.getValue().getDependenciesCount() == 0
            ? entry.getValue()
            : entry
                .getValue()
                .toBuilder()
                .clearDependencies()
                .addAllDependencies(resolvedDependencies.get(entry.getKey()))
                .build());
  }
  RunnerApi.Pipeline.Builder builder = pipeline.toBuilder();
  builder.getComponentsBuilder().clearEnvironments().putAllEnvironments(newEnvironments);
  return builder.build();
}
 
Example 2
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Test
public void testMultiGraphPipelineSerialization() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = Pipeline.create(options);

  PCollection<Integer> input = p.begin().apply(Create.of(1, 2, 3));

  input.apply(new UnrelatedOutputCreator());
  input.apply(new UnboundOutputCreator());

  DataflowPipelineTranslator t =
      DataflowPipelineTranslator.fromOptions(
          PipelineOptionsFactory.as(DataflowPipelineOptions.class));

  // Check that translation doesn't fail.
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  JobSpecification jobSpecification =
      t.translate(
          p,
          pipelineProto,
          sdkComponents,
          DataflowRunner.fromOptions(options),
          Collections.emptyList());
  assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob());
}
 
Example 3
Source File: SamzaPipelineRunner.java    From beam with Apache License 2.0
@Override
public PortablePipelineResult run(final Pipeline pipeline, JobInfo jobInfo) {
  // Expand any splittable DoFns within the graph to enable sizing and splitting of bundles.
  Pipeline pipelineWithSdfExpanded =
      ProtoOverrides.updateTransform(
          PTransformTranslation.PAR_DO_TRANSFORM_URN,
          pipeline,
          SplittableParDoExpander.createSizedReplacement());

  // Fused pipeline proto.
  final RunnerApi.Pipeline fusedPipeline =
      GreedyPipelineFuser.fuse(pipelineWithSdfExpanded).toPipeline();
  LOG.info("Portable pipeline to run:");
  LOG.info(PipelineDotRenderer.toDotString(fusedPipeline));
  // The pipeline options coming from the SDK may set an SDK-specific runner,
  // which would break serialization, so reset the runner here to a valid Java
  // runner.
  options.setRunner(SamzaRunner.class);
  try {
    final SamzaRunner runner = SamzaRunner.fromOptions(options);
    return runner.runPortablePipeline(fusedPipeline);
  } catch (Exception e) {
    throw new RuntimeException("Failed to invoke samza job", e);
  }
}
 
Example 4
Source File: InsertFetchAndFilterStreamingSideInputNodesTest.java    From beam with Apache License 2.0
private String findParDoWithSideInput(RunnerApi.Pipeline pipeline) {
  for (Map.Entry<String, RunnerApi.PTransform> entry :
      pipeline.getComponents().getTransformsMap().entrySet()) {
    if (!PTransformTranslation.PAR_DO_TRANSFORM_URN.equals(entry.getValue().getSpec().getUrn())) {
      continue;
    }
    try {
      ParDoPayload payload = ParDoPayload.parseFrom(entry.getValue().getSpec().getPayload());
      if (!payload.getSideInputsMap().isEmpty()) {
        return entry.getKey();
      }
    } catch (InvalidProtocolBufferException e) {
      throw new IllegalStateException(
          String.format("Failed to parse PTransform %s", entry), e);
    }
  }
  throw new IllegalStateException("No side input ptransform found");
}
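
A hedged usage sketch for the helper above: build a pipeline whose ParDo consumes a side input, translate it to a proto, and locate that ParDo. The step names and the DoFn are illustrative, not taken from the original test:

Pipeline p = Pipeline.create();
final PCollectionView<String> view =
    p.apply("side", Create.of("config")).apply(View.asSingleton());
p.apply("main", Create.of(1, 2, 3))
    .apply(
        "parDoWithSideInput",
        ParDo.of(
                new DoFn<Integer, String>() {
                  @ProcessElement
                  public void process(ProcessContext c) {
                    c.output(c.element() + ":" + c.sideInput(view));
                  }
                })
            .withSideInputs(view));
RunnerApi.Pipeline proto = PipelineTranslation.toProto(p);
String transformId = findParDoWithSideInput(proto); // resolves to the ParDo's id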
 
Example 5
Source File: ExpansionServiceTest.java    From beam with Apache License 2.0
@Test
public void testConstruct() {
  Pipeline p = Pipeline.create();
  p.apply(Impulse.create());
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  String inputPcollId =
      Iterables.getOnlyElement(
          Iterables.getOnlyElement(pipelineProto.getComponents().getTransformsMap().values())
              .getOutputsMap()
              .values());
  ExpansionApi.ExpansionRequest request =
      ExpansionApi.ExpansionRequest.newBuilder()
          .setComponents(pipelineProto.getComponents())
          .setTransform(
              RunnerApi.PTransform.newBuilder()
                  .setUniqueName(TEST_NAME)
                  .setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(TEST_URN))
                  .putInputs("input", inputPcollId))
          .setNamespace(TEST_NAMESPACE)
          .build();
  ExpansionApi.ExpansionResponse response = expansionService.expand(request);
  RunnerApi.PTransform expandedTransform = response.getTransform();
  assertEquals(TEST_NAMESPACE + TEST_NAME, expandedTransform.getUniqueName());
  // Verify it has the right input.
  assertEquals(inputPcollId, Iterables.getOnlyElement(expandedTransform.getInputsMap().values()));
  // Verify it has the right output.
  assertEquals("output", Iterables.getOnlyElement(expandedTransform.getOutputsMap().keySet()));
  // Loose check that it's composite, and its children are represented.
  assertNotEquals(expandedTransform.getSubtransformsCount(), 0);
  for (String subtransform : expandedTransform.getSubtransformsList()) {
    assertTrue(response.getComponents().containsTransforms(subtransform));
  }
  // Check that any newly generated components are properly namespaced.
  Set<String> originalIds = allIds(request.getComponents());
  for (String id : allIds(response.getComponents())) {
    assertTrue(id, id.startsWith(TEST_NAMESPACE) || originalIds.contains(id));
  }
}
 
Example 6
Source File: ExpansionServiceTest.java    From beam with Apache License 2.0
@Test
public void testConstructGenerateSequence() {
  ExternalTransforms.ExternalConfigurationPayload payload =
      ExternalTransforms.ExternalConfigurationPayload.newBuilder()
          .putConfiguration(
              "start",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn(BeamUrns.getUrn(RunnerApi.StandardCoders.Enum.VARINT))
                  .setPayload(ByteString.copyFrom(new byte[] {0}))
                  .build())
          .putConfiguration(
              "stop",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn(BeamUrns.getUrn(RunnerApi.StandardCoders.Enum.VARINT))
                  .setPayload(ByteString.copyFrom(new byte[] {1}))
                  .build())
          .build();
  Pipeline p = Pipeline.create();
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  ExpansionApi.ExpansionRequest request =
      ExpansionApi.ExpansionRequest.newBuilder()
          .setComponents(pipelineProto.getComponents())
          .setTransform(
              RunnerApi.PTransform.newBuilder()
                  .setUniqueName(TEST_NAME)
                  .setSpec(
                      RunnerApi.FunctionSpec.newBuilder()
                          .setUrn(GenerateSequence.External.URN)
                          .setPayload(payload.toByteString())))
          .setNamespace(TEST_NAMESPACE)
          .build();
  ExpansionApi.ExpansionResponse response = expansionService.expand(request);
  RunnerApi.PTransform expandedTransform = response.getTransform();
  assertEquals(TEST_NAMESPACE + TEST_NAME, expandedTransform.getUniqueName());
  assertThat(expandedTransform.getInputsCount(), Matchers.is(0));
  assertThat(expandedTransform.getOutputsCount(), Matchers.is(1));
  assertThat(expandedTransform.getSubtransformsCount(), Matchers.greaterThan(0));
}
 
Example 7
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Test
public void testScalingAlgorithmMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              pipelineProto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  // Autoscaling settings are always set.
  assertNull(
      job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getAlgorithm());
  assertEquals(
      0,
      job.getEnvironment()
          .getWorkerPools()
          .get(0)
          .getAutoscalingSettings()
          .getMaxNumWorkers()
          .intValue());
}
 
Example 8
Source File: SparkPortableExecutionTest.java    From beam with Apache License 2.0
@Test(timeout = 120_000)
public void testExecStageWithMultipleConsumers() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(CrashingRunner.class);
  options
      .as(PortablePipelineOptions.class)
      .setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<String, Iterable<String>>> f =
      pipeline
          .apply("impulse", Impulse.create())
          .apply("F", ParDo.of(new DoFnWithSideEffect<>("F")))
          // use GBK to prevent fusion of F, G, and H
          .apply(GroupByKey.create());
  f.apply("G", ParDo.of(new DoFnWithSideEffect<>("G")));
  f.apply("H", ParDo.of(new DoFnWithSideEffect<>("H")));
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
  JobInvocation jobInvocation =
      SparkJobInvoker.createJobInvocation(
          "testExecStageWithMultipleConsumers",
          "testExecStageWithMultipleConsumersRetrievalToken",
          sparkJobExecutor,
          pipelineProto,
          options.as(SparkPipelineOptions.class));
  jobInvocation.start();
  Assert.assertEquals(Enum.DONE, jobInvocation.getState());
}
 
Example 9
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Test
public void testToIterableTranslationWithIsmSideInput() throws Exception {
  // A "change detector" test that makes sure the translation
  // of getting a PCollectionView<Iterable<T>> does not change
  // in bad ways during refactor

  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(View.asIterable());

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(3, steps.size());

  @SuppressWarnings("unchecked")
  List<Map<String, Object>> toIsmRecordOutputs =
      (List<Map<String, Object>>)
          steps.get(steps.size() - 2).getProperties().get(PropertyNames.OUTPUT_INFO);
  assertTrue(
      Structs.getBoolean(Iterables.getOnlyElement(toIsmRecordOutputs), "use_indexed_format"));

  Step collectionToSingletonStep = steps.get(steps.size() - 1);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());
}
 
Example 10
Source File: DefaultArtifactResolverTest.java    From beam with Apache License 2.0
private RunnerApi.Pipeline createEmptyPipeline(
    Iterable<RunnerApi.ArtifactInformation> dependencies) {
  return RunnerApi.Pipeline.newBuilder()
      .setComponents(
          RunnerApi.Components.newBuilder()
              .putEnvironments(
                  "env",
                  RunnerApi.Environment.newBuilder().addAllDependencies(dependencies).build()))
      .build();
}
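
A brief usage sketch for this helper; the type URN is Beam's standard file-artifact URN, and the assertion simply confirms the dependency landed on the "env" environment:

RunnerApi.ArtifactInformation dep =
    RunnerApi.ArtifactInformation.newBuilder()
        .setTypeUrn("beam:artifact:type:file:v1")
        .build();
RunnerApi.Pipeline pipeline = createEmptyPipeline(ImmutableList.of(dep));
assertEquals(
    1, pipeline.getComponents().getEnvironmentsOrThrow("env").getDependenciesCount());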
 
Example 11
Source File: Twister2StreamingPortablePipelineTranslator.java    From twister2 with Apache License 2.0
@Override
public void translate(RunnerApi.Pipeline pipeline) {
  // Empty body as excerpted; no streaming translation is performed here.
}
 
Example 12
Source File: SparkBatchPortablePipelineTranslator.java    From beam with Apache License 2.0
/** Translates transformNode from Beam into the Spark context. */
void translate(
    PTransformNode transformNode, RunnerApi.Pipeline pipeline, SparkTranslationContext context);
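
A hypothetical implementation skeleton for this method, dispatching on the transform's URN; the handler body is elided, and the URN constant comes from PTransformTranslation, as in Example 3:

@Override
public void translate(
    PTransformNode transformNode, RunnerApi.Pipeline pipeline, SparkTranslationContext context) {
  String urn = transformNode.getTransform().getSpec().getUrn();
  // A real translator keeps a map from URN to handler; this sketch dispatches inline.
  if (PTransformTranslation.IMPULSE_TRANSFORM_URN.equals(urn)) {
    // ... create the Spark dataset for an impulse and register it in the context.
  } else {
    throw new UnsupportedOperationException("Unknown urn: " + urn);
  }
}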
 
Example 13
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
/**
 * Test that in translation the names for the output collections of a multi-output ParDo - a
 * special case because the user can name tags - are overridden to be what the Dataflow service
 * expects.
 */
@Test
public void testTaggedNamesOverridden() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);

  TupleTag<Integer> tag1 = new TupleTag<Integer>("frazzle") {};
  TupleTag<Integer> tag2 = new TupleTag<Integer>("bazzle") {};
  TupleTag<Integer> tag3 = new TupleTag<Integer>() {};

  PCollectionTuple outputs =
      pipeline
          .apply(Create.of(3))
          .apply(
              ParDo.of(
                      new DoFn<Integer, Integer>() {
                        @ProcessElement
                        public void drop() {}
                      })
                  .withOutputTags(tag1, TupleTagList.of(tag2).and(tag3)));

  outputs.get(tag1).setName("bizbazzle");
  outputs.get(tag2).setName("gonzaggle");
  outputs.get(tag3).setName("froonazzle");

  runner.replaceTransforms(pipeline);

  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();

  // The ParDo step
  Step step = job.getSteps().get(1);
  String stepName = getString(step.getProperties(), PropertyNames.USER_NAME);

  List<Map<String, Object>> outputInfos =
      Structs.getListOfMaps(step.getProperties(), PropertyNames.OUTPUT_INFO, null);

  assertThat(outputInfos.size(), equalTo(3));

  // The names set by the user _and_ the tags _must_ be ignored, or metrics will not show up.
  for (int i = 0; i < outputInfos.size(); ++i) {
    assertThat(
        getString(outputInfos.get(i), PropertyNames.USER_NAME),
        equalTo(String.format("%s.out%s", stepName, i)));
  }
}
 
Example 14
Source File: ProtoOverridesTest.java    From beam with Apache License 2.0
@Test
public void replacesMultiple() {
  RunnerApi.Pipeline p =
      Pipeline.newBuilder()
          .addAllRootTransformIds(ImmutableList.of("first", "second"))
          .setComponents(
              Components.newBuilder()
                  .putTransforms(
                      "first",
                      PTransform.newBuilder()
                          .setSpec(FunctionSpec.newBuilder().setUrn("beam:first"))
                          .build())
                  .putTransforms(
                      "second",
                      PTransform.newBuilder()
                          .setSpec(FunctionSpec.newBuilder().setUrn("beam:repeated"))
                          .build())
                  .putTransforms(
                      "third",
                      PTransform.newBuilder()
                          .setSpec(FunctionSpec.newBuilder().setUrn("beam:repeated"))
                          .build())
                  .putPcollections(
                      "intermediatePc",
                      PCollection.newBuilder().setUniqueName("intermediate").build())
                  .putCoders(
                      "coder",
                      Coder.newBuilder().setSpec(FunctionSpec.getDefaultInstance()).build()))
          .build();

  ByteString newPayload = ByteString.copyFrom("foo-bar-baz".getBytes(StandardCharsets.UTF_8));
  Pipeline updated =
      ProtoOverrides.updateTransform(
          "beam:repeated",
          p,
          (transformId, existingComponents) -> {
            String subtransform = String.format("%s_sub", transformId);
            return MessageWithComponents.newBuilder()
                .setPtransform(
                    PTransform.newBuilder()
                        .setSpec(
                            FunctionSpec.newBuilder()
                                .setUrn("beam:repeated:replacement")
                                .setPayload(newPayload))
                        .addSubtransforms(subtransform))
                .setComponents(
                    Components.newBuilder()
                        .putTransforms(
                            subtransform,
                            PTransform.newBuilder().setUniqueName(subtransform).build()))
                .build();
          });
  PTransform updatedSecond = updated.getComponents().getTransformsOrThrow("second");
  PTransform updatedThird = updated.getComponents().getTransformsOrThrow("third");

  assertThat(updatedSecond, not(equalTo(p.getComponents().getTransformsOrThrow("second"))));
  assertThat(updatedThird, not(equalTo(p.getComponents().getTransformsOrThrow("third"))));
  assertThat(updatedSecond.getSubtransformsList(), contains("second_sub"));
  assertThat(updatedSecond.getSpec().getPayload(), equalTo(newPayload));
  assertThat(updatedThird.getSubtransformsList(), contains("third_sub"));
  assertThat(updatedThird.getSpec().getPayload(), equalTo(newPayload));

  assertThat(updated.getComponents().getTransformsMap(), hasKey("second_sub"));
  assertThat(updated.getComponents().getTransformsMap(), hasKey("third_sub"));
  assertThat(
      updated.getComponents().getTransformsOrThrow("second_sub").getUniqueName(),
      equalTo("second_sub"));
  assertThat(
      updated.getComponents().getTransformsOrThrow("third_sub").getUniqueName(),
      equalTo("third_sub"));
}
 
Example 15
Source File: JobInvoker.java    From beam with Apache License 2.0
JobInvocation invoke(RunnerApi.Pipeline pipeline, Struct options, @Nullable String retrievalToken)
    throws IOException {
  return invokeWithExecutor(pipeline, options, retrievalToken, this.executorService);
}
 
Example 16
Source File: CreateExecutableStageNodeFunction.java    From beam with Apache License 2.0
public CreateExecutableStageNodeFunction(RunnerApi.Pipeline pipeline, IdGenerator idGenerator) {
  this.pipeline = pipeline;
  this.idGenerator = idGenerator;
}
 
Example 17
Source File: InsertFetchAndFilterStreamingSideInputNodes.java    From beam with Apache License 2.0
private InsertFetchAndFilterStreamingSideInputNodes(RunnerApi.Pipeline pipeline) {
  this.pipeline = pipeline;
}
 
Example 18
Source File: Twister2BatchPortablePipelineTranslator.java    From twister2 with Apache License 2.0
@Override
public void translate(RunnerApi.Pipeline pipeline) {
  // Empty body as excerpted; no batch translation is performed here.
}
 
Example 19
Source File: QueryablePipeline.java    From beam with Apache License 2.0
/**
 * Create a new {@link QueryablePipeline} which uses the root transform IDs and components of the
 * provided {@link Pipeline}.
 */
public static QueryablePipeline forPipeline(RunnerApi.Pipeline p) {
  return forTransforms(p.getRootTransformIdsList(), p.getComponents());
}
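
A minimal usage sketch, assuming a proto produced as in the earlier examples; getRootTransforms() is the query method QueryablePipeline exposes for root nodes (verify against your Beam version):

RunnerApi.Pipeline proto = PipelineTranslation.toProto(p);
QueryablePipeline queryable = QueryablePipeline.forPipeline(proto);
for (PipelineNode.PTransformNode root : queryable.getRootTransforms()) {
  System.out.println(root.getId() + " -> " + root.getTransform().getUniqueName());
}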
 
Example 20
Source File: PortablePipelineRunner.java    From beam with Apache License 2.0
PortablePipelineResult run(RunnerApi.Pipeline pipeline, JobInfo jobInfo) throws Exception;