Java Code Examples for org.apache.beam.model.pipeline.v1.RunnerApi#PTransform

The following examples show how to use org.apache.beam.model.pipeline.v1.RunnerApi#PTransform. Each example is drawn from the Apache Beam project; its source file is noted above the code.
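As background, a RunnerApi.PTransform is an ordinary protobuf message, so it can be assembled directly through the generated builder API. The minimal sketch below mirrors the builder calls used in Examples 11 and 12 further down; the URN, step name, and PCollection ids are illustrative placeholders, not values from any real pipeline:

RunnerApi.PTransform pTransform =
    RunnerApi.PTransform.newBuilder()
        .setUniqueName("MyStep") // illustrative step name
        .setSpec(
            RunnerApi.FunctionSpec.newBuilder()
                .setUrn("beam:transform:pardo:v1") // URN identifying the transform type
                .build())
        // Local names mapped to PCollection ids registered in the pipeline's Components.
        .putInputs("input", "inputPCollectionId")
        .putOutputs("output", "outputPCollectionId")
        .build();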
Example 1
Source File: TrivialNativeTransformExpander.java (Apache Beam, Apache License 2.0)
private static RunnerApi.Pipeline makeKnownUrnsPrimitives(
    RunnerApi.Pipeline pipeline, Set<String> knownUrns) {
  RunnerApi.Pipeline.Builder trimmedPipeline = pipeline.toBuilder();
  for (String ptransformId : pipeline.getComponents().getTransformsMap().keySet()) {
    // Skip over previously removed transforms from the original pipeline since we iterate
    // over all transforms from the original pipeline and not the trimmed down version.
    RunnerApi.PTransform currentTransform =
        trimmedPipeline.getComponents().getTransformsOrDefault(ptransformId, null);
    if (currentTransform != null && knownUrns.contains(currentTransform.getSpec().getUrn())) {
      LOG.debug(
          "Removing descendants and environment of known native PTransform {}" + ptransformId);
      removeDescendants(trimmedPipeline, ptransformId);
      trimmedPipeline
          .getComponentsBuilder()
          .putTransforms(
              ptransformId,
              currentTransform.toBuilder().clearSubtransforms().clearEnvironmentId().build());
    }
  }
  return trimmedPipeline.build();
}
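A hedged usage sketch for the method above: in the Beam codebase this example comes from, runners reach this trimming through TrivialNativeTransformExpander's public entry point, assumed here to be forKnownUrns; the URN is a hypothetical placeholder:

// Trim the descendants of transforms the runner executes natively.
Set<String> knownUrns = ImmutableSet.of("beam:transform:my_native_read:v1"); // hypothetical URN
RunnerApi.Pipeline trimmed =
    TrivialNativeTransformExpander.forKnownUrns(pipelineProto, knownUrns);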
 
Example 2
Source File: ParDoTranslation.java (Apache Beam, Apache License 2.0)
@Override
public RunnerApi.PTransform translate(
    AppliedPTransform<?, ?, ?> appliedPTransform,
    List<AppliedPTransform<?, ?, ?>> subtransforms,
    SdkComponents components)
    throws IOException {
  RunnerApi.PTransform.Builder builder =
      PTransformTranslation.translateAppliedPTransform(
          appliedPTransform, subtransforms, components);

  AppliedPTransform<?, ?, ParDo.MultiOutput<?, ?>> appliedParDo =
      (AppliedPTransform<?, ?, ParDo.MultiOutput<?, ?>>) appliedPTransform;
  ParDoPayload payload = translateParDo(appliedParDo, components);
  builder.setSpec(
      RunnerApi.FunctionSpec.newBuilder()
          .setUrn(PAR_DO_TRANSFORM_URN)
          .setPayload(payload.toByteString())
          .build());
  builder.setEnvironmentId(components.getOnlyEnvironmentId());

  return builder.build();
}
 
Example 3
Source File: WindowMappingFnRunnerTest.java (Apache Beam, Apache License 2.0)
@Test
public void testWindowMapping() throws Exception {
  String pTransformId = "pTransformId";

  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));
  RunnerApi.FunctionSpec functionSpec =
      RunnerApi.FunctionSpec.newBuilder()
          .setUrn(WindowMappingFnRunner.URN)
          .setPayload(
              ParDoTranslation.translateWindowMappingFn(
                      new GlobalWindows().getDefaultWindowMappingFn(), components)
                  .toByteString())
          .build();
  RunnerApi.PTransform pTransform =
      RunnerApi.PTransform.newBuilder().setSpec(functionSpec).build();

  ThrowingFunction<KV<Object, BoundedWindow>, KV<Object, BoundedWindow>> mapFunction =
      WindowMappingFnRunner.createMapFunctionForPTransform(pTransformId, pTransform);

  KV<Object, BoundedWindow> input =
      KV.of("abc", new IntervalWindow(Instant.now(), Duration.standardMinutes(1)));

  assertEquals(KV.of(input.getKey(), GlobalWindow.INSTANCE), mapFunction.apply(input));
}
 
Example 4
Source File: FlinkStreamingPortablePipelineTranslator.java (Apache Beam, Apache License 2.0)
@SuppressWarnings("unchecked")
private <T> void translateUnboundedRead(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform transform = pipeline.getComponents().getTransformsOrThrow(id);
  String outputCollectionId = Iterables.getOnlyElement(transform.getOutputsMap().values());

  RunnerApi.ReadPayload payload;
  try {
    payload = RunnerApi.ReadPayload.parseFrom(transform.getSpec().getPayload());
  } catch (IOException e) {
    throw new RuntimeException("Failed to parse ReadPayload from transform", e);
  }

  Preconditions.checkState(
      payload.getIsBounded() != RunnerApi.IsBounded.Enum.BOUNDED,
      "Bounded reads should run inside an environment instead of being translated by the Runner.");

  DataStream<WindowedValue<T>> source =
      translateUnboundedSource(
          transform.getUniqueName(),
          outputCollectionId,
          payload,
          pipeline,
          context.getPipelineOptions(),
          context.getExecutionEnvironment());

  context.addDataStream(outputCollectionId, source);
}
 
Example 5
Source File: RegisterNodeFunction.java (Apache Beam, Apache License 2.0)
/**
 * Returns an artificial PCollectionView that can be used to fulfill API requirements of a {@link
 * SideInputReader} when used inside the Dataflow runner harness.
 *
 * <p>Generates length prefixed coder variants suitable to be used within the Dataflow Runner
 * harness so that encoding and decoding values matches the length prefixing that occurred when
 * materializing the side input.
 */
public static final PCollectionView<?> transformSideInputForRunner(
    RunnerApi.Pipeline pipeline,
    RunnerApi.PTransform parDoPTransform,
    String sideInputTag,
    RunnerApi.SideInput sideInput) {
  checkArgument(
      Materializations.MULTIMAP_MATERIALIZATION_URN.equals(sideInput.getAccessPattern().getUrn()),
      "This handler is only capable of dealing with %s materializations "
          + "but was asked to handle %s for PCollectionView with tag %s.",
      Materializations.MULTIMAP_MATERIALIZATION_URN,
      sideInput.getAccessPattern().getUrn(),
      sideInputTag);
  String sideInputPCollectionId = parDoPTransform.getInputsOrThrow(sideInputTag);
  RunnerApi.PCollection sideInputPCollection =
      pipeline.getComponents().getPcollectionsOrThrow(sideInputPCollectionId);
  try {
    FullWindowedValueCoder<KV<Object, Object>> runnerSideInputCoder =
        (FullWindowedValueCoder)
            WireCoders.instantiateRunnerWireCoder(
                PipelineNode.pCollection(sideInputPCollectionId, sideInputPCollection),
                pipeline.getComponents());

    return DataflowPortabilityPCollectionView.with(
        new TupleTag<>(sideInputTag), runnerSideInputCoder);
  } catch (IOException e) {
    throw new IllegalStateException("Unable to translate proto to coder", e);
  }
}
 
Example 6
Source File: PCollectionViewTranslation.java (Apache Beam, Apache License 2.0)
/**
 * Create a {@link PCollectionView} from a side input spec and an already-deserialized {@link
 * PCollection} that should be wired up.
 */
public static PCollectionView<?> viewFromProto(
    RunnerApi.SideInput sideInput,
    String localName,
    PCollection<?> pCollection,
    RunnerApi.PTransform parDoTransform,
    RehydratedComponents components)
    throws IOException {
  checkArgument(
      localName != null,
      "%s.viewFromProto: localName must not be null",
      ParDoTranslation.class.getSimpleName());
  TupleTag<?> tag = new TupleTag<>(localName);
  WindowMappingFn<?> windowMappingFn = windowMappingFnFromProto(sideInput.getWindowMappingFn());
  ViewFn<?, ?> viewFn = viewFnFromProto(sideInput.getViewFn());
  WindowingStrategy<?, ?> windowingStrategy = pCollection.getWindowingStrategy().fixDefaults();

  PCollectionView<?> view =
      new RunnerPCollectionView<>(
          pCollection,
          (TupleTag) tag,
          (ViewFn) viewFn,
          windowMappingFn,
          windowingStrategy,
          (Coder) pCollection.getCoder());
  return view;
}
 
Example 7
Source File: NativeTransforms.java (Apache Beam, Apache License 2.0)
/**
 * Returns true if and only if the Runner understands this transform and can handle it directly.
 */
public static boolean isNative(RunnerApi.PTransform pTransform) {
  // TODO(BEAM-10109) Use default (context) classloader.
  Iterator<IsNativeTransform> matchers =
      ServiceLoader.load(IsNativeTransform.class, NativeTransforms.class.getClassLoader())
          .iterator();
  while (matchers.hasNext()) {
    if (matchers.next().test(pTransform)) {
      return true;
    }
  }
  return false;
}
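Examples 13 and 20 later in this list are real implementations of the NativeTransforms.IsNativeTransform interface that this ServiceLoader lookup discovers. A minimal sketch of supplying one, assuming registration through Google's AutoService annotation processor (a hand-written META-INF/services entry works equally well); the class name and URN are hypothetical:

@AutoService(NativeTransforms.IsNativeTransform.class)
public class MyRunnerNativeTransform implements NativeTransforms.IsNativeTransform {
  @Override
  public boolean test(RunnerApi.PTransform pTransform) {
    // Match only the single transform URN this runner implements natively.
    return "beam:transform:my_native_read:v1"
        .equals(PTransformTranslation.urnForTransformOrNull(pTransform));
  }
}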
 
Example 8
Source File: CreatePCollectionViewTranslation.java (Apache Beam, Apache License 2.0)
/**
 * @deprecated Since {@link CreatePCollectionView} is not a part of the Beam model, there is no
 *     SDK-agnostic specification. Using this method means your runner is tied to Java.
 */
@Deprecated
public static <ElemT, ViewT> PCollectionView<ViewT> getView(
    AppliedPTransform<
            PCollection<ElemT>,
            PCollection<ElemT>,
            PTransform<PCollection<ElemT>, PCollection<ElemT>>>
        application)
    throws IOException {

  RunnerApi.PTransform transformProto =
      PTransformTranslation.toProto(
          application,
          Collections.emptyList(),
          SdkComponents.create(application.getPipeline().getOptions()));

  checkArgument(
      PTransformTranslation.CREATE_VIEW_TRANSFORM_URN.equals(transformProto.getSpec().getUrn()),
      "Illegal attempt to extract %s from transform %s with name \"%s\" and URN \"%s\"",
      PCollectionView.class.getSimpleName(),
      application.getTransform(),
      application.getFullName(),
      transformProto.getSpec().getUrn());

  return (PCollectionView<ViewT>)
      SerializableUtils.deserializeFromByteArray(
          transformProto.getSpec().getPayload().toByteArray(),
          PCollectionView.class.getSimpleName());
}
 
Example 9
Source File: ExecutableStageTranslation.java (Apache Beam, Apache License 2.0)
/** Extracts an {@link ExecutableStagePayload} from the given transform. */
public static ExecutableStagePayload getExecutableStagePayload(
    AppliedPTransform<?, ?, ?> appliedTransform) throws IOException {
  RunnerApi.PTransform transform =
      PTransformTranslation.toProto(
          appliedTransform, SdkComponents.create(appliedTransform.getPipeline().getOptions()));
  checkArgument(ExecutableStage.URN.equals(transform.getSpec().getUrn()));
  return ExecutableStagePayload.parseFrom(transform.getSpec().getPayload());
}
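A usage sketch, assuming the caller already holds the AppliedPTransform for a fused stage; the payload's contents are then read through the generated protobuf getters:

ExecutableStagePayload payload =
    ExecutableStageTranslation.getExecutableStagePayload(appliedTransform);
RunnerApi.Components stageComponents = payload.getComponents(); // coders, PCollections, etc. for the stage
String inputPCollectionId = payload.getInput(); // id of the stage's main input PCollection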
 
Example 10
Source File: PTransformTranslation.java (Apache Beam, Apache License 2.0)
@Override
public RunnerApi.PTransform translate(
    AppliedPTransform<?, ?, ?> appliedPTransform,
    List<AppliedPTransform<?, ?, ?>> subtransforms,
    SdkComponents components)
    throws IOException {
  RunnerApi.PTransform.Builder transformBuilder =
      translateAppliedPTransform(appliedPTransform, subtransforms, components);

  PTransform<?, ?> transform = appliedPTransform.getTransform();

  // The raw transform was parsed in the context of other components; this puts it in the
  // context of our current serialization
  FunctionSpec spec = ((RawPTransform<?, ?>) transform).migrate(components);

  // A composite transform is permitted to have a null spec. There are also some pseudo-
  // primitives not yet supported by the portability framework that have null specs
  String urn = "";
  if (spec != null) {
    urn = spec.getUrn();
    transformBuilder.setSpec(spec);
  }

  if (!RUNNER_IMPLEMENTED_TRANSFORMS.contains(urn)) {
    transformBuilder.setEnvironmentId(components.getOnlyEnvironmentId());
  }
  return transformBuilder.build();
}
 
Example 11
Source File: KafkaIOExternalTest.java (Apache Beam, Apache License 2.0)
@Test
public void testConstructKafkaWrite() throws Exception {
  String topic = "topic";
  String keySerializer = "org.apache.kafka.common.serialization.ByteArraySerializer";
  String valueSerializer = "org.apache.kafka.common.serialization.LongSerializer";
  ImmutableMap<String, String> producerConfig =
      ImmutableMap.<String, String>builder()
          .put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "server1:port,server2:port")
          .put("retries", "3")
          .build();

  ExternalTransforms.ExternalConfigurationPayload payload =
      ExternalTransforms.ExternalConfigurationPayload.newBuilder()
          .putConfiguration(
              "topic",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .setPayload(ByteString.copyFrom(encodeString(topic)))
                  .build())
          .putConfiguration(
              "producer_config",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn("beam:coder:iterable:v1")
                  .addCoderUrn("beam:coder:kv:v1")
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .setPayload(ByteString.copyFrom(mapAsBytes(producerConfig)))
                  .build())
          .putConfiguration(
              "key_serializer",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .setPayload(ByteString.copyFrom(encodeString(keySerializer)))
                  .build())
          .putConfiguration(
              "value_serializer",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .setPayload(ByteString.copyFrom(encodeString(valueSerializer)))
                  .build())
          .build();

  Pipeline p = Pipeline.create();
  p.apply(Impulse.create()).apply(WithKeys.of("key"));
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  String inputPCollection =
      Iterables.getOnlyElement(
          Iterables.getLast(pipelineProto.getComponents().getTransformsMap().values())
              .getOutputsMap()
              .values());

  ExpansionApi.ExpansionRequest request =
      ExpansionApi.ExpansionRequest.newBuilder()
          .setComponents(pipelineProto.getComponents())
          .setTransform(
              RunnerApi.PTransform.newBuilder()
                  .setUniqueName("test")
                  .putInputs("input", inputPCollection)
                  .setSpec(
                      RunnerApi.FunctionSpec.newBuilder()
                          .setUrn("beam:external:java:kafka:write:v1")
                          .setPayload(payload.toByteString())))
          .setNamespace("test_namespace")
          .build();

  ExpansionService expansionService = new ExpansionService();
  TestStreamObserver<ExpansionApi.ExpansionResponse> observer = new TestStreamObserver<>();
  expansionService.expand(request, observer);

  ExpansionApi.ExpansionResponse result = observer.result;
  RunnerApi.PTransform transform = result.getTransform();
  assertThat(
      transform.getSubtransformsList(),
      Matchers.contains(
          "test_namespacetest/Kafka ProducerRecord", "test_namespacetest/KafkaIO.WriteRecords"));
  assertThat(transform.getInputsCount(), Matchers.is(1));
  assertThat(transform.getOutputsCount(), Matchers.is(0));

  RunnerApi.PTransform writeComposite =
      result.getComponents().getTransformsOrThrow(transform.getSubtransforms(1));
  RunnerApi.PTransform writeParDo =
      result
          .getComponents()
          .getTransformsOrThrow(
              result
                  .getComponents()
                  .getTransformsOrThrow(writeComposite.getSubtransforms(0))
                  .getSubtransforms(0));

  RunnerApi.ParDoPayload parDoPayload =
      RunnerApi.ParDoPayload.parseFrom(writeParDo.getSpec().getPayload());
  DoFn kafkaWriter = ParDoTranslation.getDoFn(parDoPayload);
  assertThat(kafkaWriter, Matchers.instanceOf(KafkaWriter.class));
  KafkaIO.WriteRecords spec =
      (KafkaIO.WriteRecords) Whitebox.getInternalState(kafkaWriter, "spec");

  assertThat(spec.getProducerConfig(), Matchers.is(producerConfig));
  assertThat(spec.getTopic(), Matchers.is(topic));
  assertThat(spec.getKeySerializer().getName(), Matchers.is(keySerializer));
  assertThat(spec.getValueSerializer().getName(), Matchers.is(valueSerializer));
}
 
Example 12
Source File: KafkaIOExternalTest.java (Apache Beam, Apache License 2.0)
@Test
public void testConstructKafkaRead() throws Exception {
  List<String> topics = ImmutableList.of("topic1", "topic2");
  String keyDeserializer = "org.apache.kafka.common.serialization.ByteArrayDeserializer";
  String valueDeserializer = "org.apache.kafka.common.serialization.LongDeserializer";
  ImmutableMap<String, String> consumerConfig =
      ImmutableMap.<String, String>builder()
          .put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "server1:port,server2:port")
          .put("key2", "value2")
          .put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, keyDeserializer)
          .put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, valueDeserializer)
          .build();

  ExternalTransforms.ExternalConfigurationPayload payload =
      ExternalTransforms.ExternalConfigurationPayload.newBuilder()
          .putConfiguration(
              "topics",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn("beam:coder:iterable:v1")
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .setPayload(ByteString.copyFrom(listAsBytes(topics)))
                  .build())
          .putConfiguration(
              "consumer_config",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn("beam:coder:iterable:v1")
                  .addCoderUrn("beam:coder:kv:v1")
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .setPayload(ByteString.copyFrom(mapAsBytes(consumerConfig)))
                  .build())
          .putConfiguration(
              "key_deserializer",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .setPayload(ByteString.copyFrom(encodeString(keyDeserializer)))
                  .build())
          .putConfiguration(
              "value_deserializer",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .setPayload(ByteString.copyFrom(encodeString(valueDeserializer)))
                  .build())
          .build();

  RunnerApi.Components defaultInstance = RunnerApi.Components.getDefaultInstance();
  ExpansionApi.ExpansionRequest request =
      ExpansionApi.ExpansionRequest.newBuilder()
          .setComponents(defaultInstance)
          .setTransform(
              RunnerApi.PTransform.newBuilder()
                  .setUniqueName("test")
                  .setSpec(
                      RunnerApi.FunctionSpec.newBuilder()
                          .setUrn("beam:external:java:kafka:read:v1")
                          .setPayload(payload.toByteString())))
          .setNamespace("test_namespace")
          .build();

  ExpansionService expansionService = new ExpansionService();
  TestStreamObserver<ExpansionApi.ExpansionResponse> observer = new TestStreamObserver<>();
  expansionService.expand(request, observer);

  ExpansionApi.ExpansionResponse result = observer.result;
  RunnerApi.PTransform transform = result.getTransform();
  assertThat(
      transform.getSubtransformsList(),
      Matchers.contains(
          "test_namespacetest/KafkaIO.Read", "test_namespacetest/Remove Kafka Metadata"));
  assertThat(transform.getInputsCount(), Matchers.is(0));
  assertThat(transform.getOutputsCount(), Matchers.is(1));

  RunnerApi.PTransform kafkaComposite =
      result.getComponents().getTransformsOrThrow(transform.getSubtransforms(0));
  RunnerApi.PTransform kafkaRead =
      result.getComponents().getTransformsOrThrow(kafkaComposite.getSubtransforms(0));
  RunnerApi.ReadPayload readPayload =
      RunnerApi.ReadPayload.parseFrom(kafkaRead.getSpec().getPayload());
  KafkaUnboundedSource source =
      (KafkaUnboundedSource) ReadTranslation.unboundedSourceFromProto(readPayload);
  KafkaIO.Read spec = source.getSpec();

  assertThat(spec.getConsumerConfig(), Matchers.is(consumerConfig));
  assertThat(spec.getTopics(), Matchers.is(topics));
  assertThat(
      spec.getKeyDeserializerProvider()
          .getDeserializer(spec.getConsumerConfig(), true)
          .getClass()
          .getName(),
      Matchers.is(keyDeserializer));
  assertThat(
      spec.getValueDeserializerProvider()
          .getDeserializer(spec.getConsumerConfig(), false)
          .getClass()
          .getName(),
      Matchers.is(valueDeserializer));
}
 
Example 13
Source File: SparkBatchPortablePipelineTranslator.java (Apache Beam, Apache License 2.0)
@Override
public boolean test(RunnerApi.PTransform pTransform) {
  return PTransformTranslation.RESHUFFLE_URN.equals(
      PTransformTranslation.urnForTransformOrNull(pTransform));
}
 
Example 14
Source File: External.java (Apache Beam, Apache License 2.0)
RunnerApi.PTransform getExpandedTransform() {
  return expandedTransform;
}
 
Example 15
Source File: PipelineTranslation.java (Apache Beam, Apache License 2.0)
private static RunnerApi.Pipeline elideDeprecatedViews(RunnerApi.Pipeline pipeline) {
  // Record data on CreateView operations.
  Set<String> viewTransforms = new HashSet<>();
  Map<String, String> viewOutputsToInputs = new HashMap<>();
  pipeline
      .getComponents()
      .getTransformsMap()
      .forEach(
          (transformId, transform) -> {
            if (transform
                .getSpec()
                .getUrn()
                .equals(PTransformTranslation.CREATE_VIEW_TRANSFORM_URN)) {
              viewTransforms.add(transformId);
              viewOutputsToInputs.put(
                  Iterables.getOnlyElement(transform.getOutputsMap().values()),
                  Iterables.getOnlyElement(transform.getInputsMap().values()));
            }
          });
  // Fix up view references.
  Map<String, RunnerApi.PTransform> newTransforms = new HashMap<>();
  pipeline
      .getComponents()
      .getTransformsMap()
      .forEach(
          (transformId, transform) -> {
            RunnerApi.PTransform.Builder transformBuilder = transform.toBuilder();
            transform
                .getInputsMap()
                .forEach(
                    (key, value) -> {
                      if (viewOutputsToInputs.containsKey(value)) {
                        transformBuilder.putInputs(key, viewOutputsToInputs.get(value));
                      }
                    });
            transform
                .getOutputsMap()
                .forEach(
                    (key, value) -> {
                      if (viewOutputsToInputs.containsKey(value)) {
                        transformBuilder.putOutputs(key, viewOutputsToInputs.get(value));
                      }
                    });
            // Unfortunately transformBuilder.getSubtransformsList().removeAll(viewTransforms)
            // throws UnsupportedOperationException.
            transformBuilder.clearSubtransforms();
            transformBuilder.addAllSubtransforms(
                transform.getSubtransformsList().stream()
                    .filter(id -> !viewTransforms.contains(id))
                    .collect(Collectors.toList()));
            newTransforms.put(transformId, transformBuilder.build());
          });

  RunnerApi.Pipeline.Builder newPipeline = pipeline.toBuilder();
  // Replace transforms.
  newPipeline.getComponentsBuilder().putAllTransforms(newTransforms);
  // Remove CreateView operation components.
  viewTransforms.forEach(newPipeline.getComponentsBuilder()::removeTransforms);
  viewOutputsToInputs.keySet().forEach(newPipeline.getComponentsBuilder()::removePcollections);
  newPipeline.clearRootTransformIds();
  newPipeline.addAllRootTransformIds(
      pipeline.getRootTransformIdsList().stream()
          .filter(id -> !viewTransforms.contains(id))
          .collect(Collectors.toList()));
  return newPipeline.build();
}
 
Example 16
Source File: CreateExecutableStageNodeFunction.java (Apache Beam, Apache License 2.0)
/**
 * Transforms a CombineValuesFn {@link ParDoInstruction} to an Apache Beam {@link
 * RunnerApi.FunctionSpec}.
 */
private RunnerApi.FunctionSpec.Builder transformCombineValuesFnToFunctionSpec(
    CloudObject userFn) {
  // Grab the Combine PTransform. This transform is the composite PTransform representing the
  // entire CombinePerKey, and it contains the CombinePayload we need.
  String combinePTransformId = getString(userFn, PropertyNames.SERIALIZED_FN);

  RunnerApi.PTransform combinePerKeyPTransform =
      pipeline.getComponents().getTransformsOrDefault(combinePTransformId, null);
  checkArgument(
      combinePerKeyPTransform != null,
      "Transform with id \"%s\" not found in pipeline.",
      combinePTransformId);

  checkArgument(
      combinePerKeyPTransform.getSpec().getUrn().equals(COMBINE_PER_KEY_URN),
      "Found transform \"%s\" for Combine instruction, "
          + "but that transform had unexpected URN \"%s\" (expected \"%s\")",
      combinePerKeyPTransform,
      combinePerKeyPTransform.getSpec().getUrn(),
      COMBINE_PER_KEY_URN);

  RunnerApi.CombinePayload combinePayload;
  try {
    combinePayload =
        RunnerApi.CombinePayload.parseFrom(combinePerKeyPTransform.getSpec().getPayload());
  } catch (InvalidProtocolBufferException exc) {
    throw new RuntimeException("Combine did not have a CombinePayload", exc);
  }

  String phase = getString(userFn, WorkerPropertyNames.PHASE, CombinePhase.ALL);
  String urn;

  switch (phase) {
    case CombinePhase.ALL:
      urn = COMBINE_GROUPED_VALUES_URN;
      break;
    case CombinePhase.ADD:
      urn = COMBINE_PRECOMBINE_URN;
      break;
    case CombinePhase.MERGE:
      urn = COMBINE_MERGE_URN;
      break;
    case CombinePhase.EXTRACT:
      urn = COMBINE_EXTRACT_URN;
      break;
    default:
      throw new RuntimeException("Encountered unknown Combine Phase: " + phase);
  }
  return RunnerApi.FunctionSpec.newBuilder()
      .setUrn(urn)
      .setPayload(combinePayload.toByteString());
}
 
Example 17
Source File: BeamFnDataWriteRunnerTest.java (Apache Beam, Apache License 2.0)
@Test
public void testCreatingAndProcessingBeamFnDataWriteRunner() throws Exception {
  String bundleId = "57L";

  PCollectionConsumerRegistry consumers =
      new PCollectionConsumerRegistry(
          mock(MetricsContainerStepMap.class), mock(ExecutionStateTracker.class));
  PTransformFunctionRegistry startFunctionRegistry =
      new PTransformFunctionRegistry(
          mock(MetricsContainerStepMap.class), mock(ExecutionStateTracker.class), "start");
  PTransformFunctionRegistry finishFunctionRegistry =
      new PTransformFunctionRegistry(
          mock(MetricsContainerStepMap.class), mock(ExecutionStateTracker.class), "finish");
  List<ThrowingRunnable> teardownFunctions = new ArrayList<>();

  String localInputId = "inputPC";
  RunnerApi.PTransform pTransform =
      RemoteGrpcPortWrite.writeToPort(localInputId, PORT_SPEC).toPTransform();

  new BeamFnDataWriteRunner.Factory<String>()
      .createRunnerForPTransform(
          PipelineOptionsFactory.create(),
          mockBeamFnDataClient,
          null /* beamFnStateClient */,
          null /* beamFnTimerClient */,
          TRANSFORM_ID,
          pTransform,
          Suppliers.ofInstance(bundleId)::get,
          ImmutableMap.of(
              localInputId, RunnerApi.PCollection.newBuilder().setCoderId(ELEM_CODER_ID).build()),
          COMPONENTS.getCodersMap(),
          COMPONENTS.getWindowingStrategiesMap(),
          consumers,
          startFunctionRegistry,
          finishFunctionRegistry,
          teardownFunctions::add,
          null /* addProgressRequestCallback */,
          null /* splitListener */,
          null /* bundleFinalizer */);

  assertThat(teardownFunctions, empty());

  verifyZeroInteractions(mockBeamFnDataClient);

  List<WindowedValue<String>> outputValues = new ArrayList<>();
  AtomicBoolean wasCloseCalled = new AtomicBoolean();
  CloseableFnDataReceiver<WindowedValue<String>> outputConsumer =
      new CloseableFnDataReceiver<WindowedValue<String>>() {
        @Override
        public void close() throws Exception {
          wasCloseCalled.set(true);
        }

        @Override
        public void accept(WindowedValue<String> t) throws Exception {
          outputValues.add(t);
        }

        @Override
        public void flush() throws Exception {
          throw new UnsupportedOperationException("Flush is not supported");
        }
      };

  when(mockBeamFnDataClient.send(any(), any(), Matchers.<Coder<WindowedValue<String>>>any()))
      .thenReturn(outputConsumer);
  Iterables.getOnlyElement(startFunctionRegistry.getFunctions()).run();
  verify(mockBeamFnDataClient)
      .send(
          eq(PORT_SPEC.getApiServiceDescriptor()),
          eq(LogicalEndpoint.data(bundleId, TRANSFORM_ID)),
          eq(WIRE_CODER));

  assertThat(consumers.keySet(), containsInAnyOrder(localInputId));
  consumers.getMultiplexingConsumer(localInputId).accept(valueInGlobalWindow("TestValue"));
  assertThat(outputValues, contains(valueInGlobalWindow("TestValue")));
  outputValues.clear();

  assertFalse(wasCloseCalled.get());
  Iterables.getOnlyElement(finishFunctionRegistry.getFunctions()).run();
  assertTrue(wasCloseCalled.get());

  verifyNoMoreInteractions(mockBeamFnDataClient);
}
 
Example 18
Source File: RegisterNodeFunction.java (Apache Beam, Apache License 2.0)
/**
 * Transforms a CombineValuesFn {@link ParDoInstruction} to an Apache Beam {@link
 * RunnerApi.FunctionSpec}.
 */
private RunnerApi.FunctionSpec.Builder transformCombineValuesFnToFunctionSpec(
    CloudObject userFn) {
  // Grab the Combine PTransform. This transform is the composite PTransform representing the
  // entire CombinePerKey, and it contains the CombinePayload we need.
  String combinePTransformId = getString(userFn, PropertyNames.SERIALIZED_FN);

  RunnerApi.PTransform combinePerKeyPTransform =
      pipeline.getComponents().getTransformsOrDefault(combinePTransformId, null);
  checkArgument(
      combinePerKeyPTransform != null,
      "Transform with id \"%s\" not found in pipeline.",
      combinePTransformId);

  checkArgument(
      combinePerKeyPTransform.getSpec().getUrn().equals(COMBINE_PER_KEY_URN),
      "Found transform \"%s\" for Combine instruction, "
          + "but that transform had unexpected URN \"%s\" (expected \"%s\")",
      combinePerKeyPTransform,
      combinePerKeyPTransform.getSpec().getUrn(),
      COMBINE_PER_KEY_URN);

  RunnerApi.CombinePayload combinePayload;
  try {
    combinePayload =
        RunnerApi.CombinePayload.parseFrom(combinePerKeyPTransform.getSpec().getPayload());
  } catch (InvalidProtocolBufferException exc) {
    throw new RuntimeException("Combine did not have a CombinePayload", exc);
  }

  String phase = getString(userFn, WorkerPropertyNames.PHASE, CombinePhase.ALL);
  String urn;

  switch (phase) {
    case CombinePhase.ALL:
      urn = COMBINE_GROUPED_VALUES_URN;
      break;
    case CombinePhase.ADD:
      urn = COMBINE_PRECOMBINE_URN;
      break;
    case CombinePhase.MERGE:
      urn = COMBINE_MERGE_URN;
      break;
    case CombinePhase.EXTRACT:
      urn = COMBINE_EXTRACT_URN;
      break;
    default:
      throw new RuntimeException("Encountered unknown Combine Phase: " + phase);
  }
  return RunnerApi.FunctionSpec.newBuilder()
      .setUrn(urn)
      .setPayload(combinePayload.toByteString());
}
 
Example 19
Source File: ParDoTranslationTest.java (Apache Beam, Apache License 2.0)
@Test
public void toTransformProto() throws Exception {
  Map<TupleTag<?>, PValue> inputs = new HashMap<>();
  inputs.put(new TupleTag<KV<Long, String>>("mainInputName") {}, mainInput);
  inputs.putAll(parDo.getAdditionalInputs());
  PCollectionTuple output = mainInput.apply(parDo);

  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

  // Encode
  RunnerApi.PTransform protoTransform =
      PTransformTranslation.toProto(
          AppliedPTransform.<PCollection<KV<Long, String>>, PCollection<Void>, MultiOutput>of(
              "foo", inputs, output.expand(), parDo, p),
          sdkComponents);
  RunnerApi.Components components = sdkComponents.toComponents();
  RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(components);

  // Decode
  ParDoPayload parDoPayload = ParDoPayload.parseFrom(protoTransform.getSpec().getPayload());
  for (PCollectionView<?> view : parDo.getSideInputs().values()) {
    SideInput sideInput = parDoPayload.getSideInputsOrThrow(view.getTagInternal().getId());
    PCollectionView<?> restoredView =
        PCollectionViewTranslation.viewFromProto(
            sideInput,
            view.getTagInternal().getId(),
            view.getPCollection(),
            protoTransform,
            rehydratedComponents);
    assertThat(restoredView.getTagInternal(), equalTo(view.getTagInternal()));
    assertThat(restoredView.getViewFn(), instanceOf(view.getViewFn().getClass()));
    assertThat(
        restoredView.getWindowMappingFn(), instanceOf(view.getWindowMappingFn().getClass()));
    assertThat(
        restoredView.getWindowingStrategyInternal(),
        equalTo(view.getWindowingStrategyInternal().fixDefaults()));
    assertThat(restoredView.getCoderInternal(), equalTo(view.getCoderInternal()));
  }
  String mainInputId = sdkComponents.registerPCollection(mainInput);
  assertThat(
      ParDoTranslation.getMainInput(protoTransform, components),
      equalTo(components.getPcollectionsOrThrow(mainInputId)));
  assertThat(ParDoTranslation.getMainInputName(protoTransform), equalTo("mainInputName"));

  // Ensure the correct timer coder components are used from the main input PCollection's key
  // and window coders.
  for (RunnerApi.TimerFamilySpec timerFamilySpec :
      parDoPayload.getTimerFamilySpecsMap().values()) {
    Coder<?> timerCoder =
        CoderTranslation.fromProto(
            components.getCodersOrThrow(timerFamilySpec.getTimerFamilyCoderId()),
            rehydratedComponents,
            TranslationContext.DEFAULT);
    assertEquals(
        org.apache.beam.runners.core.construction.Timer.Coder.of(
            VarLongCoder.of(), GlobalWindow.Coder.INSTANCE),
        timerCoder);
  }
}
 
Example 20
Source File: FlinkStreamingPortablePipelineTranslator.java (Apache Beam, Apache License 2.0)
@Override
public boolean test(RunnerApi.PTransform pTransform) {
  return STREAMING_IMPULSE_TRANSFORM_URN.equals(
      PTransformTranslation.urnForTransformOrNull(pTransform));
}