Java Code Examples for org.apache.beam.runners.core.construction.ParDoTranslation#getDoFn()

The following examples show how to use org.apache.beam.runners.core.construction.ParDoTranslation#getDoFn(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: Utils.java (from the beam project, Apache License 2.0; 6 votes)
/**
 * Reads the {@link DoFn} out of an applied transform and rejects DoFns that must not be
 * translated directly.
 *
 * @param appliedTransform the applied transform to extract the DoFn from
 * @return the DoFn carried by {@code appliedTransform}
 * @throws IllegalStateException if the DoFn is splittable; such DoFns are expected to have been
 *     overridden before reaching this point
 * @throws UnsupportedOperationException if the DoFn requires time-sorted input
 * @throws RuntimeException if extracting the DoFn fails with an {@link IOException}; the
 *     original exception is kept as the cause
 */
static DoFn<?, ?> getDoFn(AppliedPTransform<?, ?, ?> appliedTransform) {
  try {
    DoFn<?, ?> doFn = ParDoTranslation.getDoFn(appliedTransform);
    if (DoFnSignatures.isSplittable(doFn)) {
      throw new IllegalStateException(
          "Not expected to directly translate splittable DoFn, should have been overridden: "
              + doFn);
    }
    if (DoFnSignatures.requiresTimeSortedInput(doFn)) {
      throw new UnsupportedOperationException(
          String.format(
              "%s doesn't currently support @RequiresTimeSortedInput annotation.",
              JetRunner.class.getSimpleName()));
    }
    return doFn;
  } catch (IOException e) {
    // Callers have no checked-exception contract here, so rethrow unchecked with the cause.
    throw new RuntimeException("Failed to extract DoFn from the applied transform", e);
  }
}
 
Example 2
Source File: ParDoTranslatorBatch.java (from the beam project, Apache License 2.0; 5 votes)
/**
 * Returns the {@link DoFn} of the transform currently being translated.
 *
 * @param context translation context whose current transform holds the DoFn
 * @return the DoFn, cast to this translator's input and output types
 * @throws RuntimeException if extracting the DoFn fails with an {@link IOException}
 */
@SuppressWarnings("unchecked")
private DoFn<InputT, OutputT> getDoFn(TranslationContext context) {
  try {
    // Unchecked cast: the translated DoFn is only known as DoFn<?, ?> at this point.
    return (DoFn<InputT, OutputT>) ParDoTranslation.getDoFn(context.getCurrentTransform());
  } catch (IOException ioException) {
    throw new RuntimeException(ioException);
  }
}
 
Example 3
Source File: ParDoMultiOverrideFactory.java (from the beam project, Apache License 2.0; 5 votes)
/**
 * Picks the transform that replaces the given ParDo application.
 *
 * <ul>
 *   <li>A splittable DoFn is replaced by {@code SplittableParDo.forAppliedParDo}.
 *   <li>A DoFn that declares state, timers or timer families is replaced by a {@code
 *       GbkThenStatefulParDo}.
 *   <li>Any other DoFn keeps the application's own transform.
 * </ul>
 *
 * @param application the applied ParDo to find a replacement for
 * @return the replacement transform, or the original transform when no override applies
 * @throws IOException if the DoFn or its translation details cannot be read
 */
@SuppressWarnings("unchecked")
private PTransform<PCollection<? extends InputT>, PCollectionTuple> getReplacementForApplication(
    AppliedPTransform<
            PCollection<? extends InputT>,
            PCollectionTuple,
            PTransform<PCollection<? extends InputT>, PCollectionTuple>>
        application)
    throws IOException {

  DoFn<InputT, OutputT> doFn = (DoFn<InputT, OutputT>) ParDoTranslation.getDoFn(application);
  DoFnSignature doFnSignature = DoFnSignatures.getSignature(doFn.getClass());

  // Splittability is checked first, before state or timers are looked at.
  if (doFnSignature.processElement().isSplittable()) {
    return SplittableParDo.forAppliedParDo((AppliedPTransform) application);
  }

  boolean usesStateOrTimers =
      !doFnSignature.stateDeclarations().isEmpty()
          || !doFnSignature.timerDeclarations().isEmpty()
          || !doFnSignature.timerFamilyDeclarations().isEmpty();
  if (!usesStateOrTimers) {
    return application.getTransform();
  }

  return new GbkThenStatefulParDo(
      doFn,
      ParDoTranslation.getMainOutputTag(application),
      ParDoTranslation.getAdditionalOutputTags(application),
      ParDoTranslation.getSideInputs(application),
      ParDoTranslation.getSchemaInformation(application),
      ParDoTranslation.getSideInputMapping(application));
}
 
Example 4
Source File: PipelineTranslator.java (from the incubator-nemo project, Apache License 2.0; 4 votes)
/**
 * Creates the DoFn transform for a Beam ParDo node.
 *
 * A {@link DoFnTransform} is created when {@code sideInputMap} is empty; otherwise a
 * {@link PushBackDoFnTransform} is created and given the side inputs.
 *
 * @param ctx          provides translation context.
 * @param beamNode     the beam node to be translated.
 * @param sideInputMap side inputs.
 * @return the created DoFnTransform.
 * @throws RuntimeException if reading the ParDo translation data fails with an IOException.
 */
private static AbstractDoFnTransform createDoFnTransform(final PipelineTranslationContext ctx,
                                                         final TransformHierarchy.Node beamNode,
                                                         final Map<Integer, PCollectionView<?>> sideInputMap) {
  try {
    // Build the applied transform once; every ParDoTranslation lookup below reads from it.
    final AppliedPTransform pTransform = beamNode.toAppliedPTransform(ctx.getPipeline());
    final DoFn doFn = ParDoTranslation.getDoFn(pTransform);
    final TupleTag mainOutputTag = ParDoTranslation.getMainOutputTag(pTransform);
    final TupleTagList additionalOutputTags = ParDoTranslation.getAdditionalOutputTags(pTransform);

    final PCollection<?> mainInput = (PCollection<?>)
      Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(pTransform));

    // Display data carries only the node's full name under the "name" key.
    final HasDisplayData displayData = (builder) -> builder.add(DisplayData.item("name", beamNode.getFullName()));
    final DoFnSchemaInformation doFnSchemaInformation =
      ParDoTranslation.getSchemaInformation(pTransform);

    if (sideInputMap.isEmpty()) {
      return new DoFnTransform(
        doFn,
        mainInput.getCoder(),
        getOutputCoders(pTransform),
        mainOutputTag,
        additionalOutputTags.getAll(),
        mainInput.getWindowingStrategy(),
        ctx.getPipelineOptions(),
        DisplayData.from(displayData),
        doFnSchemaInformation,
        Collections.emptyMap());
    } else {
      return new PushBackDoFnTransform(
        doFn,
        mainInput.getCoder(),
        getOutputCoders(pTransform),
        mainOutputTag,
        additionalOutputTags.getAll(),
        mainInput.getWindowingStrategy(),
        sideInputMap,
        ctx.getPipelineOptions(),
        DisplayData.from(displayData),
        doFnSchemaInformation,
        Collections.emptyMap());
    }
  } catch (final IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example 5
Source File: PubsubIOExternalTest.java (from the beam project, Apache License 2.0; 4 votes)
/**
 * Expands the external Pubsub write transform from a string-coded configuration and checks that
 * the topic and id attribute end up on the DoFn recovered from the expanded ParDo payload.
 */
@Test
public void testConstructPubsubWrite() throws Exception {
  String topic = "projects/project-1234/topics/topic_name";
  String idAttribute = "id_foo";

  // Each setting is shipped as a UTF-8 string-coded configuration value.
  ExternalTransforms.ConfigValue topicValue =
      ExternalTransforms.ConfigValue.newBuilder()
          .addCoderUrn("beam:coder:string_utf8:v1")
          .setPayload(ByteString.copyFrom(encodeString(topic)))
          .build();
  ExternalTransforms.ConfigValue idLabelValue =
      ExternalTransforms.ConfigValue.newBuilder()
          .addCoderUrn("beam:coder:string_utf8:v1")
          .setPayload(ByteString.copyFrom(encodeString(idAttribute)))
          .build();
  ExternalTransforms.ExternalConfigurationPayload payload =
      ExternalTransforms.ExternalConfigurationPayload.newBuilder()
          .putConfiguration("topic", topicValue)
          .putConfiguration("id_label", idLabelValue)
          .build();

  // Build a small pipeline whose last output, marked unbounded, feeds the external transform.
  Pipeline pipeline = Pipeline.create();
  pipeline
      .apply("unbounded", Create.of(1, 2, 3))
      .setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);

  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
  RunnerApi.PTransform lastTransform =
      Iterables.getLast(pipelineProto.getComponents().getTransformsMap().values());
  String inputPCollection = Iterables.getOnlyElement(lastTransform.getOutputsMap().values());

  RunnerApi.FunctionSpec writeSpec =
      RunnerApi.FunctionSpec.newBuilder()
          .setUrn("beam:external:java:pubsub:write:v1")
          .setPayload(payload.toByteString())
          .build();
  RunnerApi.PTransform externalTransform =
      RunnerApi.PTransform.newBuilder()
          .setUniqueName("test")
          .putInputs("input", inputPCollection)
          .setSpec(writeSpec)
          .build();
  ExpansionApi.ExpansionRequest request =
      ExpansionApi.ExpansionRequest.newBuilder()
          .setComponents(pipelineProto.getComponents())
          .setTransform(externalTransform)
          .setNamespace("test_namespace")
          .build();

  ExpansionService expansionService = new ExpansionService();
  TestStreamObserver<ExpansionApi.ExpansionResponse> observer = new TestStreamObserver<>();
  expansionService.expand(request, observer);
  ExpansionApi.ExpansionResponse response = observer.result;

  RunnerApi.PTransform expanded = response.getTransform();
  assertThat(
      expanded.getSubtransformsList(),
      Matchers.contains(
          "test_namespacetest/MapElements", "test_namespacetest/PubsubUnboundedSink"));
  assertThat(expanded.getInputsCount(), Matchers.is(1));
  assertThat(expanded.getOutputsCount(), Matchers.is(0));

  // Walk down to the writer ParDo:
  //   test_namespacetest/PubsubUnboundedSink
  //     -> .../PubsubUnboundedSink.Writer
  //       -> .../ParMultiDo(Writer)
  RunnerApi.PTransform sinkComposite =
      response.getComponents().getTransformsOrThrow(expanded.getSubtransforms(1));
  RunnerApi.PTransform writerComposite =
      response.getComponents().getTransformsOrThrow(sinkComposite.getSubtransforms(3));
  RunnerApi.PTransform writeParDo =
      response.getComponents().getTransformsOrThrow(writerComposite.getSubtransforms(0));

  RunnerApi.ParDoPayload parDoPayload =
      RunnerApi.ParDoPayload.parseFrom(writeParDo.getSpec().getPayload());
  DoFn pubsubWriter = ParDoTranslation.getDoFn(parDoPayload);

  // The writer's fields are read reflectively through Whitebox.
  String idAttributeActual = (String) Whitebox.getInternalState(pubsubWriter, "idAttribute");
  ValueProvider<PubsubClient.TopicPath> topicActual =
      (ValueProvider<PubsubClient.TopicPath>) Whitebox.getInternalState(pubsubWriter, "topic");

  String topicText = null;
  if (topicActual != null) {
    topicText = String.valueOf(topicActual);
  }
  assertThat(topicText, Matchers.is(topic));
  assertThat(idAttributeActual, Matchers.is(idAttribute));
}
 
Example 6
Source File: KafkaIOExternalTest.java (from the beam project, Apache License 2.0; 4 votes)
/**
 * Expands the external Kafka write transform from a coded configuration and checks that the
 * topic, producer config and serializers end up on the write spec held by the recovered DoFn.
 */
@Test
public void testConstructKafkaWrite() throws Exception {
  String topic = "topic";
  String keySerializer = "org.apache.kafka.common.serialization.ByteArraySerializer";
  String valueSerializer = "org.apache.kafka.common.serialization.LongSerializer";
  ImmutableMap<String, String> producerConfig =
      ImmutableMap.<String, String>builder()
          .put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "server1:port,server2:port")
          .put("retries", "3")
          .build();

  // Plain settings are UTF-8 string-coded.
  ExternalTransforms.ConfigValue topicValue =
      ExternalTransforms.ConfigValue.newBuilder()
          .addCoderUrn("beam:coder:string_utf8:v1")
          .setPayload(ByteString.copyFrom(encodeString(topic)))
          .build();
  // The producer config is coded with the iterable, kv and two string coder URNs, in that order.
  ExternalTransforms.ConfigValue producerConfigValue =
      ExternalTransforms.ConfigValue.newBuilder()
          .addCoderUrn("beam:coder:iterable:v1")
          .addCoderUrn("beam:coder:kv:v1")
          .addCoderUrn("beam:coder:string_utf8:v1")
          .addCoderUrn("beam:coder:string_utf8:v1")
          .setPayload(ByteString.copyFrom(mapAsBytes(producerConfig)))
          .build();
  ExternalTransforms.ConfigValue keySerializerValue =
      ExternalTransforms.ConfigValue.newBuilder()
          .addCoderUrn("beam:coder:string_utf8:v1")
          .setPayload(ByteString.copyFrom(encodeString(keySerializer)))
          .build();
  ExternalTransforms.ConfigValue valueSerializerValue =
      ExternalTransforms.ConfigValue.newBuilder()
          .addCoderUrn("beam:coder:string_utf8:v1")
          .setPayload(ByteString.copyFrom(encodeString(valueSerializer)))
          .build();
  ExternalTransforms.ExternalConfigurationPayload payload =
      ExternalTransforms.ExternalConfigurationPayload.newBuilder()
          .putConfiguration("topic", topicValue)
          .putConfiguration("producer_config", producerConfigValue)
          .putConfiguration("key_serializer", keySerializerValue)
          .putConfiguration("value_serializer", valueSerializerValue)
          .build();

  // Build a small keyed pipeline whose last output feeds the external transform.
  Pipeline pipeline = Pipeline.create();
  pipeline.apply(Impulse.create()).apply(WithKeys.of("key"));
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
  RunnerApi.PTransform lastTransform =
      Iterables.getLast(pipelineProto.getComponents().getTransformsMap().values());
  String inputPCollection = Iterables.getOnlyElement(lastTransform.getOutputsMap().values());

  RunnerApi.FunctionSpec writeSpec =
      RunnerApi.FunctionSpec.newBuilder()
          .setUrn("beam:external:java:kafka:write:v1")
          .setPayload(payload.toByteString())
          .build();
  RunnerApi.PTransform externalTransform =
      RunnerApi.PTransform.newBuilder()
          .setUniqueName("test")
          .putInputs("input", inputPCollection)
          .setSpec(writeSpec)
          .build();
  ExpansionApi.ExpansionRequest request =
      ExpansionApi.ExpansionRequest.newBuilder()
          .setComponents(pipelineProto.getComponents())
          .setTransform(externalTransform)
          .setNamespace("test_namespace")
          .build();

  ExpansionService expansionService = new ExpansionService();
  TestStreamObserver<ExpansionApi.ExpansionResponse> observer = new TestStreamObserver<>();
  expansionService.expand(request, observer);
  ExpansionApi.ExpansionResponse response = observer.result;

  RunnerApi.PTransform expanded = response.getTransform();
  assertThat(
      expanded.getSubtransformsList(),
      Matchers.contains(
          "test_namespacetest/Kafka ProducerRecord", "test_namespacetest/KafkaIO.WriteRecords"));
  assertThat(expanded.getInputsCount(), Matchers.is(1));
  assertThat(expanded.getOutputsCount(), Matchers.is(0));

  // Descend from the second subtransform through two levels of first children to the ParDo.
  RunnerApi.PTransform writeRecordsComposite =
      response.getComponents().getTransformsOrThrow(expanded.getSubtransforms(1));
  RunnerApi.PTransform firstNestedTransform =
      response.getComponents().getTransformsOrThrow(writeRecordsComposite.getSubtransforms(0));
  RunnerApi.PTransform writeParDo =
      response.getComponents().getTransformsOrThrow(firstNestedTransform.getSubtransforms(0));

  RunnerApi.ParDoPayload parDoPayload =
      RunnerApi.ParDoPayload.parseFrom(writeParDo.getSpec().getPayload());
  DoFn kafkaWriter = ParDoTranslation.getDoFn(parDoPayload);
  assertThat(kafkaWriter, Matchers.instanceOf(KafkaWriter.class));

  // The writer's spec is read reflectively through Whitebox.
  KafkaIO.WriteRecords spec =
      (KafkaIO.WriteRecords) Whitebox.getInternalState(kafkaWriter, "spec");

  assertThat(spec.getProducerConfig(), Matchers.is(producerConfig));
  assertThat(spec.getTopic(), Matchers.is(topic));
  assertThat(spec.getKeySerializer().getName(), Matchers.is(keySerializer));
  assertThat(spec.getValueSerializer().getName(), Matchers.is(valueSerializer));
}