org.apache.beam.runners.core.construction.ParDoTranslation Java Examples

The following examples show how to use org.apache.beam.runners.core.construction.ParDoTranslation. The source file, originating project, and license are noted above each example.
Example #1
Source File: Utils.java    From beam with Apache License 2.0
static DoFn<?, ?> getDoFn(AppliedPTransform<?, ?, ?> appliedTransform) {
  try {
    DoFn<?, ?> doFn = ParDoTranslation.getDoFn(appliedTransform);
    if (DoFnSignatures.isSplittable(doFn)) {
      throw new IllegalStateException(
          "Not expected to directly translate splittable DoFn, should have been overridden: "
              + doFn);
    }
    if (DoFnSignatures.requiresTimeSortedInput(doFn)) {
      throw new UnsupportedOperationException(
          String.format(
              "%s doesn't current support @RequiresTimeSortedInput annotation.",
              JetRunner.class.getSimpleName()));
    }
    return doFn;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
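
A note on the two checks above: both are static helpers on DoFnSignatures, and they can be exercised against any DoFn instance without building a pipeline. A minimal, hedged sketch (class and method names here are illustrative, not from the source above):

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.reflect.DoFnSignatures;

public class SignatureCheckSketch {
  // An ordinary DoFn: neither splittable nor requiring time-sorted input.
  static class PassThroughFn extends DoFn<String, String> {
    @ProcessElement
    public void process(@Element String element, OutputReceiver<String> out) {
      out.output(element);
    }
  }

  public static void main(String[] args) {
    DoFn<String, String> fn = new PassThroughFn();
    // The same two checks getDoFn(...) performs above; both print false here.
    System.out.println(DoFnSignatures.isSplittable(fn));
    System.out.println(DoFnSignatures.requiresTimeSortedInput(fn));
  }
}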
 
Example #2
Source File: ParDoEvaluatorFactory.java    From beam with Apache License 2.0
@Override
public <T> TransformEvaluator<T> forApplication(
    AppliedPTransform<?, ?, ?> application, CommittedBundle<?> inputBundle) throws Exception {

  @SuppressWarnings({"unchecked", "rawtypes"})
  TransformEvaluator<T> evaluator =
      (TransformEvaluator<T>)
          createEvaluator(
              (AppliedPTransform) application,
              (PCollection<InputT>) inputBundle.getPCollection(),
              inputBundle.getKey(),
              ParDoTranslation.getSideInputs(application),
              (TupleTag<OutputT>) ParDoTranslation.getMainOutputTag(application),
              ParDoTranslation.getAdditionalOutputTags(application).getAll(),
              ParDoTranslation.getSchemaInformation(application),
              ParDoTranslation.getSideInputMapping(application));
  return evaluator;
}
 
Example #3
Source File: UserStateReference.java    From beam with Apache License 2.0
/** Create a user state reference from a UserStateId proto and components. */
public static UserStateReference fromUserStateId(
    UserStateId userStateId, RunnerApi.Components components) {
  PTransform transform = components.getTransformsOrThrow(userStateId.getTransformId());
  String mainInputCollectionId;
  try {
    mainInputCollectionId =
        transform.getInputsOrThrow(ParDoTranslation.getMainInputName(transform));
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return UserStateReference.of(
      PipelineNode.pTransform(userStateId.getTransformId(), transform),
      userStateId.getLocalName(),
      PipelineNode.pCollection(
          mainInputCollectionId, components.getPcollectionsOrThrow(mainInputCollectionId)));
}
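
The try/catch above reflects that ParDoTranslation.getMainInputName is declared to throw IOException. A minimal fragment showing the two-step resolution it enables, assuming transform is a RunnerApi.PTransform for a ParDo (the helper name is illustrative):

/** Sketch: resolve the main input PCollection id of a portable ParDo. */
static String mainInputPCollectionId(RunnerApi.PTransform transform) throws IOException {
  // getMainInputName yields the main input's local name; the transform's
  // inputs map resolves local names to PCollection ids.
  String localName = ParDoTranslation.getMainInputName(transform);
  return transform.getInputsOrThrow(localName);
}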
 
Example #4
Source File: WindowMappingFnRunnerTest.java    From beam with Apache License 2.0
@Test
public void testWindowMapping() throws Exception {
  String pTransformId = "pTransformId";

  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));
  RunnerApi.FunctionSpec functionSpec =
      RunnerApi.FunctionSpec.newBuilder()
          .setUrn(WindowMappingFnRunner.URN)
          .setPayload(
              ParDoTranslation.translateWindowMappingFn(
                      new GlobalWindows().getDefaultWindowMappingFn(), components)
                  .toByteString())
          .build();
  RunnerApi.PTransform pTransform =
      RunnerApi.PTransform.newBuilder().setSpec(functionSpec).build();

  ThrowingFunction<KV<Object, BoundedWindow>, KV<Object, BoundedWindow>> mapFunction =
      WindowMappingFnRunner.createMapFunctionForPTransform(pTransformId, pTransform);

  KV<Object, BoundedWindow> input =
      KV.of("abc", new IntervalWindow(Instant.now(), Duration.standardMinutes(1)));

  assertEquals(KV.of(input.getKey(), GlobalWindow.INSTANCE), mapFunction.apply(input));
}
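
For context on the final assertion: GlobalWindows().getDefaultWindowMappingFn() maps every main-input window to the single global window, which is why the IntervalWindow input comes back as GlobalWindow.INSTANCE. A hedged sketch of that mapping on its own, without the runner plumbing (class name is illustrative):

import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.apache.beam.sdk.transforms.windowing.WindowMappingFn;
import org.joda.time.Duration;
import org.joda.time.Instant;

public class WindowMappingSketch {
  public static void main(String[] args) {
    WindowMappingFn<GlobalWindow> mappingFn =
        new GlobalWindows().getDefaultWindowMappingFn();
    BoundedWindow mainWindow =
        new IntervalWindow(Instant.now(), Duration.standardMinutes(1));
    // Prints true: any main-input window maps to the global window.
    System.out.println(
        GlobalWindow.INSTANCE.equals(mappingFn.getSideInputWindow(mainWindow)));
  }
}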
 
Example #5
Source File: ParDoEvaluatorFactory.java    From beam with Apache License 2.0
static CacheLoader<AppliedPTransform<?, ?, ?>, DoFnLifecycleManager> basicDoFnCacheLoader() {
  return new CacheLoader<AppliedPTransform<?, ?, ?>, DoFnLifecycleManager>() {
    @Override
    public DoFnLifecycleManager load(AppliedPTransform<?, ?, ?> application) throws Exception {
      return DoFnLifecycleManager.of(ParDoTranslation.getDoFn(application));
    }
  };
}
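
This loader is intended to back a Guava LoadingCache, so a DoFnLifecycleManager is created lazily and reused per AppliedPTransform. A minimal sketch of that wiring with plain Guava and stand-in types (String keys and the class name are illustrative; Beam itself uses its vendored Guava):

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

public class CacheLoaderSketch {
  public static void main(String[] args) {
    LoadingCache<String, Integer> cache =
        CacheBuilder.newBuilder()
            .build(
                new CacheLoader<String, Integer>() {
                  @Override
                  public Integer load(String key) {
                    return key.length(); // computed once, on first access
                  }
                });
    System.out.println(cache.getUnchecked("pardo")); // 5, loaded lazily
  }
}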
 
Example #6
Source File: ParDoMultiOverrideFactory.java    From beam with Apache License 2.0
@SuppressWarnings("unchecked")
private PTransform<PCollection<? extends InputT>, PCollectionTuple> getReplacementForApplication(
    AppliedPTransform<
            PCollection<? extends InputT>,
            PCollectionTuple,
            PTransform<PCollection<? extends InputT>, PCollectionTuple>>
        application)
    throws IOException {

  DoFn<InputT, OutputT> fn = (DoFn<InputT, OutputT>) ParDoTranslation.getDoFn(application);

  DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());

  if (signature.processElement().isSplittable()) {
    return SplittableParDo.forAppliedParDo((AppliedPTransform) application);
  } else if (signature.stateDeclarations().size() > 0
      || signature.timerDeclarations().size() > 0
      || signature.timerFamilyDeclarations().size() > 0) {
    return new GbkThenStatefulParDo(
        fn,
        ParDoTranslation.getMainOutputTag(application),
        ParDoTranslation.getAdditionalOutputTags(application),
        ParDoTranslation.getSideInputs(application),
        ParDoTranslation.getSchemaInformation(application),
        ParDoTranslation.getSideInputMapping(application));
  } else {
    return application.getTransform();
  }
}
 
Example #7
Source File: PipelineValidator.java    From beam with Apache License 2.0
private static void validateParDo(
    String id, PTransform transform, Components components, Set<String> requirements)
    throws Exception {
  ParDoPayload payload = ParDoPayload.parseFrom(transform.getSpec().getPayload());
  // side_inputs
  for (String sideInputId : payload.getSideInputsMap().keySet()) {
    checkArgument(
        transform.containsInputs(sideInputId),
        "Transform %s side input %s is not listed in the transform's inputs",
        id,
        sideInputId);
  }
  if (payload.getStateSpecsCount() > 0 || payload.getTimerFamilySpecsCount() > 0) {
    checkArgument(requirements.contains(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN));
    // TODO: Validate state_specs and timer_specs
  }
  if (!payload.getRestrictionCoderId().isEmpty()) {
    checkArgument(components.containsCoders(payload.getRestrictionCoderId()));
    checkArgument(requirements.contains(ParDoTranslation.REQUIRES_SPLITTABLE_DOFN_URN));
  }
  if (payload.getRequestsFinalization()) {
    checkArgument(requirements.contains(ParDoTranslation.REQUIRES_BUNDLE_FINALIZATION_URN));
  }
  if (payload.getRequiresStableInput()) {
    checkArgument(requirements.contains(ParDoTranslation.REQUIRES_STABLE_INPUT_URN));
  }
  if (payload.getRequiresTimeSortedInput()) {
    checkArgument(requirements.contains(ParDoTranslation.REQUIRES_TIME_SORTED_INPUT_URN));
  }
}
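
Each checkArgument above tests membership in the pipeline's declared requirements, which are plain URN strings exposed as public constants on ParDoTranslation. A quick sketch that lists the constants this validator consults (values omitted here; the class name is illustrative):

import org.apache.beam.runners.core.construction.ParDoTranslation;

public class RequirementUrnsSketch {
  public static void main(String[] args) {
    // The five requirement URNs referenced by validateParDo above.
    System.out.println(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN);
    System.out.println(ParDoTranslation.REQUIRES_SPLITTABLE_DOFN_URN);
    System.out.println(ParDoTranslation.REQUIRES_BUNDLE_FINALIZATION_URN);
    System.out.println(ParDoTranslation.REQUIRES_STABLE_INPUT_URN);
    System.out.println(ParDoTranslation.REQUIRES_TIME_SORTED_INPUT_URN);
  }
}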
 
Example #8
Source File: Utils.java    From beam with Apache License 2.0
static boolean usesStateOrTimers(AppliedPTransform<?, ?, ?> appliedTransform) {
  try {
    return ParDoTranslation.usesStateOrTimers(appliedTransform);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example #9
Source File: ParDoTranslatorBatch.java    From beam with Apache License 2.0
private List<PCollectionView<?>> getSideInputs(TranslationContext context) {
  List<PCollectionView<?>> sideInputs;
  try {
    sideInputs = ParDoTranslation.getSideInputs(context.getCurrentTransform());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return sideInputs;
}
 
Example #10
Source File: ParDoTranslatorBatch.java    From beam with Apache License 2.0
private TupleTag<?> getTupleTag(TranslationContext context) {
  TupleTag<?> mainOutputTag;
  try {
    mainOutputTag = ParDoTranslation.getMainOutputTag(context.getCurrentTransform());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return mainOutputTag;
}
 
Example #11
Source File: ParDoTranslatorBatch.java    From beam with Apache License 2.0
@SuppressWarnings("unchecked")
private DoFn<InputT, OutputT> getDoFn(TranslationContext context) {
  DoFn<InputT, OutputT> doFn;
  try {
    doFn = (DoFn<InputT, OutputT>) ParDoTranslation.getDoFn(context.getCurrentTransform());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return doFn;
}
 
Example #12
Source File: ParDoMultiOutputTranslatorBatch.java    From twister2 with Apache License 2.0
@Override
public void translateNode(
    ParDo.MultiOutput<IT, OT> transform, Twister2BatchTranslationContext context) {
  DoFn<IT, OT> doFn;
  doFn = transform.getFn();
  BatchTSetImpl<WindowedValue<IT>> inputTTSet =
      context.getInputDataSet(context.getInput(transform));

  WindowingStrategy<?, ?> windowingStrategy = context.getInput(transform).getWindowingStrategy();
  Coder<IT> inputCoder = (Coder<IT>) context.getInput(transform).getCoder();

  Map<TupleTag<?>, PValue> outputs = context.getOutputs();
  Map<TupleTag<?>, Coder<?>> outputCoders = context.getOutputCoders();

  DoFnSignature signature = DoFnSignatures.getSignature(transform.getFn().getClass());
  DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation = ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
  TupleTag<OT> mainOutput = transform.getMainOutputTag();
  List<TupleTag<?>> additionalOutputTags = new ArrayList<>(outputs.size() - 1);
  Collection<PCollectionView<?>> sideInputs = transform.getSideInputs();

  // construct a map from side input to WindowingStrategy so that
  // the DoFn runner can map main-input windows to side input windows
  Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
  for (PCollectionView<?> sideInput : sideInputs) {
    sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal());
  }

  TupleTag<?> mainOutputTag;
  try {
    mainOutputTag = ParDoTranslation.getMainOutputTag(context.getCurrentTransform());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  Map<TupleTag<?>, Integer> outputMap = Maps.newHashMap();
  // put the main output at index 0; the DoFnFunction below expects this
  outputMap.put(mainOutputTag, 0);
  int count = 1;
  for (TupleTag<?> tag : outputs.keySet()) {
    if (!outputMap.containsKey(tag)) {
      outputMap.put(tag, count++);
    }
  }

  ComputeTSet<RawUnionValue, Iterator<WindowedValue<IT>>> outputTSet =
      inputTTSet
          .direct()
          .<RawUnionValue>compute(
              new DoFnFunction<OT, IT>(
                  context,
                  doFn,
                  inputCoder,
                  outputCoders,
                  additionalOutputTags,
                  windowingStrategy,
                  sideInputStrategies,
                  mainOutput,
                  doFnSchemaInformation,
                  outputMap));

  for (Map.Entry<TupleTag<?>, PValue> output : outputs.entrySet()) {
    ComputeTSet<WindowedValue<OT>, Iterator<RawUnionValue>> tempTSet =
        outputTSet.direct().compute(new OutputTagFilter(outputMap.get(output.getKey())));
    context.setOutputDataSet((PCollection) output.getValue(), tempTSet);
  }
}
 
Example #13
Source File: FnApiDoFnRunnerTest.java    From beam with Apache License 2.0
@Test
public void testProcessElementForPairWithRestriction() throws Exception {
  Pipeline p = Pipeline.create();
  PCollection<String> valuePCollection = p.apply(Create.of("unused"));
  PCollectionView<String> singletonSideInputView = valuePCollection.apply(View.asSingleton());
  valuePCollection.apply(
      TEST_TRANSFORM_ID,
      ParDo.of(new WindowObservingTestSplittableDoFn(singletonSideInputView))
          .withSideInputs(singletonSideInputView));

  RunnerApi.Pipeline pProto =
      ProtoOverrides.updateTransform(
          PTransformTranslation.PAR_DO_TRANSFORM_URN,
          PipelineTranslation.toProto(p, SdkComponents.create(p.getOptions()), true),
          SplittableParDoExpander.createSizedReplacement());
  String expandedTransformId =
      Iterables.find(
              pProto.getComponents().getTransformsMap().entrySet(),
              entry ->
                  entry
                          .getValue()
                          .getSpec()
                          .getUrn()
                          .equals(PTransformTranslation.SPLITTABLE_PAIR_WITH_RESTRICTION_URN)
                      && entry.getValue().getUniqueName().contains(TEST_TRANSFORM_ID))
          .getKey();
  RunnerApi.PTransform pTransform =
      pProto.getComponents().getTransformsOrThrow(expandedTransformId);
  String inputPCollectionId =
      pTransform.getInputsOrThrow(ParDoTranslation.getMainInputName(pTransform));
  String outputPCollectionId = Iterables.getOnlyElement(pTransform.getOutputsMap().values());

  FakeBeamFnStateClient fakeClient = new FakeBeamFnStateClient(ImmutableMap.of());

  List<WindowedValue<KV<String, OffsetRange>>> mainOutputValues = new ArrayList<>();
  MetricsContainerStepMap metricsContainerRegistry = new MetricsContainerStepMap();
  PCollectionConsumerRegistry consumers =
      new PCollectionConsumerRegistry(
          metricsContainerRegistry, mock(ExecutionStateTracker.class));
  consumers.register(outputPCollectionId, TEST_TRANSFORM_ID, ((List) mainOutputValues)::add);
  PTransformFunctionRegistry startFunctionRegistry =
      new PTransformFunctionRegistry(
          mock(MetricsContainerStepMap.class), mock(ExecutionStateTracker.class), "start");
  PTransformFunctionRegistry finishFunctionRegistry =
      new PTransformFunctionRegistry(
          mock(MetricsContainerStepMap.class), mock(ExecutionStateTracker.class), "finish");
  List<ThrowingRunnable> teardownFunctions = new ArrayList<>();

  new FnApiDoFnRunner.Factory<>()
      .createRunnerForPTransform(
          PipelineOptionsFactory.create(),
          null /* beamFnDataClient */,
          fakeClient,
          null /* beamFnTimerClient */,
          TEST_TRANSFORM_ID,
          pTransform,
          Suppliers.ofInstance("57L")::get,
          pProto.getComponents().getPcollectionsMap(),
          pProto.getComponents().getCodersMap(),
          pProto.getComponents().getWindowingStrategiesMap(),
          consumers,
          startFunctionRegistry,
          finishFunctionRegistry,
          teardownFunctions::add,
          null /* addProgressRequestCallback */,
          null /* bundleSplitListener */,
          null /* bundleFinalizer */);

  assertTrue(startFunctionRegistry.getFunctions().isEmpty());
  mainOutputValues.clear();

  assertThat(consumers.keySet(), containsInAnyOrder(inputPCollectionId, outputPCollectionId));

  FnDataReceiver<WindowedValue<?>> mainInput =
      consumers.getMultiplexingConsumer(inputPCollectionId);
  mainInput.accept(valueInGlobalWindow("5"));
  mainInput.accept(valueInGlobalWindow("2"));
  assertThat(
      mainOutputValues,
      contains(
          valueInGlobalWindow(
              KV.of("5", KV.of(new OffsetRange(0, 5), GlobalWindow.TIMESTAMP_MIN_VALUE))),
          valueInGlobalWindow(
              KV.of("2", KV.of(new OffsetRange(0, 2), GlobalWindow.TIMESTAMP_MIN_VALUE)))));
  mainOutputValues.clear();

  assertTrue(finishFunctionRegistry.getFunctions().isEmpty());
  assertThat(mainOutputValues, empty());

  Iterables.getOnlyElement(teardownFunctions).run();
  assertThat(mainOutputValues, empty());
}
 
Example #14
Source File: InsertFetchAndFilterStreamingSideInputNodesTest.java    From beam with Apache License 2.0
@Test
public void testSdkParDoWithSideInput() throws Exception {
  Pipeline p = Pipeline.create();
  PCollection<String> pc = p.apply(Create.of("a", "b", "c"));
  PCollectionView<List<String>> pcView = pc.apply(View.asList());
  pc.apply(ParDo.of(new TestDoFn(pcView)).withSideInputs(pcView));
  RunnerApi.Pipeline pipeline = PipelineTranslation.toProto(p);

  Node predecessor = createParDoNode("predecessor");
  InstructionOutputNode mainInput =
      InstructionOutputNode.create(new InstructionOutput(), "fakeId");
  Node sideInputParDo = createParDoNode(findParDoWithSideInput(pipeline));

  MutableNetwork<Node, Edge> network = createEmptyNetwork();
  network.addNode(predecessor);
  network.addNode(mainInput);
  network.addNode(sideInputParDo);
  network.addEdge(predecessor, mainInput, DefaultEdge.create());
  network.addEdge(mainInput, sideInputParDo, DefaultEdge.create());

  Network<Node, Edge> inputNetwork = ImmutableNetwork.copyOf(network);
  network = InsertFetchAndFilterStreamingSideInputNodes.with(pipeline).forNetwork(network);

  Node mainInputClone = InstructionOutputNode.create(mainInput.getInstructionOutput(), "fakeId");
  Node fetchAndFilter =
      FetchAndFilterStreamingSideInputsNode.create(
          pcView.getWindowingStrategyInternal(),
          ImmutableMap.of(
              pcView,
              ParDoTranslation.translateWindowMappingFn(
                  pcView.getWindowMappingFn(),
                  SdkComponents.create(PipelineOptionsFactory.create()))),
          NameContextsForTests.nameContextForTest());

  MutableNetwork<Node, Edge> expectedNetwork = createEmptyNetwork();
  expectedNetwork.addNode(predecessor);
  expectedNetwork.addNode(mainInputClone);
  expectedNetwork.addNode(fetchAndFilter);
  expectedNetwork.addNode(mainInput);
  expectedNetwork.addNode(sideInputParDo);
  expectedNetwork.addEdge(predecessor, mainInputClone, DefaultEdge.create());
  expectedNetwork.addEdge(mainInputClone, fetchAndFilter, DefaultEdge.create());
  expectedNetwork.addEdge(fetchAndFilter, mainInput, DefaultEdge.create());
  expectedNetwork.addEdge(mainInput, sideInputParDo, DefaultEdge.create());

  assertThatNetworksAreIdentical(expectedNetwork, network);
}
 
Example #15
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
/** Smoke test to fail fast if translation of a splittable ParDo breaks in the Fn API. */
@Test
public void testSplittableParDoTranslationFnApi() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("beam_fn_api"));
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);

  PCollection<String> windowedInput =
      pipeline
          .apply(Impulse.create())
          .apply(
              MapElements.via(
                  new SimpleFunction<byte[], String>() {
                    @Override
                    public String apply(byte[] input) {
                      return "";
                    }
                  }))
          .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))));
  windowedInput.apply(ParDo.of(new TestSplittableFn()));

  runner.replaceTransforms(pipeline);

  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  JobSpecification result =
      translator.translate(
          pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());

  Job job = result.getJob();

  // The job should contain a ParDo step, containing a "restriction_encoding".

  List<Step> steps = job.getSteps();
  Step splittableParDo = null;
  for (Step step : steps) {
    if ("ParallelDo".equals(step.getKind())
        && step.getProperties().containsKey(PropertyNames.RESTRICTION_ENCODING)) {
      assertNull(splittableParDo);
      splittableParDo = step;
    }
  }
  assertNotNull(splittableParDo);

  String fn = Structs.getString(splittableParDo.getProperties(), PropertyNames.SERIALIZED_FN);

  Components componentsProto = result.getPipelineProto().getComponents();
  RehydratedComponents components = RehydratedComponents.forComponents(componentsProto);
  RunnerApi.PTransform splittableTransform = componentsProto.getTransformsOrThrow(fn);
  assertEquals(
      PTransformTranslation.PAR_DO_TRANSFORM_URN, splittableTransform.getSpec().getUrn());
  ParDoPayload payload = ParDoPayload.parseFrom(splittableTransform.getSpec().getPayload());
  assertThat(
      ParDoTranslation.doFnWithExecutionInformationFromProto(payload.getDoFn()).getDoFn(),
      instanceOf(TestSplittableFn.class));
  Coder expectedRestrictionAndStateCoder =
      KvCoder.of(SerializableCoder.of(OffsetRange.class), VoidCoder.of());
  assertEquals(
      expectedRestrictionAndStateCoder, components.getCoder(payload.getRestrictionCoderId()));

  // In the Fn API case, we still translate the restriction coder into the RESTRICTION_ENCODING
  // property as a CloudObject, and it gets passed through the Dataflow backend, but in the end
  // the Dataflow worker will end up fetching it from the SDK transform payload instead.
  Coder<?> restrictionCoder =
      CloudObjects.coderFromCloudObject(
          (CloudObject)
              Structs.getObject(
                  splittableParDo.getProperties(), PropertyNames.RESTRICTION_ENCODING));
  assertEquals(expectedRestrictionAndStateCoder, restrictionCoder);
}
 
Example #16
Source File: DataflowPipelineTranslator.java    From beam with Apache License 2.0
private <InputT, OutputT, RestrictionT, WatermarkEstimatorStateT> void translateTyped(
    SplittableParDo.ProcessKeyedElements<
            InputT, OutputT, RestrictionT, WatermarkEstimatorStateT>
        transform,
    TranslationContext context) {
  DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation =
      ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(context.getCurrentTransform());
  StepTranslationContext stepContext =
      context.addStep(transform, "SplittableProcessKeyed");
  Map<TupleTag<?>, Coder<?>> outputCoders =
      context.getOutputs(transform).entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey, e -> ((PCollection) e.getValue()).getCoder()));
  translateInputs(
      stepContext, context.getInput(transform), transform.getSideInputs(), context);
  translateOutputs(context.getOutputs(transform), stepContext);
  String ptransformId =
      context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform());
  translateFn(
      stepContext,
      ptransformId,
      transform.getFn(),
      transform.getInputWindowingStrategy(),
      transform.getSideInputs(),
      transform.getElementCoder(),
      context,
      transform.getMainOutputTag(),
      outputCoders,
      doFnSchemaInformation,
      sideInputMapping);

  stepContext.addInput(
      PropertyNames.RESTRICTION_CODER,
      translateCoder(
          KvCoder.of(
              transform.getRestrictionCoder(),
              transform.getWatermarkEstimatorStateCoder()),
          context));
}
 
Example #17
Source File: DataflowPipelineTranslator.java    From beam with Apache License 2.0
private <InputT, OutputT> void translateSingleHelper(
    ParDoSingle<InputT, OutputT> transform, TranslationContext context) {

  DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation =
      ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(context.getCurrentTransform());
  StepTranslationContext stepContext = context.addStep(transform, "ParallelDo");
  Map<TupleTag<?>, Coder<?>> outputCoders =
      context.getOutputs(transform).entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey, e -> ((PCollection) e.getValue()).getCoder()));

  translateInputs(
      stepContext,
      context.getInput(transform),
      transform.getSideInputs().values(),
      context);
  stepContext.addOutput(
      transform.getMainOutputTag().getId(), context.getOutput(transform));
  String ptransformId =
      context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform());
  translateFn(
      stepContext,
      ptransformId,
      transform.getFn(),
      context.getInput(transform).getWindowingStrategy(),
      transform.getSideInputs().values(),
      context.getInput(transform).getCoder(),
      context,
      transform.getMainOutputTag(),
      outputCoders,
      doFnSchemaInformation,
      sideInputMapping);

  // TODO: Move this logic into translateFn once the legacy ProcessKeyedElements is
  // removed.
  if (context.isFnApi()) {
    DoFnSignature signature = DoFnSignatures.signatureForDoFn(transform.getFn());
    if (signature.processElement().isSplittable()) {
      DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(transform.getFn());
      Coder<?> restrictionAndWatermarkStateCoder =
          KvCoder.of(
              doFnInvoker.invokeGetRestrictionCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()),
              doFnInvoker.invokeGetWatermarkEstimatorStateCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()));
      stepContext.addInput(
          PropertyNames.RESTRICTION_ENCODING,
          translateCoder(restrictionAndWatermarkStateCoder, context));
    }
  }
}
 
Example #18
Source File: DataflowPipelineTranslator.java    From beam with Apache License 2.0
private <InputT, OutputT> void translateMultiHelper(
    ParDo.MultiOutput<InputT, OutputT> transform, TranslationContext context) {
  StepTranslationContext stepContext = context.addStep(transform, "ParallelDo");
  DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation =
      ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(context.getCurrentTransform());
  Map<TupleTag<?>, Coder<?>> outputCoders =
      context.getOutputs(transform).entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey, e -> ((PCollection) e.getValue()).getCoder()));
  translateInputs(
      stepContext,
      context.getInput(transform),
      transform.getSideInputs().values(),
      context);
  translateOutputs(context.getOutputs(transform), stepContext);
  String ptransformId =
      context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform());
  translateFn(
      stepContext,
      ptransformId,
      transform.getFn(),
      context.getInput(transform).getWindowingStrategy(),
      transform.getSideInputs().values(),
      context.getInput(transform).getCoder(),
      context,
      transform.getMainOutputTag(),
      outputCoders,
      doFnSchemaInformation,
      sideInputMapping);

  // TODO: Move this logic into translateFn once the legacy ProcessKeyedElements is
  // removed.
  if (context.isFnApi()) {
    DoFnSignature signature = DoFnSignatures.signatureForDoFn(transform.getFn());
    if (signature.processElement().isSplittable()) {
      DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(transform.getFn());
      Coder<?> restrictionAndWatermarkStateCoder =
          KvCoder.of(
              doFnInvoker.invokeGetRestrictionCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()),
              doFnInvoker.invokeGetWatermarkEstimatorStateCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()));
      stepContext.addInput(
          PropertyNames.RESTRICTION_ENCODING,
          translateCoder(restrictionAndWatermarkStateCoder, context));
    }
  }
}
 
Example #19
Source File: ParDoBoundMultiTranslator.java    From beam with Apache License 2.0
private static <InT, OutT> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  Map<String, String> outputs = transform.getTransform().getOutputsMap();

  final RunnerApi.ExecutableStagePayload stagePayload;
  try {
    stagePayload =
        RunnerApi.ExecutableStagePayload.parseFrom(
            transform.getTransform().getSpec().getPayload());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  String inputId = stagePayload.getInput();
  final MessageStream<OpMessage<InT>> inputStream = ctx.getMessageStreamById(inputId);
  // TODO: support side input
  final List<MessageStream<OpMessage<InT>>> sideInputStreams = Collections.emptyList();

  final Map<TupleTag<?>, Integer> tagToIndexMap = new HashMap<>();
  final Map<String, TupleTag<?>> idToTupleTagMap = new HashMap<>();

  // first output as the main output
  final TupleTag<OutT> mainOutputTag =
      outputs.isEmpty() ? null : new TupleTag(outputs.keySet().iterator().next());

  AtomicInteger index = new AtomicInteger(0);
  outputs
      .keySet()
      .iterator()
      .forEachRemaining(
          outputName -> {
            TupleTag<?> tupleTag = new TupleTag<>(outputName);
            tagToIndexMap.put(tupleTag, index.get());
            index.incrementAndGet();
            String collectionId = outputs.get(outputName);
            idToTupleTagMap.put(collectionId, tupleTag);
          });

  WindowedValue.WindowedValueCoder<InT> windowedInputCoder =
      ctx.instantiateCoder(inputId, pipeline.getComponents());

  final DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation = ParDoTranslation.getSchemaInformation(transform.getTransform());

  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(transform.getTransform());

  final RunnerApi.PCollection input = pipeline.getComponents().getPcollectionsOrThrow(inputId);
  final PCollection.IsBounded isBounded = SamzaPipelineTranslatorUtils.isBounded(input);

  final DoFnOp<InT, OutT, RawUnionValue> op =
      new DoFnOp<>(
          mainOutputTag,
          new NoOpDoFn<>(),
          null, // key coder not in use
          windowedInputCoder.getValueCoder(), // input coder not in use
          windowedInputCoder,
          Collections.emptyMap(), // output coders not in use
          Collections.emptyList(), // sideInputs not in use until side input support
          new ArrayList<>(idToTupleTagMap.values()), // used by java runner only
          SamzaPipelineTranslatorUtils.getPortableWindowStrategy(transform, pipeline),
          Collections.emptyMap(), // idToViewMap not in use until side input support
          new DoFnOp.MultiOutputManagerFactory(tagToIndexMap),
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          isBounded,
          true,
          stagePayload,
          idToTupleTagMap,
          doFnSchemaInformation,
          sideInputMapping);

  final MessageStream<OpMessage<InT>> mergedStreams;
  if (sideInputStreams.isEmpty()) {
    mergedStreams = inputStream;
  } else {
    MessageStream<OpMessage<InT>> mergedSideInputStreams =
        MessageStream.mergeAll(sideInputStreams).flatMap(new SideInputWatermarkFn());
    mergedStreams = inputStream.merge(Collections.singletonList(mergedSideInputStreams));
  }

  final MessageStream<OpMessage<RawUnionValue>> taggedOutputStream =
      mergedStreams.flatMap(OpAdapter.adapt(op));

  for (int outputIndex : tagToIndexMap.values()) {
    final MessageStream<OpMessage<OutT>> outputStream =
        taggedOutputStream
            .filter(
                message ->
                    message.getType() != OpMessage.Type.ELEMENT
                        || message.getElement().getValue().getUnionTag() == outputIndex)
            .flatMap(OpAdapter.adapt(new RawUnionValueToValue()));

    ctx.registerMessageStream(ctx.getOutputId(transform), outputStream);
  }
}
 
Example #20
Source File: ParDoTranslatorBatch.java    From beam with Apache License 2.0
@Override
public void translateTransform(
    PTransform<PCollection<InputT>, PCollectionTuple> transform, TranslationContext context) {
  String stepName = context.getCurrentTransform().getFullName();

  // Check for not supported advanced features
  // TODO: add support of Splittable DoFn
  DoFn<InputT, OutputT> doFn = getDoFn(context);
  checkState(
      !DoFnSignatures.isSplittable(doFn),
      "Not expected to directly translate splittable DoFn, should have been overridden: %s",
      doFn);

  // TODO: add support of states and timers
  checkState(
      !DoFnSignatures.isStateful(doFn), "States and timers are not supported for the moment.");

  checkState(
      !DoFnSignatures.requiresTimeSortedInput(doFn),
      "@RequiresTimeSortedInput is not " + "supported for the moment");

  DoFnSchemaInformation doFnSchemaInformation =
      ParDoTranslation.getSchemaInformation(context.getCurrentTransform());

  // Init main variables
  PValue input = context.getInput();
  Dataset<WindowedValue<InputT>> inputDataSet = context.getDataset(input);
  Map<TupleTag<?>, PValue> outputs = context.getOutputs();
  TupleTag<?> mainOutputTag = getTupleTag(context);
  List<TupleTag<?>> outputTags = new ArrayList<>(outputs.keySet());
  WindowingStrategy<?, ?> windowingStrategy =
      ((PCollection<InputT>) input).getWindowingStrategy();
  Coder<InputT> inputCoder = ((PCollection<InputT>) input).getCoder();
  Coder<? extends BoundedWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder();

  // construct a map from side input to WindowingStrategy so that
  // the DoFn runner can map main-input windows to side input windows
  List<PCollectionView<?>> sideInputs = getSideInputs(context);
  Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
  for (PCollectionView<?> sideInput : sideInputs) {
    sideInputStrategies.put(sideInput, sideInput.getPCollection().getWindowingStrategy());
  }

  SideInputBroadcast broadcastStateData = createBroadcastSideInputs(sideInputs, context);

  Map<TupleTag<?>, Coder<?>> outputCoderMap = context.getOutputCoders();
  MetricsContainerStepMapAccumulator metricsAccum = MetricsAccumulator.getInstance();

  List<TupleTag<?>> additionalOutputTags = new ArrayList<>();
  for (TupleTag<?> tag : outputTags) {
    if (!tag.equals(mainOutputTag)) {
      additionalOutputTags.add(tag);
    }
  }

  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(context.getCurrentTransform());
  @SuppressWarnings("unchecked")
  DoFnFunction<InputT, OutputT> doFnWrapper =
      new DoFnFunction(
          metricsAccum,
          stepName,
          doFn,
          windowingStrategy,
          sideInputStrategies,
          context.getSerializableOptions(),
          additionalOutputTags,
          mainOutputTag,
          inputCoder,
          outputCoderMap,
          broadcastStateData,
          doFnSchemaInformation,
          sideInputMapping);

  MultiOuputCoder multipleOutputCoder =
      MultiOuputCoder.of(SerializableCoder.of(TupleTag.class), outputCoderMap, windowCoder);
  Dataset<Tuple2<TupleTag<?>, WindowedValue<?>>> allOutputs =
      inputDataSet.mapPartitions(doFnWrapper, EncoderHelpers.fromBeamCoder(multipleOutputCoder));
  if (outputs.entrySet().size() > 1) {
    allOutputs.persist();
    for (Map.Entry<TupleTag<?>, PValue> output : outputs.entrySet()) {
      pruneOutputFilteredByTag(context, allOutputs, output, windowCoder);
    }
  } else {
    Coder<OutputT> outputCoder = ((PCollection<OutputT>) outputs.get(mainOutputTag)).getCoder();
    Coder<WindowedValue<?>> windowedValueCoder =
        (Coder<WindowedValue<?>>) (Coder<?>) WindowedValue.getFullCoder(outputCoder, windowCoder);
    Dataset<WindowedValue<?>> outputDataset =
        allOutputs.map(
            (MapFunction<Tuple2<TupleTag<?>, WindowedValue<?>>, WindowedValue<?>>)
                value -> value._2,
            EncoderHelpers.fromBeamCoder(windowedValueCoder));
    context.putDatasetWildcard(outputs.entrySet().iterator().next().getValue(), outputDataset);
  }
}
 
Example #21
Source File: PipelineTranslator.java    From incubator-nemo with Apache License 2.0
/**
 * @param ctx          provides translation context.
 * @param beamNode     the beam node to be translated.
 * @param sideInputMap side inputs.
 * @return the created DoFnTransform.
 */
private static AbstractDoFnTransform createDoFnTransform(final PipelineTranslationContext ctx,
                                                         final TransformHierarchy.Node beamNode,
                                                         final Map<Integer, PCollectionView<?>> sideInputMap) {
  try {
    final AppliedPTransform pTransform = beamNode.toAppliedPTransform(ctx.getPipeline());
    final DoFn doFn = ParDoTranslation.getDoFn(pTransform);
    final TupleTag mainOutputTag = ParDoTranslation.getMainOutputTag(pTransform);
    final TupleTagList additionalOutputTags = ParDoTranslation.getAdditionalOutputTags(pTransform);

    final PCollection<?> mainInput = (PCollection<?>)
      Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(pTransform));

    final HasDisplayData displayData = (builder) -> builder.add(DisplayData.item("name", beamNode.getFullName()));
    final DoFnSchemaInformation doFnSchemaInformation =
      ParDoTranslation.getSchemaInformation(beamNode.toAppliedPTransform(ctx.getPipeline()));

    if (sideInputMap.isEmpty()) {
      return new DoFnTransform(
        doFn,
        mainInput.getCoder(),
        getOutputCoders(pTransform),
        mainOutputTag,
        additionalOutputTags.getAll(),
        mainInput.getWindowingStrategy(),
        ctx.getPipelineOptions(),
        DisplayData.from(displayData),
        doFnSchemaInformation,
        Collections.emptyMap());
    } else {
      return new PushBackDoFnTransform(
        doFn,
        mainInput.getCoder(),
        getOutputCoders(pTransform),
        mainOutputTag,
        additionalOutputTags.getAll(),
        mainInput.getWindowingStrategy(),
        sideInputMap,
        ctx.getPipelineOptions(),
        DisplayData.from(displayData),
        doFnSchemaInformation,
        Collections.emptyMap());
    }
  } catch (final IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example #22
Source File: GreedyPipelineFuserTest.java    From beam with Apache License 2.0
@Test
public void parDoWithStateAndTimerRootsStage() {
  PTransform timerTransform =
      PTransform.newBuilder()
          .setUniqueName("TimerParDo")
          .putInputs("input", "impulse.out")
          .putInputs("timer", "timer.out")
          .putOutputs("timer", "timer.out")
          .putOutputs("output", "output.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .putStateSpecs("state", StateSpec.getDefaultInstance())
                          .putTimerFamilySpecs("timer", TimerFamilySpec.getDefaultInstance())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();

  Components components =
      partialComponents
          .toBuilder()
          .putTransforms("timer", timerTransform)
          .putPcollections("timer.out", pc("timer.out"))
          .putPcollections("output.out", pc("output.out"))
          .putEnvironments("common", Environments.createDockerEnvironment("common"))
          .build();

  FusedPipeline fused =
      GreedyPipelineFuser.fuse(
          Pipeline.newBuilder()
              .setComponents(components)
              .addRequirements(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN)
              .build());

  assertThat(
      fused.getRunnerExecutedTransforms(),
      containsInAnyOrder(
          PipelineNode.pTransform("impulse", components.getTransformsOrThrow("impulse"))));
  assertThat(
      fused.getFusedStages(),
      contains(
          ExecutableStageMatcher.withInput("impulse.out")
              .withNoOutputs()
              .withTransforms("timer")));
}
 
Example #23
Source File: GreedyPipelineFuserTest.java    From beam with Apache License 2.0
@Test
public void parDoWithTimerRootsStage() {
  // (impulse.out) -> parDo -> (parDo.out)
  // (parDo.out) -> timer -> timer.out
  // timer has a timer spec which prevents it from fusing with an upstream ParDo
  PTransform parDoTransform =
      PTransform.newBuilder()
          .setUniqueName("ParDo")
          .putInputs("input", "impulse.out")
          .putOutputs("output", "parDo.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();
  PTransform timerTransform =
      PTransform.newBuilder()
          .setUniqueName("TimerParDo")
          .putInputs("input", "parDo.out")
          .putInputs("timer", "timer.out")
          .putOutputs("timer", "timer.out")
          .putOutputs("output", "output.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .putTimerFamilySpecs("timer", TimerFamilySpec.getDefaultInstance())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();

  Components components =
      partialComponents
          .toBuilder()
          .putTransforms("parDo", parDoTransform)
          .putPcollections("parDo.out", pc("parDo.out"))
          .putTransforms("timer", timerTransform)
          .putPcollections("timer.out", pc("timer.out"))
          .putPcollections("output.out", pc("output.out"))
          .putEnvironments("common", Environments.createDockerEnvironment("common"))
          .build();

  FusedPipeline fused =
      GreedyPipelineFuser.fuse(
          Pipeline.newBuilder()
              .setComponents(components)
              .addRequirements(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN)
              .build());

  assertThat(
      fused.getRunnerExecutedTransforms(),
      containsInAnyOrder(
          PipelineNode.pTransform("impulse", components.getTransformsOrThrow("impulse"))));
  assertThat(
      fused.getFusedStages(),
      containsInAnyOrder(
          ExecutableStageMatcher.withInput("impulse.out")
              .withOutputs("parDo.out")
              .withTransforms("parDo"),
          ExecutableStageMatcher.withInput("parDo.out").withNoOutputs().withTransforms("timer")));
}
 
Example #24
Source File: GreedyPipelineFuserTest.java    From beam with Apache License 2.0
@Test
public void statefulParDoRootsStage() {
  // (impulse.out) -> parDo -> (parDo.out)
  // (parDo.out) -> stateful -> stateful.out
  // stateful has a state spec which prevents it from fusing with an upstream ParDo
  PTransform parDoTransform =
      PTransform.newBuilder()
          .setUniqueName("ParDo")
          .putInputs("input", "impulse.out")
          .putOutputs("output", "parDo.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();
  PTransform statefulTransform =
      PTransform.newBuilder()
          .setUniqueName("StatefulParDo")
          .putInputs("input", "parDo.out")
          .putOutputs("output", "stateful.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .putStateSpecs("state", StateSpec.getDefaultInstance())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();

  Components components =
      partialComponents
          .toBuilder()
          .putTransforms("parDo", parDoTransform)
          .putPcollections("parDo.out", pc("parDo.out"))
          .putTransforms("stateful", statefulTransform)
          .putPcollections("stateful.out", pc("stateful.out"))
          .putEnvironments("common", Environments.createDockerEnvironment("common"))
          .build();
  FusedPipeline fused =
      GreedyPipelineFuser.fuse(
          Pipeline.newBuilder()
              .setComponents(components)
              .addRequirements(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN)
              .build());

  assertThat(
      fused.getRunnerExecutedTransforms(),
      containsInAnyOrder(
          PipelineNode.pTransform("impulse", components.getTransformsOrThrow("impulse"))));
  assertThat(
      fused.getFusedStages(),
      containsInAnyOrder(
          ExecutableStageMatcher.withInput("impulse.out")
              .withOutputs("parDo.out")
              .withTransforms("parDo"),
          ExecutableStageMatcher.withInput("parDo.out")
              .withNoOutputs()
              .withTransforms("stateful")));
}
 
Example #25
Source File: ParDoMultiOutputTranslatorBatch.java    From beam with Apache License 2.0
@Override
public void translateNode(
    ParDo.MultiOutput<InputT, OutputT> transform, Twister2BatchTranslationContext context) {
  DoFn<InputT, OutputT> doFn;
  doFn = transform.getFn();
  if (DoFnSignatures.signatureForDoFn(doFn).processElement().isSplittable()) {
    throw new UnsupportedOperationException(
        String.format(
            "Not expected to directly translate splittable DoFn, should have been overridden: %s",
            doFn));
  }
  BatchTSetImpl<WindowedValue<InputT>> inputTTSet =
      context.getInputDataSet(context.getInput(transform));

  WindowingStrategy<?, ?> windowingStrategy = context.getInput(transform).getWindowingStrategy();
  Coder<InputT> inputCoder = (Coder<InputT>) context.getInput(transform).getCoder();
  Map<String, PCollectionView<?>> sideInputMapping;

  Map<TupleTag<?>, PValue> outputs = context.getOutputs();
  Map<TupleTag<?>, Coder<?>> outputCoders = context.getOutputCoders();

  // DoFnSignature signature = DoFnSignatures.getSignature(transform.getFn().getClass());
  DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation = ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
  sideInputMapping = ParDoTranslation.getSideInputMapping(context.getCurrentTransform());

  TupleTag<OutputT> mainOutput = transform.getMainOutputTag();
  List<TupleTag<?>> additionalOutputTags =
      new ArrayList<>(transform.getAdditionalOutputTags().getAll());
  Map<String, PCollectionView<?>> sideInputs = transform.getSideInputs();
  // TODO: note the change from List to Map in side inputs

  // construct a map from side input to WindowingStrategy so that
  // the DoFn runner can map main-input windows to side input windows
  Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
  for (PCollectionView<?> sideInput : sideInputs.values()) {
    sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal());
  }

  TupleTag<?> mainOutputTag;
  try {
    mainOutputTag = ParDoTranslation.getMainOutputTag(context.getCurrentTransform());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  Map<TupleTag<?>, Integer> outputMap = Maps.newHashMap();
  outputMap.put(mainOutputTag, 0);
  int count = 1;
  for (TupleTag<?> tag : outputs.keySet()) {
    if (!outputMap.containsKey(tag)) {
      outputMap.put(tag, count++);
    }
  }

  ComputeTSet<RawUnionValue, Iterator<WindowedValue<InputT>>> outputTSet =
      inputTTSet
          .direct()
          .<RawUnionValue>compute(
              new DoFnFunction<OutputT, InputT>(
                  context,
                  doFn,
                  inputCoder,
                  outputCoders,
                  additionalOutputTags,
                  windowingStrategy,
                  sideInputStrategies,
                  mainOutput,
                  doFnSchemaInformation,
                  outputMap,
                  sideInputMapping));

  for (Map.Entry<TupleTag<?>, PValue> output : outputs.entrySet()) {
    ComputeTSet<WindowedValue<OutputT>, Iterator<RawUnionValue>> tempTSet =
        outputTSet.direct().compute(new OutputTagFilter(outputMap.get(output.getKey())));
    context.setOutputDataSet((PCollection) output.getValue(), tempTSet);
  }
}
 
Example #26
Source File: KafkaIOExternalTest.java    From beam with Apache License 2.0
@Test
public void testConstructKafkaWrite() throws Exception {
  String topic = "topic";
  String keySerializer = "org.apache.kafka.common.serialization.ByteArraySerializer";
  String valueSerializer = "org.apache.kafka.common.serialization.LongSerializer";
  ImmutableMap<String, String> producerConfig =
      ImmutableMap.<String, String>builder()
          .put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "server1:port,server2:port")
          .put("retries", "3")
          .build();

  ExternalTransforms.ExternalConfigurationPayload payload =
      ExternalTransforms.ExternalConfigurationPayload.newBuilder()
          .putConfiguration(
              "topic",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .setPayload(ByteString.copyFrom(encodeString(topic)))
                  .build())
          .putConfiguration(
              "producer_config",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn("beam:coder:iterable:v1")
                  .addCoderUrn("beam:coder:kv:v1")
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .setPayload(ByteString.copyFrom(mapAsBytes(producerConfig)))
                  .build())
          .putConfiguration(
              "key_serializer",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .setPayload(ByteString.copyFrom(encodeString(keySerializer)))
                  .build())
          .putConfiguration(
              "value_serializer",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .setPayload(ByteString.copyFrom(encodeString(valueSerializer)))
                  .build())
          .build();

  Pipeline p = Pipeline.create();
  p.apply(Impulse.create()).apply(WithKeys.of("key"));
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  String inputPCollection =
      Iterables.getOnlyElement(
          Iterables.getLast(pipelineProto.getComponents().getTransformsMap().values())
              .getOutputsMap()
              .values());

  ExpansionApi.ExpansionRequest request =
      ExpansionApi.ExpansionRequest.newBuilder()
          .setComponents(pipelineProto.getComponents())
          .setTransform(
              RunnerApi.PTransform.newBuilder()
                  .setUniqueName("test")
                  .putInputs("input", inputPCollection)
                  .setSpec(
                      RunnerApi.FunctionSpec.newBuilder()
                          .setUrn("beam:external:java:kafka:write:v1")
                          .setPayload(payload.toByteString())))
          .setNamespace("test_namespace")
          .build();

  ExpansionService expansionService = new ExpansionService();
  TestStreamObserver<ExpansionApi.ExpansionResponse> observer = new TestStreamObserver<>();
  expansionService.expand(request, observer);

  ExpansionApi.ExpansionResponse result = observer.result;
  RunnerApi.PTransform transform = result.getTransform();
  assertThat(
      transform.getSubtransformsList(),
      Matchers.contains(
          "test_namespacetest/Kafka ProducerRecord", "test_namespacetest/KafkaIO.WriteRecords"));
  assertThat(transform.getInputsCount(), Matchers.is(1));
  assertThat(transform.getOutputsCount(), Matchers.is(0));

  RunnerApi.PTransform writeComposite =
      result.getComponents().getTransformsOrThrow(transform.getSubtransforms(1));
  RunnerApi.PTransform writeParDo =
      result
          .getComponents()
          .getTransformsOrThrow(
              result
                  .getComponents()
                  .getTransformsOrThrow(writeComposite.getSubtransforms(0))
                  .getSubtransforms(0));

  RunnerApi.ParDoPayload parDoPayload =
      RunnerApi.ParDoPayload.parseFrom(writeParDo.getSpec().getPayload());
  DoFn kafkaWriter = ParDoTranslation.getDoFn(parDoPayload);
  assertThat(kafkaWriter, Matchers.instanceOf(KafkaWriter.class));
  KafkaIO.WriteRecords spec =
      (KafkaIO.WriteRecords) Whitebox.getInternalState(kafkaWriter, "spec");

  assertThat(spec.getProducerConfig(), Matchers.is(producerConfig));
  assertThat(spec.getTopic(), Matchers.is(topic));
  assertThat(spec.getKeySerializer().getName(), Matchers.is(keySerializer));
  assertThat(spec.getValueSerializer().getName(), Matchers.is(valueSerializer));
}
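
Unlike the earlier examples, which pass an AppliedPTransform, this test uses the overload of ParDoTranslation.getDoFn that takes an already-parsed ParDoPayload. A minimal fragment of that flow (the helper name is illustrative, and the IOException clause is an assumption covering the proto-parsing failure modes):

/** Sketch: extract the user DoFn from a portable ParDo transform's spec. */
static DoFn<?, ?> doFnFromProto(RunnerApi.PTransform parDoTransform) throws IOException {
  RunnerApi.ParDoPayload payload =
      RunnerApi.ParDoPayload.parseFrom(parDoTransform.getSpec().getPayload());
  return ParDoTranslation.getDoFn(payload); // payload-based overload
}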
 
Example #27
Source File: PubsubIOExternalTest.java    From beam with Apache License 2.0
@Test
public void testConstructPubsubWrite() throws Exception {
  String topic = "projects/project-1234/topics/topic_name";
  String idAttribute = "id_foo";

  ExternalTransforms.ExternalConfigurationPayload payload =
      ExternalTransforms.ExternalConfigurationPayload.newBuilder()
          .putConfiguration(
              "topic",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .setPayload(ByteString.copyFrom(encodeString(topic)))
                  .build())
          .putConfiguration(
              "id_label",
              ExternalTransforms.ConfigValue.newBuilder()
                  .addCoderUrn("beam:coder:string_utf8:v1")
                  .setPayload(ByteString.copyFrom(encodeString(idAttribute)))
                  .build())
          .build();

  Pipeline p = Pipeline.create();
  p.apply("unbounded", Create.of(1, 2, 3)).setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);

  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  String inputPCollection =
      Iterables.getOnlyElement(
          Iterables.getLast(pipelineProto.getComponents().getTransformsMap().values())
              .getOutputsMap()
              .values());

  ExpansionApi.ExpansionRequest request =
      ExpansionApi.ExpansionRequest.newBuilder()
          .setComponents(pipelineProto.getComponents())
          .setTransform(
              RunnerApi.PTransform.newBuilder()
                  .setUniqueName("test")
                  .putInputs("input", inputPCollection)
                  .setSpec(
                      RunnerApi.FunctionSpec.newBuilder()
                          .setUrn("beam:external:java:pubsub:write:v1")
                          .setPayload(payload.toByteString())))
          .setNamespace("test_namespace")
          .build();

  ExpansionService expansionService = new ExpansionService();
  TestStreamObserver<ExpansionApi.ExpansionResponse> observer = new TestStreamObserver<>();
  expansionService.expand(request, observer);

  ExpansionApi.ExpansionResponse result = observer.result;

  RunnerApi.PTransform transform = result.getTransform();
  assertThat(
      transform.getSubtransformsList(),
      Matchers.contains(
          "test_namespacetest/MapElements", "test_namespacetest/PubsubUnboundedSink"));
  assertThat(transform.getInputsCount(), Matchers.is(1));
  assertThat(transform.getOutputsCount(), Matchers.is(0));

  // test_namespacetest/PubsubUnboundedSink
  RunnerApi.PTransform writeComposite =
      result.getComponents().getTransformsOrThrow(transform.getSubtransforms(1));

  // test_namespacetest/PubsubUnboundedSink/PubsubUnboundedSink.Writer
  RunnerApi.PTransform writeComposite2 =
      result.getComponents().getTransformsOrThrow(writeComposite.getSubtransforms(3));

  // test_namespacetest/PubsubUnboundedSink/PubsubUnboundedSink.Writer/ParMultiDo(Writer)
  RunnerApi.PTransform writeParDo =
      result.getComponents().getTransformsOrThrow(writeComposite2.getSubtransforms(0));

  RunnerApi.ParDoPayload parDoPayload =
      RunnerApi.ParDoPayload.parseFrom(writeParDo.getSpec().getPayload());
  DoFn pubsubWriter = ParDoTranslation.getDoFn(parDoPayload);

  String idAttributeActual = (String) Whitebox.getInternalState(pubsubWriter, "idAttribute");

  ValueProvider<PubsubClient.TopicPath> topicActual =
      (ValueProvider<PubsubClient.TopicPath>) Whitebox.getInternalState(pubsubWriter, "topic");

  assertThat(topicActual == null ? null : String.valueOf(topicActual), Matchers.is(topic));
  assertThat(idAttributeActual, Matchers.is(idAttribute));
}