Java Code Examples for org.apache.beam.sdk.transforms.DoFnSchemaInformation

The following examples show how to use org.apache.beam.sdk.transforms.DoFnSchemaInformation. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: beam   Source File: CombineValuesFnFactory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@link DoFnInfo} that wraps an {@code ExtractOutputDoFn} for the given combine
 * function.
 *
 * <p>The input coder stays {@code null} when {@code combineFn.getKvCoder()} is {@code null};
 * otherwise it pairs the key coder with the accumulator coder.
 */
private static <K, InputT, AccumT, OutputT> DoFnInfo<?, ?> createDoFnInfo(
    AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn, SideInputReader sideInputReader) {
  GlobalCombineFnRunner<InputT, AccumT, OutputT> runner =
      GlobalCombineFnRunners.create(combineFn.getFn());
  DoFn<KV<K, AccumT>, KV<K, OutputT>> extractOutputFn =
      new ExtractOutputDoFn<>(runner, sideInputReader);

  // Only derive a coder when the combine function actually carries a key-value coder.
  KvCoder<K, AccumT> accumulatorKvCoder =
      combineFn.getKvCoder() == null
          ? null
          : KvCoder.of(combineFn.getKvCoder().getKeyCoder(), combineFn.getAccumulatorCoder());

  return DoFnInfo.forFn(
      extractOutputFn,
      combineFn.getWindowingStrategy(),
      combineFn.getSideInputViews(),
      accumulatorKvCoder,
      Collections.emptyMap(), // Not needed here.
      new TupleTag<>(PropertyNames.OUTPUT),
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example 2
Source Project: beam   Source File: DoFnInfo.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link DoFnInfo} for the given {@link DoFn}.
 *
 * <p>This method exists for backwards compatibility with the Dataflow runner. Once the Dataflow
 * runner has been updated to use the new constructor, remove this one.
 *
 * <p>Unlike the constructor it delegates to, this overload takes one argument fewer: an empty map
 * is supplied in the fifth position on the caller's behalf.
 */
public static <InputT, OutputT> DoFnInfo<InputT, OutputT> forFn(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    Iterable<PCollectionView<?>> sideInputViews,
    Coder<InputT> inputCoder,
    TupleTag<OutputT> mainOutput,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  return new DoFnInfo<>(
      doFn,
      windowingStrategy,
      sideInputViews,
      inputCoder,
      // This overload has no corresponding parameter, so an empty map is passed.
      Collections.emptyMap(),
      mainOutput,
      doFnSchemaInformation,
      sideInputMapping);
}
 
Example 3
Source Project: beam   Source File: DoFnInfo.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link DoFnInfo} for the given {@link DoFn}.
 *
 * <p>Every argument is forwarded unchanged, and in the same order, to the {@link DoFnInfo}
 * constructor.
 */
public static <InputT, OutputT> DoFnInfo<InputT, OutputT> forFn(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    Iterable<PCollectionView<?>> sideInputViews,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    TupleTag<OutputT> mainOutput,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  return new DoFnInfo<>(
      doFn,
      windowingStrategy,
      sideInputViews,
      inputCoder,
      outputCoders,
      mainOutput,
      doFnSchemaInformation,
      sideInputMapping);
}
 
Example 4
Source Project: beam   Source File: FlinkDoFnFunctionTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test constructor that forwards all of its arguments to the superclass constructor, placing a
 * freshly created {@code IdentityFn} in front of them as the function to run.
 *
 * <p>The parameters are declared with raw types ({@code Map}, {@code TupleTag}, {@code Coder}).
 */
public TestDoFnFunction(
    String stepName,
    WindowingStrategy windowingStrategy,
    Map sideInputs,
    PipelineOptions options,
    Map outputMap,
    TupleTag mainOutputTag,
    Coder inputCoder,
    Map outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map sideInputMapping) {
  super(
      new IdentityFn(), // the only argument not supplied by the caller
      stepName,
      windowingStrategy,
      sideInputs,
      options,
      outputMap,
      mainOutputTag,
      inputCoder,
      outputCoderMap,
      doFnSchemaInformation,
      sideInputMapping);
}
 
Example 5
Source Project: beam   Source File: ParDoTranslation.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Translates an applied multi-output {@link ParDo} into a {@link ParDoPayload}.
 *
 * <p>The main input is taken to be the single input whose tag id is not among the side input tag
 * ids. Schema information for the {@link DoFn} is derived from that input, and the work is then
 * handed to the overload that accepts the individual pieces.
 */
public static ParDoPayload translateParDo(
    AppliedPTransform<?, ?, ParDo.MultiOutput<?, ?>> appliedPTransform, SdkComponents components)
    throws IOException {
  final ParDo.MultiOutput<?, ?> multiOutput = appliedPTransform.getTransform();
  final Pipeline owningPipeline = appliedPTransform.getPipeline();
  final DoFn<?, ?> fn = multiOutput.getFn();

  // Tag ids of every input, and of the side inputs alone.
  Set<String> inputTagIds =
      appliedPTransform.getInputs().keySet().stream()
          .map(TupleTag::getId)
          .collect(Collectors.toSet());
  Set<String> sideInputTagIds =
      multiOutput.getSideInputs().values().stream()
          .map(view -> view.getTagInternal().getId())
          .collect(Collectors.toSet());

  // Exactly one id must remain once side inputs are removed: the main input.
  Set<String> remainingTagIds = Sets.difference(inputTagIds, sideInputTagIds);
  String mainInputTagId = Iterables.getOnlyElement(remainingTagIds);
  PCollection<?> mainInput =
      (PCollection<?>) appliedPTransform.getInputs().get(new TupleTag<>(mainInputTagId));

  final DoFnSchemaInformation schemaInformation =
      ParDo.getDoFnSchemaInformation(fn, mainInput);
  return translateParDo(
      (ParDo.MultiOutput) multiOutput, mainInput, schemaInformation, owningPipeline, components);
}
 
Example 6
Source Project: beam   Source File: ParDoTranslationTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Translates the {@code parDo} under test and checks the payload against it: same function, same
 * main output tag, a side input entry for every view, no finalization request, and a stateful
 * processing requirement exactly when the function is a {@code StateTimerDropElementsFn}.
 */
@Test
public void testToProto() throws Exception {
  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

  ParDoPayload translated =
      ParDoTranslation.translateParDo(
          parDo,
          PCollection.createPrimitiveOutputInternal(
              p,
              WindowingStrategy.globalDefault(),
              IsBounded.BOUNDED,
              KvCoder.of(VarLongCoder.of(), StringUtf8Coder.of())),
          DoFnSchemaInformation.create(),
          p,
          sdkComponents);

  assertThat(ParDoTranslation.getDoFn(translated), equalTo(parDo.getFn()));
  assertThat(ParDoTranslation.getMainOutputTag(translated), equalTo(parDo.getMainOutputTag()));

  // Each lookup throws if the payload lacks an entry for that side input.
  parDo
      .getSideInputs()
      .values()
      .forEach(view -> translated.getSideInputsOrThrow(view.getTagInternal().getId()));

  assertFalse(translated.getRequestsFinalization());

  boolean fnIsStateful = parDo.getFn() instanceof StateTimerDropElementsFn;
  boolean statefulRequirementPresent =
      sdkComponents.requirements().contains(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN);
  assertEquals(fnIsStateful, statefulRequirementPresent);
}
 
Example 7
Source Project: beam   Source File: ParDoTranslationTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Translates a {@link ParDo} over a {@code StartBundleDoFn} and asserts that the resulting
 * payload requests finalization.
 */
@Test
public void testStartBundle() throws Exception {
  Pipeline p = Pipeline.create();
  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));
  ParDoPayload payload =
      ParDoTranslation.translateParDo(
          ParDo.of(new StartBundleDoFn())
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()),
          // NOTE(review): the input PCollection is created on p, while a separate
          // TestPipeline.create() instance is passed as the pipeline below - confirm intended.
          PCollection.createPrimitiveOutputInternal(
              p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, StringUtf8Coder.of()),
          DoFnSchemaInformation.create(),
          TestPipeline.create(),
          sdkComponents);

  assertTrue(payload.getRequestsFinalization());
}
 
Example 8
Source Project: beam   Source File: ParDoTranslationTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Translates a {@link ParDo} over a {@code ProcessContextDoFn} and asserts that the resulting
 * payload requests finalization.
 */
@Test
public void testProcessContext() throws Exception {
  Pipeline p = Pipeline.create();
  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));
  ParDoPayload payload =
      ParDoTranslation.translateParDo(
          ParDo.of(new ProcessContextDoFn())
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()),
          // NOTE(review): the input PCollection is created on p, while a separate
          // TestPipeline.create() instance is passed as the pipeline below - confirm intended.
          PCollection.createPrimitiveOutputInternal(
              p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, StringUtf8Coder.of()),
          DoFnSchemaInformation.create(),
          TestPipeline.create(),
          sdkComponents);

  assertTrue(payload.getRequestsFinalization());
}
 
Example 9
Source Project: beam   Source File: ParDoTranslationTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Translates a {@link ParDo} over a {@code FinishBundleDoFn} and asserts that the resulting
 * payload requests finalization.
 */
@Test
public void testFinishBundle() throws Exception {
  Pipeline p = Pipeline.create();
  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));
  ParDoPayload payload =
      ParDoTranslation.translateParDo(
          ParDo.of(new FinishBundleDoFn())
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()),
          // NOTE(review): the input PCollection is created on p, while a separate
          // TestPipeline.create() instance is passed as the pipeline below - confirm intended.
          PCollection.createPrimitiveOutputInternal(
              p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, StringUtf8Coder.of()),
          DoFnSchemaInformation.create(),
          TestPipeline.create(),
          sdkComponents);

  assertTrue(payload.getRequestsFinalization());
}
 
Example 10
Source Project: beam   Source File: StreamingSideInputDoFnRunnerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link StreamingSideInputDoFnRunner} for tests by wrapping a simple runner around a
 * {@code SideInputFn} created from {@code views}.
 *
 * <p>The simple runner uses default pipeline options, the test's mock side input reader, the
 * supplied output manager, no additional output tags, and a windowing strategy derived from
 * {@code windowFn}.
 *
 * <p>NOTE(review): the type parameter {@code ReceiverT} is not referenced in the signature or
 * body shown here - confirm whether it is still needed.
 */
@SuppressWarnings("unchecked")
private <ReceiverT> StreamingSideInputDoFnRunner<String, String, IntervalWindow> createRunner(
    WindowFn<?, ?> windowFn,
    DoFnRunners.OutputManager outputManager,
    List<PCollectionView<String>> views,
    StreamingSideInputFetcher<String, IntervalWindow> sideInputFetcher)
    throws Exception {
  DoFnRunner<String, String> simpleDoFnRunner =
      DoFnRunners.simpleRunner(
          PipelineOptionsFactory.create(),
          new SideInputFn(views),
          mockSideInputReader,
          outputManager,
          mainOutputTag,
          Arrays.<TupleTag<?>>asList(), // no additional output tags
          stepContext,
          null,
          Collections.emptyMap(),
          WindowingStrategy.of(windowFn),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
  return new StreamingSideInputDoFnRunner<>(simpleDoFnRunner, sideInputFetcher);
}
 
Example 11
/**
 * Creates a {@link SimpleDoFnRunner} around {@code fn} for tests.
 *
 * <p>Several constructor arguments are {@code null}; the labels below follow the parameter order
 * that {@code DoFnRunners.simpleRunner} forwards to this constructor.
 */
private DoFnRunner<KV<String, Integer>, Integer> getDoFnRunner(
    DoFn<KV<String, Integer>, Integer> fn) {
  return new SimpleDoFnRunner<>(
      null, // options
      fn,
      NullSideInputReader.empty(),
      null, // outputManager
      null, // mainOutputTag
      Collections.emptyList(),
      mockStepContext,
      null, // inputCoder
      Collections.emptyMap(),
      WINDOWING_STRATEGY,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example 12
Source Project: beam   Source File: StatefulDoFnRunnerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link SimpleDoFnRunner} around {@code fn} for tests.
 *
 * <p>When {@code outputManager} is {@code null}, the result of {@code discardingOutputManager()}
 * is used instead. The labels on the {@code null} arguments follow the parameter order that
 * {@code DoFnRunners.simpleRunner} forwards to this constructor.
 */
private DoFnRunner<KV<String, Integer>, Integer> getDoFnRunner(
    DoFn<KV<String, Integer>, Integer> fn, @Nullable OutputManager outputManager) {
  return new SimpleDoFnRunner<>(
      null, // options
      fn,
      NullSideInputReader.empty(),
      // Fall back to the discarding manager when the caller passes none.
      MoreObjects.firstNonNull(outputManager, discardingOutputManager()),
      outputTag,
      Collections.emptyList(),
      mockStepContext,
      null, // inputCoder
      Collections.emptyMap(),
      WINDOWING_STRATEGY,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example 13
Source Project: beam   Source File: SimpleDoFnRunnerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Expects {@code finishBundle()} on a runner built around a {@code ThrowingDoFn} to throw a
 * {@link UserCodeException} whose cause is the function's {@code exceptionToThrow}.
 */
@Test
public void testFinishBundleExceptionsWrappedAsUserCodeException() {
  ThrowingDoFn fn = new ThrowingDoFn();
  DoFnRunner<String, String> runner =
      new SimpleDoFnRunner<>(
          null, // options
          fn,
          NullSideInputReader.empty(),
          null, // outputManager
          null, // mainOutputTag
          Collections.emptyList(),
          mockStepContext,
          null, // inputCoder
          Collections.emptyMap(),
          WindowingStrategy.of(new GlobalWindows()),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  // Expectations must be registered before the call that is expected to throw.
  thrown.expect(UserCodeException.class);
  thrown.expectCause(is(fn.exceptionToThrow));

  runner.finishBundle();
}
 
Example 14
Source Project: beam   Source File: CombineValuesFnFactory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@link DoFnInfo} that wraps a {@code CombineValuesDoFn} for the given combine
 * function.
 *
 * <p>The input coder stays {@code null} when {@code combineFn.getKvCoder()} is {@code null};
 * otherwise it pairs the key coder with an iterable coder over the value coder.
 */
private static <K, InputT, AccumT, OutputT> DoFnInfo<?, ?> createDoFnInfo(
    AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn, SideInputReader sideInputReader) {
  GlobalCombineFnRunner<InputT, AccumT, OutputT> runner =
      GlobalCombineFnRunners.create(combineFn.getFn());
  DoFn<KV<K, Iterable<InputT>>, KV<K, OutputT>> combineValuesFn =
      new CombineValuesDoFn<>(runner, sideInputReader);

  // Only derive a coder when the combine function actually carries a key-value coder.
  Coder<KV<K, Iterable<InputT>>> groupedInputCoder =
      combineFn.getKvCoder() == null
          ? null
          : KvCoder.of(
              combineFn.getKvCoder().getKeyCoder(),
              IterableCoder.of(combineFn.getKvCoder().getValueCoder()));

  return DoFnInfo.forFn(
      combineValuesFn,
      combineFn.getWindowingStrategy(),
      combineFn.getSideInputViews(),
      groupedInputCoder,
      Collections.emptyMap(), // Not needed here.
      new TupleTag<>(PropertyNames.OUTPUT),
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example 15
Source Project: beam   Source File: DoFnInstanceManagersTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a single-instance manager keeps handing out the original {@link DoFnInfo}, and the
 * original function, even after being told to abort it twice.
 */
@Test
public void testInstanceIgnoresAbort() throws Exception {
  DoFnInfo<?, ?> originalInfo =
      DoFnInfo.forFn(
          initialFn,
          WindowingStrategy.globalDefault(),
          null /* side input views */,
          null /* input coder */,
          new TupleTag<>(PropertyNames.OUTPUT) /* main output id */,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
  DoFnInstanceManager manager = DoFnInstanceManagers.singleInstance(originalInfo);

  // Abort twice: TestFn#teardown would fail the test after multiple calls.
  manager.abort(manager.get());
  manager.abort(manager.get());

  // What the manager returns is still the initial info, wrapping the initial function.
  assertThat(manager.get(), Matchers.<DoFnInfo<?, ?>>theInstance(originalInfo));
  assertThat(manager.get().getDoFn(), theInstance(initialFn));
}
 
Example 16
Source Project: beam   Source File: FlinkStatefulDoFnFunctionTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test constructor that forwards all of its arguments to the superclass constructor, placing a
 * freshly created {@code IdentityFn} in front of them as the function to run.
 *
 * <p>The parameters are declared with raw types ({@code Map}, {@code TupleTag}, {@code Coder}).
 */
public TestDoFnFunction(
    String stepName,
    WindowingStrategy windowingStrategy,
    Map sideInputs,
    PipelineOptions options,
    Map outputMap,
    TupleTag mainOutputTag,
    Coder inputCoder,
    Map outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map sideInputMapping) {
  super(
      new IdentityFn(), // the only argument not supplied by the caller
      stepName,
      windowingStrategy,
      sideInputs,
      options,
      outputMap,
      mainOutputTag,
      inputCoder,
      outputCoderMap,
      doFnSchemaInformation,
      sideInputMapping);
}
 
Example 17
Source Project: beam   Source File: DoFnInstanceManagersTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a cloning pool hands out a copy rather than the original {@link DoFnInfo}, and that
 * a copy returned through {@code complete} is the one handed out on the next {@code get}.
 */
@Test
public void testCloningPoolReusesAfterComplete() throws Exception {
  DoFnInfo<?, ?> originalInfo =
      DoFnInfo.forFn(
          initialFn,
          WindowingStrategy.globalDefault(),
          null /* side input views */,
          null /* input coder */,
          new TupleTag<>(PropertyNames.OUTPUT) /* main output id */,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
  DoFnInstanceManager pool = DoFnInstanceManagers.cloningPool(originalInfo);

  // The first instance handed out is neither the original info nor the original function.
  DoFnInfo<?, ?> firstInfo = pool.get();
  assertThat(firstInfo, not(Matchers.<DoFnInfo<?, ?>>theInstance(originalInfo)));
  assertThat(firstInfo.getDoFn(), not(theInstance(originalInfo.getDoFn())));

  // After completing it, the very same instance comes back.
  pool.complete(firstInfo);
  DoFnInfo<?, ?> secondInfo = pool.get();
  assertThat(secondInfo, Matchers.<DoFnInfo<?, ?>>theInstance(firstInfo));
  assertThat(secondInfo.getDoFn(), theInstance(firstInfo.getDoFn()));
}
 
Example 18
Source Project: beam   Source File: FlinkStreamingTransformTranslators.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the {@link DoFnOperator} for the given {@link DoFn} from the supplied translation
 * inputs.
 *
 * <p>This is a declaration only; no implementation is visible here, so how each argument is used
 * depends on the implementing class.
 */
DoFnOperator<InputT, OutputT> createDoFnOperator(
DoFn<InputT, OutputT> doFn,
String stepName,
List<PCollectionView<?>> sideInputs,
TupleTag<OutputT> mainOutputTag,
List<TupleTag<?>> additionalOutputTags,
FlinkStreamingTranslationContext context,
WindowingStrategy<?, ?> windowingStrategy,
Map<TupleTag<?>, OutputTag<WindowedValue<?>>> tagsToOutputTags,
Map<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders,
Map<TupleTag<?>, Integer> tagsToIds,
Coder<WindowedValue<InputT>> windowedInputCoder,
Map<TupleTag<?>, Coder<?>> outputCoders,
Coder keyCoder,
KeySelector<WindowedValue<InputT>, ?> keySelector,
Map<Integer, PCollectionView<?>> transformedSideInputs,
DoFnSchemaInformation doFnSchemaInformation,
Map<String, PCollectionView<?>> sideInputMapping);
 
Example 19
Source Project: beam   Source File: FlinkStatefulDoFnFunction.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Stores the supplied configuration in the corresponding fields.
 *
 * <p>Every argument is assigned as-is except {@code pipelineOptions}, which is wrapped in a
 * {@link SerializablePipelineOptions} before being stored.
 */
public FlinkStatefulDoFnFunction(
    DoFn<KV<K, V>, OutputT> dofn,
    String stepName,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions pipelineOptions,
    Map<TupleTag<?>, Integer> outputMap,
    TupleTag<OutputT> mainOutputTag,
    Coder<KV<K, V>> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {

  this.dofn = dofn;
  this.stepName = stepName;
  this.windowingStrategy = windowingStrategy;
  this.sideInputs = sideInputs;
  // The options are kept in their serializable wrapper rather than directly.
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
  this.outputMap = outputMap;
  this.mainOutputTag = mainOutputTag;
  this.inputCoder = inputCoder;
  this.outputCoderMap = outputCoderMap;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
 
Example 20
Source Project: beam   Source File: DoFnInstanceManagersTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that aborting an instance obtained from a cloning pool tears its function down, and that
 * the next instance handed out is a different one whose function has not been torn down.
 */
@Test
public void testCloningPoolTearsDownAfterAbort() throws Exception {
  DoFnInfo<?, ?> originalInfo =
      DoFnInfo.forFn(
          initialFn,
          WindowingStrategy.globalDefault(),
          null /* side input views */,
          null /* input coder */,
          new TupleTag<>(PropertyNames.OUTPUT) /* main output id */,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
  DoFnInstanceManager pool = DoFnInstanceManagers.cloningPool(originalInfo);

  // Abort the first instance and confirm its function was torn down.
  DoFnInfo<?, ?> abortedInfo = pool.get();
  pool.abort(abortedInfo);
  assertThat(((TestFn) abortedInfo.getDoFn()).tornDown, is(true));

  // The replacement must be a different, still-live instance.
  DoFnInfo<?, ?> replacementInfo = pool.get();
  assertThat(replacementInfo, not(Matchers.<DoFnInfo<?, ?>>theInstance(abortedInfo)));
  assertThat(replacementInfo.getDoFn(), not(theInstance(abortedInfo.getDoFn())));
  assertThat(((TestFn) replacementInfo.getDoFn()).tornDown, is(false));
}
 
Example 21
Source Project: beam   Source File: FlinkPipelineOptionsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Expects constructing a {@link DoFnOperator} with the arguments below to throw; the test passes
 * on any {@link Exception}.
 */
@Test(expected = Exception.class)
public void parDoBaseClassPipelineOptionsNullTest() {
  TupleTag<String> mainTag = new TupleTag<>("main-output");
  Coder<WindowedValue<String>> coder = WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
  new DoFnOperator<>(
      new TestDoFn(),
      "stepName",
      coder,
      Collections.emptyMap(),
      mainTag,
      Collections.emptyList(),
      new DoFnOperator.MultiOutputOutputManagerFactory<>(mainTag, coder),
      WindowingStrategy.globalDefault(),
      new HashMap<>(),
      Collections.emptyList(),
      // NOTE(review): presumably the pipeline options, judging by the test name - confirm
      // against the DoFnOperator constructor.
      null,
      null, /* key coder */
      null /* key selector */,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example 22
Source Project: beam   Source File: ParDoEvaluatorTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link ParDoEvaluator} for {@code fn} with {@code singletonView} as its only side
 * input.
 *
 * <p>The evaluation context mock is stubbed to return a {@code ReadyInGlobalWindowReader} for that
 * view and a mocked execution context whose step context reports an empty timer update. Direct
 * overrides are applied to the pipeline before the producer of {@code output} is looked up.
 */
private ParDoEvaluator<Integer> createEvaluator(
    PCollectionView<Integer> singletonView,
    RecorderFn fn,
    PCollection<Integer> input,
    PCollection<Integer> output) {
  // Side input reads go through a reader for the singleton view.
  when(evaluationContext.createSideInputReader(ImmutableList.of(singletonView)))
      .thenReturn(new ReadyInGlobalWindowReader());

  // Any transform/key pair resolves to the same mocked execution and step contexts.
  DirectExecutionContext mockExecutionContext = mock(DirectExecutionContext.class);
  DirectStepContext mockStepContext = mock(DirectStepContext.class);
  when(mockExecutionContext.getStepContext(Mockito.any(String.class)))
      .thenReturn(mockStepContext);
  when(mockStepContext.getTimerUpdate()).thenReturn(TimerUpdate.empty());
  when(evaluationContext.getExecutionContext(
          Mockito.any(AppliedPTransform.class), Mockito.any(StructuralKey.class)))
      .thenReturn(mockExecutionContext);

  DirectGraphs.performDirectOverrides(p);
  @SuppressWarnings("unchecked")
  AppliedPTransform<PCollection<Integer>, ?, ?> producer =
      (AppliedPTransform<PCollection<Integer>, ?, ?>) DirectGraphs.getProducer(output);

  return ParDoEvaluator.create(
      evaluationContext,
      PipelineOptionsFactory.create(),
      mockStepContext,
      producer,
      input.getCoder(),
      input.getWindowingStrategy(),
      fn,
      null /* key */,
      ImmutableList.of(singletonView),
      mainOutputTag,
      additionalOutputTags,
      ImmutableMap.of(mainOutputTag, output),
      DoFnSchemaInformation.create(),
      Collections.emptyMap(),
      ParDoEvaluator.defaultRunnerFactory());
}
 
Example 23
Source Project: incubator-nemo   Source File: DoFnTransformTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Feeds a single element through a {@link DoFnTransform} wrapping an {@code IdentityDoFn} and
 * checks that the first collected output equals that element.
 */
@Test
@SuppressWarnings("unchecked")
public void testSingleOutput() {

  final TupleTag<String> outputTag = new TupleTag<>("main-output");

  final DoFnTransform<String, String> doFnTransform =
    new DoFnTransform<>(
      new IdentityDoFn<>(),
      NULL_INPUT_CODER,
      NULL_OUTPUT_CODERS,
      outputTag,
      Collections.emptyList(),
      WindowingStrategy.globalDefault(),
      PipelineOptionsFactory.as(NemoPipelineOptions.class),
      DisplayData.none(),
      DoFnSchemaInformation.create(),
      Collections.emptyMap());

  final Transform.Context context = mock(Transform.Context.class);
  final OutputCollector<WindowedValue<String>> oc = new TestOutputCollector<>();
  doFnTransform.prepare(context, oc);

  doFnTransform.onData(WindowedValue.valueInGlobalWindow("Hello"));

  // assertEquals takes (expected, actual); keeping that order makes failure messages read
  // correctly.
  assertEquals(
    WindowedValue.valueInGlobalWindow("Hello"),
    ((TestOutputCollector<String>) oc).outputs.get(0));

  doFnTransform.close();
}
 
Example 24
Source Project: beam   Source File: DoFnWithExecutionInformation.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link DoFnWithExecutionInformation} holding the given function, tag, side input
 * mapping and schema information.
 *
 * <p>The arguments are passed unchanged to the generated {@code
 * AutoValue_DoFnWithExecutionInformation} constructor.
 */
public static DoFnWithExecutionInformation of(
    DoFn<?, ?> fn,
    TupleTag<?> tag,
    Map<String, PCollectionView<?>> sideInputMapping,
    DoFnSchemaInformation doFnSchemaInformation) {
  return new AutoValue_DoFnWithExecutionInformation(
      fn, tag, sideInputMapping, doFnSchemaInformation);
}
 
Example 25
Source Project: beam   Source File: ParDoEvaluatorFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates an evaluator for an arbitrary {@link AppliedPTransform} node, with the pieces of the
 * {@link ParDo} unpacked.
 *
 * <p>This can thus be invoked regardless of whether the types in the {@link AppliedPTransform}
 * correspond with the type in the unpacked {@link DoFn}, side inputs, and output tags.
 *
 * <p>The step context is looked up by the application's step name, and the function to run is
 * taken from the {@link DoFnLifecycleManager} associated with {@code application}. The same
 * manager is also passed to the returned wrapping evaluator.
 */
@SuppressWarnings({"unchecked", "rawtypes"})
DoFnLifecycleManagerRemovingTransformEvaluator<InputT> createEvaluator(
    AppliedPTransform<PCollection<InputT>, PCollectionTuple, ?> application,
    PCollection<InputT> mainInput,
    StructuralKey<?> inputBundleKey,
    List<PCollectionView<?>> sideInputs,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping)
    throws Exception {
  String stepName = evaluationContext.getStepName(application);
  DirectStepContext stepContext =
      evaluationContext.getExecutionContext(application, inputBundleKey).getStepContext(stepName);

  // NOTE(review): fnClones looks like a cache keyed by application - confirm where it is declared.
  DoFnLifecycleManager fnManager = fnClones.getUnchecked(application);

  return DoFnLifecycleManagerRemovingTransformEvaluator.wrapping(
      createParDoEvaluator(
          application,
          inputBundleKey,
          mainInput,
          sideInputs,
          mainOutputTag,
          additionalOutputTags,
          stepContext,
          fnManager.get(), // the function instance obtained from the manager
          doFnSchemaInformation,
          sideInputMapping,
          fnManager),
      fnManager);
}
 
Example 26
Source Project: beam   Source File: FlinkStatefulDoFnFunctionTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens a test function, swaps its internal metric container for a Mockito spy, closes the
 * function, and verifies that the spy was asked to register metrics for the pipeline result.
 */
@Test
public void testAccumulatorRegistrationOnOperatorClose() throws Exception {
  FlinkStatefulDoFnFunction function =
      new TestDoFnFunction(
          "step",
          WindowingStrategy.globalDefault(),
          Collections.emptyMap(),
          PipelineOptionsFactory.create(),
          Collections.emptyMap(),
          new TupleTag<>(),
          null,
          Collections.emptyMap(),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
  function.open(new Configuration());

  // Replace the private container with a spy so the call made during close() can be observed.
  String containerField = "metricContainer";
  FlinkMetricContainer originalContainer =
      (FlinkMetricContainer) Whitebox.getInternalState(function, containerField);
  FlinkMetricContainer spiedContainer = Mockito.spy(originalContainer);
  Whitebox.setInternalState(function, containerField, spiedContainer);

  function.close();
  Mockito.verify(spiedContainer).registerMetricsForPipelineResult();
}
 
Example 27
Source Project: beam   Source File: DoFnRunners.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns an implementation of {@link DoFnRunner} for the given {@link DoFn}.
 *
 * <p>If the {@link DoFn} observes the window, this runner will explode the windows of a
 * compressed {@link WindowedValue}. It is the responsibility of the runner to perform any key
 * partitioning needed, etc.
 *
 * <p>The returned runner is a {@link SimpleDoFnRunner} constructed from the arguments in the
 * order given.
 */
public static <InputT, OutputT> DoFnRunner<InputT, OutputT> simpleRunner(
    PipelineOptions options,
    DoFn<InputT, OutputT> fn,
    SideInputReader sideInputReader,
    OutputManager outputManager,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    StepContext stepContext,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  return new SimpleDoFnRunner<>(
      options,
      fn,
      sideInputReader,
      outputManager,
      mainOutputTag,
      additionalOutputTags,
      stepContext,
      inputCoder,
      outputCoders,
      windowingStrategy,
      doFnSchemaInformation,
      sideInputMapping);
}
 
Example 28
Source Project: beam   Source File: DoFnRunners.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns a {@link ProcessFnRunner} for the given {@link ProcessFn}.
 *
 * <p>The underlying runner is built with {@code simpleRunner} from all arguments except {@code
 * views}; it is then wrapped together with {@code views} and {@code sideInputReader}, so the side
 * input reader is handed to both the inner and the outer runner.
 */
public static <InputT, OutputT, RestrictionT>
    ProcessFnRunner<InputT, OutputT, RestrictionT> newProcessFnRunner(
        ProcessFn<InputT, OutputT, RestrictionT, ?, ?> fn,
        PipelineOptions options,
        Collection<PCollectionView<?>> views,
        ReadyCheckingSideInputReader sideInputReader,
        OutputManager outputManager,
        TupleTag<OutputT> mainOutputTag,
        List<TupleTag<?>> additionalOutputTags,
        StepContext stepContext,
        @Nullable Coder<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> inputCoder,
        Map<TupleTag<?>, Coder<?>> outputCoders,
        WindowingStrategy<?, ?> windowingStrategy,
        DoFnSchemaInformation doFnSchemaInformation,
        Map<String, PCollectionView<?>> sideInputMapping) {
  return new ProcessFnRunner<>(
      simpleRunner(
          options,
          fn,
          sideInputReader,
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          stepContext,
          inputCoder,
          outputCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping),
      views,
      sideInputReader);
}
 
Example 29
Source Project: beam   Source File: SimpleDoFnRunnerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tests that a user's call to set a timer gets properly dispatched to the timer internals. From
 * there on, it is the duty of the runner and step context to set it in whatever way is right for
 * that runner.
 */
@Test
public void testTimerSet() {
  WindowFn<?, ?> windowFn = new GlobalWindows();
  // NOTE(review): DoFnWithTimers is constructed as a raw type here - confirm whether the
  // resulting unchecked assignment is acceptable.
  DoFnWithTimers<GlobalWindow> fn = new DoFnWithTimers(windowFn.windowCoder());
  DoFnRunner<String, String> runner =
      new SimpleDoFnRunner<>(
          null, // options
          fn,
          NullSideInputReader.empty(),
          null, // outputManager
          null, // mainOutputTag
          Collections.emptyList(),
          mockStepContext,
          null, // inputCoder
          Collections.emptyMap(),
          WindowingStrategy.of(new GlobalWindows()),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  // Setting the timer needs the current time, as it is set relative
  Instant currentTime = new Instant(42);
  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(currentTime);

  runner.processElement(WindowedValue.valueInGlobalWindow("anyValue"));

  // The timer is expected in the global window's namespace; both time arguments equal the
  // stubbed current time plus the function's timer offset.
  verify(mockTimerInternals)
      .setTimer(
          StateNamespaces.window(new GlobalWindows().windowCoder(), GlobalWindow.INSTANCE),
          TimerDeclaration.PREFIX + DoFnWithTimers.TIMER_ID,
          "",
          currentTime.plus(DoFnWithTimers.TIMER_OFFSET),
          currentTime.plus(DoFnWithTimers.TIMER_OFFSET),
          TimeDomain.EVENT_TIME);
}
 
Example 30
Source Project: beam   Source File: SimpleDoFnRunnerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Demonstrates that attempting to output an element before the timestamp of the current element
 * with zero {@link DoFn#getAllowedTimestampSkew() allowed timestamp skew} throws.
 */
@Test
public void testBackwardsInTimeNoSkew() {
  SkewingDoFn fn = new SkewingDoFn(Duration.ZERO);
  DoFnRunner<Duration, Duration> runner =
      new SimpleDoFnRunner<>(
          null, // options
          fn,
          NullSideInputReader.empty(),
          new ListOutputManager(),
          new TupleTag<>(),
          Collections.emptyList(),
          mockStepContext,
          null, // inputCoder
          Collections.emptyMap(),
          WindowingStrategy.of(new GlobalWindows()),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  runner.startBundle();
  // An element output at the current timestamp is fine.
  runner.processElement(
      WindowedValue.timestampedValueInGlobalWindow(Duration.ZERO, new Instant(0)));
  // Expectations are registered only now, so they apply to the second processElement call and
  // not to the one above.
  thrown.expect(UserCodeException.class);
  thrown.expectCause(isA(IllegalArgumentException.class));
  thrown.expectMessage("must be no earlier");
  thrown.expectMessage(
      String.format("timestamp of the current input (%s)", new Instant(0).toString()));
  thrown.expectMessage(
      String.format(
          "the allowed skew (%s)", PeriodFormat.getDefault().print(Duration.ZERO.toPeriod())));
  // An element output before (current time - skew) is forbidden
  runner.processElement(
      WindowedValue.timestampedValueInGlobalWindow(Duration.millis(1L), new Instant(0)));
}