Java Code Examples for org.apache.beam.sdk.util.WindowedValue#getValueOnlyCoder()

The following examples show how to use org.apache.beam.sdk.util.WindowedValue#getValueOnlyCoder(). All examples are taken from the Apache Beam project (Apache License 2.0); the originating source file is noted above each example.
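Before the project examples, here is a minimal, self-contained round-trip sketch. It is not taken from the Beam codebase (the class name ValueOnlyCoderRoundTrip is illustrative); it only shows what a value-only coder does: it serializes just the element, and on decode re-wraps it in the global window with a default timestamp and pane.

import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.util.CoderUtils;
import org.apache.beam.sdk.util.WindowedValue;

public class ValueOnlyCoderRoundTrip {
  public static void main(String[] args) throws Exception {
    // Wrap an element coder; only the value itself is serialized.
    WindowedValue.ValueOnlyWindowedValueCoder<String> coder =
        WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());

    WindowedValue<String> original = WindowedValue.valueInGlobalWindow("hello");

    // Encoding drops the window, timestamp, and pane information.
    byte[] bytes = CoderUtils.encodeToByteArray(coder, original);

    // Decoding re-wraps the value in the global window with the default
    // timestamp and pane, so this coder is only suitable when the windowing
    // metadata is not needed (e.g. globally windowed data).
    WindowedValue<String> decoded = CoderUtils.decodeFromByteArray(coder, bytes);

    System.out.println(decoded.getValue());   // hello
    System.out.println(decoded.getWindows()); // [GlobalWindow]
  }
}

This is the same pattern the examples below rely on, whether the coder is used for size estimation, Flink type information, or Spark RDD serialization.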
Example 1
Source File: PrecombineGroupingTable.java    From beam with Apache License 2.0
/**
 * Returns a {@link GroupingTable} that combines inputs into an accumulator with sampling {@link
 * SizeEstimator SizeEstimators}.
 */
public static <K, InputT, AccumT>
    GroupingTable<WindowedValue<K>, InputT, AccumT> combiningAndSampling(
        PipelineOptions options,
        CombineFn<InputT, AccumT, ?> combineFn,
        Coder<K> keyCoder,
        Coder<? super AccumT> accumulatorCoder,
        double sizeEstimatorSampleRate) {
  Combiner<WindowedValue<K>, InputT, AccumT, ?> valueCombiner =
      new ValueCombiner<>(
          GlobalCombineFnRunners.create(combineFn), NullSideInputReader.empty(), options);
  return new PrecombineGroupingTable<>(
      getGroupingTableSizeBytes(options),
      new WindowingCoderGroupingKeyCreator<>(keyCoder),
      WindowedPairInfo.create(),
      valueCombiner,
      new SamplingSizeEstimator<>(
          new CoderSizeEstimator<>(WindowedValue.getValueOnlyCoder(keyCoder)),
          sizeEstimatorSampleRate,
          1.0),
      new SamplingSizeEstimator<>(
          new CoderSizeEstimator<>(accumulatorCoder), sizeEstimatorSampleRate, 1.0));
}
 
Example 2
Source File: PrecombineGroupingTable.java    From beam with Apache License 2.0
/** Returns a {@link GroupingTable} that combines inputs into an accumulator. */
public static <K, InputT, AccumT> GroupingTable<WindowedValue<K>, InputT, AccumT> combining(
    PipelineOptions options,
    CombineFn<InputT, AccumT, ?> combineFn,
    Coder<K> keyCoder,
    Coder<? super AccumT> accumulatorCoder) {
  Combiner<WindowedValue<K>, InputT, AccumT, ?> valueCombiner =
      new ValueCombiner<>(
          GlobalCombineFnRunners.create(combineFn), NullSideInputReader.empty(), options);
  return new PrecombineGroupingTable<>(
      getGroupingTableSizeBytes(options),
      new WindowingCoderGroupingKeyCreator<>(keyCoder),
      WindowedPairInfo.create(),
      valueCombiner,
      new CoderSizeEstimator<>(WindowedValue.getValueOnlyCoder(keyCoder)),
      new CoderSizeEstimator<>(accumulatorCoder));
}
 
Example 3
Source File: PartialGroupByKeyParDoFnsTest.java    From beam with Apache License 2.0
@Test
public void testCreateWithCombinerAndStreaming() throws Exception {
  StreamingOptions options = PipelineOptionsFactory.as(StreamingOptions.class);
  options.setStreaming(true);

  Coder keyCoder = StringUtf8Coder.of();
  Coder valueCoder = BigEndianIntegerCoder.of();
  KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);

  TestOutputReceiver receiver =
      new TestOutputReceiver(
          new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)),
          counterSet,
          NameContextsForTests.nameContextForTest());

  ParDoFn pgbk =
      PartialGroupByKeyParDoFns.create(
          options,
          kvCoder,
          AppliedCombineFn.withInputCoder(
              Sum.ofIntegers(), CoderRegistry.createDefault(), kvCoder),
          NullSideInputReader.empty(),
          receiver,
          null);
  assertTrue(pgbk instanceof SimplePartialGroupByKeyParDoFn);
}
 
Example 4
Source File: FlinkPipelineOptionsTest.java    From beam with Apache License 2.0
@Test(expected = Exception.class)
public void parDoBaseClassPipelineOptionsNullTest() {
  TupleTag<String> mainTag = new TupleTag<>("main-output");
  Coder<WindowedValue<String>> coder = WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
  new DoFnOperator<>(
      new TestDoFn(),
      "stepName",
      coder,
      Collections.emptyMap(),
      mainTag,
      Collections.emptyList(),
      new DoFnOperator.MultiOutputOutputManagerFactory<>(mainTag, coder),
      WindowingStrategy.globalDefault(),
      new HashMap<>(),
      Collections.emptyList(),
      null,
      null /* key coder */,
      null /* key selector */,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example 5
Source File: PartialGroupByKeyParDoFnsTest.java    From beam with Apache License 2.0
@Test
public void testCreateWithCombinerAndStreamingSideInputs() throws Exception {
  StreamingOptions options = PipelineOptionsFactory.as(StreamingOptions.class);
  options.setStreaming(true);

  Coder keyCoder = StringUtf8Coder.of();
  Coder valueCoder = BigEndianIntegerCoder.of();
  KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);

  TestOutputReceiver receiver =
      new TestOutputReceiver(
          new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)),
          counterSet,
          NameContextsForTests.nameContextForTest());

  when(mockSideInputReader.isEmpty()).thenReturn(false);
  when(mockStreamingStepContext.stateInternals()).thenReturn((StateInternals) mockStateInternals);
  when(mockStateInternals.state(Matchers.<StateNamespace>any(), Matchers.<StateTag>any()))
      .thenReturn(mockState);
  when(mockState.read()).thenReturn(Maps.newHashMap());

  ParDoFn pgbk =
      PartialGroupByKeyParDoFns.create(
          options,
          kvCoder,
          AppliedCombineFn.withInputCoder(
              Sum.ofIntegers(),
              CoderRegistry.createDefault(),
              kvCoder,
              ImmutableList.<PCollectionView<?>>of(),
              WindowingStrategy.globalDefault()),
          mockSideInputReader,
          receiver,
          mockStreamingStepContext);
  assertTrue(pgbk instanceof StreamingSideInputPGBKParDoFn);
}
 
Example 6
Source File: PartialGroupByKeyParDoFnsTest.java    From beam with Apache License 2.0
@Test
public void testCreateWithCombinerAndBatchSideInputs() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();

  Coder keyCoder = StringUtf8Coder.of();
  Coder valueCoder = BigEndianIntegerCoder.of();
  KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);

  TestOutputReceiver receiver =
      new TestOutputReceiver(
          new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)),
          counterSet,
          NameContextsForTests.nameContextForTest());

  StepContext stepContext =
      BatchModeExecutionContext.forTesting(options, "testStage")
          .getStepContext(TestOperationContext.create(counterSet));

  when(mockSideInputReader.isEmpty()).thenReturn(false);

  ParDoFn pgbk =
      PartialGroupByKeyParDoFns.create(
          options,
          kvCoder,
          AppliedCombineFn.withInputCoder(
              Sum.ofIntegers(),
              CoderRegistry.createDefault(),
              kvCoder,
              ImmutableList.<PCollectionView<?>>of(),
              WindowingStrategy.globalDefault()),
          mockSideInputReader,
          receiver,
          stepContext);
  assertTrue(pgbk instanceof BatchSideInputPGBKParDoFn);
}
 
Example 7
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
private static DoFnOperator getOperatorForCleanupInspection() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setParallelism(4);

  TupleTag<String> outputTag = new TupleTag<>("main-output");
  WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
  IdentityDoFn<String> doFn =
      new IdentityDoFn<String>() {
        @FinishBundle
        public void finishBundle(FinishBundleContext context) {
          context.output(
              "finishBundle", BoundedWindow.TIMESTAMP_MIN_VALUE, GlobalWindow.INSTANCE);
        }
      };

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  return new DoFnOperator<>(
      doFn,
      "stepName",
      windowedValueCoder,
      Collections.emptyMap(),
      outputTag,
      Collections.emptyList(),
      outputManagerFactory,
      WindowingStrategy.globalDefault(),
      new HashMap<>(), /* side-input mapping */
      Collections.emptyList(), /* side inputs */
      options,
      null,
      null,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example 8
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testSingleOutput() throws Exception {

  Coder<WindowedValue<String>> coder = WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());

  TupleTag<String> outputTag = new TupleTag<>("main-output");

  DoFnOperator<String, String> doFnOperator =
      new DoFnOperator<>(
          new IdentityDoFn<>(),
          "stepName",
          coder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder),
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("Hello")));

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("Hello")));

  testHarness.close();
}
 
Example 9
Source File: FlinkStreamingTransformTranslators.java    From beam with Apache License 2.0
@Override
public void translateNode(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KeyedWorkItem<K, InputT>>> transform,
    FlinkStreamingTranslationContext context) {

  PCollection<KV<K, InputT>> input = context.getInput(transform);

  KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();

  SingletonKeyedWorkItemCoder<K, InputT> workItemCoder =
      SingletonKeyedWorkItemCoder.of(
          inputKvCoder.getKeyCoder(),
          inputKvCoder.getValueCoder(),
          input.getWindowingStrategy().getWindowFn().windowCoder());

  WindowedValue.ValueOnlyWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>>
      windowedWorkItemCoder = WindowedValue.getValueOnlyCoder(workItemCoder);

  CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo =
      new CoderTypeInformation<>(windowedWorkItemCoder);

  DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input);

  DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream =
      inputDataStream
          .flatMap(new ToKeyedWorkItemInGlobalWindow<>(context.getPipelineOptions()))
          .returns(workItemTypeInfo)
          .name("ToKeyedWorkItem");

  KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>, ByteBuffer>
      keyedWorkItemStream =
          workItemStream.keyBy(new WorkItemKeySelector<>(inputKvCoder.getKeyCoder()));

  context.setOutputDataStream(context.getOutput(transform), keyedWorkItemStream);
}
 
Example 10
Source File: BoundedDataset.java    From beam with Apache License 2.0
@SuppressWarnings("ConstantConditions")
public JavaRDD<WindowedValue<T>> getRDD() {
  if (rdd == null) {
    WindowedValue.ValueOnlyWindowedValueCoder<T> windowCoder =
        WindowedValue.getValueOnlyCoder(coder);
    rdd =
        jsc.parallelize(CoderHelpers.toByteArrays(windowedValues, windowCoder))
            .map(CoderHelpers.fromByteFunction(windowCoder));
  }
  return rdd;
}
 
Example 11
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testMultiOutputOutput() throws Exception {

  WindowedValue.ValueOnlyWindowedValueCoder<String> coder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());

  TupleTag<String> mainOutput = new TupleTag<>("main-output");
  TupleTag<String> additionalOutput1 = new TupleTag<>("output-1");
  TupleTag<String> additionalOutput2 = new TupleTag<>("output-2");
  ImmutableMap<TupleTag<?>, OutputTag<?>> tagsToOutputTags =
      ImmutableMap.<TupleTag<?>, OutputTag<?>>builder()
          .put(additionalOutput1, new OutputTag<String>(additionalOutput1.getId()) {})
          .put(additionalOutput2, new OutputTag<String>(additionalOutput2.getId()) {})
          .build();
  ImmutableMap<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders =
      ImmutableMap.<TupleTag<?>, Coder<WindowedValue<?>>>builder()
          .put(mainOutput, (Coder) coder)
          .put(additionalOutput1, coder)
          .put(additionalOutput2, coder)
          .build();
  ImmutableMap<TupleTag<?>, Integer> tagsToIds =
      ImmutableMap.<TupleTag<?>, Integer>builder()
          .put(mainOutput, 0)
          .put(additionalOutput1, 1)
          .put(additionalOutput2, 2)
          .build();

  DoFnOperator<String, String> doFnOperator =
      new DoFnOperator<>(
          new MultiOutputDoFn(additionalOutput1, additionalOutput2),
          "stepName",
          coder,
          Collections.emptyMap(),
          mainOutput,
          ImmutableList.of(additionalOutput1, additionalOutput2),
          new DoFnOperator.MultiOutputOutputManagerFactory(
              mainOutput, tagsToOutputTags, tagsToCoders, tagsToIds),
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("one")));
  testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("two")));
  testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("hello")));

  assertThat(
      this.stripStreamRecord(testHarness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("got: hello")));

  assertThat(
      this.stripStreamRecord(testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput1))),
      contains(
          WindowedValue.valueInGlobalWindow("extra: one"),
          WindowedValue.valueInGlobalWindow("got: hello")));

  assertThat(
      this.stripStreamRecord(testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput2))),
      contains(
          WindowedValue.valueInGlobalWindow("extra: two"),
          WindowedValue.valueInGlobalWindow("got: hello")));

  testHarness.close();
}
 
Example 12
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void testCheckpointBufferingWithMultipleBundles() throws Exception {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setMaxBundleSize(10L);
  options.setCheckpointingInterval(1L);

  TupleTag<String> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder coder = StringUtf8Coder.of();
  WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(coder);

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  @SuppressWarnings("unchecked")
  Supplier<DoFnOperator<String, String>> doFnOperatorSupplier =
      () ->
          new DoFnOperator<>(
              new IdentityDoFn(),
              "stepName",
              windowedValueCoder,
              Collections.emptyMap(),
              outputTag,
              Collections.emptyList(),
              outputManagerFactory,
              WindowingStrategy.globalDefault(),
              new HashMap<>(), /* side-input mapping */
              Collections.emptyList(), /* side inputs */
              options,
              null,
              null,
              DoFnSchemaInformation.create(),
              Collections.emptyMap());

  DoFnOperator<String, String> doFnOperator = doFnOperatorSupplier.get();
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  // start a bundle
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("regular element")));

  // This callback will be executed in the snapshotState function in the course of
  // finishing the currently active bundle. Everything emitted in the callback should
  // be buffered and not sent downstream.
  doFnOperator.setBundleFinishedCallback(
      () -> {
        try {
          // Clear this early for the test here because we want to finish the bundle from within
          // the callback, which would otherwise cause infinite recursion
          doFnOperator.setBundleFinishedCallback(null);
          testHarness.processElement(
              new StreamRecord<>(WindowedValue.valueInGlobalWindow("trigger another bundle")));
          doFnOperator.invokeFinishBundle();
          testHarness.processElement(
              new StreamRecord<>(
                  WindowedValue.valueInGlobalWindow(
                      "check that the previous element is not flushed")));
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      });

  OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("regular element")));
  testHarness.close();

  // Restore
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness2 =
      new OneInputStreamOperatorTestHarness<>(doFnOperatorSupplier.get());

  testHarness2.initializeState(snapshot);
  testHarness2.open();

  testHarness2.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("after restore")));

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness2.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow("trigger another bundle"),
          WindowedValue.valueInGlobalWindow("check that the previous element is not flushed"),
          WindowedValue.valueInGlobalWindow("after restore")));
}
 
Example 13
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test(expected = IllegalStateException.class)
public void testFailOnRequiresStableInputAndDisabledCheckpointing() {
  TupleTag<String> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  KvToByteBufferKeySelector keySelector = new KvToByteBufferKeySelector<>(keyCoder);
  KvCoder<String, String> kvCoder = KvCoder.of(keyCoder, StringUtf8Coder.of());
  WindowedValue.ValueOnlyWindowedValueCoder<KV<String, String>> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(kvCoder);

  DoFn<String, String> doFn =
      new DoFn<String, String>() {
        @ProcessElement
        // Use RequiresStableInput to force buffering elements
        @RequiresStableInput
        public void processElement(ProcessContext context) {
          context.output(context.element());
        }
      };

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  // should make the DoFnOperator creation fail
  options.setCheckpointingInterval(-1L);
  new DoFnOperator(
      doFn,
      "stepName",
      windowedValueCoder,
      Collections.emptyMap(),
      outputTag,
      Collections.emptyList(),
      outputManagerFactory,
      WindowingStrategy.globalDefault(),
      new HashMap<>(), /* side-input mapping */
      Collections.emptyList(), /* side inputs */
      options,
      keyCoder,
      keySelector,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example 14
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void testBundleProcessingExceptionIsFatalDuringCheckpointing() throws Exception {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setMaxBundleSize(10L);
  options.setCheckpointingInterval(1L);

  TupleTag<String> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder coder = StringUtf8Coder.of();
  WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(coder);

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  @SuppressWarnings("unchecked")
  DoFnOperator doFnOperator =
      new DoFnOperator<>(
          new IdentityDoFn() {
            @FinishBundle
            public void finishBundle() {
              throw new RuntimeException("something went wrong here");
            }
          },
          "stepName",
          windowedValueCoder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          outputManagerFactory,
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          options,
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  @SuppressWarnings("unchecked")
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  // start a bundle
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("regular element")));

  // Make sure we throw Error, not a regular Exception.
  // A regular exception would just cause the checkpoint to fail.
  assertThrows(Error.class, () -> testHarness.snapshot(0, 0));
}
 
Example 15
Source File: ExecutableStageDoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void testSerialization() {
  WindowedValue.ValueOnlyWindowedValueCoder<Integer> coder =
      WindowedValue.getValueOnlyCoder(VarIntCoder.of());

  TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
  TupleTag<Integer> additionalOutput = new TupleTag<>("additional-output");
  ImmutableMap<TupleTag<?>, OutputTag<?>> tagsToOutputTags =
      ImmutableMap.<TupleTag<?>, OutputTag<?>>builder()
          .put(
              additionalOutput,
              new OutputTag<>(additionalOutput.getId(), TypeInformation.of(Integer.class)))
          .build();
  ImmutableMap<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders =
      ImmutableMap.<TupleTag<?>, Coder<WindowedValue<?>>>builder()
          .put(mainOutput, (Coder) coder)
          .put(additionalOutput, coder)
          .build();
  ImmutableMap<TupleTag<?>, Integer> tagsToIds =
      ImmutableMap.<TupleTag<?>, Integer>builder()
          .put(mainOutput, 0)
          .put(additionalOutput, 1)
          .build();

  DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          mainOutput, tagsToOutputTags, tagsToCoders, tagsToIds);

  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);

  ExecutableStageDoFnOperator<Integer, Integer> operator =
      new ExecutableStageDoFnOperator<>(
          "transform",
          WindowedValue.getValueOnlyCoder(VarIntCoder.of()),
          Collections.emptyMap(),
          mainOutput,
          ImmutableList.of(additionalOutput),
          outputManagerFactory,
          Collections.emptyMap() /* sideInputTagMapping */,
          Collections.emptyList() /* sideInputs */,
          Collections.emptyMap() /* sideInputId mapping */,
          options,
          stagePayload,
          jobInfo,
          FlinkExecutableStageContextFactory.getInstance(),
          createOutputMap(mainOutput, ImmutableList.of(additionalOutput)),
          WindowingStrategy.globalDefault(),
          null,
          null);

  ExecutableStageDoFnOperator<Integer, Integer> clone = SerializationUtils.clone(operator);
  assertNotNull(clone);
  assertNotEquals(operator, clone);
}
 
Example 16
Source File: PartialGroupByKeyParDoFnsTest.java    From beam with Apache License 2.0
@Test
public void testPartialGroupByKey() throws Exception {
  Coder keyCoder = StringUtf8Coder.of();
  Coder valueCoder = BigEndianIntegerCoder.of();

  TestOutputReceiver receiver =
      new TestOutputReceiver(
          new ElementByteSizeObservableCoder(
              WindowedValue.getValueOnlyCoder(
                  KvCoder.of(keyCoder, IterableCoder.of(valueCoder)))),
          counterSet,
          NameContextsForTests.nameContextForTest());

  ParDoFn pgbkParDoFn =
      new SimplePartialGroupByKeyParDoFn(
          GroupingTables.buffering(
              new WindowingCoderGroupingKeyCreator(keyCoder),
              PairInfo.create(),
              new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(keyCoder)),
              new CoderSizeEstimator(valueCoder)),
          receiver);

  pgbkParDoFn.startBundle(receiver);

  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 4)));
  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("there", 5)));
  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 6)));
  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("joe", 7)));
  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("there", 8)));
  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 9)));

  pgbkParDoFn.finishBundle();

  assertThat(
      receiver.outputElems,
      IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(
          WindowedValue.valueInGlobalWindow(KV.of("hi", Arrays.asList(4, 6, 9))),
          WindowedValue.valueInGlobalWindow(KV.of("there", Arrays.asList(5, 8))),
          WindowedValue.valueInGlobalWindow(KV.of("joe", Arrays.asList(7)))));

  // Exact counter values depend on size of encoded data.  If encoding
  // changes, then these expected counters should change to match.
  CounterUpdateExtractor<?> updateExtractor = Mockito.mock(CounterUpdateExtractor.class);
  counterSet.extractUpdates(false, updateExtractor);
  verify(updateExtractor).longSum(getObjectCounterName("test_receiver_out"), false, 3L);
  verify(updateExtractor)
      .longMean(
          getMeanByteCounterName("test_receiver_out"),
          false,
          LongCounterMean.ZERO.addValue(49L, 3));
  verifyNoMoreInteractions(updateExtractor);
}
 
Example 17
Source File: PartialGroupByKeyParDoFnsTest.java    From beam with Apache License 2.0
@Test
public void testPartialGroupByKeyWithCombiner() throws Exception {
  Coder keyCoder = StringUtf8Coder.of();
  Coder valueCoder = BigEndianIntegerCoder.of();

  TestOutputReceiver receiver =
      new TestOutputReceiver(
          new ElementByteSizeObservableCoder(
              WindowedValue.getValueOnlyCoder(KvCoder.of(keyCoder, valueCoder))),
          counterSet,
          NameContextsForTests.nameContextForTest());

  Combiner<WindowedValue<String>, Integer, Integer, Integer> combineFn = new TestCombiner();

  ParDoFn pgbkParDoFn =
      new SimplePartialGroupByKeyParDoFn(
          GroupingTables.combining(
              new WindowingCoderGroupingKeyCreator(keyCoder),
              PairInfo.create(),
              combineFn,
              new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(keyCoder)),
              new CoderSizeEstimator(valueCoder)),
          receiver);

  pgbkParDoFn.startBundle(receiver);

  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 4)));
  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("there", 5)));
  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 6)));
  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("joe", 7)));
  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("there", 8)));
  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 9)));

  pgbkParDoFn.finishBundle();

  assertThat(
      receiver.outputElems,
      IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(
          WindowedValue.valueInGlobalWindow(KV.of("hi", 19)),
          WindowedValue.valueInGlobalWindow(KV.of("there", 13)),
          WindowedValue.valueInGlobalWindow(KV.of("joe", 7))));

  // Exact counter values depend on size of encoded data.  If encoding
  // changes, then these expected counters should change to match.
  CounterUpdateExtractor<?> updateExtractor = Mockito.mock(CounterUpdateExtractor.class);
  counterSet.extractUpdates(false, updateExtractor);
  verify(updateExtractor).longSum(getObjectCounterName("test_receiver_out"), false, 3L);
  verify(updateExtractor)
      .longMean(
          getMeanByteCounterName("test_receiver_out"),
          false,
          LongCounterMean.ZERO.addValue(25L, 3));
  verifyNoMoreInteractions(updateExtractor);
}
 
Example 18
Source File: PartialGroupByKeyParDoFnsTest.java    From beam with Apache License 2.0
@Test
public void testPartialGroupByKeyWithCombinerAndSideInputs() throws Exception {
  Coder keyCoder = StringUtf8Coder.of();
  Coder valueCoder = BigEndianIntegerCoder.of();

  TestOutputReceiver receiver =
      new TestOutputReceiver(
          new ElementByteSizeObservableCoder(
              WindowedValue.getValueOnlyCoder(KvCoder.of(keyCoder, valueCoder))),
          counterSet,
          NameContextsForTests.nameContextForTest());

  Combiner<WindowedValue<String>, Integer, Integer, Integer> combineFn = new TestCombiner();

  ParDoFn pgbkParDoFn =
      new StreamingSideInputPGBKParDoFn(
          GroupingTables.combining(
              new WindowingCoderGroupingKeyCreator(keyCoder),
              PairInfo.create(),
              combineFn,
              new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(keyCoder)),
              new CoderSizeEstimator(valueCoder)),
          receiver,
          mockSideInputFetcher);

  Set<BoundedWindow> readyWindows = ImmutableSet.<BoundedWindow>of(GlobalWindow.INSTANCE);
  when(mockSideInputFetcher.getReadyWindows()).thenReturn(readyWindows);
  when(mockSideInputFetcher.prefetchElements(readyWindows))
      .thenReturn(ImmutableList.of(elemsBag));
  when(elemsBag.read())
      .thenReturn(
          ImmutableList.of(
              WindowedValue.valueInGlobalWindow(KV.of("hi", 4)),
              WindowedValue.valueInGlobalWindow(KV.of("there", 5))));
  when(mockSideInputFetcher.storeIfBlocked(Matchers.<WindowedValue<KV<String, Integer>>>any()))
      .thenReturn(false, false, false, true);

  pgbkParDoFn.startBundle(receiver);

  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 6)));
  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("joe", 7)));
  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("there", 8)));
  pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(KV.of("hi", 9)));

  pgbkParDoFn.finishBundle();

  assertThat(
      receiver.outputElems,
      IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(
          WindowedValue.valueInGlobalWindow(KV.of("hi", 10)),
          WindowedValue.valueInGlobalWindow(KV.of("there", 13)),
          WindowedValue.valueInGlobalWindow(KV.of("joe", 7))));

  // Exact counter values depend on size of encoded data.  If encoding
  // changes, then these expected counters should change to match.
  CounterUpdateExtractor<?> updateExtractor = Mockito.mock(CounterUpdateExtractor.class);
  counterSet.extractUpdates(false, updateExtractor);
  verify(updateExtractor).longSum(getObjectCounterName("test_receiver_out"), false, 3L);
  verify(updateExtractor)
      .longMean(
          getMeanByteCounterName("test_receiver_out"),
          false,
          LongCounterMean.ZERO.addValue(25L, 3));
  verifyNoMoreInteractions(updateExtractor);
}
 
Example 19
Source File: FlinkPipelineOptionsTest.java    From beam with Apache License 2.0
/** Tests that PipelineOptions are present after serialization. */
@Test
public void parDoBaseClassPipelineOptionsSerializationTest() throws Exception {

  TupleTag<String> mainTag = new TupleTag<>("main-output");

  Coder<WindowedValue<String>> coder = WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
  DoFnOperator<String, String> doFnOperator =
      new DoFnOperator<>(
          new TestDoFn(),
          "stepName",
          coder,
          Collections.emptyMap(),
          mainTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(mainTag, coder),
          WindowingStrategy.globalDefault(),
          new HashMap<>(),
          Collections.emptyList(),
          options,
          null /* key coder */,
          null /* key selector */,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  final byte[] serialized = SerializationUtils.serialize(doFnOperator);

  @SuppressWarnings("unchecked")
  DoFnOperator<Object, Object> deserialized = SerializationUtils.deserialize(serialized);

  TypeInformation<WindowedValue<Object>> typeInformation =
      TypeInformation.of(new TypeHint<WindowedValue<Object>>() {});

  OneInputStreamOperatorTestHarness<WindowedValue<Object>, WindowedValue<Object>> testHarness =
      new OneInputStreamOperatorTestHarness<>(
          deserialized, typeInformation.createSerializer(new ExecutionConfig()));
  testHarness.open();

  // execute once to access options
  testHarness.processElement(
      new StreamRecord<>(
          WindowedValue.of(
              new Object(), Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING)));

  testHarness.close();
}