Java Code Examples for org.apache.beam.sdk.values.WindowingStrategy#globalDefault()

The following examples show how to use org.apache.beam.sdk.values.WindowingStrategy#globalDefault(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FlinkPipelineOptionsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Constructs a {@link DoFnOperator} with {@code null} in the argument slot that precedes the key
 * coder (presumably the pipeline options, going by the test name) and expects an exception.
 */
@Test(expected = Exception.class)
public void parDoBaseClassPipelineOptionsNullTest() {
  Coder<WindowedValue<String>> valueOnlyCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
  TupleTag<String> outputTag = new TupleTag<>("main-output");

  // The constructed operator is intentionally discarded; only the construction is exercised.
  new DoFnOperator<>(
      new TestDoFn(),
      "stepName",
      valueOnlyCoder,
      Collections.emptyMap(),
      outputTag,
      Collections.emptyList(),
      new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, valueOnlyCoder),
      WindowingStrategy.globalDefault(),
      new HashMap<>(),
      Collections.emptyList(),
      null, // the value this test deliberately omits
      null, /* key coder */
      null /* key selector */,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example 2
Source File: DoFnTransformTest.java    From incubator-nemo with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds a single element through a {@link DoFnTransform} built around {@code IdentityDoFn} and
 * checks that the same element is the first entry collected by the output collector.
 */
@Test
@SuppressWarnings("unchecked")
public void testSingleOutput() {

  final TupleTag<String> outputTag = new TupleTag<>("main-output");

  final DoFnTransform<String, String> doFnTransform =
    new DoFnTransform<>(
      new IdentityDoFn<>(),
      NULL_INPUT_CODER,
      NULL_OUTPUT_CODERS,
      outputTag,
      Collections.emptyList(),
      WindowingStrategy.globalDefault(),
      PipelineOptionsFactory.as(NemoPipelineOptions.class),
      DisplayData.none(),
      DoFnSchemaInformation.create(),
      Collections.emptyMap());

  final Transform.Context context = mock(Transform.Context.class);
  final OutputCollector<WindowedValue<String>> oc = new TestOutputCollector<>();
  doFnTransform.prepare(context, oc);

  doFnTransform.onData(WindowedValue.valueInGlobalWindow("Hello"));

  // assertEquals takes (expected, actual); with the arguments in that order a failure message
  // labels the values correctly.
  assertEquals(WindowedValue.valueInGlobalWindow("Hello"), ((TestOutputCollector<String>) oc).outputs.get(0));

  doFnTransform.close();
}
 
Example 3
Source File: FlinkDoFnFunctionTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Opens a {@link FlinkDoFnFunction}, swaps its {@code metricContainer} field for a Mockito spy,
 * closes the function, and verifies that {@code registerMetricsForPipelineResult()} was called
 * on the spied container.
 */
@Test
public void testAccumulatorRegistrationOnOperatorClose() throws Exception {
  FlinkDoFnFunction function =
      new TestDoFnFunction(
          "step",
          WindowingStrategy.globalDefault(),
          Collections.emptyMap(),
          PipelineOptionsFactory.create(),
          Collections.emptyMap(),
          new TupleTag<>(),
          null,
          Collections.emptyMap(),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  function.open(new Configuration());

  // Read the container out of the private field, wrap it in a spy, and write the spy back so
  // that calls made during close() can be observed.
  final String containerField = "metricContainer";
  FlinkMetricContainer realContainer =
      (FlinkMetricContainer) Whitebox.getInternalState(function, containerField);
  FlinkMetricContainer spiedContainer = Mockito.spy(realContainer);
  Whitebox.setInternalState(function, containerField, spiedContainer);

  function.close();
  Mockito.verify(spiedContainer).registerMetricsForPipelineResult();
}
 
Example 4
Source File: FlinkStatefulDoFnFunctionTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Opens a {@link FlinkStatefulDoFnFunction}, swaps its {@code metricContainer} field for a
 * Mockito spy, closes the function, and verifies that
 * {@code registerMetricsForPipelineResult()} was called on the spied container.
 */
@Test
public void testAccumulatorRegistrationOnOperatorClose() throws Exception {
  FlinkStatefulDoFnFunction function =
      new TestDoFnFunction(
          "step",
          WindowingStrategy.globalDefault(),
          Collections.emptyMap(),
          PipelineOptionsFactory.create(),
          Collections.emptyMap(),
          new TupleTag<>(),
          null,
          Collections.emptyMap(),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  function.open(new Configuration());

  // Read the container out of the private field, wrap it in a spy, and write the spy back so
  // that calls made during close() can be observed.
  final String containerField = "metricContainer";
  FlinkMetricContainer realContainer =
      (FlinkMetricContainer) Whitebox.getInternalState(function, containerField);
  FlinkMetricContainer spiedContainer = Mockito.spy(realContainer);
  Whitebox.setInternalState(function, containerField, spiedContainer);

  function.close();
  Mockito.verify(spiedContainer).registerMetricsForPipelineResult();
}
 
Example 5
Source File: Flatten.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output collection for a flatten over {@code inputs}.
 *
 * <p>With no inputs the output uses {@link WindowingStrategy#globalDefault()} and stays bounded.
 * Otherwise the first input's windowing strategy is used, every input's window function and
 * trigger must be compatible with it, and the boundedness of all inputs is combined with
 * {@code and}.
 *
 * @param inputs the collections being flattened
 * @return the primitive output collection
 * @throws IllegalStateException if any input's window function or trigger is incompatible with
 *     that of the first input
 */
@Override
public PCollection<T> expand(PCollectionList<T> inputs) {
  WindowingStrategy<?, ?> windowingStrategy;
  IsBounded isBounded = IsBounded.BOUNDED;
  if (inputs.getAll().isEmpty()) {
    windowingStrategy = WindowingStrategy.globalDefault();
  } else {
    // The first input is the reference every input (itself included) is checked against.
    windowingStrategy = inputs.get(0).getWindowingStrategy();
    for (PCollection<?> candidate : inputs.getAll()) {
      WindowingStrategy<?, ?> candidateStrategy = candidate.getWindowingStrategy();

      boolean windowFnsCompatible =
          windowingStrategy.getWindowFn().isCompatible(candidateStrategy.getWindowFn());
      if (!windowFnsCompatible) {
        throw new IllegalStateException(
            "Inputs to Flatten had incompatible window windowFns: "
                + windowingStrategy.getWindowFn()
                + ", "
                + candidateStrategy.getWindowFn());
      }

      boolean triggersCompatible =
          windowingStrategy.getTrigger().isCompatible(candidateStrategy.getTrigger());
      if (!triggersCompatible) {
        throw new IllegalStateException(
            "Inputs to Flatten had incompatible triggers: "
                + windowingStrategy.getTrigger()
                + ", "
                + candidateStrategy.getTrigger());
      }

      isBounded = isBounded.and(candidate.isBounded());
    }
  }

  return PCollection.createPrimitiveOutputInternal(
      inputs.getPipeline(),
      windowingStrategy,
      isBounded,
      // The coder comes from the first input; with no inputs it is passed as null.
      inputs.getAll().isEmpty() ? null : inputs.get(0).getCoder());
}
 
Example 6
Source File: ExecutableStageDoFnOperatorTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Sets a cleanup timer for the global window on a keyed input and verifies that the state
 * backend lock is taken and released, that the encoded key becomes the current key, and that the
 * next event-time timer is one past the window's maximum timestamp.
 */
@Test
public void testEnsureStateCleanupWithKeyedInputCleanupTimer() {
  GlobalWindow globalWindow = GlobalWindow.INSTANCE;
  GlobalWindow.Coder globalWindowCoder = GlobalWindow.Coder.INSTANCE;
  StringUtf8Coder stringCoder = StringUtf8Coder.of();
  InMemoryTimerInternals timerInternals = new InMemoryTimerInternals();
  KeyedStateBackend stateBackend = Mockito.mock(KeyedStateBackend.class);
  Lock backendLock = Mockito.mock(Lock.class);

  // Test that cleanup timer is set correctly
  ExecutableStageDoFnOperator.CleanupTimer timerUnderTest =
      new ExecutableStageDoFnOperator.CleanupTimer<>(
          timerInternals,
          backendLock,
          WindowingStrategy.globalDefault(),
          stringCoder,
          globalWindowCoder,
          stateBackend);
  timerUnderTest.setForWindow(KV.of("key", "string"), globalWindow);

  Mockito.verify(backendLock).lock();
  ByteBuffer encodedKey = FlinkKeyUtils.encodeKey("key", stringCoder);
  Mockito.verify(stateBackend).setCurrentKey(encodedKey);
  assertThat(
      timerInternals.getNextTimer(TimeDomain.EVENT_TIME),
      is(globalWindow.maxTimestamp().plus(1)));
  Mockito.verify(backendLock).unlock();
}
 
Example 7
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs one element through a {@link DoFnOperator} wrapping {@code IdentityDoFn} inside a Flink
 * test harness and asserts that the harness output contains exactly that element.
 */
@Test
@SuppressWarnings("unchecked")
public void testSingleOutput() throws Exception {
  TupleTag<String> mainOutputTag = new TupleTag<>("main-output");
  Coder<WindowedValue<String>> windowedCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());

  DoFnOperator<String, String> operator =
      new DoFnOperator<>(
          new IdentityDoFn<>(),
          "stepName",
          windowedCoder,
          Collections.emptyMap(),
          mainOutputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(mainOutputTag, windowedCoder),
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> harness =
      new OneInputStreamOperatorTestHarness<>(operator);
  harness.open();

  StreamRecord<WindowedValue<String>> record =
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("Hello"));
  harness.processElement(record);

  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("Hello")));

  harness.close();
}
 
Example 8
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link DoFnOperator} configured with parallelism 4 whose {@code DoFn} emits the
 * string {@code "finishBundle"} into the global window from its {@code @FinishBundle} method.
 *
 * @return the newly constructed operator
 */
private static DoFnOperator getOperatorForCleanupInspection() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setParallelism(4);

  TupleTag<String> outputTag = new TupleTag<>("main-output");
  WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
  IdentityDoFn<String> doFn =
      new IdentityDoFn<String>() {
        @FinishBundle
        public void finishBundle(FinishBundleContext context) {
          context.output(
              "finishBundle", BoundedWindow.TIMESTAMP_MIN_VALUE, GlobalWindow.INSTANCE);
        }
      };

  // Diamond instead of a raw type: the type argument is inferred from the declared variable
  // type, so the assignment no longer needs an unchecked conversion.
  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  return new DoFnOperator<>(
      doFn,
      "stepName",
      windowedValueCoder,
      Collections.emptyMap(),
      outputTag,
      Collections.emptyList(),
      outputManagerFactory,
      WindowingStrategy.globalDefault(),
      new HashMap<>(), /* side-input mapping */
      Collections.emptyList(), /* side inputs */
      options,
      null,
      null,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example 9
Source File: DoFnTransformTest.java    From incubator-nemo with Apache License 2.0 4 votes vote down vote up
/**
 * With the maximum bundle size set to 3, pushes two groups of three elements through a
 * {@link DoFnTransform} and asserts after each group that the first value recorded in the
 * shared list is 3.
 */
@Test
@SuppressWarnings("unchecked")
public void testCountBundle() {
  final NemoPipelineOptions options = PipelineOptionsFactory.as(NemoPipelineOptions.class);
  options.setMaxBundleSize(3L);
  options.setMaxBundleTimeMills(10000000L);

  final TupleTag<String> mainTag = new TupleTag<>("main-output");
  final List<Integer> recordedCounts = new ArrayList<>();

  final DoFnTransform<String, String> transform =
    new DoFnTransform<>(
      new BundleTestDoFn(recordedCounts),
      NULL_INPUT_CODER,
      NULL_OUTPUT_CODERS,
      mainTag,
      Collections.emptyList(),
      WindowingStrategy.globalDefault(),
      options,
      DisplayData.none(),
      DoFnSchemaInformation.create(),
      Collections.emptyMap());

  final Transform.Context mockContext = mock(Transform.Context.class);
  final OutputCollector<WindowedValue<String>> collector = new TestOutputCollector<>();
  transform.prepare(mockContext, collector);

  // First group of three elements.
  for (int sent = 0; sent < 3; sent++) {
    transform.onData(WindowedValue.valueInGlobalWindow("a"));
  }
  assertEquals(3, (int) recordedCounts.get(0));

  recordedCounts.clear();

  // Second group of three elements, checked against a freshly cleared list.
  for (int sent = 0; sent < 3; sent++) {
    transform.onData(WindowedValue.valueInGlobalWindow("a"));
  }
  assertEquals(3, (int) recordedCounts.get(0));

  transform.close();
}
 
Example 10
Source File: ExecutableStageDoFnOperatorTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an {@link ExecutableStageDoFnOperator} with one main and one additional output, clones
 * it via {@code SerializationUtils.clone}, and asserts that the clone is non-null and not equal
 * to the original.
 */
@Test
public void testSerialization() {
  WindowedValue.ValueOnlyWindowedValueCoder<Integer> intCoder =
      WindowedValue.getValueOnlyCoder(VarIntCoder.of());

  TupleTag<Integer> mainTag = new TupleTag<>("main-output");
  TupleTag<Integer> extraTag = new TupleTag<>("additional-output");

  // Lookup tables handed to the output manager factory.
  ImmutableMap<TupleTag<?>, OutputTag<?>> outputTagsByTag =
      ImmutableMap.<TupleTag<?>, OutputTag<?>>builder()
          .put(extraTag, new OutputTag<>(extraTag.getId(), TypeInformation.of(Integer.class)))
          .build();
  ImmutableMap<TupleTag<?>, Coder<WindowedValue<?>>> codersByTag =
      ImmutableMap.<TupleTag<?>, Coder<WindowedValue<?>>>builder()
          .put(mainTag, (Coder) intCoder)
          .put(extraTag, intCoder)
          .build();
  ImmutableMap<TupleTag<?>, Integer> idsByTag =
      ImmutableMap.<TupleTag<?>, Integer>builder().put(mainTag, 0).put(extraTag, 1).build();

  DoFnOperator.MultiOutputOutputManagerFactory<Integer> managerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          mainTag, outputTagsByTag, codersByTag, idsByTag);

  FlinkPipelineOptions pipelineOptions = PipelineOptionsFactory.as(FlinkPipelineOptions.class);

  ExecutableStageDoFnOperator<Integer, Integer> original =
      new ExecutableStageDoFnOperator<>(
          "transform",
          WindowedValue.getValueOnlyCoder(VarIntCoder.of()),
          Collections.emptyMap(),
          mainTag,
          ImmutableList.of(extraTag),
          managerFactory,
          Collections.emptyMap() /* sideInputTagMapping */,
          Collections.emptyList() /* sideInputs */,
          Collections.emptyMap() /* sideInputId mapping */,
          pipelineOptions,
          stagePayload,
          jobInfo,
          FlinkExecutableStageContextFactory.getInstance(),
          createOutputMap(mainTag, ImmutableList.of(extraTag)),
          WindowingStrategy.globalDefault(),
          null,
          null);

  ExecutableStageDoFnOperator<Integer, Integer> copy = SerializationUtils.clone(original);
  assertNotNull(copy);
  assertNotEquals(original, copy);
}
 
Example 11
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Starts a bundle on a {@link DoFnOperator} whose {@code @FinishBundle} method throws a
 * {@link RuntimeException}, then asserts that taking a snapshot surfaces an {@link Error}.
 */
@Test
public void testBundleProcessingExceptionIsFatalDuringCheckpointing() throws Exception {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setMaxBundleSize(10L);
  options.setCheckpointingInterval(1L);

  TupleTag<String> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder coder = StringUtf8Coder.of();
  WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(coder);

  // Diamond instead of a raw type: the type argument is inferred from the declared variable
  // type, so the assignment no longer needs an unchecked conversion.
  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  @SuppressWarnings("unchecked")
  DoFnOperator doFnOperator =
      new DoFnOperator<>(
          new IdentityDoFn() {
            @FinishBundle
            public void finishBundle() {
              throw new RuntimeException("something went wrong here");
            }
          },
          "stepName",
          windowedValueCoder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          outputManagerFactory,
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          options,
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  @SuppressWarnings("unchecked")
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  // start a bundle
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("regular element")));

  // Make sure we throw Error, not a regular Exception.
  // A regular exception would just cause the checkpoint to fail.
  assertThrows(Error.class, () -> testHarness.snapshot(0, 0));
}
 
Example 12
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Expects {@link IllegalStateException} when a {@link DoFnOperator} is constructed for a
 * {@code DoFn} annotated with {@code @RequiresStableInput} while the checkpointing interval is
 * set to {@code -1}.
 */
@Test(expected = IllegalStateException.class)
public void testFailOnRequiresStableInputAndDisabledCheckpointing() {
  TupleTag<String> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  KvToByteBufferKeySelector keySelector = new KvToByteBufferKeySelector<>(keyCoder);
  KvCoder<String, String> kvCoder = KvCoder.of(keyCoder, StringUtf8Coder.of());
  WindowedValue.ValueOnlyWindowedValueCoder<KV<String, String>> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(kvCoder);

  DoFn<String, String> doFn =
      new DoFn<String, String>() {
        @ProcessElement
        // Use RequiresStableInput to force buffering elements
        @RequiresStableInput
        public void processElement(ProcessContext context) {
          context.output(context.element());
        }
      };

  // Diamond instead of a raw type: the type argument is inferred from the declared variable
  // type, so the assignment no longer needs an unchecked conversion.
  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  // should make the DoFnOperator creation fail
  options.setCheckpointingInterval(-1L);
  new DoFnOperator(
      doFn,
      "stepName",
      windowedValueCoder,
      Collections.emptyMap(),
      outputTag,
      Collections.emptyList(),
      outputManagerFactory,
      WindowingStrategy.globalDefault(),
      new HashMap<>(), /* side-input mapping */
      Collections.emptyList(), /* side inputs */
      options,
      keyCoder,
      keySelector,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example 13
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that elements emitted while a snapshot is finishing the active bundle are held back
 * rather than sent downstream, and that they appear in the output of a second harness
 * initialized from that snapshot.
 */
@Test
public void testCheckpointBufferingWithMultipleBundles() throws Exception {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setMaxBundleSize(10L);
  options.setCheckpointingInterval(1L);

  TupleTag<String> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder coder = StringUtf8Coder.of();
  WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(coder);

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  // A supplier is used because the test needs two identically configured operators: one for
  // the initial run and one for the restore.
  @SuppressWarnings("unchecked")
  Supplier<DoFnOperator<String, String>> doFnOperatorSupplier =
      () ->
          new DoFnOperator<>(
              new IdentityDoFn(),
              "stepName",
              windowedValueCoder,
              Collections.emptyMap(),
              outputTag,
              Collections.emptyList(),
              outputManagerFactory,
              WindowingStrategy.globalDefault(),
              new HashMap<>(), /* side-input mapping */
              Collections.emptyList(), /* side inputs */
              options,
              null,
              null,
              DoFnSchemaInformation.create(),
              Collections.emptyMap());

  DoFnOperator<String, String> doFnOperator = doFnOperatorSupplier.get();
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  // start a bundle
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("regular element")));

  // This callback will be executed in the snapshotState function in the course of
  // finishing the currently active bundle. Everything emitted in the callback should
  // be buffered and not sent downstream.
  doFnOperator.setBundleFinishedCallback(
      () -> {
        try {
          // Clear this early for the test here because we want to finish the bundle from within
          // the callback which would otherwise cause an infinite recursion
          doFnOperator.setBundleFinishedCallback(null);
          testHarness.processElement(
              new StreamRecord<>(WindowedValue.valueInGlobalWindow("trigger another bundle")));
          doFnOperator.invokeFinishBundle();
          testHarness.processElement(
              new StreamRecord<>(
                  WindowedValue.valueInGlobalWindow(
                      "check that the previous element is not flushed")));
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      });

  OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);

  // Only the element processed before the snapshot is expected downstream at this point.
  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("regular element")));
  testHarness.close();

  // Restore
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness2 =
      new OneInputStreamOperatorTestHarness<>(doFnOperatorSupplier.get());

  testHarness2.initializeState(snapshot);
  testHarness2.open();

  testHarness2.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("after restore")));

  // The two elements emitted from inside the callback are expected ahead of the element
  // processed after the restore.
  assertThat(
      stripStreamRecordFromWindowedValue(testHarness2.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow("trigger another bundle"),
          WindowedValue.valueInGlobalWindow("check that the previous element is not flushed"),
          WindowedValue.valueInGlobalWindow("after restore")));
}
 
Example 14
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a per-key counter kept in {@code ValueState} carries over from a snapshot into a
 * second test harness: the counting continues from the pre-snapshot value instead of restarting.
 */
@Test
public void testStateRestore() throws Exception {
  // Increments a counter on every element and emits the element only when its value equals the
  // counter after the increment.
  DoFn<KV<String, Long>, KV<String, Long>> filterElementsEqualToCountFn =
      new DoFn<KV<String, Long>, KV<String, Long>>() {

        @StateId("counter")
        private final StateSpec<ValueState<Long>> counterSpec =
            StateSpecs.value(VarLongCoder.of());

        @ProcessElement
        public void processElement(
            ProcessContext context, @StateId("counter") ValueState<Long> count) {
          // An unset state reads as null and is treated as 0.
          long currentCount = Optional.ofNullable(count.read()).orElse(0L);
          currentCount = currentCount + 1;
          count.write(currentCount);

          KV<String, Long> currentElement = context.element();
          if (currentCount == currentElement.getValue()) {
            context.output(currentElement);
          }
        }
      };

  WindowingStrategy<Object, GlobalWindow> windowingStrategy = WindowingStrategy.globalDefault();

  TupleTag<KV<String, Long>> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  KvToByteBufferKeySelector keySelector = new KvToByteBufferKeySelector<>(keyCoder);
  KvCoder<String, Long> coder = KvCoder.of(keyCoder, VarLongCoder.of());

  FullWindowedValueCoder<KV<String, Long>> kvCoder =
      WindowedValue.getFullCoder(coder, windowingStrategy.getWindowFn().windowCoder());

  CoderTypeInformation<ByteBuffer> keyCoderInfo =
      new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of());

  OneInputStreamOperatorTestHarness<
          WindowedValue<KV<String, Long>>, WindowedValue<KV<String, Long>>>
      testHarness =
          createTestHarness(
              windowingStrategy,
              filterElementsEqualToCountFn,
              kvCoder,
              kvCoder,
              keyCoder,
              outputTag,
              keyCoderInfo,
              keySelector);
  testHarness.open();

  // Two elements for key "a" before the snapshot: the counter goes 1, 2 and neither matches 100.
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 100L))));
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 100L))));

  OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
  testHarness.close();

  // A second harness with the same configuration, initialized from the snapshot.
  testHarness =
      createTestHarness(
          windowingStrategy,
          filterElementsEqualToCountFn,
          kvCoder,
          kvCoder,
          keyCoder,
          outputTag,
          keyCoderInfo,
          keySelector);
  testHarness.initializeState(snapshot);
  testHarness.open();

  // after restore: counter = 2
  // The next four elements take the counter to 3, 4, 5 and 6, so only the elements carrying
  // 4 and 5 equal the counter and are emitted.
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 100L))));
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 4L))));
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 5L))));
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 100L))));

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow(KV.of("a", 4L)),
          WindowedValue.valueInGlobalWindow(KV.of("a", 5L))));

  testHarness.close();
}
 
Example 15
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Sends "one", "two" and "hello" through a {@link DoFnOperator} wrapping
 * {@code MultiOutputDoFn} and asserts the expected contents of the main output and of both
 * additional (side) outputs.
 */
@Test
@SuppressWarnings("unchecked")
public void testMultiOutputOutput() throws Exception {
  WindowedValue.ValueOnlyWindowedValueCoder<String> stringCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());

  TupleTag<String> mainTag = new TupleTag<>("main-output");
  TupleTag<String> firstExtraTag = new TupleTag<>("output-1");
  TupleTag<String> secondExtraTag = new TupleTag<>("output-2");

  // Lookup tables handed to the output manager factory.
  ImmutableMap<TupleTag<?>, OutputTag<?>> flinkTagsByTag =
      ImmutableMap.<TupleTag<?>, OutputTag<?>>builder()
          .put(firstExtraTag, new OutputTag<String>(firstExtraTag.getId()) {})
          .put(secondExtraTag, new OutputTag<String>(secondExtraTag.getId()) {})
          .build();
  ImmutableMap<TupleTag<?>, Coder<WindowedValue<?>>> codersByTag =
      ImmutableMap.<TupleTag<?>, Coder<WindowedValue<?>>>builder()
          .put(mainTag, (Coder) stringCoder)
          .put(firstExtraTag, stringCoder)
          .put(secondExtraTag, stringCoder)
          .build();
  ImmutableMap<TupleTag<?>, Integer> idsByTag =
      ImmutableMap.<TupleTag<?>, Integer>builder()
          .put(mainTag, 0)
          .put(firstExtraTag, 1)
          .put(secondExtraTag, 2)
          .build();

  DoFnOperator<String, String> operator =
      new DoFnOperator<>(
          new MultiOutputDoFn(firstExtraTag, secondExtraTag),
          "stepName",
          stringCoder,
          Collections.emptyMap(),
          mainTag,
          ImmutableList.of(firstExtraTag, secondExtraTag),
          new DoFnOperator.MultiOutputOutputManagerFactory(
              mainTag, flinkTagsByTag, codersByTag, idsByTag),
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> harness =
      new OneInputStreamOperatorTestHarness<>(operator);
  harness.open();

  harness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("one")));
  harness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("two")));
  harness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("hello")));

  // main output
  assertThat(
      this.stripStreamRecord(harness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("got: hello")));

  // additional output 1
  assertThat(
      this.stripStreamRecord(harness.getSideOutput(flinkTagsByTag.get(firstExtraTag))),
      contains(
          WindowedValue.valueInGlobalWindow("extra: one"),
          WindowedValue.valueInGlobalWindow("got: hello")));

  // additional output 2
  assertThat(
      this.stripStreamRecord(harness.getSideOutput(flinkTagsByTag.get(secondExtraTag))),
      contains(
          WindowedValue.valueInGlobalWindow("extra: two"),
          WindowedValue.valueInGlobalWindow("got: hello")));

  harness.close();
}
 
Example 16
Source File: FlinkPipelineOptionsTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that PipelineOptions are present after serialization: the operator is serialized,
 * deserialized, opened in a test harness, and fed a single element.
 */
@Test
public void parDoBaseClassPipelineOptionsSerializationTest() throws Exception {
  Coder<WindowedValue<String>> valueOnlyCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
  TupleTag<String> outputTag = new TupleTag<>("main-output");

  DoFnOperator<String, String> original =
      new DoFnOperator<>(
          new TestDoFn(),
          "stepName",
          valueOnlyCoder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, valueOnlyCoder),
          WindowingStrategy.globalDefault(),
          new HashMap<>(),
          Collections.emptyList(),
          options,
          null, /* key coder */
          null /* key selector */,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  // Round-trip the operator through Java serialization.
  final byte[] bytes = SerializationUtils.serialize(original);

  @SuppressWarnings("unchecked")
  DoFnOperator<Object, Object> roundTripped = SerializationUtils.deserialize(bytes);

  TypeInformation<WindowedValue<Object>> windowedObjectType =
      TypeInformation.of(new TypeHint<WindowedValue<Object>>() {});

  OneInputStreamOperatorTestHarness<WindowedValue<Object>, WindowedValue<Object>> harness =
      new OneInputStreamOperatorTestHarness<>(
          roundTripped, windowedObjectType.createSerializer(new ExecutionConfig()));
  harness.open();

  // execute once to access options
  WindowedValue<Object> element =
      WindowedValue.of(new Object(), Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
  harness.processElement(new StreamRecord<>(element));

  harness.close();
}
 
Example 17
Source File: DoFnTransformTest.java    From incubator-nemo with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises a {@link PushBackDoFnTransform} with two side inputs: a main element that arrives
 * before the side inputs, a main element that arrives after them, and finally a watermark at the
 * global window's maximum timestamp, after which both side inputs read back as empty.
 */
@Test
public void testSideInputs() {
  // mock context
  final Transform.Context context = mock(Transform.Context.class);
  TupleTag<Tuple<String, Iterable<String>>> outputTag = new TupleTag<>("main-output");

  WindowedValue<String> firstElement = WindowedValue.valueInGlobalWindow("first");
  WindowedValue<String> secondElement = WindowedValue.valueInGlobalWindow("second");

  // Both side-input elements are created with the diamond operator so neither is a raw
  // instantiation.
  SideInputElement firstSideinput = new SideInputElement<>(0, ImmutableList.of("1"));
  SideInputElement secondSideinput = new SideInputElement<>(1, ImmutableList.of("2"));

  final Map<Integer, PCollectionView<?>> sideInputMap = new HashMap<>();
  sideInputMap.put(firstSideinput.getSideInputIndex(), view1);
  sideInputMap.put(secondSideinput.getSideInputIndex(), view2);
  final PushBackDoFnTransform<String, String> doFnTransform =
    new PushBackDoFnTransform(
      new SimpleSideInputDoFn<String>(view1, view2),
      NULL_INPUT_CODER,
      NULL_OUTPUT_CODERS,
      outputTag,
      Collections.emptyList(),
      WindowingStrategy.globalDefault(),
      sideInputMap, /* side inputs */
      PipelineOptionsFactory.as(NemoPipelineOptions.class),
      DisplayData.none(),
      DoFnSchemaInformation.create(),
      Collections.emptyMap());

  final TestOutputCollector<String> oc = new TestOutputCollector<>();
  doFnTransform.prepare(context, oc);

  // Main input first, Side inputs later
  doFnTransform.onData(firstElement);

  doFnTransform.onData(WindowedValue.valueInGlobalWindow(firstSideinput));
  doFnTransform.onData(WindowedValue.valueInGlobalWindow(secondSideinput));
  assertEquals(
    WindowedValue.valueInGlobalWindow(
      concat(firstElement.getValue(), firstSideinput.getSideInputValue(), secondSideinput.getSideInputValue())),
    oc.getOutput().get(0));

  // Side inputs first, Main input later
  doFnTransform.onData(secondElement);
  assertEquals(
    WindowedValue.valueInGlobalWindow(
      concat(secondElement.getValue(), firstSideinput.getSideInputValue(), secondSideinput.getSideInputValue())),
    oc.getOutput().get(1));

  // There should be only 2 final outputs
  assertEquals(2, oc.getOutput().size());

  // The side inputs should be "READY"
  assertTrue(doFnTransform.getSideInputReader().isReady(view1, GlobalWindow.INSTANCE));
  assertTrue(doFnTransform.getSideInputReader().isReady(view2, GlobalWindow.INSTANCE));

  // This watermark should remove the side inputs. (Now should be "NOT READY")
  doFnTransform.onWatermark(new Watermark(GlobalWindow.TIMESTAMP_MAX_VALUE.getMillis()));
  Iterable materializedSideInput1 = doFnTransform.getSideInputReader().get(view1, GlobalWindow.INSTANCE);
  Iterable materializedSideInput2 = doFnTransform.getSideInputReader().get(view2, GlobalWindow.INSTANCE);
  assertFalse(materializedSideInput1.iterator().hasNext());
  assertFalse(materializedSideInput2.iterator().hasNext());

  // Closing the transform must not produce any further output: still only 2 final outputs
  doFnTransform.close();
  assertEquals(2, oc.getOutput().size());
}
 
Example 18
Source File: DoFnTransformTest.java    From incubator-nemo with Apache License 2.0 4 votes vote down vote up
/**
 * Feeds three elements through a {@code DoFnTransform} wrapping {@code MultiOutputDoFn} and checks
 * what was recorded on the main output and on the two additional tagged outputs.
 */
@Test
@SuppressWarnings("unchecked")
public void testMultiOutputOutput() {

  TupleTag<String> mainOutput = new TupleTag<>("main-output");
  TupleTag<String> additionalOutput1 = new TupleTag<>("output-1");
  TupleTag<String> additionalOutput2 = new TupleTag<>("output-2");

  ImmutableList<TupleTag<?>> tags = ImmutableList.of(additionalOutput1, additionalOutput2);

  final DoFnTransform<String, String> doFnTransform =
    new DoFnTransform<>(
      new MultiOutputDoFn(additionalOutput1, additionalOutput2),
      NULL_INPUT_CODER,
      NULL_OUTPUT_CODERS,
      mainOutput,
      tags,
      WindowingStrategy.globalDefault(),
      PipelineOptionsFactory.as(NemoPipelineOptions.class),
      DisplayData.none(),
      DoFnSchemaInformation.create(),
      Collections.emptyMap());

  // mock context
  final Transform.Context context = mock(Transform.Context.class);

  // Declared with the concrete collector type so the recorded outputs can be inspected without casts.
  final TestOutputCollector<String> oc = new TestOutputCollector<>();
  doFnTransform.prepare(context, oc);

  doFnTransform.onData(WindowedValue.valueInGlobalWindow("one"));
  doFnTransform.onData(WindowedValue.valueInGlobalWindow("two"));
  doFnTransform.onData(WindowedValue.valueInGlobalWindow("hello"));

  // main output: the first recorded element must be the one produced for "hello"
  assertEquals(WindowedValue.valueInGlobalWindow("got: hello"), oc.outputs.get(0));

  // additional output 1
  assertTrue(oc.getTaggedOutputs().contains(
    new Tuple<>(additionalOutput1.getId(), WindowedValue.valueInGlobalWindow("extra: one"))
  ));
  assertTrue(oc.getTaggedOutputs().contains(
    new Tuple<>(additionalOutput1.getId(), WindowedValue.valueInGlobalWindow("got: hello"))
  ));

  // additional output 2
  assertTrue(oc.getTaggedOutputs().contains(
    new Tuple<>(additionalOutput2.getId(), WindowedValue.valueInGlobalWindow("extra: two"))
  ));
  assertTrue(oc.getTaggedOutputs().contains(
    new Tuple<>(additionalOutput2.getId(), WindowedValue.valueInGlobalWindow("got: hello"))
  ));

  doFnTransform.close();
}
 
Example 19
Source File: DoFnTransformTest.java    From incubator-nemo with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a bundle is finished because of the configured maximum bundle time: elements are fed
 * until {@code BundleTestDoFn} reports its first value, which must equal the number of elements
 * fed, and at least {@code maxBundleTimeMills} milliseconds must have elapsed by then.
 */
@Test
@SuppressWarnings("unchecked")
public void testTimeBundle() {

  final long maxBundleTimeMills = 1000L;
  final TupleTag<String> outputTag = new TupleTag<>("main-output");
  final NemoPipelineOptions pipelineOptions = PipelineOptionsFactory.as(NemoPipelineOptions.class);
  // NOTE(review): the size limit is presumably set this high so only the time limit can end the bundle.
  pipelineOptions.setMaxBundleSize(10000000L);
  pipelineOptions.setMaxBundleTimeMills(maxBundleTimeMills);

  final List<Integer> bundleOutput = new ArrayList<>();

  final DoFnTransform<String, String> doFnTransform =
    new DoFnTransform<>(
      new BundleTestDoFn(bundleOutput),
      NULL_INPUT_CODER,
      NULL_OUTPUT_CODERS,
      outputTag,
      Collections.emptyList(),
      WindowingStrategy.globalDefault(),
      pipelineOptions,
      DisplayData.none(),
      DoFnSchemaInformation.create(),
      Collections.emptyMap());

  final Transform.Context context = mock(Transform.Context.class);
  final OutputCollector<WindowedValue<String>> oc = new TestOutputCollector<>();

  final long startTime = System.currentTimeMillis();
  doFnTransform.prepare(context, oc);

  // Keep feeding elements until the DoFn has recorded its first bundle result.
  int count = 0;
  while (bundleOutput.isEmpty()) {
    doFnTransform.onData(WindowedValue.valueInGlobalWindow("a"));
    count += 1;
    try {
      Thread.sleep(10);
    } catch (InterruptedException e) {
      // Restore the interrupt flag so callers further up the stack can still observe it;
      // the cause is preserved in the rethrown exception, so no separate stack-trace print is needed.
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    }
  }

  final long endTime = System.currentTimeMillis();
  assertEquals(count, (int) bundleOutput.get(0));
  assertTrue(endTime - startTime >= maxBundleTimeMills);

  doFnTransform.close();
}
 
Example 20
Source File: UnboundedWrite.java    From components with Apache License 2.0 2 votes vote down vote up
/**
 * Applies a window to the input collection if one hasn't already been specified.
 *
 * <p>A collection counts as not yet windowed when its windowing strategy is {@code null} or is equal
 * to {@link WindowingStrategy#globalDefault()}. In that case it is re-windowed into fixed windows of
 * {@code DEFAULT_WINDOW_SIZE}.
 *
 * @param in the collection to inspect and, if necessary, window.
 * @return the input collection if it has already been windowed, otherwise the same collection inside a default
 * window.
 */
public static <T> PCollection<T> ofDefaultWindow(PCollection<T> in) {
    final WindowingStrategy<?, ?> strategy = in.getWindowingStrategy();
    // Compare by value rather than by reference: a strategy that is equal to the global default but is
    // a distinct instance must still be treated as "no window specified".
    if (strategy != null && !WindowingStrategy.globalDefault().equals(strategy)) {
        return in;
    }
    return in.apply("ApplyDefaultWindow", Window.<T> into(FixedWindows.of(DEFAULT_WINDOW_SIZE)));
}