org.apache.beam.sdk.transforms.windowing.Sessions Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.windowing.Sessions. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DataflowGroupByKeyTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Expects an {@link IllegalStateException} mentioning an invalid window merge function when two
 * {@code GroupByKey} transforms are applied back to back on a session-windowed input.
 */
@Test
public void testInvalidWindowsService() {
  Pipeline pipeline = createTestServiceRunner();
  List<KV<String, Integer>> noPairs = Arrays.asList();

  // Build an empty keyed collection first, then put it into one-minute session windows.
  PCollection<KV<String, Integer>> emptyInput =
      pipeline.apply(
          Create.of(noPairs)
              .withCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())));
  PCollection<KV<String, Integer>> sessionWindowed =
      emptyInput.apply(Window.into(Sessions.withGapDuration(Duration.standardMinutes(1))));

  // Expectations have to be registered before the statement that is supposed to throw.
  thrown.expectMessage("GroupByKey must have a valid Window merge function");
  thrown.expect(IllegalStateException.class);

  sessionWindowed
      .apply("GroupByKey", GroupByKey.create())
      .apply("GroupByKeyAgain", GroupByKey.create());
}
 
Example #2
Source File: AfterProcessingTimeStateMachineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises a "first element in pane plus 5 ms" processing-time trigger over 10 ms session
 * windows: neither individual session is ready when probed, and the merged session is ready once
 * processing time has advanced to 16.
 */
@Test
public void testAfterProcessingTimeWithMergingWindow() throws Exception {
  Duration firingDelay = Duration.millis(5);
  Duration sessionGap = Duration.millis(10);
  IntervalWindow sessionOfOne = new IntervalWindow(new Instant(1), new Instant(11));
  IntervalWindow sessionOfThree = new IntervalWindow(new Instant(3), new Instant(13));
  IntervalWindow combinedSession = new IntervalWindow(new Instant(1), new Instant(13));

  SimpleTriggerStateMachineTester<IntervalWindow> tester =
      TriggerStateMachineTester.forTrigger(
          AfterProcessingTimeStateMachine.pastFirstElementInPane().plusDelayOf(firingDelay),
          Sessions.withGapDuration(sessionGap));

  // Element 1 arrives at processing time 10: session [1, 11), timer for 15.
  tester.advanceProcessingTime(new Instant(10));
  tester.injectElements(1);
  assertFalse(tester.shouldFire(sessionOfOne));

  // Element 3 arrives at processing time 12: session [3, 13), timer for 17.
  tester.advanceProcessingTime(new Instant(12));
  tester.injectElements(3);
  assertFalse(tester.shouldFire(sessionOfThree));

  // The two overlapping sessions collapse into [1, 13).
  tester.mergeWindows();

  tester.advanceProcessingTime(new Instant(16));
  assertTrue(tester.shouldFire(combinedSession));
}
 
Example #3
Source File: AfterSynchronizedProcessingTimeStateMachineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Drives a processing-time trigger (first element in pane, delayed by 5 ms) through a merge of
 * two 10 ms sessions and checks that only the merged session reports ready, at processing time
 * 16.
 */
@Test
public void testAfterProcessingTimeWithMergingWindow() throws Exception {
  Duration gap = Duration.millis(10);
  Duration delay = Duration.millis(5);
  SimpleTriggerStateMachineTester<IntervalWindow> tester =
      TriggerStateMachineTester.forTrigger(
          AfterProcessingTimeStateMachine.pastFirstElementInPane().plusDelayOf(delay),
          Sessions.withGapDuration(gap));

  // First element: processing time 10, session [1, 11), timer for 15.
  IntervalWindow earlierSession = new IntervalWindow(new Instant(1), new Instant(11));
  tester.advanceProcessingTime(new Instant(10));
  tester.injectElements(1);
  assertFalse(tester.shouldFire(earlierSession));

  // Second element: processing time 12, session [3, 13), timer for 17.
  IntervalWindow laterSession = new IntervalWindow(new Instant(3), new Instant(13));
  tester.advanceProcessingTime(new Instant(12));
  tester.injectElements(3);
  assertFalse(tester.shouldFire(laterSession));

  // After merging, the surviving session spans [1, 13).
  IntervalWindow unionSession = new IntervalWindow(new Instant(1), new Instant(13));
  tester.mergeWindows();

  tester.advanceProcessingTime(new Instant(16));
  assertTrue(tester.shouldFire(unionSession));
}
 
Example #4
Source File: RepeatedlyStateMachineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a repeated "at least two elements" trigger is not ready in either single-element
 * session, but is ready in the session produced by merging them.
 */
@Test
public void testShouldFireAfterMerge() throws Exception {
  IntervalWindow sessionAtOne = new IntervalWindow(new Instant(1), new Instant(11));
  IntervalWindow sessionAtFive = new IntervalWindow(new Instant(5), new Instant(15));
  IntervalWindow unionSession = new IntervalWindow(new Instant(1), new Instant(15));
  Duration sessionGap = Duration.millis(10);

  tester =
      TriggerStateMachineTester.forTrigger(
          RepeatedlyStateMachine.forever(AfterPaneStateMachine.elementCountAtLeast(2)),
          Sessions.withGapDuration(sessionGap));

  // One element per session is below the required count of two.
  tester.injectElements(1);
  assertFalse(tester.shouldFire(sessionAtOne));

  tester.injectElements(5);
  assertFalse(tester.shouldFire(sessionAtFive));

  // Once merged, the combined session holds both elements and should be ready.
  tester.mergeWindows();
  assertTrue(tester.shouldFire(unionSession));
}
 
Example #5
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that no watermark hold is reported when elements 1 and 10 are injected into 10 ms
 * session windows after the input watermark has already been advanced to 40, with one minute of
 * allowed lateness and automatic output-watermark advancement switched off.
 */
@Test
public void testWatermarkHoldForLateNewWindow() throws Exception {
  Duration sessionGap = Duration.millis(10);
  Duration lateness = Duration.standardMinutes(1);

  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of(Sessions.withGapDuration(sessionGap))
          .withMode(AccumulationMode.DISCARDING_FIRED_PANES)
          .withTrigger(
              Repeatedly.forever(
                  AfterWatermark.pastEndOfWindow()
                      .withLateFirings(AfterPane.elementCountAtLeast(1))))
          .withAllowedLateness(lateness);
  ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester =
      ReduceFnTester.nonCombining(strategy);
  tester.setAutoAdvanceOutputWatermark(false);

  // Nothing has been processed yet, so neither a hold nor an output watermark exists.
  assertThat(tester.getWatermarkHold(), nullValue());
  assertThat(tester.getOutputWatermark(), nullValue());

  // Both elements carry timestamps behind the input watermark of 40.
  tester.advanceInputWatermark(new Instant(40));
  injectElements(tester, 1);
  assertThat(tester.getWatermarkHold(), nullValue());
  injectElements(tester, 10);
  assertThat(tester.getWatermarkHold(), nullValue());
}
 
Example #6
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Covers the case where a single watermark update moves past both the end of a session window
 * and its expiration time; the runner must not crash and must emit exactly one pane.
 *
 * <p>The session gap is 10 ms and the allowed lateness is 50 ms; the watermark jumps from 0 to
 * 100 after one element with value 1 has been injected.
 */
@Test
public void testSessionEowAndGcTogether() throws Exception {
  Duration sessionGap = Duration.millis(10);
  Duration allowedLateness = Duration.millis(50);
  // The single expected pane is both first and last, and on time.
  PaneInfo onlyPane = PaneInfo.createPane(true, true, Timing.ON_TIME);

  ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester =
      ReduceFnTester.nonCombining(
          Sessions.withGapDuration(sessionGap),
          DefaultTriggerStateMachine.of(),
          AccumulationMode.ACCUMULATING_FIRED_PANES,
          allowedLateness,
          ClosingBehavior.FIRE_ALWAYS);
  tester.setAutoAdvanceOutputWatermark(true);

  tester.advanceInputWatermark(new Instant(0));
  injectElement(tester, 1);
  tester.advanceInputWatermark(new Instant(100));

  assertThat(
      tester.extractOutput(),
      contains(isSingleWindowedValue(contains(1), 1, 1, 11, onlyPane)));
}
 
Example #7
Source File: AfterWatermarkStateMachineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that merging two session windows invokes {@code onMerge} on the early-firing
 * sub-trigger of an end-of-window watermark trigger.
 */
@Test
public void testEarlyAndLateOnMergeSubtriggerMerges() throws Exception {
  Duration sessionGap = Duration.millis(10);
  tester =
      TriggerStateMachineTester.forTrigger(
          AfterWatermarkStateMachine.pastEndOfWindow()
              .withEarlyFirings(mockEarly)
              .withLateFirings(mockLate),
          Sessions.withGapDuration(sessionGap));

  // Two elements injected separately, close enough for their sessions to merge.
  tester.injectElements(1);
  tester.injectElements(5);

  // The merge is expected to be forwarded to the early trigger.
  tester.mergeWindows();
  verify(mockEarly).onMerge(Mockito.any(OnMergeContext.class));
}
 
Example #8
Source File: WriteFilesTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a {@code WriteFiles} over five fixed input strings that are first placed into session
 * windows with a 1 ms gap and reshuffled.
 */
@Test
@Category(NeedsRunner.class)
public void testWriteWithSessions() throws IOException {
  Duration sessionGap = Duration.millis(1);
  List<String> birds =
      Arrays.asList(
          "Critical canary",
          "Apprehensive eagle",
          "Intimidating pigeon",
          "Pedantic gull",
          "Frisky finch");

  runWrite(
      birds,
      new WindowAndReshuffle<>(Window.into(Sessions.withGapDuration(sessionGap))),
      getBaseOutputFilename(),
      WriteFiles.to(makeSimpleSink()));
}
 
Example #9
Source File: GroupByKeyTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that, after a {@code GroupByKey} over one-minute session windows, the output's window
 * function is compatible with an {@code InvalidWindows} wrapping the same session configuration.
 */
@Test
@Category(NeedsRunner.class)
public void testWindowFnInvalidation() {
  Duration gap = Duration.standardMinutes(1);
  List<KV<String, Integer>> noPairs = Arrays.asList();

  PCollection<KV<String, Integer>> sessionWindowed =
      p.apply(
              Create.of(noPairs)
                  .withCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())))
          .apply(Window.into(Sessions.withGapDuration(gap)));

  PCollection<KV<String, Iterable<Integer>>> grouped =
      sessionWindowed.apply(GroupByKey.create());

  p.run();

  boolean compatibleWithInvalidWindows =
      grouped
          .getWindowingStrategy()
          .getWindowFn()
          .isCompatible(new InvalidWindows("Invalid", Sessions.withGapDuration(gap)));
  Assert.assertTrue(compatibleWithInvalidWindows);
}
 
Example #10
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a pipeline that applies a stateful {@code DoFn} after session (merging) windows and
 * expects running it to fail with an {@link UnsupportedOperationException} whose message contains
 * "merging".
 *
 * @param options the pipeline options to create the pipeline with
 */
private void verifyMergingStatefulParDoRejected(PipelineOptions options) throws Exception {
  Pipeline pipeline = Pipeline.create(options);

  // A DoFn becomes stateful simply by declaring a state cell; its processing body is empty.
  DoFn<KV<Integer, Integer>, Void> statefulFn =
      new DoFn<KV<Integer, Integer>, Void>() {
        @StateId("fizzle")
        private final StateSpec<ValueState<Void>> voidState = StateSpecs.value();

        @ProcessElement
        public void process() {}
      };

  pipeline
      .apply(Create.of(KV.of(13, 42)))
      .apply(Window.into(Sessions.withGapDuration(Duration.millis(1))))
      .apply(ParDo.of(statefulFn));

  thrown.expect(UnsupportedOperationException.class);
  thrown.expectMessage("merging");
  pipeline.run();
}
 
Example #11
Source File: FlattenTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Flattens two collections windowed into sessions with different gaps (one and two minutes) and
 * checks that the flattened output's window function is compatible with two-minute sessions.
 */
@Test
@Category(NeedsRunner.class)
public void testCompatibleWindowFnPropagation() {
  Duration oneMinute = Duration.standardMinutes(1);
  Duration twoMinutes = Duration.standardMinutes(2);

  PCollection<String> input1 =
      p.apply("CreateInput1", Create.of("Input1"))
          .apply("Window1", Window.into(Sessions.withGapDuration(oneMinute)));
  PCollection<String> input2 =
      p.apply("CreateInput2", Create.of("Input2"))
          .apply("Window2", Window.into(Sessions.withGapDuration(twoMinutes)));

  PCollectionList<String> bothInputs = PCollectionList.of(input1).and(input2);
  PCollection<String> flattened = bothInputs.apply(Flatten.pCollections());

  p.run();

  boolean compatible =
      flattened
          .getWindowingStrategy()
          .getWindowFn()
          .isCompatible(Sessions.withGapDuration(twoMinutes));
  Assert.assertTrue(compatible);
}
 
Example #12
Source File: CombineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Combines timestamped key/value pairs that were placed into session windows with a 5 ms gap,
 * both globally over the values and per key.
 *
 * <p>The global combine is expected to yield 7 and 13; the per-key combine is expected to yield
 * "114" for key "a" and the two separate results "1" and "13" for key "b".
 */
@Test
@Category(ValidatesRunner.class)
public void testSessionsCombine() {
  KvCoder<String, Integer> pairCoder =
      KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of());
  Duration sessionGap = Duration.millis(5);

  PCollection<KV<String, Integer>> sessionWindowed =
      pipeline
          .apply(
              Create.timestamped(
                      TimestampedValue.of(KV.of("a", 1), new Instant(0L)),
                      TimestampedValue.of(KV.of("a", 1), new Instant(4L)),
                      TimestampedValue.of(KV.of("a", 4), new Instant(7L)),
                      TimestampedValue.of(KV.of("b", 1), new Instant(10L)),
                      TimestampedValue.of(KV.of("b", 13), new Instant(16L)))
                  .withCoder(pairCoder))
          .apply(Window.into(Sessions.withGapDuration(sessionGap)));

  // Per-key results, combined within each key's own sessions.
  PCollection<KV<String, String>> perKey =
      sessionWindowed.apply(Combine.perKey(new TestCombineFn()));

  // Global results: keys are dropped first, so sessions are formed over all values together.
  PCollection<Integer> total =
      sessionWindowed
          .apply(Values.create())
          .apply(Combine.globally(new SumInts()).withoutDefaults());

  PAssert.that(total).containsInAnyOrder(7, 13);
  PAssert.that(perKey)
      .containsInAnyOrder(Arrays.asList(KV.of("a", "114"), KV.of("b", "1"), KV.of("b", "13")));
  pipeline.run();
}
 
Example #13
Source File: ReshuffleTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Applies {@code Reshuffle} to the output of a {@code GroupByKey} over ten-minute session
 * windows and checks that both the contents and the windowing strategy are preserved.
 */
@Test
@Category(ValidatesRunner.class)
public void testReshuffleAfterSessionsAndGroupByKey() {
  Duration sessionGap = Duration.standardMinutes(10);

  PCollection<KV<String, Iterable<Integer>>> grouped =
      pipeline
          .apply(
              Create.of(GBK_TESTABLE_KVS)
                  .withCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())))
          .apply(Window.into(Sessions.withGapDuration(sessionGap)))
          .apply(GroupByKey.create());

  PCollection<KV<String, Iterable<Integer>>> reshuffled = grouped.apply(Reshuffle.of());

  // The windowing strategy is compared at construction time, before the pipeline runs.
  assertEquals(grouped.getWindowingStrategy(), reshuffled.getWindowingStrategy());
  PAssert.that(reshuffled).satisfies(new AssertThatHasExpectedContents());

  pipeline.run();
}
 
Example #14
Source File: GroupAlsoByWindowProperties.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that the given GABW implementation correctly groups elements into merged sessions with
 * output timestamps equal to the latest element timestamp in each session, as selected by
 * {@link TimestampCombiner#LATEST}.
 *
 * <p>Three values are supplied for key "k": "v1" at 0 and "v2" at 5, whose windows overlap and
 * merge into [0, 15), and "v3" at 15 in the separate window [15, 25). The merged session is
 * expected to carry timestamp 5 and the unmerged one timestamp 15.
 *
 * @param gabwFactory factory for the implementation under test
 */
public static void groupsElementsInMergedSessionsWithLatestTimestamp(
    GroupAlsoByWindowDoFnFactory<String, String, Iterable<String>> gabwFactory) throws Exception {

  WindowingStrategy<?, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
          .withTimestampCombiner(TimestampCombiner.LATEST);

  BoundedWindow unmergedWindow = window(15, 25);
  BoundedWindow mergedWindow = window(0, 15);

  List<WindowedValue<KV<String, Iterable<String>>>> result =
      runGABW(
          gabwFactory,
          windowingStrategy,
          "k",
          WindowedValue.of(
              "v1", new Instant(0), Arrays.asList(window(0, 10)), PaneInfo.NO_FIRING),
          WindowedValue.of(
              "v2", new Instant(5), Arrays.asList(window(5, 15)), PaneInfo.NO_FIRING),
          WindowedValue.of(
              "v3", new Instant(15), Arrays.asList(unmergedWindow), PaneInfo.NO_FIRING));

  // One output per resulting session: the merged one and the untouched one.
  assertThat(result, hasSize(2));

  // Merged session: both values present, stamped with the later of the two timestamps.
  TimestampedValue<KV<String, Iterable<String>>> item0 =
      getOnlyElementInWindow(result, mergedWindow);
  assertThat(item0.getValue().getValue(), containsInAnyOrder("v1", "v2"));
  assertThat(item0.getTimestamp(), equalTo(new Instant(5)));

  // Unmerged session: its single value keeps its own timestamp.
  TimestampedValue<KV<String, Iterable<String>>> item1 =
      getOnlyElementInWindow(result, unmergedWindow);
  assertThat(item1.getValue().getValue(), contains("v3"));
  assertThat(item1.getTimestamp(), equalTo(new Instant(15)));
}
 
Example #15
Source File: WindowRuntime.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Applies the windowing configured in {@code properties} to the incoming records.
 *
 * <p>A window length below 1 is reported through {@code TalendRuntimeException} with the
 * {@code UNEXPECTED_ARGUMENT} error code. Otherwise the window kind is chosen in this order:
 * session windows when {@code windowSession} is set, fixed windows when
 * {@code windowSlideLength} is below 1, and sliding windows in every other case. Lengths are
 * treated as milliseconds.
 *
 * @param indexedRecordPCollection the records to window
 * @return the input collection with the selected window function applied
 */
@Override
public PCollection<IndexedRecord> expand(PCollection<IndexedRecord> indexedRecordPCollection) {
    // Reject non-positive window lengths before building any transform.
    if (properties.windowLength.getValue() < 1) {
        TalendRuntimeException.build(CommonErrorCodes.UNEXPECTED_ARGUMENT).setAndThrow(
                properties.windowLength.getName(), String.valueOf(properties.windowLength.getValue()));
    }

    // NOTE(review): intValue() narrows the configured value to an int; confirm the property
    // cannot hold a value outside that range.
    Duration windowLength = Duration.millis(properties.windowLength.getValue().intValue());

    // Session Window: the configured length acts as the gap duration.
    if (properties.windowSession.getValue()) {
        return indexedRecordPCollection.apply(
                Window.<IndexedRecord> into(Sessions.withGapDuration(windowLength)));
    }

    // Fixed Window: chosen when no usable slide length is configured.
    if (properties.windowSlideLength.getValue() < 1) {
        return indexedRecordPCollection.apply(
                Window.<IndexedRecord> into(FixedWindows.of(windowLength)));
    }

    // Sliding Window: the slide length sets the period between window starts.
    Duration slideLength = Duration.millis(properties.windowSlideLength.getValue().intValue());
    return indexedRecordPCollection.apply(
            Window.<IndexedRecord> into(SlidingWindows.of(windowLength).every(slideLength)));
}
 
Example #16
Source File: BatchGroupAlsoByWindowFnsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that, for session windows with an element-count trigger and a combine function, the
 * factory returns a {@code BatchGroupAlsoByWindowAndCombineFn}.
 */
@Test
public void testCreateCombiningWithTrigger() throws Exception {
  KvCoder<String, Long> inputCoder = KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of());
  CoderRegistry registry = CoderRegistry.createDefault();
  AppliedCombineFn<String, Long, ?, Long> summingFn =
      AppliedCombineFn.withInputCoder(Sum.ofLongs(), registry, inputCoder);

  WindowingStrategy<?, IntervalWindow> sessionsWithCountTrigger =
      WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
          .withTrigger(AfterPane.elementCountAtLeast(1));

  assertThat(
      BatchGroupAlsoByWindowsDoFns.create(sessionsWithCountTrigger, summingFn),
      instanceOf(BatchGroupAlsoByWindowAndCombineFn.class));
}
 
Example #17
Source File: DefaultTriggerStateMachineTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Walks the default trigger through 100 ms session windows: two elements produce sessions
 * [1, 101) and [50, 150), which merge into [1, 150). Readiness is probed for all three windows
 * as the input watermark moves to 100, 149 and 150, and the trigger is checked to stay ready and
 * unfinished after firing.
 */
@Test
public void testDefaultTriggerSessions() throws Exception {
  IntervalWindow sessionAtOne = new IntervalWindow(new Instant(1), new Instant(101));
  IntervalWindow sessionAtFifty = new IntervalWindow(new Instant(50), new Instant(150));
  IntervalWindow unionSession = new IntervalWindow(new Instant(1), new Instant(150));

  tester =
      TriggerStateMachineTester.forTrigger(
          DefaultTriggerStateMachine.of(), Sessions.withGapDuration(Duration.millis(100)));

  // 1 falls in [1, 101) and 50 in [50, 150).
  tester.injectElements(1, 50);
  tester.mergeWindows();

  // Watermark 100: nothing is ready yet.
  tester.advanceInputWatermark(new Instant(100));
  assertFalse(tester.shouldFire(sessionAtOne));
  assertFalse(tester.shouldFire(sessionAtFifty));
  assertFalse(tester.shouldFire(unionSession));

  // Watermark 149: only the first window reports ready. It is up to the caller to know which
  // windows have been merged away.
  tester.advanceInputWatermark(new Instant(149));
  assertTrue(tester.shouldFire(sessionAtOne));
  assertFalse(tester.shouldFire(sessionAtFifty));
  assertFalse(tester.shouldFire(unionSession));

  // Watermark 150: every window reports ready.
  tester.advanceInputWatermark(new Instant(150));
  assertTrue(tester.shouldFire(sessionAtOne));
  assertTrue(tester.shouldFire(sessionAtFifty));
  assertTrue(tester.shouldFire(unionSession));

  // Firing does not consume readiness, and the trigger never finishes.
  tester.fireIfShouldFire(unionSession);
  assertTrue(tester.shouldFire(unionSession));
  assertFalse(tester.isMarkedFinished(unionSession));
}
 
Example #18
Source File: OrFinallyStateMachineTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests, with real triggers, that when the first trigger of a sequence rewinds to non-finished
 * in a merged window it becomes the active trigger again.
 *
 * <p>The sequence is "five elements or end of window" followed by a repeated "at least one
 * element" trigger, evaluated over 10 ms session windows.
 */
@Test
public void testShouldFireAfterMerge() throws Exception {
  IntervalWindow sessionAtOne = new IntervalWindow(new Instant(1), new Instant(11));
  IntervalWindow sessionAtFive = new IntervalWindow(new Instant(5), new Instant(15));
  IntervalWindow unionSession = new IntervalWindow(new Instant(1), new Instant(15));

  tester =
      TriggerStateMachineTester.forTrigger(
          AfterEachStateMachine.inOrder(
              AfterPaneStateMachine.elementCountAtLeast(5)
                  .orFinally(AfterWatermarkStateMachine.pastEndOfWindow()),
              RepeatedlyStateMachine.forever(AfterPaneStateMachine.elementCountAtLeast(1))),
          Sessions.withGapDuration(Duration.millis(10)));

  // Finish the orFinally in the first window by advancing the watermark to its end.
  tester.injectElements(1);
  assertFalse(tester.shouldFire(sessionAtOne));
  tester.advanceInputWatermark(new Instant(11));
  assertTrue(tester.shouldFire(sessionAtOne));
  tester.fireIfShouldFire(sessionAtOne);

  // In the second window the first trigger has not finished.
  tester.injectElements(5);
  assertFalse(tester.shouldFire(sessionAtFive));

  // If the merged window were on the second trigger it would be ready here; it is not.
  tester.mergeWindows();
  assertFalse(tester.shouldFire(unionSession));

  // Injecting five more elements makes the main trigger ready to fire.
  tester.injectElements(1, 2, 3, 4, 5);
  tester.mergeWindows();
  assertTrue(tester.shouldFire(unionSession));
}
 
Example #19
Source File: BatchGroupAlsoByWindowFnsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that, for merging (session) windows and a combine function, the factory returns a
 * {@code BatchGroupAlsoByWindowAndCombineFn}.
 */
@Test
public void testCreateCombiningMerging() throws Exception {
  WindowingStrategy<?, IntervalWindow> sessionStrategy =
      WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)));

  KvCoder<String, Long> inputCoder = KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of());
  AppliedCombineFn<String, Long, ?, Long> summingFn =
      AppliedCombineFn.withInputCoder(
          Sum.ofLongs(), CoderRegistry.createDefault(), inputCoder);

  assertThat(
      BatchGroupAlsoByWindowsDoFns.create(sessionStrategy, summingFn),
      instanceOf(BatchGroupAlsoByWindowAndCombineFn.class));
}
 
Example #20
Source File: AfterPaneStateMachineTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises an "at least two elements" trigger over 10 ms session windows: single-element
 * sessions are not ready, the merged session fires and finishes, and a later pair of elements
 * forms a fresh session that fires and finishes on its own.
 */
@Test
public void testAfterPaneElementCountSessions() throws Exception {
  IntervalWindow sessionAtOne = new IntervalWindow(new Instant(1), new Instant(11));
  IntervalWindow sessionAtTwo = new IntervalWindow(new Instant(2), new Instant(12));
  IntervalWindow firstUnion = new IntervalWindow(new Instant(1), new Instant(12));
  IntervalWindow secondUnion = new IntervalWindow(new Instant(7), new Instant(19));

  tester =
      TriggerStateMachineTester.forTrigger(
          AfterPaneStateMachine.elementCountAtLeast(2),
          Sessions.withGapDuration(Duration.millis(10)));

  // 1 lands in [1, 11) and 2 in [2, 12); before merging each window has one element.
  tester.injectElements(1, 2);
  assertFalse(tester.shouldFire(sessionAtOne));
  assertFalse(tester.shouldFire(sessionAtTwo));

  tester.mergeWindows();

  assertTrue(tester.shouldFire(firstUnion));
  tester.fireIfShouldFire(firstUnion);
  assertTrue(tester.isMarkedFinished(firstUnion));

  // The previous window was closed, so it is no longer around to merge with; the next two
  // elements (7 in [7, 17), 9 in [9, 19)) produce a new FIRE_AND_FINISH result.
  tester.injectElements(7, 9);
  tester.mergeWindows();

  assertTrue(tester.shouldFire(secondUnion));
  tester.fireIfShouldFire(secondUnion);
  assertTrue(tester.isMarkedFinished(secondUnion));
}
 
Example #21
Source File: BatchGroupAlsoByWindowFnsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that, for merging (session) windows without a combine function, the iterable factory
 * returns a {@code BatchGroupAlsoByWindowViaOutputBufferFn}.
 */
@Test
public void testCreateNoncombiningMerging() throws Exception {
  WindowingStrategy<?, IntervalWindow> sessionStrategy =
      WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)));
  Coder<Long> valueCoder = VarLongCoder.of();

  assertThat(
      BatchGroupAlsoByWindowsDoFns.createForIterable(
          sessionStrategy, new InMemoryStateInternalsFactory<>(), valueCoder),
      instanceOf(BatchGroupAlsoByWindowViaOutputBufferFn.class));
}
 
Example #22
Source File: GroupAlsoByWindowProperties.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that the given {@link BatchGroupAlsoByWindowFn} implementation combines elements per
 * session window according to the provided {@link CombineFn}, and that each output carries the
 * maximum timestamp of its session when {@link TimestampCombiner#END_OF_WINDOW} is configured.
 *
 * @param gabwFactory factory for the implementation under test
 * @param combineFn the combine function used to compute the expected values
 */
public static void combinesElementsPerSessionWithEndOfWindowTimestamp(
    GroupAlsoByWindowDoFnFactory<String, Long, Long> gabwFactory,
    CombineFn<Long, ?, Long> combineFn)
    throws Exception {

  WindowingStrategy<?, IntervalWindow> sessionStrategy =
      WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
          .withTimestampCombiner(TimestampCombiner.END_OF_WINDOW);

  // [0, 10) and [5, 15) overlap and are expected to merge into [0, 15); [15, 25) stays alone.
  BoundedWindow mergedSession = window(0, 15);
  BoundedWindow loneSession = window(15, 25);

  List<WindowedValue<KV<String, Long>>> outputs =
      runGABW(
          gabwFactory,
          sessionStrategy,
          "k",
          WindowedValue.of(1L, new Instant(0), Arrays.asList(window(0, 10)), PaneInfo.NO_FIRING),
          WindowedValue.of(2L, new Instant(5), Arrays.asList(window(5, 15)), PaneInfo.NO_FIRING),
          WindowedValue.of(4L, new Instant(15), Arrays.asList(loneSession), PaneInfo.NO_FIRING));

  assertThat(outputs, hasSize(2));

  TimestampedValue<KV<String, Long>> mergedOutput =
      getOnlyElementInWindow(outputs, mergedSession);
  assertThat(
      mergedOutput.getValue().getValue(), equalTo(combineFn.apply(ImmutableList.of(1L, 2L))));
  assertThat(mergedOutput.getTimestamp(), equalTo(mergedSession.maxTimestamp()));

  TimestampedValue<KV<String, Long>> loneOutput = getOnlyElementInWindow(outputs, loneSession);
  assertThat(loneOutput.getValue().getValue(), equalTo(combineFn.apply(ImmutableList.of(4L))));
  assertThat(loneOutput.getTimestamp(), equalTo(loneSession.maxTimestamp()));
}
 
Example #23
Source File: AfterWatermarkStateMachineTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that an end-of-window watermark trigger rewinds to non-finished in the merged window.
 *
 * <p>Windows are discarded once a trigger finishes, so the watermark trigger is embedded in a
 * sequence (followed by a repeated element-count trigger) to make its re-activation observable.
 * That makes this test potentially sensitive to the correctness of the other triggers involved.
 */
@Test
public void testOnMergeRewinds() throws Exception {
  IntervalWindow sessionAtOne = new IntervalWindow(new Instant(1), new Instant(11));
  IntervalWindow sessionAtFive = new IntervalWindow(new Instant(5), new Instant(15));
  IntervalWindow unionSession = new IntervalWindow(new Instant(1), new Instant(15));

  tester =
      TriggerStateMachineTester.forTrigger(
          AfterEachStateMachine.inOrder(
              AfterWatermarkStateMachine.pastEndOfWindow(),
              RepeatedlyStateMachine.forever(AfterPaneStateMachine.elementCountAtLeast(1))),
          Sessions.withGapDuration(Duration.millis(10)));

  tester.injectElements(1);
  tester.injectElements(5);

  // Watermark 11 finishes the watermark trigger in the first window only.
  tester.advanceInputWatermark(new Instant(11));
  assertTrue(tester.shouldFire(sessionAtOne));
  assertFalse(tester.shouldFire(sessionAtFive));
  tester.fireIfShouldFire(sessionAtOne);

  // Probe: the first window is now on the element-count trigger, so one element makes it ready.
  assertFalse(tester.shouldFire(sessionAtOne));
  tester.injectElements(1);
  assertTrue(tester.shouldFire(sessionAtOne));
  tester.fireIfShouldFire(sessionAtOne);

  // Merging should put the merged window back on the watermark trigger.
  tester.mergeWindows();

  // Probe: an extra element must not make the merged window ready.
  assertFalse(tester.shouldFire(unionSession));
  tester.injectElements(1);
  assertFalse(tester.shouldFire(unionSession));

  // Advancing the watermark to the end of the merged window fires again.
  tester.advanceInputWatermark(new Instant(15));
  assertTrue(tester.shouldFire(unionSession));
}
 
Example #24
Source File: GroupAlsoByWindowProperties.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that the given {@link BatchGroupAlsoByWindowFn} implementation combines elements per
 * session window according to the provided {@link CombineFn}, keeps the key on each output, and
 * stamps each output with the maximum timestamp of its session.
 *
 * @param gabwFactory factory for the implementation under test
 * @param combineFn the combine function used to compute the expected values
 */
public static void combinesElementsPerSession(
    GroupAlsoByWindowDoFnFactory<String, Long, Long> gabwFactory,
    CombineFn<Long, ?, Long> combineFn)
    throws Exception {

  WindowingStrategy<?, IntervalWindow> sessionStrategy =
      WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)));

  // [0, 10) and [5, 15) overlap and are expected to merge into [0, 15); [15, 25) stays alone.
  BoundedWindow mergedSession = window(0, 15);
  BoundedWindow loneSession = window(15, 25);

  List<WindowedValue<KV<String, Long>>> outputs =
      runGABW(
          gabwFactory,
          sessionStrategy,
          "k",
          WindowedValue.of(1L, new Instant(0), Arrays.asList(window(0, 10)), PaneInfo.NO_FIRING),
          WindowedValue.of(2L, new Instant(5), Arrays.asList(window(5, 15)), PaneInfo.NO_FIRING),
          WindowedValue.of(
              4L, new Instant(15), Arrays.asList(window(15, 25)), PaneInfo.NO_FIRING));

  assertThat(outputs, hasSize(2));

  TimestampedValue<KV<String, Long>> mergedOutput =
      getOnlyElementInWindow(outputs, mergedSession);
  assertThat(mergedOutput.getValue().getKey(), equalTo("k"));
  assertThat(
      mergedOutput.getValue().getValue(), equalTo(combineFn.apply(ImmutableList.of(1L, 2L))));
  assertThat(mergedOutput.getTimestamp(), equalTo(mergedSession.maxTimestamp()));

  TimestampedValue<KV<String, Long>> loneOutput = getOnlyElementInWindow(outputs, loneSession);
  assertThat(loneOutput.getValue().getKey(), equalTo("k"));
  assertThat(loneOutput.getValue().getValue(), equalTo(combineFn.apply(ImmutableList.of(4L))));
  assertThat(loneOutput.getTimestamp(), equalTo(loneSession.maxTimestamp()));
}
 
Example #25
Source File: AfterWatermarkStateMachineTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that a watermark trigger with early and late firings rewinds to non-finished in the
 * merged window.
 *
 * <p>Windows are discarded once a trigger finishes, so re-activation is observed by probing
 * whether a single extra element makes the window ready (late trigger, count of 1) or not (early
 * trigger, count of 100). The test is therefore potentially sensitive to the correctness of the
 * other triggers involved.
 */
@Test
public void testEarlyAndLateOnMergeRewinds() throws Exception {
  IntervalWindow sessionAtOne = new IntervalWindow(new Instant(1), new Instant(11));
  IntervalWindow sessionAtFive = new IntervalWindow(new Instant(5), new Instant(15));
  IntervalWindow unionSession = new IntervalWindow(new Instant(1), new Instant(15));

  tester =
      TriggerStateMachineTester.forTrigger(
          AfterWatermarkStateMachine.pastEndOfWindow()
              .withEarlyFirings(AfterPaneStateMachine.elementCountAtLeast(100))
              .withLateFirings(AfterPaneStateMachine.elementCountAtLeast(1)),
          Sessions.withGapDuration(Duration.millis(10)));

  tester.injectElements(1);
  tester.injectElements(5);

  // Watermark 11 finishes the end-of-window part of the trigger in the first window only.
  tester.advanceInputWatermark(new Instant(11));
  assertTrue(tester.shouldFire(sessionAtOne));
  assertFalse(tester.shouldFire(sessionAtFive));
  tester.fireIfShouldFire(sessionAtOne);

  // Probe: the first window is on the late trigger, so one element makes it ready.
  assertFalse(tester.shouldFire(sessionAtOne));
  tester.injectElements(1);
  assertTrue(tester.shouldFire(sessionAtOne));
  tester.fireIfShouldFire(sessionAtOne);

  // Merging should re-activate the early trigger in the merged window.
  tester.mergeWindows();

  // Probe: an extra element must not make the merged window ready.
  assertFalse(tester.shouldFire(unionSession));
  tester.injectElements(1);
  assertFalse(tester.shouldFire(unionSession));

  // Advancing the watermark to the end of the merged window fires again.
  tester.advanceInputWatermark(new Instant(15));
  assertTrue(tester.shouldFire(unionSession));
}
 
Example #26
Source File: AfterFirstStateMachineTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests, with real triggers, that when the first trigger of a sequence rewinds to non-finished
 * in a merged window it becomes the active trigger again.
 *
 * <p>The sequence is an {@code AfterFirst} of "five elements" and "end of window", followed by a
 * repeated "at least one element" trigger, evaluated over 10 ms session windows.
 */
@Test
public void testShouldFireAfterMerge() throws Exception {
  IntervalWindow sessionAtOne = new IntervalWindow(new Instant(1), new Instant(11));
  IntervalWindow sessionAtFive = new IntervalWindow(new Instant(5), new Instant(15));
  IntervalWindow unionSession = new IntervalWindow(new Instant(1), new Instant(15));

  tester =
      TriggerStateMachineTester.forTrigger(
          AfterEachStateMachine.inOrder(
              AfterFirstStateMachine.of(
                  AfterPaneStateMachine.elementCountAtLeast(5),
                  AfterWatermarkStateMachine.pastEndOfWindow()),
              RepeatedlyStateMachine.forever(AfterPaneStateMachine.elementCountAtLeast(1))),
          Sessions.withGapDuration(Duration.millis(10)));

  // Finish the AfterFirst in the first window by advancing the watermark to its end.
  tester.injectElements(1);
  assertFalse(tester.shouldFire(sessionAtOne));
  tester.advanceInputWatermark(new Instant(11));
  assertTrue(tester.shouldFire(sessionAtOne));
  tester.fireIfShouldFire(sessionAtOne);

  // In the second window the AfterFirst has not finished.
  tester.injectElements(5);
  assertFalse(tester.shouldFire(sessionAtFive));

  // If the merged window were on the second trigger it would be ready here; it is not.
  tester.mergeWindows();
  assertFalse(tester.shouldFire(unionSession));

  // Injecting five more elements makes the AfterFirst ready to fire.
  tester.injectElements(1, 2, 3, 4, 5);
  tester.mergeWindows();
  assertTrue(tester.shouldFire(unionSession));
}
 
Example #27
Source File: SparkCombineFnTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a keyed {@code SparkCombineFn} over session windows with a 1000 ms gap.
 *
 * <p>Three inputs for the same key are placed at 5000, 1000 and 500 ms. Two combiners are
 * created, one value is merged into the first, and the combiners are merged. Two outputs are
 * expected, formatted as {@code value:timestamp}: "5:1999" and "1:5999".
 */
@Test
public void testSessionCombineFn() throws Exception {
  WindowingStrategy<Object, IntervalWindow> sessionStrategy =
      WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(1000)));
  SparkCombineFn<KV<String, Integer>, Integer, Long, Long> keyedFn =
      SparkCombineFn.keyed(combineFn, opts, Collections.emptyMap(), sessionStrategy);

  Instant epoch = Instant.ofEpochMilli(0);
  WindowedValue<KV<String, Integer>> atFiveSeconds =
      input("key", 1, epoch.plus(5000), sessionStrategy.getWindowFn());
  WindowedValue<KV<String, Integer>> atOneSecond =
      input("key", 2, epoch.plus(1000), sessionStrategy.getWindowFn());
  WindowedValue<KV<String, Integer>> atHalfSecond =
      input("key", 3, epoch.plus(500), sessionStrategy.getWindowFn());

  // Seed one accumulator per "partition", add the remaining value, then merge the accumulators.
  SparkCombineFn.WindowedAccumulator<KV<String, Integer>, Integer, Long, ?> leftAcc =
      keyedFn.createCombiner(atFiveSeconds);
  SparkCombineFn.WindowedAccumulator<KV<String, Integer>, Integer, Long, ?> rightAcc =
      keyedFn.createCombiner(atHalfSecond);
  keyedFn.mergeValue(leftAcc, atOneSecond);
  SparkCombineFn.WindowedAccumulator<KV<String, Integer>, Integer, Long, ?> mergedAcc =
      keyedFn.mergeCombiners(leftAcc, rightAcc);

  Iterable<WindowedValue<Long>> results = keyedFn.extractOutput(mergedAcc);
  assertEquals(2, Iterables.size(results));

  // Render each output as "value:timestampMillis" so that order and timestamps are both checked.
  List<String> rendered =
      StreamSupport.stream(results.spliterator(), false)
          .map(val -> val.getValue() + ":" + val.getTimestamp().getMillis())
          .collect(Collectors.toList());
  assertEquals(Lists.newArrayList("5:1999", "1:5999"), rendered);
}
 
Example #28
Source File: GroupAlsoByWindowProperties.java (from the beam project, Apache License 2.0; 5 votes)
/**
 * Checks that the supplied GABW implementation merges overlapping session windows, groups the
 * elements accordingly, and stamps each output with the end-of-window timestamp of its merged
 * window.
 *
 * @param gabwFactory factory for the GroupAlsoByWindow implementation under test
 * @throws Exception if running the implementation fails
 */
public static void groupsElementsInMergedSessionsWithEndOfWindowTimestamp(
    GroupAlsoByWindowDoFnFactory<String, String, Iterable<String>> gabwFactory) throws Exception {

  // Sessions with a 10ms gap, with output timestamps explicitly set to END_OF_WINDOW.
  WindowingStrategy<?, IntervalWindow> sessionStrategy =
      WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
          .withTimestampCombiner(TimestampCombiner.END_OF_WINDOW);

  // v1 and v2 sit in overlapping windows ([0, 10) and [5, 15)); v3 is in [15, 25).
  String key = "k";
  WindowedValue<String> v1 =
      WindowedValue.of("v1", new Instant(0), Arrays.asList(window(0, 10)), PaneInfo.NO_FIRING);
  WindowedValue<String> v2 =
      WindowedValue.of("v2", new Instant(5), Arrays.asList(window(5, 15)), PaneInfo.NO_FIRING);
  WindowedValue<String> v3 =
      WindowedValue.of("v3", new Instant(15), Arrays.asList(window(15, 25)), PaneInfo.NO_FIRING);

  List<WindowedValue<KV<String, Iterable<String>>>> grouped =
      runGABW(gabwFactory, sessionStrategy, key, v1, v2, v3);

  assertThat(grouped, hasSize(2));

  // Merged session [0, 15) holds v1 and v2.
  TimestampedValue<KV<String, Iterable<String>>> mergedGroup =
      getOnlyElementInWindow(grouped, window(0, 15));
  assertThat(mergedGroup.getValue().getValue(), containsInAnyOrder("v1", "v2"));
  assertThat(mergedGroup.getTimestamp(), equalTo(window(0, 15).maxTimestamp()));

  // Untouched session [15, 25) holds only v3.
  TimestampedValue<KV<String, Iterable<String>>> loneGroup =
      getOnlyElementInWindow(grouped, window(15, 25));
  assertThat(loneGroup.getValue().getValue(), contains("v3"));
  assertThat(loneGroup.getTimestamp(), equalTo(window(15, 25).maxTimestamp()));
}
 
Example #29
Source File: BeamModel.java (from the streamingbook project, Apache License 2.0; 5 votes)
/**
 * Windows the keyed integers into sessions, sums them per key, and formats the sums as strings.
 *
 * <p>The session gap is {@code ONE_MINUTE}. Panes fire at the watermark, with early firings one
 * {@code ONE_MINUTE} of processing time after the first element in a pane and a late firing for
 * every late element; fired panes accumulate and allowed lateness is 1000 days.
 *
 * @param input keyed integer values to aggregate
 * @return the output of applying {@code FormatAsStrings} to the per-key sums
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
    // Build the windowing/triggering configuration separately from the pipeline wiring.
    Window<KV<String, Integer>> sessionWindowing =
        Window.<KV<String, Integer>>into(Sessions.withGapDuration(ONE_MINUTE))
            .triggering(
                AfterWatermark.pastEndOfWindow()
                    .withEarlyFirings(
                        AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE))
                    .withLateFirings(AfterPane.elementCountAtLeast(1)))
            .withAllowedLateness(Duration.standardDays(1000))
            .accumulatingFiredPanes();

    return input
        .apply(sessionWindowing)
        .apply(Sum.integersPerKey())
        .apply(ParDo.of(new FormatAsStrings()));
}
 
Example #30
Source File: GroupAlsoByWindowProperties.java (from the beam project, Apache License 2.0; 5 votes)
/**
 * Checks that the supplied GABW implementation merges overlapping session windows and groups
 * the elements accordingly. It also asserts that each output's timestamp equals the
 * {@code maxTimestamp()} of its window.
 *
 * @param gabwFactory factory for the GroupAlsoByWindow implementation under test
 * @throws Exception if running the implementation fails
 */
public static void groupsElementsInMergedSessions(
    GroupAlsoByWindowDoFnFactory<String, String, Iterable<String>> gabwFactory) throws Exception {

  // Sessions with a 10ms gap; no timestamp combiner is configured here.
  WindowingStrategy<?, IntervalWindow> sessionStrategy =
      WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)));

  // v1 and v2 sit in overlapping windows ([0, 10) and [5, 15)); v3 is in [15, 25).
  String key = "key";
  WindowedValue<String> v1 =
      WindowedValue.of("v1", new Instant(0), Arrays.asList(window(0, 10)), PaneInfo.NO_FIRING);
  WindowedValue<String> v2 =
      WindowedValue.of("v2", new Instant(5), Arrays.asList(window(5, 15)), PaneInfo.NO_FIRING);
  WindowedValue<String> v3 =
      WindowedValue.of("v3", new Instant(15), Arrays.asList(window(15, 25)), PaneInfo.NO_FIRING);

  List<WindowedValue<KV<String, Iterable<String>>>> grouped =
      runGABW(gabwFactory, sessionStrategy, key, v1, v2, v3);

  assertThat(grouped, hasSize(2));

  // Merged session [0, 15) holds v1 and v2.
  TimestampedValue<KV<String, Iterable<String>>> mergedGroup =
      getOnlyElementInWindow(grouped, window(0, 15));
  assertThat(mergedGroup.getValue().getValue(), containsInAnyOrder("v1", "v2"));
  assertThat(mergedGroup.getTimestamp(), equalTo(window(0, 15).maxTimestamp()));

  // Untouched session [15, 25) holds only v3.
  TimestampedValue<KV<String, Iterable<String>>> loneGroup =
      getOnlyElementInWindow(grouped, window(15, 25));
  assertThat(loneGroup.getValue().getValue(), contains("v3"));
  assertThat(loneGroup.getTimestamp(), equalTo(window(15, 25).maxTimestamp()));
}